From e85be36bd2c12f335abdf75669b994c535bbb126 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 8 Sep 2023 14:06:58 -0400 Subject: [PATCH] Add a penultimate_hidden_states to the clip vision output. --- comfy/clip_vision.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/comfy/clip_vision.py b/comfy/clip_vision.py index daaa2f2b..9b95ae00 100644 --- a/comfy/clip_vision.py +++ b/comfy/clip_vision.py @@ -49,12 +49,16 @@ class ClipVisionModel(): precision_scope = lambda a, b: contextlib.nullcontext(a) with precision_scope(comfy.model_management.get_autocast_device(self.load_device), torch.float32): - outputs = self.model(pixel_values=pixel_values) + outputs = self.model(pixel_values=pixel_values, output_hidden_states=True) for k in outputs: t = outputs[k] if t is not None: - outputs[k] = t.cpu() + if k == 'hidden_states': + outputs["penultimate_hidden_states"] = t[-2].cpu() + else: + outputs[k] = t.cpu() + return outputs def convert_to_transformers(sd, prefix):