Add a penultimate_hidden_states entry to the CLIP vision output.

This commit is contained in:
comfyanonymous 2023-09-08 14:06:58 -04:00
parent 10de64af7f
commit e85be36bd2
1 changed file with 6 additions and 2 deletions

View File

@@ -49,12 +49,16 @@ class ClipVisionModel():
precision_scope = lambda a, b: contextlib.nullcontext(a) precision_scope = lambda a, b: contextlib.nullcontext(a)
with precision_scope(comfy.model_management.get_autocast_device(self.load_device), torch.float32): with precision_scope(comfy.model_management.get_autocast_device(self.load_device), torch.float32):
outputs = self.model(pixel_values=pixel_values) outputs = self.model(pixel_values=pixel_values, output_hidden_states=True)
for k in outputs: for k in outputs:
t = outputs[k] t = outputs[k]
if t is not None: if t is not None:
outputs[k] = t.cpu() if k == 'hidden_states':
outputs["penultimate_hidden_states"] = t[-2].cpu()
else:
outputs[k] = t.cpu()
return outputs return outputs
def convert_to_transformers(sd, prefix): def convert_to_transformers(sd, prefix):