Add a penultimate_hidden_states key to the clip vision output.
parent 10de64af7f
commit e85be36bd2
@@ -49,12 +49,16 @@ class ClipVisionModel():
             precision_scope = lambda a, b: contextlib.nullcontext(a)
 
         with precision_scope(comfy.model_management.get_autocast_device(self.load_device), torch.float32):
-            outputs = self.model(pixel_values=pixel_values)
+            outputs = self.model(pixel_values=pixel_values, output_hidden_states=True)
 
         for k in outputs:
             t = outputs[k]
             if t is not None:
-                outputs[k] = t.cpu()
+                if k == 'hidden_states':
+                    outputs["penultimate_hidden_states"] = t[-2].cpu()
+                else:
+                    outputs[k] = t.cpu()
 
         return outputs
 
 def convert_to_transformers(sd, prefix):
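For context, a minimal standalone sketch of what output_hidden_states=True returns from a Hugging Face CLIP vision model and why index -2 picks out the penultimate layer. This is not ComfyUI code; the checkpoint name and the printed shape are assumptions for illustration and are not part of this commit.

# Standalone sketch, assuming the transformers library and a CLIP ViT-L/14 checkpoint.
import torch
from transformers import CLIPVisionModel

model = CLIPVisionModel.from_pretrained("openai/clip-vit-large-patch14")  # assumed checkpoint
pixel_values = torch.randn(1, 3, 224, 224)  # dummy image batch

with torch.no_grad():
    outputs = model(pixel_values=pixel_values, output_hidden_states=True)

# hidden_states is a tuple: the embedding output followed by one tensor per
# transformer layer, so [-1] is the final layer and [-2] is the penultimate one.
penultimate = outputs.hidden_states[-2]
print(penultimate.shape)  # e.g. torch.Size([1, 257, 1024]) for ViT-L/14

With this change, callers of the clip vision encode path can read the new "penultimate_hidden_states" key from the returned outputs, alongside the tensors that were already being moved to the CPU.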