Removed a .to() call on the result of calculate_weight in patch_hook_weight_to_device that was corrupting the fp8 intermediate results before they were passed into the stochastic_rounding call

Jedrzej Kosinski 2024-11-18 20:42:51 -06:00
parent 9fe3db4c3a
commit 9b2b1303b7
1 changed file with 2 additions and 2 deletions


@@ -1036,8 +1036,8 @@ class ModelPatcher:
             self.hook_backup[key] = (weight.to(device=target_device, copy=True), weight.device)
         # TODO: properly handle lowvram situations for cached hook patches
         out_weight = comfy.lora.calculate_weight(combined_patches[key],
-                                                 comfy.model_management.cast_to_device(weight, weight.device, torch.float32, copy=True),
-                                                 key, original_weights=original_weights).to(weight.dtype)
+                                                 comfy.model_management.cast_to_device(weight, weight.device, torch.float32, copy=True),
+                                                 key, original_weights=original_weights)
         del original_weights[key]
         out_weight = comfy.float.stochastic_rounding(out_weight, weight.dtype, seed=string_to_seed(key))
         if self.hook_mode == comfy.hooks.EnumHookMode.MaxSpeed:
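
For context, a minimal standalone sketch of why the removed .to(weight.dtype) call was a problem. This is not the repository's code: stochastic_round below is an illustrative stand-in for comfy.float.stochastic_rounding, and the coarse 0.5 grid stands in for the fp8 representable values. Casting to the low-precision dtype first performs a deterministic round-to-nearest, so the subsequent stochastic rounding receives already-rounded values and the unbiasedness it is meant to provide is lost.

import torch

def stochastic_round(x: torch.Tensor, step: float) -> torch.Tensor:
    # Round each value down or up to the nearest multiple of `step`,
    # choosing "up" with probability equal to the fractional remainder,
    # so the result is unbiased in expectation.
    scaled = x / step
    low = torch.floor(scaled)
    prob_up = scaled - low
    noise = torch.rand(x.shape)
    return (low + (noise < prob_up).to(x.dtype)) * step

x = torch.full((100_000,), 0.3)   # true value between grid points 0.0 and 0.5
step = 0.5                        # stand-in for the spacing of fp8 values

# Correct order: stochastic rounding sees the full-precision float32 value.
good = stochastic_round(x, step)
print(good.mean())                # ~0.3 on average (unbiased)

# Buggy order (what the removed .to() call effectively did): a deterministic
# round-to-nearest happens first, so stochastic rounding receives 0.5 for
# every element and can no longer preserve the original mean.
pre_rounded = torch.round(x / step) * step
bad = stochastic_round(pre_rounded, step)
print(bad.mean())                 # ~0.5, biased away from 0.3

With the .to() call removed, calculate_weight's float32 result flows directly into comfy.float.stochastic_rounding, which then performs the one and only conversion down to weight.dtype.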