Removed a .to() call on the result of calculate_weight in patch_hook_weight_to_device that was corrupting the fp8 intermediate results before they were passed into the stochastic_rounding call

Jedrzej Kosinski 2024-11-18 20:42:51 -06:00
parent 9fe3db4c3a
commit 9b2b1303b7
1 changed file with 2 additions and 2 deletions


@@ -1036,8 +1036,8 @@ class ModelPatcher:
             self.hook_backup[key] = (weight.to(device=target_device, copy=True), weight.device)
         # TODO: properly handle lowvram situations for cached hook patches
         out_weight = comfy.lora.calculate_weight(combined_patches[key],
-                                                 comfy.model_management.cast_to_device(weight, weight.device, torch.float32, copy=True),
-                                                 key, original_weights=original_weights).to(weight.dtype)
+                                                 comfy.model_management.cast_to_device(weight, weight.device, torch.float32, copy=True),
+                                                 key, original_weights=original_weights)
         del original_weights[key]
         out_weight = comfy.float.stochastic_rounding(out_weight, weight.dtype, seed=string_to_seed(key))
         if self.hook_mode == comfy.hooks.EnumHookMode.MaxSpeed:
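
For context, a minimal standalone sketch of why the removed .to(weight.dtype) call was a problem. This is not the repository's code: stochastic_round below is an illustrative stand-in for comfy.float.stochastic_rounding, and the coarse 0.5 grid stands in for the fp8 representable values. Casting to the low-precision dtype first performs a deterministic round-to-nearest, so the subsequent stochastic rounding receives already-rounded values and the unbiasedness it is meant to provide is lost.

import torch

def stochastic_round(x: torch.Tensor, step: float) -> torch.Tensor:
    # Round each value down or up to the nearest multiple of `step`,
    # choosing "up" with probability equal to the fractional remainder,
    # so the result is unbiased in expectation.
    scaled = x / step
    low = torch.floor(scaled)
    prob_up = scaled - low
    noise = torch.rand(x.shape)
    return (low + (noise < prob_up).to(x.dtype)) * step

x = torch.full((100_000,), 0.3)   # true value between grid points 0.0 and 0.5
step = 0.5                        # stand-in for the spacing of fp8 values

# Correct order: stochastic rounding sees the full-precision float32 value.
good = stochastic_round(x, step)
print(good.mean())                # ~0.3 on average (unbiased)

# Buggy order (what the removed .to() call effectively did): a deterministic
# round-to-nearest happens first, so stochastic rounding receives 0.5 for
# every element and can no longer preserve the original mean.
pre_rounded = torch.round(x / step) * step
bad = stochastic_round(pre_rounded, step)
print(bad.mean())                 # ~0.5, biased away from 0.3

With the .to() call removed, calculate_weight's float32 result flows directly into comfy.float.stochastic_rounding, which then performs the one and only conversion down to weight.dtype.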