diff --git a/comfy/ldm/flux/model.py b/comfy/ldm/flux/model.py
index 233d7839..ae1ed109 100644
--- a/comfy/ldm/flux/model.py
+++ b/comfy/ldm/flux/model.py
@@ -96,7 +96,9 @@ class Flux(nn.Module):
         y: Tensor,
         guidance: Tensor = None,
         control=None,
+        transformer_options={},
     ) -> Tensor:
+        patches_replace = transformer_options.get("patches_replace", {})
         if img.ndim != 3 or txt.ndim != 3:
             raise ValueError("Input img and txt tensors must have 3 dimensions.")

@@ -114,8 +116,19 @@ class Flux(nn.Module):
         ids = torch.cat((txt_ids, img_ids), dim=1)
         pe = self.pe_embedder(ids)

+        blocks_replace = patches_replace.get("dit", {})
         for i, block in enumerate(self.double_blocks):
-            img, txt = block(img=img, txt=txt, vec=vec, pe=pe)
+            if ("double_block", i) in blocks_replace:
+                def block_wrap(args):
+                    out = {}
+                    out["img"], out["txt"] = block(img=args["img"], txt=args["txt"], vec=args["vec"], pe=args["pe"])
+                    return out
+
+                out = blocks_replace[("double_block", i)]({"img": img, "txt": txt, "vec": vec, "pe": pe}, {"original_block": block_wrap})
+                txt = out["txt"]
+                img = out["img"]
+            else:
+                img, txt = block(img=img, txt=txt, vec=vec, pe=pe)

             if control is not None: # Controlnet
                 control_i = control.get("input")
@@ -127,7 +140,16 @@ class Flux(nn.Module):
         img = torch.cat((txt, img), 1)

         for i, block in enumerate(self.single_blocks):
-            img = block(img, vec=vec, pe=pe)
+            if ("single_block", i) in blocks_replace:
+                def block_wrap(args):
+                    out = {}
+                    out["img"] = block(args["img"], vec=args["vec"], pe=args["pe"])
+                    return out
+
+                out = blocks_replace[("single_block", i)]({"img": img, "vec": vec, "pe": pe}, {"original_block": block_wrap})
+                img = out["img"]
+            else:
+                img = block(img, vec=vec, pe=pe)

             if control is not None: # Controlnet
                 control_o = control.get("output")
@@ -141,7 +163,7 @@ class Flux(nn.Module):
         img = self.final_layer(img, vec) # (N, T, patch_size ** 2 * out_channels)
         return img

-    def forward(self, x, timestep, context, y, guidance, control=None, **kwargs):
+    def forward(self, x, timestep, context, y, guidance, control=None, transformer_options={}, **kwargs):
         bs, c, h, w = x.shape
         patch_size = 2
         x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size))
@@ -156,5 +178,5 @@ class Flux(nn.Module):
         img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs)

         txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype)
-        out = self.forward_orig(img, img_ids, context, txt_ids, timestep, y, guidance, control)
+        out = self.forward_orig(img, img_ids, context, txt_ids, timestep, y, guidance, control, transformer_options)
         return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h,:w]
diff --git a/comfy/utils.py b/comfy/utils.py
index 3c5d06a4..04926c1e 100644
--- a/comfy/utils.py
+++ b/comfy/utils.py
@@ -853,6 +853,7 @@ def reshape_mask(input_mask, output_shape):
     dims = len(output_shape) - 2

     if dims == 1:
+        mask = input_mask
         scale_mode = "linear"

     if dims == 2:
diff --git a/comfy_extras/nodes_sd3.py b/comfy_extras/nodes_sd3.py
index 2567a413..2d8113da 100644
--- a/comfy_extras/nodes_sd3.py
+++ b/comfy_extras/nodes_sd3.py
@@ -130,6 +130,9 @@ class SkipLayerGuidanceSD3:
         sigma_start = model_sampling.percent_to_sigma(start_percent)
         sigma_end = model_sampling.percent_to_sigma(end_percent)

+        layers = re.findall(r'\d+', layers)
+        layers = [int(i) for i in layers]
+
         def post_cfg_function(args):
             model = args["model"]
             cond_pred = args["cond_denoised"]
@@ -149,8 +152,6 @@ class SkipLayerGuidanceSD3:
                 cfg_result = cfg_result + (cond_pred - slg) * scale
             return cfg_result

-        layers = re.findall(r'\d+', layers)
-        layers = [int(i) for i in layers]
         m = model.clone()
         m.set_model_sampler_post_cfg_function(post_cfg_function)

diff --git a/main.py b/main.py
index 47a04589..c2c2ff8c 100644
--- a/main.py
+++ b/main.py
@@ -71,6 +71,7 @@ if os.name == "nt":
 if __name__ == "__main__":
     if args.cuda_device is not None:
         os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda_device)
+        os.environ['HIP_VISIBLE_DEVICES'] = str(args.cuda_device)
         logging.info("Set cuda device to: {}".format(args.cuda_device))

     if args.deterministic:
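
For context on the new hook in `Flux.forward_orig`: a callable registered under `transformer_options["patches_replace"]["dit"]` with a `("double_block", i)` or `("single_block", i)` key is invoked with the block inputs as a dict plus `{"original_block": block_wrap}`, and must return a dict with the same output keys. A minimal sketch of such a patch follows; the function name and the hand-built options dict are illustrative only (in practice the dict is assembled by the model patching machinery and passed to `forward` as `transformer_options`):

```python
# Sketch of a "dit" block-replace patch matching the calling convention above.
# `scale_double_block` and the hand-built dict are illustrative, not part of this PR.

def scale_double_block(args, extra):
    # args carries the block inputs: {"img", "txt", "vec", "pe"}
    out = extra["original_block"](args)  # run the wrapped original double block
    out["img"] = out["img"] * 1.0        # post-process the image stream here
    return out

transformer_options = {
    "patches_replace": {
        "dit": {
            ("double_block", 3): scale_double_block,  # replace double block 3 only
        }
    }
}
```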
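On the `SkipLayerGuidanceSD3` change: the layer-string parsing now runs before `post_cfg_function` is defined, so the parsed ints are in place when the closure is installed. The regex pulls every run of digits, making the `layers` input delimiter-agnostic; a quick illustration (the input string is just an example):

```python
import re

layers = "7, 8, 9"
layers = [int(i) for i in re.findall(r'\d+', layers)]
print(layers)  # [7, 8, 9]
```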