Merge branch 'comfyanonymous:master' into master
commit e440423c28
@@ -141,7 +141,7 @@ Put your VAE in: models/vae
 
 ### AMD GPUs (Linux only)
 AMD users can install rocm and pytorch with pip if you don't have it already installed, this is the command to install the stable version:
 
-```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.1```
+```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.2```
 
 This is the command to install the nightly with ROCm 6.2 which might have some performance improvements:
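A quick way to confirm the ROCm wheel is active after either install command (a minimal sketch, not part of the commit; `torch.version.hip` is populated only on ROCm builds):

```python
# Sanity check after installing a ROCm PyTorch wheel.
import torch

print(torch.__version__)          # e.g. "2.x.y+rocm6.2" for the stable wheel above
print(torch.version.hip)          # HIP/ROCm version string; None on CUDA/CPU builds
print(torch.cuda.is_available())  # ROCm devices are exposed through the torch.cuda API
```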
@@ -612,7 +612,9 @@ class ContinuousTransformer(nn.Module):
         return_info = False,
         **kwargs
     ):
+        patches_replace = kwargs.get("transformer_options", {}).get("patches_replace", {})
         batch, seq, device = *x.shape[:2], x.device
+        context = kwargs["context"]
 
         info = {
             "hidden_states": [],
@@ -643,9 +645,19 @@ class ContinuousTransformer(nn.Module):
         if self.use_sinusoidal_emb or self.use_abs_pos_emb:
             x = x + self.pos_emb(x)
 
+        blocks_replace = patches_replace.get("dit", {})
         # Iterate over the transformer layers
-        for layer in self.layers:
-            x = layer(x, rotary_pos_emb = rotary_pos_emb, global_cond=global_cond, **kwargs)
+        for i, layer in enumerate(self.layers):
+            if ("double_block", i) in blocks_replace:
+                def block_wrap(args):
+                    out = {}
+                    out["img"] = layer(args["img"], rotary_pos_emb=args["pe"], global_cond=args["vec"], context=args["txt"])
+                    return out
+
+                out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "vec": global_cond, "pe": rotary_pos_emb}, {"original_block": block_wrap})
+                x = out["img"]
+            else:
+                x = layer(x, rotary_pos_emb = rotary_pos_emb, global_cond=global_cond, context=context)
             # x = checkpoint(layer, x, rotary_pos_emb = rotary_pos_emb, global_cond=global_cond, **kwargs)
 
         if return_info:
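For context, the hook added above looks up a replacement callable keyed by `("double_block", i)` and hands it the block inputs plus the original block. Below is a minimal sketch of registering such a patch: the `"dit"` / `("double_block", i)` keys and the callback signature come from the diff itself, while the patch body and layer index are purely illustrative, and how `transformer_options` reaches `forward` (via `**kwargs` here) follows ComfyUI's model-patching conventions.

```python
# Sketch: a replacement patch for one transformer layer of the audio DiT.
def my_double_block_patch(args, extra):
    out = extra["original_block"](args)  # runs the wrapped layer unchanged
    out["img"] = out["img"] * 1.05       # hypothetical tweak to the block output
    return out

transformer_options = {
    "patches_replace": {
        "dit": {
            ("double_block", 3): my_double_block_patch,  # replace only layer 3
        },
    },
}
# With these options passed into the model call, layer 3 is routed through
# my_double_block_patch instead of being called directly.
```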
@@ -874,7 +886,6 @@ class AudioDiffusionTransformer(nn.Module):
         mask=None,
         return_info=False,
         control=None,
-        transformer_options={},
         **kwargs):
         return self._forward(
             x,
@@ -372,10 +372,10 @@ def attention_xformers(q, k, v, heads, mask=None, attn_precision=None, skip_resh
         )
 
     if mask is not None:
-        pad = 8 - q.shape[1] % 8
-        mask_out = torch.empty([q.shape[0], q.shape[1], q.shape[1] + pad], dtype=q.dtype, device=q.device)
-        mask_out[:, :, :mask.shape[-1]] = mask
-        mask = mask_out[:, :, :mask.shape[-1]]
+        pad = 8 - mask.shape[-1] % 8
+        mask_out = torch.empty([q.shape[0], q.shape[2], q.shape[1], mask.shape[-1] + pad], dtype=q.dtype, device=q.device)
+        mask_out[..., :mask.shape[-1]] = mask
+        mask = mask_out[..., :mask.shape[-1]]
 
     out = xformers.ops.memory_efficient_attention(q, k, v, attn_bias=mask)
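The padding here exists because xformers' `memory_efficient_attention` expects the bias tensor's last dimension to be allocated in multiples of 8: the code allocates a padded buffer, copies the mask in, and slices back so the view has aligned storage but only the real key positions. A standalone sketch of the same trick, assuming the `[batch, seq, heads, dim_head]` layout `q` has at this point in the function (the helper name is illustrative):

```python
import torch

def pad_attn_bias(mask, q):
    # Allocate [batch, heads, q_len, kv_len] with the last dim padded to a
    # multiple of 8, then return the unpadded (but aligned) view.
    pad = 8 - mask.shape[-1] % 8
    mask_out = torch.empty(
        [q.shape[0], q.shape[2], q.shape[1], mask.shape[-1] + pad],
        dtype=q.dtype, device=q.device,
    )
    mask_out[..., :mask.shape[-1]] = mask
    # The slice keeps the padded storage but exposes only the real positions.
    return mask_out[..., :mask.shape[-1]]

q = torch.randn(2, 77, 8, 64)      # [batch, seq, heads, dim_head]
bias = torch.zeros(2, 8, 77, 77)   # kv_len = 77, not a multiple of 8
aligned = pad_attn_bias(bias, q)
print(aligned.shape)               # torch.Size([2, 8, 77, 77])
print(aligned.stride()[-2])        # 80: the underlying row stride is padded
```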
nodes.py
@@ -382,7 +382,7 @@ class InpaintModelConditioning:
                              "vae": ("VAE", ),
                              "pixels": ("IMAGE", ),
                              "mask": ("MASK", ),
-                             "add_noise_mask": ("BOOLEAN", {"default": True, "tooltip": "Add a noise mask to the latent so sampling will only happen within the mask. Might improve results or completely break things depending on the model."}),
+                             "noise_mask": ("BOOLEAN", {"default": True, "tooltip": "Add a noise mask to the latent so sampling will only happen within the mask. Might improve results or completely break things depending on the model."}),
                              }}
 
     RETURN_TYPES = ("CONDITIONING","CONDITIONING","LATENT")
@@ -391,7 +391,7 @@ class InpaintModelConditioning:
 
     CATEGORY = "conditioning/inpaint"
 
-    def encode(self, positive, negative, pixels, vae, mask, add_noise_mask):
+    def encode(self, positive, negative, pixels, vae, mask, noise_mask):
         x = (pixels.shape[1] // 8) * 8
         y = (pixels.shape[2] // 8) * 8
         mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(pixels.shape[1], pixels.shape[2]), mode="bilinear")
@@ -415,7 +415,7 @@ class InpaintModelConditioning:
         out_latent = {}
 
         out_latent["samples"] = orig_latent
-        if add_noise_mask:
+        if noise_mask:
             out_latent["noise_mask"] = mask
 
         out = []
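For illustration: ComfyUI's executor passes each key of the node's `required` dict as a keyword argument to the node's function, which is why the widget key and the `encode` parameter are renamed together. A minimal sketch of the resulting call shape; the input objects are placeholders standing in for values produced by an executing graph:

```python
# Sketch: the INPUT_TYPES key and the encode() keyword must stay in sync.
# positive/negative/pixels/vae/mask are placeholders for graph-produced values.
node = InpaintModelConditioning()
positive_out, negative_out, latent = node.encode(
    positive, negative, pixels, vae, mask,
    noise_mask=True,  # matches the "noise_mask" BOOLEAN input above
)
assert "noise_mask" in latent  # set only when noise_mask=True, per the diff
```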