diff --git a/README.md b/README.md
index 03d2978a..1d3e64dd 100644
--- a/README.md
+++ b/README.md
@@ -141,7 +141,7 @@ Put your VAE in: models/vae
 ### AMD GPUs (Linux only)
 AMD users can install rocm and pytorch with pip if you don't have it already installed, this is the command to install the stable version:
 
-```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.1```
+```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.2```
 
 This is the command to install the nightly with ROCm 6.2 which might have some performance improvements:
 
diff --git a/comfy/ldm/audio/dit.py b/comfy/ldm/audio/dit.py
index 4d2185be..5b3f498f 100644
--- a/comfy/ldm/audio/dit.py
+++ b/comfy/ldm/audio/dit.py
@@ -612,7 +612,9 @@ class ContinuousTransformer(nn.Module):
         return_info = False,
         **kwargs
     ):
+        patches_replace = kwargs.get("transformer_options", {}).get("patches_replace", {})
         batch, seq, device = *x.shape[:2], x.device
+        context = kwargs["context"]
 
         info = {
             "hidden_states": [],
@@ -643,9 +645,19 @@ class ContinuousTransformer(nn.Module):
         if self.use_sinusoidal_emb or self.use_abs_pos_emb:
             x = x + self.pos_emb(x)
 
+        blocks_replace = patches_replace.get("dit", {})
         # Iterate over the transformer layers
-        for layer in self.layers:
-            x = layer(x, rotary_pos_emb = rotary_pos_emb, global_cond=global_cond, **kwargs)
+        for i, layer in enumerate(self.layers):
+            if ("double_block", i) in blocks_replace:
+                def block_wrap(args):
+                    out = {}
+                    out["img"] = layer(args["img"], rotary_pos_emb=args["pe"], global_cond=args["vec"], context=args["txt"])
+                    return out
+
+                out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "vec": global_cond, "pe": rotary_pos_emb}, {"original_block": block_wrap})
+                x = out["img"]
+            else:
+                x = layer(x, rotary_pos_emb = rotary_pos_emb, global_cond=global_cond, context=context)
             # x = checkpoint(layer, x, rotary_pos_emb = rotary_pos_emb, global_cond=global_cond, **kwargs)
 
             if return_info:
@@ -874,7 +886,6 @@ class AudioDiffusionTransformer(nn.Module):
         mask=None,
         return_info=False,
         control=None,
-        transformer_options={},
         **kwargs):
             return self._forward(
                 x,
diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py
index 3f543abd..7b4ee215 100644
--- a/comfy/ldm/modules/attention.py
+++ b/comfy/ldm/modules/attention.py
@@ -372,10 +372,10 @@ def attention_xformers(q, k, v, heads, mask=None, attn_precision=None, skip_resh
         )
 
     if mask is not None:
-        pad = 8 - q.shape[1] % 8
-        mask_out = torch.empty([q.shape[0], q.shape[1], q.shape[1] + pad], dtype=q.dtype, device=q.device)
-        mask_out[:, :, :mask.shape[-1]] = mask
-        mask = mask_out[:, :, :mask.shape[-1]]
+        pad = 8 - mask.shape[-1] % 8
+        mask_out = torch.empty([q.shape[0], q.shape[2], q.shape[1], mask.shape[-1] + pad], dtype=q.dtype, device=q.device)
+        mask_out[..., :mask.shape[-1]] = mask
+        mask = mask_out[..., :mask.shape[-1]]
 
     out = xformers.ops.memory_efficient_attention(q, k, v, attn_bias=mask)
 
diff --git a/nodes.py b/nodes.py
index 300cfcf1..7f151586 100644
--- a/nodes.py
+++ b/nodes.py
@@ -382,7 +382,7 @@ class InpaintModelConditioning:
                              "vae": ("VAE", ),
                              "pixels": ("IMAGE", ),
                              "mask": ("MASK", ),
-                             "add_noise_mask": ("BOOLEAN", {"default": True, "tooltip": "Add a noise mask to the latent so sampling will only happen within the mask. Might improve results or completely break things depending on the model."}),
+                             "noise_mask": ("BOOLEAN", {"default": True, "tooltip": "Add a noise mask to the latent so sampling will only happen within the mask. Might improve results or completely break things depending on the model."}),
                              }}
 
     RETURN_TYPES = ("CONDITIONING","CONDITIONING","LATENT")
@@ -391,7 +391,7 @@ class InpaintModelConditioning:
 
     CATEGORY = "conditioning/inpaint"
 
-    def encode(self, positive, negative, pixels, vae, mask, add_noise_mask):
+    def encode(self, positive, negative, pixels, vae, mask, noise_mask):
         x = (pixels.shape[1] // 8) * 8
         y = (pixels.shape[2] // 8) * 8
         mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(pixels.shape[1], pixels.shape[2]), mode="bilinear")
@@ -415,7 +415,7 @@ class InpaintModelConditioning:
         out_latent = {}
 
         out_latent["samples"] = orig_latent
-        if add_noise_mask:
+        if noise_mask:
             out_latent["noise_mask"] = mask
 
         out = []