2024-11-22 13:44:42 +00:00
|
|
|
import nodes
|
|
|
|
import node_helpers
|
|
|
|
import torch
|
|
|
|
import comfy.model_management
|
|
|
|
import comfy.model_sampling
|
|
|
|
import math
|
|
|
|
|
|
|
|
class EmptyLTXVLatentVideo:
    """Node that creates an all-zero latent tensor sized for an LTXV video."""

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "generate"

    CATEGORY = "latent/video/ltxv"

    @classmethod
    def INPUT_TYPES(s):
        """Return the input spec: width, height, length and batch_size (all INT)."""
        required = {
            "width": ("INT", {"default": 768, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
            "height": ("INT", {"default": 512, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
            "length": ("INT", {"default": 97, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 8}),
            "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
        }
        return {"required": required}

    def generate(self, width, height, length, batch_size=1):
        """Build the zero latent and wrap it in a ``{"samples": ...}`` dict.

        The tensor shape is
        ``[batch_size, 128, (length - 1) // 8 + 1, height // 32, width // 32]``
        and it is allocated on ``comfy.model_management.intermediate_device()``.

        Returns:
            A 1-tuple holding the latent dict.
        """
        latent_frames = ((length - 1) // 8) + 1
        shape = [batch_size, 128, latent_frames, height // 32, width // 32]
        samples = torch.zeros(shape, device=comfy.model_management.intermediate_device())
        return ({"samples": samples}, )
|
|
|
|
|
|
|
|
|
|
|
|
class LTXVImgToVideo:
    """Node that encodes an image with the VAE and uses it to guide LTXV sampling.

    The encoded image is attached to both conditionings under the
    ``"guiding_latent"`` key and is also written into the start of an otherwise
    all-zero latent.
    """

    @classmethod
    def INPUT_TYPES(s):
        """Return the input spec: conditionings, VAE, image and the video size."""
        return {"required": {"positive": ("CONDITIONING", ),
                             "negative": ("CONDITIONING", ),
                             "vae": ("VAE",),
                             "image": ("IMAGE",),
                             "width": ("INT", {"default": 768, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
                             "height": ("INT", {"default": 512, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
                             "length": ("INT", {"default": 97, "min": 9, "max": nodes.MAX_RESOLUTION, "step": 8}),
                             "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096})}}

    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
    RETURN_NAMES = ("positive", "negative", "latent")

    CATEGORY = "conditioning/video_models"
    FUNCTION = "generate"

    def generate(self, positive, negative, image, vae, width, height, length, batch_size):
        """Encode ``image`` and return guided conditionings plus the start latent.

        Args:
            positive: Conditioning that receives the ``"guiding_latent"`` value.
            negative: Conditioning that receives the same ``"guiding_latent"`` value.
            image: 4-D image tensor; presumably channels-last, since the last
                dimension is moved to position 1 for resizing - TODO confirm.
            vae: Object whose ``encode`` method turns pixels into a latent.
            width: Target width passed to the resize; ``width // 32`` is the
                latent's last dimension.
            height: Target height passed to the resize; ``height // 32`` is the
                latent's second-to-last dimension.
            length: Requested length; the latent gets ``(length - 1) // 8 + 1``
                entries along dimension 2.
            batch_size: Size of the latent's first dimension.

        Returns:
            ``(positive, negative, {"samples": latent})``.
        """
        # This file only imports comfy.model_management and comfy.model_sampling,
        # so import comfy.utils explicitly rather than relying on some other
        # module having loaded it as a side effect.
        from comfy.utils import common_upscale

        pixels = common_upscale(image.movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
        # Only the first three entries of the last dimension are encoded.
        encode_pixels = pixels[:, :, :, :3]
        t = vae.encode(encode_pixels)
        positive = node_helpers.conditioning_set_values(positive, {"guiding_latent": t})
        negative = node_helpers.conditioning_set_values(negative, {"guiding_latent": t})

        latent = torch.zeros([batch_size, 128, ((length - 1) // 8) + 1, height // 32, width // 32], device=comfy.model_management.intermediate_device())
        # Seed the beginning of dimension 2 with the encoded image; the rest stays zero.
        latent[:, :, :t.shape[2]] = t
        return (positive, negative, {"samples": latent}, )
|
|
|
|
|
|
|
|
|
|
|
|
class LTXVConditioning:
    """Node that records a frame rate on the positive and negative conditioning."""

    RETURN_TYPES = ("CONDITIONING", "CONDITIONING")
    RETURN_NAMES = ("positive", "negative")
    FUNCTION = "append"

    CATEGORY = "conditioning/video_models"

    @classmethod
    def INPUT_TYPES(s):
        """Return the input spec: two conditionings and a FLOAT frame rate."""
        required = {
            "positive": ("CONDITIONING", ),
            "negative": ("CONDITIONING", ),
            "frame_rate": ("FLOAT", {"default": 25.0, "min": 0.0, "max": 1000.0, "step": 0.01}),
        }
        return {"required": required}

    def append(self, positive, negative, frame_rate):
        """Set ``"frame_rate"`` on both conditionings via ``node_helpers``.

        Returns:
            ``(positive, negative)`` as returned by
            ``node_helpers.conditioning_set_values``.
        """
        # A fresh values dict is built for each call, positive first.
        updated = [
            node_helpers.conditioning_set_values(cond, {"frame_rate": frame_rate})
            for cond in (positive, negative)
        ]
        return (updated[0], updated[1])
|
|
|
|
|
|
|
|
|
|
|
|
class ModelSamplingLTXV:
    """Node that patches a model's ``model_sampling`` object with a token-dependent shift."""

    RETURN_TYPES = ("MODEL",)
    FUNCTION = "patch"

    CATEGORY = "advanced/model"

    @classmethod
    def INPUT_TYPES(s):
        """Return the input spec: model, max_shift, base_shift and an optional latent."""
        required = {
            "model": ("MODEL",),
            "max_shift": ("FLOAT", {"default": 2.05, "min": 0.0, "max": 100.0, "step":0.01}),
            "base_shift": ("FLOAT", {"default": 0.95, "min": 0.0, "max": 100.0, "step":0.01}),
        }
        optional = {"latent": ("LATENT",), }
        return {"required": required, "optional": optional}

    def patch(self, model, max_shift, base_shift, latent=None):
        """Clone ``model`` and attach a sampling object configured with the computed shift.

        The shift is a linear function of the token count that equals
        ``base_shift`` at 1024 tokens and ``max_shift`` at 4096 tokens. The
        token count is the product of the latent's dimensions from index 2
        onwards, or 4096 when no latent is supplied.

        Returns:
            A 1-tuple holding the patched clone.
        """
        patched = model.clone()

        tokens = 4096 if latent is None else math.prod(latent["samples"].shape[2:])

        # Line through (1024, base_shift) and (4096, max_shift).
        low_tokens, high_tokens = 1024, 4096
        slope = (max_shift - base_shift) / (high_tokens - low_tokens)
        intercept = base_shift - slope * low_tokens
        shift = tokens * slope + intercept

        class ModelSamplingAdvanced(comfy.model_sampling.ModelSamplingFlux, comfy.model_sampling.CONST):
            pass

        sampler = ModelSamplingAdvanced(model.model.model_config)
        sampler.set_parameters(shift=shift)
        patched.add_object_patch("model_sampling", sampler)
        return (patched, )
|
|
|
|
|
|
|
|
|
|
|
|
class LTXVScheduler:
|
|
|
|
@classmethod
|
|
|
|
def INPUT_TYPES(s):
|
|
|
|
return {"required":
|
|
|
|
{"steps": ("INT", {"default": 20, "min": 1, "max": 10000}),
|
|
|
|
"max_shift": ("FLOAT", {"default": 2.05, "min": 0.0, "max": 100.0, "step":0.01}),
|
|
|
|
"base_shift": ("FLOAT", {"default": 0.95, "min": 0.0, "max": 100.0, "step":0.01}),
|
|
|
|
"stretch": ("BOOLEAN", {
|
|
|
|
"default": True,
|
|
|
|
"tooltip": "Stretch the sigmas to be in the range [terminal, 1]."
|
|
|
|
}),
|
|
|
|
"terminal": (
|
|
|
|
"FLOAT",
|
|
|
|
{
|
|
|
|
"default": 0.1, "min": 0.0, "max": 0.99, "step": 0.01,
|
|
|
|
"tooltip": "The terminal value of the sigmas after stretching."
|
|
|
|
},
|
|
|
|
),
|
|
|
|
},
|
|
|
|
"optional": {"latent": ("LATENT",), }
|
|
|
|
}
|
|
|
|
|
|
|
|
RETURN_TYPES = ("SIGMAS",)
|
|
|
|
CATEGORY = "sampling/custom_sampling/schedulers"
|
|
|
|
|
|
|
|
FUNCTION = "get_sigmas"
|
|
|
|
|
|
|
|
def get_sigmas(self, steps, max_shift, base_shift, stretch, terminal, latent=None):
|
|
|
|
if latent is None:
|
|
|
|
tokens = 4096
|
|
|
|
else:
|
|
|
|
tokens = math.prod(latent["samples"].shape[2:])
|
|
|
|
|
|
|
|
sigmas = torch.linspace(1.0, 0.0, steps + 1)
|
|
|
|
|
|
|
|
x1 = 1024
|
|
|
|
x2 = 4096
|
|
|
|
mm = (max_shift - base_shift) / (x2 - x1)
|
|
|
|
b = base_shift - mm * x1
|
|
|
|
sigma_shift = (tokens) * mm + b
|
|
|
|
|
|
|
|
power = 1
|
|
|
|
sigmas = torch.where(
|
|
|
|
sigmas != 0,
|
|
|
|
math.exp(sigma_shift) / (math.exp(sigma_shift) + (1 / sigmas - 1) ** power),
|
|
|
|
0,
|
|
|
|
)
|
|
|
|
|
|
|
|
# Stretch sigmas so that its final value matches the given terminal value.
|
|
|
|
if stretch:
|
|
|
|
non_zero_mask = sigmas != 0
|
|
|
|
non_zero_sigmas = sigmas[non_zero_mask]
|
|
|
|
one_minus_z = 1.0 - non_zero_sigmas
|
|
|
|
scale_factor = one_minus_z[-1] / (1.0 - terminal)
|
|
|
|
stretched = 1.0 - (one_minus_z / scale_factor)
|
|
|
|
sigmas[non_zero_mask] = stretched
|
|
|
|
|
|
|
|
return (sigmas,)
|
|
|
|
|
|
|
|
|
|
|
|
# Maps each node identifier string to the class defined in this file that
# implements it.
NODE_CLASS_MAPPINGS = {
    "EmptyLTXVLatentVideo": EmptyLTXVLatentVideo,
    "LTXVImgToVideo": LTXVImgToVideo,
    "ModelSamplingLTXV": ModelSamplingLTXV,
    "LTXVConditioning": LTXVConditioning,
    "LTXVScheduler": LTXVScheduler,
}
|