ComfyUI/comfy_extras/nodes_lt.py

import nodes
import node_helpers
import torch
import comfy.model_management
import comfy.model_sampling
import math

class EmptyLTXVLatentVideo:
    """Node that creates an all-zero LTXV video latent of the requested size."""

    CATEGORY = "latent/video/ltxv"
    FUNCTION = "generate"
    RETURN_TYPES = ("LATENT",)

    @classmethod
    def INPUT_TYPES(cls):
        """Describe the integer inputs (size, frame count, batch) of the node."""
        resolution_limit = nodes.MAX_RESOLUTION
        required = {
            "width": ("INT", {"default": 768, "min": 64, "max": resolution_limit, "step": 32}),
            "height": ("INT", {"default": 512, "min": 64, "max": resolution_limit, "step": 32}),
            "length": ("INT", {"default": 97, "min": 1, "max": resolution_limit, "step": 8}),
            "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
        }
        return {"required": required}

    def generate(self, width, height, length, batch_size=1):
        """Build a zero tensor shaped for the given pixel size and frame count.

        The tensor has 128 channels, ``((length - 1) // 8) + 1`` entries on the
        frame axis, and ``height // 32`` by ``width // 32`` spatial entries.

        Returns:
            A one-element tuple holding ``{"samples": tensor}``.
        """
        latent_frames = ((length - 1) // 8) + 1
        shape = [batch_size, 128, latent_frames, height // 32, width // 32]
        target_device = comfy.model_management.intermediate_device()
        samples = torch.zeros(shape, device=target_device)
        return ({"samples": samples},)


class LTXVImgToVideo:
    """Node that seeds an LTXV video latent and conditioning from an image."""

    @classmethod
    def INPUT_TYPES(s):
        """Describe the conditioning, VAE, image and size inputs of the node."""
        return {"required": {"positive": ("CONDITIONING", ),
                             "negative": ("CONDITIONING", ),
                             "vae": ("VAE",),
                             "image": ("IMAGE",),
                             "width": ("INT", {"default": 768, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
                             "height": ("INT", {"default": 512, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
                             "length": ("INT", {"default": 97, "min": 9, "max": nodes.MAX_RESOLUTION, "step": 8}),
                             "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096})}}

    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
    RETURN_NAMES = ("positive", "negative", "latent")

    CATEGORY = "conditioning/video_models"
    FUNCTION = "generate"

    def generate(self, positive, negative, image, vae, width, height, length, batch_size):
        """Encode ``image`` and place it at the start of a fresh zero latent.

        The image is resized to ``width`` x ``height`` (bilinear, center crop),
        reduced to its first three channels and encoded with ``vae``. The
        encoded latent is stored under ``"guiding_latent"`` in both
        conditionings and copied into the leading frame slots of the output.

        Returns:
            ``(positive, negative, {"samples": latent})``.
        """
        # The module header only imports comfy.model_management and
        # comfy.model_sampling; import comfy.utils explicitly so this call does
        # not depend on some other module having loaded the submodule first.
        import comfy.utils

        # common_upscale is called on a channels-first view; move the last axis
        # to position 1 for the call and back afterwards.
        pixels = comfy.utils.common_upscale(image.movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
        encode_pixels = pixels[:, :, :, :3]
        t = vae.encode(encode_pixels)
        positive = node_helpers.conditioning_set_values(positive, {"guiding_latent": t})
        negative = node_helpers.conditioning_set_values(negative, {"guiding_latent": t})

        latent = torch.zeros([batch_size, 128, ((length - 1) // 8) + 1, height // 32, width // 32], device=comfy.model_management.intermediate_device())
        # Overwrite the first t.shape[2] frame slots with the encoded image.
        latent[:, :, :t.shape[2]] = t
        return (positive, negative, {"samples": latent}, )


class LTXVConditioning:
    """Node that records a frame rate on positive and negative conditioning."""

    CATEGORY = "conditioning/video_models"
    FUNCTION = "append"
    RETURN_TYPES = ("CONDITIONING", "CONDITIONING")
    RETURN_NAMES = ("positive", "negative")

    @classmethod
    def INPUT_TYPES(cls):
        """Describe the two conditioning inputs and the frame-rate input."""
        frame_rate_spec = ("FLOAT", {"default": 25.0, "min": 0.0, "max": 1000.0, "step": 0.01})
        return {
            "required": {
                "positive": ("CONDITIONING",),
                "negative": ("CONDITIONING",),
                "frame_rate": frame_rate_spec,
            },
        }

    def append(self, positive, negative, frame_rate):
        """Set ``"frame_rate"`` on both conditionings and return them as a pair."""
        with_rate_pos = node_helpers.conditioning_set_values(positive, {"frame_rate": frame_rate})
        with_rate_neg = node_helpers.conditioning_set_values(negative, {"frame_rate": frame_rate})
        return (with_rate_pos, with_rate_neg)


class ModelSamplingLTXV:
    """Node that patches a model's sampling object with a size-dependent shift."""

    CATEGORY = "advanced/model"
    FUNCTION = "patch"
    RETURN_TYPES = ("MODEL",)

    @classmethod
    def INPUT_TYPES(cls):
        """Describe the model and shift inputs plus the optional latent."""
        required = {
            "model": ("MODEL",),
            "max_shift": ("FLOAT", {"default": 2.05, "min": 0.0, "max": 100.0, "step": 0.01}),
            "base_shift": ("FLOAT", {"default": 0.95, "min": 0.0, "max": 100.0, "step": 0.01}),
        }
        optional = {"latent": ("LATENT",)}
        return {"required": required, "optional": optional}

    def patch(self, model, max_shift, base_shift, latent=None):
        """Return a clone of ``model`` whose ``model_sampling`` uses a new shift.

        The shift lies on the straight line through ``(1024, base_shift)`` and
        ``(4096, max_shift)``, evaluated at the token count. The token count is
        the product of the latent's dimensions from index 2 onward, or 4096
        when no latent is supplied.
        """
        patched_model = model.clone()

        token_count = 4096 if latent is None else math.prod(latent["samples"].shape[2:])

        # Line through (low_tokens, base_shift) and (high_tokens, max_shift).
        low_tokens, high_tokens = 1024, 4096
        slope = (max_shift - base_shift) / (high_tokens - low_tokens)
        intercept = base_shift - slope * low_tokens
        shift = (token_count) * slope + intercept

        class ModelSamplingAdvanced(comfy.model_sampling.ModelSamplingFlux, comfy.model_sampling.CONST):
            pass

        sampling = ModelSamplingAdvanced(model.model.model_config)
        sampling.set_parameters(shift=shift)
        patched_model.add_object_patch("model_sampling", sampling)
        return (patched_model,)


class LTXVScheduler:
    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                    {"steps": ("INT", {"default": 20, "min": 1, "max": 10000}),
                     "max_shift": ("FLOAT", {"default": 2.05, "min": 0.0, "max": 100.0, "step":0.01}),
                     "base_shift": ("FLOAT", {"default": 0.95, "min": 0.0, "max": 100.0, "step":0.01}),
                     "stretch": ("BOOLEAN", {
                        "default": True,
                        "tooltip": "Stretch the sigmas to be in the range [terminal, 1]."
                    }),
                     "terminal": (
                        "FLOAT",
                        {
                            "default": 0.1, "min": 0.0, "max": 0.99, "step": 0.01,
                            "tooltip": "The terminal value of the sigmas after stretching."
                        },
                    ),
                    },
                "optional": {"latent": ("LATENT",), }
               }

    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "sampling/custom_sampling/schedulers"

    FUNCTION = "get_sigmas"

    def get_sigmas(self, steps, max_shift, base_shift, stretch, terminal, latent=None):
        if latent is None:
            tokens = 4096
        else:
            tokens = math.prod(latent["samples"].shape[2:])

        sigmas = torch.linspace(1.0, 0.0, steps + 1)

        x1 = 1024
        x2 = 4096
        mm = (max_shift - base_shift) / (x2 - x1)
        b = base_shift - mm * x1
        sigma_shift = (tokens) * mm + b

        power = 1
        sigmas = torch.where(
            sigmas != 0,
            math.exp(sigma_shift) / (math.exp(sigma_shift) + (1 / sigmas - 1) ** power),
            0,
        )

        # Stretch sigmas so that its final value matches the given terminal value.
        if stretch:
            non_zero_mask = sigmas != 0
            non_zero_sigmas = sigmas[non_zero_mask]
            one_minus_z = 1.0 - non_zero_sigmas
            scale_factor = one_minus_z[-1] / (1.0 - terminal)
            stretched = 1.0 - (one_minus_z / scale_factor)
            sigmas[non_zero_mask] = stretched

        return (sigmas,)


# Maps each node's identifier string to the class in this module that
# implements it (presumably read by the host application's node loader --
# confirm against the loader).
NODE_CLASS_MAPPINGS = {
    "EmptyLTXVLatentVideo": EmptyLTXVLatentVideo,
    "LTXVImgToVideo": LTXVImgToVideo,
    "ModelSamplingLTXV": ModelSamplingLTXV,
    "LTXVConditioning": LTXVConditioning,
    "LTXVScheduler": LTXVScheduler,
}
Support Lightricks LTX-Video model. 2024-11-22 13:44:42 +00:00			`import nodes`
			`import node_helpers`
			`import torch`
			`import comfy.model_management`
			`import comfy.model_sampling`
			`import math`

			`class EmptyLTXVLatentVideo:`
			`@classmethod`
			`def INPUT_TYPES(s):`
			`return {"required": { "width": ("INT", {"default": 768, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),`
			`"height": ("INT", {"default": 512, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),`
set LTX min length to 1 for t2i (#5750) At length=1, the LTX model can do txt2img and img2img with no other changes required. 2024-11-24 02:33:08 +00:00			`"length": ("INT", {"default": 97, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 8}),`
Support Lightricks LTX-Video model. 2024-11-22 13:44:42 +00:00			`"batch_size": ("INT", {"default": 1, "min": 1, "max": 4096})}}`
			`RETURN_TYPES = ("LATENT",)`
			`FUNCTION = "generate"`

			`CATEGORY = "latent/video/ltxv"`

			`def generate(self, width, height, length, batch_size=1):`
			`latent = torch.zeros([batch_size, 128, ((length - 1) // 8) + 1, height // 32, width // 32], device=comfy.model_management.intermediate_device())`
			`return ({"samples": latent}, )`


			`class LTXVImgToVideo:`
			`@classmethod`
			`def INPUT_TYPES(s):`
			`return {"required": {"positive": ("CONDITIONING", ),`
			`"negative": ("CONDITIONING", ),`
			`"vae": ("VAE",),`
			`"image": ("IMAGE",),`
			`"width": ("INT", {"default": 768, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),`
			`"height": ("INT", {"default": 512, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),`
			`"length": ("INT", {"default": 97, "min": 9, "max": nodes.MAX_RESOLUTION, "step": 8}),`
			`"batch_size": ("INT", {"default": 1, "min": 1, "max": 4096})}}`

			`RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")`
			`RETURN_NAMES = ("positive", "negative", "latent")`

			`CATEGORY = "conditioning/video_models"`
			`FUNCTION = "generate"`

			`def generate(self, positive, negative, image, vae, width, height, length, batch_size):`
			`pixels = comfy.utils.common_upscale(image.movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)`
			`encode_pixels = pixels[:, :, :, :3]`
			`t = vae.encode(encode_pixels)`
			`positive = node_helpers.conditioning_set_values(positive, {"guiding_latent": t})`
			`negative = node_helpers.conditioning_set_values(negative, {"guiding_latent": t})`

			`latent = torch.zeros([batch_size, 128, ((length - 1) // 8) + 1, height // 32, width // 32], device=comfy.model_management.intermediate_device())`
			`latent[:, :, :t.shape[2]] = t`
			`return (positive, negative, {"samples": latent}, )`


			`class LTXVConditioning:`
			`@classmethod`
			`def INPUT_TYPES(s):`
			`return {"required": {"positive": ("CONDITIONING", ),`
			`"negative": ("CONDITIONING", ),`
			`"frame_rate": ("FLOAT", {"default": 25.0, "min": 0.0, "max": 1000.0, "step": 0.01}),`
			`}}`
			`RETURN_TYPES = ("CONDITIONING", "CONDITIONING")`
			`RETURN_NAMES = ("positive", "negative")`
			`FUNCTION = "append"`

			`CATEGORY = "conditioning/video_models"`

			`def append(self, positive, negative, frame_rate):`
			`positive = node_helpers.conditioning_set_values(positive, {"frame_rate": frame_rate})`
			`negative = node_helpers.conditioning_set_values(negative, {"frame_rate": frame_rate})`
			`return (positive, negative)`


			`class ModelSamplingLTXV:`
			`@classmethod`
			`def INPUT_TYPES(s):`
			`return {"required": { "model": ("MODEL",),`
			`"max_shift": ("FLOAT", {"default": 2.05, "min": 0.0, "max": 100.0, "step":0.01}),`
			`"base_shift": ("FLOAT", {"default": 0.95, "min": 0.0, "max": 100.0, "step":0.01}),`
			`},`
			`"optional": {"latent": ("LATENT",), }`
			`}`

			`RETURN_TYPES = ("MODEL",)`
			`FUNCTION = "patch"`

			`CATEGORY = "advanced/model"`

			`def patch(self, model, max_shift, base_shift, latent=None):`
			`m = model.clone()`

			`if latent is None:`
			`tokens = 4096`
			`else:`
			`tokens = math.prod(latent["samples"].shape[2:])`

			`x1 = 1024`
			`x2 = 4096`
			`mm = (max_shift - base_shift) / (x2 - x1)`
			`b = base_shift - mm * x1`
			`shift = (tokens) * mm + b`

			`sampling_base = comfy.model_sampling.ModelSamplingFlux`
			`sampling_type = comfy.model_sampling.CONST`

			`class ModelSamplingAdvanced(sampling_base, sampling_type):`
			`pass`

			`model_sampling = ModelSamplingAdvanced(model.model.model_config)`
			`model_sampling.set_parameters(shift=shift)`
			`m.add_object_patch("model_sampling", model_sampling)`
			`return (m, )`


			`class LTXVScheduler:`
			`@classmethod`
			`def INPUT_TYPES(s):`
			`return {"required":`
			`{"steps": ("INT", {"default": 20, "min": 1, "max": 10000}),`
			`"max_shift": ("FLOAT", {"default": 2.05, "min": 0.0, "max": 100.0, "step":0.01}),`
			`"base_shift": ("FLOAT", {"default": 0.95, "min": 0.0, "max": 100.0, "step":0.01}),`
			`"stretch": ("BOOLEAN", {`
			`"default": True,`
			`"tooltip": "Stretch the sigmas to be in the range [terminal, 1]."`
			`}),`
			`"terminal": (`
			`"FLOAT",`
			`{`
			`"default": 0.1, "min": 0.0, "max": 0.99, "step": 0.01,`
			`"tooltip": "The terminal value of the sigmas after stretching."`
			`},`
			`),`
			`},`
			`"optional": {"latent": ("LATENT",), }`
			`}`

			`RETURN_TYPES = ("SIGMAS",)`
			`CATEGORY = "sampling/custom_sampling/schedulers"`

			`FUNCTION = "get_sigmas"`

			`def get_sigmas(self, steps, max_shift, base_shift, stretch, terminal, latent=None):`
			`if latent is None:`
			`tokens = 4096`
			`else:`
			`tokens = math.prod(latent["samples"].shape[2:])`

			`sigmas = torch.linspace(1.0, 0.0, steps + 1)`

			`x1 = 1024`
			`x2 = 4096`
			`mm = (max_shift - base_shift) / (x2 - x1)`
			`b = base_shift - mm * x1`
			`sigma_shift = (tokens) * mm + b`

			`power = 1`
			`sigmas = torch.where(`
			`sigmas != 0,`
			`math.exp(sigma_shift) / (math.exp(sigma_shift) + (1 / sigmas - 1) ** power),`
			`0,`
			`)`

			`# Stretch sigmas so that its final value matches the given terminal value.`
			`if stretch:`
			`non_zero_mask = sigmas != 0`
			`non_zero_sigmas = sigmas[non_zero_mask]`
			`one_minus_z = 1.0 - non_zero_sigmas`
			`scale_factor = one_minus_z[-1] / (1.0 - terminal)`
			`stretched = 1.0 - (one_minus_z / scale_factor)`
			`sigmas[non_zero_mask] = stretched`

			`return (sigmas,)`


			`NODE_CLASS_MAPPINGS = {`
			`"EmptyLTXVLatentVideo": EmptyLTXVLatentVideo,`
			`"LTXVImgToVideo": LTXVImgToVideo,`
			`"ModelSamplingLTXV": ModelSamplingLTXV,`
			`"LTXVConditioning": LTXVConditioning,`
			`"LTXVScheduler": LTXVScheduler,`
			`}`