ComfyUI/comfy/ldm/modules/encoders/noise_aug_modules.py

from ldm.modules.diffusionmodules.upscaling import ImageConcatWithNoiseAugmentation
from ldm.modules.diffusionmodules.openaimodel import Timestep
import torch

class CLIPEmbeddingNoiseAugmentation(ImageConcatWithNoiseAugmentation):
    """Noise augmentation applied to embeddings in a normalized space.

    The input is standardized with stored mean/std statistics, noised through
    ``self.q_sample``, mapped back to the original statistics, and returned
    together with an embedding of the noise level.

    ``q_sample`` and ``max_noise_level`` are not defined in this class; they
    are presumably provided by ``ImageConcatWithNoiseAugmentation`` -- confirm
    against the base class.
    """

    def __init__(self, *args, clip_stats_path=None, timestep_dim=256, **kwargs):
        """Set up the normalization statistics and the noise-level embedder.

        Args:
            *args: Forwarded unchanged to the base-class constructor.
            clip_stats_path: Optional path to a file that ``torch.load`` reads
                as a ``(mean, std)`` pair. When ``None``, a zero mean and unit
                std of length ``timestep_dim`` are used instead.
            timestep_dim: Size passed to ``Timestep``; also the length of the
                fallback statistics.
            **kwargs: Forwarded unchanged to the base-class constructor.
        """
        super().__init__(*args, **kwargs)
        if clip_stats_path is not None:
            # NOTE: torch.load unpickles the file, so only use trusted paths.
            mean, std = torch.load(clip_stats_path, map_location="cpu")
        else:
            mean = torch.zeros(timestep_dim)
            std = torch.ones(timestep_dim)
        # A leading axis of size 1 is added to each statistic; the buffers are
        # non-persistent, so they are left out of the module's state_dict.
        self.register_buffer("data_mean", mean[None, :], persistent=False)
        self.register_buffer("data_std", std[None, :], persistent=False)
        self.time_embed = Timestep(timestep_dim)

    def scale(self, x):
        """Standardize ``x`` using the stored mean and std buffers."""
        centered = x - self.data_mean
        return centered * 1. / self.data_std

    def unscale(self, x):
        """Invert ``scale``: map ``x`` back to the original data statistics."""
        rescaled = x * self.data_std
        return rescaled + self.data_mean

    def forward(self, x, noise_level=None):
        """Noise ``x`` in normalized space and embed the noise level.

        Args:
            x: Input tensor; its first dimension sets how many noise levels
                are drawn when ``noise_level`` is ``None``.
            noise_level: Optional tensor of noise levels. When ``None``,
                integer levels are drawn with ``torch.randint`` from
                ``[0, self.max_noise_level)`` on ``x``'s device.

        Returns:
            A ``(z, level_emb)`` tuple: the noised input mapped back to the
            original statistics, and ``self.time_embed(noise_level)``.
        """
        if noise_level is not None:
            assert isinstance(noise_level, torch.Tensor)
        else:
            batch = x.shape[0]
            noise_level = torch.randint(
                0, self.max_noise_level, (batch,), device=x.device
            ).long()
        # normalize -> add noise -> de-normalize
        noised = self.unscale(self.q_sample(self.scale(x), noise_level))
        return noised, self.time_embed(noise_level)
Add support for unCLIP SD2.x models. See _for_testing/unclip in the UI for the new nodes. unCLIPCheckpointLoader is used to load them. unCLIPConditioning is used to add the image cond and takes as input a CLIPVisionEncode output which has been moved to the conditioning section. 2023-04-02 03:19:15 +00:00			`from ldm.modules.diffusionmodules.upscaling import ImageConcatWithNoiseAugmentation`
			`from ldm.modules.diffusionmodules.openaimodel import Timestep`
			`import torch`

			`class CLIPEmbeddingNoiseAugmentation(ImageConcatWithNoiseAugmentation):`
			`def __init__(self, *args, clip_stats_path=None, timestep_dim=256, **kwargs):`
			`super().__init__(*args, **kwargs)`
			`if clip_stats_path is None:`
			`clip_mean, clip_std = torch.zeros(timestep_dim), torch.ones(timestep_dim)`
			`else:`
			`clip_mean, clip_std = torch.load(clip_stats_path, map_location="cpu")`
			`self.register_buffer("data_mean", clip_mean[None, :], persistent=False)`
			`self.register_buffer("data_std", clip_std[None, :], persistent=False)`
			`self.time_embed = Timestep(timestep_dim)`

			`def scale(self, x):`
			`# re-normalize to centered mean and unit variance`
			`x = (x - self.data_mean) * 1. / self.data_std`
			`return x`

			`def unscale(self, x):`
			`# back to original data stats`
			`x = (x * self.data_std) + self.data_mean`
			`return x`

			`def forward(self, x, noise_level=None):`
			`if noise_level is None:`
			`noise_level = torch.randint(0, self.max_noise_level, (x.shape[0],), device=x.device).long()`
			`else:`
			`assert isinstance(noise_level, torch.Tensor)`
			`x = self.scale(x)`
			`z = self.q_sample(x, noise_level)`
			`z = self.unscale(z)`
			`noise_level = self.time_embed(noise_level)`
			`return z, noise_level`