ComfyUI/comfy/model_base.py

import torch
from comfy.ldm.modules.diffusionmodules.openaimodel import UNetModel
from comfy.ldm.modules.encoders.noise_aug_modules import CLIPEmbeddingNoiseAugmentation
from comfy.ldm.modules.diffusionmodules.util import make_beta_schedule
from comfy.ldm.modules.diffusionmodules.openaimodel import Timestep
import comfy.model_management
import comfy.conds
import numpy as np
from enum import Enum
from . import utils

class ModelType(Enum):
    """Tag identifying which kind of model output a model instance is configured for.

    The value is stored on the model as ``model_type`` and is checked when
    saving a checkpoint (a ``"v_pred"`` marker key is added for V_PREDICTION).
    """
    # Default model_type for BaseModel, SDXLRefiner and SDXL.
    # NOTE(review): presumably "epsilon" (noise) prediction -- confirm against the sampler code.
    EPS = 1
    # Default model_type for SD21UNCLIP.
    # NOTE(review): presumably v-prediction parameterization -- confirm against the sampler code.
    V_PREDICTION = 2

class BaseModel(torch.nn.Module):
    """Wraps the diffusion UNet together with its beta schedule and latent format.

    Subclasses customise the extra ("adm") conditioning by overriding
    ``encode_adm``; the base implementation provides none.
    """

    def __init__(self, model_config, model_type=ModelType.EPS, device=None):
        """Build the schedule buffers and (unless disabled) the UNet.

        Args:
            model_config: object exposing ``unet_config`` (a dict),
                ``latent_format`` and ``beta_schedule``.
            model_type: ``ModelType`` member stored as ``self.model_type``.
            device: forwarded to ``UNetModel`` so it is created on that device.
        """
        super().__init__()

        unet_config = model_config.unet_config
        self.latent_format = model_config.latent_format
        self.model_config = model_config
        self.register_schedule(given_betas=None, beta_schedule=model_config.beta_schedule, timesteps=1000, linear_start=0.00085, linear_end=0.012, cosine_s=8e-3)
        # The UNet can be skipped via the config; in that case
        # ``self.diffusion_model`` is not set by this constructor.
        if not unet_config.get("disable_unet_model_creation", False):
            self.diffusion_model = UNetModel(**unet_config, device=device)
        self.model_type = model_type
        # A missing or None "adm_in_channels" is normalised to 0 (see is_adm()).
        self.adm_channels = unet_config.get("adm_in_channels", None)
        if self.adm_channels is None:
            self.adm_channels = 0
        self.inpaint_model = False
        print("model_type", model_type.name)
        print("adm", self.adm_channels)

    def register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000,
                          linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):
        """Compute the beta schedule and register it as float32 buffers.

        Registers ``betas``, ``alphas_cumprod`` and ``alphas_cumprod_prev`` and
        stores ``num_timesteps``, ``linear_start`` and ``linear_end``.

        Args:
            given_betas: explicit betas (must expose a 1-D ``.shape``); when
                None the betas come from ``make_beta_schedule``.
            beta_schedule, timesteps, linear_start, linear_end, cosine_s:
                forwarded to ``make_beta_schedule`` when ``given_betas`` is None.
        """
        if given_betas is not None:
            betas = given_betas
        else:
            betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s)
        alphas = 1. - betas
        alphas_cumprod = np.cumprod(alphas, axis=0)
        # Same sequence shifted by one step, with 1.0 as the value before step 0.
        alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1])

        # The actual number of steps is taken from the betas, not the argument.
        timesteps, = betas.shape
        self.num_timesteps = int(timesteps)
        self.linear_start = linear_start
        self.linear_end = linear_end

        self.register_buffer('betas', torch.tensor(betas, dtype=torch.float32))
        self.register_buffer('alphas_cumprod', torch.tensor(alphas_cumprod, dtype=torch.float32))
        self.register_buffer('alphas_cumprod_prev', torch.tensor(alphas_cumprod_prev, dtype=torch.float32))

    def apply_model(self, x, t, c_concat=None, c_crossattn=None, c_adm=None, control=None, transformer_options=None, **kwargs):
        """Run the UNet on ``x`` at timestep ``t`` and return a float32 result.

        Args:
            x: input tensor; ``c_concat`` (if given) is concatenated to it on dim 1.
            t: timestep tensor.
            c_concat: optional tensor concatenated to ``x`` along dim 1.
            c_crossattn: tensor passed to the UNet as ``context``. It is cast
                unconditionally, so it must not be None.
            c_adm: optional tensor passed to the UNet as ``y``.
            control: forwarded to the UNet unchanged.
            transformer_options: dict forwarded to the UNet. Defaults to a new
                empty dict per call so that state written into it by one call
                cannot leak into the next.
            **kwargs: accepted and ignored.
        """
        if transformer_options is None:
            transformer_options = {}

        if c_concat is not None:
            xc = torch.cat((x, c_concat), dim=1)
        else:
            xc = x

        # Inputs are cast to the UNet's own dtype before the forward pass.
        dtype = self.get_dtype()
        xc = xc.to(dtype)
        t = t.to(dtype)
        context = c_crossattn.to(dtype)
        if c_adm is not None:
            c_adm = c_adm.to(dtype)
        return self.diffusion_model(xc, t, context=context, y=c_adm, control=control, transformer_options=transformer_options).float()

    def get_dtype(self):
        """Return the dtype reported by the wrapped UNet."""
        return self.diffusion_model.dtype

    def is_adm(self):
        """Return True when the UNet config declared a non-zero ``adm_in_channels``."""
        return self.adm_channels > 0

    def encode_adm(self, **kwargs):
        """Return the adm conditioning tensor, or None when the model has none.

        The base model has none; subclasses override this.
        """
        return None

    def extra_conds(self, **kwargs):
        """Build the model-specific conditioning entries.

        Returns a dict that may contain:
            ``c_concat``: ``comfy.conds.CONDNoiseShape`` holding mask + masked
                image, only when ``self.inpaint_model`` is set.
            ``c_adm``: ``comfy.conds.CONDRegular`` holding ``encode_adm``'s
                result, only when that result is not None.

        For inpaint models the kwargs ``denoise_mask``, ``latent_image`` and
        ``noise`` are read and ``device`` is required. All kwargs are
        forwarded to ``encode_adm``.
        """
        out = {}
        if self.inpaint_model:
            denoise_mask = kwargs.get("denoise_mask", None)
            latent_image = kwargs.get("latent_image", None)
            noise = kwargs.get("noise", None)
            device = kwargs["device"]

            def blank_inpaint_image_like(latent_image):
                blank_image = torch.ones_like(latent_image)
                # these are the values for "zero" in pixel space translated to latent space
                blank_image[:,0] *= 0.8223
                blank_image[:,1] *= -0.6876
                blank_image[:,2] *= 0.6364
                blank_image[:,3] *= 0.1380
                return blank_image

            if denoise_mask is not None:
                mask = denoise_mask[:,:1].to(device)
                # NOTE: the latent_image should be masked by the mask in pixel space
                masked_image = latent_image.to(device)
            else:
                # Without a mask, use an all-ones single-channel mask and a
                # blank image, both shaped after the noise tensor.
                mask = torch.ones_like(noise)[:,:1]
                masked_image = blank_inpaint_image_like(noise)

            # Channel order is mask first, then masked image.
            data = torch.cat((mask, masked_image), dim=1)
            out['c_concat'] = comfy.conds.CONDNoiseShape(data)

        adm = self.encode_adm(**kwargs)
        if adm is not None:
            out['c_adm'] = comfy.conds.CONDRegular(adm)
        return out

    def load_model_weights(self, sd, unet_prefix=""):
        """Load the UNet weights found under ``unet_prefix`` in ``sd``.

        Matching keys are removed from ``sd`` (it is mutated in place) and
        loaded non-strictly with the prefix stripped; missing and unexpected
        keys are printed. Returns ``self``.
        """
        to_load = {}
        # Snapshot the keys first because entries are popped while iterating.
        keys = list(sd.keys())
        for k in keys:
            if k.startswith(unet_prefix):
                to_load[k[len(unet_prefix):]] = sd.pop(k)

        m, u = self.diffusion_model.load_state_dict(to_load, strict=False)
        if len(m) > 0:
            print("unet missing:", m)

        if len(u) > 0:
            print("unet unexpected:", u)
        del to_load
        return self

    def process_latent_in(self, latent):
        """Apply the latent format's ``process_in`` to ``latent``."""
        return self.latent_format.process_in(latent)

    def process_latent_out(self, latent):
        """Apply the latent format's ``process_out`` to ``latent``."""
        return self.latent_format.process_out(latent)

    def state_dict_for_saving(self, clip_state_dict, vae_state_dict):
        """Assemble one checkpoint state dict from the UNet, VAE and CLIP weights.

        Each part is passed through the matching ``model_config.process_*``
        hook. When the UNet dtype is float16 the CLIP and VAE dicts are
        converted to float16 as well. For V_PREDICTION models an empty
        ``"v_pred"`` tensor is added as a marker key.
        """
        clip_state_dict = self.model_config.process_clip_state_dict_for_saving(clip_state_dict)
        unet_sd = self.diffusion_model.state_dict()
        # Every UNet weight goes through resolve_lowvram_weight before saving.
        unet_state_dict = {
            k: comfy.model_management.resolve_lowvram_weight(unet_sd[k], self.diffusion_model, k)
            for k in unet_sd
        }

        unet_state_dict = self.model_config.process_unet_state_dict_for_saving(unet_state_dict)
        vae_state_dict = self.model_config.process_vae_state_dict_for_saving(vae_state_dict)
        if self.get_dtype() == torch.float16:
            clip_state_dict = utils.convert_sd_to(clip_state_dict, torch.float16)
            vae_state_dict = utils.convert_sd_to(vae_state_dict, torch.float16)

        if self.model_type == ModelType.V_PREDICTION:
            unet_state_dict["v_pred"] = torch.tensor([])

        # On key collisions, later dicts win: CLIP over VAE over UNet.
        return {**unet_state_dict, **vae_state_dict, **clip_state_dict}

    def set_inpaint(self):
        """Mark this model as an inpaint model so ``extra_conds`` emits ``c_concat``."""
        self.inpaint_model = True

def unclip_adm(unclip_conditioning, device, noise_augmentor, noise_augment_merge=0.0):
    """Build the adm conditioning tensor from unCLIP image conditionings.

    Every image embedding of every entry is noise-augmented, concatenated with
    its noise-level embedding along dim 1 and scaled by the entry's strength.
    A single result is returned as is. Several results are summed, and the
    first ``noise_augmentor.time_embed.dim`` columns of the sum are augmented
    once more using ``noise_augment_merge``.

    Args:
        unclip_conditioning: iterable of dicts with the keys
            ``"clip_vision_output"`` (object exposing ``image_embeds``),
            ``"strength"`` and ``"noise_augmentation"``.
        device: device the embeddings and noise-level tensors are placed on.
        noise_augmentor: callable ``(x, noise_level=...)`` returning
            ``(augmented, noise_level_embedding)``; must expose
            ``max_noise_level`` and ``time_embed.dim``.
        noise_augment_merge: augmentation amount used for the merged result.

    Returns:
        The adm tensor.

    Raises:
        ValueError: if no image embedding was found (previously this surfaced
            as an unbound-local error on the return statement).
    """
    max_level = noise_augmentor.max_noise_level - 1

    def augment(embed, noise_augment):
        # Scale the augmentation amount by (max_noise_level - 1) and round it
        # to an integer noise level before calling the augmentor.
        noise_level = round(max_level * noise_augment)
        c_adm, noise_level_emb = noise_augmentor(embed, noise_level=torch.tensor([noise_level], device=device))
        return torch.cat((c_adm, noise_level_emb), 1)

    adm_inputs = []
    for unclip_cond in unclip_conditioning:
        weight = unclip_cond["strength"]
        noise_augment = unclip_cond["noise_augmentation"]
        for adm_cond in unclip_cond["clip_vision_output"].image_embeds:
            adm_inputs.append(augment(adm_cond.to(device), noise_augment) * weight)

    if not adm_inputs:
        raise ValueError("unclip_conditioning does not contain any image embeddings")

    if len(adm_inputs) == 1:
        return adm_inputs[0]

    # Several conditionings: sum them, then re-augment the embedding part of
    # the sum (the old noise-level columns are dropped and rebuilt).
    merged = torch.stack(adm_inputs).sum(0)
    return augment(merged[:, :noise_augmentor.time_embed.dim], noise_augment_merge)

class SD21UNCLIP(BaseModel):
    """BaseModel variant whose adm conditioning comes from unCLIP image embeddings."""

    def __init__(self, model_config, noise_aug_config, model_type=ModelType.V_PREDICTION, device=None):
        """Create the base model plus a noise augmentor built from ``noise_aug_config``."""
        super().__init__(model_config, model_type, device=device)
        self.noise_augmentor = CLIPEmbeddingNoiseAugmentation(**noise_aug_config)

    def encode_adm(self, **kwargs):
        """Return the unCLIP adm tensor, or zeros when no unCLIP conditioning is given.

        ``device`` is a required kwarg. ``unclip_conditioning`` and
        ``unclip_noise_augment_merge`` (default 0.05) are optional.
        """
        conditioning = kwargs.get("unclip_conditioning", None)
        target_device = kwargs["device"]
        if conditioning is not None:
            merge_amount = kwargs.get("unclip_noise_augment_merge", 0.05)
            return unclip_adm(conditioning, target_device, self.noise_augmentor, merge_amount)
        # No image conditioning: a single all-zero row of adm_channels columns.
        return torch.zeros((1, self.adm_channels))

def sdxl_pooled(args, noise_augmentor):
    """Pick the pooled conditioning for the SDXL models.

    Without an ``"unclip_conditioning"`` entry, ``args["pooled_output"]`` is
    returned unchanged. Otherwise the result of ``unclip_adm`` (called with
    ``args["device"]`` and its default merge amount) is returned, cut down to
    its first 1280 columns.
    """
    if "unclip_conditioning" not in args:
        return args["pooled_output"]

    image_adm = unclip_adm(args.get("unclip_conditioning", None), args["device"], noise_augmentor)
    return image_adm[:,:1280]

class SDXLRefiner(BaseModel):
    """BaseModel variant conditioned on size, crop and aesthetic-score embeddings."""

    def __init__(self, model_config, model_type=ModelType.EPS, device=None):
        """Create the base model, a 256-dim ``Timestep`` embedder and a noise augmentor."""
        super().__init__(model_config, model_type, device=device)
        self.embedder = Timestep(256)
        schedule_config = {"timesteps": 1000, "beta_schedule": "squaredcos_cap_v2"}
        self.noise_augmentor = CLIPEmbeddingNoiseAugmentation(noise_schedule_config=schedule_config, timestep_dim=1280)

    def encode_adm(self, **kwargs):
        """Return pooled conditioning followed by the embedded scalar conditions.

        Embedded values, in order: height, width, crop_h, crop_w and
        aesthetic_score. Defaults are 768 for width/height and 0 for the
        crops. The aesthetic score defaults to 2.5 when ``prompt_type`` is
        ``"negative"`` and to 6 otherwise.
        """
        clip_pooled = sdxl_pooled(kwargs, self.noise_augmentor)
        width = kwargs.get("width", 768)
        height = kwargs.get("height", 768)
        crop_w = kwargs.get("crop_w", 0)
        crop_h = kwargs.get("crop_h", 0)

        is_negative = kwargs.get("prompt_type", "") == "negative"
        aesthetic_score = kwargs.get("aesthetic_score", 2.5 if is_negative else 6)

        scalars = (height, width, crop_h, crop_w, aesthetic_score)
        embedded = [self.embedder(torch.Tensor([value])) for value in scalars]

        # One flat row of embeddings, repeated once per pooled row.
        row = torch.flatten(torch.cat(embedded)).unsqueeze(dim=0)
        flat = row.repeat(clip_pooled.shape[0], 1)
        return torch.cat((clip_pooled.to(flat.device), flat), dim=1)

class SDXL(BaseModel):
    """BaseModel variant conditioned on size, crop and target-size embeddings."""

    def __init__(self, model_config, model_type=ModelType.EPS, device=None):
        """Create the base model, a 256-dim ``Timestep`` embedder and a noise augmentor."""
        super().__init__(model_config, model_type, device=device)
        self.embedder = Timestep(256)
        schedule_config = {"timesteps": 1000, "beta_schedule": "squaredcos_cap_v2"}
        self.noise_augmentor = CLIPEmbeddingNoiseAugmentation(noise_schedule_config=schedule_config, timestep_dim=1280)

    def encode_adm(self, **kwargs):
        """Return pooled conditioning followed by the embedded scalar conditions.

        Embedded values, in order: height, width, crop_h, crop_w,
        target_height and target_width. Defaults are 768 for width/height and
        0 for the crops. The target size defaults to the (possibly defaulted)
        width and height.
        """
        clip_pooled = sdxl_pooled(kwargs, self.noise_augmentor)
        width = kwargs.get("width", 768)
        height = kwargs.get("height", 768)
        crop_w = kwargs.get("crop_w", 0)
        crop_h = kwargs.get("crop_h", 0)
        target_width = kwargs.get("target_width", width)
        target_height = kwargs.get("target_height", height)

        scalars = (height, width, crop_h, crop_w, target_height, target_width)
        embedded = [self.embedder(torch.Tensor([value])) for value in scalars]

        # One flat row of embeddings, repeated once per pooled row.
        row = torch.flatten(torch.cat(embedded)).unsqueeze(dim=0)
        flat = row.repeat(clip_pooled.shape[0], 1)
        return torch.cat((clip_pooled.to(flat.device), flat), dim=1)
Simpler base model code. 2023-06-09 16:24:24 +00:00			`import torch`
			`from comfy.ldm.modules.diffusionmodules.openaimodel import UNetModel`
			`from comfy.ldm.modules.encoders.noise_aug_modules import CLIPEmbeddingNoiseAugmentation`
			`from comfy.ldm.modules.diffusionmodules.util import make_beta_schedule`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`from comfy.ldm.modules.diffusionmodules.openaimodel import Timestep`
Fix lowvram model merging. 2023-08-26 15:52:07 +00:00			`import comfy.model_management`
Refactor to make it easier to add custom conds to models. 2023-10-25 03:31:12 +00:00			`import comfy.conds`
Simpler base model code. 2023-06-09 16:24:24 +00:00			`import numpy as np`
Refactor of sampler code to deal more easily with different model types. 2023-07-17 05:22:12 +00:00			`from enum import Enum`
Add CheckpointSave node to save checkpoints. The created checkpoints contain workflow metadata that can be loaded by dragging them on top of the UI or loading them with the "Load" button. Checkpoints will be saved in fp16 or fp32 depending on the format ComfyUI is using for inference on your hardware. To force fp32 use: --force-fp32 Anything that patches the model weights like merging or loras will be saved. The output directory is currently set to: output/checkpoints but that might change in the future. 2023-06-26 16:21:07 +00:00			`from . import utils`
Simpler base model code. 2023-06-09 16:24:24 +00:00
Refactor of sampler code to deal more easily with different model types. 2023-07-17 05:22:12 +00:00			`class ModelType(Enum):`
			`EPS = 1`
			`V_PREDICTION = 2`

Simpler base model code. 2023-06-09 16:24:24 +00:00			`class BaseModel(torch.nn.Module):`
Initialize the unet directly on the target device. 2023-07-29 18:51:56 +00:00			`def __init__(self, model_config, model_type=ModelType.EPS, device=None):`
Simpler base model code. 2023-06-09 16:24:24 +00:00			`super().__init__()`

Move latent scale factor from VAE to model. 2023-06-23 06:14:12 +00:00			`unet_config = model_config.unet_config`
			`self.latent_format = model_config.latent_format`
Add CheckpointSave node to save checkpoints. The created checkpoints contain workflow metadata that can be loaded by dragging them on top of the UI or loading them with the "Load" button. Checkpoints will be saved in fp16 or fp32 depending on the format ComfyUI is using for inference on your hardware. To force fp32 use: --force-fp32 Anything that patches the model weights like merging or loras will be saved. The output directory is currently set to: output/checkpoints but that might change in the future. 2023-06-26 16:21:07 +00:00			`self.model_config = model_config`
Move beta_schedule to model_config and allow disabling unet creation. 2023-08-29 18:22:53 +00:00			`self.register_schedule(given_betas=None, beta_schedule=model_config.beta_schedule, timesteps=1000, linear_start=0.00085, linear_end=0.012, cosine_s=8e-3)`
			`if not unet_config.get("disable_unet_model_creation", False):`
			`self.diffusion_model = UNetModel(**unet_config, device=device)`
Refactor of sampler code to deal more easily with different model types. 2023-07-17 05:22:12 +00:00			`self.model_type = model_type`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`self.adm_channels = unet_config.get("adm_in_channels", None)`
			`if self.adm_channels is None:`
Simpler base model code. 2023-06-09 16:24:24 +00:00			`self.adm_channels = 0`
Refactor cond_concat into model object. 2023-10-18 20:48:37 +00:00			`self.inpaint_model = False`
Refactor of sampler code to deal more easily with different model types. 2023-07-17 05:22:12 +00:00			`print("model_type", model_type.name)`
Simpler base model code. 2023-06-09 16:24:24 +00:00			`print("adm", self.adm_channels)`

			`def register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000,`
			`linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):`
			`if given_betas is not None:`
			`betas = given_betas`
			`else:`
			`betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s)`
			`alphas = 1. - betas`
			`alphas_cumprod = np.cumprod(alphas, axis=0)`
			`alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1])`

			`timesteps, = betas.shape`
			`self.num_timesteps = int(timesteps)`
			`self.linear_start = linear_start`
			`self.linear_end = linear_end`

			`self.register_buffer('betas', torch.tensor(betas, dtype=torch.float32))`
			`self.register_buffer('alphas_cumprod', torch.tensor(alphas_cumprod, dtype=torch.float32))`
			`self.register_buffer('alphas_cumprod_prev', torch.tensor(alphas_cumprod_prev, dtype=torch.float32))`

Refactor to make it easier to add custom conds to models. 2023-10-25 03:31:12 +00:00			`def apply_model(self, x, t, c_concat=None, c_crossattn=None, c_adm=None, control=None, transformer_options={}, **kwargs):`
Simpler base model code. 2023-06-09 16:24:24 +00:00			`if c_concat is not None:`
It doesn't make sense for c_crossattn and c_concat to be lists. 2023-08-31 17:25:00 +00:00			`xc = torch.cat([x] + [c_concat], dim=1)`
Simpler base model code. 2023-06-09 16:24:24 +00:00			`else:`
			`xc = x`
It doesn't make sense for c_crossattn and c_concat to be lists. 2023-08-31 17:25:00 +00:00			`context = c_crossattn`
Disable autocast in unet for increased speed. 2023-07-06 00:58:44 +00:00			`dtype = self.get_dtype()`
			`xc = xc.to(dtype)`
			`t = t.to(dtype)`
			`context = context.to(dtype)`
			`if c_adm is not None:`
			`c_adm = c_adm.to(dtype)`
			`return self.diffusion_model(xc, t, context=context, y=c_adm, control=control, transformer_options=transformer_options).float()`
Simpler base model code. 2023-06-09 16:24:24 +00:00
			`def get_dtype(self):`
			`return self.diffusion_model.dtype`

			`def is_adm(self):`
			`return self.adm_channels > 0`

Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`def encode_adm(self, **kwargs):`
			`return None`

Refactor to make it easier to add custom conds to models. 2023-10-25 03:31:12 +00:00			`def extra_conds(self, **kwargs):`
			`out = {}`
Refactor cond_concat into model object. 2023-10-18 20:48:37 +00:00			`if self.inpaint_model:`
			`concat_keys = ("mask", "masked_image")`
			`cond_concat = []`
			`denoise_mask = kwargs.get("denoise_mask", None)`
			`latent_image = kwargs.get("latent_image", None)`
			`noise = kwargs.get("noise", None)`
Make sure cond_concat is on the right device. 2023-10-19 05:10:41 +00:00			`device = kwargs["device"]`
Refactor cond_concat into model object. 2023-10-18 20:48:37 +00:00
			`def blank_inpaint_image_like(latent_image):`
			`blank_image = torch.ones_like(latent_image)`
			`# these are the values for "zero" in pixel space translated to latent space`
			`blank_image[:,0] *= 0.8223`
			`blank_image[:,1] *= -0.6876`
			`blank_image[:,2] *= 0.6364`
			`blank_image[:,3] *= 0.1380`
			`return blank_image`

			`for ck in concat_keys:`
			`if denoise_mask is not None:`
			`if ck == "mask":`
Make sure cond_concat is on the right device. 2023-10-19 05:10:41 +00:00			`cond_concat.append(denoise_mask[:,:1].to(device))`
Refactor cond_concat into model object. 2023-10-18 20:48:37 +00:00			`elif ck == "masked_image":`
Make sure cond_concat is on the right device. 2023-10-19 05:10:41 +00:00			`cond_concat.append(latent_image.to(device)) #NOTE: the latent_image should be masked by the mask in pixel space`
Refactor cond_concat into model object. 2023-10-18 20:48:37 +00:00			`else:`
			`if ck == "mask":`
			`cond_concat.append(torch.ones_like(noise)[:,:1])`
			`elif ck == "masked_image":`
			`cond_concat.append(blank_inpaint_image_like(noise))`
Refactor to make it easier to add custom conds to models. 2023-10-25 03:31:12 +00:00			`data = torch.cat(cond_concat, dim=1)`
			`out['c_concat'] = comfy.conds.CONDNoiseShape(data)`
			`adm = self.encode_adm(**kwargs)`
			`if adm is not None:`
			`out['c_adm'] = comfy.conds.CONDRegular(adm)`
			`return out`
Refactor cond_concat into model object. 2023-10-18 20:48:37 +00:00
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`def load_model_weights(self, sd, unet_prefix=""):`
			`to_load = {}`
			`keys = list(sd.keys())`
			`for k in keys:`
			`if k.startswith(unet_prefix):`
			`to_load[k[len(unet_prefix):]] = sd.pop(k)`

			`m, u = self.diffusion_model.load_state_dict(to_load, strict=False)`
			`if len(m) > 0:`
			`print("unet missing:", m)`

			`if len(u) > 0:`
			`print("unet unexpected:", u)`
			`del to_load`
			`return self`

Move latent scale factor from VAE to model. 2023-06-23 06:14:12 +00:00			`def process_latent_in(self, latent):`
			`return self.latent_format.process_in(latent)`

			`def process_latent_out(self, latent):`
			`return self.latent_format.process_out(latent)`

Add CheckpointSave node to save checkpoints. The created checkpoints contain workflow metadata that can be loaded by dragging them on top of the UI or loading them with the "Load" button. Checkpoints will be saved in fp16 or fp32 depending on the format ComfyUI is using for inference on your hardware. To force fp32 use: --force-fp32 Anything that patches the model weights like merging or loras will be saved. The output directory is currently set to: output/checkpoints but that might change in the future. 2023-06-26 16:21:07 +00:00			`def state_dict_for_saving(self, clip_state_dict, vae_state_dict):`
			`clip_state_dict = self.model_config.process_clip_state_dict_for_saving(clip_state_dict)`
Fix lowvram model merging. 2023-08-26 15:52:07 +00:00			`unet_sd = self.diffusion_model.state_dict()`
			`unet_state_dict = {}`
			`for k in unet_sd:`
			`unet_state_dict[k] = comfy.model_management.resolve_lowvram_weight(unet_sd[k], self.diffusion_model, k)`

Add CheckpointSave node to save checkpoints. The created checkpoints contain workflow metadata that can be loaded by dragging them on top of the UI or loading them with the "Load" button. Checkpoints will be saved in fp16 or fp32 depending on the format ComfyUI is using for inference on your hardware. To force fp32 use: --force-fp32 Anything that patches the model weights like merging or loras will be saved. The output directory is currently set to: output/checkpoints but that might change in the future. 2023-06-26 16:21:07 +00:00			`unet_state_dict = self.model_config.process_unet_state_dict_for_saving(unet_state_dict)`
			`vae_state_dict = self.model_config.process_vae_state_dict_for_saving(vae_state_dict)`
			`if self.get_dtype() == torch.float16:`
			`clip_state_dict = utils.convert_sd_to(clip_state_dict, torch.float16)`
			`vae_state_dict = utils.convert_sd_to(vae_state_dict, torch.float16)`
Add key to indicate checkpoint is v_prediction when saving. 2023-07-18 04:25:53 +00:00
			`if self.model_type == ModelType.V_PREDICTION:`
			`unet_state_dict["v_pred"] = torch.tensor([])`

Add CheckpointSave node to save checkpoints. The created checkpoints contain workflow metadata that can be loaded by dragging them on top of the UI or loading them with the "Load" button. Checkpoints will be saved in fp16 or fp32 depending on the format ComfyUI is using for inference on your hardware. To force fp32 use: --force-fp32 Anything that patches the model weights like merging or loras will be saved. The output directory is currently set to: output/checkpoints but that might change in the future. 2023-06-26 16:21:07 +00:00			`return {**unet_state_dict, **vae_state_dict, **clip_state_dict}`

Support SDXL inpaint models. 2023-09-01 19:18:25 +00:00			`def set_inpaint(self):`
Refactor cond_concat into model object. 2023-10-18 20:48:37 +00:00			`self.inpaint_model = True`
Support SDXL inpaint models. 2023-09-01 19:18:25 +00:00
Refactor unclip code. 2023-08-15 03:41:52 +00:00			`def unclip_adm(unclip_conditioning, device, noise_augmentor, noise_augment_merge=0.0):`
			`adm_inputs = []`
			`weights = []`
			`noise_aug = []`
			`for unclip_cond in unclip_conditioning:`
			`for adm_cond in unclip_cond["clip_vision_output"].image_embeds:`
			`weight = unclip_cond["strength"]`
			`noise_augment = unclip_cond["noise_augmentation"]`
			`noise_level = round((noise_augmentor.max_noise_level - 1) * noise_augment)`
			`c_adm, noise_level_emb = noise_augmentor(adm_cond.to(device), noise_level=torch.tensor([noise_level], device=device))`
			`adm_out = torch.cat((c_adm, noise_level_emb), 1) * weight`
			`weights.append(weight)`
			`noise_aug.append(noise_augment)`
			`adm_inputs.append(adm_out)`

			`if len(noise_aug) > 1:`
			`adm_out = torch.stack(adm_inputs).sum(0)`
			`noise_augment = noise_augment_merge`
			`noise_level = round((noise_augmentor.max_noise_level - 1) * noise_augment)`
			`c_adm, noise_level_emb = noise_augmentor(adm_out[:, :noise_augmentor.time_embed.dim], noise_level=torch.tensor([noise_level], device=device))`
			`adm_out = torch.cat((c_adm, noise_level_emb), 1)`

			`return adm_out`
Move latent scale factor from VAE to model. 2023-06-23 06:14:12 +00:00
Simpler base model code. 2023-06-09 16:24:24 +00:00			`class SD21UNCLIP(BaseModel):`
Initialize the unet directly on the target device. 2023-07-29 18:51:56 +00:00			`def __init__(self, model_config, noise_aug_config, model_type=ModelType.V_PREDICTION, device=None):`
			`super().__init__(model_config, model_type, device=device)`
Simpler base model code. 2023-06-09 16:24:24 +00:00			`self.noise_augmentor = CLIPEmbeddingNoiseAugmentation(**noise_aug_config)`

Refactor unCLIP noise augment out of samplers.py 2023-06-11 08:01:18 +00:00			`def encode_adm(self, **kwargs):`
			`unclip_conditioning = kwargs.get("unclip_conditioning", None)`
			`device = kwargs["device"]`
Refactor unclip code. 2023-08-15 03:41:52 +00:00			`if unclip_conditioning is None:`
			`return torch.zeros((1, self.adm_channels))`
Refactor unCLIP noise augment out of samplers.py 2023-06-11 08:01:18 +00:00			`else:`
Refactor unclip code. 2023-08-15 03:41:52 +00:00			`return unclip_adm(unclip_conditioning, device, self.noise_augmentor, kwargs.get("unclip_noise_augment_merge", 0.05))`
Refactor unCLIP noise augment out of samplers.py 2023-06-11 08:01:18 +00:00
ReVision support: unclip nodes can now be used with SDXL. 2023-08-18 06:39:23 +00:00			`def sdxl_pooled(args, noise_augmentor):`
			`if "unclip_conditioning" in args:`
			`return unclip_adm(args.get("unclip_conditioning", None), args["device"], noise_augmentor)[:,:1280]`
			`else:`
			`return args["pooled_output"]`

Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`class SDXLRefiner(BaseModel):`
Initialize the unet directly on the target device. 2023-07-29 18:51:56 +00:00			`def __init__(self, model_config, model_type=ModelType.EPS, device=None):`
			`super().__init__(model_config, model_type, device=device)`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`self.embedder = Timestep(256)`
ReVision support: unclip nodes can now be used with SDXL. 2023-08-18 06:39:23 +00:00			`self.noise_augmentor = CLIPEmbeddingNoiseAugmentation(**{"noise_schedule_config": {"timesteps": 1000, "beta_schedule": "squaredcos_cap_v2"}, "timestep_dim": 1280})`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00
			`def encode_adm(self, **kwargs):`
ReVision support: unclip nodes can now be used with SDXL. 2023-08-18 06:39:23 +00:00			`clip_pooled = sdxl_pooled(kwargs, self.noise_augmentor)`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`width = kwargs.get("width", 768)`
			`height = kwargs.get("height", 768)`
			`crop_w = kwargs.get("crop_w", 0)`
			`crop_h = kwargs.get("crop_h", 0)`

			`if kwargs.get("prompt_type", "") == "negative":`
			`aesthetic_score = kwargs.get("aesthetic_score", 2.5)`
			`else:`
			`aesthetic_score = kwargs.get("aesthetic_score", 6)`

			`out = []`
			`out.append(self.embedder(torch.Tensor([height])))`
Fix bug. 2023-06-28 04:38:07 +00:00			`out.append(self.embedder(torch.Tensor([width])))`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`out.append(self.embedder(torch.Tensor([crop_h])))`
Fix bug. 2023-06-28 04:38:07 +00:00			`out.append(self.embedder(torch.Tensor([crop_w])))`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`out.append(self.embedder(torch.Tensor([aesthetic_score])))`
Allow having a different pooled output for each image in a batch. 2023-09-21 05:14:42 +00:00			`flat = torch.flatten(torch.cat(out)).unsqueeze(dim=0).repeat(clip_pooled.shape[0], 1)`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`return torch.cat((clip_pooled.to(flat.device), flat), dim=1)`

			`class SDXL(BaseModel):`
Initialize the unet directly on the target device. 2023-07-29 18:51:56 +00:00			`def __init__(self, model_config, model_type=ModelType.EPS, device=None):`
			`super().__init__(model_config, model_type, device=device)`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`self.embedder = Timestep(256)`
ReVision support: unclip nodes can now be used with SDXL. 2023-08-18 06:39:23 +00:00			`self.noise_augmentor = CLIPEmbeddingNoiseAugmentation(**{"noise_schedule_config": {"timesteps": 1000, "beta_schedule": "squaredcos_cap_v2"}, "timestep_dim": 1280})`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00
			`def encode_adm(self, **kwargs):`
ReVision support: unclip nodes can now be used with SDXL. 2023-08-18 06:39:23 +00:00			`clip_pooled = sdxl_pooled(kwargs, self.noise_augmentor)`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`width = kwargs.get("width", 768)`
			`height = kwargs.get("height", 768)`
			`crop_w = kwargs.get("crop_w", 0)`
			`crop_h = kwargs.get("crop_h", 0)`
			`target_width = kwargs.get("target_width", width)`
			`target_height = kwargs.get("target_height", height)`

			`out = []`
			`out.append(self.embedder(torch.Tensor([height])))`
Fix bug. 2023-06-28 04:38:07 +00:00			`out.append(self.embedder(torch.Tensor([width])))`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`out.append(self.embedder(torch.Tensor([crop_h])))`
Fix bug. 2023-06-28 04:38:07 +00:00			`out.append(self.embedder(torch.Tensor([crop_w])))`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`out.append(self.embedder(torch.Tensor([target_height])))`
Fix bug. 2023-06-28 04:38:07 +00:00			`out.append(self.embedder(torch.Tensor([target_width])))`
Allow having a different pooled output for each image in a batch. 2023-09-21 05:14:42 +00:00			`flat = torch.flatten(torch.cat(out)).unsqueeze(dim=0).repeat(clip_pooled.shape[0], 1)`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`return torch.cat((clip_pooled.to(flat.device), flat), dim=1)`