# ComfyUI/comfy/supported_models_base.py

"""
    This file is part of ComfyUI.
    Copyright (C) 2024 Comfy

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""

import torch
from . import model_base
from . import utils
from . import latent_formats

class ClipTarget:
    """Hold a tokenizer and a clip object together with a ``params`` dict."""

    def __init__(self, tokenizer, clip):
        """Store ``clip`` and ``tokenizer`` and start with empty ``params``."""
        # A new dict is created on every call, so instances never share params.
        self.clip, self.tokenizer, self.params = clip, tokenizer, {}

class BASE:
    """Base model configuration.

    Class-level attributes describe a model; ``matches`` compares them with
    a detected UNet config (and optionally a state dict). The class is
    presumably meant to be subclassed once per supported model family, with
    subclasses overriding the attributes and hooks below.
    """

    # Key/value pairs a candidate UNet config must contain for ``matches``
    # to succeed.
    unet_config = {}
    # Entries written into the instance's ``unet_config`` by ``__init__``,
    # replacing any same-named keys from the config passed in.
    unet_extra_config = {
        "num_heads": -1,
        "num_head_channels": 64,
    }

    # Keys that must be present in the state dict (when one is supplied)
    # for ``matches`` to succeed.
    required_keys = {}

    clip_prefix = []
    # Prefix used when saving a CLIP vision state dict; ``None`` means the
    # replacement map passed to the helper is empty.
    clip_vision_prefix = None
    # When not ``None``, ``get_model`` builds ``model_base.SD21UNCLIP`` with
    # this config instead of ``model_base.BaseModel``.
    noise_aug_config = None
    # Copied per instance in ``__init__``.
    sampling_settings = {}
    # A class here; ``__init__`` replaces it with an instance on each object.
    latent_format = latent_formats.LatentFormat
    # The first entry is the prefix used when saving the VAE state dict.
    vae_key_prefix = ["first_stage_model."]
    # Every entry is mapped to "" when loading the text encoder state dict;
    # the first entry is the prefix used when saving it.
    text_encoder_key_prefix = ["cond_stage_model."]
    supported_inference_dtypes = [torch.float16, torch.bfloat16, torch.float32]

    memory_usage_factor = 2.0

    manual_cast_dtype = None
    custom_operations = None
    # Copied per instance in ``__init__`` so that toggling a flag on one
    # model config does not change it for every other one.
    optimizations = {"fp8": False}

    @classmethod
    def matches(cls, unet_config, state_dict=None):
        """Return True if ``unet_config`` (and ``state_dict``) fit this class.

        Every key of the class-level ``unet_config`` must be present in the
        given ``unet_config`` with an equal value. If ``state_dict`` is not
        ``None``, every key of ``required_keys`` must also be in it.
        """
        for key, expected in cls.unet_config.items():
            if key not in unet_config or expected != unet_config[key]:
                return False
        if state_dict is not None:
            return all(key in state_dict for key in cls.required_keys)
        return True

    def model_type(self, state_dict, prefix=""):
        """Return the model type; this base version always returns EPS.

        ``state_dict`` and ``prefix`` are unused here.
        """
        return model_base.ModelType.EPS

    def inpaint_model(self):
        """Return True when ``unet_config["in_channels"]`` is greater than 4.

        Raises ``KeyError`` if ``unet_config`` has no ``"in_channels"`` entry.
        """
        return self.unet_config["in_channels"] > 4

    def __init__(self, unet_config):
        """Create a configuration from a UNet config.

        The config passed in is shallow-copied, then the entries of
        ``unet_extra_config`` are written on top of it.
        """
        self.unet_config = unet_config.copy()
        # Shadow the shared class-level dicts with per-instance copies so
        # that mutating them on one instance does not leak into others.
        self.sampling_settings = self.sampling_settings.copy()
        self.optimizations = self.optimizations.copy()
        # ``latent_format`` is a class at this point; instantiate it.
        self.latent_format = self.latent_format()
        self.unet_config.update(self.unet_extra_config)

    def get_model(self, state_dict, prefix="", device=None):
        """Build and return the model object for this configuration.

        Creates ``model_base.SD21UNCLIP`` when ``noise_aug_config`` is set,
        otherwise ``model_base.BaseModel``. ``set_inpaint()`` is called on
        the result when ``inpaint_model()`` is true.
        """
        model_type = self.model_type(state_dict, prefix)
        if self.noise_aug_config is not None:
            out = model_base.SD21UNCLIP(self, self.noise_aug_config, model_type=model_type, device=device)
        else:
            out = model_base.BaseModel(self, model_type=model_type, device=device)
        if self.inpaint_model():
            out.set_inpaint()
        return out

    def process_clip_state_dict(self, state_dict):
        """Return the text encoder state dict after prefix replacement.

        Each entry of ``text_encoder_key_prefix`` is mapped to the empty
        string and passed to ``utils.state_dict_prefix_replace`` with
        ``filter_keys=True``.
        """
        strip_prefixes = {k: "" for k in self.text_encoder_key_prefix}
        return utils.state_dict_prefix_replace(state_dict, strip_prefixes, filter_keys=True)

    def process_unet_state_dict(self, state_dict):
        """Return the UNet state dict unchanged."""
        return state_dict

    def process_vae_state_dict(self, state_dict):
        """Return the VAE state dict unchanged."""
        return state_dict

    def process_clip_state_dict_for_saving(self, state_dict):
        """Return the state dict with "" replaced by the first text encoder prefix."""
        replace_prefix = {"": self.text_encoder_key_prefix[0]}
        return utils.state_dict_prefix_replace(state_dict, replace_prefix)

    def process_clip_vision_state_dict_for_saving(self, state_dict):
        """Return the state dict with "" replaced by ``clip_vision_prefix``.

        When ``clip_vision_prefix`` is ``None`` the replacement map passed to
        ``utils.state_dict_prefix_replace`` is empty.
        """
        replace_prefix = {}
        if self.clip_vision_prefix is not None:
            replace_prefix[""] = self.clip_vision_prefix
        return utils.state_dict_prefix_replace(state_dict, replace_prefix)

    def process_unet_state_dict_for_saving(self, state_dict):
        """Return the state dict with "" replaced by "model.diffusion_model."."""
        replace_prefix = {"": "model.diffusion_model."}
        return utils.state_dict_prefix_replace(state_dict, replace_prefix)

    def process_vae_state_dict_for_saving(self, state_dict):
        """Return the state dict with "" replaced by the first VAE key prefix."""
        replace_prefix = {"": self.vae_key_prefix[0]}
        return utils.state_dict_prefix_replace(state_dict, replace_prefix)

    def set_inference_dtype(self, dtype, manual_cast_dtype):
        """Store ``dtype`` in ``unet_config['dtype']`` and set ``manual_cast_dtype``."""
        self.unet_config['dtype'] = dtype
        self.manual_cast_dtype = manual_cast_dtype
Add a way to set model dtype and ops from load_checkpoint_guess_config. 2024-08-11 12:50:34 +00:00			`"""`
			`This file is part of ComfyUI.`
			`Copyright (C) 2024 Comfy`

			`This program is free software: you can redistribute it and/or modify`
			`it under the terms of the GNU General Public License as published by`
			`the Free Software Foundation, either version 3 of the License, or`
			`(at your option) any later version.`

			`This program is distributed in the hope that it will be useful,`
			`but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`GNU General Public License for more details.`

			`You should have received a copy of the GNU General Public License`
			`along with this program. If not, see <https://www.gnu.org/licenses/>.`
			`"""`

Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`import torch`
			`from . import model_base`
			`from . import utils`
Fix "Load Checkpoint with config" node. 2023-08-30 03:58:32 +00:00			`from . import latent_formats`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00
			`class ClipTarget:`
			`def __init__(self, tokenizer, clip):`
			`self.clip = clip`
			`self.tokenizer = tokenizer`
			`self.params = {}`

			`class BASE:`
			`unet_config = {}`
			`unet_extra_config = {`
			`"num_heads": -1,`
			`"num_head_channels": 64,`
			`}`

IP2P model loading support. This is the code to load the model and inference it with only a text prompt. This commit does not contain the nodes to properly use it with an image input. This supports both the original SD1 instructpix2pix model and the diffusers SDXL one. 2024-03-31 05:25:16 +00:00			`required_keys = {}`

Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`clip_prefix = []`
			`clip_vision_prefix = None`
			`noise_aug_config = None`
Add sampling_settings so models can specify specific sampling settings. 2023-11-22 22:23:37 +00:00			`sampling_settings = {}`
Fix "Load Checkpoint with config" node. 2023-08-30 03:58:32 +00:00			`latent_format = latent_formats.LatentFormat`
Put VAE key name in model config. 2024-01-30 07:24:38 +00:00			`vae_key_prefix = ["first_stage_model."]`
Some cleanups to how the text encoders are loaded. 2024-02-19 15:29:18 +00:00			`text_encoder_key_prefix = ["cond_stage_model."]`
Stable Cascade Stage C. 2024-02-16 15:55:08 +00:00			`supported_inference_dtypes = [torch.float16, torch.bfloat16, torch.float32]`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00
Better per model memory usage estimations. 2024-08-02 22:08:21 +00:00			`memory_usage_factor = 2.0`

Use faster manual cast for fp8 in unet. 2023-12-11 23:24:44 +00:00			`manual_cast_dtype = None`
Add a way to set model dtype and ops from load_checkpoint_guess_config. 2024-08-11 12:50:34 +00:00			`custom_operations = None`
Add a weight_dtype fp8_e4m3fn_fast to the Diffusion Model Loader node. This is used to load weights in fp8 and use fp8 matrix multiplication. 2024-10-09 23:43:17 +00:00			`optimizations = {"fp8": False}`
Use faster manual cast for fp8 in unet. 2023-12-11 23:24:44 +00:00
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`@classmethod`
IP2P model loading support. This is the code to load the model and inference it with only a text prompt. This commit does not contain the nodes to properly use it with an image input. This supports both the original SD1 instructpix2pix model and the diffusers SDXL one. 2024-03-31 05:25:16 +00:00			`def matches(s, unet_config, state_dict=None):`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`for k in s.unet_config:`
Stable Cascade Stage C. 2024-02-16 15:55:08 +00:00			`if k not in unet_config or s.unet_config[k] != unet_config[k]:`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`return False`
IP2P model loading support. This is the code to load the model and inference it with only a text prompt. This commit does not contain the nodes to properly use it with an image input. This supports both the original SD1 instructpix2pix model and the diffusers SDXL one. 2024-03-31 05:25:16 +00:00			`if state_dict is not None:`
			`for k in s.required_keys:`
			`if k not in state_dict:`
			`return False`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`return True`

Refactor of sampler code to deal more easily with different model types. 2023-07-17 05:22:12 +00:00			`def model_type(self, state_dict, prefix=""):`
			`return model_base.ModelType.EPS`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00
			`def inpaint_model(self):`
			`return self.unet_config["in_channels"] > 4`

			`def __init__(self, unet_config):`
Fix issue with sampling_settings persisting across models. 2024-04-10 03:20:43 +00:00			`self.unet_config = unet_config.copy()`
			`self.sampling_settings = self.sampling_settings.copy()`
Move latent scale factor from VAE to model. 2023-06-23 06:14:12 +00:00			`self.latent_format = self.latent_format()`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`for x in self.unet_extra_config:`
			`self.unet_config[x] = self.unet_extra_config[x]`

Initialize the unet directly on the target device. 2023-07-29 18:51:56 +00:00			`def get_model(self, state_dict, prefix="", device=None):`
Support SDXL inpaint models. 2023-09-01 19:18:25 +00:00			`if self.noise_aug_config is not None:`
			`out = model_base.SD21UNCLIP(self, self.noise_aug_config, model_type=self.model_type(state_dict, prefix), device=device)`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`else:`
Support SDXL inpaint models. 2023-09-01 19:18:25 +00:00			`out = model_base.BaseModel(self, model_type=self.model_type(state_dict, prefix), device=device)`
			`if self.inpaint_model():`
			`out.set_inpaint()`
			`return out`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00
			`def process_clip_state_dict(self, state_dict):`
Some cleanups to how the text encoders are loaded. 2024-02-19 15:29:18 +00:00			`state_dict = utils.state_dict_prefix_replace(state_dict, {k: "" for k in self.text_encoder_key_prefix}, filter_keys=True)`
Support base SDXL and SDXL refiner models. Large refactor of the model detection and loading code. 2023-06-22 17:03:50 +00:00			`return state_dict`

Make it easy for models to process the unet state dict on load. 2023-11-21 03:27:36 +00:00			`def process_unet_state_dict(self, state_dict):`
			`return state_dict`

Allow model config to preprocess the vae state dict on load. 2023-11-21 21:29:18 +00:00			`def process_vae_state_dict(self, state_dict):`
			`return state_dict`

Add CheckpointSave node to save checkpoints. The created checkpoints contain workflow metadata that can be loaded by dragging them on top of the UI or loading them with the "Load" button. Checkpoints will be saved in fp16 or fp32 depending on the format ComfyUI is using for inference on your hardware. To force fp32 use: --force-fp32 Anything that patches the model weights like merging or loras will be saved. The output directory is currently set to: output/checkpoints but that might change in the future. 2023-06-26 16:21:07 +00:00			`def process_clip_state_dict_for_saving(self, state_dict):`
Some cleanups to how the text encoders are loaded. 2024-02-19 15:29:18 +00:00			`replace_prefix = {"": self.text_encoder_key_prefix[0]}`
Move some functions to utils.py 2023-09-03 02:33:37 +00:00			`return utils.state_dict_prefix_replace(state_dict, replace_prefix)`
Add CheckpointSave node to save checkpoints. The created checkpoints contain workflow metadata that can be loaded by dragging them on top of the UI or loading them with the "Load" button. Checkpoints will be saved in fp16 or fp32 depending on the format ComfyUI is using for inference on your hardware. To force fp32 use: --force-fp32 Anything that patches the model weights like merging or loras will be saved. The output directory is currently set to: output/checkpoints but that might change in the future. 2023-06-26 16:21:07 +00:00
Add unfinished ImageOnlyCheckpointSave node to save a SVD checkpoint. This node is unfinished, SVD checkpoints saved with this node will work with ComfyUI but not with anything else. 2024-01-18 00:37:19 +00:00			`def process_clip_vision_state_dict_for_saving(self, state_dict):`
			`replace_prefix = {}`
			`if self.clip_vision_prefix is not None:`
			`replace_prefix[""] = self.clip_vision_prefix`
			`return utils.state_dict_prefix_replace(state_dict, replace_prefix)`

Add CheckpointSave node to save checkpoints. The created checkpoints contain workflow metadata that can be loaded by dragging them on top of the UI or loading them with the "Load" button. Checkpoints will be saved in fp16 or fp32 depending on the format ComfyUI is using for inference on your hardware. To force fp32 use: --force-fp32 Anything that patches the model weights like merging or loras will be saved. The output directory is currently set to: output/checkpoints but that might change in the future. 2023-06-26 16:21:07 +00:00			`def process_unet_state_dict_for_saving(self, state_dict):`
			`replace_prefix = {"": "model.diffusion_model."}`
Move some functions to utils.py 2023-09-03 02:33:37 +00:00			`return utils.state_dict_prefix_replace(state_dict, replace_prefix)`
Add CheckpointSave node to save checkpoints. The created checkpoints contain workflow metadata that can be loaded by dragging them on top of the UI or loading them with the "Load" button. Checkpoints will be saved in fp16 or fp32 depending on the format ComfyUI is using for inference on your hardware. To force fp32 use: --force-fp32 Anything that patches the model weights like merging or loras will be saved. The output directory is currently set to: output/checkpoints but that might change in the future. 2023-06-26 16:21:07 +00:00
			`def process_vae_state_dict_for_saving(self, state_dict):`
Some cleanups to how the text encoders are loaded. 2024-02-19 15:29:18 +00:00			`replace_prefix = {"": self.vae_key_prefix[0]}`
Move some functions to utils.py 2023-09-03 02:33:37 +00:00			`return utils.state_dict_prefix_replace(state_dict, replace_prefix)`
Add CheckpointSave node to save checkpoints. The created checkpoints contain workflow metadata that can be loaded by dragging them on top of the UI or loading them with the "Load" button. Checkpoints will be saved in fp16 or fp32 depending on the format ComfyUI is using for inference on your hardware. To force fp32 use: --force-fp32 Anything that patches the model weights like merging or loras will be saved. The output directory is currently set to: output/checkpoints but that might change in the future. 2023-06-26 16:21:07 +00:00
Stable Cascade Stage C. 2024-02-16 15:55:08 +00:00			`def set_inference_dtype(self, dtype, manual_cast_dtype):`
			`self.unet_config['dtype'] = dtype`
Use faster manual cast for fp8 in unet. 2023-12-11 23:24:44 +00:00			`self.manual_cast_dtype = manual_cast_dtype`