Merge branch 'master' into m957ymj75urz-dynamic-prompting
commit 8683ea4248
@@ -20,6 +20,8 @@ This ui will let you design and execute advanced stable diffusion pipelines usin
- Saving/Loading workflows as Json files.
- Nodes interface can be used to create complex workflows like one for [Hires fix](https://comfyanonymous.github.io/ComfyUI_examples/2_pass_txt2img/) or much more advanced ones.
- [Area Composition](https://comfyanonymous.github.io/ComfyUI_examples/area_composition/)
- [Inpainting](https://comfyanonymous.github.io/ComfyUI_examples/inpaint/) with both regular and inpainting models.
- [ControlNet](https://comfyanonymous.github.io/ComfyUI_examples/controlnet/)
- Starts up very fast.
- Works fully offline: will never download anything.
@@ -3,6 +3,7 @@ CPU = 0
NO_VRAM = 1
LOW_VRAM = 2
NORMAL_VRAM = 3
+HIGH_VRAM = 4

accelerate_enabled = False
vram_state = NORMAL_VRAM
@@ -27,10 +28,11 @@ if "--lowvram" in sys.argv:
    set_vram_to = LOW_VRAM
if "--novram" in sys.argv:
    set_vram_to = NO_VRAM
if "--highvram" in sys.argv:
    vram_state = HIGH_VRAM


if set_vram_to != NORMAL_VRAM:
    if set_vram_to == LOW_VRAM or set_vram_to == NO_VRAM:
        try:
            import accelerate
            accelerate_enabled = True
@@ -44,7 +46,7 @@ if set_vram_to != NORMAL_VRAM:
        total_vram_available_mb = int(max(256, total_vram_available_mb))


-print("Set vram state to:", ["CPU", "NO VRAM", "LOW VRAM", "NORMAL VRAM"][vram_state])
+print("Set vram state to:", ["CPU", "NO VRAM", "LOW VRAM", "NORMAL VRAM", "HIGH VRAM"][vram_state])


current_loaded_model = None
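Taken together, the hunks above add a HIGH_VRAM state that is selected by a new --highvram flag and reported by the state printout. A minimal standalone sketch of the same flag-to-state pattern follows; the function name and argv handling are illustrative, not the module's actual API:

import sys

# VRAM states mirroring the constants in the diff above.
CPU, NO_VRAM, LOW_VRAM, NORMAL_VRAM, HIGH_VRAM = range(5)

def pick_vram_state(argv):
    # Default: models are offloaded to CPU memory after being used.
    state = NORMAL_VRAM
    if "--lowvram" in argv:
        state = LOW_VRAM
    if "--novram" in argv:
        state = NO_VRAM
    if "--highvram" in argv:
        # Keep models resident on the GPU between prompts.
        state = HIGH_VRAM
    return state

if __name__ == "__main__":
    state = pick_vram_state(sys.argv)
    # The list needs one name per state, which is exactly what the
    # one-line change to the print statement above fixes.
    print("Set vram state to:", ["CPU", "NO VRAM", "LOW VRAM", "NORMAL VRAM", "HIGH VRAM"][state])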
@@ -57,18 +59,24 @@ def unload_model():
    global current_loaded_model
    global model_accelerated
    global current_gpu_controlnets
+    global vram_state

    if current_loaded_model is not None:
        if model_accelerated:
            accelerate.hooks.remove_hook_from_submodules(current_loaded_model.model)
            model_accelerated = False

-        current_loaded_model.model.cpu()
+        #never unload models from GPU on high vram
+        if vram_state != HIGH_VRAM:
+            current_loaded_model.model.cpu()
        current_loaded_model.unpatch_model()
        current_loaded_model = None
-    if len(current_gpu_controlnets) > 0:
-        for n in current_gpu_controlnets:
-            n.cpu()
-        current_gpu_controlnets = []
+    if vram_state != HIGH_VRAM:
+        if len(current_gpu_controlnets) > 0:
+            for n in current_gpu_controlnets:
+                n.cpu()
+            current_gpu_controlnets = []


def load_model_gpu(model):
@@ -87,7 +95,7 @@ def load_model_gpu(model):
    current_loaded_model = model
    if vram_state == CPU:
        pass
-    elif vram_state == NORMAL_VRAM:
+    elif vram_state == NORMAL_VRAM or vram_state == HIGH_VRAM:
        model_accelerated = False
        real_model.cuda()
    else:
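The unload_model and load_model_gpu changes implement one policy: under HIGH_VRAM, the loaded model (and any ControlNets) are never moved back to the CPU after a prompt finishes, so the next load is essentially free. A rough sketch of that policy with a dummy model object; the patcher interface here is an assumption for illustration, not ComfyUI's real class:

import torch

HIGH_VRAM = 4

class DummyPatchedModel:
    """Stand-in for the object the diff calls .model / .unpatch_model() on."""
    def __init__(self):
        self.model = torch.nn.Linear(4, 4)
    def unpatch_model(self):
        pass  # the real code restores patched weights here

def unload_model(loaded, vram_state):
    if loaded is None:
        return None
    # Never unload models from GPU on high vram: skip the .cpu() move,
    # but still unpatch so the next prompt starts from clean weights.
    if vram_state != HIGH_VRAM:
        loaded.model.cpu()
    loaded.unpatch_model()
    return None

Loading then becomes cheap under HIGH_VRAM because the weights are already resident on the device, which is why load_model_gpu treats HIGH_VRAM like NORMAL_VRAM when calling .cuda().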
@@ -178,7 +178,6 @@ def load_embed(embedding_name, embedding_directory):
            valid_file = t
            break
    if valid_file is None:
        print("warning, embedding {} does not exist, ignoring".format(embed_path))
        return None
    else:
        embed_path = valid_file
@@ -187,7 +186,10 @@ def load_embed(embedding_name, embedding_directory):
        import safetensors.torch
        embed = safetensors.torch.load_file(embed_path, device="cpu")
    else:
-        embed = torch.load(embed_path, weights_only=True, map_location="cpu")
+        if 'weights_only' in torch.load.__code__.co_varnames:
+            embed = torch.load(embed_path, weights_only=True, map_location="cpu")
+        else:
+            embed = torch.load(embed_path, map_location="cpu")
    if 'string_to_param' in embed:
        values = embed['string_to_param'].values()
    else:
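The torch.load change guards the weights_only keyword, which only exists in newer PyTorch releases, by inspecting torch.load before passing it. The same feature-detection idea written as a small helper; this is a sketch, and inspect.signature is simply an alternative to the __code__.co_varnames check the diff uses:

import inspect
import torch

def load_checkpoint_safely(path):
    # Prefer weights_only=True (refuses to unpickle arbitrary objects),
    # but fall back gracefully on PyTorch versions that predate the flag.
    if "weights_only" in inspect.signature(torch.load).parameters:
        return torch.load(path, weights_only=True, map_location="cpu")
    return torch.load(path, map_location="cpu")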
@@ -218,18 +220,28 @@ class SD1Tokenizer:
        tokens = []
        for t in parsed_weights:
            to_tokenize = unescape_important(t[0]).replace("\n", " ").split(' ')
-            for word in to_tokenize:
+            while len(to_tokenize) > 0:
+                word = to_tokenize.pop(0)
                temp_tokens = []
                embedding_identifier = "embedding:"
                if word.startswith(embedding_identifier) and self.embedding_directory is not None:
                    embedding_name = word[len(embedding_identifier):].strip('\n')
                    embed = load_embed(embedding_name, self.embedding_directory)
+                    if embed is None:
+                        stripped = embedding_name.strip(',')
+                        if len(stripped) < len(embedding_name):
+                            embed = load_embed(stripped, self.embedding_directory)
+                            if embed is not None:
+                                to_tokenize.insert(0, embedding_name[len(stripped):])
+
                    if embed is not None:
                        if len(embed.shape) == 1:
                            temp_tokens += [(embed, t[1])]
                        else:
                            for x in range(embed.shape[0]):
                                temp_tokens += [(embed[x], t[1])]
                    else:
                        print("warning, embedding:{} does not exist, ignoring".format(embedding_name))
                elif len(word) > 0:
                    tt = self.tokenizer(word)["input_ids"][1:-1]
                    for x in tt:
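The tokenizer hunk switches from a plain for loop to a pop-based while loop so that when "embedding:name" arrives glued to trailing punctuation (for example "embedding:foo,"), the stripped name can still be looked up and the leftover "," pushed back onto the queue for normal tokenization. A reduced sketch of just that queue handling; load_embed is stubbed out by a simple set membership check, and only the control flow is the point:

def split_embedding_word(word, known_embeddings):
    """Return (embedding_name, leftover) for words like 'embedding:foo,'."""
    prefix = "embedding:"
    name = word[len(prefix):]
    if name in known_embeddings:
        return name, ""
    stripped = name.strip(',')
    if stripped != name and stripped in known_embeddings:
        # Re-queue whatever was stripped (usually a comma) for normal tokenizing.
        return stripped, name[len(stripped):]
    return None, ""

to_tokenize = ["embedding:foo,", "a", "photo"]
known = {"foo"}
while len(to_tokenize) > 0:
    word = to_tokenize.pop(0)
    if word.startswith("embedding:"):
        name, leftover = split_embedding_word(word, known)
        print("embedding:", name)
        if leftover:
            to_tokenize.insert(0, leftover)
    else:
        print("token word:", word)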
main.py
@@ -29,6 +29,7 @@ if __name__ == "__main__":
        print("\t--dont-upcast-attention\t\tDisable upcasting of attention \n\t\t\t\t\tcan boost speed but increase the chances of black images.\n")
        print("\t--use-split-cross-attention\tUse the split cross attention optimization instead of the sub-quadratic one.\n\t\t\t\t\tIgnored when xformers is used.")
        print()
+        print("\t--highvram\t\t\tBy default models will be unloaded to CPU memory after being used.\n\t\t\t\t\tThis option keeps them in GPU memory.\n")
        print("\t--normalvram\t\t\tUsed to force normal vram use if lowvram gets automatically enabled.")
        print("\t--lowvram\t\t\tSplit the unet in parts to use less vram.")
        print("\t--novram\t\t\tWhen lowvram isn't enough.")
@@ -208,6 +209,7 @@ class PromptExecutor:
        executed = set(executed)
        for x in executed:
            self.old_prompt[x] = copy.deepcopy(prompt[x])
        torch.cuda.empty_cache()

def validate_inputs(prompt, item):
    unique_id = item
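For context, the PromptExecutor hunk records a deep copy of every node that was just executed into old_prompt so that unchanged nodes can be skipped on the next run, and then releases cached CUDA allocations. A toy sketch of that caching idea; the node and prompt structure here is invented for illustration, and the empty_cache call is omitted so the snippet runs without a GPU:

import copy

class TinyExecutor:
    def __init__(self):
        self.old_prompt = {}

    def execute(self, prompt):
        executed = set()
        for node_id, node in prompt.items():
            # Re-run only nodes whose definition changed since the last prompt.
            if self.old_prompt.get(node_id) != node:
                print("executing", node_id)
                executed.add(node_id)
        for x in executed:
            # Deep copy so later edits to the incoming prompt dict
            # cannot mutate the cached snapshot.
            self.old_prompt[x] = copy.deepcopy(prompt[x])

ex = TinyExecutor()
ex.execute({"1": {"class_type": "KSampler", "inputs": {"seed": 1}}})
ex.execute({"1": {"class_type": "KSampler", "inputs": {"seed": 1}}})  # skipped: unchanged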
@@ -0,0 +1,71 @@
model:
  base_learning_rate: 7.5e-05
  target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
  params:
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: "jpg"
    cond_stage_key: "txt"
    image_size: 64
    channels: 4
    cond_stage_trainable: false   # Note: different from the one we trained before
    conditioning_key: hybrid   # important
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    finetune_keys: null

    scheduler_config: # 10000 warmup steps
      target: ldm.lr_scheduler.LambdaLinearScheduler
      params:
        warm_up_steps: [ 2500 ] # NOTE for resuming. use 10000 if starting from scratch
        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
        f_start: [ 1.e-6 ]
        f_max: [ 1. ]
        f_min: [ 1. ]

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 32 # unused
        in_channels: 9  # 4 data + 4 downscaled image + 1 mask
        out_channels: 4
        model_channels: 320
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4, 4 ]
        num_heads: 8
        use_spatial_transformer: True
        transformer_depth: 1
        context_dim: 768
        use_checkpoint: True
        legacy: False

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 2
          - 4
          - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
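The new config above describes the v1 inpainting model: a UNet whose in_channels is 9 because the sampler feeds it the 4-channel noisy latent concatenated with a 4-channel VAE encoding of the masked image and a 1-channel mask downscaled to latent resolution. A shape-only sketch of that concatenation; tensor names, batch size, and the channel order shown are illustrative (the config comment only fixes the 4 + 4 + 1 breakdown):

import torch

batch, h, w = 1, 64, 64                         # latent resolution (image_size: 64 in the config)
noisy_latent   = torch.randn(batch, 4, h, w)    # "4 data"
masked_image_z = torch.randn(batch, 4, h, w)    # "4 downscaled image" (VAE-encoded masked image)
mask           = torch.zeros(batch, 1, h, w)    # "1 mask"

unet_input = torch.cat([noisy_latent, masked_image_z, mask], dim=1)
print(unet_input.shape)  # torch.Size([1, 9, 64, 64]) -> matches in_channels: 9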
nodes.py
@@ -759,7 +759,7 @@ def load_custom_nodes():
        module_path = os.path.join(CUSTOM_NODE_PATH, possible_module)
        if os.path.isfile(module_path) and os.path.splitext(module_path)[1] != ".py": continue

-        module_name = "custom_node_module.{}".format(possible_module)
+        module_name = possible_module
        try:
            if os.path.isfile(module_path):
                module_spec = importlib.util.spec_from_file_location(module_name, module_path)
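The nodes.py change only renames the module used when importing custom nodes; the surrounding machinery is the standard importlib recipe for loading a module from a file path. A self-contained sketch of that recipe, with a placeholder path:

import importlib.util
import os

def load_module_from_path(module_path):
    # Module name taken from the file name, as in the diff.
    module_name = os.path.splitext(os.path.basename(module_path))[0]
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module

# Usage (placeholder path): a custom node file is expected to expose
# NODE_CLASS_MAPPINGS, which ComfyUI then merges into its node registry.
# mod = load_module_from_path("custom_nodes/my_nodes.py")
# print(getattr(mod, "NODE_CLASS_MAPPINGS", {}))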
@@ -85,7 +85,7 @@
    {
      "cell_type": "markdown",
      "source": [
-        "Run ComfyUI:"
+        "Run ComfyUI (use the fp16 model configs for more speed):"
      ],
      "metadata": {
        "id": "gggggggggg"
@@ -112,7 +112,7 @@
        "\n",
        "threading.Thread(target=iframe_thread, daemon=True, args=(8188,)).start()\n",
        "\n",
-        "!python main.py"
+        "!python main.py --highvram"
      ],
      "metadata": {
        "id": "hhhhhhhhhh"
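The notebook cell above starts iframe_thread in the background before launching main.py with --highvram; that helper, defined earlier in the notebook, waits until the server is listening on port 8188 and then surfaces the UI. A minimal sketch of that wait-for-port pattern; the function body and printed message are illustrative, not the notebook's exact helper:

import socket
import threading
import time

def wait_for_server(port):
    # Poll localhost until something accepts connections on the port,
    # then announce that the UI is reachable.
    while True:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            if s.connect_ex(("127.0.0.1", port)) == 0:
                print(f"ComfyUI is up on http://127.0.0.1:{port}")
                return
        time.sleep(0.5)

threading.Thread(target=wait_for_server, daemon=True, args=(8188,)).start()
# ...then the notebook runs: !python main.py --highvram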