Auto disable cuda malloc on some GPUs on Windows.
This commit is contained in:
parent
0b284f650b
commit
799c08a4ce
|
@ -0,0 +1,77 @@
|
||||||
|
import os
|
||||||
|
import importlib.util
|
||||||
|
from comfy.cli_args import args
|
||||||
|
|
||||||
|
#Can't use pytorch to get the GPU names because the cuda malloc has to be set before the first import.
|
||||||
|
def get_gpu_names():
    """Return the set of display-adapter name strings reported by the OS.

    Enumerates adapters through the Win32 ``EnumDisplayDevicesA`` API via
    ctypes, so it works *before* torch is imported (the cuda malloc setting
    has to be decided before the first torch import — see module comment).
    On non-Windows platforms an empty set is returned.
    """
    if os.name == 'nt':
        import ctypes

        # Mirror of the Win32 DISPLAY_DEVICEA structure (ANSI variant).
        class DISPLAY_DEVICEA(ctypes.Structure):
            _fields_ = [
                ('cb', ctypes.c_ulong),
                ('DeviceName', ctypes.c_char * 32),
                ('DeviceString', ctypes.c_char * 128),
                ('StateFlags', ctypes.c_ulong),
                ('DeviceID', ctypes.c_char * 128),
                ('DeviceKey', ctypes.c_char * 128)
            ]

        # Load user32.dll, which exports EnumDisplayDevicesA.
        user32 = ctypes.windll.user32

        def enum_display_devices():
            device_info = DISPLAY_DEVICEA()
            # cb must hold the structure size before each call.
            device_info.cb = ctypes.sizeof(device_info)
            device_index = 0
            gpu_names = set()

            while user32.EnumDisplayDevicesA(None, device_index, ctypes.byref(device_info), 0):
                device_index += 1
                # DeviceString is an ANSI byte buffer; localized adapter
                # names may not be valid UTF-8, so don't crash on them.
                gpu_names.add(device_info.DeviceString.decode('utf-8', errors='replace'))
            return gpu_names

        return enum_display_devices()
    else:
        return set()
|
||||||
|
|
||||||
|
def cuda_malloc_supported():
    """Return False if a detected NVIDIA GPU is on the cuda-malloc blacklist.

    The blacklist holds GPU model substrings that are known to misbehave
    with the cudaMallocAsync backend (hence this module auto-disables it
    for them).  If GPU names cannot be determined at all, support is
    assumed and True is returned.
    """
    blacklist = {"GeForce GTX 960M", "GeForce GTX 950M", "GeForce 945M", "GeForce 940M", "GeForce 930M", "GeForce 920M", "GeForce 910M"}
    try:
        names = get_gpu_names()
    except Exception:  # was a bare except: don't swallow SystemExit/KeyboardInterrupt
        names = set()
    for x in names:
        # Only NVIDIA adapters are relevant; substring match because the
        # reported string is typically e.g. "NVIDIA GeForce GTX 960M".
        if "NVIDIA" in x:
            for b in blacklist:
                if b in x:
                    return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
if not args.cuda_malloc:
    # cuda malloc was not explicitly requested: enable it by default on
    # torch 2.x if the GPU is not blacklisted.  The torch version is read
    # from torch/version.py WITHOUT importing torch, because the allocator
    # env var must be set before the first torch import.
    try:
        version = ""
        torch_spec = importlib.util.find_spec("torch")
        for folder in torch_spec.submodule_search_locations:
            ver_file = os.path.join(folder, "version.py")
            if os.path.isfile(ver_file):
                spec = importlib.util.spec_from_file_location("torch_version_import", ver_file)
                module = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(module)
                version = module.__version__
        # Parse the full major version; int(version[0]) would misread a
        # hypothetical "10.x" release as 1.
        if int(version.split('.')[0]) >= 2:  # enable by default for torch version 2.0 and up
            args.cuda_malloc = cuda_malloc_supported()
    except Exception:
        # Best effort: if torch isn't installed or its version can't be
        # parsed, leave cuda malloc disabled rather than crash at startup.
        pass
|
||||||
|
|
||||||
|
|
||||||
|
if args.cuda_malloc and not args.disable_cuda_malloc:
    # Tell pytorch's CUDA caching allocator to use the async backend,
    # preserving any allocator options the user already configured.
    current = os.environ.get('PYTORCH_CUDA_ALLOC_CONF')
    backend = "backend:cudaMallocAsync"
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = backend if current is None else current + "," + backend
|
25
main.py
25
main.py
|
@ -61,30 +61,7 @@ if __name__ == "__main__":
|
||||||
os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda_device)
|
os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda_device)
|
||||||
print("Set cuda device to:", args.cuda_device)
|
print("Set cuda device to:", args.cuda_device)
|
||||||
|
|
||||||
if not args.cuda_malloc:
|
import cuda_malloc
|
||||||
try: #if there's a better way to check the torch version without importing it let me know
|
|
||||||
version = ""
|
|
||||||
torch_spec = importlib.util.find_spec("torch")
|
|
||||||
for folder in torch_spec.submodule_search_locations:
|
|
||||||
ver_file = os.path.join(folder, "version.py")
|
|
||||||
if os.path.isfile(ver_file):
|
|
||||||
spec = importlib.util.spec_from_file_location("torch_version_import", ver_file)
|
|
||||||
module = importlib.util.module_from_spec(spec)
|
|
||||||
spec.loader.exec_module(module)
|
|
||||||
version = module.__version__
|
|
||||||
if int(version[0]) >= 2: #enable by default for torch version 2.0 and up
|
|
||||||
args.cuda_malloc = True
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
if args.cuda_malloc and not args.disable_cuda_malloc:
|
|
||||||
env_var = os.environ.get('PYTORCH_CUDA_ALLOC_CONF', None)
|
|
||||||
if env_var is None:
|
|
||||||
env_var = "backend:cudaMallocAsync"
|
|
||||||
else:
|
|
||||||
env_var += ",backend:cudaMallocAsync"
|
|
||||||
|
|
||||||
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = env_var
|
|
||||||
|
|
||||||
import comfy.utils
|
import comfy.utils
|
||||||
import yaml
|
import yaml
|
||||||
|
|
Loading…
Reference in New Issue