diff --git a/comfy/cli_args.py b/comfy/cli_args.py
index 38718b66..260a51bb 100644
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -41,7 +41,11 @@ parser.add_argument("--output-directory", type=str, default=None, help="Set the
 parser.add_argument("--auto-launch", action="store_true", help="Automatically launch ComfyUI in the default browser.")
 parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.")
 parser.add_argument("--dont-upcast-attention", action="store_true", help="Disable upcasting of attention. Can boost speed but increase the chances of black images.")
-parser.add_argument("--force-fp32", action="store_true", help="Force fp32 (If this makes your GPU work better please report it).")
+
+fp_group = parser.add_mutually_exclusive_group()
+fp_group.add_argument("--force-fp32", action="store_true", help="Force fp32 (If this makes your GPU work better please report it).")
+fp_group.add_argument("--force-fp16", action="store_true", help="Force fp16.")
+
 parser.add_argument("--directml", type=int, nargs="?", metavar="DIRECTML_DEVICE", const=-1, help="Use torch-directml.")

 class LatentPreviewMethod(enum.Enum):
diff --git a/comfy/model_management.py b/comfy/model_management.py
index e44c9e8a..5461d28e 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -171,10 +171,15 @@ elif args.highvram or args.gpu_only:
     vram_state = VRAMState.HIGH_VRAM

 FORCE_FP32 = False
+FORCE_FP16 = False
 if args.force_fp32:
     print("Forcing FP32, if this improves things please report it.")
     FORCE_FP32 = True

+if args.force_fp16:
+    print("Forcing FP16.")
+    FORCE_FP16 = True
+
 if lowvram_available:
     try:
         import accelerate
@@ -457,6 +462,9 @@ def should_use_fp16(device=None):
     global xpu_available
     global directml_enabled

+    if FORCE_FP16:
+        return True
+
     if device is not None: #TODO
         if is_device_cpu(device):
             return False
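
For reference, a minimal standalone sketch (not part of the patch) of what the mutually exclusive group in cli_args.py buys us: argparse rejects any invocation that passes both precision flags, so FORCE_FP32 and FORCE_FP16 can never be set at the same time.

# Standalone sketch: mirrors the fp_group wiring from cli_args.py to show
# how argparse enforces the fp32/fp16 exclusivity.
import argparse

parser = argparse.ArgumentParser()
fp_group = parser.add_mutually_exclusive_group()
fp_group.add_argument("--force-fp32", action="store_true")
fp_group.add_argument("--force-fp16", action="store_true")

args = parser.parse_args(["--force-fp16"])
print(args.force_fp16, args.force_fp32)  # True False

try:
    # Passing both flags makes argparse print an error and exit:
    #   argument --force-fp16: not allowed with argument --force-fp32
    parser.parse_args(["--force-fp32", "--force-fp16"])
except SystemExit:
    pass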