Add an option to run inference on the diffusion model in fp32 and fp64.
commit 61196d8857
parent b4526d3fc3
@@ -60,8 +60,10 @@ fp_group.add_argument("--force-fp32", action="store_true", help="Force fp32 (If
 fp_group.add_argument("--force-fp16", action="store_true", help="Force fp16.")
 
 fpunet_group = parser.add_mutually_exclusive_group()
-fpunet_group.add_argument("--bf16-unet", action="store_true", help="Run the UNET in bf16. This should only be used for testing stuff.")
-fpunet_group.add_argument("--fp16-unet", action="store_true", help="Store unet weights in fp16.")
+fpunet_group.add_argument("--fp32-unet", action="store_true", help="Run the diffusion model in fp32.")
+fpunet_group.add_argument("--fp64-unet", action="store_true", help="Run the diffusion model in fp64.")
+fpunet_group.add_argument("--bf16-unet", action="store_true", help="Run the diffusion model in bf16.")
+fpunet_group.add_argument("--fp16-unet", action="store_true", help="Run the diffusion model in fp16")
 fpunet_group.add_argument("--fp8_e4m3fn-unet", action="store_true", help="Store unet weights in fp8_e4m3fn.")
 fpunet_group.add_argument("--fp8_e5m2-unet", action="store_true", help="Store unet weights in fp8_e5m2.")
 
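A minimal standalone argparse sketch (an illustration only, not the project's actual CLI module) of why the new flags can only be passed one at a time and how they surface on the parsed args object:

import argparse

# Minimal sketch of the mutually exclusive dtype group added above; only one
# flag may be passed per run, and argparse maps "--fp32-unet" to args.fp32_unet.
parser = argparse.ArgumentParser()
fpunet_group = parser.add_mutually_exclusive_group()
fpunet_group.add_argument("--fp32-unet", action="store_true", help="Run the diffusion model in fp32.")
fpunet_group.add_argument("--fp64-unet", action="store_true", help="Run the diffusion model in fp64.")

args = parser.parse_args(["--fp64-unet"])
assert args.fp64_unet and not args.fp32_unet
# parser.parse_args(["--fp32-unet", "--fp64-unet"]) would exit with an error,
# since the two flags are mutually exclusive.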
@@ -628,6 +628,10 @@ def maximum_vram_for_weights(device=None):
 def unet_dtype(device=None, model_params=0, supported_dtypes=[torch.float16, torch.bfloat16, torch.float32]):
     if model_params < 0:
         model_params = 1000000000000000000000
+    if args.fp32_unet:
+        return torch.float32
+    if args.fp64_unet:
+        return torch.float64
     if args.bf16_unet:
         return torch.bfloat16
     if args.fp16_unet:
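To make the selection order explicit, here is a trimmed-down sketch (an assumption for illustration, not the real unet_dtype, which also weighs device and model_params) of how the flag checks added above resolve to a torch dtype:

import torch

# Simplified stand-in for the flag-driven branch of unet_dtype(): explicit
# flags win, checked in the order fp32, fp64, bf16, fp16; with no flag set,
# the caller falls through to the automatic dtype logic.
def pick_unet_dtype(fp32_unet=False, fp64_unet=False, bf16_unet=False, fp16_unet=False):
    if fp32_unet:
        return torch.float32
    if fp64_unet:
        return torch.float64
    if bf16_unet:
        return torch.bfloat16
    if fp16_unet:
        return torch.float16
    return None  # defer to automatic dtype selection

assert pick_unet_dtype(fp64_unet=True) is torch.float64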
@@ -674,7 +678,7 @@ def unet_dtype(device=None, model_params=0, supported_dtypes=[torch.float16, tor
 
 # None means no manual cast
 def unet_manual_cast(weight_dtype, inference_device, supported_dtypes=[torch.float16, torch.bfloat16, torch.float32]):
-    if weight_dtype == torch.float32:
+    if weight_dtype == torch.float32 or weight_dtype == torch.float64:
         return None
 
     fp16_supported = should_use_fp16(inference_device, prioritize_performance=False)
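For completeness, a small self-contained sketch (simplified signature, an assumption rather than the project's actual unet_manual_cast) of the check changed above: weights kept in fp32 or fp64 need no on-the-fly cast, so the function reports "no manual cast" by returning None.

import torch

# Simplified stand-in for the dtype check in unet_manual_cast(): full and
# double precision weights are used as-is, so there is nothing to cast.
def manual_cast_dtype(weight_dtype):
    if weight_dtype in (torch.float32, torch.float64):
        return None  # None means no manual cast
    return torch.float16  # placeholder fallback for lower-precision weights

assert manual_cast_dtype(torch.float64) is None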