fp16 is actually faster than fp32 on a GTX 1080.

This commit is contained in:
comfyanonymous 2024-08-21 23:23:50 -04:00
parent a60620dcea
commit 843a7ff70c
1 changed file with 3 additions and 6 deletions

View File

@@ -987,16 +987,13 @@ def should_use_fp16(device=None, model_params=0, prioritize_performance=True, ma
if props.major < 6:
return False
fp16_works = False
#FP16 is confirmed working on a 1080 (GP104) but it's a bit slower than FP32 so it should only be enabled
#when the model doesn't actually fit on the card
#TODO: actually test if GP106 and others have the same type of behavior
#FP16 is confirmed working on a 1080 (GP104) and on latest pytorch actually seems faster than fp32
nvidia_10_series = ["1080", "1070", "titan x", "p3000", "p3200", "p4000", "p4200", "p5000", "p5200", "p6000", "1060", "1050", "p40", "p100", "p6", "p4"]
for x in nvidia_10_series:
if x in props.name.lower():
fp16_works = True
return True
if fp16_works or manual_cast:
if manual_cast:
free_model_memory = maximum_vram_for_weights(device)
if (not prioritize_performance) or model_params * 4 > free_model_memory:
return True