Prioritize freeing partially offloaded models first.
This commit is contained in:
parent
22d1241a50
commit
c7427375ee
|
@@ -426,7 +426,7 @@ def free_memory(memory_required, device, keep_loaded=[]):
|
|||
shift_model = current_loaded_models[i]
|
||||
if shift_model.device == device:
|
||||
if shift_model not in keep_loaded:
|
||||
- can_unload.append((sys.getrefcount(shift_model.model), shift_model.model_memory(), i))
|
||||
+ can_unload.append((-shift_model.model_offloaded_memory(), sys.getrefcount(shift_model.model), shift_model.model_memory(), i))
|
||||
shift_model.currently_used = False
|
||||
|
||||
for x in sorted(can_unload):
|
||||
|
|
Loading…
Reference in New Issue