Merge branch 'master' into worksplit-multigpu

2025-02-17 19:35:58 -06:00
parent d2504fb701 31e54b7052
commit 048f4f0b3a
48 changed files with 10819 additions and 8408 deletions
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -99,8 +99,28 @@ def wipe_lowvram_weight(m):
    if hasattr(m, "prev_comfy_cast_weights"):
        m.comfy_cast_weights = m.prev_comfy_cast_weights
        del m.prev_comfy_cast_weights
-    m.weight_function = None
-    m.bias_function = None
+
+    if hasattr(m, "weight_function"):
+        m.weight_function = []
+
+    if hasattr(m, "bias_function"):
+        m.bias_function = []
+
+def move_weight_functions(m, device):
+    if device is None:
+        return 0
+
+    memory = 0
+    if hasattr(m, "weight_function"):
+        for f in m.weight_function:
+            if hasattr(f, "move_to"):
+                memory += f.move_to(device=device)
+
+    if hasattr(m, "bias_function"):
+        for f in m.bias_function:
+            if hasattr(f, "move_to"):
+                memory += f.move_to(device=device)
+    return memory

 class LowVramPatch:
    def __init__(self, key, patches):
@@ -195,11 +215,13 @@ class ModelPatcher:
        self.backup = {}
        self.object_patches = {}
        self.object_patches_backup = {}
+        self.weight_wrapper_patches = {}
        self.model_options = {"transformer_options":{}}
        self.model_size()
        self.load_device = load_device
        self.offload_device = offload_device
        self.weight_inplace_update = weight_inplace_update
+        self.force_cast_weights = False
        self.patches_uuid = uuid.uuid4()
        self.parent = None

@@ -256,11 +278,14 @@ class ModelPatcher:
        n.patches_uuid = self.patches_uuid

        n.object_patches = self.object_patches.copy()
+        n.weight_wrapper_patches = self.weight_wrapper_patches.copy()
        n.model_options = copy.deepcopy(self.model_options)
        n.backup = self.backup
        n.object_patches_backup = self.object_patches_backup
        n.parent = self

+        n.force_cast_weights = self.force_cast_weights
+
        # attachments
        n.attachments = {}
        for k in self.attachments:
@@ -435,6 +460,16 @@ class ModelPatcher:
    def add_object_patch(self, name, obj):
        self.object_patches[name] = obj

+    def set_model_compute_dtype(self, dtype):
+        self.add_object_patch("manual_cast_dtype", dtype)
+        if dtype is not None:
+            self.force_cast_weights = True
+        self.patches_uuid = uuid.uuid4() #TODO: optimize by preventing a full model reload for this
+
+    def add_weight_wrapper(self, name, function):
+        self.weight_wrapper_patches[name] = self.weight_wrapper_patches.get(name, []) + [function]
+        self.patches_uuid = uuid.uuid4()
+
    def get_model_object(self, name: str) -> torch.nn.Module:
        """Retrieves a nested attribute from an object using dot notation considering
        object patches.
@@ -599,6 +634,9 @@ class ModelPatcher:

                lowvram_weight = False

+                weight_key = "{}.weight".format(n)
+                bias_key = "{}.bias".format(n)
+
                if not full_load and hasattr(m, "comfy_cast_weights"):
                    if mem_counter + module_mem >= lowvram_model_memory:
                        lowvram_weight = True
@@ -606,34 +644,46 @@ class ModelPatcher:
                        if hasattr(m, "prev_comfy_cast_weights"): #Already lowvramed
                            continue

-                weight_key = "{}.weight".format(n)
-                bias_key = "{}.bias".format(n)
-
+                cast_weight = self.force_cast_weights
                if lowvram_weight:
+                    if hasattr(m, "comfy_cast_weights"):
+                        m.weight_function = []
+                        m.bias_function = []
+
                    if weight_key in self.patches:
                        if force_patch_weights:
                            self.patch_weight_to_device(weight_key)
                        else:
-                            m.weight_function = LowVramPatch(weight_key, self.patches)
+                            m.weight_function = [LowVramPatch(weight_key, self.patches)]
                            patch_counter += 1
                    if bias_key in self.patches:
                        if force_patch_weights:
                            self.patch_weight_to_device(bias_key)
                        else:
-                            m.bias_function = LowVramPatch(bias_key, self.patches)
+                            m.bias_function = [LowVramPatch(bias_key, self.patches)]
                            patch_counter += 1

-                    m.prev_comfy_cast_weights = m.comfy_cast_weights
-                    m.comfy_cast_weights = True
+                    cast_weight = True
                else:
                    if hasattr(m, "comfy_cast_weights"):
-                        if m.comfy_cast_weights:
-                            wipe_lowvram_weight(m)
+                        wipe_lowvram_weight(m)

                    if full_load or mem_counter + module_mem < lowvram_model_memory:
                        mem_counter += module_mem
                        load_completely.append((module_mem, n, m, params))

+                if cast_weight:
+                    m.prev_comfy_cast_weights = m.comfy_cast_weights
+                    m.comfy_cast_weights = True
+
+                if weight_key in self.weight_wrapper_patches:
+                    m.weight_function.extend(self.weight_wrapper_patches[weight_key])
+
+                if bias_key in self.weight_wrapper_patches:
+                    m.bias_function.extend(self.weight_wrapper_patches[bias_key])
+
+                mem_counter += move_weight_functions(m, device_to)
+
            load_completely.sort(reverse=True)
            for x in load_completely:
                n = x[1]
@@ -695,6 +745,7 @@ class ModelPatcher:
            self.unpatch_hooks()
            if self.model.model_lowvram:
                for m in self.model.modules():
+                    move_weight_functions(m, device_to)
                    wipe_lowvram_weight(m)

                self.model.model_lowvram = False
@@ -761,15 +812,19 @@ class ModelPatcher:
                    weight_key = "{}.weight".format(n)
                    bias_key = "{}.bias".format(n)
                    if move_weight:
+                        cast_weight = self.force_cast_weights
                        m.to(device_to)
+                        module_mem += move_weight_functions(m, device_to)
                        if lowvram_possible:
                            if weight_key in self.patches:
-                                m.weight_function = LowVramPatch(weight_key, self.patches)
+                                m.weight_function.append(LowVramPatch(weight_key, self.patches))
                                patch_counter += 1
                            if bias_key in self.patches:
-                                m.bias_function = LowVramPatch(bias_key, self.patches)
+                                m.bias_function.append(LowVramPatch(bias_key, self.patches))
                                patch_counter += 1
+                            cast_weight = True

+                        if cast_weight:
                            m.prev_comfy_cast_weights = m.comfy_cast_weights
                            m.comfy_cast_weights = True
                        m.comfy_patched_weights = False