Some fixes to generalize CUDA-specific functionality to Intel or other GPUs.

2023-09-02 18:22:10 -07:00
parent 62efc78a4b
commit 4a0c4ce4ef
3 changed files with 38 additions and 26 deletions
--- a/comfy/ldm/modules/diffusionmodules/util.py
+++ b/comfy/ldm/modules/diffusionmodules/util.py
@@ -15,6 +15,7 @@ import torch.nn as nn
 import numpy as np
 from einops import repeat

+from comfy import model_management
 from comfy.ldm.util import instantiate_from_config
 import comfy.ops

@@ -139,13 +140,22 @@ class CheckpointFunction(torch.autograd.Function):
    @staticmethod
    def backward(ctx, *output_grads):
        ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors]
-        with torch.enable_grad(), \
-                torch.cuda.amp.autocast(**ctx.gpu_autocast_kwargs):
-            # Fixes a bug where the first op in run_function modifies the
-            # Tensor storage in place, which is not allowed for detach()'d
-            # Tensors.
-            shallow_copies = [x.view_as(x) for x in ctx.input_tensors]
-            output_tensors = ctx.run_function(*shallow_copies)
+        if model_management.is_nvidia():
+            with torch.enable_grad(), \
+                    torch.cuda.amp.autocast(**ctx.gpu_autocast_kwargs):
+                # Fixes a bug where the first op in run_function modifies the
+                # Tensor storage in place, which is not allowed for detach()'d
+                # Tensors.
+                shallow_copies = [x.view_as(x) for x in ctx.input_tensors]
+                output_tensors = ctx.run_function(*shallow_copies)
+        elif model_management.is_intel_xpu():
+            with torch.enable_grad(), \
+                    torch.xpu.amp.autocast(**ctx.gpu_autocast_kwargs):
+                # Fixes a bug where the first op in run_function modifies the
+                # Tensor storage in place, which is not allowed for detach()'d
+                # Tensors.
+                shallow_copies = [x.view_as(x) for x in ctx.input_tensors]
+                output_tensors = ctx.run_function(*shallow_copies)
        input_grads = torch.autograd.grad(
            output_tensors,
            ctx.input_tensors + ctx.input_params,