Some fixes to generalize CUDA-specific functionality to Intel or other GPUs.

This commit is contained in:
Simon Lui
2023-09-02 18:22:10 -07:00
parent 62efc78a4b
commit 4a0c4ce4ef
3 changed files with 38 additions and 26 deletions

View File

@@ -15,6 +15,7 @@ import torch.nn as nn
import numpy as np
from einops import repeat
from comfy import model_management
from comfy.ldm.util import instantiate_from_config
import comfy.ops
@@ -139,13 +140,22 @@ class CheckpointFunction(torch.autograd.Function):
@staticmethod
def backward(ctx, *output_grads):
ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors]
with torch.enable_grad(), \
torch.cuda.amp.autocast(**ctx.gpu_autocast_kwargs):
# Fixes a bug where the first op in run_function modifies the
# Tensor storage in place, which is not allowed for detach()'d
# Tensors.
shallow_copies = [x.view_as(x) for x in ctx.input_tensors]
output_tensors = ctx.run_function(*shallow_copies)
if model_management.is_nvidia():
with torch.enable_grad(), \
torch.cuda.amp.autocast(**ctx.gpu_autocast_kwargs):
# Fixes a bug where the first op in run_function modifies the
# Tensor storage in place, which is not allowed for detach()'d
# Tensors.
shallow_copies = [x.view_as(x) for x in ctx.input_tensors]
output_tensors = ctx.run_function(*shallow_copies)
elif model_management.is_intel_xpu():
with torch.enable_grad(), \
torch.xpu.amp.autocast(**ctx.gpu_autocast_kwargs):
# Fixes a bug where the first op in run_function modifies the
# Tensor storage in place, which is not allowed for detach()'d
# Tensors.
shallow_copies = [x.view_as(x) for x in ctx.input_tensors]
output_tensors = ctx.run_function(*shallow_copies)
input_grads = torch.autograd.grad(
output_tensors,
ctx.input_tensors + ctx.input_params,