Compare commits

7 Commits

| Author | SHA1 | Date |
|---|---|---|
|  | b07f116dea |  |
|  | 714f728820 |  |
|  | 92d8d15300 |  |
|  | 89253e9fe5 |  |
|  | 3ea3bc8546 |  |
|  | 8e69e2ddfd |  |
|  | 0270a0b41c |  |
README.md
```diff
@@ -46,7 +46,7 @@ ComfyUI lets you design and execute advanced stable diffusion pipelines using a
 #### [Manual Install](#manual-install-windows-linux)
 Supports all operating systems and GPU types (NVIDIA, AMD, Intel, Apple Silicon, Ascend).
 
-## Examples
+## [Examples](https://comfyanonymous.github.io/ComfyUI_examples/)
 See what ComfyUI can do with the [example workflows](https://comfyanonymous.github.io/ComfyUI_examples/).
 
 
```
```diff
@@ -68,6 +68,7 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith
 - [LTX-Video](https://comfyanonymous.github.io/ComfyUI_examples/ltxv/)
 - [Hunyuan Video](https://comfyanonymous.github.io/ComfyUI_examples/hunyuan_video/)
 - [Nvidia Cosmos](https://comfyanonymous.github.io/ComfyUI_examples/cosmos/)
+- [Wan 2.1](https://comfyanonymous.github.io/ComfyUI_examples/wan/)
 - [Stable Audio](https://comfyanonymous.github.io/ComfyUI_examples/audio/)
 - Asynchronous Queue system
 - Many optimizations: Only re-executes the parts of the workflow that changes between executions.
```
```diff
@@ -260,6 +261,13 @@ For models compatible with Ascend Extension for PyTorch (torch_npu). To get star
 3. Next, install the necessary packages for torch-npu by adhering to the platform-specific instructions on the [Installation](https://ascend.github.io/docs/sources/pytorch/install.html#pytorch) page.
 4. Finally, adhere to the [ComfyUI manual installation](#manual-install-windows-linux) guide for Linux. Once all components are installed, you can run ComfyUI as described earlier.
 
+#### Cambricon MLUs
+
+For models compatible with Cambricon Extension for PyTorch (torch_mlu). Here's a step-by-step guide tailored to your platform and installation method:
+
+1. Install the Cambricon CNToolkit by adhering to the platform-specific instructions on the [Installation](https://www.cambricon.com/docs/sdk_1.15.0/cntoolkit_3.7.2/cntoolkit_install_3.7.2/index.html)
+2. Next, install the PyTorch(torch_mlu) following the instructions on the [Installation](https://www.cambricon.com/docs/sdk_1.15.0/cambricon_pytorch_1.17.0/user_guide_1.9/index.html)
+3. Launch ComfyUI by running `python main.py`
 
 # Running
 
```
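For readers following the new Cambricon section, a quick way to confirm the `torch_mlu` install took before launching ComfyUI is a probe like the one below. This is a hedged sketch, not part of the change itself; it only uses `torch.mlu` calls that the accompanying model-management hunks rely on, and it needs a machine with the Cambricon stack installed:

```python
# sketch: sanity-check the Cambricon torch_mlu install (run between steps 2 and 3)
import torch
import torch_mlu  # noqa: F401  -- importing registers the 'mlu' device type with torch

print(torch.mlu.is_available())      # expect True on a working install
print(torch.mlu.device_count())      # number of visible MLU cards
print(torch.mlu.get_device_name(0))  # model name of the first card
```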
```diff
@@ -18,7 +18,7 @@ def sinusoidal_embedding_1d(dim, position):
     # preprocess
     assert dim % 2 == 0
     half = dim // 2
-    position = position.type(torch.float64)
+    position = position.type(torch.float32)
 
     # calculation
     sinusoid = torch.outer(
```
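For context on the hunk above: `sinusoidal_embedding_1d` computes a standard 1-D sinusoidal position embedding, and this change lowers the working precision of the position tensor from float64 to float32. Below is a hedged reconstruction of the full function so the cast's position is visible; only the lines shown in the hunk are confirmed, and the `torch.outer` arguments and final concatenation are assumed from the standard formulation:

```python
import torch

def sinusoidal_embedding_1d(dim, position):
    # preprocess
    assert dim % 2 == 0
    half = dim // 2
    position = position.type(torch.float32)  # was torch.float64 before this commit

    # calculation: positions times a geometric inverse-frequency ladder,
    # cos/sin halves concatenated -> shape [len(position), dim]
    sinusoid = torch.outer(
        position, torch.pow(10000, -torch.arange(half).to(position).div(half)))
    return torch.cat([torch.cos(sinusoid), torch.sin(sinusoid)], dim=1)
```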
```diff
@@ -353,7 +353,7 @@ class WanModel(torch.nn.Module):
 
         # embeddings
         self.patch_embedding = operations.Conv3d(
-            in_dim, dim, kernel_size=patch_size, stride=patch_size, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
+            in_dim, dim, kernel_size=patch_size, stride=patch_size, device=operation_settings.get("device"), dtype=torch.float32)
         self.text_embedding = nn.Sequential(
             operations.Linear(text_dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")), nn.GELU(approximate='tanh'),
             operations.Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")))
```
```diff
@@ -411,7 +411,7 @@ class WanModel(torch.nn.Module):
             List of denoised video tensors with original input shapes [C_out, F, H / 8, W / 8]
         """
         # embeddings
-        x = self.patch_embedding(x)
+        x = self.patch_embedding(x.float()).to(x.dtype)
         grid_sizes = x.shape[2:]
         x = x.flatten(2).transpose(1, 2)
 
```
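Taken together, the two WanModel hunks above pin the patch embedding to float32: the `Conv3d` weights are created in float32, its input is upcast with `.float()`, and the result is cast back to the model's compute dtype. A minimal standalone sketch of that pattern (plain `nn.Conv3d` instead of ComfyUI's `operations` wrapper; the channel and patch sizes are illustrative):

```python
import torch
import torch.nn as nn

# keep a precision-sensitive layer in float32 inside a lower-precision model
patch_embedding = nn.Conv3d(16, 1536, kernel_size=(1, 2, 2), stride=(1, 2, 2),
                            dtype=torch.float32)

x = torch.randn(1, 16, 9, 60, 104, dtype=torch.bfloat16)  # latent video, illustrative
out = patch_embedding(x.float()).to(x.dtype)  # upcast in, cast back out
print(out.dtype)  # torch.bfloat16
```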
```diff
@@ -421,7 +421,7 @@ class WanModel(torch.nn.Module):
         e0 = self.time_projection(e).unflatten(1, (6, self.dim))
 
         # context
-        context = self.text_embedding(torch.cat([context, context.new_zeros(context.size(0), self.text_len - context.size(1), context.size(2))], dim=1))
+        context = self.text_embedding(context)
 
         if clip_fea is not None and self.img_emb is not None:
             context_clip = self.img_emb(clip_fea)  # bs x 257 x dim
```
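The removed line right-padded the text context with zeros up to `self.text_len` inside the model's forward pass; after this change the context is consumed as given. That appears to pair with the tokenizer hunk further down, which raises `min_length` to 512 so padding happens at tokenization time instead. A sketch of what the old line did (shapes illustrative):

```python
import torch

text_len = 512
context = torch.randn(1, 77, 4096)  # bs x tokens x text_dim, illustrative

# before: the model zero-padded the context to text_len itself
padded = torch.cat([context, context.new_zeros(
    context.size(0), text_len - context.size(1), context.size(2))], dim=1)
print(padded.shape)   # torch.Size([1, 512, 4096])

# after: the context is used as-is, at whatever length the tokenizer produced
print(context.shape)  # torch.Size([1, 77, 4096])
```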
```diff
@@ -95,6 +95,13 @@ try:
 except:
     npu_available = False
 
+try:
+    import torch_mlu  # noqa: F401
+    _ = torch.mlu.device_count()
+    mlu_available = torch.mlu.is_available()
+except:
+    mlu_available = False
+
 if args.cpu:
     cpu_state = CPUState.CPU
 
```
```diff
@@ -112,6 +119,12 @@ def is_ascend_npu():
         return True
     return False
 
+def is_mlu():
+    global mlu_available
+    if mlu_available:
+        return True
+    return False
+
 def get_torch_device():
     global directml_enabled
     global cpu_state
```
```diff
@@ -127,6 +140,8 @@ def get_torch_device():
             return torch.device("xpu", torch.xpu.current_device())
         elif is_ascend_npu():
             return torch.device("npu", torch.npu.current_device())
+        elif is_mlu():
+            return torch.device("mlu", torch.mlu.current_device())
         else:
             return torch.device(torch.cuda.current_device())
 
```
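A hedged usage sketch of how the new branch surfaces to callers; `comfy.model_management` is assumed as the import path for the module these hunks modify:

```python
# sketch: device selection picks up MLUs transparently after this change
import torch
import comfy.model_management as mm

device = mm.get_torch_device()           # torch.device("mlu", idx) when torch_mlu is usable
print(mm.get_torch_device_name(device))  # see the get_torch_device_name hunk below
x = torch.ones(4, 4, device=device)      # tensors land on the selected backend
```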
```diff
@@ -153,6 +168,12 @@ def get_total_memory(dev=None, torch_total_too=False):
             _, mem_total_npu = torch.npu.mem_get_info(dev)
             mem_total_torch = mem_reserved
             mem_total = mem_total_npu
+        elif is_mlu():
+            stats = torch.mlu.memory_stats(dev)
+            mem_reserved = stats['reserved_bytes.all.current']
+            _, mem_total_mlu = torch.mlu.mem_get_info(dev)
+            mem_total_torch = mem_reserved
+            mem_total = mem_total_mlu
         else:
             stats = torch.cuda.memory_stats(dev)
             mem_reserved = stats['reserved_bytes.all.current']
```
```diff
@@ -232,7 +253,7 @@ try:
     if torch_version_numeric[0] >= 2:
         if ENABLE_PYTORCH_ATTENTION == False and args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
             ENABLE_PYTORCH_ATTENTION = True
-    if is_intel_xpu() or is_ascend_npu():
+    if is_intel_xpu() or is_ascend_npu() or is_mlu():
         if args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
             ENABLE_PYTORCH_ATTENTION = True
 except:
```
```diff
@@ -316,6 +337,8 @@ def get_torch_device_name(device):
         return "{} {}".format(device, torch.xpu.get_device_name(device))
     elif is_ascend_npu():
         return "{} {}".format(device, torch.npu.get_device_name(device))
+    elif is_mlu():
+        return "{} {}".format(device, torch.mlu.get_device_name(device))
     else:
         return "CUDA {}: {}".format(device, torch.cuda.get_device_name(device))
 
```
```diff
@@ -905,6 +928,8 @@ def xformers_enabled():
         return False
     if is_ascend_npu():
         return False
+    if is_mlu():
+        return False
     if directml_enabled:
         return False
     return XFORMERS_IS_AVAILABLE
```
```diff
@@ -936,6 +961,8 @@ def pytorch_attention_flash_attention():
         return True
     if is_ascend_npu():
         return True
+    if is_mlu():
+        return True
     if is_amd():
         return True #if you have pytorch attention enabled on AMD it probably supports at least mem efficient attention
     return False
```
```diff
@@ -984,6 +1011,13 @@ def get_free_memory(dev=None, torch_free_too=False):
             mem_free_npu, _ = torch.npu.mem_get_info(dev)
             mem_free_torch = mem_reserved - mem_active
             mem_free_total = mem_free_npu + mem_free_torch
+        elif is_mlu():
+            stats = torch.mlu.memory_stats(dev)
+            mem_active = stats['active_bytes.all.current']
+            mem_reserved = stats['reserved_bytes.all.current']
+            mem_free_mlu, _ = torch.mlu.mem_get_info(dev)
+            mem_free_torch = mem_reserved - mem_active
+            mem_free_total = mem_free_mlu + mem_free_torch
         else:
             stats = torch.cuda.memory_stats(dev)
             mem_active = stats['active_bytes.all.current']
```
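The MLU branch mirrors the CUDA and NPU accounting: free memory reported by the driver, plus memory PyTorch has reserved in its caching allocator but is not actively using. A small worked sketch of the arithmetic with illustrative numbers:

```python
# sketch: how the free-memory figure is assembled for a caching allocator
mem_active   = 6 * 1024**3   # bytes held by live tensors (illustrative)
mem_reserved = 8 * 1024**3   # bytes the allocator has reserved from the device
mem_free_dev = 10 * 1024**3  # bytes the driver reports as free

mem_free_torch = mem_reserved - mem_active      # reclaimable from the allocator cache
mem_free_total = mem_free_dev + mem_free_torch  # what the scheduler can plan against
print(mem_free_total / 1024**3, "GiB")          # 12.0 GiB
```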
```diff
@@ -1053,6 +1087,9 @@ def should_use_fp16(device=None, model_params=0, prioritize_performance=True, ma
     if is_ascend_npu():
         return True
 
+    if is_mlu():
+        return True
+
     if torch.version.hip:
         return True
 
```
```diff
@@ -1121,6 +1158,11 @@ def should_use_bf16(device=None, model_params=0, prioritize_performance=True, ma
         return False
 
     props = torch.cuda.get_device_properties(device)
 
+    if is_mlu():
+        if props.major > 3:
+            return True
+
     if props.major >= 8:
         return True
 
```
```diff
@@ -11,7 +11,7 @@ class UMT5XXlModel(sd1_clip.SDClipModel):
 class UMT5XXlTokenizer(sd1_clip.SDTokenizer):
     def __init__(self, embedding_directory=None, tokenizer_data={}):
         tokenizer = tokenizer_data.get("spiece_model", None)
-        super().__init__(tokenizer, pad_with_end=False, embedding_size=4096, embedding_key='umt5xxl', tokenizer_class=SPieceTokenizer, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, pad_token=0)
+        super().__init__(tokenizer, pad_with_end=False, embedding_size=4096, embedding_key='umt5xxl', tokenizer_class=SPieceTokenizer, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=512, pad_token=0)
 
     def state_dict(self):
         return {"spiece_model": self.tokenizer.serialize_model()}
```
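Paired with the WanModel context change above, `min_length=512` means short prompts leave the tokenizer already padded with `pad_token=0`, so the model no longer has to pad them itself. A hedged illustration of the behaviour with made-up token ids (the real padding happens inside `SDTokenizer`):

```python
# sketch of the min_length effect: pad token ids up to a floor length
pad_token, min_length = 0, 512

def pad_to_min(tokens):
    # mirrors what a tokenizer configured with min_length=512 produces for short prompts
    return tokens + [pad_token] * max(0, min_length - len(tokens))

ids = pad_to_min([101, 2023, 2003, 102])  # 4 real tokens, illustrative ids
print(len(ids))  # 512
```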
```diff
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.16"
+__version__ = "0.3.18"
```
```diff
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.16"
+version = "0.3.18"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"
```