Some fixes to the lowvram system.

Remove prints.
Remove print.
2024-11-22 16:40:04 -05:00 · 2024-11-22 10:51:31 -05:00 · 2024-11-22 10:49:15 -05:00 · 2024-11-22 09:24:20 -05:00 · 2024-11-22 08:46:39 -05:00 · 2024-11-22 02:10:09 -05:00
121 changed files with 78110 additions and 63930 deletions
--- a/.github/workflows/stable-release.yml
+++ b/.github/workflows/stable-release.yml
@@ -17,12 +17,12 @@ on:
        description: 'Python minor version'
        required: true
        type: string
-        default: "11"
+        default: "12"
      python_patch:
        description: 'Python patch version'
        required: true
        type: string
-        default: "9"
+        default: "7"


 jobs:
--- a/.github/workflows/windows_release_dependencies.yml
+++ b/.github/workflows/windows_release_dependencies.yml
@@ -12,7 +12,7 @@ on:
        description: 'extra dependencies'
        required: false
        type: string
-        default: "\"numpy<2\""
+        default: ""
      cu:
        description: 'cuda version'
        required: true
@@ -23,13 +23,13 @@ on:
        description: 'python minor version'
        required: true
        type: string
-        default: "11"
+        default: "12"

      python_patch:
        description: 'python patch version'
        required: true
        type: string
-        default: "9"
+        default: "7"
 #  push:
 #    branches:
 #      - master
--- a/.github/workflows/windows_release_package.yml
+++ b/.github/workflows/windows_release_package.yml
@@ -13,13 +13,13 @@ on:
        description: 'python minor version'
        required: true
        type: string
-        default: "11"
+        default: "12"

      python_patch:
        description: 'python patch version'
        required: true
        type: string
-        default: "9"
+        default: "7"
 #  push:
 #    branches:
 #      - master
--- a/README.md
+++ b/README.md
@@ -28,7 +28,7 @@
 [github-downloads-latest-shield]: https://img.shields.io/github/downloads/comfyanonymous/ComfyUI/latest/total?style=flat&label=downloads%40latest
 [github-downloads-link]: https://github.com/comfyanonymous/ComfyUI/releases

-![ComfyUI Screenshot](comfyui_screenshot.png)
+![ComfyUI Screenshot](https://github.com/user-attachments/assets/7ccaf2c1-9b72-41ae-9a89-5688c94b7abe)
 </div>

 This ui will let you design and execute advanced stable diffusion pipelines using a graph/nodes/flowchart based interface. For some workflow examples and see what ComfyUI can do you can check out:
@@ -39,7 +39,9 @@ This ui will let you design and execute advanced stable diffusion pipelines usin
 ## Features
 - Nodes/graph/flowchart interface to experiment and create complex Stable Diffusion workflows without needing to code anything.
 - Fully supports SD1.x, SD2.x, [SDXL](https://comfyanonymous.github.io/ComfyUI_examples/sdxl/), [Stable Video Diffusion](https://comfyanonymous.github.io/ComfyUI_examples/video/), [Stable Cascade](https://comfyanonymous.github.io/ComfyUI_examples/stable_cascade/), [SD3](https://comfyanonymous.github.io/ComfyUI_examples/sd3/) and [Stable Audio](https://comfyanonymous.github.io/ComfyUI_examples/audio/)
+- [LTX-Video](https://comfyanonymous.github.io/ComfyUI_examples/ltxv/)
 - [Flux](https://comfyanonymous.github.io/ComfyUI_examples/flux/)
+- [Mochi](https://comfyanonymous.github.io/ComfyUI_examples/mochi/)
 - Asynchronous Queue system
 - Many optimizations: Only re-executes the parts of the workflow that changes between executions.
 - Smart memory management: can automatically run models on GPUs with as low as 1GB vram.
@@ -127,6 +129,8 @@ To run it on services like paperspace, kaggle or colab you can use my [Jupyter N

 ## Manual Install (Windows, Linux)

+Note that some dependencies do not yet support Python 3.13, so using Python 3.12 is recommended.
+
 Git clone this repo.

 Put your SD checkpoints (the huge ckpt/safetensors files) in: models/checkpoints
@@ -137,7 +141,7 @@ Put your VAE in: models/vae
 ### AMD GPUs (Linux only)
 AMD users can install rocm and pytorch with pip if you don't have it already installed, this is the command to install the stable version:

-```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.1```
+```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.2```

 This is the command to install the nightly with ROCm 6.2 which might have some performance improvements:

--- a/api_server/routes/internal/internal_routes.py
+++ b/api_server/routes/internal/internal_routes.py
@@ -2,6 +2,7 @@ from aiohttp import web
 from typing import Optional
 from folder_paths import models_dir, user_directory, output_directory, folder_names_and_paths
 from api_server.services.file_service import FileService
+from api_server.services.terminal_service import TerminalService
 import app.logger

 class InternalRoutes:
@@ -11,7 +12,8 @@ class InternalRoutes:
    Check README.md for more information.
    
    '''
-    def __init__(self):
+
+    def __init__(self, prompt_server):
        self.routes: web.RouteTableDef = web.RouteTableDef()
        self._app: Optional[web.Application] = None
        self.file_service = FileService({
@@ -19,6 +21,8 @@ class InternalRoutes:
            "user": user_directory,
            "output": output_directory
        })
+        self.prompt_server = prompt_server
+        self.terminal_service = TerminalService(prompt_server)

    def setup_routes(self):
        @self.routes.get('/files')
@@ -34,7 +38,28 @@ class InternalRoutes:

        @self.routes.get('/logs')
        async def get_logs(request):
-            return web.json_response(app.logger.get_logs())
+            return web.json_response("".join([(l["t"] + " - " + l["m"]) for l in app.logger.get_logs()]))
+
+        @self.routes.get('/logs/raw')
+        async def get_raw_logs(request):  # distinct name so it does not shadow the '/logs' handler above
+            self.terminal_service.update_size()  # refresh cols/rows before they are reported below
+            return web.json_response({
+                "entries": list(app.logger.get_logs()),
+                "size": {"cols": self.terminal_service.cols, "rows": self.terminal_service.rows}
+            })
+
+        @self.routes.patch('/logs/subscribe')
+        async def subscribe_logs(request):
+            json_data = await request.json()
+            client_id = json_data["clientId"]
+            enabled = json_data["enabled"]
+            if enabled:
+                self.terminal_service.subscribe(client_id)
+            else:
+                self.terminal_service.unsubscribe(client_id)
+
+            return web.Response(status=200)
+

        @self.routes.get('/folder_paths')
        async def get_folder_paths(request):
--- a/api_server/services/terminal_service.py
+++ b/api_server/services/terminal_service.py
@@ -0,0 +1,60 @@
+from app.logger import on_flush
+import os
+import shutil
+
+
+class TerminalService:
+    def __init__(self, server):
+        self.server = server
+        self.cols = None
+        self.rows = None
+        self.subscriptions = set()
+        on_flush(self.send_messages)
+
+    def get_terminal_size(self):
+        try:
+            size = os.get_terminal_size()
+            return (size.columns, size.lines)
+        except OSError:
+            try:
+                size = shutil.get_terminal_size()
+                return (size.columns, size.lines)
+            except OSError:
+                return (80, 24)  # fallback to 80x24
+
+    def update_size(self):
+        columns, lines = self.get_terminal_size()
+        changed = False
+        
+        if columns != self.cols:
+            self.cols = columns
+            changed = True 
+
+        if lines != self.rows:
+            self.rows = lines
+            changed = True
+
+        if changed:
+            return {"cols": self.cols, "rows": self.rows}
+
+        return None
+
+    def subscribe(self, client_id):
+        self.subscriptions.add(client_id)
+
+    def unsubscribe(self, client_id):
+        self.subscriptions.discard(client_id)
+
+    def send_messages(self, entries):
+        if not len(entries) or not len(self.subscriptions):
+            return
+        
+        new_size = self.update_size()
+        
+        for client_id in self.subscriptions.copy(): # prevent: Set changed size during iteration
+            if client_id not in self.server.sockets:
+                # Automatically unsub if the socket has disconnected
+                self.unsubscribe(client_id)
+                continue
+
+            self.server.send_sync("logs", {"entries": entries, "size": new_size}, client_id)
--- a/app/frontend_management.py
+++ b/app/frontend_management.py
@@ -151,6 +151,15 @@ class FrontendManager:
            return cls.DEFAULT_FRONTEND_PATH

        repo_owner, repo_name, version = cls.parse_version_string(version_string)
+
+        if version.startswith("v"):
+            expected_path = str(Path(cls.CUSTOM_FRONTENDS_ROOT) / f"{repo_owner}_{repo_name}" / version.lstrip("v"))
+            if os.path.exists(expected_path):
+                logging.info(f"Using existing copy of specific frontend version tag: {repo_owner}/{repo_name}@{version}")
+                return expected_path
+
+        logging.info(f"Initializing frontend: {repo_owner}/{repo_name}@{version}, requesting version details from GitHub...")
+
        provider = provider or FrontEndProvider(repo_owner, repo_name)
        release = provider.get_release(version)

--- a/app/logger.py
+++ b/app/logger.py
@@ -1,20 +1,69 @@
-import logging
-from logging.handlers import MemoryHandler
 from collections import deque
+from datetime import datetime
+import io
+import logging
+import sys
+import threading

 logs = None
-formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+stdout_interceptor = None
+stderr_interceptor = None
+
+
+class LogInterceptor(io.TextIOWrapper):  # text stream wrapper that also records every write in the module-level `logs`
+    def __init__(self, stream,  *args, **kwargs):
+        buffer = stream.buffer
+        encoding = stream.encoding
+        super().__init__(buffer, *args, **kwargs, encoding=encoding, line_buffering=stream.line_buffering)
+        self._lock = threading.Lock()
+        self._flush_callbacks = []  # callables invoked by flush() with the entries written since the last flush
+        self._logs_since_flush = []
+
+    def write(self, data):
+        entry = {"t": datetime.now().isoformat(), "m": data}
+        with self._lock:
+            self._logs_since_flush.append(entry)
+
+            # Simple handling for cr to overwrite the last output if it isn't a full line,
+            # else logs just get full of progress messages. `logs` can still be empty on the first write.
+            if isinstance(data, str) and data.startswith("\r") and logs and not logs[-1]["m"].endswith("\n"):
+                logs.pop()
+            logs.append(entry)
+        return super().write(data)  # propagate the character count that TextIOWrapper.write() returns
+
+    def flush(self):
+        super().flush()
+        pending, self._logs_since_flush = self._logs_since_flush, []  # reset once, even when no callback is registered
+        for cb in self._flush_callbacks:
+            cb(pending)  # every callback receives the same batch
+
+    def on_flush(self, callback):
+        self._flush_callbacks.append(callback)


 def get_logs():
-    return "\n".join([formatter.format(x) for x in logs])
+    return logs


+def on_flush(callback):
+    if stdout_interceptor is not None:
+        stdout_interceptor.on_flush(callback)
+    if stderr_interceptor is not None:
+        stderr_interceptor.on_flush(callback)
+
 def setup_logger(log_level: str = 'INFO', capacity: int = 300):
    global logs
    if logs:
        return

+    # Override output streams and log to buffer
+    logs = deque(maxlen=capacity)
+
+    global stdout_interceptor
+    global stderr_interceptor
+    stdout_interceptor = sys.stdout = LogInterceptor(sys.stdout)
+    stderr_interceptor = sys.stderr = LogInterceptor(sys.stderr)
+
    # Setup default global logger
    logger = logging.getLogger()
    logger.setLevel(log_level)
@@ -22,10 +71,3 @@ def setup_logger(log_level: str = 'INFO', capacity: int = 300):
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(logging.Formatter("%(message)s"))
    logger.addHandler(stream_handler)
-
-    # Create a memory handler with a deque as its buffer
-    logs = deque(maxlen=capacity)
-    memory_handler = MemoryHandler(capacity, flushLevel=logging.INFO)
-    memory_handler.buffer = logs
-    memory_handler.setFormatter(formatter)
-    logger.addHandler(memory_handler)
--- a/app/user_manager.py
+++ b/app/user_manager.py
@@ -1,18 +1,35 @@
+from __future__ import annotations
 import json
 import os
 import re
 import uuid
 import glob
 import shutil
+import logging
 from aiohttp import web
 from urllib import parse
 from comfy.cli_args import args
 import folder_paths
 from .app_settings import AppSettings
+from typing import TypedDict

 default_user = "default"


+class FileInfo(TypedDict):  # shape of the per-file entries built by get_file_info()
+    path: str
+    size: int
+    modified: float  # filled from os.path.getmtime(), which returns a float timestamp
+
+
+def get_file_info(path: str, relative_to: str) -> FileInfo:
+    return {
+        "path": os.path.relpath(path, relative_to).replace(os.sep, '/'),
+        "size": os.path.getsize(path),
+        "modified": os.path.getmtime(path)
+    }
+
+
 class UserManager():
    def __init__(self):
        user_directory = folder_paths.get_user_directory()
@@ -154,6 +171,7 @@ class UserManager():

            recurse = request.rel_url.query.get('recurse', '').lower() == "true"
            full_info = request.rel_url.query.get('full_info', '').lower() == "true"
+            split_path = request.rel_url.query.get('split', '').lower() == "true"

            # Use different patterns based on whether we're recursing or not
            if recurse:
@@ -161,26 +179,21 @@ class UserManager():
            else:
                pattern = os.path.join(glob.escape(path), '*')

-            results = glob.glob(pattern, recursive=recurse)
+            def process_full_path(full_path: str) -> FileInfo | str | list[str]:
+                if full_info:
+                    return get_file_info(full_path, path)

-            if full_info:
-                results = [
-                    {
-                        'path': os.path.relpath(x, path).replace(os.sep, '/'),
-                        'size': os.path.getsize(x),
-                        'modified': os.path.getmtime(x)
-                    } for x in results if os.path.isfile(x)
-                ]
-            else:
-                results = [
-                    os.path.relpath(x, path).replace(os.sep, '/')
-                    for x in results
-                    if os.path.isfile(x)
-                ]
+                rel_path = os.path.relpath(full_path, path).replace(os.sep, '/')
+                if split_path:
+                    return [rel_path] + rel_path.split('/')

-            split_path = request.rel_url.query.get('split', '').lower() == "true"
-            if split_path and not full_info:
-                results = [[x] + x.split('/') for x in results]
+                return rel_path
+
+            results = [
+                process_full_path(full_path)
+                for full_path in glob.glob(pattern, recursive=recurse)
+                if os.path.isfile(full_path)
+            ]

            return web.json_response(results)

@@ -208,20 +221,51 @@ class UserManager():

        @routes.post("/userdata/{file}")
        async def post_userdata(request):
+            """
+            Upload or update a user data file.
+
+            This endpoint handles file uploads to a user's data directory, with options for
+            controlling overwrite behavior and response format.
+
+            Query Parameters:
+            - overwrite (optional): If "false", prevents overwriting existing files. Defaults to "true".
+            - full_info (optional): If "true", returns detailed file information (path, size, modified time).
+                                  If "false", returns only the relative file path.
+
+            Path Parameters:
+            - file: The target file path (URL encoded if necessary).
+
+            Returns:
+            - 400: If 'file' parameter is missing.
+            - 403: If the requested path is not allowed.
+            - 409: If overwrite=false and the file already exists.
+            - 200: JSON response with either:
+                  - Full file information (if full_info=true)
+                  - Relative file path (if full_info=false)
+
+            The request body should contain the raw file content to be written.
+            """
            path = get_user_data_path(request)
            if not isinstance(path, str):
                return path

-            overwrite = request.query["overwrite"] != "false"
+            overwrite = request.query.get("overwrite", 'true') != "false"
+            full_info = request.query.get('full_info', 'false').lower() == "true"
+
            if not overwrite and os.path.exists(path):
-                return web.Response(status=409)
+                return web.Response(status=409, text="File already exists")

            body = await request.read()

            with open(path, "wb") as f:
                f.write(body)

-            resp = os.path.relpath(path, self.get_request_user_filepath(request, None))
+            user_path = self.get_request_user_filepath(request, None)
+            if full_info:
+                resp = get_file_info(path, user_path)
+            else:
+                resp = os.path.relpath(path, user_path)
+
            return web.json_response(resp)

        @routes.delete("/userdata/{file}")
@@ -236,6 +280,30 @@ class UserManager():

        @routes.post("/userdata/{file}/move/{dest}")
        async def move_userdata(request):
+            """
+            Move or rename a user data file.
+
+            This endpoint handles moving or renaming files within a user's data directory, with options for
+            controlling overwrite behavior and response format.
+
+            Path Parameters:
+            - file: The source file path (URL encoded if necessary)
+            - dest: The destination file path (URL encoded if necessary)
+
+            Query Parameters:
+            - overwrite (optional): If "false", prevents overwriting existing files. Defaults to "true".
+            - full_info (optional): If "true", returns detailed file information (path, size, modified time).
+                                  If "false", returns only the relative file path.
+
+            Returns:
+            - 400: If either 'file' or 'dest' parameter is missing
+            - 403: If either requested path is not allowed
+            - 404: If the source file does not exist
+            - 409: If overwrite=false and the destination file already exists
+            - 200: JSON response with either:
+                  - Full file information (if full_info=true)
+                  - Relative file path (if full_info=false)
+            """
            source = get_user_data_path(request, check_exists=True)
            if not isinstance(source, str):
                return source
@@ -244,12 +312,19 @@ class UserManager():
            if not isinstance(source, str):
                return dest

-            overwrite = request.query["overwrite"] != "false"
-            if not overwrite and os.path.exists(dest):
-                return web.Response(status=409)
+            overwrite = request.query.get("overwrite", 'true') != "false"
+            full_info = request.query.get('full_info', 'false').lower() == "true"

-            print(f"moving '{source}' -> '{dest}'")
+            if not overwrite and os.path.exists(dest):
+                return web.Response(status=409, text="File already exists")
+
+            logging.info(f"moving '{source}' -> '{dest}'")
            shutil.move(source, dest)

-            resp = os.path.relpath(dest, self.get_request_user_filepath(request, None))
+            user_path = self.get_request_user_filepath(request, None)
+            if full_info:
+                resp = get_file_info(dest, user_path)
+            else:
+                resp = os.path.relpath(dest, user_path)
+
            return web.json_response(resp)
--- a/comfy/clip_model.py
+++ b/comfy/clip_model.py
@@ -23,6 +23,7 @@ class CLIPAttention(torch.nn.Module):

 ACTIVATIONS = {"quick_gelu": lambda a: a * torch.sigmoid(1.702 * a),
               "gelu": torch.nn.functional.gelu,
+               "gelu_pytorch_tanh": lambda a: torch.nn.functional.gelu(a, approximate="tanh"),
 }

 class CLIPMLP(torch.nn.Module):
@@ -139,27 +140,35 @@ class CLIPTextModel(torch.nn.Module):


 class CLIPVisionEmbeddings(torch.nn.Module):
-    def __init__(self, embed_dim, num_channels=3, patch_size=14, image_size=224, dtype=None, device=None, operations=None):
+    def __init__(self, embed_dim, num_channels=3, patch_size=14, image_size=224, model_type="", dtype=None, device=None, operations=None):
        super().__init__()
-        self.class_embedding = torch.nn.Parameter(torch.empty(embed_dim, dtype=dtype, device=device))
+
+        num_patches = (image_size // patch_size) ** 2
+        if model_type == "siglip_vision_model":
+            self.class_embedding = None
+            patch_bias = True
+        else:
+            num_patches = num_patches + 1
+            self.class_embedding = torch.nn.Parameter(torch.empty(embed_dim, dtype=dtype, device=device))
+            patch_bias = False

        self.patch_embedding = operations.Conv2d(
            in_channels=num_channels,
            out_channels=embed_dim,
            kernel_size=patch_size,
            stride=patch_size,
-            bias=False,
+            bias=patch_bias,
            dtype=dtype,
            device=device
        )

-        num_patches = (image_size // patch_size) ** 2
-        num_positions = num_patches + 1
-        self.position_embedding = operations.Embedding(num_positions, embed_dim, dtype=dtype, device=device)
+        self.position_embedding = operations.Embedding(num_patches, embed_dim, dtype=dtype, device=device)

    def forward(self, pixel_values):
        embeds = self.patch_embedding(pixel_values).flatten(2).transpose(1, 2)
-        return torch.cat([comfy.ops.cast_to_input(self.class_embedding, embeds).expand(pixel_values.shape[0], 1, -1), embeds], dim=1) + comfy.ops.cast_to_input(self.position_embedding.weight, embeds)
+        if self.class_embedding is not None:
+            embeds = torch.cat([comfy.ops.cast_to_input(self.class_embedding, embeds).expand(pixel_values.shape[0], 1, -1), embeds], dim=1)
+        return embeds + comfy.ops.cast_to_input(self.position_embedding.weight, embeds)


 class CLIPVision(torch.nn.Module):
@@ -170,9 +179,15 @@ class CLIPVision(torch.nn.Module):
        heads = config_dict["num_attention_heads"]
        intermediate_size = config_dict["intermediate_size"]
        intermediate_activation = config_dict["hidden_act"]
+        model_type = config_dict["model_type"]

-        self.embeddings = CLIPVisionEmbeddings(embed_dim, config_dict["num_channels"], config_dict["patch_size"], config_dict["image_size"], dtype=dtype, device=device, operations=operations)
-        self.pre_layrnorm = operations.LayerNorm(embed_dim)
+        self.embeddings = CLIPVisionEmbeddings(embed_dim, config_dict["num_channels"], config_dict["patch_size"], config_dict["image_size"], model_type=model_type, dtype=dtype, device=device, operations=operations)
+        if model_type == "siglip_vision_model":
+            self.pre_layrnorm = lambda a: a
+            self.output_layernorm = True
+        else:
+            self.pre_layrnorm = operations.LayerNorm(embed_dim)
+            self.output_layernorm = False
        self.encoder = CLIPEncoder(num_layers, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device, operations)
        self.post_layernorm = operations.LayerNorm(embed_dim)

@@ -181,14 +196,21 @@ class CLIPVision(torch.nn.Module):
        x = self.pre_layrnorm(x)
        #TODO: attention_mask?
        x, i = self.encoder(x, mask=None, intermediate_output=intermediate_output)
-        pooled_output = self.post_layernorm(x[:, 0, :])
+        if self.output_layernorm:
+            x = self.post_layernorm(x)
+            pooled_output = x
+        else:
+            pooled_output = self.post_layernorm(x[:, 0, :])
        return x, i, pooled_output

 class CLIPVisionModelProjection(torch.nn.Module):
    def __init__(self, config_dict, dtype, device, operations):
        super().__init__()
        self.vision_model = CLIPVision(config_dict, dtype, device, operations)
-        self.visual_projection = operations.Linear(config_dict["hidden_size"], config_dict["projection_dim"], bias=False)
+        if "projection_dim" in config_dict:
+            self.visual_projection = operations.Linear(config_dict["hidden_size"], config_dict["projection_dim"], bias=False)
+        else:
+            self.visual_projection = lambda a: a

    def forward(self, *args, **kwargs):
        x = self.vision_model(*args, **kwargs)
--- a/comfy/clip_vision.py
+++ b/comfy/clip_vision.py
@@ -16,9 +16,9 @@ class Output:
    def __setitem__(self, key, item):
        setattr(self, key, item)

-def clip_preprocess(image, size=224):
-    mean = torch.tensor([ 0.48145466,0.4578275,0.40821073], device=image.device, dtype=image.dtype)
-    std = torch.tensor([0.26862954,0.26130258,0.27577711], device=image.device, dtype=image.dtype)
+def clip_preprocess(image, size=224, mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]):
+    mean = torch.tensor(mean, device=image.device, dtype=image.dtype)
+    std = torch.tensor(std, device=image.device, dtype=image.dtype)
    image = image.movedim(-1, 1)
    if not (image.shape[2] == size and image.shape[3] == size):
        scale = (size / min(image.shape[2], image.shape[3]))
@@ -35,6 +35,8 @@ class ClipVisionModel():
            config = json.load(f)

        self.image_size = config.get("image_size", 224)
+        self.image_mean = config.get("image_mean", [0.48145466, 0.4578275, 0.40821073])
+        self.image_std = config.get("image_std", [0.26862954, 0.26130258, 0.27577711])
        self.load_device = comfy.model_management.text_encoder_device()
        offload_device = comfy.model_management.text_encoder_offload_device()
        self.dtype = comfy.model_management.text_encoder_dtype(self.load_device)
@@ -51,7 +53,7 @@ class ClipVisionModel():

    def encode_image(self, image):
        comfy.model_management.load_model_gpu(self.patcher)
-        pixel_values = clip_preprocess(image.to(self.load_device), size=self.image_size).float()
+        pixel_values = clip_preprocess(image.to(self.load_device), size=self.image_size, mean=self.image_mean, std=self.image_std).float()
        out = self.model(pixel_values=pixel_values, intermediate_output=-2)

        outputs = Output()
@@ -94,7 +96,9 @@ def load_clipvision_from_sd(sd, prefix="", convert_keys=False):
    elif "vision_model.encoder.layers.30.layer_norm1.weight" in sd:
        json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_h.json")
    elif "vision_model.encoder.layers.22.layer_norm1.weight" in sd:
-        if sd["vision_model.embeddings.position_embedding.weight"].shape[0] == 577:
+        if sd["vision_model.encoder.layers.0.layer_norm1.weight"].shape[0] == 1152:
+            json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_siglip_384.json")
+        elif sd["vision_model.embeddings.position_embedding.weight"].shape[0] == 577:
            json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl_336.json")
        else:
            json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl.json")
--- a/comfy/clip_vision_siglip_384.json
+++ b/comfy/clip_vision_siglip_384.json
@@ -0,0 +1,13 @@
+{
+  "num_channels": 3,
+  "hidden_act": "gelu_pytorch_tanh",
+  "hidden_size": 1152,
+  "image_size": 384,
+  "intermediate_size": 4304,
+  "model_type": "siglip_vision_model",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 27,
+  "patch_size": 14,
+  "image_mean": [0.5, 0.5, 0.5],
+  "image_std": [0.5, 0.5, 0.5]
+}
--- a/comfy/controlnet.py
+++ b/comfy/controlnet.py
@@ -60,7 +60,7 @@ class StrengthType(Enum):
    LINEAR_UP = 2

 class ControlBase:
-    def __init__(self, device=None):
+    def __init__(self):
        self.cond_hint_original = None
        self.cond_hint = None
        self.strength = 1.0
@@ -72,10 +72,6 @@ class ControlBase:
        self.compression_ratio = 8
        self.upscale_algorithm = 'nearest-exact'
        self.extra_args = {}
-
-        if device is None:
-            device = comfy.model_management.get_torch_device()
-        self.device = device
        self.previous_controlnet = None
        self.extra_conds = []
        self.strength_type = StrengthType.CONSTANT
@@ -185,8 +181,8 @@ class ControlBase:


 class ControlNet(ControlBase):
-    def __init__(self, control_model=None, global_average_pooling=False, compression_ratio=8, latent_format=None, device=None, load_device=None, manual_cast_dtype=None, extra_conds=["y"], strength_type=StrengthType.CONSTANT, concat_mask=False):
-        super().__init__(device)
+    def __init__(self, control_model=None, global_average_pooling=False, compression_ratio=8, latent_format=None, load_device=None, manual_cast_dtype=None, extra_conds=["y"], strength_type=StrengthType.CONSTANT, concat_mask=False):
+        super().__init__()
        self.control_model = control_model
        self.load_device = load_device
        if control_model is not None:
@@ -237,11 +233,12 @@ class ControlNet(ControlBase):
            if len(self.extra_concat_orig) > 0:
                to_concat = []
                for c in self.extra_concat_orig:
+                    c = c.to(self.cond_hint.device)
                    c = comfy.utils.common_upscale(c, self.cond_hint.shape[3], self.cond_hint.shape[2], self.upscale_algorithm, "center")
                    to_concat.append(comfy.utils.repeat_to_batch_size(c, self.cond_hint.shape[0]))
                self.cond_hint = torch.cat([self.cond_hint] + to_concat, dim=1)

-            self.cond_hint = self.cond_hint.to(device=self.device, dtype=dtype)
+            self.cond_hint = self.cond_hint.to(device=x_noisy.device, dtype=dtype)
        if x_noisy.shape[0] != self.cond_hint.shape[0]:
            self.cond_hint = broadcast_image_to(self.cond_hint, x_noisy.shape[0], batched_number)

@@ -340,8 +337,8 @@ class ControlLoraOps:


 class ControlLora(ControlNet):
-    def __init__(self, control_weights, global_average_pooling=False, device=None, model_options={}): #TODO? model_options
-        ControlBase.__init__(self, device)
+    def __init__(self, control_weights, global_average_pooling=False, model_options={}): #TODO? model_options
+        ControlBase.__init__(self)
        self.control_weights = control_weights
        self.global_average_pooling = global_average_pooling
        self.extra_conds += ["y"]
@@ -661,12 +658,15 @@ def load_controlnet(ckpt_path, model=None, model_options={}):

 class T2IAdapter(ControlBase):
    def __init__(self, t2i_model, channels_in, compression_ratio, upscale_algorithm, device=None):
-        super().__init__(device)
+        super().__init__()
        self.t2i_model = t2i_model
        self.channels_in = channels_in
        self.control_input = None
        self.compression_ratio = compression_ratio
        self.upscale_algorithm = upscale_algorithm
+        if device is None:
+            device = comfy.model_management.get_torch_device()
+        self.device = device

    def scale_image_to(self, width, height):
        unshuffle_amount = self.t2i_model.unshuffle_amount
--- a/comfy/extra_samplers/uni_pc.py
+++ b/comfy/extra_samplers/uni_pc.py
@@ -16,7 +16,7 @@ class NoiseScheduleVP:
            continuous_beta_0=0.1,
            continuous_beta_1=20.,
        ):
-        """Create a wrapper class for the forward SDE (VP type).
+        r"""Create a wrapper class for the forward SDE (VP type).

        ***
        Update: We support discrete-time diffusion models by implementing a picewise linear interpolation for log_alpha_t.
--- a/comfy/float.py
+++ b/comfy/float.py
@@ -41,6 +41,8 @@ def manual_stochastic_round_to_float8(x, dtype, generator=None):
        (2.0 ** (-EXPONENT_BIAS + 1)) * abs_x
    )

+    inf = torch.finfo(dtype)
+    torch.clamp(sign, min=inf.min, max=inf.max, out=sign)
    return sign


--- a/comfy/k_diffusion/sampling.py
+++ b/comfy/k_diffusion/sampling.py
@@ -164,6 +164,8 @@ def sample_euler(model, x, sigmas, extra_args=None, callback=None, disable=None,

@torch.no_grad()
 def sample_euler_ancestral(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
+    if isinstance(model.inner_model.inner_model.model_sampling, comfy.model_sampling.CONST):
+        return sample_euler_ancestral_RF(model, x, sigmas, extra_args, callback, disable, eta, s_noise, noise_sampler)
    """Ancestral sampling with Euler method steps."""
    extra_args = {} if extra_args is None else extra_args
    noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler
@@ -181,6 +183,29 @@ def sample_euler_ancestral(model, x, sigmas, extra_args=None, callback=None, dis
            x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up
    return x

+@torch.no_grad()
+def sample_euler_ancestral_RF(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1.0, s_noise=1., noise_sampler=None):
+    """Euler ancestral sampling variant that works with ``alpha = 1 - sigma``.
+
+    For every consecutive pair of sigmas the model is queried for a denoised
+    estimate, ``x`` is interpolated toward that estimate down to a reduced
+    level ``sigma_down``, and, when both the next sigma and ``eta`` are
+    positive, ``x`` is rescaled and fresh noise from ``noise_sampler`` is added.
+
+    ``eta`` sets how far below the next sigma the deterministic step goes
+    (``eta=0`` stops exactly at the next sigma), ``s_noise`` scales the
+    injected noise, and ``callback`` (if given) receives a dict per step.
+    """
+    if extra_args is None:
+        extra_args = {}
+    if noise_sampler is None:
+        noise_sampler = default_noise_sampler(x)
+    batch_ones = x.new_ones([x.shape[0]])
+    for i in trange(len(sigmas) - 1, disable=disable):
+        sigma_cur = sigmas[i]
+        denoised = model(x, sigma_cur * batch_ones, **extra_args)
+        sigma_next = sigmas[i + 1]
+
+        # Level reached by the deterministic part of the step.
+        downstep_ratio = 1 + (sigma_next / sigma_cur - 1) * eta
+        sigma_down = sigma_next * downstep_ratio
+
+        # Signal coefficients at the next level and at the reduced level.
+        alpha_next = 1 - sigma_next
+        alpha_down = 1 - sigma_down
+
+        # Noise scale used when x is brought back up to sigma_next.
+        renoise_coeff = (sigma_next**2 - sigma_down**2*alpha_next**2/alpha_down**2)**0.5
+
+        if callback is not None:
+            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
+
+        # Euler step expressed as a blend between x and the denoised estimate.
+        keep_fraction = sigma_down / sigma_cur
+        x = keep_fraction * x + (1 - keep_fraction) * denoised
+
+        if sigma_next > 0 and eta > 0:
+            x = (alpha_next/alpha_down) * x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * renoise_coeff
+    return x

@torch.no_grad()
 def sample_heun(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0., s_tmax=float('inf'), s_noise=1.):
@@ -1080,7 +1105,6 @@ def sample_euler_cfg_pp(model, x, sigmas, extra_args=None, callback=None, disabl
        d = to_d(x, sigma_hat, temp[0])
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigma_hat, 'denoised': denoised})
-        dt = sigmas[i + 1] - sigma_hat
        # Euler method
        x = denoised + d * sigmas[i + 1]
    return x
@@ -1107,7 +1131,6 @@ def sample_euler_ancestral_cfg_pp(model, x, sigmas, extra_args=None, callback=No
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
        d = to_d(x, sigmas[i], temp[0])
        # Euler method
-        dt = sigma_down - sigmas[i]
        x = denoised + d * sigma_down
        if sigmas[i + 1] > 0:
            x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up
@@ -1138,7 +1161,6 @@ def sample_dpmpp_2s_ancestral_cfg_pp(model, x, sigmas, extra_args=None, callback
        if sigma_down == 0:
            # Euler method
            d = to_d(x, sigmas[i], temp[0])
-            dt = sigma_down - sigmas[i]
            x = denoised + d * sigma_down
        else:
            # DPM-Solver++(2S)
@@ -1186,4 +1208,4 @@ def sample_dpmpp_2m_cfg_pp(model, x, sigmas, extra_args=None, callback=None, dis
            denoised_mix = -torch.exp(-h) * uncond_denoised - torch.expm1(-h) * (1 / (2 * r)) * (denoised - old_uncond_denoised)
        x = denoised + denoised_mix + torch.exp(-h) * x
        old_uncond_denoised = uncond_denoised
-    return x
+    return x
--- a/comfy/latent_formats.py
+++ b/comfy/latent_formats.py
@@ -175,3 +175,48 @@ class Flux(SD3):

    def process_out(self, latent):
        return (latent / self.scale_factor) + self.shift_factor
+
+class Mochi(LatentFormat):
+    """Latent format with per-channel mean/std normalization for 12 channels."""
+
+    latent_channels = 12
+
+    def __init__(self):
+        # Statistics are stored as (1, C, 1, 1, 1) tensors; presumably this is
+        # meant to broadcast over 5-D video latents - TODO confirm with callers.
+        stats_shape = (1, self.latent_channels, 1, 1, 1)
+        channel_means = [-0.06730895953510081, -0.038011381506090416, -0.07477820912866141,
+                         -0.05565264470995561, 0.012767231469026969, -0.04703542746246419,
+                         0.043896967884726704, -0.09346305707025976, -0.09918314763016893,
+                         -0.008729793427399178, -0.011931556316503654, -0.0321993391887285]
+        channel_stds = [0.9263795028493863, 0.9248894543193766, 0.9393059390890617,
+                        0.959253732819592, 0.8244560132752793, 0.917259975397747,
+                        0.9294154431013696, 1.3720942357788521, 0.881393668867029,
+                        0.9168315692124348, 0.9185249279345552, 0.9274757570805041]
+
+        self.scale_factor = 1.0
+        self.latents_mean = torch.tensor(channel_means).view(stats_shape)
+        self.latents_std = torch.tensor(channel_stds).view(stats_shape)
+
+        # One [r, g, b] row per latent channel, plus a 3-element bias.
+        self.latent_rgb_factors = [
+            [-0.0069, -0.0045,  0.0018],
+            [ 0.0154, -0.0692, -0.0274],
+            [ 0.0333,  0.0019,  0.0206],
+            [-0.1390,  0.0628,  0.1678],
+            [-0.0725,  0.0134, -0.1898],
+            [ 0.0074, -0.0270, -0.0209],
+            [-0.0176, -0.0277, -0.0221],
+            [ 0.5294,  0.5204,  0.3852],
+            [-0.0326, -0.0446, -0.0143],
+            [-0.0659,  0.0153, -0.0153],
+            [ 0.0185, -0.0217,  0.0014],
+            [-0.0396, -0.0495, -0.0281],
+        ]
+        self.latent_rgb_factors_bias = [-0.0940, -0.1418, -0.1453]
+        self.taesd_decoder_name = None #TODO
+
+    def _stats_like(self, latent):
+        """Return (mean, std) moved to the device and dtype of ``latent``."""
+        mean = self.latents_mean.to(latent.device, latent.dtype)
+        std = self.latents_std.to(latent.device, latent.dtype)
+        return mean, std
+
+    def process_in(self, latent):
+        """Subtract the channel mean, then apply scale_factor / std."""
+        mean, std = self._stats_like(latent)
+        centered = latent - mean
+        return centered * self.scale_factor / std
+
+    def process_out(self, latent):
+        """Inverse of ``process_in``: apply std / scale_factor, then add the mean."""
+        mean, std = self._stats_like(latent)
+        rescaled = latent * std / self.scale_factor
+        return rescaled + mean
+
+# Latent format that only overrides the channel count (128); no other
+# attributes or methods are defined in this class body.
+class LTXV(LatentFormat):
+    latent_channels = 128
+
--- a/comfy/ldm/audio/dit.py
+++ b/comfy/ldm/audio/dit.py
@@ -612,7 +612,9 @@ class ContinuousTransformer(nn.Module):
        return_info = False,
        **kwargs
    ):
+        patches_replace = kwargs.get("transformer_options", {}).get("patches_replace", {})
        batch, seq, device = *x.shape[:2], x.device
+        context = kwargs["context"]

        info = {
            "hidden_states": [],
@@ -643,9 +645,19 @@ class ContinuousTransformer(nn.Module):
        if self.use_sinusoidal_emb or self.use_abs_pos_emb:
            x = x + self.pos_emb(x)

+        blocks_replace = patches_replace.get("dit", {})
        # Iterate over the transformer layers
-        for layer in self.layers:
-            x = layer(x, rotary_pos_emb = rotary_pos_emb, global_cond=global_cond, **kwargs)
+        for i, layer in enumerate(self.layers):
+            if ("double_block", i) in blocks_replace:
+                def block_wrap(args):
+                    out = {}
+                    out["img"] = layer(args["img"], rotary_pos_emb=args["pe"], global_cond=args["vec"], context=args["txt"])
+                    return out
+
+                out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "vec": global_cond, "pe": rotary_pos_emb}, {"original_block": block_wrap})
+                x = out["img"]
+            else:
+                x = layer(x, rotary_pos_emb = rotary_pos_emb, global_cond=global_cond, context=context)
            # x = checkpoint(layer, x, rotary_pos_emb = rotary_pos_emb, global_cond=global_cond, **kwargs)

            if return_info:
@@ -874,7 +886,6 @@ class AudioDiffusionTransformer(nn.Module):
        mask=None,
        return_info=False,
        control=None,
-        transformer_options={},
        **kwargs):
            return self._forward(
                x,
--- a/comfy/ldm/aura/mmdit.py
+++ b/comfy/ldm/aura/mmdit.py
@@ -437,7 +437,8 @@ class MMDiT(nn.Module):
        pos_encoding = pos_encoding[:,from_h:from_h+h,from_w:from_w+w]
        return x + pos_encoding.reshape(1, -1, self.positional_encoding.shape[-1])

-    def forward(self, x, timestep, context, **kwargs):
+    def forward(self, x, timestep, context, transformer_options={}, **kwargs):
+        patches_replace = transformer_options.get("patches_replace", {})
        # patchify x, add PE
        b, c, h, w = x.shape

@@ -458,15 +459,36 @@ class MMDiT(nn.Module):

        global_cond = self.t_embedder(t, x.dtype)  # B, D

+        blocks_replace = patches_replace.get("dit", {})
        if len(self.double_layers) > 0:
-            for layer in self.double_layers:
-                c, x = layer(c, x, global_cond, **kwargs)
+            for i, layer in enumerate(self.double_layers):
+                if ("double_block", i) in blocks_replace:
+                    def block_wrap(args):
+                        out = {}
+                        out["txt"], out["img"] = layer(args["txt"],
+                                                       args["img"],
+                                                       args["vec"])
+                        return out
+                    out = blocks_replace[("double_block", i)]({"img": x, "txt": c, "vec": global_cond}, {"original_block": block_wrap})
+                    c = out["txt"]
+                    x = out["img"]
+                else:
+                    c, x = layer(c, x, global_cond, **kwargs)

        if len(self.single_layers) > 0:
            c_len = c.size(1)
            cx = torch.cat([c, x], dim=1)
-            for layer in self.single_layers:
-                cx = layer(cx, global_cond, **kwargs)
+            for i, layer in enumerate(self.single_layers):
+                if ("single_block", i) in blocks_replace:
+                    def block_wrap(args):
+                        out = {}
+                        out["img"] = layer(args["img"], args["vec"])
+                        return out
+
+                    out = blocks_replace[("single_block", i)]({"img": cx, "vec": global_cond}, {"original_block": block_wrap})
+                    cx = out["img"]
+                else:
+                    cx = layer(cx, global_cond, **kwargs)

            x = cx[:, c_len:]

--- a/comfy/ldm/common_dit.py
+++ b/comfy/ldm/common_dit.py
@@ -13,9 +13,15 @@ try:
 except:
    rms_norm_torch = None

-def rms_norm(x, weight, eps=1e-6):
+def rms_norm(x, weight=None, eps=1e-6):
    if rms_norm_torch is not None and not (torch.jit.is_tracing() or torch.jit.is_scripting()):
-        return rms_norm_torch(x, weight.shape, weight=comfy.ops.cast_to(weight, dtype=x.dtype, device=x.device), eps=eps)
+        if weight is None:
+            return rms_norm_torch(x, (x.shape[-1],), eps=eps)
+        else:
+            return rms_norm_torch(x, weight.shape, weight=comfy.ops.cast_to(weight, dtype=x.dtype, device=x.device), eps=eps)
    else:
-        rrms = torch.rsqrt(torch.mean(x**2, dim=-1, keepdim=True) + eps)
-        return (x * rrms) * comfy.ops.cast_to(weight, dtype=x.dtype, device=x.device)
+        r = x * torch.rsqrt(torch.mean(x**2, dim=-1, keepdim=True) + eps)
+        if weight is None:
+            return r
+        else:
+            return r * comfy.ops.cast_to(weight, dtype=x.dtype, device=x.device)
--- a/comfy/ldm/flux/model.py
+++ b/comfy/ldm/flux/model.py
@@ -20,6 +20,7 @@ import comfy.ldm.common_dit
@dataclass
 class FluxParams:
    in_channels: int
+    out_channels: int
    vec_in_dim: int
    context_in_dim: int
    hidden_size: int
@@ -29,6 +30,7 @@ class FluxParams:
    depth_single_blocks: int
    axes_dim: list
    theta: int
+    patch_size: int
    qkv_bias: bool
    guidance_embed: bool

@@ -43,8 +45,9 @@ class Flux(nn.Module):
        self.dtype = dtype
        params = FluxParams(**kwargs)
        self.params = params
-        self.in_channels = params.in_channels * 2 * 2
-        self.out_channels = self.in_channels
+        self.patch_size = params.patch_size
+        self.in_channels = params.in_channels * params.patch_size * params.patch_size
+        self.out_channels = params.out_channels * params.patch_size * params.patch_size
        if params.hidden_size % params.num_heads != 0:
            raise ValueError(
                f"Hidden size {params.hidden_size} must be divisible by num_heads {params.num_heads}"
@@ -96,7 +99,9 @@ class Flux(nn.Module):
        y: Tensor,
        guidance: Tensor = None,
        control=None,
+        transformer_options={},
    ) -> Tensor:
+        patches_replace = transformer_options.get("patches_replace", {})
        if img.ndim != 3 or txt.ndim != 3:
            raise ValueError("Input img and txt tensors must have 3 dimensions.")

@@ -114,8 +119,19 @@ class Flux(nn.Module):
        ids = torch.cat((txt_ids, img_ids), dim=1)
        pe = self.pe_embedder(ids)

+        blocks_replace = patches_replace.get("dit", {})
        for i, block in enumerate(self.double_blocks):
-            img, txt = block(img=img, txt=txt, vec=vec, pe=pe)
+            if ("double_block", i) in blocks_replace:
+                def block_wrap(args):
+                    out = {}
+                    out["img"], out["txt"] = block(img=args["img"], txt=args["txt"], vec=args["vec"], pe=args["pe"])
+                    return out
+
+                out = blocks_replace[("double_block", i)]({"img": img, "txt": txt, "vec": vec, "pe": pe}, {"original_block": block_wrap})
+                txt = out["txt"]
+                img = out["img"]
+            else:
+                img, txt = block(img=img, txt=txt, vec=vec, pe=pe)

            if control is not None: # Controlnet
                control_i = control.get("input")
@@ -127,7 +143,16 @@ class Flux(nn.Module):
        img = torch.cat((txt, img), 1)

        for i, block in enumerate(self.single_blocks):
-            img = block(img, vec=vec, pe=pe)
+            if ("single_block", i) in blocks_replace:
+                def block_wrap(args):
+                    out = {}
+                    out["img"] = block(args["img"], vec=args["vec"], pe=args["pe"])
+                    return out
+
+                out = blocks_replace[("single_block", i)]({"img": img, "vec": vec, "pe": pe}, {"original_block": block_wrap})
+                img = out["img"]
+            else:
+                img = block(img, vec=vec, pe=pe)

            if control is not None: # Controlnet
                control_o = control.get("output")
@@ -141,9 +166,9 @@ class Flux(nn.Module):
        img = self.final_layer(img, vec)  # (N, T, patch_size ** 2 * out_channels)
        return img

-    def forward(self, x, timestep, context, y, guidance, control=None, **kwargs):
+    def forward(self, x, timestep, context, y, guidance, control=None, transformer_options={}, **kwargs):
        bs, c, h, w = x.shape
-        patch_size = 2
+        patch_size = self.patch_size
        x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size))

        img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)
@@ -151,10 +176,10 @@ class Flux(nn.Module):
        h_len = ((h + (patch_size // 2)) // patch_size)
        w_len = ((w + (patch_size // 2)) // patch_size)
        img_ids = torch.zeros((h_len, w_len, 3), device=x.device, dtype=x.dtype)
-        img_ids[:, :, 1] = torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype).unsqueeze(1)
-        img_ids[:, :, 2] = torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0)
+        img_ids[:, :, 1] = img_ids[:, :, 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype).unsqueeze(1)
+        img_ids[:, :, 2] = img_ids[:, :, 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0)
        img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs)

        txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype)
-        out = self.forward_orig(img, img_ids, context, txt_ids, timestep, y, guidance, control)
+        out = self.forward_orig(img, img_ids, context, txt_ids, timestep, y, guidance, control, transformer_options)
        return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h,:w]
--- a/comfy/ldm/flux/redux.py
+++ b/comfy/ldm/flux/redux.py
@@ -0,0 +1,25 @@
+import torch
+import comfy.ops
+
+ops = comfy.ops.manual_cast
+
+class ReduxImageEncoder(torch.nn.Module):
+    """Two-layer projection: ``redux_dim`` -> ``3 * txt_in_features`` -> ``txt_in_features``.
+
+    A SiLU activation sits between the two linear layers. ``device`` and
+    ``dtype`` are stored on the instance; only ``dtype`` is handed to the
+    linear layers.
+    """
+
+    def __init__(
+        self,
+        redux_dim: int = 1152,
+        txt_in_features: int = 4096,
+        device=None,
+        dtype=None,
+    ) -> None:
+        super().__init__()
+
+        hidden_features = txt_in_features * 3
+
+        self.redux_dim = redux_dim
+        self.device = device
+        self.dtype = dtype
+
+        self.redux_up = ops.Linear(redux_dim, hidden_features, dtype=dtype)
+        self.redux_down = ops.Linear(hidden_features, txt_in_features, dtype=dtype)
+
+    def forward(self, sigclip_embeds) -> torch.Tensor:
+        """Project ``sigclip_embeds`` up, apply SiLU, and project back down."""
+        expanded = self.redux_up(sigclip_embeds)
+        activated = torch.nn.functional.silu(expanded)
+        return self.redux_down(activated)
--- a/comfy/ldm/genmo/joint_model/asymm_models_joint.py
+++ b/comfy/ldm/genmo/joint_model/asymm_models_joint.py
@@ -0,0 +1,559 @@
+#original code from https://github.com/genmoai/models under apache 2.0 license
+#adapted to ComfyUI
+
+from typing import Dict, List, Optional, Tuple
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange
+# from flash_attn import flash_attn_varlen_qkvpacked_func
+from comfy.ldm.modules.attention import optimized_attention
+
+from .layers import (
+    FeedForward,
+    PatchEmbed,
+    RMSNorm,
+    TimestepEmbedder,
+)
+
+from .rope_mixed import (
+    compute_mixed_rotation,
+    create_position_matrix,
+)
+from .temporal_rope import apply_rotary_emb_qk_real
+from .utils import (
+    AttentionPool,
+    modulate,
+)
+
+import comfy.ldm.common_dit
+import comfy.ops
+
+
+def modulated_rmsnorm(x, scale, eps=1e-6):
+    """RMS-normalize ``x`` (no learned weight) and multiply by ``1 + scale``.
+
+    ``scale`` gets a new axis inserted at dim 1 before the multiplication.
+    """
+    return comfy.ldm.common_dit.rms_norm(x, eps=eps) * (1 + scale.unsqueeze(1))
+
+
+def residual_tanh_gated_rmsnorm(x, x_res, gate, eps=1e-6):
+    """Return ``x`` plus the RMS-normalized ``x_res`` scaled by ``tanh(gate)``.
+
+    ``gate`` gets a new axis inserted at dim 1 after the tanh.
+    """
+    gated_update = comfy.ldm.common_dit.rms_norm(x_res, eps=eps) * torch.tanh(gate).unsqueeze(1)
+    return x + gated_update
+
+class AsymmetricAttention(nn.Module):
+    """Joint attention over two token streams, ``x`` and ``y``.
+
+    Each stream has its own q/k/v projection to width ``dim_x``. The streams
+    are concatenated along the token axis, attended over together, then split
+    again and projected by ``proj_x`` (``dim_x -> dim_x``) and ``proj_y``
+    (``dim_x -> dim_y``, or identity when ``update_y`` is false).
+    """
+
+    def __init__(
+        self,
+        dim_x: int,
+        dim_y: int,
+        num_heads: int = 8,
+        qkv_bias: bool = True,
+        qk_norm: bool = False,
+        attn_drop: float = 0.0,
+        update_y: bool = True,
+        out_bias: bool = True,
+        attend_to_padding: bool = False,
+        softmax_scale: Optional[float] = None,
+        device: Optional[torch.device] = None,
+        dtype=None,
+        operations=None,
+    ):
+        """Create the projection and q/k normalization layers.
+
+        ``operations`` must provide a ``Linear`` factory; the ``None`` default
+        cannot be used as-is. ``qk_norm`` must be truthy (asserted below)
+        although its default is ``False``. ``attn_drop``,
+        ``attend_to_padding`` and ``softmax_scale`` are stored on the instance
+        but are not read by ``forward`` in this class.
+
+        Raises:
+            ValueError: if ``dim_x`` is not divisible by ``num_heads``.
+            AssertionError: if ``qk_norm`` is falsy.
+        """
+        super().__init__()
+        self.dim_x = dim_x
+        self.dim_y = dim_y
+        self.num_heads = num_heads
+        self.head_dim = dim_x // num_heads
+        self.attn_drop = attn_drop
+        self.update_y = update_y
+        self.attend_to_padding = attend_to_padding
+        self.softmax_scale = softmax_scale
+        if dim_x % num_heads != 0:
+            raise ValueError(
+                f"dim_x={dim_x} should be divisible by num_heads={num_heads}"
+            )
+
+        # Input layers.
+        self.qkv_bias = qkv_bias
+        self.qkv_x = operations.Linear(dim_x, 3 * dim_x, bias=qkv_bias, device=device, dtype=dtype)
+        # Project text features to match visual features (dim_y -> dim_x)
+        self.qkv_y = operations.Linear(dim_y, 3 * dim_x, bias=qkv_bias, device=device, dtype=dtype)
+
+        # Query and key normalization for stability.
+        # NOTE(review): the norms are always created, so qk_norm=False (the
+        # default) is rejected here rather than disabling them.
+        assert qk_norm
+        self.q_norm_x = RMSNorm(self.head_dim, device=device, dtype=dtype)
+        self.k_norm_x = RMSNorm(self.head_dim, device=device, dtype=dtype)
+        self.q_norm_y = RMSNorm(self.head_dim, device=device, dtype=dtype)
+        self.k_norm_y = RMSNorm(self.head_dim, device=device, dtype=dtype)
+
+        # Output layers. y features go back down from dim_x -> dim_y.
+        self.proj_x = operations.Linear(dim_x, dim_x, bias=out_bias, device=device, dtype=dtype)
+        self.proj_y = (
+            operations.Linear(dim_x, dim_y, bias=out_bias, device=device, dtype=dtype)
+            if update_y
+            else nn.Identity()
+        )
+
+    def forward(
+        self,
+        x: torch.Tensor,  # (B, N, dim_x)
+        y: torch.Tensor,  # (B, L, dim_y)
+        scale_x: torch.Tensor,  # (B, dim_x), modulation for pre-RMSNorm.
+        scale_y: torch.Tensor,  # (B, dim_y), modulation for pre-RMSNorm.
+        crop_y,
+        **rope_rotation,
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        """Attend jointly over ``x`` and the first ``crop_y`` tokens of ``y``.
+
+        ``rope_rotation`` may carry ``rope_cos`` and ``rope_sin``; they are
+        applied to the ``x`` queries and keys only. Returns the projected
+        ``x`` output and the projected ``y`` output; the ``y`` output is
+        zero-padded back to the uncropped text length before ``proj_y``.
+        """
+        # Both are None when the caller does not pass them.
+        rope_cos = rope_rotation.get("rope_cos")
+        rope_sin = rope_rotation.get("rope_sin")
+        # Pre-norm for visual features
+        x = modulated_rmsnorm(x, scale_x)  # (B, M, dim_x) where M = N / cp_group_size
+
+        # Process visual features
+        # qkv_x = self.qkv_x(x)  # (B, M, 3 * dim_x)
+        # assert qkv_x.dtype == torch.bfloat16
+        # qkv_x = all_to_all_collect_tokens(
+        #     qkv_x, self.num_heads
+        # )  # (3, B, N, local_h, head_dim)
+
+        # Process text features
+        y = modulated_rmsnorm(y, scale_y)  # (B, L, dim_y)
+        q_y, k_y, v_y = self.qkv_y(y).view(y.shape[0], y.shape[1], 3, self.num_heads, -1).unbind(2)  # (B, N, local_h, head_dim)
+
+        q_y = self.q_norm_y(q_y)
+        k_y = self.k_norm_y(k_y)
+
+        # Split qkv_x into q, k, v
+        q_x, k_x, v_x = self.qkv_x(x).view(x.shape[0], x.shape[1], 3, self.num_heads, -1).unbind(2)  # (B, N, local_h, head_dim)
+        q_x = self.q_norm_x(q_x)
+        q_x = apply_rotary_emb_qk_real(q_x, rope_cos, rope_sin)
+        k_x = self.k_norm_x(k_x)
+        k_x = apply_rotary_emb_qk_real(k_x, rope_cos, rope_sin)
+
+        # Join visual tokens with the first crop_y text tokens along dim 1,
+        # then swap dims 1 and 2 so the head axis comes before the token axis.
+        q = torch.cat([q_x, q_y[:, :crop_y]], dim=1).transpose(1, 2)
+        k = torch.cat([k_x, k_y[:, :crop_y]], dim=1).transpose(1, 2)
+        v = torch.cat([v_x, v_y[:, :crop_y]], dim=1).transpose(1, 2)
+
+        # skip_reshape=True: inputs are passed with heads already split out.
+        # Presumably the result is (B, tokens, features) given the split on
+        # dim 1 below - TODO confirm against optimized_attention.
+        xy = optimized_attention(q,
+                                 k,
+                                 v, self.num_heads, skip_reshape=True)
+
+        # Split at the visual-token count: first part is x, the rest is the
+        # cropped text part.
+        x, y = torch.tensor_split(xy, (q_x.shape[1],), dim=1)
+        x = self.proj_x(x)
+        # Zero-pad the text output back to the uncropped text length
+        # (q_y.shape[1]) before projecting.
+        o = torch.zeros(y.shape[0], q_y.shape[1], y.shape[-1], device=y.device, dtype=y.dtype)
+        o[:, :y.shape[1]] = y
+
+        y = self.proj_y(o)
+        # print("ox", x)
+        # print("oy", y)
+        return x, y
+
+
+class AsymmetricJointBlock(nn.Module):
+    """Transformer block that updates ``x`` and, optionally, ``y``.
+
+    The block runs ``AsymmetricAttention`` followed by per-stream feed-forward
+    layers. Every sub-layer is wrapped in a modulated RMS pre-norm and a
+    tanh-gated RMS-normed residual, with the modulation derived from the
+    conditioning vector ``c``. When ``update_y`` is false, ``y`` only feeds
+    the attention and is returned unchanged.
+    """
+
+    def __init__(
+        self,
+        hidden_size_x: int,
+        hidden_size_y: int,
+        num_heads: int,
+        *,
+        mlp_ratio_x: float = 8.0,  # Ratio of hidden size to d_model for MLP for visual tokens.
+        mlp_ratio_y: float = 4.0,  # Ratio of hidden size to d_model for MLP for text tokens.
+        update_y: bool = True,  # Whether to update text tokens in this block.
+        device: Optional[torch.device] = None,
+        dtype=None,
+        operations=None,
+        **block_kwargs,
+    ):
+        """Create the modulation, attention and feed-forward layers.
+
+        ``block_kwargs`` are forwarded to ``AsymmetricAttention``.
+        ``operations`` must provide a ``Linear`` factory; the ``None`` default
+        cannot be used as-is.
+
+        Raises:
+            AssertionError: if ``int(hidden_size_x * mlp_ratio_x)`` is not
+                ``1536 * 8``.
+        """
+        super().__init__()
+        self.update_y = update_y
+        self.hidden_size_x = hidden_size_x
+        self.hidden_size_y = hidden_size_y
+        # mod_x yields four chunks: (scale, gate) for attention and for the MLP.
+        self.mod_x = operations.Linear(hidden_size_x, 4 * hidden_size_x, device=device, dtype=dtype)
+        if self.update_y:
+            self.mod_y = operations.Linear(hidden_size_x, 4 * hidden_size_y, device=device, dtype=dtype)
+        else:
+            # Only the attention pre-norm scale is needed when y is not updated.
+            self.mod_y = operations.Linear(hidden_size_x, hidden_size_y, device=device, dtype=dtype)
+
+        # Self-attention:
+        self.attn = AsymmetricAttention(
+            hidden_size_x,
+            hidden_size_y,
+            num_heads=num_heads,
+            update_y=update_y,
+            device=device,
+            dtype=dtype,
+            operations=operations,
+            **block_kwargs,
+        )
+
+        # MLP.
+        mlp_hidden_dim_x = int(hidden_size_x * mlp_ratio_x)
+        # NOTE(review): hard-codes the expected MLP width (1536 * 8); any other
+        # hidden_size_x / mlp_ratio_x combination fails here.
+        assert mlp_hidden_dim_x == int(1536 * 8)
+        self.mlp_x = FeedForward(
+            in_features=hidden_size_x,
+            hidden_size=mlp_hidden_dim_x,
+            multiple_of=256,
+            ffn_dim_multiplier=None,
+            device=device,
+            dtype=dtype,
+            operations=operations,
+        )
+
+        # MLP for text not needed in last block.
+        if self.update_y:
+            mlp_hidden_dim_y = int(hidden_size_y * mlp_ratio_y)
+            self.mlp_y = FeedForward(
+                in_features=hidden_size_y,
+                hidden_size=mlp_hidden_dim_y,
+                multiple_of=256,
+                ffn_dim_multiplier=None,
+                device=device,
+                dtype=dtype,
+                operations=operations,
+            )
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        c: torch.Tensor,
+        y: torch.Tensor,
+        **attn_kwargs,
+    ):
+        """Forward pass of a block.
+
+        Args:
+            x: (B, N, dim) tensor of visual tokens
+            c: (B, dim) tensor of conditioned features
+            y: (B, L, dim) tensor of text tokens
+            **attn_kwargs: forwarded unchanged to ``self.attn``.
+
+        Returns:
+            x: (B, N, dim) tensor of visual tokens after block
+            y: (B, L, dim) tensor of text tokens after block; the input ``y``
+                is returned as-is when ``update_y`` is false.
+        """
+        N = x.size(1)
+
+        c = F.silu(c)
+        mod_x = self.mod_x(c)
+        scale_msa_x, gate_msa_x, scale_mlp_x, gate_mlp_x = mod_x.chunk(4, dim=1)
+
+        mod_y = self.mod_y(c)
+        if self.update_y:
+            scale_msa_y, gate_msa_y, scale_mlp_y, gate_mlp_y = mod_y.chunk(4, dim=1)
+        else:
+            # The y gates and MLP scale are never defined on this path; every
+            # later use of them is guarded by self.update_y.
+            scale_msa_y = mod_y
+
+        # Self-attention block.
+        x_attn, y_attn = self.attn(
+            x,
+            y,
+            scale_x=scale_msa_x,
+            scale_y=scale_msa_y,
+            **attn_kwargs,
+        )
+
+        # Attention must return one output per visual token.
+        assert x_attn.size(1) == N
+        x = residual_tanh_gated_rmsnorm(x, x_attn, gate_msa_x)
+        if self.update_y:
+            y = residual_tanh_gated_rmsnorm(y, y_attn, gate_msa_y)
+
+        # MLP block.
+        x = self.ff_block_x(x, scale_mlp_x, gate_mlp_x)
+        if self.update_y:
+            y = self.ff_block_y(y, scale_mlp_y, gate_mlp_y)
+
+        return x, y
+
+    def ff_block_x(self, x, scale_x, gate_x):
+        """Apply the x feed-forward: modulated pre-norm, MLP, gated residual."""
+        x_mod = modulated_rmsnorm(x, scale_x)
+        x_res = self.mlp_x(x_mod)
+        x = residual_tanh_gated_rmsnorm(x, x_res, gate_x)  # Sandwich norm
+        return x
+
+    def ff_block_y(self, y, scale_y, gate_y):
+        """Apply the y feed-forward; ``mlp_y`` exists only when ``update_y`` is true."""
+        y_mod = modulated_rmsnorm(y, scale_y)
+        y_res = self.mlp_y(y_mod)
+        y = residual_tanh_gated_rmsnorm(y, y_res, gate_y)  # Sandwich norm
+        return y
+
+
+class FinalLayer(nn.Module):
+    """
+    Output head: non-affine LayerNorm, conditioning-driven shift/scale
+    modulation, then a linear projection to
+    ``patch_size * patch_size * out_channels`` features.
+    """
+
+    def __init__(
+        self,
+        hidden_size,
+        patch_size,
+        out_channels,
+        device: Optional[torch.device] = None,
+        dtype=None,
+        operations=None,
+    ):
+        super().__init__()
+        factory_kwargs = {"device": device, "dtype": dtype}
+        out_features = patch_size * patch_size * out_channels
+
+        self.norm_final = operations.LayerNorm(
+            hidden_size, elementwise_affine=False, eps=1e-6, **factory_kwargs
+        )
+        # Produces the shift and scale halves consumed in forward().
+        self.mod = operations.Linear(hidden_size, 2 * hidden_size, **factory_kwargs)
+        self.linear = operations.Linear(hidden_size, out_features, **factory_kwargs)
+
+    def forward(self, x, c):
+        """Normalize ``x``, modulate it with shift/scale from ``c``, and project."""
+        shift, scale = self.mod(F.silu(c)).chunk(2, dim=1)
+        normed = self.norm_final(x)
+        return self.linear(modulate(normed, shift, scale))
+
+
+class AsymmDiTJoint(nn.Module):
+    """
+    Diffusion model with a Transformer backbone.
+
+    Ingests text embeddings instead of a label.
+    """
+
+    def __init__(
+        self,
+        *,
+        patch_size=2,
+        in_channels=4,
+        hidden_size_x=1152,
+        hidden_size_y=1152,
+        depth=48,
+        num_heads=16,
+        mlp_ratio_x=8.0,
+        mlp_ratio_y=4.0,
+        use_t5: bool = False,
+        t5_feat_dim: int = 4096,
+        t5_token_length: int = 256,
+        learn_sigma=True,
+        patch_embed_bias: bool = True,
+        timestep_mlp_bias: bool = True,
+        attend_to_padding: bool = False,
+        timestep_scale: Optional[float] = None,
+        use_extended_posenc: bool = False,
+        posenc_preserve_area: bool = False,
+        rope_theta: float = 10000.0,
+        image_model=None,
+        device: Optional[torch.device] = None,
+        dtype=None,
+        operations=None,
+        **block_kwargs,
+    ):
+        super().__init__()
+
+        self.dtype = dtype
+        self.learn_sigma = learn_sigma
+        self.in_channels = in_channels
+        self.out_channels = in_channels * 2 if learn_sigma else in_channels
+        self.patch_size = patch_size
+        self.num_heads = num_heads
+        self.hidden_size_x = hidden_size_x
+        self.hidden_size_y = hidden_size_y
+        self.head_dim = (
+            hidden_size_x // num_heads
+        )  # Head dimension and count is determined by visual.
+        self.attend_to_padding = attend_to_padding
+        self.use_extended_posenc = use_extended_posenc
+        self.posenc_preserve_area = posenc_preserve_area
+        self.use_t5 = use_t5
+        self.t5_token_length = t5_token_length
+        self.t5_feat_dim = t5_feat_dim
+        self.rope_theta = (
+            rope_theta  # Scaling factor for frequency computation for temporal RoPE.
+        )
+
+        self.x_embedder = PatchEmbed(
+            patch_size=patch_size,
+            in_chans=in_channels,
+            embed_dim=hidden_size_x,
+            bias=patch_embed_bias,
+            dtype=dtype,
+            device=device,
+            operations=operations
+        )
+        # Conditionings
+        # Timestep
+        self.t_embedder = TimestepEmbedder(
+            hidden_size_x, bias=timestep_mlp_bias, timestep_scale=timestep_scale, dtype=dtype, device=device, operations=operations
+        )
+
+        if self.use_t5:
+            # Caption Pooling (T5)
+            self.t5_y_embedder = AttentionPool(
+                t5_feat_dim, num_heads=8, output_dim=hidden_size_x, dtype=dtype, device=device, operations=operations
+            )
+
+            # Dense Embedding Projection (T5)
+            self.t5_yproj = operations.Linear(
+                t5_feat_dim, hidden_size_y, bias=True, dtype=dtype, device=device
+            )
+
+        # Initialize pos_frequencies as an empty parameter.
+        self.pos_frequencies = nn.Parameter(
+            torch.empty(3, self.num_heads, self.head_dim // 2, dtype=dtype, device=device)
+        )
+
+        assert not self.attend_to_padding
+
+        # for depth 48:
+        #  b =  0: AsymmetricJointBlock, update_y=True
+        #  b =  1: AsymmetricJointBlock, update_y=True
+        #  ...
+        #  b = 46: AsymmetricJointBlock, update_y=True
+        #  b = 47: AsymmetricJointBlock, update_y=False. No need to update text features.
+        blocks = []
+        for b in range(depth):
+            # Joint multi-modal block
+            update_y = b < depth - 1
+            block = AsymmetricJointBlock(
+                hidden_size_x,
+                hidden_size_y,
+                num_heads,
+                mlp_ratio_x=mlp_ratio_x,
+                mlp_ratio_y=mlp_ratio_y,
+                update_y=update_y,
+                attend_to_padding=attend_to_padding,
+                device=device,
+                dtype=dtype,
+                operations=operations,
+                **block_kwargs,
+            )
+
+            blocks.append(block)
+        self.blocks = nn.ModuleList(blocks)
+
+        self.final_layer = FinalLayer(
+            hidden_size_x, patch_size, self.out_channels, dtype=dtype, device=device, operations=operations
+        )
+
+    def embed_x(self, x: torch.Tensor) -> torch.Tensor:
+        """Patchify the visual input into a sequence of tokens.
+
+        Args:
+            x: (B, C, T, H, W) tensor of visual latents.
+
+        Returns:
+            The output of ``self.x_embedder``. With the ``PatchEmbed`` built in
+            ``__init__`` this is a (B, N, D) tensor holding one embedding per
+            patch; no positional information is added at this stage.
+        """
+        patch_tokens = self.x_embedder(x)
+        return patch_tokens
+
+    def prepare(
+        self,
+        x: torch.Tensor,
+        sigma: torch.Tensor,
+        t5_feat: torch.Tensor,
+        t5_mask: torch.Tensor,
+    ):
+        """Prepare input and conditioning embeddings.
+
+        Args:
+            x: (B, C, T, H, W) tensor of visual latents.
+            sigma: Per-sample noise level; ``1 - sigma`` is what gets embedded.
+            t5_feat: (B, L, t5_feat_dim) tensor of caption token features.
+            t5_mask: (B, L) mask of the caption tokens that are not padding.
+
+        Returns:
+            Tuple ``(x, c, y_feat, rope_cos, rope_sin)`` where ``x`` are the
+            patch tokens (B, N, D), ``c`` is the pooled conditioning vector
+            (timestep embedding + pooled caption), ``y_feat`` are the projected
+            caption tokens and ``rope_cos`` / ``rope_sin`` are the rotary
+            tables, each (N, num_heads, head_dim // 2).
+        """
+        # Visual patch embeddings.
+        T, H, W = x.shape[-3:]
+        pH, pW = H // self.patch_size, W // self.patch_size
+        x = self.embed_x(x)  # (B, N, D), where N = T * H * W / patch_size ** 2
+        assert x.ndim == 3
+
+        # One token per (frame, patch row, patch column).
+        N = T * pH * pW
+        assert x.size(1) == N
+        pos = create_position_matrix(
+            T, pH=pH, pW=pW, device=x.device, dtype=torch.float32
+        )  # (N, 3)
+        rope_cos, rope_sin = compute_mixed_rotation(
+            freqs=comfy.ops.cast_to(self.pos_frequencies, dtype=x.dtype, device=x.device), pos=pos
+        )  # Each are (N, num_heads, dim // 2)
+
+        c_t = self.t_embedder(1 - sigma, out_dtype=x.dtype)  # (B, D)
+
+        # NOTE: t5_y_embedder / t5_yproj are only created when use_t5 is set.
+        t5_y_pool = self.t5_y_embedder(t5_feat, t5_mask)  # (B, D)
+
+        c = c_t + t5_y_pool
+
+        y_feat = self.t5_yproj(t5_feat)  # (B, L, t5_feat_dim) --> (B, L, D)
+
+        return x, c, y_feat, rope_cos, rope_sin
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        timestep: torch.Tensor,
+        context: List[torch.Tensor],
+        attention_mask: List[torch.Tensor],
+        num_tokens=256,
+        packed_indices: Dict[str, torch.Tensor] = None,
+        rope_cos: torch.Tensor = None,
+        rope_sin: torch.Tensor = None,
+        control=None, transformer_options={}, **kwargs
+    ):
+        """Forward pass of DiT.
+
+        Args:
+            x: (B, C, T, H, W) tensor of spatial inputs (images or latent representations of images)
+            timestep: (B,) tensor of noise standard deviations (used as sigma).
+            context: caption token features, handed unchanged to ``prepare``
+                as ``t5_feat``.
+            attention_mask: mask marking the caption tokens that are not
+                padding, handed unchanged to ``prepare`` as ``t5_mask``.
+            num_tokens: forwarded to every block as ``crop_y``.
+            packed_indices: not read by this method.
+            rope_cos, rope_sin: ignored; both are recomputed by ``prepare``.
+            control: not read by this method.
+            transformer_options: options dict; ``patches_replace["dit"]`` may
+                map ``("double_block", i)`` to a callable that replaces block i.
+                The dict is only read, never mutated.
+
+        Returns:
+            The negated model output, rearranged to
+            (B, out_channels, T, H, W).
+        """
+        patches_replace = transformer_options.get("patches_replace", {})
+        y_feat = context
+        y_mask = attention_mask
+        sigma = timestep
+        # Unpacking also enforces that x is 5-dimensional.
+        _, _, T, H, W = x.shape
+
+        x, c, y_feat, rope_cos, rope_sin = self.prepare(
+            x, sigma, y_feat, y_mask
+        )
+        del y_mask
+
+        blocks_replace = patches_replace.get("dit", {})
+        for i, block in enumerate(self.blocks):
+            if ("double_block", i) in blocks_replace:
+                # Expose the original block to the patch through a dict-in /
+                # dict-out wrapper so the patch can call it or bypass it.
+                def block_wrap(args):
+                    out = {}
+                    out["img"], out["txt"] = block(
+                                                    args["img"],
+                                                    args["vec"],
+                                                    args["txt"],
+                                                    rope_cos=args["rope_cos"],
+                                                    rope_sin=args["rope_sin"],
+                                                    crop_y=args["num_tokens"]
+                                                    )
+                    return out
+                out = blocks_replace[("double_block", i)]({"img": x, "txt": y_feat, "vec": c, "rope_cos": rope_cos, "rope_sin": rope_sin, "num_tokens": num_tokens}, {"original_block": block_wrap})
+                y_feat = out["txt"]
+                x = out["img"]
+            else:
+                x, y_feat = block(
+                    x,
+                    c,
+                    y_feat,
+                    rope_cos=rope_cos,
+                    rope_sin=rope_sin,
+                    crop_y=num_tokens,
+                )  # (B, M, D), (B, L, D)
+        del y_feat  # Final layers don't use dense text features.
+
+        x = self.final_layer(x, c)  # (B, M, patch_size ** 2 * out_channels)
+        x = rearrange(
+            x,
+            "B (T hp wp) (p1 p2 c) -> B c T (hp p1) (wp p2)",
+            T=T,
+            hp=H // self.patch_size,
+            wp=W // self.patch_size,
+            p1=self.patch_size,
+            p2=self.patch_size,
+            c=self.out_channels,
+        )
+
+        return -x
--- a/comfy/ldm/genmo/joint_model/layers.py
+++ b/comfy/ldm/genmo/joint_model/layers.py
@@ -0,0 +1,164 @@
+#original code from https://github.com/genmoai/models under apache 2.0 license
+#adapted to ComfyUI
+
+import collections.abc
+import math
+from itertools import repeat
+from typing import Callable, Optional
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange
+import comfy.ldm.common_dit
+
+
+# From PyTorch internals
+def _ntuple(n):
+    """Build a converter that normalizes its argument to a tuple.
+
+    The returned callable turns any non-string iterable into a tuple of its
+    own items (whatever their count) and expands every other value, strings
+    included, into a tuple repeating that value ``n`` times.
+    """
+
+    def parse(x):
+        is_collection = isinstance(x, collections.abc.Iterable) and not isinstance(x, str)
+        return tuple(x) if is_collection else (x,) * n
+
+    return parse
+
+
+# Normalizes an int-or-pair argument (e.g. a patch size) to a pair.
+to_2tuple = _ntuple(2)
+
+
+class TimestepEmbedder(nn.Module):
+    """Embed scalar timesteps: sinusoidal features followed by a two-layer MLP."""
+
+    def __init__(
+        self,
+        hidden_size: int,
+        frequency_embedding_size: int = 256,
+        *,
+        bias: bool = True,
+        timestep_scale: Optional[float] = None,
+        dtype=None,
+        device=None,
+        operations=None,
+    ):
+        super().__init__()
+        linear_kwargs = dict(bias=bias, dtype=dtype, device=device)
+        self.mlp = nn.Sequential(
+            operations.Linear(frequency_embedding_size, hidden_size, **linear_kwargs),
+            nn.SiLU(),
+            operations.Linear(hidden_size, hidden_size, **linear_kwargs),
+        )
+        self.frequency_embedding_size = frequency_embedding_size
+        self.timestep_scale = timestep_scale
+
+    @staticmethod
+    def timestep_embedding(t, dim, max_period=10000):
+        """Return sinusoidal features of ``t`` with ``dim`` columns.
+
+        The first ``dim // 2`` columns are cosines and the next ``dim // 2``
+        are sines; an odd ``dim`` gets one trailing column of zeros.
+        """
+        half = dim // 2
+        exponents = torch.arange(start=0, end=half, dtype=torch.float32, device=t.device)
+        # Geometric frequency ladder from 1 down towards 1 / max_period.
+        freqs = torch.exp(exponents * (-math.log(max_period) / half))
+        args = t.float().unsqueeze(1) * freqs.unsqueeze(0)
+        pieces = [torch.cos(args), torch.sin(args)]
+        embedding = torch.cat(pieces, dim=-1)
+        if dim % 2 != 0:
+            zero_column = torch.zeros_like(embedding[:, :1])
+            embedding = torch.cat([embedding, zero_column], dim=-1)
+        return embedding
+
+    def forward(self, t, out_dtype):
+        """Embed timesteps ``t``; the MLP input is cast to ``out_dtype``."""
+        scaled_t = t if self.timestep_scale is None else t * self.timestep_scale
+        sinusoid = self.timestep_embedding(scaled_t, self.frequency_embedding_size)
+        return self.mlp(sinusoid.to(dtype=out_dtype))
+
+
+class FeedForward(nn.Module):
+    """Gated feed-forward block: ``w2(silu(a) * b)`` with ``a, b = w1(x)`` halves."""
+
+    def __init__(
+        self,
+        in_features: int,
+        hidden_size: int,
+        multiple_of: int,
+        ffn_dim_multiplier: Optional[float],
+        device: Optional[torch.device] = None,
+        dtype=None,
+        operations=None,
+    ):
+        super().__init__()
+        # Shrink to 2/3 so the gated variant keeps the parameter count and
+        # compute of a standard FFN.
+        inner_dim = int(2 * hidden_size / 3)
+        # Optional custom width multiplier.
+        if ffn_dim_multiplier is not None:
+            inner_dim = int(ffn_dim_multiplier * inner_dim)
+        # Round up to the next multiple of `multiple_of`.
+        inner_dim = multiple_of * ((inner_dim + multiple_of - 1) // multiple_of)
+
+        linear_kwargs = dict(bias=False, device=device, dtype=dtype)
+        self.hidden_dim = inner_dim
+        self.w1 = operations.Linear(in_features, 2 * inner_dim, **linear_kwargs)
+        self.w2 = operations.Linear(inner_dim, in_features, **linear_kwargs)
+
+    def forward(self, x):
+        """Apply the gated feed-forward transform along the last dimension."""
+        value, gate = self.w1(x).chunk(2, dim=-1)
+        return self.w2(F.silu(value) * gate)
+
+
+class PatchEmbed(nn.Module):
+    """Per-frame 2D patch embedding for (B, C, T, H, W) video tensors."""
+
+    def __init__(
+        self,
+        patch_size: int = 16,
+        in_chans: int = 3,
+        embed_dim: int = 768,
+        norm_layer: Optional[Callable] = None,
+        flatten: bool = True,
+        bias: bool = True,
+        dynamic_img_pad: bool = False,
+        dtype=None,
+        device=None,
+        operations=None,
+    ):
+        super().__init__()
+        self.patch_size = to_2tuple(patch_size)
+        self.flatten = flatten
+        self.dynamic_img_pad = dynamic_img_pad
+
+        # Non-overlapping patches: kernel and stride both equal the patch size.
+        conv_kwargs = dict(
+            kernel_size=patch_size,
+            stride=patch_size,
+            bias=bias,
+            device=device,
+            dtype=dtype,
+        )
+        self.proj = operations.Conv2d(in_chans, embed_dim, **conv_kwargs)
+        assert norm_layer is None
+        if norm_layer:
+            self.norm = norm_layer(embed_dim, device=device)
+        else:
+            self.norm = nn.Identity()
+
+    def forward(self, x):
+        """Embed every frame's patches and return (B, T * H' * W', C) tokens."""
+        B, _C, T, H, W = x.shape
+        if self.dynamic_img_pad:
+            # Zero-pad the bottom/right edges up to a multiple of the patch size.
+            pad_h = (self.patch_size[0] - H % self.patch_size[0]) % self.patch_size[0]
+            pad_w = (self.patch_size[1] - W % self.patch_size[1]) % self.patch_size[1]
+            x = F.pad(x, (0, pad_w, 0, pad_h))
+        else:
+            assert H % self.patch_size[0] == 0, f"Input height ({H}) should be divisible by patch size ({self.patch_size[0]})."
+            assert W % self.patch_size[1] == 0, f"Input width ({W}) should be divisible by patch size ({self.patch_size[1]})."
+
+        # Fold time into the batch so the 2D convolution sees single frames.
+        frames = rearrange(x, "B C T H W -> (B T) C H W", B=B, T=T)
+        frames = comfy.ldm.common_dit.pad_to_patch_size(frames, self.patch_size, padding_mode='circular')
+        patches = self.proj(frames)
+
+        # Flatten temporal and spatial dimensions.
+        if not self.flatten:
+            raise NotImplementedError("Must flatten output.")
+        tokens = rearrange(patches, "(B T) C H W -> B (T H W) C", B=B, T=T)
+
+        return self.norm(tokens)
+
+
+class RMSNorm(torch.nn.Module):
+    """RMS normalization with a learned weight and no bias."""
+
+    def __init__(self, hidden_size, eps=1e-5, device=None, dtype=None):
+        super().__init__()
+        self.eps = eps
+        # The weight is allocated uninitialized (torch.empty); it must be
+        # filled in (e.g. from a state dict) before the module is used.
+        raw_weight = torch.empty(hidden_size, device=device, dtype=dtype)
+        self.weight = torch.nn.Parameter(raw_weight)
+        # Registered explicitly as None so `self.bias` exists.
+        self.register_parameter("bias", None)
+
+    def forward(self, x):
+        """Normalize ``x`` with the shared ``rms_norm`` helper."""
+        normalized = comfy.ldm.common_dit.rms_norm(x, self.weight, self.eps)
+        return normalized
--- a/comfy/ldm/genmo/joint_model/rope_mixed.py
+++ b/comfy/ldm/genmo/joint_model/rope_mixed.py
@@ -0,0 +1,88 @@
+#original code from https://github.com/genmoai/models under apache 2.0 license
+
+# import functools
+import math
+
+import torch
+
+
+def centers(start: float, stop, num, dtype=None, device=None):
+    """Return the midpoints of ``num`` equal-width bins covering [start, stop].
+
+    Args:
+        start (float): Lower edge of the first bin.
+        stop (float): Upper edge of the last bin.
+        num (int): Number of bins, i.e. number of midpoints returned.
+        dtype (torch.dtype): Data type of the result.
+        device (torch.device): Device of the result.
+
+    Returns:
+        centers (Tensor): Bin midpoints. Shape: (num,).
+    """
+    bin_edges = torch.linspace(start, stop, num + 1, dtype=dtype, device=device)
+    lower, upper = bin_edges[:-1], bin_edges[1:]
+    return (lower + upper) / 2
+
+
+# @functools.lru_cache(maxsize=1)
+def create_position_matrix(
+    T: int,
+    pH: int,
+    pW: int,
+    device: torch.device,
+    dtype: torch.dtype,
+    *,
+    target_area: float = 36864,
+):
+    """Build the (t, h, w) position of every token of a patchified video.
+
+    Args:
+        T: int - Temporal dimension
+        pH: int - Height dimension after patchify
+        pW: int - Width dimension after patchify
+        device: Device the returned matrix is moved to.
+        dtype: Data type of the returned matrix.
+        target_area: Area (in patches) the spatial grid is rescaled to.
+
+    Returns:
+        pos: [T * pH * pW, 3] - position matrix
+    """
+    # Create 1D tensors for each dimension
+    t = torch.arange(T, dtype=dtype)
+
+    # Positionally interpolate to area 36864.
+    # (3072x3072 frame with 16x16 patches = 192x192 latents).
+    # This automatically scales rope positions when the resolution changes.
+    # We use a large target area so the model is more sensitive
+    # to changes in the learned pos_frequencies matrix.
+    scale = math.sqrt(target_area / (pW * pH))
+    # Build h/w in the same dtype as t: torch.meshgrid rejects inputs whose
+    # dtypes differ, so relying on the default dtype here would break any
+    # caller that asks for a dtype other than the default.
+    w = centers(-pW * scale / 2, pW * scale / 2, pW, dtype=dtype)
+    h = centers(-pH * scale / 2, pH * scale / 2, pH, dtype=dtype)
+
+    # Use meshgrid to create 3D grids
+    grid_t, grid_h, grid_w = torch.meshgrid(t, h, w, indexing="ij")
+
+    # Stack and reshape the grids.
+    pos = torch.stack([grid_t, grid_h, grid_w], dim=-1)  # [T, pH, pW, 3]
+    pos = pos.view(-1, 3)  # [T * pH * pW, 3]
+    pos = pos.to(dtype=dtype, device=device)
+
+    return pos
+
+
+def compute_mixed_rotation(
+    freqs: torch.Tensor,
+    pos: torch.Tensor,
+):
+    """
+    Turn 3-dim token positions into per-head rotation angles and return
+    their cosines and sines.
+
+    Args:
+        freqs: [3, num_heads, num_freqs] - learned rotation frequency (for t, row, col) for each head position
+        pos: [N, 3] - position of each token
+
+    Returns:
+        freqs_cos: [N, num_heads, num_freqs] - cosine components
+        freqs_sin: [N, num_heads, num_freqs] - sine components
+    """
+    assert freqs.ndim == 3
+    # Match pos to the dtype/device of freqs before contracting the 3 axes.
+    pos = pos.to(freqs)
+    angles = torch.einsum("Nd,dhf->Nhf", pos, freqs)
+    return torch.cos(angles), torch.sin(angles)
--- a/comfy/ldm/genmo/joint_model/temporal_rope.py
+++ b/comfy/ldm/genmo/joint_model/temporal_rope.py
@@ -0,0 +1,34 @@
+#original code from https://github.com/genmoai/models under apache 2.0 license
+
+# Based on Llama3 Implementation.
+import torch
+
+
+def apply_rotary_emb_qk_real(
+    xqk: torch.Tensor,
+    freqs_cos: torch.Tensor,
+    freqs_sin: torch.Tensor,
+) -> torch.Tensor:
+    """
+    Rotate consecutive (even, odd) feature pairs of ``xqk`` using real-valued
+    cosine/sine tables instead of complex arithmetic.
+
+    Args:
+        xqk (torch.Tensor): Query and/or Key tensors to apply rotary embeddings. Shape: (B, S, *, num_heads, D)
+                            Can be either just query or just key, or both stacked along some batch or * dim.
+        freqs_cos (torch.Tensor): Precomputed cosine frequency tensor.
+        freqs_sin (torch.Tensor): Precomputed sine frequency tensor.
+
+    Returns:
+        torch.Tensor: The input tensor with rotary embeddings applied.
+    """
+    # De-interleave the last dimension into the two halves of each pair.
+    even, odd = xqk[..., 0::2], xqk[..., 1::2]
+
+    # 2D rotation of every (even, odd) pair; results keep the type of xqk.
+    rotated_even = (even * freqs_cos - odd * freqs_sin).type_as(xqk)
+    rotated_odd = (even * freqs_sin + odd * freqs_cos).type_as(xqk)
+
+    # Re-interleave so the output layout matches the input layout.
+    return torch.stack([rotated_even, rotated_odd], dim=-1).flatten(-2)
--- a/comfy/ldm/genmo/joint_model/utils.py
+++ b/comfy/ldm/genmo/joint_model/utils.py
@@ -0,0 +1,102 @@
+#original code from https://github.com/genmoai/models under apache 2.0 license
+#adapted to ComfyUI
+
+from typing import Optional, Tuple
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+def modulate(x, shift, scale):
+    """Scale ``x`` by ``1 + scale`` and add ``shift``, broadcasting both over dim 1."""
+    gain = 1 + scale.unsqueeze(1)
+    offset = shift.unsqueeze(1)
+    return x * gain + offset
+
+
+def pool_tokens(x: torch.Tensor, mask: torch.Tensor, *, keepdim=False) -> torch.Tensor:
+    """
+    Average the tokens of x that are selected by mask.
+
+    Rows whose mask is entirely empty pool to zeros, because the token count
+    is clamped to at least 1 before dividing.
+
+    NOTE: We assume x does not require gradients.
+
+    Args:
+        x: (B, L, D) tensor of tokens.
+        mask: (B, L) boolean tensor indicating which tokens are not padding.
+        keepdim: Keep the pooled token dimension, giving (B, 1, D).
+
+    Returns:
+        pooled: (B, D) tensor of pooled tokens.
+    """
+    assert x.size(1) == mask.size(1)  # Expected mask to have same length as tokens.
+    assert x.size(0) == mask.size(0)  # Expected mask to have same batch size as tokens.
+    weights = mask.unsqueeze(2).to(dtype=x.dtype)
+    valid_count = weights.sum(dim=1, keepdim=True).clamp(min=1)
+    weights = weights / valid_count
+    return (x * weights).sum(dim=1, keepdim=keepdim)
+
+
+class AttentionPool(nn.Module):
+    """Pool a token sequence into one vector with single-query attention."""
+
+    def __init__(
+        self,
+        embed_dim: int,
+        num_heads: int,
+        output_dim: int = None,
+        device: Optional[torch.device] = None,
+        dtype=None,
+        operations=None,
+    ):
+        """
+        Args:
+            embed_dim (int): Dimensionality of input tokens.
+            num_heads (int): Number of attention heads.
+            output_dim (int): Dimensionality of output tokens. Defaults to embed_dim.
+        """
+        super().__init__()
+        self.num_heads = num_heads
+        factory_kwargs = dict(device=device, dtype=dtype)
+        self.to_kv = operations.Linear(embed_dim, 2 * embed_dim, **factory_kwargs)
+        self.to_q = operations.Linear(embed_dim, embed_dim, **factory_kwargs)
+        self.to_out = operations.Linear(embed_dim, output_dim or embed_dim, **factory_kwargs)
+
+    def forward(self, x, mask):
+        """
+        Args:
+            x (torch.Tensor): (B, L, D) tensor of input tokens.
+            mask (torch.Tensor): (B, L) boolean tensor indicating which tokens are not padding.
+
+        NOTE: We assume x does not require gradients.
+
+        Returns:
+            x (torch.Tensor): (B, D) tensor of pooled tokens.
+        """
+        embed_dim = x.size(2)
+        head_dim = embed_dim // self.num_heads
+
+        # Key mask of shape (B, 1, 1, 1+L); the extra leading slot belongs to
+        # the pooled token prepended below and is always attendable.
+        key_mask = F.pad(mask[:, None, None, :].bool(), (1, 0), value=True)
+
+        # The mean of the non-padding tokens becomes the single query token.
+        mean_token = pool_tokens(x, mask, keepdim=True)  # (B, 1, D)
+        tokens = torch.cat([mean_token, x], dim=1)  # (B, L+1, D)
+
+        # Keys/values come from every token, the query only from the mean token.
+        kv = self.to_kv(tokens)  # (B, L+1, 2 * D)
+        q = self.to_q(tokens[:, 0])  # (B, D)
+
+        # Split heads: k and v become (B, H, 1+L, head_dim), q (B, H, 1, head_dim).
+        k, v = kv.unflatten(2, (2, self.num_heads, head_dim)).transpose(1, 3).unbind(2)
+        q = q.unflatten(1, (self.num_heads, head_dim)).unsqueeze(2)
+
+        attended = F.scaled_dot_product_attention(
+            q, k, v, attn_mask=key_mask, dropout_p=0.0
+        )  # (B, H, 1, head_dim)
+
+        # Merge the heads back together and project to the output width.
+        merged = attended.squeeze(2).flatten(1, 2)  # (B, D = H * head_dim)
+        return self.to_out(merged)
--- a/comfy/ldm/genmo/vae/model.py
+++ b/comfy/ldm/genmo/vae/model.py
@@ -0,0 +1,711 @@
+#original code from https://github.com/genmoai/models under apache 2.0 license
+#adapted to ComfyUI
+
+from typing import Callable, List, Optional, Tuple, Union
+from functools import partial
+import math
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange
+
+from comfy.ldm.modules.attention import optimized_attention
+
+import comfy.ops
+ops = comfy.ops.disable_weight_init
+
+# import mochi_preview.dit.joint_model.context_parallel as cp
+# from mochi_preview.vae.cp_conv import cp_pass_frames, gather_all_frames
+
+
+def cast_tuple(t, length=1):
+    """Return ``t`` unchanged if it is a tuple, else a tuple repeating it ``length`` times."""
+    if isinstance(t, tuple):
+        return t
+    return (t,) * length
+
+
+class GroupNormSpatial(ops.GroupNorm):
+    """
+    GroupNorm applied to every frame of a (B, C, T, H, W) tensor separately.
+    """
+
+    def forward(self, x: torch.Tensor, *, chunk_size: int = 8):
+        B, C, T, H, W = x.shape
+        # Fold time into the batch so each frame is normalized on its own.
+        frames = rearrange(x, "B C T H W -> (B T) C H W")
+        # Normalize chunk_size frames at a time, writing into one buffer.
+        normed = torch.empty_like(frames)
+        for begin in range(0, B * T, chunk_size):
+            end = begin + chunk_size
+            normed[begin:end] = super().forward(frames[begin:end])
+        return rearrange(normed, "(B T) C H W -> B C T H W", B=B, T=T)
+
+class PConv3d(ops.Conv3d):
+    """3D convolution that pads the time axis itself (causally by default).
+
+    Height and width get symmetric "same"-style padding from the parent
+    convolution; the temporal axis is padded explicitly in ``forward``.
+    """
+
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size: Union[int, Tuple[int, int, int]],
+        stride: Union[int, Tuple[int, int, int]],
+        causal: bool = True,
+        context_parallel: bool = True,
+        **kwargs,
+    ):
+        self.causal = causal
+        self.context_parallel = context_parallel
+        kernel_size = cast_tuple(kernel_size, 3)
+        stride = cast_tuple(stride, 3)
+        # No temporal padding here: forward() applies it.
+        spatial_padding = (0, (kernel_size[1] - 1) // 2, (kernel_size[2] - 1) // 2)
+
+        super().__init__(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            dilation=(1, 1, 1),
+            padding=spatial_padding,
+            **kwargs,
+        )
+
+    def forward(self, x: torch.Tensor):
+        # Number of extra frames the temporal kernel needs.
+        temporal_context = self.kernel_size[0] - 1
+        if self.causal:
+            # All padding goes in front, so no output frame sees the future.
+            pad_front, pad_back = temporal_context, 0
+        else:
+            pad_front = temporal_context // 2
+            pad_back = temporal_context - pad_front
+
+        assert self.padding_mode == "replicate"  # DEBUG
+        pad_mode = self.padding_mode if self.padding_mode != "zeros" else "constant"
+        padded = F.pad(x, (0, 0, 0, 0, pad_front, pad_back), mode=pad_mode)
+        return super().forward(padded)
+
+
+class Conv1x1(ops.Linear):
+    """*1x1 Conv implemented with a linear layer."""
+
+    def __init__(self, in_features: int, out_features: int, *args, **kwargs):
+        super().__init__(in_features, out_features, *args, **kwargs)
+
+    def forward(self, x: torch.Tensor):
+        """Apply the linear layer along dim 1 (the channel dimension).
+
+        Args:
+            x: Input tensor with channels in dim 1. Shape: [B, C, *].
+
+        Returns:
+            x: Output tensor with channels back in dim 1. Shape: [B, C', *].
+        """
+        channels_last = x.movedim(1, -1)
+        projected = super().forward(channels_last)
+        return projected.movedim(-1, 1)
+
+
+class DepthToSpaceTime(nn.Module):
+    """Move channel blocks into the time, height and width dimensions."""
+
+    def __init__(
+        self,
+        temporal_expansion: int,
+        spatial_expansion: int,
+    ):
+        super().__init__()
+        self.temporal_expansion = temporal_expansion
+        self.spatial_expansion = spatial_expansion
+
+    # When printed, this module should show the temporal and spatial expansion factors.
+    def extra_repr(self):
+        return f"texp={self.temporal_expansion}, sexp={self.spatial_expansion}"
+
+    def forward(self, x: torch.Tensor):
+        """Forward pass.
+
+        Args:
+            x: Input tensor. Shape: [B, C, T, H, W].
+
+        Returns:
+            x: Rearranged tensor. Shape: [B, C/(st*s*s), T*st, H*s, W*s],
+               minus the first ``st - 1`` frames when ``st > 1``.
+        """
+        t_factor = self.temporal_expansion
+        s_factor = self.spatial_expansion
+        x = rearrange(
+            x,
+            "B (C st sh sw) T H W -> B C (T st) (H sh) (W sw)",
+            st=t_factor,
+            sh=s_factor,
+            sw=s_factor,
+        )
+
+        if t_factor > 1:
+            # Drop the first t_factor - 1 frames.
+            # This is because we always want the 3x3x3 conv filter to only apply
+            # to the first frame, and the first frame doesn't need to be repeated.
+            assert all(x.shape)
+            x = x[:, :, t_factor - 1 :]
+            assert all(x.shape)
+
+        return x
+
+
+def norm_fn(
+    in_channels: int,
+    affine: bool = True,
+):
+    """Create the per-frame group norm (32 groups) used throughout this file."""
+    per_frame_norm = GroupNormSpatial(num_groups=32, num_channels=in_channels, affine=affine)
+    return per_frame_norm
+
+
+class ResBlock(nn.Module):
+    """Residual block that preserves the spatial dimensions.
+
+    The residual branch is norm -> SiLU -> conv -> norm -> SiLU -> conv. With
+    ``prune_bottleneck`` the tensor between the two convolutions has
+    ``channels // 2`` channels. An optional attention block is applied after
+    the residual sum.
+    """
+
+    def __init__(
+        self,
+        channels: int,
+        *,
+        affine: bool = True,
+        attn_block: Optional[nn.Module] = None,
+        causal: bool = True,
+        prune_bottleneck: bool = False,
+        padding_mode: str,
+        bias: bool = True,
+    ):
+        super().__init__()
+        self.channels = channels
+
+        assert causal
+        # Width of the tensor flowing between the two convolutions.
+        mid_channels = channels // 2 if prune_bottleneck else channels
+        self.stack = nn.Sequential(
+            norm_fn(channels, affine=affine),
+            nn.SiLU(inplace=True),
+            PConv3d(
+                in_channels=channels,
+                out_channels=mid_channels,
+                kernel_size=(3, 3, 3),
+                stride=(1, 1, 1),
+                padding_mode=padding_mode,
+                bias=bias,
+                causal=causal,
+            ),
+            # This norm sees the first conv's output, so it must be sized to
+            # mid_channels; sizing it to `channels` breaks prune_bottleneck.
+            norm_fn(mid_channels, affine=affine),
+            nn.SiLU(inplace=True),
+            PConv3d(
+                in_channels=mid_channels,
+                out_channels=channels,
+                kernel_size=(3, 3, 3),
+                stride=(1, 1, 1),
+                padding_mode=padding_mode,
+                bias=bias,
+                causal=causal,
+            ),
+        )
+
+        self.attn_block = attn_block if attn_block else nn.Identity()
+
+    def forward(self, x: torch.Tensor):
+        """Forward pass.
+
+        Args:
+            x: Input tensor. Shape: [B, C, T, H, W].
+
+        Returns:
+            Tensor of the same shape: ``attn_block(stack(x) + x)``.
+        """
+        residual = x
+        x = self.stack(x)
+        x = x + residual
+        # Release the extra reference before the attention block runs.
+        del residual
+
+        return self.attn_block(x)
+
+
+class Attention(nn.Module):
+    """Multi-head self-attention over the time axis of a video tensor.
+
+    Every spatial location attends across its own frames only.
+    """
+
+    def __init__(
+        self,
+        dim: int,
+        head_dim: int = 32,
+        qkv_bias: bool = False,
+        out_bias: bool = True,
+        qk_norm: bool = True,
+    ) -> None:
+        super().__init__()
+        self.head_dim = head_dim
+        self.num_heads = dim // head_dim
+        self.qk_norm = qk_norm
+
+        # NOTE(review): these are plain nn.Linear layers, while the other
+        # layers in this file are built from `ops` -- confirm this is intended.
+        self.qkv = nn.Linear(dim, 3 * dim, bias=qkv_bias)
+        self.out = nn.Linear(dim, dim, bias=out_bias)
+
+    def forward(
+        self,
+        x: torch.Tensor,
+    ) -> torch.Tensor:
+        """Compute temporal self-attention.
+
+        Args:
+            x: Input tensor. Shape: [B, C, T, H, W].
+
+        Returns:
+            x: Output tensor. Shape: [B, C, T, H, W].
+        """
+        B, _, T, H, W = x.shape
+
+        if T == 1:
+            # No attention for single frame.
+            # With a single key the attention output is just the value, so
+            # only the value projection and the output projection are needed.
+            x = x.movedim(1, -1)  # [B, C, T, H, W] -> [B, T, H, W, C]
+            qkv = self.qkv(x)
+            _, _, x = qkv.chunk(3, dim=-1)  # Throw away queries and keys.
+            x = self.out(x)
+            return x.movedim(-1, 1)  # [B, T, H, W, C] -> [B, C, T, H, W]
+
+        # 1D temporal attention.
+        # Every spatial location becomes its own batch entry.
+        x = rearrange(x, "B C t h w -> (B h w) t C")
+        qkv = self.qkv(x)
+
+        # Input: qkv with shape [B, t, 3 * num_heads * head_dim]
+        # Output: x with shape [B, num_heads, t, head_dim]
+        q, k, v = qkv.view(qkv.shape[0], qkv.shape[1], 3, self.num_heads, self.head_dim).transpose(1, 3).unbind(2)
+
+        if self.qk_norm:
+            # L2-normalize queries and keys along the head dimension.
+            q = F.normalize(q, p=2, dim=-1)
+            k = F.normalize(k, p=2, dim=-1)
+
+        # q/k/v are already split per head, hence skip_reshape=True.
+        x = optimized_attention(q, k, v, self.num_heads, skip_reshape=True)
+
+        assert x.size(0) == q.size(0)
+
+        x = self.out(x)
+        x = rearrange(x, "(B h w) t C -> B C t h w", B=B, h=H, w=W)
+        return x
+
+
+class AttentionBlock(nn.Module):
+    """Pre-norm residual wrapper around ``Attention``."""
+
+    def __init__(
+        self,
+        dim: int,
+        **attn_kwargs,
+    ) -> None:
+        super().__init__()
+        self.norm = norm_fn(dim)
+        self.attn = Attention(dim, **attn_kwargs)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Return ``x`` plus the attention of its normalized version."""
+        attended = self.attn(self.norm(x))
+        return x + attended
+
+
+class CausalUpsampleBlock(nn.Module):
+    """Residual blocks followed by a 1x1 projection and depth-to-space-time."""
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        num_res_blocks: int,
+        *,
+        temporal_expansion: int = 2,
+        spatial_expansion: int = 2,
+        **block_kwargs,
+    ):
+        super().__init__()
+
+        res_blocks = [block_fn(in_channels, **block_kwargs) for _ in range(num_res_blocks)]
+        self.blocks = nn.Sequential(*res_blocks)
+
+        self.temporal_expansion = temporal_expansion
+        self.spatial_expansion = spatial_expansion
+
+        # The projection produces the extra channels that DepthToSpaceTime
+        # then folds into the time/height/width dimensions.
+        expanded_channels = out_channels * temporal_expansion * (spatial_expansion**2)
+        self.proj = Conv1x1(in_channels, expanded_channels)
+
+        self.d2st = DepthToSpaceTime(
+            temporal_expansion=temporal_expansion, spatial_expansion=spatial_expansion
+        )
+
+    def forward(self, x):
+        """Run the residual blocks, project, then expand time and space."""
+        features = self.blocks(x)
+        projected = self.proj(features)
+        return self.d2st(projected)
+
+
+def block_fn(channels, *, affine: bool = True, has_attention: bool = False, **block_kwargs):
+    """Build a ``ResBlock``, followed by an ``AttentionBlock`` when ``has_attention`` is set."""
+    attn_block = None
+    if has_attention:
+        attn_block = AttentionBlock(channels)
+    return ResBlock(channels, affine=affine, attn_block=attn_block, **block_kwargs)
+
+
+class DownsampleBlock(nn.Module):
+    """Strided convolution followed by residual blocks (VAE encoder stage)."""
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        num_res_blocks,
+        *,
+        temporal_reduction=2,
+        spatial_reduction=2,
+        **block_kwargs,
+    ):
+        """
+        Downsample block for the VAE encoder.
+
+        Args:
+            in_channels: Number of input channels.
+            out_channels: Number of output channels.
+            num_res_blocks: Number of residual blocks.
+            temporal_reduction: Temporal reduction factor.
+            spatial_reduction: Spatial reduction factor.
+            **block_kwargs: Forwarded to every residual block; must contain
+                "bias", which is also used for the strided convolution.
+        """
+        super().__init__()
+
+        # Change the channel count in the strided convolution.
+        # This lets the ResBlock have uniform channel count,
+        # as in ConvNeXt.
+        assert in_channels != out_channels
+        # Kernel size equals stride, so the windows do not overlap.
+        reduction = (temporal_reduction, spatial_reduction, spatial_reduction)
+        downsample = PConv3d(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=reduction,
+            stride=reduction,
+            # First layer in each block always uses replicate padding
+            padding_mode="replicate",
+            bias=block_kwargs["bias"],
+        )
+        res_blocks = [block_fn(out_channels, **block_kwargs) for _ in range(num_res_blocks)]
+
+        self.layers = nn.Sequential(downsample, *res_blocks)
+
+    def forward(self, x):
+        """Downsample ``x`` and refine it with the residual blocks."""
+        return self.layers(x)
+
+
+def add_fourier_features(inputs: torch.Tensor, start=6, stop=8, step=1):
+    """Append base-2 Fourier features of every channel to ``inputs``.
+
+    For each exponent ``f`` in ``range(start, stop, step)`` and each channel,
+    ``sin(2 * pi * 2**f * x)`` and ``cos(2 * pi * 2**f * x)`` are appended
+    along the channel dimension.
+
+    Args:
+        inputs: Input tensor. Shape: [B, C, T, H, W].
+        start: First frequency exponent.
+        stop: Exclusive upper bound of the frequency exponents.
+        step: Stride between consecutive exponents.
+
+    Returns:
+        Tensor of shape [B, (1 + 2 * num_freqs) * C, T, H, W], ordered as
+        inputs, then all sines, then all cosines.
+    """
+    assert inputs.ndim == 5
+    C = inputs.size(1)
+
+    # Create Base 2 Fourier features.
+    freqs = torch.arange(start, stop, step, dtype=inputs.dtype, device=inputs.device)
+    # Take the count from the tensor itself: (stop - start) // step
+    # undercounts whenever step does not evenly divide the range.
+    num_freqs = len(freqs)
+    w = torch.pow(2.0, freqs) * (2 * torch.pi)  # [num_freqs]
+    w = w.repeat(C)[None, :, None, None, None]  # [1, C * num_freqs, 1, 1, 1]
+
+    # Interleaved repeat of input channels to match w.
+    h = inputs.repeat_interleave(num_freqs, dim=1)  # [B, C * num_freqs, T, H, W]
+    # Scale channels by frequency.
+    h = w * h
+
+    return torch.cat(
+        [
+            inputs,
+            torch.sin(h),
+            torch.cos(h),
+        ],
+        dim=1,
+    )
+
+
+class FourierFeatures(nn.Module):
+    """Module form of ``add_fourier_features`` with a fixed exponent range."""
+
+    def __init__(self, start: int = 6, stop: int = 8, step: int = 1):
+        super().__init__()
+        # Exponent range handed to add_fourier_features on every call.
+        self.start, self.stop, self.step = start, stop, step
+
+    def forward(self, inputs):
+        """Append sin/cos Fourier features to ``inputs`` along the channel dim.
+
+        Args:
+            inputs: Input tensor. Shape: [B, C, T, H, W]
+
+        Returns:
+            Tensor of shape [B, (1 + 2 * num_freqs) * C, T, H, W].
+        """
+        return add_fourier_features(
+            inputs, start=self.start, stop=self.stop, step=self.step
+        )
+
+
+class Decoder(nn.Module):
+    """Video decoder that maps a latent tensor back to ``out_channels`` channels.
+
+    ``self.blocks`` holds three stages, applied in order by ``forward``:
+      1. a 1x1x1 ``ops.Conv3d`` input layer followed by ``num_res_blocks[-1]``
+         ``block_fn`` blocks at the last entry of the channel list;
+      2. ``len(channel_multipliers) - 1`` ``CausalUpsampleBlock`` modules that
+         walk the channel list from its last entry to its first;
+      3. ``num_res_blocks[0]`` ``block_fn`` blocks at the first channel count.
+    ``self.output_proj`` (``Conv1x1``) then maps to ``out_channels``.
+
+    Only ``nonlinearity == "silu"``, ``causal=True`` and ``output_norm=False``
+    are accepted; anything else trips an assertion. Extra keyword arguments
+    are forwarded unchanged to ``block_fn`` and ``CausalUpsampleBlock``.
+
+    NOTE(review): ``temporal_expansions`` and ``spatial_expansions`` default
+    to ``None`` but ``len()`` is called on both, so they are effectively
+    required.
+    """
+
+    def __init__(
+        self,
+        *,
+        out_channels: int = 3,
+        latent_dim: int,
+        base_channels: int,
+        channel_multipliers: List[int],
+        num_res_blocks: List[int],
+        temporal_expansions: Optional[List[int]] = None,
+        spatial_expansions: Optional[List[int]] = None,
+        has_attention: List[bool],
+        output_norm: bool = True,
+        nonlinearity: str = "silu",
+        output_nonlinearity: str = "silu",
+        causal: bool = True,
+        **block_kwargs,
+    ):
+        super().__init__()
+        self.input_channels = latent_dim
+        self.base_channels = base_channels
+        self.channel_multipliers = channel_multipliers
+        self.num_res_blocks = num_res_blocks
+        self.output_nonlinearity = output_nonlinearity
+        assert nonlinearity == "silu"
+        assert causal
+
+        # Channel count per resolution level, in the order given by
+        # channel_multipliers. The decoder walks this list from the end.
+        ch = [mult * base_channels for mult in channel_multipliers]
+        self.num_up_blocks = len(ch) - 1
+        # One res-block count per upsample block, plus the first and last stage.
+        assert len(num_res_blocks) == self.num_up_blocks + 2
+
+        blocks = []
+
+        first_block = [
+            ops.Conv3d(latent_dim, ch[-1], kernel_size=(1, 1, 1))
+        ]  # Input layer.
+        # First set of blocks preserve channel count.
+        for _ in range(num_res_blocks[-1]):
+            first_block.append(
+                block_fn(
+                    ch[-1],
+                    has_attention=has_attention[-1],
+                    causal=causal,
+                    **block_kwargs,
+                )
+            )
+        blocks.append(nn.Sequential(*first_block))
+
+        assert len(temporal_expansions) == len(spatial_expansions) == self.num_up_blocks
+        assert len(num_res_blocks) == len(has_attention) == self.num_up_blocks + 2
+
+        upsample_block_fn = CausalUpsampleBlock
+
+        # All per-level lists are indexed from the end, so upsample block i
+        # goes from ch[-i - 1] channels to ch[-i - 2] channels.
+        for i in range(self.num_up_blocks):
+            block = upsample_block_fn(
+                ch[-i - 1],
+                ch[-i - 2],
+                num_res_blocks=num_res_blocks[-i - 2],
+                has_attention=has_attention[-i - 2],
+                temporal_expansion=temporal_expansions[-i - 1],
+                spatial_expansion=spatial_expansions[-i - 1],
+                causal=causal,
+                **block_kwargs,
+            )
+            blocks.append(block)
+
+        # An output norm is not implemented; the flag must be passed as False.
+        assert not output_norm
+
+        # Last block. Preserve channel count.
+        last_block = []
+        for _ in range(num_res_blocks[0]):
+            last_block.append(
+                block_fn(
+                    ch[0], has_attention=has_attention[0], causal=causal, **block_kwargs
+                )
+            )
+        blocks.append(nn.Sequential(*last_block))
+
+        self.blocks = nn.ModuleList(blocks)
+        self.output_proj = Conv1x1(ch[0], out_channels)
+
+    def forward(self, x):
+        """Run every stage in ``self.blocks``, then project to output channels.
+
+        Args:
+            x: Latent tensor. Shape: [B, input_channels, t, h, w]. Scaled [-1, 1].
+
+        Returns:
+            x: Reconstructed video tensor. Shape: [B, C, T, H, W]. Scaled to [-1, 1].
+               NOTE(review): the ratios T/t, H/h and W/w presumably follow from
+               the configured ``temporal_expansions`` / ``spatial_expansions``;
+               confirm against ``CausalUpsampleBlock``.
+        """
+        for block in self.blocks:
+            x = block(x)
+
+        if self.output_nonlinearity == "silu":
+            # In-place activation is only used outside training mode.
+            x = F.silu(x, inplace=not self.training)
+        else:
+            assert (
+                not self.output_nonlinearity
+            )  # StyleGAN3 omits the to-RGB nonlinearity.
+
+        return self.output_proj(x).contiguous()
+
+class LatentDistribution:
+    """Diagonal Gaussian over latents, described by a mean and a log-variance."""
+
+    def __init__(self, mean: torch.Tensor, logvar: torch.Tensor):
+        """Initialize latent distribution.
+
+        Args:
+            mean: Mean of the distribution. Shape: [B, C, T, H, W].
+            logvar: Logarithm of variance of the distribution. Shape: [B, C, T, H, W].
+        """
+        assert mean.shape == logvar.shape
+        self.mean = mean
+        self.logvar = logvar
+
+    def sample(self, temperature=1.0, generator: torch.Generator = None, noise=None):
+        """Draw one sample from the distribution.
+
+        Args:
+            temperature: ``0.0`` returns the mean, ``1.0`` draws a regular
+                Gaussian sample. No other value is supported.
+            generator: Optional RNG used when ``noise`` is not supplied.
+            noise: Optional pre-drawn standard-normal noise. It must live on
+                the same device as the mean and is cast to the mean's dtype.
+
+        Returns:
+            ``noise * exp(0.5 * logvar) + mean``, or the mean itself when
+            ``temperature == 0.0``.
+
+        Raises:
+            NotImplementedError: If ``temperature`` is neither 0.0 nor 1.0.
+        """
+        if temperature == 0.0:
+            return self.mean
+
+        # Reject unsupported temperatures before drawing noise, so that a
+        # failing call neither allocates a tensor nor advances the generator.
+        if temperature != 1.0:
+            raise NotImplementedError(f"Temperature {temperature} is not supported.")
+
+        if noise is None:
+            noise = torch.randn(self.mean.shape, device=self.mean.device, dtype=self.mean.dtype, generator=generator)
+        else:
+            assert noise.device == self.mean.device
+            noise = noise.to(self.mean.dtype)
+
+        # Just Gaussian sample with no scaling of variance.
+        return noise * torch.exp(self.logvar * 0.5) + self.mean
+
+    def mode(self):
+        """Return the most likely value, which is the mean."""
+        return self.mean
+
+class Encoder(nn.Module):
+    """Video encoder producing the mean and log-variance of a latent Gaussian.
+
+    ``forward`` first appends Fourier features to the input, so ``in_channels``
+    is the channel count *after* that step. ``self.layers`` then chains:
+      1. an input projection (``Conv1x1`` when ``input_is_conv_1x1`` is set,
+         otherwise a 1x1x1 ``ops.Conv3d``);
+      2. ``num_res_blocks[0]`` ``block_fn`` blocks;
+      3. one ``DownsampleBlock`` per step between consecutive channel counts;
+      4. ``num_res_blocks[-1]`` further ``block_fn`` blocks.
+    The output head is ``norm_fn`` -> SiLU -> ``Conv1x1`` to ``2 * latent_dim``
+    channels, which are split into means and log-variances.
+
+    ``num_res_blocks``, ``prune_bottlenecks`` and ``has_attentions`` must each
+    have ``len(channel_multipliers) + 1`` entries. ``temporal_reductions`` and
+    ``spatial_reductions`` must each have ``len(channel_multipliers) - 1``.
+    """
+
+    def __init__(
+        self,
+        *,
+        in_channels: int,
+        base_channels: int,
+        channel_multipliers: List[int],
+        num_res_blocks: List[int],
+        latent_dim: int,
+        temporal_reductions: List[int],
+        spatial_reductions: List[int],
+        prune_bottlenecks: List[bool],
+        has_attentions: List[bool],
+        affine: bool = True,
+        bias: bool = True,
+        input_is_conv_1x1: bool = False,
+        padding_mode: str,
+    ):
+        super().__init__()
+        self.temporal_reductions = temporal_reductions
+        self.spatial_reductions = spatial_reductions
+        self.base_channels = base_channels
+        self.channel_multipliers = channel_multipliers
+        self.num_res_blocks = num_res_blocks
+        self.latent_dim = latent_dim
+
+        # Default exponent range (start=6, stop=8, step=1).
+        self.fourier_features = FourierFeatures()
+        # Channel count per resolution level.
+        ch = [mult * base_channels for mult in channel_multipliers]
+        num_down_blocks = len(ch) - 1
+        assert len(num_res_blocks) == num_down_blocks + 2
+
+        layers = (
+            [ops.Conv3d(in_channels, ch[0], kernel_size=(1, 1, 1), bias=True)]
+            if not input_is_conv_1x1
+            else [Conv1x1(in_channels, ch[0])]
+        )
+
+        assert len(prune_bottlenecks) == num_down_blocks + 2
+        assert len(has_attentions) == num_down_blocks + 2
+        block = partial(block_fn, padding_mode=padding_mode, affine=affine, bias=bias)
+
+        for _ in range(num_res_blocks[0]):
+            layers.append(block(ch[0], has_attention=has_attentions[0], prune_bottleneck=prune_bottlenecks[0]))
+        # Drop the entry consumed by the first stage; from here on index i of
+        # these two lists refers to downsample block i, and [-1] to the final
+        # stage.
+        prune_bottlenecks = prune_bottlenecks[1:]
+        has_attentions = has_attentions[1:]
+
+        assert len(temporal_reductions) == len(spatial_reductions) == len(ch) - 1
+        for i in range(num_down_blocks):
+            layer = DownsampleBlock(
+                ch[i],
+                ch[i + 1],
+                num_res_blocks=num_res_blocks[i + 1],
+                temporal_reduction=temporal_reductions[i],
+                spatial_reduction=spatial_reductions[i],
+                prune_bottleneck=prune_bottlenecks[i],
+                has_attention=has_attentions[i],
+                affine=affine,
+                bias=bias,
+                padding_mode=padding_mode,
+            )
+
+            layers.append(layer)
+
+        # Additional blocks.
+        for _ in range(num_res_blocks[-1]):
+            layers.append(block(ch[-1], has_attention=has_attentions[-1], prune_bottleneck=prune_bottlenecks[-1]))
+
+        self.layers = nn.Sequential(*layers)
+
+        # Output layers.
+        self.output_norm = norm_fn(ch[-1])
+        # Twice latent_dim: one half for the means, one half for the logvars.
+        self.output_proj = Conv1x1(ch[-1], 2 * latent_dim, bias=False)
+
+    @property
+    def temporal_downsample(self):
+        """Product of all per-block temporal reduction factors."""
+        return math.prod(self.temporal_reductions)
+
+    @property
+    def spatial_downsample(self):
+        """Product of all per-block spatial reduction factors."""
+        return math.prod(self.spatial_reductions)
+
+    def forward(self, x) -> LatentDistribution:
+        """Encode a video into a latent distribution.
+
+        Args:
+            x: Input video tensor. Shape: [B, C, T, H, W]. Scaled to [-1, 1]
+
+        Returns:
+            A ``LatentDistribution`` built from:
+            means: Latent tensor. Shape: [B, latent_dim, t, h, w]. Scaled [-1, 1].
+            logvar: Shape: [B, latent_dim, t, h, w].
+            NOTE(review): the reduction from (T, H, W) to (t, h, w) presumably
+            follows from ``temporal_reductions`` / ``spatial_reductions``;
+            confirm against ``DownsampleBlock``.
+        """
+        assert x.ndim == 5, f"Expected 5D input, got {x.shape}"
+        x = self.fourier_features(x)
+
+        x = self.layers(x)
+
+        x = self.output_norm(x)
+        x = F.silu(x, inplace=True)
+        x = self.output_proj(x)
+
+        # First half of the channels are the means, second half the logvars.
+        means, logvar = torch.chunk(x, 2, dim=1)
+
+        assert means.ndim == 5
+        assert logvar.shape == means.shape
+        assert means.size(1) == self.latent_dim
+
+        return LatentDistribution(means, logvar)
+
+
+class VideoVAE(nn.Module):
+    """Encoder/decoder pair wired up with a fixed configuration."""
+
+    def __init__(self):
+        super().__init__()
+
+        # 15 input channels: the encoder applies its Fourier features before
+        # the first layer, so this is the count after that expansion.
+        encoder_config = dict(
+            in_channels=15,
+            base_channels=64,
+            channel_multipliers=[1, 2, 4, 6],
+            num_res_blocks=[3, 3, 4, 6, 3],
+            latent_dim=12,
+            temporal_reductions=[1, 2, 3],
+            spatial_reductions=[2, 2, 2],
+            prune_bottlenecks=[False, False, False, False, False],
+            has_attentions=[False, True, True, True, True],
+            affine=True,
+            bias=True,
+            input_is_conv_1x1=True,
+            padding_mode="replicate",
+        )
+        decoder_config = dict(
+            out_channels=3,
+            base_channels=128,
+            channel_multipliers=[1, 2, 4, 6],
+            temporal_expansions=[1, 2, 3],
+            spatial_expansions=[2, 2, 2],
+            num_res_blocks=[3, 3, 4, 6, 3],
+            latent_dim=12,
+            has_attention=[False, False, False, False, False],
+            padding_mode="replicate",
+            output_norm=False,
+            nonlinearity="silu",
+            output_nonlinearity="silu",
+            causal=True,
+        )
+
+        self.encoder = Encoder(**encoder_config)
+        self.decoder = Decoder(**decoder_config)
+
+    def encode(self, x):
+        """Return the mode of the latent distribution the encoder yields for ``x``."""
+        latent_distribution = self.encoder(x)
+        return latent_distribution.mode()
+
+    def decode(self, x):
+        """Run the decoder on latent ``x``."""
+        return self.decoder(x)
--- a/comfy/ldm/lightricks/model.py
+++ b/comfy/ldm/lightricks/model.py
@@ -0,0 +1,502 @@
+import torch
+from torch import nn
+import comfy.ldm.modules.attention
+from comfy.ldm.genmo.joint_model.layers import RMSNorm
+import comfy.ldm.common_dit
+from einops import rearrange
+import math
+from typing import Dict, Optional, Tuple
+
+from .symmetric_patchifier import SymmetricPatchifier
+
+
+def get_timestep_embedding(
+    timesteps: torch.Tensor,
+    embedding_dim: int,
+    flip_sin_to_cos: bool = False,
+    downscale_freq_shift: float = 1,
+    scale: float = 1,
+    max_period: int = 10000,
+):
+    """
+    Build sinusoidal timestep embeddings in the style of Denoising Diffusion Probabilistic Models.
+
+    Args
+        timesteps (torch.Tensor):
+            1-D tensor with one (possibly fractional) index per batch element.
+        embedding_dim (int):
+            width of the returned embedding.
+        flip_sin_to_cos (bool):
+            `cos, sin` ordering when True, `sin, cos` ordering when False.
+        downscale_freq_shift (float):
+            subtracted from the half width when spacing the frequencies.
+        scale (float):
+            multiplier applied to the angles before sin/cos.
+        max_period (int):
+            base of the geometric frequency progression.
+    Returns
+        torch.Tensor: an [N x dim] tensor of positional embeddings.
+    """
+    assert len(timesteps.shape) == 1, "Timesteps should be a 1d-array"
+
+    half_dim = embedding_dim // 2
+    positions = torch.arange(
+        start=0, end=half_dim, dtype=torch.float32, device=timesteps.device
+    )
+    log_freqs = (-math.log(max_period) * positions) / (half_dim - downscale_freq_shift)
+
+    # Outer product of timesteps and frequencies, then the global scale.
+    angles = scale * (timesteps[:, None].float() * torch.exp(log_freqs)[None, :])
+
+    sin_part = torch.sin(angles)
+    cos_part = torch.cos(angles)
+    if flip_sin_to_cos:
+        emb = torch.cat([cos_part, sin_part], dim=-1)
+    else:
+        emb = torch.cat([sin_part, cos_part], dim=-1)
+
+    # An odd width gets one trailing zero column.
+    if embedding_dim % 2 == 1:
+        emb = torch.nn.functional.pad(emb, (0, 1, 0, 0))
+    return emb
+
+
+class TimestepEmbedding(nn.Module):
+    """Two-layer MLP (Linear -> SiLU -> Linear) applied to timestep features.
+
+    Args:
+        in_channels: Width of the incoming features.
+        time_embed_dim: Hidden width, and the output width when ``out_dim``
+            is not given.
+        act_fn: Accepted for signature compatibility; the activation is
+            always SiLU.
+        out_dim: Optional output width overriding ``time_embed_dim``.
+        post_act_fn: Must be ``None``; post-activations are not implemented.
+        cond_proj_dim: When set, a bias-free linear layer projects an extra
+            ``condition`` input to ``in_channels`` so it can be added to the
+            sample.
+        sample_proj_bias: Whether both linear layers carry a bias.
+        dtype, device: Passed to every linear layer.
+        operations: Namespace providing the ``Linear`` class to instantiate.
+
+    Raises:
+        NotImplementedError: If ``post_act_fn`` is not ``None``.
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        time_embed_dim: int,
+        act_fn: str = "silu",
+        out_dim: int = None,
+        post_act_fn: Optional[str] = None,
+        cond_proj_dim=None,
+        sample_proj_bias=True,
+        dtype=None, device=None, operations=None,
+    ):
+        super().__init__()
+
+        # Fail at construction time. Previously a non-None post_act_fn left
+        # self.post_act undefined and forward() died with an AttributeError.
+        if post_act_fn is not None:
+            raise NotImplementedError(f"post_act_fn={post_act_fn!r} is not supported.")
+
+        self.linear_1 = operations.Linear(in_channels, time_embed_dim, sample_proj_bias, dtype=dtype, device=device)
+
+        if cond_proj_dim is not None:
+            self.cond_proj = operations.Linear(cond_proj_dim, in_channels, bias=False, dtype=dtype, device=device)
+        else:
+            self.cond_proj = None
+
+        self.act = nn.SiLU()
+
+        if out_dim is not None:
+            time_embed_dim_out = out_dim
+        else:
+            time_embed_dim_out = time_embed_dim
+        self.linear_2 = operations.Linear(time_embed_dim, time_embed_dim_out, sample_proj_bias, dtype=dtype, device=device)
+
+        self.post_act = None
+
+    def forward(self, sample, condition=None):
+        """Embed ``sample``, optionally adding a projected ``condition`` first."""
+        if condition is not None:
+            sample = sample + self.cond_proj(condition)
+        sample = self.linear_1(sample)
+
+        if self.act is not None:
+            sample = self.act(sample)
+
+        sample = self.linear_2(sample)
+
+        if self.post_act is not None:
+            sample = self.post_act(sample)
+        return sample
+
+
+class Timesteps(nn.Module):
+    """Parameter-free module that stores sinusoidal-embedding settings."""
+
+    def __init__(self, num_channels: int, flip_sin_to_cos: bool, downscale_freq_shift: float, scale: int = 1):
+        super().__init__()
+        # Kept verbatim and forwarded to get_timestep_embedding on each call.
+        self.num_channels = num_channels
+        self.flip_sin_to_cos = flip_sin_to_cos
+        self.downscale_freq_shift = downscale_freq_shift
+        self.scale = scale
+
+    def forward(self, timesteps):
+        """Return the sinusoidal embedding of ``timesteps`` with the stored settings."""
+        return get_timestep_embedding(
+            timesteps,
+            self.num_channels,
+            flip_sin_to_cos=self.flip_sin_to_cos,
+            downscale_freq_shift=self.downscale_freq_shift,
+            scale=self.scale,
+        )
+
+
+class PixArtAlphaCombinedTimestepSizeEmbeddings(nn.Module):
+    """
+    Timestep embedder for PixArt-Alpha.
+
+    Only the timestep is embedded here; ``resolution``, ``aspect_ratio`` and
+    ``batch_size`` are accepted by ``forward`` but not used.
+
+    Reference:
+    https://github.com/PixArt-alpha/PixArt-alpha/blob/0f55e922376d8b797edd44d25d0e7464b260dcab/diffusion/model/nets/PixArtMS.py#L164C9-L168C29
+    """
+
+    def __init__(self, embedding_dim, size_emb_dim, use_additional_conditions: bool = False, dtype=None, device=None, operations=None):
+        super().__init__()
+
+        self.outdim = size_emb_dim
+        # 256 sinusoidal features feed the MLP that maps to embedding_dim.
+        self.time_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0)
+        self.timestep_embedder = TimestepEmbedding(
+            in_channels=256,
+            time_embed_dim=embedding_dim,
+            dtype=dtype,
+            device=device,
+            operations=operations,
+        )
+
+    def forward(self, timestep, resolution, aspect_ratio, batch_size, hidden_dtype):
+        """Return the (N, D) embedding of ``timestep``, computed in ``hidden_dtype``."""
+        sinusoidal = self.time_proj(timestep)
+        return self.timestep_embedder(sinusoidal.to(dtype=hidden_dtype))  # (N, D)
+
+
+class AdaLayerNormSingle(nn.Module):
+    r"""
+    Norm layer adaptive layer norm single (adaLN-single).
+
+    As proposed in PixArt-Alpha (see: https://arxiv.org/abs/2310.00426; Section 2.3).
+
+    Parameters:
+        embedding_dim (`int`): The size of each embedding vector.
+        use_additional_conditions (`bool`): To use additional conditions for normalization or not.
+    """
+
+    def __init__(self, embedding_dim: int, use_additional_conditions: bool = False, dtype=None, device=None, operations=None):
+        super().__init__()
+
+        self.emb = PixArtAlphaCombinedTimestepSizeEmbeddings(
+            embedding_dim, size_emb_dim=embedding_dim // 3, use_additional_conditions=use_additional_conditions, dtype=dtype, device=device, operations=operations
+        )
+
+        self.silu = nn.SiLU()
+        # Six chunks of width embedding_dim per timestep.
+        self.linear = operations.Linear(embedding_dim, 6 * embedding_dim, bias=True, dtype=dtype, device=device)
+
+    def forward(
+        self,
+        timestep: torch.Tensor,
+        added_cond_kwargs: Optional[Dict[str, torch.Tensor]] = None,
+        batch_size: Optional[int] = None,
+        hidden_dtype: Optional[torch.dtype] = None,
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        """Embed ``timestep`` and project it to the modulation width.
+
+        Args:
+            timestep: Timesteps passed to the embedder.
+            added_cond_kwargs: Extra keyword arguments for the embedder. When
+                empty or ``None``, ``resolution`` and ``aspect_ratio`` are
+                passed as ``None``.
+            batch_size: Forwarded to the embedder.
+            hidden_dtype: dtype in which the embedding is computed.
+
+        Returns:
+            A pair ``(projected, embedded_timestep)`` where ``projected`` is
+            ``linear(silu(embedded_timestep))``.
+        """
+        # No modulation happening here.
+        added_cond_kwargs = added_cond_kwargs or {"resolution": None, "aspect_ratio": None}
+        embedded_timestep = self.emb(timestep, **added_cond_kwargs, batch_size=batch_size, hidden_dtype=hidden_dtype)
+        return self.linear(self.silu(embedded_timestep)), embedded_timestep
+
+class PixArtAlphaTextProjection(nn.Module):
+    """
+    Two-layer MLP that projects caption embeddings.
+
+    Adapted from https://github.com/PixArt-alpha/PixArt-alpha/blob/master/diffusion/model/nets/PixArt_blocks.py
+    """
+
+    def __init__(self, in_features, hidden_size, out_features=None, act_fn="gelu_tanh", dtype=None, device=None, operations=None):
+        super().__init__()
+        # The output width falls back to the hidden width.
+        final_features = hidden_size if out_features is None else out_features
+
+        self.linear_1 = operations.Linear(in_features=in_features, out_features=hidden_size, bias=True, dtype=dtype, device=device)
+
+        if act_fn == "silu":
+            self.act_1 = nn.SiLU()
+        elif act_fn == "gelu_tanh":
+            self.act_1 = nn.GELU(approximate="tanh")
+        else:
+            raise ValueError(f"Unknown activation function: {act_fn}")
+
+        self.linear_2 = operations.Linear(in_features=hidden_size, out_features=final_features, bias=True, dtype=dtype, device=device)
+
+    def forward(self, caption):
+        """Apply linear -> activation -> linear to ``caption``."""
+        return self.linear_2(self.act_1(self.linear_1(caption)))
+
+
+class GELU_approx(nn.Module):
+    """Linear projection followed by the tanh approximation of GELU."""
+
+    def __init__(self, dim_in, dim_out, dtype=None, device=None, operations=None):
+        super().__init__()
+        self.proj = operations.Linear(dim_in, dim_out, dtype=dtype, device=device)
+
+    def forward(self, x):
+        """Project ``x`` and apply tanh-approximated GELU."""
+        projected = self.proj(x)
+        return torch.nn.functional.gelu(projected, approximate="tanh")
+
+
+class FeedForward(nn.Module):
+    """MLP: GELU_approx projection -> dropout -> linear.
+
+    ``glu`` is accepted but does not alter the layers that are built.
+    """
+
+    def __init__(self, dim, dim_out, mult=4, glu=False, dropout=0., dtype=None, device=None, operations=None):
+        super().__init__()
+        hidden_dim = int(dim * mult)
+
+        self.net = nn.Sequential(
+            GELU_approx(dim, hidden_dim, dtype=dtype, device=device, operations=operations),
+            nn.Dropout(dropout),
+            operations.Linear(hidden_dim, dim_out, dtype=dtype, device=device),
+        )
+
+    def forward(self, x):
+        """Run ``x`` through the MLP."""
+        return self.net(x)
+
+
+def apply_rotary_emb(input_tensor, freqs_cis): #TODO: remove duplicate funcs and pick the best/fastest one
+    """Rotate consecutive feature pairs of ``input_tensor``.
+
+    ``freqs_cis[0]`` holds the cosine factors and ``freqs_cis[1]`` the sine
+    factors. Each pair ``(a, b)`` along the last dim contributes ``(-b, a)``
+    to the sine-weighted term.
+    """
+    cos_freqs, sin_freqs = freqs_cis[0], freqs_cis[1]
+
+    # Split the last dim into (pair index, position within the pair).
+    pairs = rearrange(input_tensor, "... (d r) -> ... d r", r=2)
+    first, second = pairs.unbind(dim=-1)
+    swapped = torch.stack((-second, first), dim=-1)
+    input_tensor_rot = rearrange(swapped, "... d r -> ... (d r)")
+
+    return input_tensor * cos_freqs + input_tensor_rot * sin_freqs
+
+
+class CrossAttention(nn.Module):
+    """Multi-head attention with RMS-normalised queries and keys.
+
+    Acts as self-attention when no ``context`` is supplied. ``dim_head`` times
+    ``heads`` is the internal width of the q/k/v projections.
+    """
+
+    def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0., attn_precision=None, dtype=None, device=None, operations=None):
+        super().__init__()
+        inner_dim = dim_head * heads
+        if context_dim is None:
+            context_dim = query_dim
+
+        self.attn_precision = attn_precision
+        self.heads = heads
+        self.dim_head = dim_head
+
+        self.q_norm = RMSNorm(inner_dim, dtype=dtype, device=device)
+        self.k_norm = RMSNorm(inner_dim, dtype=dtype, device=device)
+
+        self.to_q = operations.Linear(query_dim, inner_dim, bias=True, dtype=dtype, device=device)
+        self.to_k = operations.Linear(context_dim, inner_dim, bias=True, dtype=dtype, device=device)
+        self.to_v = operations.Linear(context_dim, inner_dim, bias=True, dtype=dtype, device=device)
+
+        self.to_out = nn.Sequential(
+            operations.Linear(inner_dim, query_dim, dtype=dtype, device=device),
+            nn.Dropout(dropout),
+        )
+
+    def forward(self, x, context=None, mask=None, pe=None):
+        """Attend from ``x`` to ``context`` (or to ``x`` itself).
+
+        Args:
+            x: Query-side input.
+            context: Key/value-side input; defaults to ``x``.
+            mask: Optional attention mask; selects the masked attention kernel.
+            pe: Optional rotary factors applied to both queries and keys.
+        """
+        if context is None:
+            context = x
+
+        q = self.q_norm(self.to_q(x))
+        k = self.k_norm(self.to_k(context))
+        v = self.to_v(context)
+
+        if pe is not None:
+            q = apply_rotary_emb(q, pe)
+            k = apply_rotary_emb(k, pe)
+
+        attention = comfy.ldm.modules.attention
+        if mask is None:
+            out = attention.optimized_attention(q, k, v, self.heads, attn_precision=self.attn_precision)
+        else:
+            out = attention.optimized_attention_masked(q, k, v, self.heads, mask, attn_precision=self.attn_precision)
+        return self.to_out(out)
+
+
+class BasicTransformerBlock(nn.Module):
+    """Transformer block: modulated self-attention, cross-attention, modulated MLP.
+
+    ``scale_shift_table`` is allocated uninitialised (``torch.empty``); its
+    values presumably come from a loaded checkpoint.
+    """
+
+    def __init__(self, dim, n_heads, d_head, context_dim=None, attn_precision=None, dtype=None, device=None, operations=None):
+        super().__init__()
+
+        self.attn_precision = attn_precision
+        shared = dict(attn_precision=self.attn_precision, dtype=dtype, device=device, operations=operations)
+
+        # Self-attention: no context_dim, so keys/values come from x.
+        self.attn1 = CrossAttention(query_dim=dim, heads=n_heads, dim_head=d_head, context_dim=None, **shared)
+        self.ff = FeedForward(dim, dim_out=dim, glu=True, dtype=dtype, device=device, operations=operations)
+
+        # Cross-attention over the conditioning context.
+        self.attn2 = CrossAttention(query_dim=dim, context_dim=context_dim, heads=n_heads, dim_head=d_head, **shared)
+
+        self.scale_shift_table = nn.Parameter(torch.empty(6, dim, device=device, dtype=dtype))
+
+    def forward(self, x, context=None, attention_mask=None, timestep=None, pe=None):
+        """Apply the block; ``x`` is updated in place and also returned."""
+        table = self.scale_shift_table
+        # Six modulation tensors: learned table plus the timestep projection.
+        modulation = table[None, None] + timestep.reshape(x.shape[0], timestep.shape[1], table.shape[0], -1)
+        shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = modulation.unbind(dim=2)
+
+        attn_in = comfy.ldm.common_dit.rms_norm(x) * (1 + scale_msa) + shift_msa
+        x += self.attn1(attn_in, pe=pe) * gate_msa
+
+        x += self.attn2(x, context=context, mask=attention_mask)
+
+        mlp_in = comfy.ldm.common_dit.rms_norm(x) * (1 + scale_mlp) + shift_mlp
+        x += self.ff(mlp_in) * gate_mlp
+
+        return x
+
+def get_fractional_positions(indices_grid, max_pos):
+    """Divide the first three slices along dim 1 by their ``max_pos`` entry.
+
+    The three quotients are stacked along a new trailing dimension.
+    """
+    per_axis = []
+    for axis in range(3):
+        per_axis.append(indices_grid[:, axis] / max_pos[axis])
+    return torch.stack(per_axis, dim=-1)
+
+
+def precompute_freqs_cis(indices_grid, dim, out_dtype, theta=10000.0, max_pos=(20, 2048, 2048)):
+    """Build the rotary cos/sin tables for a 3-axis position grid.
+
+    Args:
+        indices_grid: Position grid; slices 0..2 along dim 1 are the three
+            axes (presumably shaped [B, 3, N] - confirm against the caller).
+        dim: Feature width the tables must cover.
+        out_dtype: dtype of the returned tables (computed in float32).
+        theta: Upper end of the geometric frequency range ``[1, theta]``.
+        max_pos: Per-axis divisor that turns positions into fractions.
+
+    Returns:
+        ``(cos_freq, sin_freq)`` with ``dim`` entries along the last dim.
+        When ``dim`` is not a multiple of 6, the leading ``dim % 6`` entries
+        are identity padding (cos = 1, sin = 0).
+    """
+    dtype = torch.float32 #self.dtype
+
+    fractional_positions = get_fractional_positions(indices_grid, max_pos)
+
+    start = 1
+    end = theta
+    device = fractional_positions.device
+
+    # dim // 6 frequencies per axis, geometrically spaced from start to end.
+    # linspace already produces `dtype`, so no further cast is needed.
+    indices = theta ** (
+        torch.linspace(
+            math.log(start, theta),
+            math.log(end, theta),
+            dim // 6,
+            device=device,
+            dtype=dtype,
+        )
+    )
+
+    indices = indices * math.pi / 2
+
+    # Fractions are remapped from [0, 1] to [-1, 1] before scaling.
+    freqs = (
+        (indices * (fractional_positions.unsqueeze(-1) * 2 - 1))
+        .transpose(-1, -2)
+        .flatten(2)
+    )
+
+    # Each frequency is duplicated so it covers one rotated feature pair.
+    cos_freq = freqs.cos().repeat_interleave(2, dim=-1)
+    sin_freq = freqs.sin().repeat_interleave(2, dim=-1)
+    if dim % 6 != 0:
+        cos_padding = torch.ones_like(cos_freq[:, :, : dim % 6])
+        sin_padding = torch.zeros_like(cos_freq[:, :, : dim % 6])
+        cos_freq = torch.cat([cos_padding, cos_freq], dim=-1)
+        sin_freq = torch.cat([sin_padding, sin_freq], dim=-1)
+    return cos_freq.to(out_dtype), sin_freq.to(out_dtype)
+
+
+class LTXVModel(torch.nn.Module):
+    """Video diffusion transformer operating on patchified latents.
+
+    Args:
+        in_channels: Channel count of the latent; also the output channel count.
+        cross_attention_dim: Width of the context fed to cross-attention.
+        attention_head_dim: Width of each attention head.
+        num_attention_heads: Number of heads; ``heads * head_dim`` is the
+            transformer width.
+        caption_channels: Width of the raw caption embeddings.
+        num_layers: Number of transformer blocks.
+        positional_embedding_theta: ``theta`` used for the rotary tables.
+        positional_embedding_max_pos: Per-axis ``max_pos`` used for the rotary
+            tables.
+        dtype, device: Passed to every parameterised layer.
+        operations: Namespace providing the ``Linear`` / ``LayerNorm`` classes.
+    """
+
+    def __init__(self,
+                 in_channels=128,
+                 cross_attention_dim=2048,
+                 attention_head_dim=64,
+                 num_attention_heads=32,
+
+                 caption_channels=4096,
+                 num_layers=28,
+
+
+                 positional_embedding_theta=10000.0,
+                 positional_embedding_max_pos=(20, 2048, 2048),
+                 dtype=None, device=None, operations=None, **kwargs):
+        super().__init__()
+        self.dtype = dtype
+        self.out_channels = in_channels
+        self.inner_dim = num_attention_heads * attention_head_dim
+        # Kept so forward() builds the rotary tables with the configured
+        # values rather than precompute_freqs_cis's own defaults.
+        self.positional_embedding_theta = positional_embedding_theta
+        self.positional_embedding_max_pos = positional_embedding_max_pos
+
+        self.patchify_proj = operations.Linear(in_channels, self.inner_dim, bias=True, dtype=dtype, device=device)
+
+        self.adaln_single = AdaLayerNormSingle(
+            self.inner_dim, use_additional_conditions=False, dtype=dtype, device=device, operations=operations
+        )
+
+        # self.adaln_single.linear = operations.Linear(self.inner_dim, 4 * self.inner_dim, bias=True, dtype=dtype, device=device)
+
+        self.caption_projection = PixArtAlphaTextProjection(
+            in_features=caption_channels, hidden_size=self.inner_dim, dtype=dtype, device=device, operations=operations
+        )
+
+        self.transformer_blocks = nn.ModuleList(
+            [
+                BasicTransformerBlock(
+                    self.inner_dim,
+                    num_attention_heads,
+                    attention_head_dim,
+                    context_dim=cross_attention_dim,
+                    # attn_precision=attn_precision,
+                    dtype=dtype, device=device, operations=operations
+                )
+                for _ in range(num_layers)
+            ]
+        )
+
+        self.scale_shift_table = nn.Parameter(torch.empty(2, self.inner_dim, dtype=dtype, device=device))
+        self.norm_out = operations.LayerNorm(self.inner_dim, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
+        self.proj_out = operations.Linear(self.inner_dim, self.out_channels, dtype=dtype, device=device)
+
+        self.patchifier = SymmetricPatchifier(1)
+
+    def forward(self, x, timestep, context, attention_mask, frame_rate=25, guiding_latent=None, **kwargs):
+        """Run the transformer on latent ``x``.
+
+        Args:
+            x: Latent video; its five dims are read as
+                (batch, channels, frames, height, width).
+            timestep: One timestep per batch element.
+            context: Caption embeddings, projected internally to the
+                transformer width.
+            attention_mask: Mask over context tokens; entries equal to 1 are
+                kept, everything else is turned into ``-inf``.
+            frame_rate: Used to scale the temporal axis of the position grid.
+            guiding_latent: Optional latent whose first frame replaces the
+                first frame of ``x``; that frame's timestep is set to zero.
+
+        Returns:
+            Tensor with the same layout as ``x``.
+        """
+        indices_grid = self.patchifier.get_grid(
+            orig_num_frames=x.shape[2],
+            orig_height=x.shape[3],
+            orig_width=x.shape[4],
+            batch_size=x.shape[0],
+            scale_grid=((1 / frame_rate) * 8, 32, 32), #TODO: controlable frame rate
+            device=x.device,
+        )
+
+        if guiding_latent is not None:
+            # Per-position timesteps: the guided first frame gets timestep 0.
+            ts = torch.ones([x.shape[0], 1, x.shape[2], x.shape[3], x.shape[4]], device=x.device, dtype=x.dtype)
+            input_ts = timestep.view([timestep.shape[0]] + [1] * (x.ndim - 1))
+            ts *= input_ts
+            ts[:, :, 0] = 0.0
+            timestep = self.patchifier.patchify(ts)
+            # Write the guiding frame into a copy so the caller's tensor is
+            # left untouched; input_x keeps the original values for later.
+            input_x = x
+            x = x.clone()
+            x[:, :, 0] = guiding_latent[:, :, 0]
+
+        orig_shape = list(x.shape)
+
+        x = self.patchifier.patchify(x)
+
+        x = self.patchify_proj(x)
+        timestep = timestep * 1000.0
+
+        attention_mask = 1.0 - attention_mask.to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1]))
+        attention_mask = attention_mask.masked_fill(attention_mask.to(torch.bool), float("-inf"))  # not sure about this
+        # attention_mask = (context != 0).any(dim=2).to(dtype=x.dtype)
+
+        pe = precompute_freqs_cis(
+            indices_grid,
+            dim=self.inner_dim,
+            out_dtype=x.dtype,
+            theta=self.positional_embedding_theta,
+            max_pos=self.positional_embedding_max_pos,
+        )
+
+        batch_size = x.shape[0]
+        timestep, embedded_timestep = self.adaln_single(
+            timestep.flatten(),
+            {"resolution": None, "aspect_ratio": None},
+            batch_size=batch_size,
+            hidden_dtype=x.dtype,
+        )
+        # Second dimension is 1 or number of tokens (if timestep_per_token)
+        timestep = timestep.view(batch_size, -1, timestep.shape[-1])
+        embedded_timestep = embedded_timestep.view(
+            batch_size, -1, embedded_timestep.shape[-1]
+        )
+
+        # 2. Blocks
+        if self.caption_projection is not None:
+            batch_size = x.shape[0]
+            context = self.caption_projection(context)
+            context = context.view(
+                batch_size, -1, x.shape[-1]
+            )
+
+        for block in self.transformer_blocks:
+            x = block(
+                x,
+                context=context,
+                attention_mask=attention_mask,
+                timestep=timestep,
+                pe=pe
+            )
+
+        # 3. Output
+        scale_shift_values = (
+            self.scale_shift_table[None, None] + embedded_timestep[:, :, None]
+        )
+        shift, scale = scale_shift_values[:, :, 0], scale_shift_values[:, :, 1]
+        x = self.norm_out(x)
+        # Modulation
+        x = x * (1 + scale) + shift
+        x = self.proj_out(x)
+
+        x = self.patchifier.unpatchify(
+            latents=x,
+            output_height=orig_shape[3],
+            output_width=orig_shape[4],
+            output_num_frames=orig_shape[2],
+            out_channels=orig_shape[1] // math.prod(self.patchifier.patch_size),
+        )
+
+        if guiding_latent is not None:
+            # Overwrite the first frame's prediction with the scaled
+            # difference between the original input and the guiding frame.
+            x[:, :, 0] = (input_x[:, :, 0] - guiding_latent[:, :, 0]) / input_ts[:, :, 0]
+
+        return x
--- a/comfy/ldm/lightricks/symmetric_patchifier.py
+++ b/comfy/ldm/lightricks/symmetric_patchifier.py
@@ -0,0 +1,105 @@
+from abc import ABC, abstractmethod
+from typing import Tuple
+
+import torch
+from einops import rearrange
+from torch import Tensor
+
+
+def append_dims(x: torch.Tensor, target_dims: int) -> torch.Tensor:
+    """Pad ``x`` with trailing singleton dims until it has ``target_dims`` dims."""
+    missing = target_dims - x.ndim
+    if missing == 0:
+        return x
+    if missing < 0:
+        raise ValueError(
+            f"input has {x.ndim} dims but target_dims is {target_dims}, which is less"
+        )
+    # Ellipsis keeps the existing dims; each None adds one size-1 dim.
+    index = (Ellipsis,) + (None,) * missing
+    return x[index]
+
+
+class Patchifier(ABC):
+    """Base class for turning latent videos into token sequences and back.
+
+    The patch size is ``(1, patch_size, patch_size)``: frames are never
+    grouped, height and width are grouped by ``patch_size``.
+    """
+
+    def __init__(self, patch_size: int):
+        super().__init__()
+        self._patch_size = (1, patch_size, patch_size)
+
+    @abstractmethod
+    def patchify(self, latents: Tensor) -> Tensor:
+        """Convert a latent video into a sequence of patch tokens."""
+        pass
+
+    @abstractmethod
+    def unpatchify(
+        self,
+        latents: Tensor,
+        output_height: int,
+        output_width: int,
+        output_num_frames: int,
+        out_channels: int,
+    ) -> Tensor:
+        """Convert a sequence of patch tokens back into a latent video."""
+        pass
+
+    @property
+    def patch_size(self):
+        """The ``(frames, height, width)`` patch size."""
+        return self._patch_size
+
+    def get_grid(
+        self, orig_num_frames, orig_height, orig_width, batch_size, scale_grid, device
+    ):
+        """Return per-token (frame, height, width) coordinates.
+
+        Args:
+            orig_num_frames, orig_height, orig_width: Size of the unpatchified
+                latent; each is integer-divided by the matching patch size.
+            batch_size: Number of copies of the grid to return.
+            scale_grid: ``None``, or three per-axis scales (numbers or tensors).
+                Axis ``i`` is multiplied by ``scale_grid[i] * patch_size[i]``.
+            device: Device on which the grid is created.
+
+        Returns:
+            float32 tensor of shape ``[batch_size, 3, f * h * w]``.
+        """
+        f = orig_num_frames // self._patch_size[0]
+        h = orig_height // self._patch_size[1]
+        w = orig_width // self._patch_size[2]
+        grid_h = torch.arange(h, dtype=torch.float32, device=device)
+        grid_w = torch.arange(w, dtype=torch.float32, device=device)
+        grid_f = torch.arange(f, dtype=torch.float32, device=device)
+        # "ij" is what meshgrid does when indexing is omitted; naming it
+        # avoids the deprecation warning newer PyTorch versions emit.
+        grid = torch.meshgrid(grid_f, grid_h, grid_w, indexing="ij")
+        grid = torch.stack(grid, dim=0)
+        grid = grid.unsqueeze(0).repeat(batch_size, 1, 1, 1, 1)
+
+        if scale_grid is not None:
+            for i in range(3):
+                if isinstance(scale_grid[i], Tensor):
+                    # Trailing singleton dims let the scale broadcast over
+                    # the frame/height/width dims of grid[:, i].
+                    scale = append_dims(scale_grid[i], grid.ndim - 1)
+                else:
+                    scale = scale_grid[i]
+                grid[:, i, ...] = grid[:, i, ...] * scale * self._patch_size[i]
+
+        # [b, c, f, h, w] -> [b, c, f * h * w]
+        grid = grid.flatten(2)
+        return grid
+
+
+class SymmetricPatchifier(Patchifier):
+    """Patchifier that folds each patch into the channel dim of one token."""
+
+    def patchify(
+        self,
+        latents: Tensor,
+    ) -> Tensor:
+        """Flatten a latent video into patch tokens.
+
+        Args:
+            latents: Tensor laid out as [b, c, f, h, w].
+
+        Returns:
+            A single tensor laid out as [b, tokens, c * p1 * p2 * p3].
+        """
+        latents = rearrange(
+            latents,
+            "b c (f p1) (h p2) (w p3) -> b (f h w) (c p1 p2 p3)",
+            p1=self._patch_size[0],
+            p2=self._patch_size[1],
+            p3=self._patch_size[2],
+        )
+        return latents
+
+    def unpatchify(
+        self,
+        latents: Tensor,
+        output_height: int,
+        output_width: int,
+        output_num_frames: int,
+        out_channels: int,
+    ) -> Tensor:
+        """Reassemble patch tokens into a latent video.
+
+        Args:
+            latents: Tensor laid out as [b, tokens, c * p * q].
+            output_height: Height of the result, before dividing by the patch
+                height.
+            output_width: Width of the result, before dividing by the patch
+                width.
+            output_num_frames: Number of frames of the result.
+            out_channels: Not used; the channel count is inferred from the
+                token width.
+
+        Returns:
+            A single tensor laid out as [b, c, f, h, w].
+        """
+        # Only the spatial patch sizes are undone here; the temporal patch
+        # size is not part of the pattern below.
+        output_height = output_height // self._patch_size[1]
+        output_width = output_width // self._patch_size[2]
+        latents = rearrange(
+            latents,
+            "b (f h w) (c p q) -> b c f (h p) (w q) ",
+            f=output_num_frames,
+            h=output_height,
+            w=output_width,
+            p=self._patch_size[1],
+            q=self._patch_size[2],
+        )
+        return latents
--- a/comfy/ldm/lightricks/vae/causal_conv3d.py
+++ b/comfy/ldm/lightricks/vae/causal_conv3d.py
@@ -0,0 +1,62 @@
+from typing import Tuple, Union
+
+import torch
+import torch.nn as nn
+
+
+class CausalConv3d(nn.Module):
+    """3D convolution that replicate-pads the time axis in `forward`.
+
+    Height and width are zero-padded by the wrapped `nn.Conv3d` itself; the
+    time axis gets no built-in padding. Extra keyword arguments are accepted
+    and ignored.
+    """
+
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size: int = 3,
+        stride: Union[int, Tuple[int]] = 1,
+        dilation: int = 1,
+        groups: int = 1,
+        **kwargs,
+    ):
+        super().__init__()
+
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+
+        # The same extent is used along time, height and width.
+        self.time_kernel_size = kernel_size
+        spatial_pad = kernel_size // 2
+
+        self.conv = nn.Conv3d(
+            in_channels,
+            out_channels,
+            (kernel_size, kernel_size, kernel_size),
+            stride=stride,
+            dilation=(dilation, 1, 1),
+            padding=(0, spatial_pad, spatial_pad),
+            padding_mode="zeros",
+            groups=groups,
+        )
+
+    def forward(self, x, causal: bool = True):
+        """Pad `x` along dim 2 with copies of its edge frames, then convolve.
+
+        With `causal=True` all `time_kernel_size - 1` padding frames repeat
+        the first frame; otherwise `(time_kernel_size - 1) // 2` copies of the
+        first and of the last frame are added at the respective ends.
+        """
+        total_pad = self.time_kernel_size - 1
+        front_count = total_pad if causal else total_pad // 2
+        pieces = [x[:, :, :1, :, :].repeat((1, 1, front_count, 1, 1)), x]
+        if not causal:
+            pieces.append(x[:, :, -1:, :, :].repeat((1, 1, total_pad // 2, 1, 1)))
+        return self.conv(torch.concatenate(pieces, dim=2))
+
+    @property
+    def weight(self):
+        """Weight tensor of the wrapped `nn.Conv3d`."""
+        return self.conv.weight
--- a/comfy/ldm/lightricks/vae/causal_video_autoencoder.py
+++ b/comfy/ldm/lightricks/vae/causal_video_autoencoder.py
@@ -0,0 +1,698 @@
+import torch
+from torch import nn
+from functools import partial
+import math
+from einops import rearrange
+from typing import Any, Mapping, Optional, Tuple, Union, List
+from .conv_nd_factory import make_conv_nd, make_linear_nd
+from .pixel_norm import PixelNorm
+
+
+class Encoder(nn.Module):
+    r"""
+    The `Encoder` layer of a variational autoencoder that encodes its input into a latent representation.
+
+    Args:
+        dims (`int` or `Tuple[int, int]`, *optional*, defaults to 3):
+            The number of dimensions to use in convolutions.
+        in_channels (`int`, *optional*, defaults to 3):
+            The number of input channels.
+        out_channels (`int`, *optional*, defaults to 3):
+            The number of output channels.
+        blocks (`List[Tuple[str, int]]`, *optional*, defaults to `[("res_x", 1)]`):
+            The blocks to use. Each block is a tuple of the block name and either the number of layers
+            or a dict of block parameters (`num_layers`, `multiplier`).
+        base_channels (`int`, *optional*, defaults to 128):
+            The number of output channels for the first convolutional layer.
+        norm_num_groups (`int`, *optional*, defaults to 32):
+            The number of groups for normalization.
+        patch_size (`int`, *optional*, defaults to 1):
+            The spatial patch size folded into channels before the first convolution.
+        norm_layer (`str`, *optional*, defaults to `group_norm`):
+            The normalization layer to use. One of `group_norm`, `pixel_norm` or `layer_norm`.
+        latent_log_var (`str`, *optional*, defaults to `per_channel`):
+            `per_channel` doubles the `conv_out` channels, `uniform` adds a single one, `none` adds nothing.
+
+    Raises:
+        ValueError: On an unknown block name, `norm_layer` or `latent_log_var`.
+    """
+
+    def __init__(
+        self,
+        dims: Union[int, Tuple[int, int]] = 3,
+        in_channels: int = 3,
+        out_channels: int = 3,
+        blocks=None,
+        base_channels: int = 128,
+        norm_num_groups: int = 32,
+        patch_size: Union[int, Tuple[int]] = 1,
+        norm_layer: str = "group_norm",  # group_norm, pixel_norm, layer_norm
+        latent_log_var: str = "per_channel",
+    ):
+        super().__init__()
+        # Build the default per call so no list object is shared between instances.
+        blocks = [("res_x", 1)] if blocks is None else blocks
+        self.patch_size = patch_size
+        self.norm_layer = norm_layer
+        self.latent_channels = out_channels
+        self.latent_log_var = latent_log_var
+        self.blocks_desc = blocks
+
+        in_channels = in_channels * patch_size**2
+        output_channel = base_channels
+
+        self.conv_in = make_conv_nd(
+            dims=dims,
+            in_channels=in_channels,
+            out_channels=output_channel,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            causal=True,
+        )
+
+        self.down_blocks = nn.ModuleList([])
+
+        # Stride of the strided causal convolution behind each `compress_*` block.
+        compress_strides = {
+            "compress_time": (2, 1, 1),
+            "compress_space": (1, 2, 2),
+            "compress_all": (2, 2, 2),
+            "compress_all_x_y": (2, 2, 2),
+        }
+
+        for block_name, block_params in blocks:
+            input_channel = output_channel
+            if isinstance(block_params, int):
+                block_params = {"num_layers": block_params}
+
+            if block_name == "res_x":
+                block = UNetMidBlock3D(
+                    dims=dims,
+                    in_channels=input_channel,
+                    num_layers=block_params["num_layers"],
+                    resnet_eps=1e-6,
+                    resnet_groups=norm_num_groups,
+                    norm_layer=norm_layer,
+                )
+            elif block_name == "res_x_y":
+                output_channel = block_params.get("multiplier", 2) * output_channel
+                block = ResnetBlock3D(
+                    dims=dims,
+                    in_channels=input_channel,
+                    out_channels=output_channel,
+                    eps=1e-6,
+                    groups=norm_num_groups,
+                    norm_layer=norm_layer,
+                )
+            elif block_name in compress_strides:
+                # Only the `_x_y` variant also widens the channel count.
+                if block_name == "compress_all_x_y":
+                    output_channel = block_params.get("multiplier", 2) * output_channel
+                block = make_conv_nd(
+                    dims=dims,
+                    in_channels=input_channel,
+                    out_channels=output_channel,
+                    kernel_size=3,
+                    stride=compress_strides[block_name],
+                    causal=True,
+                )
+            else:
+                raise ValueError(f"unknown block: {block_name}")
+
+            self.down_blocks.append(block)
+
+        # out
+        if norm_layer == "group_norm":
+            self.conv_norm_out = nn.GroupNorm(
+                num_channels=output_channel, num_groups=norm_num_groups, eps=1e-6
+            )
+        elif norm_layer == "pixel_norm":
+            self.conv_norm_out = PixelNorm()
+        elif norm_layer == "layer_norm":
+            self.conv_norm_out = LayerNorm(output_channel, eps=1e-6)
+        else:
+            # Fail here instead of with an AttributeError on the first forward pass.
+            raise ValueError(f"unknown norm_layer: {norm_layer}")
+
+        self.conv_act = nn.SiLU()
+
+        conv_out_channels = out_channels
+        if latent_log_var == "per_channel":
+            conv_out_channels *= 2
+        elif latent_log_var == "uniform":
+            conv_out_channels += 1
+        elif latent_log_var != "none":
+            raise ValueError(f"Invalid latent_log_var: {latent_log_var}")
+        self.conv_out = make_conv_nd(
+            dims, output_channel, conv_out_channels, 3, padding=1, causal=True
+        )
+
+        self.gradient_checkpointing = False
+
+    def forward(self, sample: torch.FloatTensor) -> torch.FloatTensor:
+        r"""The forward method of the `Encoder` class.
+
+        With `latent_log_var == "uniform"` the last channel is repeated along dim 1.
+        """
+
+        sample = patchify(sample, patch_size_hw=self.patch_size, patch_size_t=1)
+        sample = self.conv_in(sample)
+
+        use_checkpoint = self.gradient_checkpointing and self.training
+        if use_checkpoint:
+            # Imported by name so that `torch` is not rebound as a local here.
+            from torch.utils.checkpoint import checkpoint
+
+        for down_block in self.down_blocks:
+            if use_checkpoint:
+                # `checkpoint` runs the function itself, so the inputs must be
+                # handed to it rather than to its return value.
+                sample = checkpoint(down_block, sample, use_reentrant=False)
+            else:
+                sample = down_block(sample)
+
+        sample = self.conv_norm_out(sample)
+        sample = self.conv_act(sample)
+        sample = self.conv_out(sample)
+
+        if self.latent_log_var == "uniform":
+            # Append shape[1] - 2 extra copies of the last channel along dim 1.
+            last_channel = sample[:, -1:, ...]
+            num_dims = sample.dim()
+
+            if num_dims == 4:
+                # For shape (B, C, H, W)
+                repeated_last_channel = last_channel.repeat(
+                    1, sample.shape[1] - 2, 1, 1
+                )
+                sample = torch.cat([sample, repeated_last_channel], dim=1)
+            elif num_dims == 5:
+                # For shape (B, C, F, H, W)
+                repeated_last_channel = last_channel.repeat(
+                    1, sample.shape[1] - 2, 1, 1, 1
+                )
+                sample = torch.cat([sample, repeated_last_channel], dim=1)
+            else:
+                raise ValueError(f"Invalid input shape: {sample.shape}")
+
+        return sample
+
+
+class Decoder(nn.Module):
+    r"""
+    The `Decoder` layer of a variational autoencoder that decodes its latent representation into an output sample.
+
+    Args:
+        dims (`int` or `Tuple[int, int]`):
+            The number of dimensions to use in convolutions.
+        in_channels (`int`, *optional*, defaults to 3):
+            The number of input channels.
+        out_channels (`int`, *optional*, defaults to 3):
+            The number of output channels.
+        blocks (`List[Tuple[str, int]]`, *optional*, defaults to `[("res_x", 1)]`):
+            The blocks to use, applied in reversed order. Each block is a tuple of the block name and
+            either the number of layers or a dict of block parameters.
+        base_channels (`int`, *optional*, defaults to 128):
+            The channel count before scaling by every `res_x_y` multiplier; the product is what `conv_in` outputs.
+        layers_per_block (`int`, *optional*, defaults to 2):
+            Stored on the module; not used when building the blocks.
+        norm_num_groups (`int`, *optional*, defaults to 32):
+            The number of groups for normalization.
+        patch_size (`int`, *optional*, defaults to 1):
+            The spatial patch size unfolded from channels after the last convolution.
+        norm_layer (`str`, *optional*, defaults to `group_norm`):
+            The normalization layer to use. One of `group_norm`, `pixel_norm` or `layer_norm`.
+        causal (`bool`, *optional*, defaults to `True`):
+            Whether to use causal convolutions or not.
+
+    Raises:
+        ValueError: On an unknown block name or `norm_layer`.
+    """
+
+    def __init__(
+        self,
+        dims,
+        in_channels: int = 3,
+        out_channels: int = 3,
+        blocks=None,
+        base_channels: int = 128,
+        layers_per_block: int = 2,
+        norm_num_groups: int = 32,
+        patch_size: int = 1,
+        norm_layer: str = "group_norm",
+        causal: bool = True,
+    ):
+        super().__init__()
+        # Build the default per call so no list object is shared between instances.
+        blocks = [("res_x", 1)] if blocks is None else blocks
+        self.patch_size = patch_size
+        self.layers_per_block = layers_per_block
+        out_channels = out_channels * patch_size**2
+        self.causal = causal
+        self.blocks_desc = blocks
+
+        # Compute output channel to be product of all channel-multiplier blocks
+        output_channel = base_channels
+        for block_name, block_params in list(reversed(blocks)):
+            block_params = block_params if isinstance(block_params, dict) else {}
+            if block_name == "res_x_y":
+                output_channel = output_channel * block_params.get("multiplier", 2)
+
+        self.conv_in = make_conv_nd(
+            dims, in_channels, output_channel, kernel_size=3, stride=1, padding=1, causal=True
+        )
+
+        self.up_blocks = nn.ModuleList([])
+        for block_name, block_params in list(reversed(blocks)):
+            input_channel = output_channel
+            if isinstance(block_params, int):
+                block_params = {"num_layers": block_params}
+
+            if block_name == "res_x":
+                block = UNetMidBlock3D(
+                    dims=dims,
+                    in_channels=input_channel,
+                    num_layers=block_params["num_layers"],
+                    resnet_eps=1e-6,
+                    resnet_groups=norm_num_groups,
+                    norm_layer=norm_layer,
+                )
+            elif block_name == "res_x_y":
+                output_channel = output_channel // block_params.get("multiplier", 2)
+                block = ResnetBlock3D(
+                    dims=dims,
+                    in_channels=input_channel,
+                    out_channels=output_channel,
+                    eps=1e-6,
+                    groups=norm_num_groups,
+                    norm_layer=norm_layer,
+                )
+            elif block_name == "compress_time":
+                block = DepthToSpaceUpsample(
+                    dims=dims, in_channels=input_channel, stride=(2, 1, 1)
+                )
+            elif block_name == "compress_space":
+                block = DepthToSpaceUpsample(
+                    dims=dims, in_channels=input_channel, stride=(1, 2, 2)
+                )
+            elif block_name == "compress_all":
+                block = DepthToSpaceUpsample(
+                    dims=dims,
+                    in_channels=input_channel,
+                    stride=(2, 2, 2),
+                    residual=block_params.get("residual", False),
+                )
+            else:
+                raise ValueError(f"unknown layer: {block_name}")
+
+            self.up_blocks.append(block)
+
+        if norm_layer == "group_norm":
+            self.conv_norm_out = nn.GroupNorm(
+                num_channels=output_channel, num_groups=norm_num_groups, eps=1e-6
+            )
+        elif norm_layer == "pixel_norm":
+            self.conv_norm_out = PixelNorm()
+        elif norm_layer == "layer_norm":
+            self.conv_norm_out = LayerNorm(output_channel, eps=1e-6)
+        else:
+            # Fail here instead of with an AttributeError on the first forward pass.
+            raise ValueError(f"unknown norm_layer: {norm_layer}")
+
+        self.conv_act = nn.SiLU()
+        self.conv_out = make_conv_nd(dims, output_channel, out_channels, 3, padding=1, causal=True)
+        self.gradient_checkpointing = False
+
+    def forward(self, sample: torch.FloatTensor) -> torch.FloatTensor:
+        r"""The forward method of the `Decoder` class."""
+        sample = self.conv_in(sample, causal=self.causal)
+
+        upscale_dtype = next(iter(self.up_blocks.parameters())).dtype
+        sample = sample.to(upscale_dtype)
+
+        use_checkpoint = self.gradient_checkpointing and self.training
+        if use_checkpoint:
+            # Imported by name so that `torch` is not rebound as a local here.
+            from torch.utils.checkpoint import checkpoint
+
+        for up_block in self.up_blocks:
+            if use_checkpoint:
+                # `checkpoint` runs the block itself, so it must receive the inputs.
+                sample = checkpoint(up_block, sample, causal=self.causal, use_reentrant=False)
+            else:
+                sample = up_block(sample, causal=self.causal)
+
+        sample = self.conv_norm_out(sample)
+        sample = self.conv_act(sample)
+        sample = self.conv_out(sample, causal=self.causal)
+
+        sample = unpatchify(sample, patch_size_hw=self.patch_size, patch_size_t=1)
+
+        return sample
+
+
+class UNetMidBlock3D(nn.Module):
+    """
+    A stack of `num_layers` residual blocks that keeps the channel count unchanged.
+
+    Args:
+        dims (`int` or `Tuple[int, int]`): Passed through to every `ResnetBlock3D`.
+        in_channels (`int`): The number of input (and output) channels of each block.
+        dropout (`float`, *optional*, defaults to 0.0): The dropout rate.
+        num_layers (`int`, *optional*, defaults to 1): The number of residual blocks.
+        resnet_eps (`float`, *optional*, 1e-6 ): The epsilon value for the resnet blocks.
+        resnet_groups (`int`, *optional*, defaults to 32):
+            The number of groups for the group normalization layers of the resnet blocks.
+            `None` selects `min(in_channels // 4, 32)`.
+        norm_layer (`str`, *optional*, defaults to `group_norm`): Passed through to every block.
+
+    Returns:
+        `torch.FloatTensor`: The output of the last residual block.
+    """
+
+    def __init__(
+        self,
+        dims: Union[int, Tuple[int, int]],
+        in_channels: int,
+        dropout: float = 0.0,
+        num_layers: int = 1,
+        resnet_eps: float = 1e-6,
+        resnet_groups: int = 32,
+        norm_layer: str = "group_norm",
+    ):
+        super().__init__()
+        if resnet_groups is None:
+            resnet_groups = min(in_channels // 4, 32)
+
+        # Every layer is configured identically and maps in_channels -> in_channels.
+        layer_kwargs = dict(
+            dims=dims,
+            in_channels=in_channels,
+            out_channels=in_channels,
+            eps=resnet_eps,
+            groups=resnet_groups,
+            dropout=dropout,
+            norm_layer=norm_layer,
+        )
+        self.res_blocks = nn.ModuleList()
+        for _ in range(num_layers):
+            self.res_blocks.append(ResnetBlock3D(**layer_kwargs))
+
+    def forward(
+        self, hidden_states: torch.FloatTensor, causal: bool = True
+    ) -> torch.FloatTensor:
+        """Run `hidden_states` through each residual block in order."""
+        out = hidden_states
+        for block in self.res_blocks:
+            out = block(out, causal=causal)
+        return out
+
+
+class DepthToSpaceUpsample(nn.Module):
+    """Upsample by convolving to `prod(stride)` times the channels and unfolding them.
+
+    With `residual=True` the unfolded, channel-repeated input is added to the result.
+    """
+
+    def __init__(self, dims, in_channels, stride, residual=False):
+        super().__init__()
+        self.stride = stride
+        self.out_channels = math.prod(stride) * in_channels
+        self.conv = make_conv_nd(
+            dims=dims,
+            in_channels=in_channels,
+            out_channels=self.out_channels,
+            kernel_size=3,
+            stride=1,
+            causal=True,
+        )
+        self.residual = residual
+
+    def _unfold(self, t):
+        """Move the stride factors from the channel axis onto (d, h, w)."""
+        st, sh, sw = self.stride[0], self.stride[1], self.stride[2]
+        return rearrange(
+            t, "b (c p1 p2 p3) d h w -> b c (d p1) (h p2) (w p3)", p1=st, p2=sh, p3=sw
+        )
+
+    def forward(self, x, causal: bool = True):
+        """Return the upsampled tensor; the first frame is dropped when `stride[0] == 2`."""
+        drop_first = self.stride[0] == 2
+        shortcut = None
+        if self.residual:
+            # Reshape and duplicate the input to match the output shape
+            shortcut = self._unfold(x).repeat(1, math.prod(self.stride), 1, 1, 1)
+            if drop_first:
+                shortcut = shortcut[:, :, 1:, :, :]
+        # The conv widens channels by prod(stride); unfolding brings them back to in_channels.
+        out = self._unfold(self.conv(x, causal=causal))
+        if drop_first:
+            out = out[:, :, 1:, :, :]
+        return out if shortcut is None else out + shortcut
+
+
+class LayerNorm(nn.Module):
+    """`nn.LayerNorm` applied over the channel axis of a `b c d h w` tensor."""
+    def __init__(self, dim, eps, elementwise_affine=True) -> None:
+        super().__init__()
+        self.norm = nn.LayerNorm(dim, eps=eps, elementwise_affine=elementwise_affine)
+
+    def forward(self, x):
+        # nn.LayerNorm normalizes the trailing axis, so move channels last and back.
+        channels_last = self.norm(rearrange(x, "b c d h w -> b d h w c"))
+        return rearrange(channels_last, "b d h w c -> b c d h w")
+
+
+class ResnetBlock3D(nn.Module):
+    r"""
+    A Resnet block.
+
+    Parameters:
+        dims (`int` or `Tuple[int, int]`): The convolution dimensionality passed to the conv factories.
+        in_channels (`int`): The number of channels in the input.
+        out_channels (`int`, *optional*, default to be `None`):
+            The number of output channels for the first conv layer. If None, same as `in_channels`.
+        dropout (`float`, *optional*, defaults to `0.0`): The dropout probability to use.
+        groups (`int`, *optional*, default to `32`): The number of groups to use for the group normalization.
+        eps (`float`, *optional*, defaults to `1e-6`): The epsilon to use for the normalization.
+        norm_layer (`str`, *optional*, defaults to `group_norm`):
+            One of `group_norm`, `pixel_norm` or `layer_norm`.
+
+    Raises:
+        ValueError: If `norm_layer` is not one of the supported names.
+    """
+
+    def __init__(
+        self,
+        dims: Union[int, Tuple[int, int]],
+        in_channels: int,
+        out_channels: Optional[int] = None,
+        dropout: float = 0.0,
+        groups: int = 32,
+        eps: float = 1e-6,
+        norm_layer: str = "group_norm",
+    ):
+        super().__init__()
+        self.in_channels = in_channels
+        out_channels = in_channels if out_channels is None else out_channels
+        self.out_channels = out_channels
+
+        self.norm1 = self._build_norm(norm_layer, in_channels, groups, eps)
+
+        self.non_linearity = nn.SiLU()
+
+        self.conv1 = make_conv_nd(
+            dims,
+            in_channels,
+            out_channels,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            causal=True,
+        )
+
+        self.norm2 = self._build_norm(norm_layer, out_channels, groups, eps)
+
+        self.dropout = torch.nn.Dropout(dropout)
+
+        self.conv2 = make_conv_nd(
+            dims,
+            out_channels,
+            out_channels,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            causal=True,
+        )
+
+        # A kernel-size-1 projection (preceded by a LayerNorm) is only needed
+        # on the skip path when the channel count changes.
+        self.conv_shortcut = (
+            make_linear_nd(
+                dims=dims, in_channels=in_channels, out_channels=out_channels
+            )
+            if in_channels != out_channels
+            else nn.Identity()
+        )
+
+        self.norm3 = (
+            LayerNorm(in_channels, eps=eps, elementwise_affine=True)
+            if in_channels != out_channels
+            else nn.Identity()
+        )
+
+    @staticmethod
+    def _build_norm(norm_layer: str, num_channels: int, groups: int, eps: float) -> nn.Module:
+        """Create the normalization module named by `norm_layer`."""
+        if norm_layer == "group_norm":
+            return nn.GroupNorm(
+                num_groups=groups, num_channels=num_channels, eps=eps, affine=True
+            )
+        if norm_layer == "pixel_norm":
+            return PixelNorm()
+        if norm_layer == "layer_norm":
+            return LayerNorm(num_channels, eps=eps, elementwise_affine=True)
+        # Without this, norm1/norm2 would be missing and forward would fail later.
+        raise ValueError(f"unknown norm_layer: {norm_layer}")
+
+    def forward(
+        self,
+        input_tensor: torch.FloatTensor,
+        causal: bool = True,
+    ) -> torch.FloatTensor:
+        """Return `conv_shortcut(norm3(x)) + conv2(...conv1(...norm1(x)))`."""
+        hidden_states = self.norm1(input_tensor)
+        hidden_states = self.non_linearity(hidden_states)
+        hidden_states = self.conv1(hidden_states, causal=causal)
+
+        hidden_states = self.norm2(hidden_states)
+        hidden_states = self.non_linearity(hidden_states)
+        hidden_states = self.dropout(hidden_states)
+        hidden_states = self.conv2(hidden_states, causal=causal)
+
+        # Skip path: both modules are nn.Identity unless the channel count changes.
+        input_tensor = self.norm3(input_tensor)
+        input_tensor = self.conv_shortcut(input_tensor)
+
+        return input_tensor + hidden_states
+
+
+def patchify(x, patch_size_hw, patch_size_t=1):
+    """Fold spatial (and, for 5D input, temporal) patches of `x` into dim 1.
+
+    Returns `x` unchanged when both patch sizes are 1; raises ValueError when
+    `x` is neither 4D nor 5D.
+    """
+    if patch_size_hw == 1 and patch_size_t == 1:
+        return x
+
+    rank = x.dim()
+    if rank not in (4, 5):
+        raise ValueError(f"Invalid input shape: {x.shape}")
+    if rank == 4:
+        # patch_size_t is not applied to 4D input.
+        pattern, sizes = "b c (h q) (w r) -> b (c r q) h w", {}
+    else:
+        pattern, sizes = "b c (f p) (h q) (w r) -> b (c p r q) f h w", {"p": patch_size_t}
+
+    return rearrange(x, pattern, q=patch_size_hw, r=patch_size_hw, **sizes)
+
+
+def unpatchify(x, patch_size_hw, patch_size_t=1):
+    """Inverse of `patchify`: unfold patches from dim 1 back onto (f,) h, w.
+
+    Returns `x` unchanged when both patch sizes are 1.
+
+    Raises:
+        ValueError: If `x` is neither 4D nor 5D (mirrors `patchify`).
+    """
+    if patch_size_hw == 1 and patch_size_t == 1:
+        return x
+    hw = {"q": patch_size_hw, "r": patch_size_hw}
+    if x.dim() == 4:
+        return rearrange(x, "b (c r q) h w -> b c (h q) (w r)", **hw)
+    if x.dim() == 5:
+        pattern = "b (c p r q) f h w -> b c (f p) (h q) (w r)"
+        return rearrange(x, pattern, p=patch_size_t, **hw)
+    # Previously fell through and returned x unpatchified without any error.
+    raise ValueError(f"Invalid input shape: {x.shape}")
+
+class processor(nn.Module):
+    """Per-channel statistics buffers (128 entries each); two of them drive (un_)normalize."""
+    def __init__(self):
+        super().__init__()
+        for stat_name in ("std-of-means", "mean-of-means", "mean-of-stds", "mean-of-stds_over_std-of-means", "channel"):
+            self.register_buffer(stat_name, torch.empty(128))  # uninitialized placeholder
+
+    def un_normalize(self, x):
+        std, mean = self.get_buffer("std-of-means"), self.get_buffer("mean-of-means")
+        return (x * std.view(1, -1, 1, 1, 1)) + mean.view(1, -1, 1, 1, 1)
+
+    def normalize(self, x):
+        mean, std = self.get_buffer("mean-of-means"), self.get_buffer("std-of-means")
+        return (x - mean.view(1, -1, 1, 1, 1)) / std.view(1, -1, 1, 1, 1)
+
+class VideoVAE(nn.Module):
+    """Video VAE built from a hard-coded config: an `Encoder`, a `Decoder` and latent statistics."""
+
+    def __init__(self):
+        super().__init__()
+        config = {
+            "_class_name": "CausalVideoAutoencoder",
+            "dims": 3,
+            "in_channels": 3,
+            "out_channels": 3,
+            "latent_channels": 128,
+            "blocks": [
+                ["res_x", 4],
+                ["compress_all", 1], ["res_x_y", 1], ["res_x", 3],
+                ["compress_all", 1], ["res_x_y", 1], ["res_x", 3],
+                ["compress_all", 1], ["res_x", 3], ["res_x", 4],
+            ],
+            "scaling_factor": 1.0,
+            "norm_layer": "pixel_norm",
+            "patch_size": 4,
+            "latent_log_var": "uniform",
+            "use_quant_conv": False,
+            "causal_decoder": False,
+        }
+
+        dims, latent_channels = config["dims"], config["latent_channels"]
+        shared_blocks = config.get("blocks")
+        patch_size = config.get("patch_size", 1)
+        norm_layer = config.get("norm_layer", "group_norm")
+        # Without an explicit "latent_log_var", `double_z` picks the fallback.
+        log_var_fallback = "per_channel" if config.get("double_z", True) else "none"
+
+        self.encoder = Encoder(
+            dims=dims,
+            in_channels=config.get("in_channels", 3),
+            out_channels=latent_channels,
+            blocks=config.get("encoder_blocks", shared_blocks),
+            patch_size=patch_size,
+            latent_log_var=config.get("latent_log_var", log_var_fallback),
+            norm_layer=norm_layer,
+        )
+
+        self.decoder = Decoder(
+            dims=dims,
+            in_channels=latent_channels,
+            out_channels=config.get("out_channels", 3),
+            blocks=config.get("decoder_blocks", shared_blocks),
+            patch_size=patch_size,
+            norm_layer=norm_layer,
+            causal=config.get("causal_decoder", False),
+        )
+
+        self.per_channel_statistics = processor()
+
+    def encode(self, x):
+        """Encode `x` and return the normalized first half of the encoder output channels."""
+        means, _logvar = torch.chunk(self.encoder(x), 2, dim=1)
+        return self.per_channel_statistics.normalize(means)
+
+    def decode(self, x):
+        """Un-normalize the latent `x` and run it through the decoder."""
+        return self.decoder(self.per_channel_statistics.un_normalize(x))
+
--- a/comfy/ldm/lightricks/vae/conv_nd_factory.py
+++ b/comfy/ldm/lightricks/vae/conv_nd_factory.py
@@ -0,0 +1,82 @@
+from typing import Tuple, Union
+
+import torch
+
+from .dual_conv3d import DualConv3d
+from .causal_conv3d import CausalConv3d
+
+
+def make_conv_nd(
+    dims: Union[int, Tuple[int, int]],
+    in_channels: int,
+    out_channels: int,
+    kernel_size: int,
+    stride=1,
+    padding=0,
+    dilation=1,
+    groups=1,
+    bias=True,
+    causal=False,
+):
+    """Build the convolution module selected by `dims` (and `causal` for 3D).
+
+    Args:
+        dims: `2` for `torch.nn.Conv2d`, `3` for a 3D convolution, or the
+            tuple `(2, 1)` for `DualConv3d`.
+        in_channels: Forwarded to the module as `in_channels`.
+        out_channels: Forwarded to the module as `out_channels`.
+        kernel_size: Forwarded to the module as `kernel_size`.
+        stride: Forwarded to the module as `stride`.
+        padding: Forwarded to the module as `padding`.
+        dilation: Forwarded as `dilation`, except to `DualConv3d`.
+        groups: Forwarded as `groups`, except to `DualConv3d`.
+        bias: Forwarded to the module as `bias`.
+        causal: With `dims == 3`, selects `CausalConv3d` instead of
+            `torch.nn.Conv3d`; has no effect for the other `dims` values.
+
+    Returns:
+        The newly constructed convolution module.
+
+    Raises:
+        ValueError: If `dims` is none of the supported values.
+    """
+    # Arguments accepted by every supported module type.
+    shared = dict(
+        in_channels=in_channels,
+        out_channels=out_channels,
+        kernel_size=kernel_size,
+        stride=stride,
+        padding=padding,
+        bias=bias,
+    )
+    extended = dict(shared, dilation=dilation, groups=groups)
+
+    if dims == 2:
+        return torch.nn.Conv2d(**extended)
+
+    if dims == 3:
+        conv_cls = CausalConv3d if causal else torch.nn.Conv3d
+        return conv_cls(**extended)
+
+    if dims == (2, 1):
+        return DualConv3d(**shared)
+
+    raise ValueError(f"unsupported dimensions: {dims}")
+
+
+def make_linear_nd(
+    dims: int,
+    in_channels: int,
+    out_channels: int,
+    bias=True,
+):
+    """Return a kernel-size-1 convolution: 2D for `dims == 2`, 3D for `3` or `(2, 1)`."""
+    if dims == 2:
+        conv_cls = torch.nn.Conv2d
+    elif dims == 3 or dims == (2, 1):
+        conv_cls = torch.nn.Conv3d
+    else:
+        raise ValueError(f"unsupported dimensions: {dims}")
+
+    # A size-1 kernel only mixes channels, i.e. it acts as a per-position linear layer.
+    return conv_cls(in_channels=in_channels, out_channels=out_channels, kernel_size=1, bias=bias)
--- a/comfy/ldm/lightricks/vae/dual_conv3d.py
+++ b/comfy/ldm/lightricks/vae/dual_conv3d.py
@@ -0,0 +1,195 @@
+import math
+from typing import Tuple, Union
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange
+
+
+class DualConv3d(nn.Module):  # 3D convolution split in two: a (1, k1, k2) kernel (weight1) followed by a (k0, 1, 1) kernel (weight2)
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        stride: Union[int, Tuple[int, int, int]] = 1,
+        padding: Union[int, Tuple[int, int, int]] = 0,
+        dilation: Union[int, Tuple[int, int, int]] = 1,
+        groups=1,
+        bias=True,
+    ):
+        super(DualConv3d, self).__init__()
+
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        # Expand int kernel_size, stride, padding, and dilation to tuples of length 3 (non-int values are used as given)
+        if isinstance(kernel_size, int):
+            kernel_size = (kernel_size, kernel_size, kernel_size)
+        if kernel_size == (1, 1, 1):  # a pointwise kernel is rejected; the message points at make_linear_nd
+            raise ValueError(
+                "kernel_size must be greater than 1. Use make_linear_nd instead."
+            )
+        if isinstance(stride, int):
+            stride = (stride, stride, stride)
+        if isinstance(padding, int):
+            padding = (padding, padding, padding)
+        if isinstance(dilation, int):
+            dilation = (dilation, dilation, dilation)
+
+        # Set parameters for convolutions
+        self.groups = groups  # the same groups value is passed to both convolutions
+        self.bias = bias  # boolean flag only; the bias tensors are bias1 / bias2
+
+        # Define the size of the channels after the first convolution
+        intermediate_channels = (
+            out_channels if in_channels < out_channels else in_channels
+        )  # i.e. the larger of in_channels and out_channels
+
+        # Define parameters for the first convolution (kernel depth 1: acts on the last two axes only)
+        self.weight1 = nn.Parameter(
+            torch.Tensor(
+                intermediate_channels,
+                in_channels // groups,
+                1,
+                kernel_size[1],
+                kernel_size[2],
+            )
+        )  # allocated uninitialised; filled in by reset_parameters()
+        self.stride1 = (1, stride[1], stride[2])
+        self.padding1 = (0, padding[1], padding[2])
+        self.dilation1 = (1, dilation[1], dilation[2])
+        if bias:
+            self.bias1 = nn.Parameter(torch.Tensor(intermediate_channels))
+        else:
+            self.register_parameter("bias1", None)
+
+        # Define parameters for the second convolution (kernel (k0, 1, 1): acts on the depth axis only)
+        self.weight2 = nn.Parameter(
+            torch.Tensor(
+                out_channels, intermediate_channels // groups, kernel_size[0], 1, 1
+            )
+        )  # allocated uninitialised; filled in by reset_parameters()
+        self.stride2 = (stride[0], 1, 1)
+        self.padding2 = (padding[0], 0, 0)
+        self.dilation2 = (dilation[0], 1, 1)
+        if bias:
+            self.bias2 = nn.Parameter(torch.Tensor(out_channels))
+        else:
+            self.register_parameter("bias2", None)
+
+        # Initialize weights and biases
+        self.reset_parameters()
+
+    def reset_parameters(self):  # Kaiming-uniform weights; each bias uniform in +/- 1/sqrt(fan_in) of its own weight
+        nn.init.kaiming_uniform_(self.weight1, a=math.sqrt(5))
+        nn.init.kaiming_uniform_(self.weight2, a=math.sqrt(5))
+        if self.bias:  # bias1 / bias2 only exist as tensors when the flag is set
+            fan_in1, _ = nn.init._calculate_fan_in_and_fan_out(self.weight1)
+            bound1 = 1 / math.sqrt(fan_in1)
+            nn.init.uniform_(self.bias1, -bound1, bound1)
+            fan_in2, _ = nn.init._calculate_fan_in_and_fan_out(self.weight2)
+            bound2 = 1 / math.sqrt(fan_in2)
+            nn.init.uniform_(self.bias2, -bound2, bound2)
+
+    def forward(self, x, use_conv3d=False, skip_time_conv=False):  # dispatches to one of two implementations; the conv2d + conv1d one is the default
+        if use_conv3d:
+            return self.forward_with_3d(x=x, skip_time_conv=skip_time_conv)
+        else:
+            return self.forward_with_2d(x=x, skip_time_conv=skip_time_conv)
+
+    def forward_with_3d(self, x, skip_time_conv):  # both stages run through F.conv3d
+        # First convolution
+        x = F.conv3d(
+            x,
+            self.weight1,
+            self.bias1,
+            self.stride1,
+            self.padding1,
+            self.dilation1,
+            self.groups,
+        )
+
+        if skip_time_conv:  # return the output of the first convolution only
+            return x
+
+        # Second convolution
+        x = F.conv3d(
+            x,
+            self.weight2,
+            self.bias2,
+            self.stride2,
+            self.padding2,
+            self.dilation2,
+            self.groups,
+        )
+
+        return x
+
+    def forward_with_2d(self, x, skip_time_conv):  # same two stages expressed as F.conv2d followed by F.conv1d
+        b, c, d, h, w = x.shape  # x must be 5-dimensional
+
+        # First 2D convolution
+        x = rearrange(x, "b c d h w -> (b d) c h w")  # fold the d axis into the batch axis
+        # Squeeze the depth dimension out of weight1 since it's 1
+        weight1 = self.weight1.squeeze(2)
+        # Select stride, padding, and dilation for the 2D convolution
+        stride1 = (self.stride1[1], self.stride1[2])
+        padding1 = (self.padding1[1], self.padding1[2])
+        dilation1 = (self.dilation1[1], self.dilation1[2])
+        x = F.conv2d(x, weight1, self.bias1, stride1, padding1, dilation1, self.groups)
+
+        _, _, h, w = x.shape  # re-read h and w from the conv2d output; they are reused in the final rearrange
+
+        if skip_time_conv:  # restore the 5-D layout and return the first-stage output only
+            x = rearrange(x, "(b d) c h w -> b c d h w", b=b)
+            return x
+
+        # Second convolution which is essentially treated as a 1D convolution across the 'd' dimension
+        x = rearrange(x, "(b d) c h w -> (b h w) c d", b=b)
+
+        # Reshape weight2 to match the expected dimensions for conv1d
+        weight2 = self.weight2.squeeze(-1).squeeze(-1)
+        # Use only the relevant dimension for stride, padding, and dilation for the 1D convolution
+        stride2 = self.stride2[0]
+        padding2 = self.padding2[0]
+        dilation2 = self.dilation2[0]
+        x = F.conv1d(x, weight2, self.bias2, stride2, padding2, dilation2, self.groups)
+        x = rearrange(x, "(b h w) c d -> b c d h w", b=b, h=h, w=w)
+
+        return x
+
+    @property
+    def weight(self):  # exposes the second convolution's weight only (weight1 is not included)
+        return self.weight2
+
+
+def test_dual_conv3d_consistency():
+    """Check that DualConv3d's conv3d path and its conv2d + conv1d path agree on one random input."""
+
+    # Layer configuration: 3 -> 5 channels, 3x3x3 kernel, stride 2 and padding 1 on every axis
+    in_channels, out_channels = 3, 5
+    geometry = {
+        "kernel_size": (3, 3, 3),
+        "stride": (2, 2, 2),
+        "padding": (1, 1, 1),
+    }
+
+    # Module under test, with biases enabled
+    layer = DualConv3d(
+        in_channels=in_channels,
+        out_channels=out_channels,
+        bias=True,
+        **geometry,
+    )
+
+    # Random 5-D input with `in_channels` channels
+    sample = torch.randn(1, in_channels, 10, 10, 10)
+
+    # Same input through both implementations
+    via_conv3d = layer(sample, use_conv3d=True)
+    via_conv2d = layer(sample, use_conv3d=False)
+
+    # Both results must be close (atol=1e-6)
+    outputs_match = torch.allclose(via_conv3d, via_conv2d, atol=1e-6)
+    assert outputs_match, "Outputs are not consistent between 3D and 2D convolutions."
--- a/comfy/ldm/lightricks/vae/pixel_norm.py
+++ b/comfy/ldm/lightricks/vae/pixel_norm.py
@@ -0,0 +1,12 @@
+import torch
+from torch import nn
+
+
+class PixelNorm(nn.Module):
+    """Divide x by sqrt(mean(x**2) + eps), with the mean taken along `dim` (kept for broadcasting)."""
+    def __init__(self, dim=1, eps=1e-8):
+        super().__init__()
+        self.dim, self.eps = dim, eps
+
+    def forward(self, x):
+        return x / (x.pow(2).mean(dim=self.dim, keepdim=True) + self.eps).sqrt()
--- a/comfy/ldm/modules/attention.py
+++ b/comfy/ldm/modules/attention.py
@@ -299,7 +299,10 @@ def attention_split(q, k, v, heads, mask=None, attn_precision=None, skip_reshape
                    if len(mask.shape) == 2:
                        s1 += mask[i:end]
                    else:
-                        s1 += mask[:, i:end]
+                        if mask.shape[1] == 1:
+                            s1 += mask
+                        else:
+                            s1 += mask[:, i:end]

                s2 = s1.softmax(dim=-1).to(v.dtype)
                del s1
@@ -372,10 +375,10 @@ def attention_xformers(q, k, v, heads, mask=None, attn_precision=None, skip_resh
        )

    if mask is not None:
-        pad = 8 - q.shape[1] % 8
-        mask_out = torch.empty([q.shape[0], q.shape[1], q.shape[1] + pad], dtype=q.dtype, device=q.device)
-        mask_out[:, :, :mask.shape[-1]] = mask
-        mask = mask_out[:, :, :mask.shape[-1]]
+        pad = 8 - mask.shape[-1] % 8
+        mask_out = torch.empty([q.shape[0], q.shape[2], q.shape[1], mask.shape[-1] + pad], dtype=q.dtype, device=q.device)
+        mask_out[..., :mask.shape[-1]] = mask
+        mask = mask_out[..., :mask.shape[-1]]

    out = xformers.ops.memory_efficient_attention(q, k, v, attn_bias=mask)

@@ -393,6 +396,13 @@ def attention_xformers(q, k, v, heads, mask=None, attn_precision=None, skip_resh

    return out

+if model_management.is_nvidia(): #pytorch 2.3 and up seem to have this issue. NOTE(review): the issue itself is not described here.
+    SDP_BATCH_LIMIT = 2**15  # attention_pytorch processes q/k/v in dim-0 slices of this size once q.shape[0] exceeds it
+else:
+    #TODO: other GPUs ?
+    SDP_BATCH_LIMIT = 2**31  # presumably large enough that the sliced path is never taken -- TODO confirm
+
+
 def attention_pytorch(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=False):
    if skip_reshape:
        b, _, _, dim_head = q.shape
@@ -404,10 +414,15 @@ def attention_pytorch(q, k, v, heads, mask=None, attn_precision=None, skip_resha
            (q, k, v),
        )

-    out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0, is_causal=False)
-    out = (
-        out.transpose(1, 2).reshape(b, -1, heads * dim_head)
-    )
+    if SDP_BATCH_LIMIT >= q.shape[0]:
+        out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0, is_causal=False)
+        out = (
+            out.transpose(1, 2).reshape(b, -1, heads * dim_head)
+        )
+    else:
+        out = torch.empty((q.shape[0], q.shape[2], heads * dim_head), dtype=q.dtype, layout=q.layout, device=q.device)
+        for i in range(0, q.shape[0], SDP_BATCH_LIMIT):
+            out[i : i + SDP_BATCH_LIMIT] = torch.nn.functional.scaled_dot_product_attention(q[i : i + SDP_BATCH_LIMIT], k[i : i + SDP_BATCH_LIMIT], v[i : i + SDP_BATCH_LIMIT], attn_mask=mask, dropout_p=0.0, is_causal=False).transpose(1, 2).reshape(-1, q.shape[2], heads * dim_head)
    return out


--- a/comfy/ldm/modules/diffusionmodules/mmdit.py
+++ b/comfy/ldm/modules/diffusionmodules/mmdit.py
@@ -1,11 +1,11 @@
 import logging
 import math
-from typing import Dict, Optional
+from typing import Dict, Optional, List

 import numpy as np
 import torch
 import torch.nn as nn
-from .. import attention
+from ..attention import optimized_attention
 from einops import rearrange, repeat
 from .util import timestep_embedding
 import comfy.ops
@@ -97,7 +97,7 @@ class PatchEmbed(nn.Module):
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()

    def forward(self, x):
-        B, C, H, W = x.shape
+        # B, C, H, W = x.shape
        # if self.img_size is not None:
        #     if self.strict_img_size:
        #         _assert(H == self.img_size[0], f"Input height ({H}) doesn't match model ({self.img_size[0]}).")
@@ -266,8 +266,6 @@ def split_qkv(qkv, head_dim):
    qkv = qkv.reshape(qkv.shape[0], qkv.shape[1], 3, -1, head_dim).movedim(2, 0)
    return qkv[0], qkv[1], qkv[2]

-def optimized_attention(qkv, num_heads):
-    return attention.optimized_attention(qkv[0], qkv[1], qkv[2], num_heads)

 class SelfAttention(nn.Module):
    ATTENTION_MODES = ("xformers", "torch", "torch-hb", "math", "debug")
@@ -326,9 +324,9 @@ class SelfAttention(nn.Module):
        return x

    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        qkv = self.pre_attention(x)
+        q, k, v = self.pre_attention(x)
        x = optimized_attention(
-            qkv, num_heads=self.num_heads
+            q, k, v, heads=self.num_heads
        )
        x = self.post_attention(x)
        return x
@@ -417,6 +415,7 @@ class DismantledBlock(nn.Module):
        scale_mod_only: bool = False,
        swiglu: bool = False,
        qk_norm: Optional[str] = None,
+        x_block_self_attn: bool = False,
        dtype=None,
        device=None,
        operations=None,
@@ -440,6 +439,24 @@ class DismantledBlock(nn.Module):
            device=device,
            operations=operations
        )
+        if x_block_self_attn:
+            assert not pre_only
+            assert not scale_mod_only
+            self.x_block_self_attn = True
+            self.attn2 = SelfAttention(
+                dim=hidden_size,
+                num_heads=num_heads,
+                qkv_bias=qkv_bias,
+                attn_mode=attn_mode,
+                pre_only=False,
+                qk_norm=qk_norm,
+                rmsnorm=rmsnorm,
+                dtype=dtype,
+                device=device,
+                operations=operations
+            )
+        else:
+            self.x_block_self_attn = False
        if not pre_only:
            if not rmsnorm:
                self.norm2 = operations.LayerNorm(
@@ -466,7 +483,11 @@ class DismantledBlock(nn.Module):
                    multiple_of=256,
                )
        self.scale_mod_only = scale_mod_only
-        if not scale_mod_only:
+        if x_block_self_attn:
+            assert not pre_only
+            assert not scale_mod_only
+            n_mods = 9
+        elif not scale_mod_only:
            n_mods = 6 if not pre_only else 2
        else:
            n_mods = 4 if not pre_only else 1
@@ -527,14 +548,64 @@ class DismantledBlock(nn.Module):
        )
        return x

+    def pre_attention_x(self, x: torch.Tensor, c: torch.Tensor) -> torch.Tensor:
+        """Prepare both attention branches of a block built with x_block_self_attn.
+
+        Splits adaLN_modulation(c) into nine chunks along dim 1, modulates the
+        shared norm1(x) once per branch and runs each branch's pre_attention.
+
+        Returns (qkv, qkv2, intermediates); intermediates holds, in order,
+        x, gate_msa, shift_mlp, scale_mlp, gate_mlp and gate_msa2 -- the arguments
+        post_attention_x takes after its two attention inputs.
+        NOTE(review): the return annotation says Tensor but a 3-tuple is returned.
+        """
+        assert self.x_block_self_attn
+        # Nine modulation chunks: shift/scale/gate for attn, for the MLP, then for attn2
+        mods = self.adaLN_modulation(c).chunk(9, dim=1)
+        (shift_msa, scale_msa, gate_msa,
+         shift_mlp, scale_mlp, gate_mlp,
+         shift_msa2, scale_msa2, gate_msa2) = mods
+        normed = self.norm1(x)
+        # Branch 1 uses self.attn, branch 2 uses self.attn2; both start from the same normalised x
+        qkv = self.attn.pre_attention(modulate(normed, shift_msa, scale_msa))
+        qkv2 = self.attn2.pre_attention(modulate(normed, shift_msa2, scale_msa2))
+        # x itself is handed on unchanged alongside the gates and MLP modulation
+        intermediates = (x, gate_msa, shift_mlp, scale_mlp, gate_mlp, gate_msa2)
+        return qkv, qkv2, intermediates
+
+    def post_attention_x(self, attn, attn2, x, gate_msa, shift_mlp, scale_mlp, gate_mlp, gate_msa2):
+        """Add both gated attention branches, then the gated MLP output, onto x.
+
+        attn and attn2 go through self.attn.post_attention / self.attn2.post_attention;
+        each gate is unsqueezed at dim 1 before it multiplies its branch.
+        """
+        assert not self.pre_only
+        branch1 = gate_msa.unsqueeze(1) * self.attn.post_attention(attn)
+        branch2 = gate_msa2.unsqueeze(1) * self.attn2.post_attention(attn2)
+        x = (x + branch1) + branch2
+        mlp_in = modulate(self.norm2(x), shift_mlp, scale_mlp)
+        return x + gate_mlp.unsqueeze(1) * self.mlp(mlp_in)
+
    def forward(self, x: torch.Tensor, c: torch.Tensor) -> torch.Tensor:
        assert not self.pre_only
-        qkv, intermediates = self.pre_attention(x, c)
-        attn = optimized_attention(
-            qkv,
-            num_heads=self.attn.num_heads,
-        )
-        return self.post_attention(attn, *intermediates)
+        if self.x_block_self_attn:
+            qkv, qkv2, intermediates = self.pre_attention_x(x, c)
+            attn, _ = optimized_attention(
+                qkv[0], qkv[1], qkv[2],
+                num_heads=self.attn.num_heads,
+            )
+            attn2, _ = optimized_attention(
+                qkv2[0], qkv2[1], qkv2[2],
+                num_heads=self.attn2.num_heads,
+            )
+            return self.post_attention_x(attn, attn2, *intermediates)
+        else:
+            qkv, intermediates = self.pre_attention(x, c)
+            attn = optimized_attention(
+                qkv[0], qkv[1], qkv[2],
+                heads=self.attn.num_heads,
+            )
+            return self.post_attention(attn, *intermediates)


 def block_mixing(*args, use_checkpoint=True, **kwargs):
@@ -549,7 +620,10 @@ def block_mixing(*args, use_checkpoint=True, **kwargs):
 def _block_mixing(context, x, context_block, x_block, c):
    context_qkv, context_intermediates = context_block.pre_attention(context, c)

-    x_qkv, x_intermediates = x_block.pre_attention(x, c)
+    if x_block.x_block_self_attn:
+        x_qkv, x_qkv2, x_intermediates = x_block.pre_attention_x(x, c)
+    else:
+        x_qkv, x_intermediates = x_block.pre_attention(x, c)

    o = []
    for t in range(3):
@@ -557,8 +631,8 @@ def _block_mixing(context, x, context_block, x_block, c):
    qkv = tuple(o)

    attn = optimized_attention(
-        qkv,
-        num_heads=x_block.attn.num_heads,
+        qkv[0], qkv[1], qkv[2],
+        heads=x_block.attn.num_heads,
    )
    context_attn, x_attn = (
        attn[:, : context_qkv[0].shape[1]],
@@ -570,7 +644,14 @@ def _block_mixing(context, x, context_block, x_block, c):

    else:
        context = None
-    x = x_block.post_attention(x_attn, *x_intermediates)
+    if x_block.x_block_self_attn:
+        attn2 = optimized_attention(
+                x_qkv2[0], x_qkv2[1], x_qkv2[2],
+                heads=x_block.attn2.num_heads,
+            )
+        x = x_block.post_attention_x(x_attn, attn2, *x_intermediates)
+    else:
+        x = x_block.post_attention(x_attn, *x_intermediates)
    return context, x


@@ -585,8 +666,13 @@ class JointBlock(nn.Module):
        super().__init__()
        pre_only = kwargs.pop("pre_only")
        qk_norm = kwargs.pop("qk_norm", None)
+        x_block_self_attn = kwargs.pop("x_block_self_attn", False)
        self.context_block = DismantledBlock(*args, pre_only=pre_only, qk_norm=qk_norm, **kwargs)
-        self.x_block = DismantledBlock(*args, pre_only=False, qk_norm=qk_norm, **kwargs)
+        self.x_block = DismantledBlock(*args,
+                                       pre_only=False,
+                                       qk_norm=qk_norm,
+                                       x_block_self_attn=x_block_self_attn,
+                                       **kwargs)

    def forward(self, *args, **kwargs):
        return block_mixing(
@@ -642,7 +728,7 @@ class SelfAttentionContext(nn.Module):
    def forward(self, x):
        qkv = self.qkv(x)
        q, k, v = split_qkv(qkv, self.dim_head)
-        x = optimized_attention((q.reshape(q.shape[0], q.shape[1], -1), k, v), self.heads)
+        x = optimized_attention(q.reshape(q.shape[0], q.shape[1], -1), k, v, heads=self.heads)
        return self.proj(x)

 class ContextProcessorBlock(nn.Module):
@@ -701,9 +787,12 @@ class MMDiT(nn.Module):
        qk_norm: Optional[str] = None,
        qkv_bias: bool = True,
        context_processor_layers = None,
+        x_block_self_attn: bool = False,
+        x_block_self_attn_layers: Optional[List[int]] = [],
        context_size = 4096,
        num_blocks = None,
        final_layer = True,
+        skip_blocks = False,
        dtype = None, #TODO
        device = None,
        operations = None,
@@ -718,6 +807,7 @@ class MMDiT(nn.Module):
        self.pos_embed_scaling_factor = pos_embed_scaling_factor
        self.pos_embed_offset = pos_embed_offset
        self.pos_embed_max_size = pos_embed_max_size
+        self.x_block_self_attn_layers = x_block_self_attn_layers

        # hidden_size = default(hidden_size, 64 * depth)
        # num_heads = default(num_heads, hidden_size // 64)
@@ -775,26 +865,28 @@ class MMDiT(nn.Module):
            self.pos_embed = None

        self.use_checkpoint = use_checkpoint
-        self.joint_blocks = nn.ModuleList(
-            [
-                JointBlock(
-                    self.hidden_size,
-                    num_heads,
-                    mlp_ratio=mlp_ratio,
-                    qkv_bias=qkv_bias,
-                    attn_mode=attn_mode,
-                    pre_only=(i == num_blocks - 1) and final_layer,
-                    rmsnorm=rmsnorm,
-                    scale_mod_only=scale_mod_only,
-                    swiglu=swiglu,
-                    qk_norm=qk_norm,
-                    dtype=dtype,
-                    device=device,
-                    operations=operations
-                )
-                for i in range(num_blocks)
-            ]
-        )
+        if not skip_blocks:
+            self.joint_blocks = nn.ModuleList(
+                [
+                    JointBlock(
+                        self.hidden_size,
+                        num_heads,
+                        mlp_ratio=mlp_ratio,
+                        qkv_bias=qkv_bias,
+                        attn_mode=attn_mode,
+                        pre_only=(i == num_blocks - 1) and final_layer,
+                        rmsnorm=rmsnorm,
+                        scale_mod_only=scale_mod_only,
+                        swiglu=swiglu,
+                        qk_norm=qk_norm,
+                        x_block_self_attn=(i in self.x_block_self_attn_layers) or x_block_self_attn,
+                        dtype=dtype,
+                        device=device,
+                        operations=operations,
+                    )
+                    for i in range(num_blocks)
+                ]
+            )

        if final_layer:
            self.final_layer = FinalLayer(self.hidden_size, patch_size, self.out_channels, dtype=dtype, device=device, operations=operations)
@@ -857,7 +949,9 @@ class MMDiT(nn.Module):
        c_mod: torch.Tensor,
        context: Optional[torch.Tensor] = None,
        control = None,
+        transformer_options = {},
    ) -> torch.Tensor:
+        patches_replace = transformer_options.get("patches_replace", {})
        if self.register_length > 0:
            context = torch.cat(
                (
@@ -869,14 +963,25 @@ class MMDiT(nn.Module):

        # context is B, L', D
        # x is B, L, D
+        blocks_replace = patches_replace.get("dit", {})
        blocks = len(self.joint_blocks)
        for i in range(blocks):
-            context, x = self.joint_blocks[i](
-                context,
-                x,
-                c=c_mod,
-                use_checkpoint=self.use_checkpoint,
-            )
+            if ("double_block", i) in blocks_replace:
+                def block_wrap(args):
+                    out = {}
+                    out["txt"], out["img"] = self.joint_blocks[i](args["txt"], args["img"], c=args["vec"])
+                    return out
+
+                out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "vec": c_mod}, {"original_block": block_wrap})
+                context = out["txt"]
+                x = out["img"]
+            else:
+                context, x = self.joint_blocks[i](
+                    context,
+                    x,
+                    c=c_mod,
+                    use_checkpoint=self.use_checkpoint,
+                )
            if control is not None:
                control_o = control.get("output")
                if i < len(control_o):
@@ -894,6 +999,7 @@ class MMDiT(nn.Module):
        y: Optional[torch.Tensor] = None,
        context: Optional[torch.Tensor] = None,
        control = None,
+        transformer_options = {},
    ) -> torch.Tensor:
        """
        Forward pass of DiT.
@@ -915,7 +1021,7 @@ class MMDiT(nn.Module):
        if context is not None:
            context = self.context_embedder(context)

-        x = self.forward_core_with_concat(x, c, context, control)
+        x = self.forward_core_with_concat(x, c, context, control, transformer_options)

        x = self.unpatchify(x, hw=hw)  # (N, out_channels, H, W)
        return x[:,:,:hw[-2],:hw[-1]]
@@ -929,7 +1035,8 @@ class OpenAISignatureMMDITWrapper(MMDiT):
        context: Optional[torch.Tensor] = None,
        y: Optional[torch.Tensor] = None,
        control = None,
+        transformer_options = {},
        **kwargs,
    ) -> torch.Tensor:
-        return super().forward(x, timesteps, context=context, y=y, control=control)
+        return super().forward(x, timesteps, context=context, y=y, control=control, transformer_options=transformer_options)

--- a/comfy/ldm/modules/sub_quadratic_attention.py
+++ b/comfy/ldm/modules/sub_quadratic_attention.py
@@ -234,6 +234,8 @@ def efficient_dot_product_attention(
    def get_mask_chunk(chunk_idx: int) -> Tensor:
        if mask is None:
            return None
+        if mask.shape[1] == 1:
+            return mask
        chunk = min(query_chunk_size, q_tokens)
        return mask[:,chunk_idx:chunk_idx + chunk]

--- a/comfy/lora.py
+++ b/comfy/lora.py
@@ -49,6 +49,15 @@ def load_lora(lora, to_load):
            dora_scale = lora[dora_scale_name]
            loaded_keys.add(dora_scale_name)

+        reshape_name = "{}.reshape_weight".format(x)
+        reshape = None
+        if reshape_name in lora.keys():
+            try:
+                reshape = lora[reshape_name].tolist()
+                loaded_keys.add(reshape_name)
+            except:
+                pass
+
        regular_lora = "{}.lora_up.weight".format(x)
        diffusers_lora = "{}_lora.up.weight".format(x)
        diffusers2_lora = "{}.lora_B.weight".format(x)
@@ -82,7 +91,7 @@ def load_lora(lora, to_load):
            if mid_name is not None and mid_name in lora.keys():
                mid = lora[mid_name]
                loaded_keys.add(mid_name)
-            patch_dict[to_load[x]] = ("lora", (lora[A_name], lora[B_name], alpha, mid, dora_scale))
+            patch_dict[to_load[x]] = ("lora", (lora[A_name], lora[B_name], alpha, mid, dora_scale, reshape))
            loaded_keys.add(A_name)
            loaded_keys.add(B_name)

@@ -193,6 +202,12 @@ def load_lora(lora, to_load):
            patch_dict["{}.bias".format(to_load[x][:-len(".weight")])] = ("diff", (diff_bias,))
            loaded_keys.add(diff_bias_name)

+        set_weight_name = "{}.set_weight".format(x)
+        set_weight = lora.get(set_weight_name, None)
+        if set_weight is not None:
+            patch_dict[to_load[x]] = ("set", (set_weight,))
+            loaded_keys.add(set_weight_name)
+
    for x in lora.keys():
        if x not in loaded_keys:
            logging.warning("lora key not loaded: {}".format(x))
@@ -282,11 +297,14 @@ def model_lora_keys_unet(model, key_map={}):
    sdk = sd.keys()

    for k in sdk:
-        if k.startswith("diffusion_model.") and k.endswith(".weight"):
-            key_lora = k[len("diffusion_model."):-len(".weight")].replace(".", "_")
-            key_map["lora_unet_{}".format(key_lora)] = k
-            key_map["lora_prior_unet_{}".format(key_lora)] = k #cascade lora: TODO put lora key prefix in the model config
-            key_map["{}".format(k[:-len(".weight")])] = k #generic lora format without any weird key names
+        if k.startswith("diffusion_model."):
+            if k.endswith(".weight"):
+                key_lora = k[len("diffusion_model."):-len(".weight")].replace(".", "_")
+                key_map["lora_unet_{}".format(key_lora)] = k
+                key_map["lora_prior_unet_{}".format(key_lora)] = k #cascade lora: TODO put lora key prefix in the model config
+                key_map["{}".format(k[:-len(".weight")])] = k #generic lora format without any weird key names
+            else:
+                key_map["{}".format(k)] = k #generic lora format for not .weight without any weird key names

    diffusers_keys = comfy.utils.unet_to_diffusers(model.model_config.unet_config)
    for k in diffusers_keys:
@@ -317,6 +335,10 @@ def model_lora_keys_unet(model, key_map={}):
                key_lora = "lora_transformer_{}".format(k[:-len(".weight")].replace(".", "_")) #OneTrainer lora
                key_map[key_lora] = to

+                key_lora = "lycoris_{}".format(k[:-len(".weight")].replace(".", "_")) #simpletuner lycoris format
+                key_map[key_lora] = to
+
+
    if isinstance(model, comfy.model_base.AuraFlow): #Diffusers lora AuraFlow
        diffusers_keys = comfy.utils.auraflow_to_diffusers(model.model_config.unet_config, output_prefix="diffusion_model.")
        for k in diffusers_keys:
@@ -415,7 +437,7 @@ def calculate_weight(patches, weight, key, intermediate_dtype=torch.float32):
            weight *= strength_model

        if isinstance(v, list):
-            v = (calculate_weight(v[1:], comfy.model_management.cast_to_device(v[0], weight.device, intermediate_dtype, copy=True), key, intermediate_dtype=intermediate_dtype), )
+            v = (calculate_weight(v[1:], v[0][1](comfy.model_management.cast_to_device(v[0][0], weight.device, intermediate_dtype, copy=True), inplace=True), key, intermediate_dtype=intermediate_dtype), )

        if len(v) == 1:
            patch_type = "diff"
@@ -436,10 +458,17 @@ def calculate_weight(patches, weight, key, intermediate_dtype=torch.float32):
                    logging.warning("WARNING SHAPE MISMATCH {} WEIGHT NOT MERGED {} != {}".format(key, diff.shape, weight.shape))
                else:
                    weight += function(strength * comfy.model_management.cast_to_device(diff, weight.device, weight.dtype))
+        elif patch_type == "set":
+            weight.copy_(v[0])
        elif patch_type == "lora": #lora/locon
            mat1 = comfy.model_management.cast_to_device(v[0], weight.device, intermediate_dtype)
            mat2 = comfy.model_management.cast_to_device(v[1], weight.device, intermediate_dtype)
            dora_scale = v[4]
+            reshape = v[5]
+
+            if reshape is not None:
+                weight = pad_tensor_to_shape(weight, reshape)
+
            if v[2] is not None:
                alpha = v[2] / mat2.shape[0]
            else:
--- a/comfy/lora_convert.py
+++ b/comfy/lora_convert.py
@@ -0,0 +1,17 @@
+import torch
+
+
+def convert_lora_bfl_control(sd): #BFL loras for Flux
+    """Re-key every entry under "diffusion_model." and add img_in's [lora_B rows, lora_A columns] as reshape_weight."""
+    def rename(key):
+        return "diffusion_model.{}".format(key.replace(".lora_B.bias", ".diff_b").replace("_norm.scale", "_norm.scale.set_weight"))
+    sd_out = {rename(key): value for key, value in sd.items()}
+    rows, cols = sd["img_in.lora_B.weight"].shape[0], sd["img_in.lora_A.weight"].shape[1]
+    sd_out["diffusion_model.img_in.reshape_weight"] = torch.tensor([rows, cols])
+    return sd_out
+
+
+def convert_lora(sd):
+    """Convert state dicts holding both BFL control-lora marker keys; return any other state dict unchanged."""
+    is_bfl_control = "img_in.lora_A.weight" in sd and "single_blocks.0.norm.key_norm.scale" in sd
+    return convert_lora_bfl_control(sd) if is_bfl_control else sd
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -24,11 +24,13 @@ from comfy.ldm.cascade.stage_b import StageB
 from comfy.ldm.modules.encoders.noise_aug_modules import CLIPEmbeddingNoiseAugmentation
 from comfy.ldm.modules.diffusionmodules.upscaling import ImageConcatWithNoiseAugmentation
 from comfy.ldm.modules.diffusionmodules.mmdit import OpenAISignatureMMDITWrapper
+import comfy.ldm.genmo.joint_model.asymm_models_joint
 import comfy.ldm.aura.mmdit
 import comfy.ldm.hydit.models
 import comfy.ldm.audio.dit
 import comfy.ldm.audio.embedders
 import comfy.ldm.flux.model
+import comfy.ldm.lightricks.model

 import comfy.model_management
 import comfy.conds
@@ -96,7 +98,8 @@ class BaseModel(torch.nn.Module):

        if not unet_config.get("disable_unet_model_creation", False):
            if model_config.custom_operations is None:
-                operations = comfy.ops.pick_operations(unet_config.get("dtype", None), self.manual_cast_dtype, fp8_optimizations=model_config.optimizations.get("fp8", False))
+                fp8 = model_config.optimizations.get("fp8", model_config.scaled_fp8 is not None)
+                operations = comfy.ops.pick_operations(unet_config.get("dtype", None), self.manual_cast_dtype, fp8_optimizations=fp8, scaled_fp8=model_config.scaled_fp8)
            else:
                operations = model_config.custom_operations
            self.diffusion_model = unet_model(**unet_config, device=device, operations=operations)
@@ -151,8 +154,7 @@ class BaseModel(torch.nn.Module):
    def encode_adm(self, **kwargs):
        return None

-    def extra_conds(self, **kwargs):
-        out = {}
+    def concat_cond(self, **kwargs):
        if len(self.concat_keys) > 0:
            cond_concat = []
            denoise_mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None))
@@ -191,7 +193,14 @@ class BaseModel(torch.nn.Module):
                    elif ck == "masked_image":
                        cond_concat.append(self.blank_inpaint_image_like(noise))
            data = torch.cat(cond_concat, dim=1)
-            out['c_concat'] = comfy.conds.CONDNoiseShape(data)
+            return data
+        return None
+
+    def extra_conds(self, **kwargs):
+        out = {}
+        concat_cond = self.concat_cond(**kwargs)
+        if concat_cond is not None:
+            out['c_concat'] = comfy.conds.CONDNoiseShape(concat_cond)

        adm = self.encode_adm(**kwargs)
        if adm is not None:
@@ -244,6 +253,10 @@ class BaseModel(torch.nn.Module):
            extra_sds.append(self.model_config.process_clip_vision_state_dict_for_saving(clip_vision_state_dict))

        unet_state_dict = self.diffusion_model.state_dict()
+
+        if self.model_config.scaled_fp8 is not None:
+            unet_state_dict["scaled_fp8"] = torch.tensor([], dtype=self.model_config.scaled_fp8)
+
        unet_state_dict = self.model_config.process_unet_state_dict_for_saving(unet_state_dict)

        if self.model_type == ModelType.V_PREDICTION:
@@ -517,9 +530,7 @@ class SD_X4Upscaler(BaseModel):
        return out

 class IP2P:
-    def extra_conds(self, **kwargs):
-        out = {}
-
+    def concat_cond(self, **kwargs):
        image = kwargs.get("concat_latent_image", None)
        noise = kwargs.get("noise", None)
        device = kwargs["device"]
@@ -531,18 +542,15 @@ class IP2P:
            image = utils.common_upscale(image.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center")

        image = utils.resize_to_batch_size(image, noise.shape[0])
+        return self.process_ip2p_image_in(image)

-        out['c_concat'] = comfy.conds.CONDNoiseShape(self.process_ip2p_image_in(image))
-        adm = self.encode_adm(**kwargs)
-        if adm is not None:
-            out['y'] = comfy.conds.CONDRegular(adm)
-        return out

 class SD15_instructpix2pix(IP2P, BaseModel):
    def __init__(self, model_config, model_type=ModelType.EPS, device=None):
        super().__init__(model_config, model_type, device=device)
        self.process_ip2p_image_in = lambda image: image

+
 class SDXL_instructpix2pix(IP2P, SDXL):
    def __init__(self, model_config, model_type=ModelType.EPS, device=None):
        super().__init__(model_config, model_type, device=device)
@@ -703,6 +711,38 @@ class Flux(BaseModel):
    def __init__(self, model_config, model_type=ModelType.FLUX, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.flux.model.Flux)

+    def concat_cond(self, **kwargs):
+        """Return the extra latent channels to concatenate (image, plus a mask folded 8x8 into channels for inpaint models), or None."""
+        num_channels = self.diffusion_model.img_in.weight.shape[1] // (self.diffusion_model.patch_size * self.diffusion_model.patch_size)
+        out_channels = self.model_config.unet_config["out_channels"]
+
+        if num_channels <= out_channels:
+            return None
+
+        image = kwargs.get("concat_latent_image", None)
+        noise = kwargs.get("noise", None)
+        device = kwargs["device"]
+
+        if image is None:
+            image = torch.zeros_like(noise)
+
+        image = utils.common_upscale(image.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center")
+        image = utils.resize_to_batch_size(image, noise.shape[0])
+        image = self.process_latent_in(image)
+        if num_channels <= out_channels * 2:
+            return image
+
+        #inpaint model
+        mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None))
+        if mask is None:
+            mask = torch.ones_like(noise)[:, :1]
+
+        mask = torch.mean(mask, dim=1, keepdim=True)
+        mask = utils.common_upscale(mask.to(device), noise.shape[-1] * 8, noise.shape[-2] * 8, "bilinear", "center")
+        mask = mask.view(mask.shape[0], mask.shape[2] // 8, 8, mask.shape[3] // 8, 8).permute(0, 2, 4, 1, 3).reshape(mask.shape[0], -1, mask.shape[2] // 8, mask.shape[3] // 8)
+        mask = utils.resize_to_batch_size(mask, noise.shape[0])
+        return torch.cat((image, mask), dim=1)
+
    def encode_adm(self, **kwargs):
        return kwargs["pooled_output"]

@@ -713,3 +753,38 @@ class Flux(BaseModel):
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
        out['guidance'] = comfy.conds.CONDRegular(torch.FloatTensor([kwargs.get("guidance", 3.5)]))
        return out
+
+class GenmoMochi(BaseModel):
+    """Genmo Mochi wrapper that builds its diffusion model from AsymmDiTJoint."""
+    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
+        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.genmo.joint_model.asymm_models_joint.AsymmDiTJoint)
+
+    def extra_conds(self, **kwargs):
+        conds = super().extra_conds(**kwargs)
+        mask, text = kwargs.get("attention_mask"), kwargs.get("cross_attn")
+        if mask is not None:
+            conds['attention_mask'] = comfy.conds.CONDRegular(mask)
+            conds['num_tokens'] = comfy.conds.CONDConstant(max(1, torch.sum(mask).item()))  # floor of one token
+        if text is not None:
+            conds['c_crossattn'] = comfy.conds.CONDRegular(text)
+        return conds
+
+class LTXV(BaseModel):
+    """Lightricks LTXV wrapper that builds its diffusion model from LTXVModel."""
+    def __init__(self, model_config, model_type=ModelType.FLUX, device=None):
+        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.lightricks.model.LTXVModel) #TODO
+
+    def extra_conds(self, **kwargs):
+        conds = super().extra_conds(**kwargs)
+        # (kwargs name, cond name) pairs; each is forwarded only when supplied.
+        optional = (
+            ("attention_mask", 'attention_mask'),
+            ("cross_attn", 'c_crossattn'),
+            ("guiding_latent", 'guiding_latent'),
+        )
+        for source_key, cond_key in optional:
+            value = kwargs.get(source_key)
+            if value is not None:
+                conds[cond_key] = comfy.conds.CONDRegular(value)
+        conds['frame_rate'] = comfy.conds.CONDConstant(kwargs.get("frame_rate", 25))
+        return conds
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -70,6 +70,11 @@ def detect_unet_config(state_dict, key_prefix):
        context_processor = '{}context_processor.layers.0.attn.qkv.weight'.format(key_prefix)
        if context_processor in state_dict_keys:
            unet_config["context_processor_layers"] = count_blocks(state_dict_keys, '{}context_processor.layers.'.format(key_prefix) + '{}.')
+        unet_config["x_block_self_attn_layers"] = []
+        for key in state_dict_keys:
+            if key.startswith('{}joint_blocks.'.format(key_prefix)) and key.endswith('.x_block.attn2.qkv.weight'):
+                layer = key[len('{}joint_blocks.'.format(key_prefix)):-len('.x_block.attn2.qkv.weight')]
+                unet_config["x_block_self_attn_layers"].append(int(layer))
        return unet_config

    if '{}clf.1.weight'.format(key_prefix) in state_dict_keys: #stable cascade
@@ -132,6 +137,12 @@ def detect_unet_config(state_dict, key_prefix):
        dit_config = {}
        dit_config["image_model"] = "flux"
        dit_config["in_channels"] = 16
+        patch_size = 2
+        dit_config["patch_size"] = patch_size
+        in_key = "{}img_in.weight".format(key_prefix)
+        if in_key in state_dict_keys:
+            dit_config["in_channels"] = state_dict[in_key].shape[1] // (patch_size * patch_size)
+        dit_config["out_channels"] = 16
        dit_config["vec_in_dim"] = 768
        dit_config["context_in_dim"] = 4096
        dit_config["hidden_size"] = 3072
@@ -145,6 +156,38 @@ def detect_unet_config(state_dict, key_prefix):
        dit_config["guidance_embed"] = "{}guidance_in.in_layer.weight".format(key_prefix) in state_dict_keys
        return dit_config

+    if '{}t5_yproj.weight'.format(key_prefix) in state_dict_keys: #Genmo mochi preview
+        dit_config = {}
+        dit_config["image_model"] = "mochi_preview"
+        dit_config["depth"] = 48
+        dit_config["patch_size"] = 2
+        dit_config["num_heads"] = 24
+        dit_config["hidden_size_x"] = 3072
+        dit_config["hidden_size_y"] = 1536
+        dit_config["mlp_ratio_x"] = 4.0
+        dit_config["mlp_ratio_y"] = 4.0
+        dit_config["learn_sigma"] = False
+        dit_config["in_channels"] = 12
+        dit_config["qk_norm"] = True
+        dit_config["qkv_bias"] = False
+        dit_config["out_bias"] = True
+        dit_config["attn_drop"] = 0.0
+        dit_config["patch_embed_bias"] = True
+        dit_config["posenc_preserve_area"] = True
+        dit_config["timestep_mlp_bias"] = True
+        dit_config["attend_to_padding"] = False
+        dit_config["timestep_scale"] = 1000.0
+        dit_config["use_t5"] = True
+        dit_config["t5_feat_dim"] = 4096
+        dit_config["t5_token_length"] = 256
+        dit_config["rope_theta"] = 10000.0
+        return dit_config
+
+    if '{}adaln_single.emb.timestep_embedder.linear_1.bias'.format(key_prefix) in state_dict_keys: #Lightricks ltxv
+        dit_config = {}
+        dit_config["image_model"] = "ltxv"
+        return dit_config
+
    if '{}input_blocks.0.0.weight'.format(key_prefix) not in state_dict_keys:
        return None

@@ -286,9 +329,16 @@ def model_config_from_unet(state_dict, unet_key_prefix, use_base_if_no_match=Fal
        return None
    model_config = model_config_from_unet_config(unet_config, state_dict)
    if model_config is None and use_base_if_no_match:
-        return comfy.supported_models_base.BASE(unet_config)
-    else:
-        return model_config
+        model_config = comfy.supported_models_base.BASE(unet_config)
+
+    scaled_fp8_key = "{}scaled_fp8".format(unet_key_prefix)
+    if scaled_fp8_key in state_dict:
+        scaled_fp8_weight = state_dict.pop(scaled_fp8_key)
+        model_config.scaled_fp8 = scaled_fp8_weight.dtype
+        if model_config.scaled_fp8 == torch.float32:
+            model_config.scaled_fp8 = torch.float8_e4m3fn
+
+    return model_config

 def unet_prefix_from_state_dict(state_dict):
    candidates = ["model.diffusion_model.", #ldm/sgm models
@@ -501,7 +551,11 @@ def model_config_from_diffusers_unet(state_dict):
 def convert_diffusers_mmdit(state_dict, output_prefix=""):
    out_sd = {}

-    if 'transformer_blocks.0.attn.norm_added_k.weight' in state_dict: #Flux
+    if 'joint_transformer_blocks.0.attn.add_k_proj.weight' in state_dict: #AuraFlow
+        num_joint = count_blocks(state_dict, 'joint_transformer_blocks.{}.')
+        num_single = count_blocks(state_dict, 'single_transformer_blocks.{}.')
+        sd_map = comfy.utils.auraflow_to_diffusers({"n_double_layers": num_joint, "n_layers": num_joint + num_single}, output_prefix=output_prefix)
+    elif 'x_embedder.weight' in state_dict: #Flux
        depth = count_blocks(state_dict, 'transformer_blocks.{}.')
        depth_single_blocks = count_blocks(state_dict, 'single_transformer_blocks.{}.')
        hidden_size = state_dict["x_embedder.bias"].shape[0]
@@ -510,10 +564,6 @@ def convert_diffusers_mmdit(state_dict, output_prefix=""):
        num_blocks = count_blocks(state_dict, 'transformer_blocks.{}.')
        depth = state_dict["pos_embed.proj.weight"].shape[0] // 64
        sd_map = comfy.utils.mmdit_to_diffusers({"depth": depth, "num_blocks": num_blocks}, output_prefix=output_prefix)
-    elif 'joint_transformer_blocks.0.attn.add_k_proj.weight' in state_dict: #AuraFlow
-        num_joint = count_blocks(state_dict, 'joint_transformer_blocks.{}.')
-        num_single = count_blocks(state_dict, 'single_transformer_blocks.{}.')
-        sd_map = comfy.utils.auraflow_to_diffusers({"n_double_layers": num_joint, "n_layers": num_joint + num_single}, output_prefix=output_prefix)
    else:
        return None

--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -647,6 +647,9 @@ def unet_dtype(device=None, model_params=0, supported_dtypes=[torch.float16, tor
        pass

    if fp8_dtype is not None:
+        if supports_fp8_compute(device): #if fp8 compute is supported the casting is most likely not expensive
+            return fp8_dtype
+
        free_model_memory = maximum_vram_for_weights(device)
        if model_params * 2 > free_model_memory:
            return fp8_dtype
@@ -840,27 +843,21 @@ def force_channels_last():
    #TODO
    return False

+def cast_to(weight, dtype=None, device=None, non_blocking=False, copy=False):
+    """Return weight as dtype on device, reusing the input tensor when no cast, move or copy is needed."""
+    same_device = device is None or weight.device == device
+    if same_device and not copy and (dtype is None or weight.dtype == dtype):
+        return weight
+    if same_device:
+        return weight.to(dtype=dtype, copy=copy)
+    moved = torch.empty_like(weight, dtype=dtype, device=device)  # allocate on the target device first
+    moved.copy_(weight, non_blocking=non_blocking)
+    return moved
+
 def cast_to_device(tensor, device, dtype, copy=False):
-    device_supports_cast = False
-    if tensor.dtype == torch.float32 or tensor.dtype == torch.float16:
-        device_supports_cast = True
-    elif tensor.dtype == torch.bfloat16:
-        if hasattr(device, 'type') and device.type.startswith("cuda"):
-            device_supports_cast = True
-        elif is_intel_xpu():
-            device_supports_cast = True
+    non_blocking = device_supports_non_blocking(device)
+    return cast_to(tensor, dtype=dtype, device=device, non_blocking=non_blocking, copy=copy)

-    non_blocking = device_should_use_non_blocking(device)
-
-    if device_supports_cast:
-        if copy:
-            if tensor.device == device:
-                return tensor.to(dtype, copy=copy, non_blocking=non_blocking)
-            return tensor.to(device, copy=copy, non_blocking=non_blocking).to(dtype, non_blocking=non_blocking)
-        else:
-            return tensor.to(device, non_blocking=non_blocking).to(dtype, non_blocking=non_blocking)
-    else:
-        return tensor.to(device, dtype, copy=copy, non_blocking=non_blocking)

 def xformers_enabled():
    global directml_enabled
@@ -899,7 +896,7 @@ def force_upcast_attention_dtype():
    upcast = args.force_upcast_attention
    try:
        macos_version = tuple(int(n) for n in platform.mac_ver()[0].split("."))
-        if (14, 5) <= macos_version <= (15, 0, 1):  # black image bug on recent versions of macOS
+        if (14, 5) <= macos_version <= (15, 2):  # black image bug on recent versions of macOS
            upcast = True
    except:
        pass
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -94,6 +94,31 @@ class LowVramPatch:
            return comfy.float.stochastic_rounding(comfy.lora.calculate_weight(self.patches[self.key], weight.to(intermediate_dtype), self.key, intermediate_dtype=intermediate_dtype), weight.dtype, seed=string_to_seed(self.key))

        return comfy.lora.calculate_weight(self.patches[self.key], weight, self.key, intermediate_dtype=intermediate_dtype)
+
+def get_key_weight(model, key):
+    """Look up the tensor stored at `key` together with its owner's optional hooks.
+
+    Returns a `(weight, set_func, convert_func)` tuple. For a dotted key the
+    hooks are the `set_<name>` / `convert_<name>` attributes of the object
+    that owns the final attribute, or None when that object lacks them. A key
+    without a dot has no owner to inspect, so both hooks are None.
+    """
+    owner_path, dot, attr_name = key.rpartition('.')
+    if not dot:
+        return comfy.utils.get_attr(model, key), None, None
+
+    op = comfy.utils.get_attr(model, owner_path)
+    # getattr with a default mirrors "try / except AttributeError: pass"
+    set_func = getattr(op, "set_{}".format(attr_name), None)
+    convert_func = getattr(op, "convert_{}".format(attr_name), None)
+
+    weight = getattr(op, attr_name)
+    if convert_func is not None:
+        # re-resolve through the full dotted path when a converter exists
+        weight = comfy.utils.get_attr(model, key)
+
+    return weight, set_func, convert_func
+
 class ModelPatcher:
    def __init__(self, model, load_device, offload_device, size=0, weight_inplace_update=False):
        self.size = size
@@ -294,14 +319,16 @@ class ModelPatcher:
                if not k.startswith(filter_prefix):
                    continue
            bk = self.backup.get(k, None)
+            weight, set_func, convert_func = get_key_weight(self.model, k)
            if bk is not None:
                weight = bk.weight
-            else:
-                weight = model_sd[k]
+            if convert_func is None:
+                convert_func = lambda a, **kwargs: a
+
            if k in self.patches:
-                p[k] = [weight] + self.patches[k]
+                p[k] = [(weight, convert_func)] + self.patches[k]
            else:
-                p[k] = (weight,)
+                p[k] = [(weight, convert_func)]
        return p

    def model_state_dict(self, filter_prefix=None):
@@ -317,8 +344,7 @@ class ModelPatcher:
        if key not in self.patches:
            return

-        weight = comfy.utils.get_attr(self.model, key)
-
+        weight, set_func, convert_func = get_key_weight(self.model, key)
        inplace_update = self.weight_inplace_update or inplace_update

        if key not in self.backup:
@@ -328,12 +354,18 @@ class ModelPatcher:
            temp_weight = comfy.model_management.cast_to_device(weight, device_to, torch.float32, copy=True)
        else:
            temp_weight = weight.to(torch.float32, copy=True)
+        if convert_func is not None:
+            temp_weight = convert_func(temp_weight, inplace=True)
+
        out_weight = comfy.lora.calculate_weight(self.patches[key], temp_weight, key)
-        out_weight = comfy.float.stochastic_rounding(out_weight, weight.dtype, seed=string_to_seed(key))
-        if inplace_update:
-            comfy.utils.copy_to_param(self.model, key, out_weight)
+        if set_func is None:
+            out_weight = comfy.float.stochastic_rounding(out_weight, weight.dtype, seed=string_to_seed(key))
+            if inplace_update:
+                comfy.utils.copy_to_param(self.model, key, out_weight)
+            else:
+                comfy.utils.set_attr_param(self.model, key, out_weight)
        else:
-            comfy.utils.set_attr_param(self.model, key, out_weight)
+            set_func(out_weight, inplace_update=inplace_update, seed=string_to_seed(key))

    def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False, full_load=False):
        mem_counter = 0
@@ -341,14 +373,23 @@ class ModelPatcher:
        lowvram_counter = 0
        loading = []
        for n, m in self.model.named_modules():
-            if hasattr(m, "comfy_cast_weights") or hasattr(m, "weight"):
-                loading.append((comfy.model_management.module_size(m), n, m))
+            params = []
+            skip = False
+            for name, param in m.named_parameters(recurse=False):
+                params.append(name)
+            for name, param in m.named_parameters(recurse=True):
+                if name not in params:
+                    skip = True # skip random weights in non leaf modules
+                    break
+            if not skip and (hasattr(m, "comfy_cast_weights") or len(params) > 0):
+                loading.append((comfy.model_management.module_size(m), n, m, params))

        load_completely = []
        loading.sort(reverse=True)
        for x in loading:
            n = x[1]
            m = x[2]
+            params = x[3]
            module_mem = x[0]

            lowvram_weight = False
@@ -384,22 +425,22 @@ class ModelPatcher:
                    if m.comfy_cast_weights:
                        wipe_lowvram_weight(m)

-                if hasattr(m, "weight"):
+                if full_load or mem_counter + module_mem < lowvram_model_memory:
                    mem_counter += module_mem
-                    load_completely.append((module_mem, n, m))
+                    load_completely.append((module_mem, n, m, params))

        load_completely.sort(reverse=True)
        for x in load_completely:
            n = x[1]
            m = x[2]
-            weight_key = "{}.weight".format(n)
-            bias_key = "{}.bias".format(n)
+            params = x[3]
            if hasattr(m, "comfy_patched_weights"):
                if m.comfy_patched_weights == True:
                    continue

-            self.patch_weight_to_device(weight_key, device_to=device_to)
-            self.patch_weight_to_device(bias_key, device_to=device_to)
+            for param in params:
+                self.patch_weight_to_device("{}.{}".format(n, param), device_to=device_to)
+
            logging.debug("lowvram: loaded module regularly {} {}".format(n, m))
            m.comfy_patched_weights = True

--- a/comfy/model_sampling.py
+++ b/comfy/model_sampling.py
@@ -2,6 +2,25 @@ import torch
 from comfy.ldm.modules.diffusionmodules.util import make_beta_schedule
 import math

+def rescale_zero_terminal_snr_sigmas(sigmas):
+    """Rescale a sigma schedule so its final entry has (near) zero terminal SNR.
+
+    The schedule is mapped to sqrt(alphas_cumprod), shifted so the last value
+    becomes zero, scaled so the first value is unchanged, then mapped back to
+    sigmas. The last alpha is pinned to a tiny positive constant, which keeps
+    the final sigma finite instead of dividing by zero.
+    """
+    sqrt_alphas = (1 / ((sigmas * sigmas) + 1)).sqrt()
+    first, last = sqrt_alphas[0], sqrt_alphas[-1]
+
+    # Out-of-place arithmetic, so the endpoint views above stay untouched.
+    shifted = sqrt_alphas - last
+    rescaled = shifted * (first / (first - last))
+
+    alphas_bar = rescaled**2  # Revert sqrt
+    alphas_bar[-1] = 4.8973451890853435e-08
+    return ((1 - alphas_bar) / alphas_bar) ** 0.5
+
 class EPS:
    def calculate_input(self, sigma, noise):
        sigma = sigma.view(sigma.shape[:1] + (1,) * (noise.ndim - 1))
@@ -48,7 +67,7 @@ class CONST:
        return latent / (1.0 - sigma)

 class ModelSamplingDiscrete(torch.nn.Module):
-    def __init__(self, model_config=None):
+    def __init__(self, model_config=None, zsnr=None):
        super().__init__()

        if model_config is not None:
@@ -61,11 +80,14 @@ class ModelSamplingDiscrete(torch.nn.Module):
        linear_end = sampling_settings.get("linear_end", 0.012)
        timesteps = sampling_settings.get("timesteps", 1000)

-        self._register_schedule(given_betas=None, beta_schedule=beta_schedule, timesteps=timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=8e-3)
+        if zsnr is None:
+            zsnr = sampling_settings.get("zsnr", False)
+
+        self._register_schedule(given_betas=None, beta_schedule=beta_schedule, timesteps=timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=8e-3, zsnr=zsnr)
        self.sigma_data = 1.0

    def _register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000,
-                          linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):
+                          linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3, zsnr=False):
        if given_betas is not None:
            betas = given_betas
        else:
@@ -83,6 +105,9 @@ class ModelSamplingDiscrete(torch.nn.Module):
        # self.register_buffer('alphas_cumprod_prev', torch.tensor(alphas_cumprod_prev, dtype=torch.float32))

        sigmas = ((1 - alphas_cumprod) / alphas_cumprod) ** 0.5
+        if zsnr:
+            sigmas = rescale_zero_terminal_snr_sigmas(sigmas)
+
        self.set_sigmas(sigmas)

    def set_sigmas(self, sigmas):
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -19,20 +19,12 @@
 import torch
 import comfy.model_management
 from comfy.cli_args import args
+import comfy.float

-def cast_to(weight, dtype=None, device=None, non_blocking=False, copy=False):
-    if device is None or weight.device == device:
-        if not copy:
-            if dtype is None or weight.dtype == dtype:
-                return weight
-        return weight.to(dtype=dtype, copy=copy)
-
-    r = torch.empty_like(weight, dtype=dtype, device=device)
-    r.copy_(weight, non_blocking=non_blocking)
-    return r
+cast_to = comfy.model_management.cast_to #TODO: remove once no more references

 def cast_to_input(weight, input, non_blocking=False, copy=True):
-    return cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy)
+    return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy)

 def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None):
    if input is not None:
@@ -47,12 +39,12 @@ def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None):
    non_blocking = comfy.model_management.device_supports_non_blocking(device)
    if s.bias is not None:
        has_function = s.bias_function is not None
-        bias = cast_to(s.bias, bias_dtype, device, non_blocking=non_blocking, copy=has_function)
+        bias = comfy.model_management.cast_to(s.bias, bias_dtype, device, non_blocking=non_blocking, copy=has_function)
        if has_function:
            bias = s.bias_function(bias)

    has_function = s.weight_function is not None
-    weight = cast_to(s.weight, dtype, device, non_blocking=non_blocking, copy=has_function)
+    weight = comfy.model_management.cast_to(s.weight, dtype, device, non_blocking=non_blocking, copy=has_function)
    if has_function:
        weight = s.weight_function(weight)
    return weight, bias
@@ -258,19 +250,29 @@ def fp8_linear(self, input):
    if dtype not in [torch.float8_e4m3fn]:
        return None

+    tensor_2d = False
+    if len(input.shape) == 2:
+        tensor_2d = True
+        input = input.unsqueeze(1)
+
+
    if len(input.shape) == 3:
-        inn = input.reshape(-1, input.shape[2]).to(dtype)
        w, bias = cast_bias_weight(self, input, dtype=dtype, bias_dtype=input.dtype)
        w = w.t()

        scale_weight = self.scale_weight
        scale_input = self.scale_input
        if scale_weight is None:
-            scale_weight = torch.ones((1), device=input.device, dtype=torch.float32)
-            if scale_input is None:
-                scale_input = scale_weight
+            scale_weight = torch.ones((), device=input.device, dtype=torch.float32)
+        else:
+            scale_weight = scale_weight.to(input.device)
+
        if scale_input is None:
-            scale_input = torch.ones((1), device=input.device, dtype=torch.float32)
+            scale_input = torch.ones((), device=input.device, dtype=torch.float32)
+            inn = input.reshape(-1, input.shape[2]).to(dtype)
+        else:
+            scale_input = scale_input.to(input.device)
+            inn = (input * (1.0 / scale_input).to(input.dtype)).reshape(-1, input.shape[2]).to(dtype)

        if bias is not None:
            o = torch._scaled_mm(inn, w, out_dtype=input.dtype, bias=bias, scale_a=scale_input, scale_b=scale_weight)
@@ -280,7 +282,11 @@ def fp8_linear(self, input):
        if isinstance(o, tuple):
            o = o[0]

+        if tensor_2d:
+            return o.reshape(input.shape[0], -1)
+
        return o.reshape((-1, input.shape[1], self.weight.shape[0]))
+
    return None

 class fp8_ops(manual_cast):
@@ -298,15 +304,63 @@ class fp8_ops(manual_cast):
            weight, bias = cast_bias_weight(self, input)
            return torch.nn.functional.linear(input, weight, bias)

+def scaled_fp8_ops(fp8_matrix_mult=False, scale_input=False, override_dtype=None):
+    class scaled_fp8_op(manual_cast):
+        class Linear(manual_cast.Linear):
+            def __init__(self, *args, **kwargs):
+                if override_dtype is not None:
+                    kwargs['dtype'] = override_dtype
+                super().__init__(*args, **kwargs)

-def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False):
-    if comfy.model_management.supports_fp8_compute(load_device):
-        if (fp8_optimizations or args.fast) and not disable_fast_fp8:
-            return fp8_ops
+            def reset_parameters(self):
+                if not hasattr(self, 'scale_weight'):
+                    self.scale_weight = torch.nn.parameter.Parameter(data=torch.ones((), device=self.weight.device, dtype=torch.float32), requires_grad=False)
+
+                if not scale_input:
+                    self.scale_input = None
+
+                if not hasattr(self, 'scale_input'):
+                    self.scale_input = torch.nn.parameter.Parameter(data=torch.ones((), device=self.weight.device, dtype=torch.float32), requires_grad=False)
+                return None
+
+            def forward_comfy_cast_weights(self, input):
+                if fp8_matrix_mult:
+                    out = fp8_linear(self, input)
+                    if out is not None:
+                        return out
+
+                weight, bias = cast_bias_weight(self, input)
+
+                if weight.numel() < input.numel(): #TODO: optimize
+                    return torch.nn.functional.linear(input, weight * self.scale_weight.to(device=weight.device, dtype=weight.dtype), bias)
+                else:
+                    return torch.nn.functional.linear(input * self.scale_weight.to(device=weight.device, dtype=weight.dtype), weight, bias)
+
+            def convert_weight(self, weight, inplace=False, **kwargs):
+                if inplace:
+                    weight *= self.scale_weight.to(device=weight.device, dtype=weight.dtype)
+                    return weight
+                else:
+                    return weight * self.scale_weight.to(device=weight.device, dtype=weight.dtype)
+
+            def set_weight(self, weight, inplace_update=False, seed=None, **kwargs):
+                weight = comfy.float.stochastic_rounding(weight / self.scale_weight.to(device=weight.device, dtype=weight.dtype), self.weight.dtype, seed=seed)
+                if inplace_update:
+                    self.weight.data.copy_(weight)
+                else:
+                    self.weight = torch.nn.Parameter(weight, requires_grad=False)
+
+    return scaled_fp8_op
+
+def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, scaled_fp8=None):
+    fp8_compute = comfy.model_management.supports_fp8_compute(load_device)
+    if scaled_fp8 is not None:
+        return scaled_fp8_ops(fp8_matrix_mult=fp8_compute, scale_input=True, override_dtype=scaled_fp8)
+
+    if fp8_compute and (fp8_optimizations or args.fast) and not disable_fast_fp8:
+        return fp8_ops

    if compute_dtype is None or weight_dtype == compute_dtype:
        return disable_weight_init
-    if args.fast and not disable_fast_fp8:
-        if comfy.model_management.supports_fp8_compute(load_device):
-            return fp8_ops
+
    return manual_cast
--- a/comfy/sampler_helpers.py
+++ b/comfy/sampler_helpers.py
@@ -1,14 +1,10 @@
 import torch
 import comfy.model_management
 import comfy.conds
+import comfy.utils

 def prepare_mask(noise_mask, shape, device):
-    """ensures noise mask is of proper dimensions"""
-    noise_mask = torch.nn.functional.interpolate(noise_mask.reshape((-1, 1, noise_mask.shape[-2], noise_mask.shape[-1])), size=(shape[2], shape[3]), mode="bilinear")
-    noise_mask = torch.cat([noise_mask] * shape[1], dim=1)
-    noise_mask = comfy.utils.repeat_to_batch_size(noise_mask, shape[0])
-    noise_mask = noise_mask.to(device)
-    return noise_mask
+    return comfy.utils.reshape_mask(noise_mask, shape).to(device)

 def get_models_from_cond(cond, model_type):
    models = []
--- a/comfy/samplers.py
+++ b/comfy/samplers.py
@@ -358,11 +358,35 @@ def beta_scheduler(model_sampling, steps, alpha=0.6, beta=0.6):
    ts = numpy.rint(scipy.stats.beta.ppf(ts, alpha, beta) * total_timesteps)

    sigs = []
+    last_t = -1
    for t in ts:
-        sigs += [float(model_sampling.sigmas[int(t)])]
+        if t != last_t:
+            sigs += [float(model_sampling.sigmas[int(t)])]
+        last_t = t
    sigs += [0.0]
    return torch.FloatTensor(sigs)

+# from: https://github.com/genmoai/models/blob/main/src/mochi_preview/infer.py#L41
+def linear_quadratic_schedule(model_sampling, steps, threshold_noise=0.025, linear_steps=None):
+    """Linear-then-quadratic sigma schedule running from sigma_max down to 0."""
+    if steps == 1:
+        return torch.FloatTensor([1.0, 0.0]) * model_sampling.sigma_max.cpu()
+
+    if linear_steps is None:
+        linear_steps = steps // 2
+    quadratic_steps = steps - linear_steps
+    step_diff = linear_steps - threshold_noise * steps
+    quadratic_coef = step_diff / (linear_steps * quadratic_steps ** 2)
+    linear_coef = threshold_noise / linear_steps - 2 * step_diff / (quadratic_steps ** 2)
+    const = quadratic_coef * (linear_steps ** 2)
+
+    # Ramp values: linear segment, quadratic segment, then the endpoint 1.0.
+    ramp = [i * threshold_noise / linear_steps for i in range(linear_steps)]
+    ramp += [quadratic_coef * (i ** 2) + linear_coef * i + const for i in range(linear_steps, steps)]
+    ramp.append(1.0)
+    # Flip so the schedule starts at 1 and ends at 0 before scaling.
+    return torch.FloatTensor([1.0 - x for x in ramp]) * model_sampling.sigma_max.cpu()
+
 def get_mask_aabb(masks):
    if masks.numel() == 0:
        return torch.zeros((0, 4), device=masks.device, dtype=torch.int)
@@ -729,7 +753,7 @@ def sample(model, noise, positive, negative, cfg, device, sampler, sigmas, model
    return cfg_guider.sample(noise, latent_image, sampler, sigmas, denoise_mask, callback, disable_pbar, seed)


-SCHEDULER_NAMES = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform", "beta"]
+SCHEDULER_NAMES = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform", "beta", "linear_quadratic"]
 SAMPLER_NAMES = KSAMPLER_NAMES + ["ddim", "uni_pc", "uni_pc_bh2"]

 def calculate_sigmas(model_sampling, scheduler_name, steps):
@@ -747,6 +771,8 @@ def calculate_sigmas(model_sampling, scheduler_name, steps):
        sigmas = normal_scheduler(model_sampling, steps, sgm=True)
    elif scheduler_name == "beta":
        sigmas = beta_scheduler(model_sampling, steps)
+    elif scheduler_name == "linear_quadratic":
+        sigmas = linear_quadratic_schedule(model_sampling, steps)
    else:
        logging.error("error invalid scheduler {}".format(scheduler_name))
    return sigmas
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -7,6 +7,8 @@ from .ldm.models.autoencoder import AutoencoderKL, AutoencodingEngine
 from .ldm.cascade.stage_a import StageA
 from .ldm.cascade.stage_c_coder import StageC_coder
 from .ldm.audio.autoencoder import AudioOobleckVAE
+import comfy.ldm.genmo.vae.model
+import comfy.ldm.lightricks.vae.causal_video_autoencoder
 import yaml

 import comfy.utils
@@ -25,12 +27,17 @@ import comfy.text_encoders.aura_t5
 import comfy.text_encoders.hydit
 import comfy.text_encoders.flux
 import comfy.text_encoders.long_clipl
+import comfy.text_encoders.genmo
+import comfy.text_encoders.lt

 import comfy.model_patcher
 import comfy.lora
+import comfy.lora_convert
 import comfy.t2i_adapter.adapter
 import comfy.taesd.taesd

+import comfy.ldm.flux.redux
+
 def load_lora_for_models(model, clip, lora, strength_model, strength_clip):
    key_map = {}
    if model is not None:
@@ -38,6 +45,7 @@ def load_lora_for_models(model, clip, lora, strength_model, strength_clip):
    if clip is not None:
        key_map = comfy.lora.model_lora_keys_clip(clip.cond_stage_model, key_map)

+    lora = comfy.lora_convert.convert_lora(lora)
    loaded = comfy.lora.load_lora(lora, key_map)
    if model is not None:
        new_modelpatcher = model.clone()
@@ -169,6 +177,7 @@ class VAE:
        self.downscale_ratio = 8
        self.upscale_ratio = 8
        self.latent_channels = 4
+        self.latent_dim = 2
        self.output_channels = 3
        self.process_input = lambda image: image * 2.0 - 1.0
        self.process_output = lambda image: torch.clamp((image + 1.0) / 2.0, min=0.0, max=1.0)
@@ -238,9 +247,30 @@ class VAE:
                self.output_channels = 2
                self.upscale_ratio = 2048
                self.downscale_ratio =  2048
+                self.latent_dim = 1
                self.process_output = lambda audio: audio
                self.process_input = lambda audio: audio
                self.working_dtypes = [torch.float16, torch.bfloat16, torch.float32]
+            elif "blocks.2.blocks.3.stack.5.weight" in sd or "decoder.blocks.2.blocks.3.stack.5.weight" in sd or "layers.4.layers.1.attn_block.attn.qkv.weight" in sd or "encoder.layers.4.layers.1.attn_block.attn.qkv.weight" in sd: #genmo mochi vae
+                if "blocks.2.blocks.3.stack.5.weight" in sd:
+                    sd = comfy.utils.state_dict_prefix_replace(sd, {"": "decoder."})
+                if "layers.4.layers.1.attn_block.attn.qkv.weight" in sd:
+                    sd = comfy.utils.state_dict_prefix_replace(sd, {"": "encoder."})
+                self.first_stage_model = comfy.ldm.genmo.vae.model.VideoVAE()
+                self.latent_channels = 12
+                self.latent_dim = 3
+                self.memory_used_decode = lambda shape, dtype: (1000 * shape[2] * shape[3] * shape[4] * (6 * 8 * 8)) * model_management.dtype_size(dtype)
+                self.memory_used_encode = lambda shape, dtype: (1.5 * max(shape[2], 7) * shape[3] * shape[4] * (6 * 8 * 8)) * model_management.dtype_size(dtype)
+                self.upscale_ratio = (lambda a: max(0, a * 6 - 5), 8, 8)
+                self.working_dtypes = [torch.float16, torch.float32]
+            elif "decoder.up_blocks.0.res_blocks.0.conv1.conv.weight" in sd: #lightricks ltxv
+                self.first_stage_model = comfy.ldm.lightricks.vae.causal_video_autoencoder.VideoVAE()
+                self.latent_channels = 128
+                self.latent_dim = 3
+                self.memory_used_decode = lambda shape, dtype: (900 * shape[2] * shape[3] * shape[4] * (8 * 8 * 8)) * model_management.dtype_size(dtype)
+                self.memory_used_encode = lambda shape, dtype: (70 * max(shape[2], 7) * shape[3] * shape[4]) * model_management.dtype_size(dtype)
+                self.upscale_ratio = 8
+                self.working_dtypes = [torch.bfloat16, torch.float32]
            else:
                logging.warning("WARNING: No VAE weights detected, VAE not initalized.")
                self.first_stage_model = None
@@ -296,6 +326,10 @@ class VAE:
        decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).float()
        return comfy.utils.tiled_scale_multidim(samples, decode_fn, tile=(tile_x,), overlap=overlap, upscale_amount=self.upscale_ratio, out_channels=self.output_channels, output_device=self.output_device)

+    def decode_tiled_3d(self, samples, tile_t=999, tile_x=32, tile_y=32, overlap=(1, 8, 8)):
+        """Decode ``samples`` tile by tile over three dimensions.
+
+        Tiles of size ``(tile_t, tile_x, tile_y)`` are handed to
+        ``comfy.utils.tiled_scale_multidim`` together with ``overlap`` and
+        ``self.upscale_ratio``; the assembled result is passed through
+        ``self.process_output`` before it is returned.
+        """
+        # Each tile is cast to the VAE dtype, moved to the VAE device, decoded, and converted to float.
+        decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).float()
+        return self.process_output(comfy.utils.tiled_scale_multidim(samples, decode_fn, tile=(tile_t, tile_x, tile_y), overlap=overlap, upscale_amount=self.upscale_ratio, out_channels=self.output_channels, output_device=self.output_device))
+
    def encode_tiled_(self, pixel_samples, tile_x=512, tile_y=512, overlap = 64):
        steps = pixel_samples.shape[0] * comfy.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x, tile_y, overlap)
        steps += pixel_samples.shape[0] * comfy.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x // 2, tile_y * 2, overlap)
@@ -314,6 +348,7 @@ class VAE:
        return comfy.utils.tiled_scale_multidim(samples, encode_fn, tile=(tile_x,), overlap=overlap, upscale_amount=(1/self.downscale_ratio), out_channels=self.latent_channels, output_device=self.output_device)

    def decode(self, samples_in):
+        pixel_samples = None
        try:
            memory_used = self.memory_used_decode(samples_in.shape, self.vae_dtype)
            model_management.load_models_gpu([self.patcher], memory_required=memory_used)
@@ -321,38 +356,64 @@ class VAE:
            batch_number = int(free_memory / memory_used)
            batch_number = max(1, batch_number)

-            pixel_samples = torch.empty((samples_in.shape[0], self.output_channels) + tuple(map(lambda a: a * self.upscale_ratio, samples_in.shape[2:])), device=self.output_device)
            for x in range(0, samples_in.shape[0], batch_number):
                samples = samples_in[x:x+batch_number].to(self.vae_dtype).to(self.device)
-                pixel_samples[x:x+batch_number] = self.process_output(self.first_stage_model.decode(samples).to(self.output_device).float())
+                out = self.process_output(self.first_stage_model.decode(samples).to(self.output_device).float())
+                if pixel_samples is None:
+                    pixel_samples = torch.empty((samples_in.shape[0],) + tuple(out.shape[1:]), device=self.output_device)
+                pixel_samples[x:x+batch_number] = out
        except model_management.OOM_EXCEPTION as e:
            logging.warning("Warning: Ran out of memory when regular VAE decoding, retrying with tiled VAE decoding.")
-            if len(samples_in.shape) == 3:
+            dims = samples_in.ndim - 2
+            if dims == 1:
                pixel_samples = self.decode_tiled_1d(samples_in)
-            else:
+            elif dims == 2:
                pixel_samples = self.decode_tiled_(samples_in)
+            elif dims == 3:
+                pixel_samples = self.decode_tiled_3d(samples_in)

        pixel_samples = pixel_samples.to(self.output_device).movedim(1,-1)
        return pixel_samples

-    def decode_tiled(self, samples, tile_x=64, tile_y=64, overlap = 16):
-        model_management.load_model_gpu(self.patcher)
-        output = self.decode_tiled_(samples, tile_x, tile_y, overlap)
-        return output.movedim(1,-1)
+    def decode_tiled(self, samples, tile_x=None, tile_y=None, overlap=None):
+        """Decode ``samples`` with the tiled decoder that matches their dimensionality.
+
+        The number of dimensions after batch and channel (``samples.ndim - 2``)
+        selects ``decode_tiled_1d``, ``decode_tiled_`` or ``decode_tiled_3d``.
+        Only the tiling arguments that are not ``None`` are forwarded, so each
+        decoder keeps its own defaults for the rest.
+
+        Args:
+            samples: latent tensor to decode.
+            tile_x: optional tile size forwarded as ``tile_x``.
+            tile_y: optional tile size forwarded as ``tile_y`` (ignored for 1D latents).
+            overlap: optional overlap forwarded as ``overlap``.
+
+        Returns:
+            The decoded tensor with dimension 1 moved to the last position.
+
+        Raises:
+            ValueError: if ``samples.ndim - 2`` is not 1, 2 or 3.
+        """
+        memory_used = self.memory_used_decode(samples.shape, self.vae_dtype) #TODO: calculate mem required for tile
+        model_management.load_models_gpu([self.patcher], memory_required=memory_used)
+        dims = samples.ndim - 2
+        args = {}
+        if tile_x is not None:
+            args["tile_x"] = tile_x
+        if tile_y is not None:
+            args["tile_y"] = tile_y
+        if overlap is not None:
+            args["overlap"] = overlap
+
+        if dims == 1:
+            # decode_tiled_1d takes no tile_y. The key is only present when the caller
+            # supplied it, so pop with a default instead of raising KeyError.
+            args.pop("tile_y", None)
+            output = self.decode_tiled_1d(samples, **args)
+        elif dims == 2:
+            output = self.decode_tiled_(samples, **args)
+        elif dims == 3:
+            output = self.decode_tiled_3d(samples, **args)
+        else:
+            # Previously this fell through to an UnboundLocalError on ``output``.
+            raise ValueError("decode_tiled expects latents with 1, 2 or 3 dimensions after batch and channel, got {}".format(dims))
+        return output.movedim(1, -1)

    def encode(self, pixel_samples):
        pixel_samples = self.vae_encode_crop_pixels(pixel_samples)
-        pixel_samples = pixel_samples.movedim(-1,1)
+        pixel_samples = pixel_samples.movedim(-1, 1)
+        if self.latent_dim == 3:
+            pixel_samples = pixel_samples.movedim(1, 0).unsqueeze(0)
        try:
            memory_used = self.memory_used_encode(pixel_samples.shape, self.vae_dtype)
            model_management.load_models_gpu([self.patcher], memory_required=memory_used)
            free_memory = model_management.get_free_memory(self.device)
            batch_number = int(free_memory / max(1, memory_used))
            batch_number = max(1, batch_number)
-            samples = torch.empty((pixel_samples.shape[0], self.latent_channels) + tuple(map(lambda a: a // self.downscale_ratio, pixel_samples.shape[2:])), device=self.output_device)
+            samples = None
            for x in range(0, pixel_samples.shape[0], batch_number):
-                pixels_in = self.process_input(pixel_samples[x:x+batch_number]).to(self.vae_dtype).to(self.device)
-                samples[x:x+batch_number] = self.first_stage_model.encode(pixels_in).to(self.output_device).float()
+                pixels_in = self.process_input(pixel_samples[x:x + batch_number]).to(self.vae_dtype).to(self.device)
+                out = self.first_stage_model.encode(pixels_in).to(self.output_device).float()
+                if samples is None:
+                    samples = torch.empty((pixel_samples.shape[0],) + tuple(out.shape[1:]), device=self.output_device)
+                samples[x:x + batch_number] = out

        except model_management.OOM_EXCEPTION as e:
            logging.warning("Warning: Ran out of memory when regular VAE encoding, retrying with tiled VAE encoding.")
@@ -386,6 +447,8 @@ def load_style_model(ckpt_path):
    keys = model_data.keys()
    if "style_embedding" in keys:
        model = comfy.t2i_adapter.adapter.StyleAdapter(width=1024, context_dim=768, num_head=8, n_layes=3, num_token=8)
+    elif "redux_down.weight" in keys:
+        model = comfy.ldm.flux.redux.ReduxImageEncoder()
    else:
        raise Exception("invalid style model {}".format(ckpt_path))
    model.load_state_dict(model_data)
@@ -398,6 +461,8 @@ class CLIPType(Enum):
    STABLE_AUDIO = 4
    HUNYUAN_DIT = 5
    FLUX = 6
+    MOCHI = 7
+    LTXV = 8

 def load_clip(ckpt_paths, embedding_directory=None, clip_type=CLIPType.STABLE_DIFFUSION, model_options={}):
    clip_data = []
@@ -431,6 +496,17 @@ def detect_te_model(sd):
        return TEModel.T5_BASE
    return None

+
+def t5xxl_detect(clip_data):
+    """Return T5-XXL detection kwargs for the first state dict that holds the marker weight.
+
+    Args:
+        clip_data: iterable of state dicts to scan in order.
+
+    Returns:
+        The result of ``comfy.text_encoders.sd3_clip.t5_xxl_detect`` for the first
+        state dict containing the marker key, or an empty dict when none does.
+    """
+    weight_name = "encoder.block.23.layer.1.DenseReluDense.wi_1.weight"
+
+    for sd in clip_data:
+        if weight_name in sd:
+            return comfy.text_encoders.sd3_clip.t5_xxl_detect(sd)
+
+    return {}
+
+
 def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip_type=CLIPType.STABLE_DIFFUSION, model_options={}):
    clip_data = state_dicts

@@ -462,10 +538,15 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
            clip_target.clip = comfy.text_encoders.sd2_clip.SD2ClipModel
            clip_target.tokenizer = comfy.text_encoders.sd2_clip.SD2Tokenizer
        elif te_model == TEModel.T5_XXL:
-            weight = clip_data[0]["encoder.block.23.layer.1.DenseReluDense.wi_1.weight"]
-            dtype_t5 = weight.dtype
-            clip_target.clip = comfy.text_encoders.sd3_clip.sd3_clip(clip_l=False, clip_g=False, t5=True, dtype_t5=dtype_t5)
-            clip_target.tokenizer = comfy.text_encoders.sd3_clip.SD3Tokenizer
+            if clip_type == CLIPType.SD3:
+                clip_target.clip = comfy.text_encoders.sd3_clip.sd3_clip(clip_l=False, clip_g=False, t5=True, **t5xxl_detect(clip_data))
+                clip_target.tokenizer = comfy.text_encoders.sd3_clip.SD3Tokenizer
+            elif clip_type == CLIPType.LTXV:
+                clip_target.clip = comfy.text_encoders.lt.ltxv_te(**t5xxl_detect(clip_data))
+                clip_target.tokenizer = comfy.text_encoders.lt.LTXVT5Tokenizer
+            else: #CLIPType.MOCHI
+                clip_target.clip = comfy.text_encoders.genmo.mochi_te(**t5xxl_detect(clip_data))
+                clip_target.tokenizer = comfy.text_encoders.genmo.MochiT5Tokenizer
        elif te_model == TEModel.T5_XL:
            clip_target.clip = comfy.text_encoders.aura_t5.AuraT5Model
            clip_target.tokenizer = comfy.text_encoders.aura_t5.AuraT5Tokenizer
@@ -482,25 +563,19 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
    elif len(clip_data) == 2:
        if clip_type == CLIPType.SD3:
            te_models = [detect_te_model(clip_data[0]), detect_te_model(clip_data[1])]
-            clip_target.clip = comfy.text_encoders.sd3_clip.sd3_clip(clip_l=TEModel.CLIP_L in te_models, clip_g=TEModel.CLIP_G in te_models, t5=TEModel.T5_XXL in te_models)
+            clip_target.clip = comfy.text_encoders.sd3_clip.sd3_clip(clip_l=TEModel.CLIP_L in te_models, clip_g=TEModel.CLIP_G in te_models, t5=TEModel.T5_XXL in te_models, **t5xxl_detect(clip_data))
            clip_target.tokenizer = comfy.text_encoders.sd3_clip.SD3Tokenizer
        elif clip_type == CLIPType.HUNYUAN_DIT:
            clip_target.clip = comfy.text_encoders.hydit.HyditModel
            clip_target.tokenizer = comfy.text_encoders.hydit.HyditTokenizer
        elif clip_type == CLIPType.FLUX:
-            weight_name = "encoder.block.23.layer.1.DenseReluDense.wi_1.weight"
-            weight = clip_data[0].get(weight_name, clip_data[1].get(weight_name, None))
-            dtype_t5 = None
-            if weight is not None:
-                dtype_t5 = weight.dtype
-
-            clip_target.clip = comfy.text_encoders.flux.flux_clip(dtype_t5=dtype_t5)
+            clip_target.clip = comfy.text_encoders.flux.flux_clip(**t5xxl_detect(clip_data))
            clip_target.tokenizer = comfy.text_encoders.flux.FluxTokenizer
        else:
            clip_target.clip = sdxl_clip.SDXLClipModel
            clip_target.tokenizer = sdxl_clip.SDXLTokenizer
    elif len(clip_data) == 3:
-        clip_target.clip = comfy.text_encoders.sd3_clip.SD3ClipModel
+        clip_target.clip = comfy.text_encoders.sd3_clip.sd3_clip(**t5xxl_detect(clip_data))
        clip_target.tokenizer = comfy.text_encoders.sd3_clip.SD3Tokenizer

    parameters = 0
@@ -575,11 +650,11 @@ def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_c
        return None

    unet_weight_dtype = list(model_config.supported_inference_dtypes)
-    if weight_dtype is not None:
+    if weight_dtype is not None and model_config.scaled_fp8 is None:
        unet_weight_dtype.append(weight_dtype)

    model_config.custom_operations = model_options.get("custom_operations", None)
-    unet_dtype = model_options.get("weight_dtype", None)
+    unet_dtype = model_options.get("dtype", model_options.get("weight_dtype", None))

    if unet_dtype is None:
        unet_dtype = model_management.unet_dtype(model_params=parameters, supported_dtypes=unet_weight_dtype)
@@ -644,6 +719,8 @@ def load_diffusion_model_state_dict(sd, model_options={}): #load unet in diffuse
        sd = temp_sd

    parameters = comfy.utils.calculate_parameters(sd)
+    weight_dtype = comfy.utils.weight_dtype(sd)
+
    load_device = model_management.get_torch_device()
    model_config = model_detection.model_config_from_unet(sd, "")

@@ -670,8 +747,12 @@ def load_diffusion_model_state_dict(sd, model_options={}): #load unet in diffuse
                    logging.warning("{} {}".format(diffusers_keys[k], k))

    offload_device = model_management.unet_offload_device()
+    unet_weight_dtype = list(model_config.supported_inference_dtypes)
+    if weight_dtype is not None and model_config.scaled_fp8 is None:
+        unet_weight_dtype.append(weight_dtype)
+
    if dtype is None:
-        unet_dtype = model_management.unet_dtype(model_params=parameters, supported_dtypes=model_config.supported_inference_dtypes)
+        unet_dtype = model_management.unet_dtype(model_params=parameters, supported_dtypes=unet_weight_dtype)
    else:
        unet_dtype = dtype

--- a/comfy/sd1_clip.py
+++ b/comfy/sd1_clip.py
@@ -80,7 +80,7 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
        "pooled",
        "hidden"
    ]
-    def __init__(self, version="openai/clip-vit-large-patch14", device="cpu", max_length=77,
+    def __init__(self, device="cpu", max_length=77,
                 freeze=True, layer="last", layer_idx=None, textmodel_json_config=None, dtype=None, model_class=comfy.clip_model.CLIPTextModel,
                 special_tokens={"start": 49406, "end": 49407, "pad": 49407}, layer_norm_hidden_state=True, enable_attention_masks=False, zero_out_masked=False,
                 return_projected_pooled=True, return_attention_masks=False, model_options={}):  # clip-vit-base-patch32
@@ -94,11 +94,20 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
            config = json.load(f)

        operations = model_options.get("custom_operations", None)
+        scaled_fp8 = None
+
        if operations is None:
-            operations = comfy.ops.manual_cast
+            scaled_fp8 = model_options.get("scaled_fp8", None)
+            if scaled_fp8 is not None:
+                operations = comfy.ops.scaled_fp8_ops(fp8_matrix_mult=False, override_dtype=scaled_fp8)
+            else:
+                operations = comfy.ops.manual_cast

        self.operations = operations
        self.transformer = model_class(config, dtype, device, self.operations)
+        if scaled_fp8 is not None:
+            self.transformer.scaled_fp8 = torch.nn.Parameter(torch.tensor([], dtype=scaled_fp8))
+
        self.num_layers = self.transformer.num_layers

        self.max_length = max_length
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@@ -10,6 +10,8 @@ import comfy.text_encoders.sa_t5
 import comfy.text_encoders.aura_t5
 import comfy.text_encoders.hydit
 import comfy.text_encoders.flux
+import comfy.text_encoders.genmo
+import comfy.text_encoders.lt

 from . import supported_models_base
 from . import latent_formats
@@ -196,6 +198,8 @@ class SDXL(supported_models_base.BASE):
                self.sampling_settings["sigma_min"] = float(state_dict["edm_vpred.sigma_min"].item())
            return model_base.ModelType.V_PREDICTION_EDM
        elif "v_pred" in state_dict:
+            if "ztsnr" in state_dict: #Some zsnr anime checkpoints
+                self.sampling_settings["zsnr"] = True
            return model_base.ModelType.V_PREDICTION
        else:
            return model_base.ModelType.EPS
@@ -529,12 +533,11 @@ class SD3(supported_models_base.BASE):
            clip_l = True
        if "{}clip_g.transformer.text_model.final_layer_norm.weight".format(pref) in state_dict:
            clip_g = True
-        t5_key = "{}t5xxl.transformer.encoder.final_layer_norm.weight".format(pref)
-        if t5_key in state_dict:
+        t5_detect = comfy.text_encoders.sd3_clip.t5_xxl_detect(state_dict, "{}t5xxl.transformer.".format(pref))
+        if "dtype_t5" in t5_detect:
            t5 = True
-            dtype_t5 = state_dict[t5_key].dtype

-        return supported_models_base.ClipTarget(comfy.text_encoders.sd3_clip.SD3Tokenizer, comfy.text_encoders.sd3_clip.sd3_clip(clip_l=clip_l, clip_g=clip_g, t5=t5, dtype_t5=dtype_t5))
+        return supported_models_base.ClipTarget(comfy.text_encoders.sd3_clip.SD3Tokenizer, comfy.text_encoders.sd3_clip.sd3_clip(clip_l=clip_l, clip_g=clip_g, t5=t5, **t5_detect))

 class StableAudio(supported_models_base.BASE):
    unet_config = {
@@ -653,11 +656,8 @@ class Flux(supported_models_base.BASE):

    def clip_target(self, state_dict={}):
        pref = self.text_encoder_key_prefix[0]
-        t5_key = "{}t5xxl.transformer.encoder.final_layer_norm.weight".format(pref)
-        dtype_t5 = None
-        if t5_key in state_dict:
-            dtype_t5 = state_dict[t5_key].dtype
-        return supported_models_base.ClipTarget(comfy.text_encoders.flux.FluxTokenizer, comfy.text_encoders.flux.flux_clip(dtype_t5=dtype_t5))
+        t5_detect = comfy.text_encoders.sd3_clip.t5_xxl_detect(state_dict, "{}t5xxl.transformer.".format(pref))
+        return supported_models_base.ClipTarget(comfy.text_encoders.flux.FluxTokenizer, comfy.text_encoders.flux.flux_clip(**t5_detect))

 class FluxSchnell(Flux):
    unet_config = {
@@ -674,7 +674,63 @@ class FluxSchnell(Flux):
        out = model_base.Flux(self, model_type=model_base.ModelType.FLOW, device=device)
        return out

+class GenmoMochi(supported_models_base.BASE):
+    unet_config = {
+        "image_model": "mochi_preview",
+    }

-models = [Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, HunyuanDiT, HunyuanDiT1, Flux, FluxSchnell]
+    sampling_settings = {
+        "multiplier": 1.0,
+        "shift": 6.0,
+    }
+
+    unet_extra_config = {}
+    latent_format = latent_formats.Mochi
+
+    memory_usage_factor = 2.0 #TODO
+
+    supported_inference_dtypes = [torch.bfloat16, torch.float32]
+
+    vae_key_prefix = ["vae."]
+    text_encoder_key_prefix = ["text_encoders."]
+
+    def get_model(self, state_dict, prefix="", device=None):
+        out = model_base.GenmoMochi(self, device=device)
+        return out
+
+    def clip_target(self, state_dict={}):
+        pref = self.text_encoder_key_prefix[0]
+        t5_detect = comfy.text_encoders.sd3_clip.t5_xxl_detect(state_dict, "{}t5xxl.transformer.".format(pref))
+        return supported_models_base.ClipTarget(comfy.text_encoders.genmo.MochiT5Tokenizer, comfy.text_encoders.genmo.mochi_te(**t5_detect))
+
+class LTXV(supported_models_base.BASE):
+    """Supported-model entry for the "ltxv" image model.
+
+    Declares the configuration values for this model family and builds the
+    matching ``model_base.LTXV`` model and T5-based text encoder target.
+    """
+    # presumably compared against the detected unet config to select this class; confirm in BASE
+    unet_config = {
+        "image_model": "ltxv",
+    }
+
+    sampling_settings = {
+        "shift": 2.37,
+    }
+
+    unet_extra_config = {}
+    latent_format = latent_formats.LTXV
+
+    memory_usage_factor = 2.7
+
+    supported_inference_dtypes = [torch.bfloat16, torch.float32]
+
+    vae_key_prefix = ["vae."]
+    text_encoder_key_prefix = ["text_encoders."]
+
+    def get_model(self, state_dict, prefix="", device=None):
+        """Return a ``model_base.LTXV`` built from this config; ``state_dict`` and ``prefix`` are not used here."""
+        out = model_base.LTXV(self, device=device)
+        return out
+
+    def clip_target(self, state_dict={}):
+        """Return the ``ClipTarget`` pairing ``LTXVT5Tokenizer`` with the ``ltxv_te`` model class.
+
+        T5 options are detected from ``state_dict`` under the first text encoder
+        key prefix followed by ``t5xxl.transformer.``.
+        """
+        pref = self.text_encoder_key_prefix[0]
+        t5_detect = comfy.text_encoders.sd3_clip.t5_xxl_detect(state_dict, "{}t5xxl.transformer.".format(pref))
+        return supported_models_base.ClipTarget(comfy.text_encoders.lt.LTXVT5Tokenizer, comfy.text_encoders.lt.ltxv_te(**t5_detect))
+
+models = [Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, HunyuanDiT, HunyuanDiT1, Flux, FluxSchnell, GenmoMochi, LTXV]

 models += [SVD_img2vid]
--- a/comfy/supported_models_base.py
+++ b/comfy/supported_models_base.py
@@ -49,6 +49,7 @@ class BASE:

    manual_cast_dtype = None
    custom_operations = None
+    scaled_fp8 = None
    optimizations = {"fp8": False}

    @classmethod
@@ -72,6 +73,7 @@ class BASE:
        self.unet_config = unet_config.copy()
        self.sampling_settings = self.sampling_settings.copy()
        self.latent_format = self.latent_format()
+        self.optimizations = self.optimizations.copy()
        for x in self.unet_extra_config:
            self.unet_config[x] = self.unet_extra_config[x]

--- a/comfy/text_encoders/flux.py
+++ b/comfy/text_encoders/flux.py
@@ -1,15 +1,11 @@
 from comfy import sd1_clip
 import comfy.text_encoders.t5
+import comfy.text_encoders.sd3_clip
 import comfy.model_management
 from transformers import T5TokenizerFast
 import torch
 import os

-class T5XXLModel(sd1_clip.SDClipModel):
-    def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, model_options={}):
-        textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "t5_config_xxl.json")
-        super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"end": 1, "pad": 0}, model_class=comfy.text_encoders.t5.T5, model_options=model_options)
-
 class T5XXLTokenizer(sd1_clip.SDTokenizer):
    def __init__(self, embedding_directory=None, tokenizer_data={}):
        tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "t5_tokenizer")
@@ -41,7 +37,7 @@ class FluxClipModel(torch.nn.Module):
        dtype_t5 = comfy.model_management.pick_weight_dtype(dtype_t5, dtype, device)
        clip_l_class = model_options.get("clip_l_class", sd1_clip.SDClipModel)
        self.clip_l = clip_l_class(device=device, dtype=dtype, return_projected_pooled=False, model_options=model_options)
-        self.t5xxl = T5XXLModel(device=device, dtype=dtype_t5, model_options=model_options)
+        self.t5xxl = comfy.text_encoders.sd3_clip.T5XXLModel(device=device, dtype=dtype_t5, model_options=model_options)
        self.dtypes = set([dtype, dtype_t5])

    def set_clip_options(self, options):
@@ -66,8 +62,11 @@ class FluxClipModel(torch.nn.Module):
        else:
            return self.t5xxl.load_sd(sd)

-def flux_clip(dtype_t5=None):
+def flux_clip(dtype_t5=None, t5xxl_scaled_fp8=None):
    class FluxClipModel_(FluxClipModel):
        def __init__(self, device="cpu", dtype=None, model_options={}):
+            if t5xxl_scaled_fp8 is not None and "t5xxl_scaled_fp8" not in model_options:
+                model_options = model_options.copy()
+                model_options["t5xxl_scaled_fp8"] = t5xxl_scaled_fp8
            super().__init__(dtype_t5=dtype_t5, device=device, dtype=dtype, model_options=model_options)
    return FluxClipModel_
--- a/comfy/text_encoders/genmo.py
+++ b/comfy/text_encoders/genmo.py
@@ -0,0 +1,38 @@
+from comfy import sd1_clip
+import comfy.text_encoders.sd3_clip
+import os
+from transformers import T5TokenizerFast
+
+
+class T5XXLModel(comfy.text_encoders.sd3_clip.T5XXLModel):
+    """``sd3_clip.T5XXLModel`` variant that always passes ``attention_mask=True`` to its base class."""
+    def __init__(self, **kwargs):
+        # Any caller-supplied attention_mask value is overwritten.
+        kwargs["attention_mask"] = True
+        super().__init__(**kwargs)
+
+
+class MochiT5XXL(sd1_clip.SD1ClipModel):
+    """``SD1ClipModel`` configured with ``name="t5xxl"`` and this module's ``T5XXLModel`` as ``clip_model``."""
+    def __init__(self, device="cpu", dtype=None, model_options={}):
+        super().__init__(device=device, dtype=dtype, name="t5xxl", clip_model=T5XXLModel, model_options=model_options)
+
+
+class T5XXLTokenizer(sd1_clip.SDTokenizer):
+    """``SDTokenizer`` using ``T5TokenizerFast`` files from the ``t5_tokenizer`` directory next to this module.
+
+    Configured with ``min_length=256``, no start token and no padding to ``max_length``.
+    """
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        # tokenizer_data is accepted but not forwarded to the base class.
+        tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "t5_tokenizer")
+        super().__init__(tokenizer_path, embedding_directory=embedding_directory, pad_with_end=False, embedding_size=4096, embedding_key='t5xxl', tokenizer_class=T5TokenizerFast, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=256)
+
+
+class MochiT5Tokenizer(sd1_clip.SD1Tokenizer):
+    """``SD1Tokenizer`` configured with ``clip_name="t5xxl"`` and this module's ``T5XXLTokenizer``."""
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, clip_name="t5xxl", tokenizer=T5XXLTokenizer)
+
+
+def mochi_te(dtype_t5=None, t5xxl_scaled_fp8=None):
+    """Return a ``MochiT5XXL`` subclass with ``dtype_t5`` and ``t5xxl_scaled_fp8`` bound as defaults.
+
+    Args:
+        dtype_t5: dtype used when the returned class is constructed with ``dtype=None``.
+        t5xxl_scaled_fp8: value stored under ``model_options["t5xxl_scaled_fp8"]`` when
+            it is not ``None`` and the caller has not already set that key.
+
+    Returns:
+        The class ``MochiTEModel_`` (not an instance).
+    """
+    class MochiTEModel_(MochiT5XXL):
+        def __init__(self, device="cpu", dtype=None, model_options={}):
+            if t5xxl_scaled_fp8 is not None and "t5xxl_scaled_fp8" not in model_options:
+                # Copy first so the caller's dict (or the shared default) is not mutated.
+                model_options = model_options.copy()
+                model_options["t5xxl_scaled_fp8"] = t5xxl_scaled_fp8
+            if dtype is None:
+                dtype = dtype_t5
+            super().__init__(device=device, dtype=dtype, model_options=model_options)
+    return MochiTEModel_
--- a/comfy/text_encoders/lt.py
+++ b/comfy/text_encoders/lt.py
@@ -0,0 +1,18 @@
+from comfy import sd1_clip
+import os
+from transformers import T5TokenizerFast
+import comfy.text_encoders.genmo
+
+class T5XXLTokenizer(sd1_clip.SDTokenizer):
+    """``SDTokenizer`` using ``T5TokenizerFast`` files from the ``t5_tokenizer`` directory next to this module.
+
+    Configured with ``min_length=128``, no start token and no padding to ``max_length``.
+    """
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        # tokenizer_data is accepted but not forwarded to the base class.
+        tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "t5_tokenizer")
+        super().__init__(tokenizer_path, embedding_directory=embedding_directory, pad_with_end=False, embedding_size=4096, embedding_key='t5xxl', tokenizer_class=T5TokenizerFast, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=128) #pad to 128?
+
+
+class LTXVT5Tokenizer(sd1_clip.SD1Tokenizer):
+    """``SD1Tokenizer`` configured with ``clip_name="t5xxl"`` and this module's ``T5XXLTokenizer``."""
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, clip_name="t5xxl", tokenizer=T5XXLTokenizer)
+
+
+def ltxv_te(*args, **kwargs):
+    """Return the text encoder class built by ``comfy.text_encoders.genmo.mochi_te`` with the same arguments."""
+    return comfy.text_encoders.genmo.mochi_te(*args, **kwargs)
--- a/comfy/text_encoders/sd3_clip.py
+++ b/comfy/text_encoders/sd3_clip.py
@@ -8,9 +8,27 @@ import comfy.model_management
 import logging

 class T5XXLModel(sd1_clip.SDClipModel):
-    def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, model_options={}):
+    def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, attention_mask=False, model_options={}):
        textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "t5_config_xxl.json")
-        super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"end": 1, "pad": 0}, model_class=comfy.text_encoders.t5.T5, model_options=model_options)
+        t5xxl_scaled_fp8 = model_options.get("t5xxl_scaled_fp8", None)
+        if t5xxl_scaled_fp8 is not None:
+            model_options = model_options.copy()
+            model_options["scaled_fp8"] = t5xxl_scaled_fp8
+
+        super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"end": 1, "pad": 0}, model_class=comfy.text_encoders.t5.T5, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)
+
+
+def t5_xxl_detect(state_dict, prefix=""):
+    """Collect T5-XXL options from the dtypes of two marker entries in ``state_dict``.
+
+    Args:
+        state_dict: mapping of key names to objects exposing ``.dtype``.
+        prefix: string prepended to both marker key names before lookup.
+
+    Returns:
+        A dict that contains ``"dtype_t5"`` when ``<prefix>encoder.final_layer_norm.weight``
+        is present and ``"t5xxl_scaled_fp8"`` when ``<prefix>scaled_fp8`` is present;
+        empty when neither key exists.
+    """
+    out = {}
+    t5_key = "{}encoder.final_layer_norm.weight".format(prefix)
+    if t5_key in state_dict:
+        out["dtype_t5"] = state_dict[t5_key].dtype
+
+    scaled_fp8_key = "{}scaled_fp8".format(prefix)
+    if scaled_fp8_key in state_dict:
+        out["t5xxl_scaled_fp8"] = state_dict[scaled_fp8_key].dtype
+
+    return out

 class T5XXLTokenizer(sd1_clip.SDTokenizer):
    def __init__(self, embedding_directory=None, tokenizer_data={}):
@@ -39,7 +57,7 @@ class SD3Tokenizer:
        return {}

 class SD3ClipModel(torch.nn.Module):
-    def __init__(self, clip_l=True, clip_g=True, t5=True, dtype_t5=None, device="cpu", dtype=None, model_options={}):
+    def __init__(self, clip_l=True, clip_g=True, t5=True, dtype_t5=None, t5_attention_mask=False, device="cpu", dtype=None, model_options={}):
        super().__init__()
        self.dtypes = set()
        if clip_l:
@@ -57,7 +75,8 @@ class SD3ClipModel(torch.nn.Module):

        if t5:
            dtype_t5 = comfy.model_management.pick_weight_dtype(dtype_t5, dtype, device)
-            self.t5xxl = T5XXLModel(device=device, dtype=dtype_t5, model_options=model_options)
+            self.t5_attention_mask = t5_attention_mask
+            self.t5xxl = T5XXLModel(device=device, dtype=dtype_t5, model_options=model_options, attention_mask=self.t5_attention_mask)
            self.dtypes.add(dtype_t5)
        else:
            self.t5xxl = None
@@ -87,6 +106,7 @@ class SD3ClipModel(torch.nn.Module):
        lg_out = None
        pooled = None
        out = None
+        extra = {}

        if len(token_weight_pairs_g) > 0 or len(token_weight_pairs_l) > 0:
            if self.clip_l is not None:
@@ -111,7 +131,11 @@ class SD3ClipModel(torch.nn.Module):
            pooled = torch.cat((l_pooled, g_pooled), dim=-1)

        if self.t5xxl is not None:
-            t5_out, t5_pooled = self.t5xxl.encode_token_weights(token_weight_pairs_t5)
+            t5_output = self.t5xxl.encode_token_weights(token_weight_pairs_t5)
+            t5_out, t5_pooled = t5_output[:2]
+            if self.t5_attention_mask:
+                extra["attention_mask"] = t5_output[2]["attention_mask"]
+
            if lg_out is not None:
                out = torch.cat([lg_out, t5_out], dim=-2)
            else:
@@ -123,7 +147,7 @@ class SD3ClipModel(torch.nn.Module):
        if pooled is None:
            pooled = torch.zeros((1, 768 + 1280), device=comfy.model_management.intermediate_device())

-        return out, pooled
+        return out, pooled, extra

    def load_sd(self, sd):
        if "text_model.encoder.layers.30.mlp.fc1.weight" in sd:
@@ -133,8 +157,11 @@ class SD3ClipModel(torch.nn.Module):
        else:
            return self.t5xxl.load_sd(sd)

-def sd3_clip(clip_l=True, clip_g=True, t5=True, dtype_t5=None):
+def sd3_clip(clip_l=True, clip_g=True, t5=True, dtype_t5=None, t5xxl_scaled_fp8=None, t5_attention_mask=False):
    class SD3ClipModel_(SD3ClipModel):
        def __init__(self, device="cpu", dtype=None, model_options={}):
-            super().__init__(clip_l=clip_l, clip_g=clip_g, t5=t5, dtype_t5=dtype_t5, device=device, dtype=dtype, model_options=model_options)
+            if t5xxl_scaled_fp8 is not None and "t5xxl_scaled_fp8" not in model_options:
+                model_options = model_options.copy()
+                model_options["t5xxl_scaled_fp8"] = t5xxl_scaled_fp8
+            super().__init__(clip_l=clip_l, clip_g=clip_g, t5=t5, dtype_t5=dtype_t5, t5_attention_mask=t5_attention_mask, device=device, dtype=dtype, model_options=model_options)
    return SD3ClipModel_
--- a/comfy/utils.py
+++ b/comfy/utils.py
@@ -68,7 +68,7 @@ def weight_dtype(sd, prefix=""):
    for k in sd.keys():
        if k.startswith(prefix):
            w = sd[k]
-            dtypes[w.dtype] = dtypes.get(w.dtype, 0) + 1
+            dtypes[w.dtype] = dtypes.get(w.dtype, 0) + w.numel()

    if len(dtypes) == 0:
        return None
@@ -316,10 +316,18 @@ MMDIT_MAP_BLOCK = {
    ("context_block.mlp.fc1.weight", "ff_context.net.0.proj.weight"),
    ("context_block.mlp.fc2.bias", "ff_context.net.2.bias"),
    ("context_block.mlp.fc2.weight", "ff_context.net.2.weight"),
+    ("context_block.attn.ln_q.weight", "attn.norm_added_q.weight"),
+    ("context_block.attn.ln_k.weight", "attn.norm_added_k.weight"),
    ("x_block.adaLN_modulation.1.bias", "norm1.linear.bias"),
    ("x_block.adaLN_modulation.1.weight", "norm1.linear.weight"),
    ("x_block.attn.proj.bias", "attn.to_out.0.bias"),
    ("x_block.attn.proj.weight", "attn.to_out.0.weight"),
+    ("x_block.attn.ln_q.weight", "attn.norm_q.weight"),
+    ("x_block.attn.ln_k.weight", "attn.norm_k.weight"),
+    ("x_block.attn2.proj.bias", "attn2.to_out.0.bias"),
+    ("x_block.attn2.proj.weight", "attn2.to_out.0.weight"),
+    ("x_block.attn2.ln_q.weight", "attn2.norm_q.weight"),
+    ("x_block.attn2.ln_k.weight", "attn2.norm_k.weight"),
    ("x_block.mlp.fc1.bias", "ff.net.0.proj.bias"),
    ("x_block.mlp.fc1.weight", "ff.net.0.proj.weight"),
    ("x_block.mlp.fc2.bias", "ff.net.2.bias"),
@@ -349,6 +357,12 @@ def mmdit_to_diffusers(mmdit_config, output_prefix=""):
            key_map["{}add_k_proj.{}".format(k, end)] = (qkv, (0, offset, offset))
            key_map["{}add_v_proj.{}".format(k, end)] = (qkv, (0, offset * 2, offset))

+            k = "{}.attn2.".format(block_from)
+            qkv = "{}.x_block.attn2.qkv.{}".format(block_to, end)
+            key_map["{}to_q.{}".format(k, end)] = (qkv, (0, 0, offset))
+            key_map["{}to_k.{}".format(k, end)] = (qkv, (0, offset, offset))
+            key_map["{}to_v.{}".format(k, end)] = (qkv, (0, offset * 2, offset))
+
        for k in MMDIT_MAP_BLOCK:
            key_map["{}.{}".format(block_from, k[1])] = "{}.{}".format(block_to, k[0])

@@ -690,9 +704,14 @@ def lanczos(samples, width, height):
    return result.to(samples.device, samples.dtype)

 def common_upscale(samples, width, height, upscale_method, crop):
+        orig_shape = tuple(samples.shape)
+        if len(orig_shape) > 4:
+            samples = samples.reshape(samples.shape[0], samples.shape[1], -1, samples.shape[-2], samples.shape[-1])
+            samples = samples.movedim(2, 1)
+            samples = samples.reshape(-1, orig_shape[1], orig_shape[-2], orig_shape[-1])
        if crop == "center":
-            old_width = samples.shape[3]
-            old_height = samples.shape[2]
+            old_width = samples.shape[-1]
+            old_height = samples.shape[-2]
            old_aspect = old_width / old_height
            new_aspect = width / height
            x = 0
@@ -701,16 +720,22 @@ def common_upscale(samples, width, height, upscale_method, crop):
                x = round((old_width - old_width * (new_aspect / old_aspect)) / 2)
            elif old_aspect < new_aspect:
                y = round((old_height - old_height * (old_aspect / new_aspect)) / 2)
-            s = samples[:,:,y:old_height-y,x:old_width-x]
+            s = samples.narrow(-2, y, old_height - y * 2).narrow(-1, x, old_width - x * 2)
        else:
            s = samples

        if upscale_method == "bislerp":
-            return bislerp(s, width, height)
+            out = bislerp(s, width, height)
        elif upscale_method == "lanczos":
-            return lanczos(s, width, height)
+            out = lanczos(s, width, height)
        else:
-            return torch.nn.functional.interpolate(s, size=(height, width), mode=upscale_method)
+            out = torch.nn.functional.interpolate(s, size=(height, width), mode=upscale_method)
+
+        if len(orig_shape) == 4:
+            return out
+
+        out = out.reshape((orig_shape[0], -1, orig_shape[1]) + (height, width))
+        return out.movedim(2, 1).reshape(orig_shape[:-2] + (height, width))

 def get_tiled_scale_steps(width, height, tile_x, tile_y, overlap):
    rows = 1 if height <= tile_y else math.ceil((height - overlap) / (tile_y - overlap))
@@ -720,7 +745,27 @@ def get_tiled_scale_steps(width, height, tile_x, tile_y, overlap):
@torch.inference_mode()
 def tiled_scale_multidim(samples, function, tile=(64, 64), overlap = 8, upscale_amount = 4, out_channels = 3, output_device="cpu", pbar = None):
    dims = len(tile)
-    output = torch.empty([samples.shape[0], out_channels] + list(map(lambda a: round(a * upscale_amount), samples.shape[2:])), device=output_device)
+
+    if not (isinstance(upscale_amount, (tuple, list))):
+        upscale_amount = [upscale_amount] * dims
+
+    if not (isinstance(overlap, (tuple, list))):
+        overlap = [overlap] * dims
+
+    def get_upscale(dim, val):
+        up = upscale_amount[dim]
+        if callable(up):
+            return up(val)
+        else:
+            return up * val
+
+    def mult_list_upscale(a):
+        out = []
+        for i in range(len(a)):
+            out.append(round(get_upscale(i, a[i])))
+        return out
+
+    output = torch.empty([samples.shape[0], out_channels] + mult_list_upscale(samples.shape[2:]), device=output_device)

    for b in range(samples.shape[0]):
        s = samples[b:b+1]
@@ -732,27 +777,27 @@ def tiled_scale_multidim(samples, function, tile=(64, 64), overlap = 8, upscale_
                pbar.update(1)
            continue

-        out = torch.zeros([s.shape[0], out_channels] + list(map(lambda a: round(a * upscale_amount), s.shape[2:])), device=output_device)
-        out_div = torch.zeros([s.shape[0], out_channels] + list(map(lambda a: round(a * upscale_amount), s.shape[2:])), device=output_device)
+        out = torch.zeros([s.shape[0], out_channels] + mult_list_upscale(s.shape[2:]), device=output_device)
+        out_div = torch.zeros([s.shape[0], out_channels] + mult_list_upscale(s.shape[2:]), device=output_device)

-        positions = [range(0, s.shape[d+2], tile[d] - overlap) if s.shape[d+2] > tile[d] else [0] for d in range(dims)]
+        positions = [range(0, s.shape[d+2], tile[d] - overlap[d]) if s.shape[d+2] > tile[d] else [0] for d in range(dims)]

        for it in itertools.product(*positions):
            s_in = s
            upscaled = []

            for d in range(dims):
-                pos = max(0, min(s.shape[d + 2] - overlap, it[d]))
+                pos = max(0, min(s.shape[d + 2] - (overlap[d] + 1), it[d]))
                l = min(tile[d], s.shape[d + 2] - pos)
                s_in = s_in.narrow(d + 2, pos, l)
-                upscaled.append(round(pos * upscale_amount))
+                upscaled.append(round(get_upscale(d, pos)))

            ps = function(s_in).to(output_device)
            mask = torch.ones_like(ps)
-            feather = round(overlap * upscale_amount)

-            for t in range(feather):
-                for d in range(2, dims + 2):
+            for d in range(2, dims + 2):
+                feather = round(get_upscale(d - 2, overlap[d - 2]))
+                for t in range(feather):
                    a = (t + 1) / feather
                    mask.narrow(d, t, 1).mul_(a)
                    mask.narrow(d, mask.shape[d] - 1 - t, 1).mul_(a)
@@ -803,3 +848,24 @@ class ProgressBar:

    def update(self, value):
        self.update_absolute(self.current + value)
+
+def reshape_mask(input_mask, output_shape):
+    """Interpolate input_mask to output_shape[2:], repeat it along dim 1 up to output_shape[1], then to batch size output_shape[0]."""
+    dims = len(output_shape) - 2
+    if dims == 1:
+        scale_mode = "linear"
+    elif dims == 2:
+        input_mask = input_mask.reshape((-1, 1, input_mask.shape[-2], input_mask.shape[-1]))
+        scale_mode = "bilinear"
+    elif dims == 3:
+        if len(input_mask.shape) < 5:
+            input_mask = input_mask.reshape((1, 1, -1, input_mask.shape[-2], input_mask.shape[-1]))
+        scale_mode = "trilinear"
+    else:
+        raise ValueError("reshape_mask expects 1 to 3 spatial dims in output_shape, got {}".format(dims))
+
+    mask = torch.nn.functional.interpolate(input_mask, size=output_shape[2:], mode=scale_mode)
+    if mask.shape[1] < output_shape[1]:
+        mask = mask.repeat((1, output_shape[1]) + (1,) * dims)[:,:output_shape[1]]
+    mask = repeat_to_batch_size(mask, output_shape[0])  # same-module helper; no need for the name comfy to be bound here
+    return mask
--- a/comfy_extras/nodes_latent.py
+++ b/comfy_extras/nodes_latent.py
@@ -1,4 +1,5 @@
 import comfy.utils
+import comfy_extras.nodes_post_processing
 import torch

 def reshape_latent_to(target_shape, latent):
@@ -145,6 +146,131 @@ class LatentBatchSeedBehavior:

        return (samples_out,)

+class LatentApplyOperation:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "samples": ("LATENT",),
+                             "operation": ("LATENT_OPERATION",),
+                             }}
+
+    RETURN_TYPES = ("LATENT",)
+    FUNCTION = "op"
+
+    CATEGORY = "latent/advanced/operations"
+    EXPERIMENTAL = True
+
+    def op(self, samples, operation):
+        samples_out = samples.copy()
+
+        s1 = samples["samples"]
+        samples_out["samples"] = operation(latent=s1)
+        return (samples_out,)
+
+class LatentApplyOperationCFG:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "model": ("MODEL",),
+                             "operation": ("LATENT_OPERATION",),
+                              }}
+    RETURN_TYPES = ("MODEL",)
+    FUNCTION = "patch"
+
+    CATEGORY = "latent/advanced/operations"
+    EXPERIMENTAL = True
+    # Returns a clone of the model whose sampler pre-CFG hook runs `operation` on the first entry of conds_out.
+    def patch(self, model, operation):
+        m = model.clone()  # the input model object is not modified; the hook is installed on the clone
+
+        def pre_cfg_function(args):
+            conds_out = args["conds_out"]
+            if len(conds_out) == 2:  # presumably [positive, negative] predictions - TODO confirm against the sampler
+                conds_out[0] = operation(latent=(conds_out[0] - conds_out[1])) + conds_out[1]  # operate on the difference, then add entry 1 back
+            else:
+                conds_out[0] = operation(latent=conds_out[0])
+            return conds_out  # same list object, with entry 0 replaced
+
+        m.set_model_sampler_pre_cfg_function(pre_cfg_function)
+        return (m, )
+
+class LatentOperationTonemapReinhard:
+    """Node producing a LATENT_OPERATION callable that applies Reinhard tonemapping to per-position latent norms."""
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "multiplier": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01}),
+                              }}
+
+    RETURN_TYPES = ("LATENT_OPERATION",)
+    FUNCTION = "op"
+    CATEGORY = "latent/advanced/operations"
+    EXPERIMENTAL = True
+
+    def op(self, multiplier):
+        def tonemap_reinhard(latent, **kwargs):
+            # Vector norm over dim 1, kept as a size-1 axis so it broadcasts against latent.
+            latent_vector_magnitude = (torch.linalg.vector_norm(latent, dim=(1)) + 0.0000000001)[:,None]
+            normalized_latent = latent / latent_vector_magnitude
+            reduce_dims = tuple(range(1, latent_vector_magnitude.ndim))  # every non-batch axis, so 5D latents get per-sample stats too
+            mean = torch.mean(latent_vector_magnitude, dim=reduce_dims, keepdim=True)
+            std = torch.std(latent_vector_magnitude, dim=reduce_dims, keepdim=True)
+
+            # Ceiling for the output norms: x / (1 + x) below stays under 1, so the rescaled norms stay under top.
+            top = (std * 5 + mean) * multiplier
+            #reinhard
+            latent_vector_magnitude *= (1.0 / top)
+            new_magnitude = latent_vector_magnitude / (latent_vector_magnitude + 1.0)
+            new_magnitude *= top
+            return normalized_latent * new_magnitude
+        return (tonemap_reinhard,)
+
+class LatentOperationSharpen:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {
+                "sharpen_radius": ("INT", {
+                    "default": 9,
+                    "min": 1,
+                    "max": 31,
+                    "step": 1
+                }),
+                "sigma": ("FLOAT", {
+                    "default": 1.0,
+                    "min": 0.1,
+                    "max": 10.0,
+                    "step": 0.1
+                }),
+                "alpha": ("FLOAT", {
+                    "default": 0.1,
+                    "min": 0.0,
+                    "max": 5.0,
+                    "step": 0.01
+                }),
+                              }}
+
+    RETURN_TYPES = ("LATENT_OPERATION",)
+    FUNCTION = "op"
+
+    CATEGORY = "latent/advanced/operations"
+    EXPERIMENTAL = True
+
+    def op(self, sharpen_radius, sigma, alpha):
+        def sharpen(latent, **kwargs):
+            luminance = (torch.linalg.vector_norm(latent, dim=(1)) + 1e-6)[:,None]
+            normalized_latent = latent / luminance
+            channels = latent.shape[1]
+
+            kernel_size = sharpen_radius * 2 + 1
+            kernel = comfy_extras.nodes_post_processing.gaussian_kernel(kernel_size, sigma, device=luminance.device)
+            center = kernel_size // 2
+
+            kernel *= alpha * -10
+            kernel[center, center] = kernel[center, center] - kernel.sum() + 1.0
+
+            padded_image = torch.nn.functional.pad(normalized_latent, (sharpen_radius,sharpen_radius,sharpen_radius,sharpen_radius), 'reflect')
+            sharpened = torch.nn.functional.conv2d(padded_image, kernel.repeat(channels, 1, 1).unsqueeze(1), padding=kernel_size // 2, groups=channels)[:,:,sharpen_radius:-sharpen_radius, sharpen_radius:-sharpen_radius]
+
+            return luminance * sharpened
+        return (sharpen,)
+
 NODE_CLASS_MAPPINGS = {
    "LatentAdd": LatentAdd,
    "LatentSubtract": LatentSubtract,
@@ -152,4 +278,8 @@ NODE_CLASS_MAPPINGS = {
    "LatentInterpolate": LatentInterpolate,
    "LatentBatch": LatentBatch,
    "LatentBatchSeedBehavior": LatentBatchSeedBehavior,
+    "LatentApplyOperation": LatentApplyOperation,
+    "LatentApplyOperationCFG": LatentApplyOperationCFG,
+    "LatentOperationTonemapReinhard": LatentOperationTonemapReinhard,
+    "LatentOperationSharpen": LatentOperationSharpen,
 }
--- a/comfy_extras/nodes_lora_extract.py
+++ b/comfy_extras/nodes_lora_extract.py
@@ -82,8 +82,8 @@ class LoraSave:
                              "lora_type": (tuple(LORA_TYPES.keys()),),
                              "bias_diff": ("BOOLEAN", {"default": True}),
                            },
-                "optional": {"model_diff": ("MODEL",),
-                             "text_encoder_diff": ("CLIP",)},
+                "optional": {"model_diff": ("MODEL", {"tooltip": "The ModelSubtract output to be converted to a lora."}),
+                             "text_encoder_diff": ("CLIP", {"tooltip": "The CLIPSubtract output to be converted to a lora."})},
    }
    RETURN_TYPES = ()
    FUNCTION = "save"
@@ -113,3 +113,7 @@ class LoraSave:
 NODE_CLASS_MAPPINGS = {
    "LoraSave": LoraSave
 }
+
+NODE_DISPLAY_NAME_MAPPINGS = {
+    "LoraSave": "Extract and Save Lora"
+}
--- a/comfy_extras/nodes_lt.py
+++ b/comfy_extras/nodes_lt.py
@@ -0,0 +1,182 @@
+import nodes
+import node_helpers
+import torch
+import comfy.model_management
+import comfy.model_sampling
+import comfy.utils
+import math
+
+class EmptyLTXVLatentVideo:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "width": ("INT", {"default": 768, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
+                              "height": ("INT", {"default": 512, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
+                              "length": ("INT", {"default": 97, "min": 9, "max": nodes.MAX_RESOLUTION, "step": 8}),
+                              "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096})}}
+    RETURN_TYPES = ("LATENT",)
+    FUNCTION = "generate"
+
+    CATEGORY = "latent/video/ltxv"
+
+    def generate(self, width, height, length, batch_size=1):
+        latent = torch.zeros([batch_size, 128, ((length - 1) // 8) + 1, height // 32, width // 32], device=comfy.model_management.intermediate_device())
+        return ({"samples": latent}, )
+
+
+class LTXVImgToVideo:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {"positive": ("CONDITIONING", ),
+                             "negative": ("CONDITIONING", ),
+                             "vae": ("VAE",),
+                             "image": ("IMAGE",),
+                             "width": ("INT", {"default": 768, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
+                             "height": ("INT", {"default": 512, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
+                             "length": ("INT", {"default": 97, "min": 9, "max": nodes.MAX_RESOLUTION, "step": 8}),
+                             "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096})}}
+    # Outputs: both conditionings with "guiding_latent" set, plus a latent whose leading entries along dim 2 hold the encoded image.
+    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
+    RETURN_NAMES = ("positive", "negative", "latent")
+
+    CATEGORY = "conditioning/video_models"
+    FUNCTION = "generate"
+    # Resize (center crop, bilinear) the image to width x height, VAE-encode it, and write it at the start of an otherwise zero latent.
+    def generate(self, positive, negative, image, vae, width, height, length, batch_size):
+        pixels = comfy.utils.common_upscale(image.movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)  # last axis moved to dim 1 for the resize, then back
+        encode_pixels = pixels[:, :, :, :3]  # first 3 entries of the last axis only (presumably drops alpha - TODO confirm)
+        t = vae.encode(encode_pixels)
+        positive = node_helpers.conditioning_set_values(positive, {"guiding_latent": t})
+        negative = node_helpers.conditioning_set_values(negative, {"guiding_latent": t})
+        # Zero latent of shape [batch_size, 128, (length - 1) // 8 + 1, height // 32, width // 32].
+        latent = torch.zeros([batch_size, 128, ((length - 1) // 8) + 1, height // 32, width // 32], device=comfy.model_management.intermediate_device())
+        latent[:, :, :t.shape[2]] = t  # overwrite the first t.shape[2] entries along dim 2 with the encoded image
+        return (positive, negative, {"samples": latent}, )
+
+
+class LTXVConditioning:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {"positive": ("CONDITIONING", ),
+                             "negative": ("CONDITIONING", ),
+                             "frame_rate": ("FLOAT", {"default": 25.0, "min": 0.0, "max": 1000.0, "step": 0.01}),
+                             }}
+    RETURN_TYPES = ("CONDITIONING", "CONDITIONING")
+    RETURN_NAMES = ("positive", "negative")
+    FUNCTION = "append"
+
+    CATEGORY = "conditioning/video_models"
+
+    def append(self, positive, negative, frame_rate):
+        positive = node_helpers.conditioning_set_values(positive, {"frame_rate": frame_rate})
+        negative = node_helpers.conditioning_set_values(negative, {"frame_rate": frame_rate})
+        return (positive, negative)
+
+
+class ModelSamplingLTXV:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "model": ("MODEL",),
+                              "max_shift": ("FLOAT", {"default": 2.05, "min": 0.0, "max": 100.0, "step":0.01}),
+                              "base_shift": ("FLOAT", {"default": 0.95, "min": 0.0, "max": 100.0, "step":0.01}),
+                              },
+                "optional": {"latent": ("LATENT",), }
+                }
+
+    RETURN_TYPES = ("MODEL",)
+    FUNCTION = "patch"
+
+    CATEGORY = "advanced/model"
+
+    def patch(self, model, max_shift, base_shift, latent=None):
+        m = model.clone()
+
+        if latent is None:
+            tokens = 4096
+        else:
+            tokens = math.prod(latent["samples"].shape[2:])
+
+        x1 = 1024
+        x2 = 4096
+        mm = (max_shift - base_shift) / (x2 - x1)
+        b = base_shift - mm * x1
+        shift = (tokens) * mm + b
+
+        sampling_base = comfy.model_sampling.ModelSamplingFlux
+        sampling_type = comfy.model_sampling.CONST
+
+        class ModelSamplingAdvanced(sampling_base, sampling_type):
+            pass
+
+        model_sampling = ModelSamplingAdvanced(model.model.model_config)
+        model_sampling.set_parameters(shift=shift)
+        m.add_object_patch("model_sampling", model_sampling)
+        return (m, )
+
+
+class LTXVScheduler:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required":
+                    {"steps": ("INT", {"default": 20, "min": 1, "max": 10000}),
+                     "max_shift": ("FLOAT", {"default": 2.05, "min": 0.0, "max": 100.0, "step":0.01}),
+                     "base_shift": ("FLOAT", {"default": 0.95, "min": 0.0, "max": 100.0, "step":0.01}),
+                     "stretch": ("BOOLEAN", {
+                        "default": True,
+                        "tooltip": "Stretch the sigmas to be in the range [terminal, 1]."
+                    }),
+                     "terminal": (
+                        "FLOAT",
+                        {
+                            "default": 0.1, "min": 0.0, "max": 0.99, "step": 0.01,
+                            "tooltip": "The terminal value of the sigmas after stretching."
+                        },
+                    ),
+                    },
+                "optional": {"latent": ("LATENT",), }
+               }
+
+    RETURN_TYPES = ("SIGMAS",)
+    CATEGORY = "sampling/custom_sampling/schedulers"
+
+    FUNCTION = "get_sigmas"
+    # Returns a 1-tuple holding a tensor of steps + 1 sigmas running from 1.0 down to 0.0, reshaped by a token-count-dependent shift.
+    def get_sigmas(self, steps, max_shift, base_shift, stretch, terminal, latent=None):
+        if latent is None:
+            tokens = 4096  # fallback token count when no latent is connected
+        else:
+            tokens = math.prod(latent["samples"].shape[2:])  # product of every dim after the first two
+
+        sigmas = torch.linspace(1.0, 0.0, steps + 1)
+        # Linear interpolation of the shift: base_shift at 1024 tokens, max_shift at 4096 tokens (extrapolated outside that range).
+        x1 = 1024
+        x2 = 4096
+        mm = (max_shift - base_shift) / (x2 - x1)
+        b = base_shift - mm * x1
+        sigma_shift = (tokens) * mm + b
+        # Apply the shift as exp(shift) / (exp(shift) + (1 / sigma - 1)); entries equal to 0 are kept at 0 by the where().
+        power = 1  # exponent on (1 / sigma - 1); fixed at 1 here
+        sigmas = torch.where(
+            sigmas != 0,
+            math.exp(sigma_shift) / (math.exp(sigma_shift) + (1 / sigmas - 1) ** power),
+            0,
+        )
+
+        # Stretch sigmas so that its final value matches the given terminal value.
+        if stretch:
+            non_zero_mask = sigmas != 0  # zero entries are excluded from stretching and stay 0
+            non_zero_sigmas = sigmas[non_zero_mask]
+            one_minus_z = 1.0 - non_zero_sigmas
+            scale_factor = one_minus_z[-1] / (1.0 - terminal)  # chosen so the last non-zero sigma maps exactly to terminal
+            stretched = 1.0 - (one_minus_z / scale_factor)
+            sigmas[non_zero_mask] = stretched
+
+        return (sigmas,)
+
+
+NODE_CLASS_MAPPINGS = {
+    "EmptyLTXVLatentVideo": EmptyLTXVLatentVideo,
+    "LTXVImgToVideo": LTXVImgToVideo,
+    "ModelSamplingLTXV": ModelSamplingLTXV,
+    "LTXVConditioning": LTXVConditioning,
+    "LTXVScheduler": LTXVScheduler,
+}
--- a/comfy_extras/nodes_mochi.py
+++ b/comfy_extras/nodes_mochi.py
@@ -0,0 +1,23 @@
+import nodes
+import torch
+import comfy.model_management
+
+class EmptyMochiLatentVideo:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "width": ("INT", {"default": 848, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
+                              "height": ("INT", {"default": 480, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
+                              "length": ("INT", {"default": 25, "min": 7, "max": nodes.MAX_RESOLUTION, "step": 6}),
+                              "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096})}}
+    RETURN_TYPES = ("LATENT",)
+    FUNCTION = "generate"
+
+    CATEGORY = "latent/video"
+
+    def generate(self, width, height, length, batch_size=1):
+        latent = torch.zeros([batch_size, 12, ((length - 1) // 6) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
+        return ({"samples":latent}, )
+
+NODE_CLASS_MAPPINGS = {
+    "EmptyMochiLatentVideo": EmptyMochiLatentVideo,
+}
--- a/comfy_extras/nodes_model_advanced.py
+++ b/comfy_extras/nodes_model_advanced.py
@@ -26,8 +26,8 @@ class X0(comfy.model_sampling.EPS):
 class ModelSamplingDiscreteDistilled(comfy.model_sampling.ModelSamplingDiscrete):
    original_timesteps = 50

-    def __init__(self, model_config=None):
-        super().__init__(model_config)
+    def __init__(self, model_config=None, zsnr=None):
+        super().__init__(model_config, zsnr=zsnr)

        self.skip_steps = self.num_timesteps // self.original_timesteps

@@ -51,25 +51,6 @@ class ModelSamplingDiscreteDistilled(comfy.model_sampling.ModelSamplingDiscrete)
        return log_sigma.exp().to(timestep.device)


-def rescale_zero_terminal_snr_sigmas(sigmas):
-    alphas_cumprod = 1 / ((sigmas * sigmas) + 1)
-    alphas_bar_sqrt = alphas_cumprod.sqrt()
-
-    # Store old values.
-    alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()
-    alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone()
-
-    # Shift so the last timestep is zero.
-    alphas_bar_sqrt -= (alphas_bar_sqrt_T)
-
-    # Scale so the first timestep is back to the old value.
-    alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)
-
-    # Convert alphas_bar_sqrt to betas
-    alphas_bar = alphas_bar_sqrt**2  # Revert sqrt
-    alphas_bar[-1] = 4.8973451890853435e-08
-    return ((1 - alphas_bar) / alphas_bar) ** 0.5
-
 class ModelSamplingDiscrete:
    @classmethod
    def INPUT_TYPES(s):
@@ -100,9 +81,7 @@ class ModelSamplingDiscrete:
        class ModelSamplingAdvanced(sampling_base, sampling_type):
            pass

-        model_sampling = ModelSamplingAdvanced(model.model.model_config)
-        if zsnr:
-            model_sampling.set_sigmas(rescale_zero_terminal_snr_sigmas(model_sampling.sigmas))
+        model_sampling = ModelSamplingAdvanced(model.model.model_config, zsnr=zsnr)

        m.add_object_patch("model_sampling", model_sampling)
        return (m, )
--- a/comfy_extras/nodes_model_merging_model_specific.py
+++ b/comfy_extras/nodes_model_merging_model_specific.py
@@ -75,6 +75,34 @@ class ModelMergeSD3_2B(comfy_extras.nodes_model_merging.ModelMergeBlocks):

        return {"required": arg_dict}

+
+class ModelMergeAuraflow(comfy_extras.nodes_model_merging.ModelMergeBlocks):
+    CATEGORY = "advanced/model_merging/model_specific"
+
+    @classmethod
+    def INPUT_TYPES(s):
+        arg_dict = { "model1": ("MODEL",),
+                              "model2": ("MODEL",)}
+
+        argument = ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01})
+
+        arg_dict["init_x_linear."] = argument
+        arg_dict["positional_encoding"] = argument
+        arg_dict["cond_seq_linear."] = argument
+        arg_dict["register_tokens"] = argument
+        arg_dict["t_embedder."] = argument
+
+        for i in range(4):
+            arg_dict["double_layers.{}.".format(i)] = argument
+
+        for i in range(32):
+            arg_dict["single_layers.{}.".format(i)] = argument
+
+        arg_dict["modF."] = argument
+        arg_dict["final_linear."] = argument
+
+        return {"required": arg_dict}
+
 class ModelMergeFlux1(comfy_extras.nodes_model_merging.ModelMergeBlocks):
    CATEGORY = "advanced/model_merging/model_specific"

@@ -101,10 +129,58 @@ class ModelMergeFlux1(comfy_extras.nodes_model_merging.ModelMergeBlocks):

        return {"required": arg_dict}

+class ModelMergeSD35_Large(comfy_extras.nodes_model_merging.ModelMergeBlocks):
+    CATEGORY = "advanced/model_merging/model_specific"
+
+    @classmethod
+    def INPUT_TYPES(s):
+        arg_dict = { "model1": ("MODEL",),
+                              "model2": ("MODEL",)}
+
+        argument = ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01})
+
+        arg_dict["pos_embed."] = argument
+        arg_dict["x_embedder."] = argument
+        arg_dict["context_embedder."] = argument
+        arg_dict["y_embedder."] = argument
+        arg_dict["t_embedder."] = argument
+
+        for i in range(38):
+            arg_dict["joint_blocks.{}.".format(i)] = argument
+
+        arg_dict["final_layer."] = argument
+
+        return {"required": arg_dict}
+
+class ModelMergeMochiPreview(comfy_extras.nodes_model_merging.ModelMergeBlocks):
+    CATEGORY = "advanced/model_merging/model_specific"
+
+    @classmethod
+    def INPUT_TYPES(s):
+        arg_dict = { "model1": ("MODEL",),
+                              "model2": ("MODEL",)}
+
+        argument = ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01})
+
+        arg_dict["pos_frequencies."] = argument
+        arg_dict["t_embedder."] = argument
+        arg_dict["t5_y_embedder."] = argument
+        arg_dict["t5_yproj."] = argument
+
+        for i in range(48):
+            arg_dict["blocks.{}.".format(i)] = argument
+
+        arg_dict["final_layer."] = argument
+
+        return {"required": arg_dict}
+
 NODE_CLASS_MAPPINGS = {
    "ModelMergeSD1": ModelMergeSD1,
    "ModelMergeSD2": ModelMergeSD1, #SD1 and SD2 have the same blocks
    "ModelMergeSDXL": ModelMergeSDXL,
    "ModelMergeSD3_2B": ModelMergeSD3_2B,
+    "ModelMergeAuraflow": ModelMergeAuraflow,
    "ModelMergeFlux1": ModelMergeFlux1,
+    "ModelMergeSD35_Large": ModelMergeSD35_Large,
+    "ModelMergeMochiPreview": ModelMergeMochiPreview,
 }
--- a/comfy_extras/nodes_sag.py
+++ b/comfy_extras/nodes_sag.py
@@ -57,12 +57,24 @@ def create_blur_map(x0, attn, sigma=3.0, threshold=1.0):
    attn = attn.reshape(b, -1, hw1, hw2)
    # Global Average Pool
    mask = attn.mean(1, keepdim=False).sum(1, keepdim=False) > threshold
-    ratio = 2**(math.ceil(math.sqrt(lh * lw / hw1)) - 1).bit_length()
-    mid_shape = [math.ceil(lh / ratio), math.ceil(lw / ratio)]
+
+    total = mask.shape[-1]
+    x = round(math.sqrt((lh / lw) * total))
+    xx = None
+    for i in range(0, math.floor(math.sqrt(total) / 2)):
+        for j in [(x + i), max(1, x - i)]:
+            if total % j == 0:
+                xx = j
+                break
+        if xx is not None:
+            break
+
+    x = xx
+    y = total // x

    # Reshape
    mask = (
-        mask.reshape(b, *mid_shape)
+        mask.reshape(b, x, y)
        .unsqueeze(1)
        .type(attn.dtype)
    )
--- a/comfy_extras/nodes_sd3.py
+++ b/comfy_extras/nodes_sd3.py
@@ -3,24 +3,29 @@ import comfy.sd
 import comfy.model_management
 import nodes
 import torch
+import comfy_extras.nodes_slg
+

 class TripleCLIPLoader:
    @classmethod
    def INPUT_TYPES(s):
-        return {"required": { "clip_name1": (folder_paths.get_filename_list("clip"), ), "clip_name2": (folder_paths.get_filename_list("clip"), ), "clip_name3": (folder_paths.get_filename_list("clip"), )
+        return {"required": { "clip_name1": (folder_paths.get_filename_list("text_encoders"), ), "clip_name2": (folder_paths.get_filename_list("text_encoders"), ), "clip_name3": (folder_paths.get_filename_list("text_encoders"), )
                             }}
    RETURN_TYPES = ("CLIP",)
    FUNCTION = "load_clip"

    CATEGORY = "advanced/loaders"

+    DESCRIPTION = "[Recipes]\n\nsd3: clip-l, clip-g, t5"
+
    def load_clip(self, clip_name1, clip_name2, clip_name3):
-        clip_path1 = folder_paths.get_full_path_or_raise("clip", clip_name1)
-        clip_path2 = folder_paths.get_full_path_or_raise("clip", clip_name2)
-        clip_path3 = folder_paths.get_full_path_or_raise("clip", clip_name3)
+        clip_path1 = folder_paths.get_full_path_or_raise("text_encoders", clip_name1)
+        clip_path2 = folder_paths.get_full_path_or_raise("text_encoders", clip_name2)
+        clip_path3 = folder_paths.get_full_path_or_raise("text_encoders", clip_name3)
        clip = comfy.sd.load_clip(ckpt_paths=[clip_path1, clip_path2, clip_path3], embedding_directory=folder_paths.get_folder_paths("embeddings"))
        return (clip,)

+
 class EmptySD3LatentImage:
    def __init__(self):
        self.device = comfy.model_management.intermediate_device()
@@ -39,6 +44,7 @@ class EmptySD3LatentImage:
        latent = torch.zeros([batch_size, 16, height // 8, width // 8], device=self.device)
        return ({"samples":latent}, )

+
 class CLIPTextEncodeSD3:
    @classmethod
    def INPUT_TYPES(s):
@@ -95,11 +101,36 @@ class ControlNetApplySD3(nodes.ControlNetApplyAdvanced):
    CATEGORY = "conditioning/controlnet"
    DEPRECATED = True

+
+class SkipLayerGuidanceSD3(comfy_extras.nodes_slg.SkipLayerGuidanceDiT):
+    '''
+    Enhance guidance towards detailed structure by having another set of CFG negative with skipped layers.
+    Inspired by Perturbed Attention Guidance (https://arxiv.org/abs/2403.17377)
+    Experimental implementation by Dango233@StabilityAI.
+    '''
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {"model": ("MODEL", ),
+                             "layers": ("STRING", {"default": "7, 8, 9", "multiline": False}),
+                             "scale": ("FLOAT", {"default": 3.0, "min": 0.0, "max": 10.0, "step": 0.1}),
+                             "start_percent": ("FLOAT", {"default": 0.01, "min": 0.0, "max": 1.0, "step": 0.001}),
+                             "end_percent": ("FLOAT", {"default": 0.15, "min": 0.0, "max": 1.0, "step": 0.001})
+                                }}
+    RETURN_TYPES = ("MODEL",)
+    FUNCTION = "skip_guidance_sd3"
+
+    CATEGORY = "advanced/guidance"
+    # Thin wrapper: "layers" is forwarded as double_layers; single_layers is not passed, so its default applies.
+    def skip_guidance_sd3(self, model, layers, scale, start_percent, end_percent):
+        return self.skip_guidance(model=model, scale=scale, start_percent=start_percent, end_percent=end_percent, double_layers=layers)
+
+
 NODE_CLASS_MAPPINGS = {
    "TripleCLIPLoader": TripleCLIPLoader,
    "EmptySD3LatentImage": EmptySD3LatentImage,
    "CLIPTextEncodeSD3": CLIPTextEncodeSD3,
    "ControlNetApplySD3": ControlNetApplySD3,
+    "SkipLayerGuidanceSD3": SkipLayerGuidanceSD3,
 }

 NODE_DISPLAY_NAME_MAPPINGS = {
--- a/comfy_extras/nodes_slg.py
+++ b/comfy_extras/nodes_slg.py
@@ -0,0 +1,78 @@
+import comfy.model_patcher
+import comfy.samplers
+import re
+
+
+class SkipLayerGuidanceDiT:
+    '''
+    Enhance guidance towards detailed structure by having another set of CFG negative with skipped layers.
+    Inspired by Perturbed Attention Guidance (https://arxiv.org/abs/2403.17377)
+    Original experimental implementation for SD3 by Dango233@StabilityAI.
+    '''
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {"model": ("MODEL", ),
+                             "double_layers": ("STRING", {"default": "7, 8, 9", "multiline": False}),
+                             "single_layers": ("STRING", {"default": "7, 8, 9", "multiline": False}),
+                             "scale": ("FLOAT", {"default": 3.0, "min": 0.0, "max": 10.0, "step": 0.1}),
+                             "start_percent": ("FLOAT", {"default": 0.01, "min": 0.0, "max": 1.0, "step": 0.001}),
+                             "end_percent": ("FLOAT", {"default": 0.15, "min": 0.0, "max": 1.0, "step": 0.001})
+                                }}
+    RETURN_TYPES = ("MODEL",)
+    FUNCTION = "skip_guidance"
+    EXPERIMENTAL = True
+
+    DESCRIPTION = "Generic version of SkipLayerGuidance node that can be used on every DiT model."
+
+    CATEGORY = "advanced/guidance"
+
+    def skip_guidance(self, model, scale, start_percent, end_percent, double_layers="", single_layers=""):
+        # Replacement installed for each skipped block: it hands its args back unchanged.
+        def skip(args, extra_args):
+            return args
+
+        model_sampling = model.get_model_object("model_sampling")
+        sigma_start = model_sampling.percent_to_sigma(start_percent)
+        sigma_end = model_sampling.percent_to_sigma(end_percent)
+
+        # Layer lists are free text: every run of digits becomes one layer index.
+        double_layers = [int(i) for i in re.findall(r'\d+', double_layers)]
+        single_layers = [int(i) for i in re.findall(r'\d+', single_layers)]
+
+        # Nothing to skip: return the incoming model without cloning or patching it.
+        if not double_layers and not single_layers:
+            return (model, )
+
+        # Post-CFG hook: re-runs args["cond"] with the listed blocks replaced by skip and corrects the CFG result.
+        def post_cfg_function(args):
+            model = args["model"]
+            cond_pred = args["cond_denoised"]
+            cond = args["cond"]
+            cfg_result = args["denoised"]
+            sigma = args["sigma"]
+            x = args["input"]
+            model_options = args["model_options"].copy()
+
+            for layer in double_layers:
+                model_options = comfy.model_patcher.set_model_options_patch_replace(model_options, skip, "dit", "double_block", layer)
+
+            for layer in single_layers:
+                model_options = comfy.model_patcher.set_model_options_patch_replace(model_options, skip, "dit", "single_block", layer)
+
+            # sigma[0] stands in for the whole batch; the correction applies only while
+            # sigma_end <= sigma <= sigma_start (and scale is positive).
+            sigma_ = sigma[0].item()
+            if scale > 0 and sigma_ >= sigma_end and sigma_ <= sigma_start:
+                (slg,) = comfy.samplers.calc_cond_batch(model, [cond], x, sigma, model_options)
+                cfg_result = cfg_result + (cond_pred - slg) * scale
+            return cfg_result
+
+        m = model.clone()
+        m.set_model_sampler_post_cfg_function(post_cfg_function)
+
+        return (m, )
+
+
+NODE_CLASS_MAPPINGS = {
+    "SkipLayerGuidanceDiT": SkipLayerGuidanceDiT,
+}
--- a/comfyui_screenshot.png
+++ b/comfyui_screenshot.png
--- a/folder_paths.py
+++ b/folder_paths.py
@@ -18,7 +18,7 @@ folder_names_and_paths["configs"] = ([os.path.join(models_dir, "configs")], [".y

 folder_names_and_paths["loras"] = ([os.path.join(models_dir, "loras")], supported_pt_extensions)
 folder_names_and_paths["vae"] = ([os.path.join(models_dir, "vae")], supported_pt_extensions)
-folder_names_and_paths["clip"] = ([os.path.join(models_dir, "clip")], supported_pt_extensions)
+folder_names_and_paths["text_encoders"] = ([os.path.join(models_dir, "text_encoders"), os.path.join(models_dir, "clip")], supported_pt_extensions)
 folder_names_and_paths["diffusion_models"] = ([os.path.join(models_dir, "unet"), os.path.join(models_dir, "diffusion_models")], supported_pt_extensions)
 folder_names_and_paths["clip_vision"] = ([os.path.join(models_dir, "clip_vision")], supported_pt_extensions)
 folder_names_and_paths["style_models"] = ([os.path.join(models_dir, "style_models")], supported_pt_extensions)
@@ -81,7 +81,8 @@ extension_mimetypes_cache = {
 }

 def map_legacy(folder_name: str) -> str:
-    legacy = {"unet": "diffusion_models"}
+    legacy = {"unet": "diffusion_models",
+              "clip": "text_encoders"}
    return legacy.get(folder_name, folder_name)

 if not os.path.exists(input_directory):
--- a/latent_preview.py
+++ b/latent_preview.py
@@ -47,7 +47,12 @@ class Latent2RGBPreviewer(LatentPreviewer):
        if self.latent_rgb_factors_bias is not None:
            self.latent_rgb_factors_bias = self.latent_rgb_factors_bias.to(dtype=x0.dtype, device=x0.device)

-        latent_image = torch.nn.functional.linear(x0[0].permute(1, 2, 0), self.latent_rgb_factors, bias=self.latent_rgb_factors_bias)
+        if x0.ndim == 5:
+            x0 = x0[0, :, 0]
+        else:
+            x0 = x0[0]
+
+        latent_image = torch.nn.functional.linear(x0.movedim(0, -1), self.latent_rgb_factors, bias=self.latent_rgb_factors_bias)
        # latent_image = x0[0].permute(1, 2, 0) @ self.latent_rgb_factors

        return preview_to_image(latent_image)
--- a/main.py
+++ b/main.py
@@ -71,6 +71,7 @@ if os.name == "nt":
 if __name__ == "__main__":
    if args.cuda_device is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda_device)
+        os.environ['HIP_VISIBLE_DEVICES'] = str(args.cuda_device)
        logging.info("Set cuda device to: {}".format(args.cuda_device))

    if args.deterministic:
--- a/models/text_encoders/put_text_encoder_files_here
+++ b/models/text_encoders/put_text_encoder_files_here
--- a/nodes.py
+++ b/nodes.py
@@ -281,21 +281,30 @@ class VAEDecode:
    DESCRIPTION = "Decodes latent images back into pixel space images."

    def decode(self, vae, samples):
-        return (vae.decode(samples["samples"]), )
+        images = vae.decode(samples["samples"])
+        if len(images.shape) == 5: #Combine batches
+            images = images.reshape(-1, images.shape[-3], images.shape[-2], images.shape[-1])
+        return (images, )

 class VAEDecodeTiled:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"samples": ("LATENT", ), "vae": ("VAE", ),
-                             "tile_size": ("INT", {"default": 512, "min": 320, "max": 4096, "step": 64})
+                             "tile_size": ("INT", {"default": 512, "min": 128, "max": 4096, "step": 32}),
+                             "overlap": ("INT", {"default": 64, "min": 0, "max": 4096, "step": 32}),
                            }}
    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "decode"

    CATEGORY = "_for_testing"

-    def decode(self, vae, samples, tile_size):
-        return (vae.decode_tiled(samples["samples"], tile_x=tile_size // 8, tile_y=tile_size // 8, ), )
+    def decode(self, vae, samples, tile_size, overlap=64):
+        if tile_size < overlap * 4:
+            overlap = tile_size // 4
+        images = vae.decode_tiled(samples["samples"], tile_x=tile_size // 8, tile_y=tile_size // 8, overlap=overlap // 8)
+        if len(images.shape) == 5: #Combine batches
+            images = images.reshape(-1, images.shape[-3], images.shape[-2], images.shape[-1])
+        return (images, )

 class VAEEncode:
    @classmethod
@@ -373,6 +382,7 @@ class InpaintModelConditioning:
                             "vae": ("VAE", ),
                             "pixels": ("IMAGE", ),
                             "mask": ("MASK", ),
+                             "noise_mask": ("BOOLEAN", {"default": True, "tooltip": "Add a noise mask to the latent so sampling will only happen within the mask. Might improve results or completely break things depending on the model."}),
                             }}

    RETURN_TYPES = ("CONDITIONING","CONDITIONING","LATENT")
@@ -381,7 +391,7 @@ class InpaintModelConditioning:

    CATEGORY = "conditioning/inpaint"

-    def encode(self, positive, negative, pixels, vae, mask):
+    def encode(self, positive, negative, pixels, vae, mask, noise_mask):
        x = (pixels.shape[1] // 8) * 8
        y = (pixels.shape[2] // 8) * 8
        mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(pixels.shape[1], pixels.shape[2]), mode="bilinear")
@@ -405,7 +415,8 @@ class InpaintModelConditioning:
        out_latent = {}

        out_latent["samples"] = orig_latent
-        out_latent["noise_mask"] = mask
+        if noise_mask:
+            out_latent["noise_mask"] = mask

        out = []
        for conditioning in [positive, negative]:
@@ -885,14 +896,16 @@ class UNETLoader:
 class CLIPLoader:
    @classmethod
    def INPUT_TYPES(s):
-        return {"required": { "clip_name": (folder_paths.get_filename_list("clip"), ),
-                              "type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio"], ),
+        return {"required": { "clip_name": (folder_paths.get_filename_list("text_encoders"), ),
+                              "type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv"], ),
                             }}
    RETURN_TYPES = ("CLIP",)
    FUNCTION = "load_clip"

    CATEGORY = "advanced/loaders"

+    DESCRIPTION = "[Recipes]\n\nstable_diffusion: clip-l\nstable_cascade: clip-g\nsd3: t5 / clip-g / clip-l\nstable_audio: t5\nmochi: t5"
+
    def load_clip(self, clip_name, type="stable_diffusion"):
        if type == "stable_cascade":
            clip_type = comfy.sd.CLIPType.STABLE_CASCADE
@@ -900,18 +913,22 @@ class CLIPLoader:
            clip_type = comfy.sd.CLIPType.SD3
        elif type == "stable_audio":
            clip_type = comfy.sd.CLIPType.STABLE_AUDIO
+        elif type == "mochi":
+            clip_type = comfy.sd.CLIPType.MOCHI
+        elif type == "ltxv":
+            clip_type = comfy.sd.CLIPType.LTXV
        else:
            clip_type = comfy.sd.CLIPType.STABLE_DIFFUSION

-        clip_path = folder_paths.get_full_path_or_raise("clip", clip_name)
+        clip_path = folder_paths.get_full_path_or_raise("text_encoders", clip_name)
        clip = comfy.sd.load_clip(ckpt_paths=[clip_path], embedding_directory=folder_paths.get_folder_paths("embeddings"), clip_type=clip_type)
        return (clip,)

 class DualCLIPLoader:
    @classmethod
    def INPUT_TYPES(s):
-        return {"required": { "clip_name1": (folder_paths.get_filename_list("clip"), ),
-                              "clip_name2": (folder_paths.get_filename_list("clip"), ),
+        return {"required": { "clip_name1": (folder_paths.get_filename_list("text_encoders"), ),
+                              "clip_name2": (folder_paths.get_filename_list("text_encoders"), ),
                              "type": (["sdxl", "sd3", "flux"], ),
                             }}
    RETURN_TYPES = ("CLIP",)
@@ -919,9 +936,11 @@ class DualCLIPLoader:

    CATEGORY = "advanced/loaders"

+    DESCRIPTION = "[Recipes]\n\nsdxl: clip-l, clip-g\nsd3: clip-l, clip-g / clip-l, t5 / clip-g, t5\nflux: clip-l, t5"
+
    def load_clip(self, clip_name1, clip_name2, type):
-        clip_path1 = folder_paths.get_full_path_or_raise("clip", clip_name1)
-        clip_path2 = folder_paths.get_full_path_or_raise("clip", clip_name2)
+        clip_path1 = folder_paths.get_full_path_or_raise("text_encoders", clip_name1)
+        clip_path2 = folder_paths.get_full_path_or_raise("text_encoders", clip_name2)
        if type == "sdxl":
            clip_type = comfy.sd.CLIPType.STABLE_DIFFUSION
        elif type == "sd3":
@@ -1179,10 +1198,10 @@ class LatentUpscale:

            if width == 0:
                height = max(64, height)
-                width = max(64, round(samples["samples"].shape[3] * height / samples["samples"].shape[2]))
+                width = max(64, round(samples["samples"].shape[-1] * height / samples["samples"].shape[-2]))
            elif height == 0:
                width = max(64, width)
-                height = max(64, round(samples["samples"].shape[2] * width / samples["samples"].shape[3]))
+                height = max(64, round(samples["samples"].shape[-2] * width / samples["samples"].shape[-1]))
            else:
                width = max(64, width)
                height = max(64, height)
@@ -1204,8 +1223,8 @@ class LatentUpscaleBy:

    def upscale(self, samples, upscale_method, scale_by):
        s = samples.copy()
-        width = round(samples["samples"].shape[3] * scale_by)
-        height = round(samples["samples"].shape[2] * scale_by)
+        width = round(samples["samples"].shape[-1] * scale_by)
+        height = round(samples["samples"].shape[-2] * scale_by)
        s["samples"] = comfy.utils.common_upscale(samples["samples"], width, height, upscale_method, "disabled")
        return (s,)

@@ -1952,6 +1971,12 @@ NODE_DISPLAY_NAME_MAPPINGS = {
    "ImageInvert": "Invert Image",
    "ImagePadForOutpaint": "Pad Image for Outpainting",
    "ImageBatch": "Batch Images",
+    "ImageCrop": "Image Crop",
+    "ImageBlend": "Image Blend",
+    "ImageBlur": "Image Blur",
+    "ImageQuantize": "Image Quantize",
+    "ImageSharpen": "Image Sharpen",
+    "ImageScaleToTotalPixels": "Scale Image to Total Pixels",
    # _for_testing
    "VAEDecodeTiled": "VAE Decode (Tiled)",
    "VAEEncodeTiled": "VAE Encode (Tiled)",
@@ -2111,6 +2136,9 @@ def init_builtin_extra_nodes():
        "nodes_flux.py",
        "nodes_lora_extract.py",
        "nodes_torch_compile.py",
+        "nodes_mochi.py",
+        "nodes_slg.py",
+        "nodes_lt.py",
    ]

    import_failed = []
--- a/server.py
+++ b/server.py
@@ -40,7 +40,7 @@ class BinaryEventTypes:
 async def send_socket_catch_exception(function, message):
    try:
        await function(message)
-    except (aiohttp.ClientError, aiohttp.ClientPayloadError, ConnectionResetError) as err:
+    except (aiohttp.ClientError, aiohttp.ClientPayloadError, ConnectionResetError, BrokenPipeError, ConnectionError) as err:
        logging.warning("send error: {}".format(err))

 def get_comfyui_version():
@@ -152,7 +152,7 @@ class PromptServer():
        mimetypes.types_map['.js'] = 'application/javascript; charset=utf-8'

        self.user_manager = UserManager()
-        self.internal_routes = InternalRoutes()
+        self.internal_routes = InternalRoutes(self)
        self.supports = ["custom_nodes_from_web"]
        self.prompt_queue = None
        self.loop = loop
--- a/tests-unit/prompt_server_test/user_manager_test.py
+++ b/tests-unit/prompt_server_test/user_manager_test.py
@@ -14,7 +14,7 @@ def user_manager(tmp_path):
    um = UserManager()
    um.get_request_user_filepath = lambda req, file, **kwargs: os.path.join(
        tmp_path, file
-    )
+    ) if file else tmp_path
    return um


@@ -80,9 +80,7 @@ async def test_listuserdata_split_path(aiohttp_client, app, tmp_path):
    client = await aiohttp_client(app)
    resp = await client.get("/userdata?dir=test_dir&recurse=true&split=true")
    assert resp.status == 200
-    assert await resp.json() == [
-        ["subdir/file1.txt", "subdir", "file1.txt"]
-    ]
+    assert await resp.json() == [["subdir/file1.txt", "subdir", "file1.txt"]]


 async def test_listuserdata_invalid_directory(aiohttp_client, app):
@@ -118,3 +116,116 @@ async def test_listuserdata_normalized_separator(aiohttp_client, app, tmp_path):
            assert "/" in result[0]["path"]  # Ensure forward slash is used
            assert "\\" not in result[0]["path"]  # Ensure backslash is not present
            assert result[0]["path"] == "subdir/file1.txt"
+
+
+async def test_post_userdata_new_file(aiohttp_client, app, tmp_path):
+    client = await aiohttp_client(app)
+    content = b"test content"
+    resp = await client.post("/userdata/test.txt", data=content)
+    # No query flags are sent: the body is expected to be just the JSON-encoded relative path
+    assert resp.status == 200
+    assert await resp.text() == '"test.txt"'
+
+    # Verify the file now exists under tmp_path with exactly the posted bytes
+    with open(tmp_path / "test.txt", "rb") as f:
+        assert f.read() == content
+
+
+async def test_post_userdata_overwrite_existing(aiohttp_client, app, tmp_path):
+    # Seed a file so the POST below targets a path that already exists
+    with open(tmp_path / "test.txt", "w") as f:
+        f.write("initial content")
+
+    client = await aiohttp_client(app)
+    new_content = b"updated content"
+    resp = await client.post("/userdata/test.txt", data=new_content)
+
+    assert resp.status == 200
+    assert await resp.text() == '"test.txt"'
+
+    # No overwrite flag was sent, and the seeded content must still have been replaced
+    with open(tmp_path / "test.txt", "rb") as f:
+        assert f.read() == new_content
+
+
+async def test_post_userdata_no_overwrite(aiohttp_client, app, tmp_path):
+    # Seed a file that the request below must not be allowed to replace
+    with open(tmp_path / "test.txt", "w") as f:
+        f.write("initial content")
+
+    client = await aiohttp_client(app)
+    resp = await client.post("/userdata/test.txt?overwrite=false", data=b"new content")
+    # overwrite=false against an existing path is expected to be refused with 409
+    assert resp.status == 409
+
+    # The refused write must leave the seeded content untouched
+    with open(tmp_path / "test.txt", "r") as f:
+        assert f.read() == "initial content"
+
+
+async def test_post_userdata_full_info(aiohttp_client, app, tmp_path):
+    client = await aiohttp_client(app)
+    content = b"test content"
+    resp = await client.post("/userdata/test.txt?full_info=true", data=content)
+    # full_info=true: the body is expected to be an object with path, size and modified
+    assert resp.status == 200
+    result = await resp.json()
+    assert result["path"] == "test.txt"
+    assert result["size"] == len(content)
+    assert "modified" in result
+
+
+async def test_move_userdata(aiohttp_client, app, tmp_path):
+    # Seed the source file; no dest.txt exists before the request
+    with open(tmp_path / "source.txt", "w") as f:
+        f.write("test content")
+
+    client = await aiohttp_client(app)
+    resp = await client.post("/userdata/source.txt/move/dest.txt")
+
+    assert resp.status == 200
+    assert await resp.text() == '"dest.txt"'
+
+    # A move, not a copy: the source must be gone and the destination must hold its content
+    assert not os.path.exists(tmp_path / "source.txt")
+    with open(tmp_path / "dest.txt", "r") as f:
+        assert f.read() == "test content"
+
+
+async def test_move_userdata_no_overwrite(aiohttp_client, app, tmp_path):
+    # Seed both ends with different content so any overwrite would be detectable
+    with open(tmp_path / "source.txt", "w") as f:
+        f.write("source content")
+    with open(tmp_path / "dest.txt", "w") as f:
+        f.write("destination content")
+
+    client = await aiohttp_client(app)
+    resp = await client.post("/userdata/source.txt/move/dest.txt?overwrite=false")
+    # overwrite=false onto an existing destination is expected to be refused with 409
+    assert resp.status == 409
+
+    # The refused move must leave both files exactly as seeded
+    with open(tmp_path / "source.txt", "r") as f:
+        assert f.read() == "source content"
+    with open(tmp_path / "dest.txt", "r") as f:
+        assert f.read() == "destination content"
+
+
+async def test_move_userdata_full_info(aiohttp_client, app, tmp_path):
+    # Seed the source file; no dest.txt exists before the request
+    with open(tmp_path / "source.txt", "w") as f:
+        f.write("test content")
+
+    client = await aiohttp_client(app)
+    resp = await client.post("/userdata/source.txt/move/dest.txt?full_info=true")
+    # full_info=true: the body is expected to describe the destination (path, size, modified)
+    assert resp.status == 200
+    result = await resp.json()
+    assert result["path"] == "dest.txt"
+    assert result["size"] == len("test content")
+    assert "modified" in result
+
+    # The move itself must still have happened: source gone, destination holds the content
+    assert not os.path.exists(tmp_path / "source.txt")
+    with open(tmp_path / "dest.txt", "r") as f:
+        assert f.read() == "test content"
--- a/tests-unit/server/routes/internal_routes_test.py
+++ b/tests-unit/server/routes/internal_routes_test.py
@@ -8,7 +8,7 @@ from folder_paths import models_dir, user_directory, output_directory

@pytest.fixture
 def internal_routes():
-    return InternalRoutes()
+    return InternalRoutes(None)

@pytest.fixture
 def aiohttp_client_factory(aiohttp_client, internal_routes):
@@ -102,7 +102,7 @@ async def test_file_service_initialization():
        # Create a mock instance
        mock_file_service_instance = MagicMock(spec=FileService)
        MockFileService.return_value = mock_file_service_instance
-        internal_routes = InternalRoutes()
+        internal_routes = InternalRoutes(None)

        # Check if FileService was initialized with the correct parameters
        MockFileService.assert_called_once_with({
@@ -112,4 +112,4 @@ async def test_file_service_initialization():
        })

        # Verify that the file_service attribute of InternalRoutes is set
-        assert internal_routes.file_service == mock_file_service_instance
+        assert internal_routes.file_service == mock_file_service_instance
--- a/web/assets/ExtensionPanel-CfMfcLgI.js
+++ b/web/assets/ExtensionPanel-CfMfcLgI.js
@@ -0,0 +1,103 @@
+var __defProp = Object.defineProperty;
+var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
+import { d as defineComponent, c6 as useExtensionStore, u as useSettingStore, r as ref, o as onMounted, q as computed, g as openBlock, h as createElementBlock, i as createVNode, y as withCtx, z as unref, bT as script$1, A as createBaseVNode, x as createBlock, N as Fragment, O as renderList, a6 as toDisplayString, aw as createTextVNode, bR as script$3, j as createCommentVNode, D as script$4 } from "./index-B6dYHNhg.js";
+import { s as script, a as script$2 } from "./index-CjwCGacA.js";
+import "./index-MX9DEi8Q.js";
+const _hoisted_1 = { class: "extension-panel" };
+const _hoisted_2 = { class: "mt-4" };
+const _sfc_main = /* @__PURE__ */ defineComponent({
+  __name: "ExtensionPanel",
+  setup(__props) {
+    const extensionStore = useExtensionStore();
+    const settingStore = useSettingStore();
+    const editingEnabledExtensions = ref({});
+    onMounted(() => {
+      extensionStore.extensions.forEach((ext) => {
+        editingEnabledExtensions.value[ext.name] = extensionStore.isExtensionEnabled(ext.name);
+      });
+    });
+    const changedExtensions = computed(() => {
+      return extensionStore.extensions.filter(
+        (ext) => editingEnabledExtensions.value[ext.name] !== extensionStore.isExtensionEnabled(ext.name)
+      );
+    });
+    const hasChanges = computed(() => {
+      return changedExtensions.value.length > 0;
+    });
+    const updateExtensionStatus = /* @__PURE__ */ __name(() => {
+      const editingDisabledExtensionNames = Object.entries(
+        editingEnabledExtensions.value
+      ).filter(([_, enabled]) => !enabled).map(([name]) => name);
+      settingStore.set("Comfy.Extension.Disabled", [
+        ...extensionStore.inactiveDisabledExtensionNames,
+        ...editingDisabledExtensionNames
+      ]);
+    }, "updateExtensionStatus");
+    const applyChanges = /* @__PURE__ */ __name(() => {
+      window.location.reload();
+    }, "applyChanges");
+    return (_ctx, _cache) => {
+      return openBlock(), createElementBlock("div", _hoisted_1, [
+        createVNode(unref(script$2), {
+          value: unref(extensionStore).extensions,
+          stripedRows: "",
+          size: "small"
+        }, {
+          default: withCtx(() => [
+            createVNode(unref(script), {
+              field: "name",
+              header: _ctx.$t("extensionName"),
+              sortable: ""
+            }, null, 8, ["header"]),
+            createVNode(unref(script), { pt: {
+              bodyCell: "flex items-center justify-end"
+            } }, {
+              body: withCtx((slotProps) => [
+                createVNode(unref(script$1), {
+                  modelValue: editingEnabledExtensions.value[slotProps.data.name],
+                  "onUpdate:modelValue": /* @__PURE__ */ __name(($event) => editingEnabledExtensions.value[slotProps.data.name] = $event, "onUpdate:modelValue"),
+                  onChange: updateExtensionStatus
+                }, null, 8, ["modelValue", "onUpdate:modelValue"])
+              ]),
+              _: 1
+            })
+          ]),
+          _: 1
+        }, 8, ["value"]),
+        createBaseVNode("div", _hoisted_2, [
+          hasChanges.value ? (openBlock(), createBlock(unref(script$3), {
+            key: 0,
+            severity: "info"
+          }, {
+            default: withCtx(() => [
+              createBaseVNode("ul", null, [
+                (openBlock(true), createElementBlock(Fragment, null, renderList(changedExtensions.value, (ext) => {
+                  return openBlock(), createElementBlock("li", {
+                    key: ext.name
+                  }, [
+                    createBaseVNode("span", null, toDisplayString(unref(extensionStore).isExtensionEnabled(ext.name) ? "[-]" : "[+]"), 1),
+                    createTextVNode(" " + toDisplayString(ext.name), 1)
+                  ]);
+                }), 128))
+              ])
+            ]),
+            _: 1
+          })) : createCommentVNode("", true),
+          createVNode(unref(script$4), {
+            label: _ctx.$t("reloadToApplyChanges"),
+            icon: "pi pi-refresh",
+            onClick: applyChanges,
+            disabled: !hasChanges.value,
+            text: "",
+            fluid: "",
+            severity: "danger"
+          }, null, 8, ["label", "disabled"])
+        ])
+      ]);
+    };
+  }
+});
+export {
+  _sfc_main as default
+};
+//# sourceMappingURL=ExtensionPanel-CfMfcLgI.js.map
--- a/web/assets/ExtensionPanel-CfMfcLgI.js.map
+++ b/web/assets/ExtensionPanel-CfMfcLgI.js.map
@@ -0,0 +1 @@
+{"version":3,"file":"ExtensionPanel-CfMfcLgI.js","sources":["../../src/components/dialog/content/setting/ExtensionPanel.vue"],"sourcesContent":["<template>\n  <div class=\"extension-panel\">\n    <DataTable :value=\"extensionStore.extensions\" stripedRows size=\"small\">\n      <Column field=\"name\" :header=\"$t('extensionName')\" sortable></Column>\n      <Column\n        :pt=\"{\n          bodyCell: 'flex items-center justify-end'\n        }\"\n      >\n        <template #body=\"slotProps\">\n          <ToggleSwitch\n            v-model=\"editingEnabledExtensions[slotProps.data.name]\"\n            @change=\"updateExtensionStatus\"\n          />\n        </template>\n      </Column>\n    </DataTable>\n    <div class=\"mt-4\">\n      <Message v-if=\"hasChanges\" severity=\"info\">\n        <ul>\n          <li v-for=\"ext in changedExtensions\" :key=\"ext.name\">\n            <span>\n              {{ extensionStore.isExtensionEnabled(ext.name) ? '[-]' : '[+]' }}\n            </span>\n            {{ ext.name }}\n          </li>\n        </ul>\n      </Message>\n      <Button\n        :label=\"$t('reloadToApplyChanges')\"\n        icon=\"pi pi-refresh\"\n        @click=\"applyChanges\"\n        :disabled=\"!hasChanges\"\n        text\n        fluid\n        severity=\"danger\"\n      />\n    </div>\n  </div>\n</template>\n\n<script setup lang=\"ts\">\nimport { ref, computed, onMounted } from 'vue'\nimport { useExtensionStore } from '@/stores/extensionStore'\nimport { useSettingStore } from '@/stores/settingStore'\nimport DataTable from 'primevue/datatable'\nimport Column from 'primevue/column'\nimport ToggleSwitch from 'primevue/toggleswitch'\nimport Button from 'primevue/button'\nimport Message from 'primevue/message'\n\nconst extensionStore = useExtensionStore()\nconst settingStore = useSettingStore()\n\nconst editingEnabledExtensions = ref<Record<string, boolean>>({})\n\nonMounted(() => {\n  extensionStore.extensions.forEach((ext) => {\n    
editingEnabledExtensions.value[ext.name] =\n      extensionStore.isExtensionEnabled(ext.name)\n  })\n})\n\nconst changedExtensions = computed(() => {\n  return extensionStore.extensions.filter(\n    (ext) =>\n      editingEnabledExtensions.value[ext.name] !==\n      extensionStore.isExtensionEnabled(ext.name)\n  )\n})\n\nconst hasChanges = computed(() => {\n  return changedExtensions.value.length > 0\n})\n\nconst updateExtensionStatus = () => {\n  const editingDisabledExtensionNames = Object.entries(\n    editingEnabledExtensions.value\n  )\n    .filter(([_, enabled]) => !enabled)\n    .map(([name]) => name)\n\n  settingStore.set('Comfy.Extension.Disabled', [\n    ...extensionStore.inactiveDisabledExtensionNames,\n    ...editingDisabledExtensionNames\n  ])\n}\n\nconst applyChanges = () => {\n  // Refresh the page to apply changes\n  window.location.reload()\n}\n</script>\n"],"names":[],"mappings":";;;;;;;;;;AAmDA,UAAM,iBAAiB;AACvB,UAAM,eAAe;AAEf,UAAA,2BAA2B,IAA6B,CAAA,CAAE;AAEhE,cAAU,MAAM;AACC,qBAAA,WAAW,QAAQ,CAAC,QAAQ;AACzC,iCAAyB,MAAM,IAAI,IAAI,IACrC,eAAe,mBAAmB,IAAI,IAAI;AAAA,MAAA,CAC7C;AAAA,IAAA,CACF;AAEK,UAAA,oBAAoB,SAAS,MAAM;AACvC,aAAO,eAAe,WAAW;AAAA,QAC/B,CAAC,QACC,yBAAyB,MAAM,IAAI,IAAI,MACvC,eAAe,mBAAmB,IAAI,IAAI;AAAA,MAAA;AAAA,IAC9C,CACD;AAEK,UAAA,aAAa,SAAS,MAAM;AACzB,aAAA,kBAAkB,MAAM,SAAS;AAAA,IAAA,CACzC;AAED,UAAM,wBAAwB,6BAAM;AAClC,YAAM,gCAAgC,OAAO;AAAA,QAC3C,yBAAyB;AAAA,MAExB,EAAA,OAAO,CAAC,CAAC,GAAG,OAAO,MAAM,CAAC,OAAO,EACjC,IAAI,CAAC,CAAC,IAAI,MAAM,IAAI;AAEvB,mBAAa,IAAI,4BAA4B;AAAA,QAC3C,GAAG,eAAe;AAAA,QAClB,GAAG;AAAA,MAAA,CACJ;AAAA,IAAA,GAV2B;AAa9B,UAAM,eAAe,6BAAM;AAEzB,aAAO,SAAS;IAAO,GAFJ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;"}
--- a/web/assets/GraphView-BCOd0Zle.js
+++ b/web/assets/GraphView-BCOd0Zle.js
--- a/web/assets/GraphView-BCOd0Zle.js.map
+++ b/web/assets/GraphView-BCOd0Zle.js.map
--- a/web/assets/GraphView-BGt8GmeB.css
+++ b/web/assets/GraphView-BGt8GmeB.css
@@ -1,792 +0,0 @@
-
-.editable-text[data-v-54da6fc9] {
-  display: inline;
-}
-.editable-text input[data-v-54da6fc9] {
-  width: 100%;
-  box-sizing: border-box;
-}
-
-.group-title-editor.node-title-editor[data-v-fc3f26e3] {
-  z-index: 9999;
-  padding: 0.25rem;
-}
-[data-v-fc3f26e3] .editable-text {
-  width: 100%;
-  height: 100%;
-}
-[data-v-fc3f26e3] .editable-text input {
-  width: 100%;
-  height: 100%;
-  /* Override the default font size */
-  font-size: inherit;
-}
-
-.side-bar-button-icon {
-  font-size: var(--sidebar-icon-size) !important;
-}
-.side-bar-button-selected .side-bar-button-icon {
-  font-size: var(--sidebar-icon-size) !important;
-  font-weight: bold;
-}
-
-.side-bar-button[data-v-caa3ee9c] {
-  width: var(--sidebar-width);
-  height: var(--sidebar-width);
-  border-radius: 0;
-}
-.comfyui-body-left .side-bar-button.side-bar-button-selected[data-v-caa3ee9c],
-.comfyui-body-left .side-bar-button.side-bar-button-selected[data-v-caa3ee9c]:hover {
-  border-left: 4px solid var(--p-button-text-primary-color);
-}
-.comfyui-body-right .side-bar-button.side-bar-button-selected[data-v-caa3ee9c],
-.comfyui-body-right .side-bar-button.side-bar-button-selected[data-v-caa3ee9c]:hover {
-  border-right: 4px solid var(--p-button-text-primary-color);
-}
-
-:root {
-  --sidebar-width: 64px;
-  --sidebar-icon-size: 1.5rem;
-}
-:root .small-sidebar {
-  --sidebar-width: 40px;
-  --sidebar-icon-size: 1rem;
-}
-
-.side-tool-bar-container[data-v-4da64512] {
-  display: flex;
-  flex-direction: column;
-  align-items: center;
-
-  pointer-events: auto;
-
-  width: var(--sidebar-width);
-  height: 100%;
-
-  background-color: var(--comfy-menu-bg);
-  color: var(--fg-color);
-}
-.side-tool-bar-end[data-v-4da64512] {
-  align-self: flex-end;
-  margin-top: auto;
-}
-.sidebar-content-container[data-v-4da64512] {
-  height: 100%;
-  overflow-y: auto;
-}
-
-.p-splitter-gutter {
-  pointer-events: auto;
-}
-.gutter-hidden {
-  display: none !important;
-}
-
-.side-bar-panel[data-v-b9df3042] {
-  background-color: var(--bg-color);
-  pointer-events: auto;
-}
-.splitter-overlay[data-v-b9df3042] {
-  width: 100%;
-  height: 100%;
-  position: absolute;
-  top: 0;
-  left: 0;
-  background-color: transparent;
-  pointer-events: none;
-  /* Set it the same as the ComfyUI menu */
-  /* Note: Lite-graph DOM widgets have the same z-index as the node id, so
-  999 should be sufficient to make sure splitter overlays on node's DOM
-  widgets */
-  z-index: 999;
-  border: none;
-}
-
-._content[data-v-e7b35fd9] {
-
-    display: flex;
-
-    flex-direction: column
-}
-._content[data-v-e7b35fd9] > :not([hidden]) ~ :not([hidden]) {
-
-    --tw-space-y-reverse: 0;
-
-    margin-top: calc(0.5rem * calc(1 - var(--tw-space-y-reverse)));
-
-    margin-bottom: calc(0.5rem * var(--tw-space-y-reverse))
-}
-._footer[data-v-e7b35fd9] {
-
-    display: flex;
-
-    flex-direction: column;
-
-    align-items: flex-end;
-
-    padding-top: 1rem
-}
-
-[data-v-37f672ab] .highlight {
-  background-color: var(--p-primary-color);
-  color: var(--p-primary-contrast-color);
-  font-weight: bold;
-  border-radius: 0.25rem;
-  padding: 0rem 0.125rem;
-  margin: -0.125rem 0.125rem;
-}
-
-.slot_row[data-v-ff07c900] {
-  padding: 2px;
-}
-
-/* Original N-Sidebar styles */
-._sb_dot[data-v-ff07c900] {
-  width: 8px;
-  height: 8px;
-  border-radius: 50%;
-  background-color: grey;
-}
-.node_header[data-v-ff07c900] {
-  line-height: 1;
-  padding: 8px 13px 7px;
-  margin-bottom: 5px;
-  font-size: 15px;
-  text-wrap: nowrap;
-  overflow: hidden;
-  display: flex;
-  align-items: center;
-}
-.headdot[data-v-ff07c900] {
-  width: 10px;
-  height: 10px;
-  float: inline-start;
-  margin-right: 8px;
-}
-.IMAGE[data-v-ff07c900] {
-  background-color: #64b5f6;
-}
-.VAE[data-v-ff07c900] {
-  background-color: #ff6e6e;
-}
-.LATENT[data-v-ff07c900] {
-  background-color: #ff9cf9;
-}
-.MASK[data-v-ff07c900] {
-  background-color: #81c784;
-}
-.CONDITIONING[data-v-ff07c900] {
-  background-color: #ffa931;
-}
-.CLIP[data-v-ff07c900] {
-  background-color: #ffd500;
-}
-.MODEL[data-v-ff07c900] {
-  background-color: #b39ddb;
-}
-.CONTROL_NET[data-v-ff07c900] {
-  background-color: #a5d6a7;
-}
-._sb_node_preview[data-v-ff07c900] {
-  background-color: var(--comfy-menu-bg);
-  font-family: 'Open Sans', sans-serif;
-  font-size: small;
-  color: var(--descrip-text);
-  border: 1px solid var(--descrip-text);
-  min-width: 300px;
-  width: -moz-min-content;
-  width: min-content;
-  height: -moz-fit-content;
-  height: fit-content;
-  z-index: 9999;
-  border-radius: 12px;
-  overflow: hidden;
-  font-size: 12px;
-  padding-bottom: 10px;
-}
-._sb_node_preview ._sb_description[data-v-ff07c900] {
-  margin: 10px;
-  padding: 6px;
-  background: var(--border-color);
-  border-radius: 5px;
-  font-style: italic;
-  font-weight: 500;
-  font-size: 0.9rem;
-  word-break: break-word;
-}
-._sb_table[data-v-ff07c900] {
-  display: grid;
-
-  grid-column-gap: 10px;
-  /* Spazio tra le colonne */
-  width: 100%;
-  /* Imposta la larghezza della tabella al 100% del contenitore */
-}
-._sb_row[data-v-ff07c900] {
-  display: grid;
-  grid-template-columns: 10px 1fr 1fr 1fr 10px;
-  grid-column-gap: 10px;
-  align-items: center;
-  padding-left: 9px;
-  padding-right: 9px;
-}
-._sb_row_string[data-v-ff07c900] {
-  grid-template-columns: 10px 1fr 1fr 10fr 1fr;
-}
-._sb_col[data-v-ff07c900] {
-  border: 0px solid #000;
-  display: flex;
-  align-items: flex-end;
-  flex-direction: row-reverse;
-  flex-wrap: nowrap;
-  align-content: flex-start;
-  justify-content: flex-end;
-}
-._sb_inherit[data-v-ff07c900] {
-  display: inherit;
-}
-._long_field[data-v-ff07c900] {
-  background: var(--bg-color);
-  border: 2px solid var(--border-color);
-  margin: 5px 5px 0 5px;
-  border-radius: 10px;
-  line-height: 1.7;
-  text-wrap: nowrap;
-}
-._sb_arrow[data-v-ff07c900] {
-  color: var(--fg-color);
-}
-._sb_preview_badge[data-v-ff07c900] {
-  text-align: center;
-  background: var(--comfy-input-bg);
-  font-weight: bold;
-  color: var(--error-text);
-}
-
-.comfy-vue-node-search-container[data-v-2d409367] {
-  display: flex;
-  width: 100%;
-  min-width: 26rem;
-  align-items: center;
-  justify-content: center;
-}
-.comfy-vue-node-search-container[data-v-2d409367] * {
-  pointer-events: auto;
-}
-.comfy-vue-node-preview-container[data-v-2d409367] {
-  position: absolute;
-  left: -350px;
-  top: 50px;
-}
-.comfy-vue-node-search-box[data-v-2d409367] {
-  z-index: 10;
-  flex-grow: 1;
-}
-._filter-button[data-v-2d409367] {
-  z-index: 10;
-}
-._dialog[data-v-2d409367] {
-  min-width: 26rem;
-}
-
-.invisible-dialog-root {
-  width: 60%;
-  min-width: 24rem;
-  max-width: 48rem;
-  border: 0 !important;
-  background-color: transparent !important;
-  margin-top: 25vh;
-  margin-left: 400px;
-}
-@media all and (max-width: 768px) {
-.invisible-dialog-root {
-    margin-left: 0px;
-}
-}
-.node-search-box-dialog-mask {
-  align-items: flex-start !important;
-}
-
-.node-tooltip[data-v-0a4402f9] {
-  background: var(--comfy-input-bg);
-  border-radius: 5px;
-  box-shadow: 0 0 5px rgba(0, 0, 0, 0.4);
-  color: var(--input-text);
-  font-family: sans-serif;
-  left: 0;
-  max-width: 30vw;
-  padding: 4px 8px;
-  position: absolute;
-  top: 0;
-  transform: translate(5px, calc(-100% - 5px));
-  white-space: pre-wrap;
-  z-index: 99999;
-}
-
-.p-buttongroup-vertical[data-v-ce8bd6ac] {
-  display: flex;
-  flex-direction: column;
-  border-radius: var(--p-button-border-radius);
-  overflow: hidden;
-  border: 1px solid var(--p-panel-border-color);
-}
-.p-buttongroup-vertical .p-button[data-v-ce8bd6ac] {
-  margin: 0;
-  border-radius: 0;
-}
-
-.comfy-image-wrap[data-v-9bc23daf] {
-  display: contents;
-}
-.comfy-image-blur[data-v-9bc23daf] {
-  position: absolute;
-  top: 0;
-  left: 0;
-  width: 100%;
-  height: 100%;
-  -o-object-fit: cover;
-     object-fit: cover;
-}
-.comfy-image-main[data-v-9bc23daf] {
-  width: 100%;
-  height: 100%;
-  -o-object-fit: cover;
-     object-fit: cover;
-  -o-object-position: center;
-     object-position: center;
-  z-index: 1;
-}
-.contain .comfy-image-wrap[data-v-9bc23daf] {
-  position: relative;
-  width: 100%;
-  height: 100%;
-}
-.contain .comfy-image-main[data-v-9bc23daf] {
-  -o-object-fit: contain;
-     object-fit: contain;
-  -webkit-backdrop-filter: blur(10px);
-          backdrop-filter: blur(10px);
-  position: absolute;
-}
-.broken-image-placeholder[data-v-9bc23daf] {
-  display: flex;
-  flex-direction: column;
-  align-items: center;
-  justify-content: center;
-  width: 100%;
-  height: 100%;
-  margin: 2rem;
-}
-.broken-image-placeholder i[data-v-9bc23daf] {
-  font-size: 3rem;
-  margin-bottom: 0.5rem;
-}
-
-.result-container[data-v-d9c060ae] {
-  width: 100%;
-  height: 100%;
-  aspect-ratio: 1 / 1;
-  overflow: hidden;
-  position: relative;
-  display: flex;
-  justify-content: center;
-  align-items: center;
-}
-.image-preview-mask[data-v-d9c060ae] {
-  position: absolute;
-  left: 50%;
-  top: 50%;
-  transform: translate(-50%, -50%);
-  display: flex;
-  align-items: center;
-  justify-content: center;
-  opacity: 0;
-  transition: opacity 0.3s ease;
-  z-index: 1;
-}
-.result-container:hover .image-preview-mask[data-v-d9c060ae] {
-  opacity: 1;
-}
-
-.task-result-preview[data-v-d4c8a1fe] {
-  aspect-ratio: 1 / 1;
-  overflow: hidden;
-  display: flex;
-  justify-content: center;
-  align-items: center;
-  width: 100%;
-  height: 100%;
-}
-.task-result-preview i[data-v-d4c8a1fe],
-.task-result-preview span[data-v-d4c8a1fe] {
-  font-size: 2rem;
-}
-.task-item[data-v-d4c8a1fe] {
-  display: flex;
-  flex-direction: column;
-  border-radius: 4px;
-  overflow: hidden;
-  position: relative;
-}
-.task-item-details[data-v-d4c8a1fe] {
-  position: absolute;
-  bottom: 0;
-  padding: 0.6rem;
-  display: flex;
-  justify-content: space-between;
-  align-items: center;
-  width: 100%;
-  z-index: 1;
-}
-.task-node-link[data-v-d4c8a1fe] {
-  padding: 2px;
-}
-
-/* In dark mode, transparent background color for tags is not ideal for tags that
-are floating on top of images. */
-.tag-wrapper[data-v-d4c8a1fe] {
-  background-color: var(--p-primary-contrast-color);
-  border-radius: 6px;
-  display: inline-flex;
-}
-.node-name-tag[data-v-d4c8a1fe] {
-  word-break: break-all;
-}
-.status-tag-group[data-v-d4c8a1fe] {
-  display: flex;
-  flex-direction: column;
-}
-.progress-preview-img[data-v-d4c8a1fe] {
-  width: 100%;
-  height: 100%;
-  -o-object-fit: cover;
-     object-fit: cover;
-  -o-object-position: center;
-     object-position: center;
-}
-
-/* PrimeVue's galleria teleports the fullscreen gallery out of subtree so we
-cannot use scoped style here. */
-img.galleria-image {
-  max-width: 100vw;
-  max-height: 100vh;
-  -o-object-fit: contain;
-     object-fit: contain;
-}
-.p-galleria-close-button {
-  /* Set z-index so the close button doesn't get hidden behind the image when image is large */
-  z-index: 1;
-}
-
-.comfy-vue-side-bar-container[data-v-1b0a8fe3] {
-  display: flex;
-  flex-direction: column;
-  height: 100%;
-  overflow: hidden;
-}
-.comfy-vue-side-bar-header[data-v-1b0a8fe3] {
-  flex-shrink: 0;
-  border-left: none;
-  border-right: none;
-  border-top: none;
-  border-radius: 0;
-  padding: 0.25rem 1rem;
-  min-height: 2.5rem;
-}
-.comfy-vue-side-bar-header-span[data-v-1b0a8fe3] {
-  font-size: small;
-}
-.comfy-vue-side-bar-body[data-v-1b0a8fe3] {
-  flex-grow: 1;
-  overflow: auto;
-  scrollbar-width: thin;
-  scrollbar-color: transparent transparent;
-}
-.comfy-vue-side-bar-body[data-v-1b0a8fe3]::-webkit-scrollbar {
-  width: 1px;
-}
-.comfy-vue-side-bar-body[data-v-1b0a8fe3]::-webkit-scrollbar-thumb {
-  background-color: transparent;
-}
-
-.scroll-container[data-v-08fa89b1] {
-  height: 100%;
-  overflow-y: auto;
-}
-.queue-grid[data-v-08fa89b1] {
-  display: grid;
-  grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
-  padding: 0.5rem;
-  gap: 0.5rem;
-}
-
-.tree-node[data-v-633e27ab] {
-  width: 100%;
-  display: flex;
-  align-items: center;
-  justify-content: space-between;
-}
-.leaf-count-badge[data-v-633e27ab] {
-  margin-left: 0.5rem;
-}
-.node-content[data-v-633e27ab] {
-  display: flex;
-  align-items: center;
-  flex-grow: 1;
-}
-.leaf-label[data-v-633e27ab] {
-  margin-left: 0.5rem;
-}
-[data-v-633e27ab] .editable-text span {
-  word-break: break-all;
-}
-
-[data-v-bd7bae90] .tree-explorer-node-label {
-  width: 100%;
-  display: flex;
-  align-items: center;
-  margin-left: var(--p-tree-node-gap);
-  flex-grow: 1;
-}
-
-/*
- * The following styles are necessary to avoid layout shift when dragging nodes over folders.
- * By setting the position to relative on the parent and using an absolutely positioned pseudo-element,
- * we can create a visual indicator for the drop target without affecting the layout of other elements.
- */
-[data-v-bd7bae90] .p-tree-node-content:has(.tree-folder) {
-  position: relative;
-}
-[data-v-bd7bae90] .p-tree-node-content:has(.tree-folder.can-drop)::after {
-  content: '';
-  position: absolute;
-  top: 0;
-  left: 0;
-  right: 0;
-  bottom: 0;
-  border: 1px solid var(--p-content-color);
-  pointer-events: none;
-}
-
-.node-lib-node-container[data-v-90dfee08] {
-    height: 100%;
-    width: 100%
-}
-
-.p-selectbutton .p-button[data-v-91077f2a] {
-  padding: 0.5rem;
-}
-.p-selectbutton .p-button .pi[data-v-91077f2a] {
-  font-size: 1.5rem;
-}
-.field[data-v-91077f2a] {
-  display: flex;
-  flex-direction: column;
-  gap: 0.5rem;
-}
-.color-picker-container[data-v-91077f2a] {
-  display: flex;
-  align-items: center;
-  gap: 0.5rem;
-}
-
-.node-lib-filter-popup {
-  margin-left: -13px;
-}
-
-[data-v-f6a7371a] .comfy-vue-side-bar-body {
-  background: var(--p-tree-background);
-}
-[data-v-f6a7371a] .node-lib-bookmark-tree-explorer {
-  padding-bottom: 2px;
-}
-[data-v-f6a7371a] .p-divider {
-  margin: var(--comfy-tree-explorer-item-padding) 0px;
-}
-
-.model_preview[data-v-32e6c4d9] {
-  background-color: var(--comfy-menu-bg);
-  font-family: 'Open Sans', sans-serif;
-  color: var(--descrip-text);
-  border: 1px solid var(--descrip-text);
-  min-width: 300px;
-  max-width: 500px;
-  width: -moz-fit-content;
-  width: fit-content;
-  height: -moz-fit-content;
-  height: fit-content;
-  z-index: 9999;
-  border-radius: 12px;
-  overflow: hidden;
-  font-size: 12px;
-  padding: 10px;
-}
-.model_preview_image[data-v-32e6c4d9] {
-  margin: auto;
-  width: -moz-fit-content;
-  width: fit-content;
-}
-.model_preview_image img[data-v-32e6c4d9] {
-  max-width: 100%;
-  max-height: 150px;
-  -o-object-fit: contain;
-     object-fit: contain;
-}
-.model_preview_title[data-v-32e6c4d9] {
-  font-weight: bold;
-  text-align: center;
-  font-size: 14px;
-}
-.model_preview_top_container[data-v-32e6c4d9] {
-  text-align: center;
-  line-height: 0.5;
-}
-.model_preview_filename[data-v-32e6c4d9],
-.model_preview_author[data-v-32e6c4d9],
-.model_preview_architecture[data-v-32e6c4d9] {
-  display: inline-block;
-  text-align: center;
-  margin: 5px;
-  font-size: 10px;
-}
-.model_preview_prefix[data-v-32e6c4d9] {
-  font-weight: bold;
-}
-
-.model-lib-model-icon-container[data-v-70b69131] {
-  display: inline-block;
-  position: relative;
-  left: 0;
-  height: 1.5rem;
-  vertical-align: top;
-  width: 0px;
-}
-.model-lib-model-icon[data-v-70b69131] {
-  background-size: cover;
-  background-position: center;
-  display: inline-block;
-  position: relative;
-  left: -2.5rem;
-  height: 2rem;
-  width: 2rem;
-  vertical-align: top;
-}
-
-.pi-fake-spacer {
-  height: 1px;
-  width: 16px;
-}
-
-[data-v-74b01bce] .comfy-vue-side-bar-body {
-  background: var(--p-tree-background);
-}
-
-[data-v-d2d58252] .comfy-vue-side-bar-body {
-  background: var(--p-tree-background);
-}
-
-[data-v-84e785b8] .p-togglebutton::before {
-  display: none
-}
-[data-v-84e785b8] .p-togglebutton {
-  position: relative;
-  flex-shrink: 0;
-  border-radius: 0px;
-  background-color: transparent;
-  padding-left: 0.5rem;
-  padding-right: 0.5rem
-}
-[data-v-84e785b8] .p-togglebutton.p-togglebutton-checked {
-  border-bottom-width: 2px;
-  border-bottom-color: var(--p-button-text-primary-color)
-}
-[data-v-84e785b8] .p-togglebutton-checked .close-button,[data-v-84e785b8] .p-togglebutton:hover .close-button {
-  visibility: visible
-}
-.status-indicator[data-v-84e785b8] {
-  position: absolute;
-  font-weight: 700;
-  font-size: 1.5rem;
-  top: 50%;
-  left: 50%;
-  transform: translate(-50%, -50%)
-}
-[data-v-84e785b8] .p-togglebutton:hover .status-indicator {
-  display: none
-}
-[data-v-84e785b8] .p-togglebutton .close-button {
-  visibility: hidden
-}
-
-.top-menubar[data-v-2ec1b620] .p-menubar-item-link svg {
-  display: none;
-}
-[data-v-2ec1b620] .p-menubar-submenu.dropdown-direction-up {
-  top: auto;
-  bottom: 100%;
-  flex-direction: column-reverse;
-}
-.keybinding-tag[data-v-2ec1b620] {
-  background: var(--p-content-hover-background);
-  border-color: var(--p-content-border-color);
-  border-style: solid;
-}
-
-[data-v-713442be] .p-inputtext {
-  border-top-left-radius: 0;
-  border-bottom-left-radius: 0;
-}
-
-.comfyui-queue-button[data-v-fcd3efcd] .p-splitbutton-dropdown {
-  border-top-right-radius: 0;
-  border-bottom-right-radius: 0;
-}
-
-.actionbar[data-v-bc6c78dd] {
-  pointer-events: all;
-  position: fixed;
-  z-index: 1000;
-}
-.actionbar.is-docked[data-v-bc6c78dd] {
-  position: static;
-  border-style: none;
-  background-color: transparent;
-  padding: 0px;
-}
-.actionbar.is-dragging[data-v-bc6c78dd] {
-  -webkit-user-select: none;
-     -moz-user-select: none;
-          user-select: none;
-}
-[data-v-bc6c78dd] .p-panel-content {
-  padding: 0.25rem;
-}
-[data-v-bc6c78dd] .p-panel-header {
-  display: none;
-}
-
-.comfyui-menu[data-v-b13fdc92] {
-  width: 100vw;
-  background: var(--comfy-menu-bg);
-  color: var(--fg-color);
-  font-family: Arial, Helvetica, sans-serif;
-  font-size: 0.8em;
-  box-sizing: border-box;
-  z-index: 1000;
-  order: 0;
-  grid-column: 1/-1;
-  max-height: 90vh;
-}
-.comfyui-menu.dropzone[data-v-b13fdc92] {
-  background: var(--p-highlight-background);
-}
-.comfyui-menu.dropzone-active[data-v-b13fdc92] {
-  background: var(--p-highlight-background-focus);
-}
-.comfyui-logo[data-v-b13fdc92] {
-  font-size: 1.2em;
-  -webkit-user-select: none;
-     -moz-user-select: none;
-          user-select: none;
-  cursor: default;
-}
--- a/web/assets/GraphView-CVV2XJjS.js
+++ b/web/assets/GraphView-CVV2XJjS.js
--- a/web/assets/GraphView-CVV2XJjS.js.map
+++ b/web/assets/GraphView-CVV2XJjS.js.map
--- a/web/assets/GraphView-CghYAxkP.css
+++ b/web/assets/GraphView-CghYAxkP.css
@@ -0,0 +1,291 @@
+
+.group-title-editor.node-title-editor[data-v-8a100d5a] {
+  z-index: 9999;
+  padding: 0.25rem;
+}
+[data-v-8a100d5a] .editable-text {
+  width: 100%;
+  height: 100%;
+}
+[data-v-8a100d5a] .editable-text input {
+  width: 100%;
+  height: 100%;
+  /* Override the default font size */
+  font-size: inherit;
+}
+
+.side-bar-button-icon {
+  font-size: var(--sidebar-icon-size) !important;
+}
+.side-bar-button-selected .side-bar-button-icon {
+  font-size: var(--sidebar-icon-size) !important;
+  font-weight: bold;
+}
+
+.side-bar-button[data-v-caa3ee9c] {
+  width: var(--sidebar-width);
+  height: var(--sidebar-width);
+  border-radius: 0;
+}
+.comfyui-body-left .side-bar-button.side-bar-button-selected[data-v-caa3ee9c],
+.comfyui-body-left .side-bar-button.side-bar-button-selected[data-v-caa3ee9c]:hover {
+  border-left: 4px solid var(--p-button-text-primary-color);
+}
+.comfyui-body-right .side-bar-button.side-bar-button-selected[data-v-caa3ee9c],
+.comfyui-body-right .side-bar-button.side-bar-button-selected[data-v-caa3ee9c]:hover {
+  border-right: 4px solid var(--p-button-text-primary-color);
+}
+
+:root {
+  --sidebar-width: 64px;
+  --sidebar-icon-size: 1.5rem;
+}
+:root .small-sidebar {
+  --sidebar-width: 40px;
+  --sidebar-icon-size: 1rem;
+}
+
+.side-tool-bar-container[data-v-e0812a25] {
+  display: flex;
+  flex-direction: column;
+  align-items: center;
+
+  pointer-events: auto;
+
+  width: var(--sidebar-width);
+  height: 100%;
+
+  background-color: var(--comfy-menu-bg);
+  color: var(--fg-color);
+}
+.side-tool-bar-end[data-v-e0812a25] {
+  align-self: flex-end;
+  margin-top: auto;
+}
+
+[data-v-7c3279c1] .p-splitter-gutter {
+  pointer-events: auto;
+}
+[data-v-7c3279c1] .p-splitter-gutter:hover,[data-v-7c3279c1] .p-splitter-gutter[data-p-gutter-resizing='true'] {
+  transition: background-color 0.2s ease 300ms;
+  background-color: var(--p-primary-color);
+}
+.side-bar-panel[data-v-7c3279c1] {
+  background-color: var(--bg-color);
+  pointer-events: auto;
+}
+.bottom-panel[data-v-7c3279c1] {
+  background-color: var(--bg-color);
+  pointer-events: auto;
+}
+.splitter-overlay[data-v-7c3279c1] {
+  pointer-events: none;
+  border-style: none;
+  background-color: transparent;
+}
+.splitter-overlay-root[data-v-7c3279c1] {
+  position: absolute;
+  top: 0px;
+  left: 0px;
+  height: 100%;
+  width: 100%;
+
+  /* Set it the same as the ComfyUI menu */
+  /* Note: Lite-graph DOM widgets have the same z-index as the node id, so
+  999 should be sufficient to make sure splitter overlays on node's DOM
+  widgets */
+  z-index: 999;
+}
+
+[data-v-37f672ab] .highlight {
+  background-color: var(--p-primary-color);
+  color: var(--p-primary-contrast-color);
+  font-weight: bold;
+  border-radius: 0.25rem;
+  padding: 0rem 0.125rem;
+  margin: -0.125rem 0.125rem;
+}
+
+.comfy-vue-node-search-container[data-v-2d409367] {
+  display: flex;
+  width: 100%;
+  min-width: 26rem;
+  align-items: center;
+  justify-content: center;
+}
+.comfy-vue-node-search-container[data-v-2d409367] * {
+  pointer-events: auto;
+}
+.comfy-vue-node-preview-container[data-v-2d409367] {
+  position: absolute;
+  left: -350px;
+  top: 50px;
+}
+.comfy-vue-node-search-box[data-v-2d409367] {
+  z-index: 10;
+  flex-grow: 1;
+}
+._filter-button[data-v-2d409367] {
+  z-index: 10;
+}
+._dialog[data-v-2d409367] {
+  min-width: 26rem;
+}
+
+.invisible-dialog-root {
+  width: 60%;
+  min-width: 24rem;
+  max-width: 48rem;
+  border: 0 !important;
+  background-color: transparent !important;
+  margin-top: 25vh;
+  margin-left: 400px;
+}
+@media all and (max-width: 768px) {
+.invisible-dialog-root {
+    margin-left: 0px;
+}
+}
+.node-search-box-dialog-mask {
+  align-items: flex-start !important;
+}
+
+.node-tooltip[data-v-c2e0098f] {
+  background: var(--comfy-input-bg);
+  border-radius: 5px;
+  box-shadow: 0 0 5px rgba(0, 0, 0, 0.4);
+  color: var(--input-text);
+  font-family: sans-serif;
+  left: 0;
+  max-width: 30vw;
+  padding: 4px 8px;
+  position: absolute;
+  top: 0;
+  transform: translate(5px, calc(-100% - 5px));
+  white-space: pre-wrap;
+  z-index: 99999;
+}
+
+.p-buttongroup-vertical[data-v-94481f39] {
+  display: flex;
+  flex-direction: column;
+  border-radius: var(--p-button-border-radius);
+  overflow: hidden;
+  border: 1px solid var(--p-panel-border-color);
+}
+.p-buttongroup-vertical .p-button[data-v-94481f39] {
+  margin: 0;
+  border-radius: 0;
+}
+
+.comfy-menu-hamburger[data-v-2ddd26e8] {
+  pointer-events: auto;
+  position: fixed;
+  z-index: 9999;
+}
+
+[data-v-9eb975c3] .p-togglebutton::before {
+  display: none
+}
+[data-v-9eb975c3] .p-togglebutton {
+  position: relative;
+  flex-shrink: 0;
+  border-radius: 0px;
+  background-color: transparent;
+  padding-left: 0.5rem;
+  padding-right: 0.5rem
+}
+[data-v-9eb975c3] .p-togglebutton.p-togglebutton-checked {
+  border-bottom-width: 2px;
+  border-bottom-color: var(--p-button-text-primary-color)
+}
+[data-v-9eb975c3] .p-togglebutton-checked .close-button,[data-v-9eb975c3] .p-togglebutton:hover .close-button {
+  visibility: visible
+}
+.status-indicator[data-v-9eb975c3] {
+  position: absolute;
+  font-weight: 700;
+  font-size: 1.5rem;
+  top: 50%;
+  left: 50%;
+  transform: translate(-50%, -50%)
+}
+[data-v-9eb975c3] .p-togglebutton:hover .status-indicator {
+  display: none
+}
+[data-v-9eb975c3] .p-togglebutton .close-button {
+  visibility: hidden
+}
+
+.top-menubar[data-v-9646ca0a] .p-menubar-item-link svg {
+  display: none;
+}
+[data-v-9646ca0a] .p-menubar-submenu.dropdown-direction-up {
+  top: auto;
+  bottom: 100%;
+  flex-direction: column-reverse;
+}
+.keybinding-tag[data-v-9646ca0a] {
+  background: var(--p-content-hover-background);
+  border-color: var(--p-content-border-color);
+  border-style: solid;
+}
+
+[data-v-713442be] .p-inputtext {
+  border-top-left-radius: 0;
+  border-bottom-left-radius: 0;
+}
+
+.comfyui-queue-button[data-v-95bc9be0] .p-splitbutton-dropdown {
+  border-top-right-radius: 0;
+  border-bottom-right-radius: 0;
+}
+
+.actionbar[data-v-eb6e9acf] {
+  pointer-events: all;
+  position: fixed;
+  z-index: 1000;
+}
+.actionbar.is-docked[data-v-eb6e9acf] {
+  position: static;
+  border-style: none;
+  background-color: transparent;
+  padding: 0px;
+}
+.actionbar.is-dragging[data-v-eb6e9acf] {
+  -webkit-user-select: none;
+     -moz-user-select: none;
+          user-select: none;
+}
+[data-v-eb6e9acf] .p-panel-content {
+  padding: 0.25rem;
+}
+[data-v-eb6e9acf] .p-panel-header {
+  display: none;
+}
+
+.comfyui-menu[data-v-d84a704d] {
+  width: 100vw;
+  background: var(--comfy-menu-bg);
+  color: var(--fg-color);
+  font-family: Arial, Helvetica, sans-serif;
+  font-size: 0.8em;
+  box-sizing: border-box;
+  z-index: 1000;
+  order: 0;
+  grid-column: 1/-1;
+  max-height: 90vh;
+}
+.comfyui-menu.dropzone[data-v-d84a704d] {
+  background: var(--p-highlight-background);
+}
+.comfyui-menu.dropzone-active[data-v-d84a704d] {
+  background: var(--p-highlight-background-focus);
+}
+.comfyui-logo[data-v-d84a704d] {
+  font-size: 1.2em;
+  -webkit-user-select: none;
+     -moz-user-select: none;
+          user-select: none;
+  cursor: default;
+}
--- a/web/assets/InstallView-CN3CA9Fk.css
+++ b/web/assets/InstallView-CN3CA9Fk.css
@@ -0,0 +1,4 @@
+
+[data-v-53e62b05] .p-steppanel {
+    background-color: transparent
+}
--- a/web/assets/InstallView-D9ueAxrz.js
+++ b/web/assets/InstallView-D9ueAxrz.js
--- a/web/assets/InstallView-D9ueAxrz.js.map
+++ b/web/assets/InstallView-D9ueAxrz.js.map
--- a/web/assets/KeybindingPanel-CB_wEOHl.css
+++ b/web/assets/KeybindingPanel-CB_wEOHl.css
@@ -0,0 +1,8 @@
+
+[data-v-2d8b3a76] .p-datatable-tbody > tr > td {
+  padding: 0.25rem;
+  min-height: 2rem
+}
+[data-v-2d8b3a76] .p-datatable-row-selected .actions,[data-v-2d8b3a76] .p-datatable-selectable-row:hover .actions {
+  visibility: visible
+}
--- a/web/assets/KeybindingPanel-DcEfyPZZ.js
+++ b/web/assets/KeybindingPanel-DcEfyPZZ.js
@@ -0,0 +1,274 @@
+var __defProp = Object.defineProperty;
+var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
+import { d as defineComponent, q as computed, g as openBlock, h as createElementBlock, N as Fragment, O as renderList, i as createVNode, y as withCtx, aw as createTextVNode, a6 as toDisplayString, z as unref, aA as script, j as createCommentVNode, r as ref, c3 as FilterMatchMode, M as useKeybindingStore, F as useCommandStore, aJ as watchEffect, be as useToast, t as resolveDirective, c4 as SearchBox, A as createBaseVNode, D as script$2, x as createBlock, ao as script$4, bi as withModifiers, bR as script$5, aH as script$6, v as withDirectives, P as pushScopeId, Q as popScopeId, b$ as KeyComboImpl, c5 as KeybindingImpl, _ as _export_sfc } from "./index-B6dYHNhg.js";
+import { s as script$1, a as script$3 } from "./index-CjwCGacA.js";
+import "./index-MX9DEi8Q.js";
+const _hoisted_1$1 = {
+  key: 0,
+  class: "px-2"
+};
+const _sfc_main$1 = /* @__PURE__ */ defineComponent({
+  __name: "KeyComboDisplay",
+  props: {
+    keyCombo: {},
+    isModified: { type: Boolean, default: false }
+  },
+  setup(__props) {
+    const props = __props;
+    const keySequences = computed(() => props.keyCombo.getKeySequences());
+    return (_ctx, _cache) => {
+      return openBlock(), createElementBlock("span", null, [
+        (openBlock(true), createElementBlock(Fragment, null, renderList(keySequences.value, (sequence, index) => {
+          return openBlock(), createElementBlock(Fragment, { key: index }, [
+            createVNode(unref(script), {
+              severity: _ctx.isModified ? "info" : "secondary"
+            }, {
+              default: withCtx(() => [
+                createTextVNode(toDisplayString(sequence), 1)
+              ]),
+              _: 2
+            }, 1032, ["severity"]),
+            index < keySequences.value.length - 1 ? (openBlock(), createElementBlock("span", _hoisted_1$1, "+")) : createCommentVNode("", true)
+          ], 64);
+        }), 128))
+      ]);
+    };
+  }
+});
+const _withScopeId = /* @__PURE__ */ __name((n) => (pushScopeId("data-v-2d8b3a76"), n = n(), popScopeId(), n), "_withScopeId");
+const _hoisted_1 = { class: "keybinding-panel" };
+const _hoisted_2 = { class: "actions invisible flex flex-row" };
+const _hoisted_3 = ["title"];
+const _hoisted_4 = { key: 1 };
+const _sfc_main = /* @__PURE__ */ defineComponent({
+  __name: "KeybindingPanel",
+  setup(__props) {
+    const filters = ref({
+      global: { value: "", matchMode: FilterMatchMode.CONTAINS }
+    });
+    const keybindingStore = useKeybindingStore();
+    const commandStore = useCommandStore();
+    const commandsData = computed(() => {
+      return Object.values(commandStore.commands).map((command) => ({
+        id: command.id,
+        keybinding: keybindingStore.getKeybindingByCommandId(command.id)
+      }));
+    });
+    const selectedCommandData = ref(null);
+    const editDialogVisible = ref(false);
+    const newBindingKeyCombo = ref(null);
+    const currentEditingCommand = ref(null);
+    const keybindingInput = ref(null);
+    const existingKeybindingOnCombo = computed(() => {
+      if (!currentEditingCommand.value) {
+        return null;
+      }
+      if (currentEditingCommand.value.keybinding?.combo?.equals(
+        newBindingKeyCombo.value
+      )) {
+        return null;
+      }
+      if (!newBindingKeyCombo.value) {
+        return null;
+      }
+      return keybindingStore.getKeybinding(newBindingKeyCombo.value);
+    });
+    function editKeybinding(commandData) {
+      currentEditingCommand.value = commandData;
+      newBindingKeyCombo.value = commandData.keybinding ? commandData.keybinding.combo : null;
+      editDialogVisible.value = true;
+    }
+    __name(editKeybinding, "editKeybinding");
+    watchEffect(() => {
+      if (editDialogVisible.value) {
+        setTimeout(() => {
+          keybindingInput.value?.$el?.focus();
+        }, 300);
+      }
+    });
+    function removeKeybinding(commandData) {
+      if (commandData.keybinding) {
+        keybindingStore.unsetKeybinding(commandData.keybinding);
+        keybindingStore.persistUserKeybindings();
+      }
+    }
+    __name(removeKeybinding, "removeKeybinding");
+    function captureKeybinding(event) {
+      const keyCombo = KeyComboImpl.fromEvent(event);
+      newBindingKeyCombo.value = keyCombo;
+    }
+    __name(captureKeybinding, "captureKeybinding");
+    function cancelEdit() {
+      editDialogVisible.value = false;
+      currentEditingCommand.value = null;
+      newBindingKeyCombo.value = null;
+    }
+    __name(cancelEdit, "cancelEdit");
+    function saveKeybinding() {
+      if (currentEditingCommand.value && newBindingKeyCombo.value) {
+        const updated = keybindingStore.updateKeybindingOnCommand(
+          new KeybindingImpl({
+            commandId: currentEditingCommand.value.id,
+            combo: newBindingKeyCombo.value
+          })
+        );
+        if (updated) {
+          keybindingStore.persistUserKeybindings();
+        }
+      }
+      cancelEdit();
+    }
+    __name(saveKeybinding, "saveKeybinding");
+    const toast = useToast();
+    async function resetKeybindings() {
+      keybindingStore.resetKeybindings();
+      await keybindingStore.persistUserKeybindings();
+      toast.add({
+        severity: "info",
+        summary: "Info",
+        detail: "Keybindings reset",
+        life: 3e3
+      });
+    }
+    __name(resetKeybindings, "resetKeybindings");
+    return (_ctx, _cache) => {
+      const _directive_tooltip = resolveDirective("tooltip");
+      return openBlock(), createElementBlock("div", _hoisted_1, [
+        createVNode(unref(script$3), {
+          value: commandsData.value,
+          selection: selectedCommandData.value,
+          "onUpdate:selection": _cache[1] || (_cache[1] = ($event) => selectedCommandData.value = $event),
+          "global-filter-fields": ["id"],
+          filters: filters.value,
+          selectionMode: "single",
+          stripedRows: "",
+          pt: {
+            header: "px-0"
+          }
+        }, {
+          header: withCtx(() => [
+            createVNode(SearchBox, {
+              modelValue: filters.value["global"].value,
+              "onUpdate:modelValue": _cache[0] || (_cache[0] = ($event) => filters.value["global"].value = $event),
+              placeholder: _ctx.$t("searchKeybindings") + "..."
+            }, null, 8, ["modelValue", "placeholder"])
+          ]),
+          default: withCtx(() => [
+            createVNode(unref(script$1), {
+              field: "actions",
+              header: ""
+            }, {
+              body: withCtx((slotProps) => [
+                createBaseVNode("div", _hoisted_2, [
+                  createVNode(unref(script$2), {
+                    icon: "pi pi-pencil",
+                    class: "p-button-text",
+                    onClick: /* @__PURE__ */ __name(($event) => editKeybinding(slotProps.data), "onClick")
+                  }, null, 8, ["onClick"]),
+                  createVNode(unref(script$2), {
+                    icon: "pi pi-trash",
+                    class: "p-button-text p-button-danger",
+                    onClick: /* @__PURE__ */ __name(($event) => removeKeybinding(slotProps.data), "onClick"),
+                    disabled: !slotProps.data.keybinding
+                  }, null, 8, ["onClick", "disabled"])
+                ])
+              ]),
+              _: 1
+            }),
+            createVNode(unref(script$1), {
+              field: "id",
+              header: "Command ID",
+              sortable: "",
+              class: "max-w-64 2xl:max-w-full"
+            }, {
+              body: withCtx((slotProps) => [
+                createBaseVNode("div", {
+                  class: "overflow-hidden text-ellipsis whitespace-nowrap",
+                  title: slotProps.data.id
+                }, toDisplayString(slotProps.data.id), 9, _hoisted_3)
+              ]),
+              _: 1
+            }),
+            createVNode(unref(script$1), {
+              field: "keybinding",
+              header: "Keybinding"
+            }, {
+              body: withCtx((slotProps) => [
+                slotProps.data.keybinding ? (openBlock(), createBlock(_sfc_main$1, {
+                  key: 0,
+                  keyCombo: slotProps.data.keybinding.combo,
+                  isModified: unref(keybindingStore).isCommandKeybindingModified(slotProps.data.id)
+                }, null, 8, ["keyCombo", "isModified"])) : (openBlock(), createElementBlock("span", _hoisted_4, "-"))
+              ]),
+              _: 1
+            })
+          ]),
+          _: 1
+        }, 8, ["value", "selection", "filters"]),
+        createVNode(unref(script$6), {
+          class: "min-w-96",
+          visible: editDialogVisible.value,
+          "onUpdate:visible": _cache[2] || (_cache[2] = ($event) => editDialogVisible.value = $event),
+          modal: "",
+          header: currentEditingCommand.value?.id,
+          onHide: cancelEdit
+        }, {
+          footer: withCtx(() => [
+            createVNode(unref(script$2), {
+              label: "Save",
+              icon: "pi pi-check",
+              onClick: saveKeybinding,
+              disabled: !!existingKeybindingOnCombo.value,
+              autofocus: ""
+            }, null, 8, ["disabled"])
+          ]),
+          default: withCtx(() => [
+            createBaseVNode("div", null, [
+              createVNode(unref(script$4), {
+                class: "mb-2 text-center",
+                ref_key: "keybindingInput",
+                ref: keybindingInput,
+                modelValue: newBindingKeyCombo.value?.toString() ?? "",
+                placeholder: "Press keys for new binding",
+                onKeydown: withModifiers(captureKeybinding, ["stop", "prevent"]),
+                autocomplete: "off",
+                fluid: "",
+                invalid: !!existingKeybindingOnCombo.value
+              }, null, 8, ["modelValue", "invalid"]),
+              existingKeybindingOnCombo.value ? (openBlock(), createBlock(unref(script$5), {
+                key: 0,
+                severity: "error"
+              }, {
+                default: withCtx(() => [
+                  createTextVNode(" Keybinding already exists on "),
+                  createVNode(unref(script), {
+                    severity: "secondary",
+                    value: existingKeybindingOnCombo.value.commandId
+                  }, null, 8, ["value"])
+                ]),
+                _: 1
+              })) : createCommentVNode("", true)
+            ])
+          ]),
+          _: 1
+        }, 8, ["visible", "header"]),
+        withDirectives(createVNode(unref(script$2), {
+          class: "mt-4",
+          label: _ctx.$t("reset"),
+          icon: "pi pi-trash",
+          severity: "danger",
+          fluid: "",
+          text: "",
+          onClick: resetKeybindings
+        }, null, 8, ["label"]), [
+          [_directive_tooltip, _ctx.$t("resetKeybindingsTooltip")]
+        ])
+      ]);
+    };
+  }
+});
+const KeybindingPanel = /* @__PURE__ */ _export_sfc(_sfc_main, [["__scopeId", "data-v-2d8b3a76"]]);
+export {
+  KeybindingPanel as default
+};
+//# sourceMappingURL=KeybindingPanel-DcEfyPZZ.js.map
--- a/web/assets/KeybindingPanel-DcEfyPZZ.js.map
+++ b/web/assets/KeybindingPanel-DcEfyPZZ.js.map
--- a/web/assets/ServerStartView-e57oVZ6V.js
+++ b/web/assets/ServerStartView-e57oVZ6V.js
@@ -0,0 +1,102 @@
+var __defProp = Object.defineProperty;
+var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
+import { d as defineComponent, r as ref, o as onMounted, w as watch, I as onBeforeUnmount, g as openBlock, h as createElementBlock, i as createVNode, y as withCtx, A as createBaseVNode, a6 as toDisplayString, z as unref, bK as script, bL as electronAPI } from "./index-B6dYHNhg.js";
+import { t, s } from "./index-B4gmhi99.js";
+const _hoisted_1$1 = { class: "p-terminal rounded-none h-full w-full" };
+const _hoisted_2$1 = { class: "px-4 whitespace-pre-wrap" };
+const _sfc_main$1 = /* @__PURE__ */ defineComponent({
+  __name: "LogTerminal",
+  props: {
+    fetchLogs: { type: Function },
+    fetchInterval: {}
+  },
+  setup(__props) {
+    const props = __props;
+    const log = ref("");
+    const scrollPanelRef = ref(null);
+    const scrolledToBottom = ref(false);
+    let intervalId = 0;
+    onMounted(async () => {
+      const element = scrollPanelRef.value?.$el;
+      const scrollContainer = element?.querySelector(".p-scrollpanel-content");
+      if (scrollContainer) {
+        scrollContainer.addEventListener("scroll", () => {
+          scrolledToBottom.value = scrollContainer.scrollTop + scrollContainer.clientHeight === scrollContainer.scrollHeight;
+        });
+      }
+      const scrollToBottom = /* @__PURE__ */ __name(() => {
+        if (scrollContainer) {
+          scrollContainer.scrollTop = scrollContainer.scrollHeight;
+        }
+      }, "scrollToBottom");
+      watch(log, () => {
+        if (scrolledToBottom.value) {
+          scrollToBottom();
+        }
+      });
+      const fetchLogs = /* @__PURE__ */ __name(async () => {
+        log.value = await props.fetchLogs();
+      }, "fetchLogs");
+      await fetchLogs();
+      scrollToBottom();
+      intervalId = window.setInterval(fetchLogs, props.fetchInterval);
+    });
+    onBeforeUnmount(() => {
+      window.clearInterval(intervalId);
+    });
+    return (_ctx, _cache) => {
+      return openBlock(), createElementBlock("div", _hoisted_1$1, [
+        createVNode(unref(script), {
+          class: "h-full w-full",
+          ref_key: "scrollPanelRef",
+          ref: scrollPanelRef
+        }, {
+          default: withCtx(() => [
+            createBaseVNode("pre", _hoisted_2$1, toDisplayString(log.value), 1)
+          ]),
+          _: 1
+        }, 512)
+      ]);
+    };
+  }
+});
+const _hoisted_1 = { class: "font-sans flex flex-col justify-center items-center h-screen m-0 text-neutral-300 bg-neutral-900 dark-theme pointer-events-auto" };
+const _hoisted_2 = { class: "text-2xl font-bold" };
+const _sfc_main = /* @__PURE__ */ defineComponent({
+  __name: "ServerStartView",
+  setup(__props) {
+    const electron = electronAPI();
+    const status = ref(t.INITIAL_STATE);
+    const logs = ref([]);
+    const updateProgress = /* @__PURE__ */ __name(({ status: newStatus }) => {
+      status.value = newStatus;
+      logs.value = [];
+    }, "updateProgress");
+    const addLogMessage = /* @__PURE__ */ __name((message) => {
+      logs.value = [...logs.value, message];
+    }, "addLogMessage");
+    const fetchLogs = /* @__PURE__ */ __name(async () => {
+      return logs.value.join("\n");
+    }, "fetchLogs");
+    onMounted(() => {
+      electron.sendReady();
+      electron.onProgressUpdate(updateProgress);
+      electron.onLogMessage((message) => {
+        addLogMessage(message);
+      });
+    });
+    return (_ctx, _cache) => {
+      return openBlock(), createElementBlock("div", _hoisted_1, [
+        createBaseVNode("h2", _hoisted_2, toDisplayString(unref(s)[status.value]), 1),
+        createVNode(_sfc_main$1, {
+          "fetch-logs": fetchLogs,
+          "fetch-interval": 500
+        })
+      ]);
+    };
+  }
+});
+export {
+  _sfc_main as default
+};
+//# sourceMappingURL=ServerStartView-e57oVZ6V.js.map
--- a/web/assets/ServerStartView-e57oVZ6V.js.map
+++ b/web/assets/ServerStartView-e57oVZ6V.js.map
@@ -0,0 +1 @@
+{"version":3,"file":"ServerStartView-e57oVZ6V.js","sources":["../../src/components/common/LogTerminal.vue","../../src/views/ServerStartView.vue"],"sourcesContent":["<!-- A simple read-only terminal component that displays logs. -->\n<template>\n  <div class=\"p-terminal rounded-none h-full w-full\">\n    <ScrollPanel class=\"h-full w-full\" ref=\"scrollPanelRef\">\n      <pre class=\"px-4 whitespace-pre-wrap\">{{ log }}</pre>\n    </ScrollPanel>\n  </div>\n</template>\n\n<script setup lang=\"ts\">\nimport ScrollPanel from 'primevue/scrollpanel'\nimport { onBeforeUnmount, onMounted, ref, watch } from 'vue'\n\nconst props = defineProps<{\n  fetchLogs: () => Promise<string>\n  fetchInterval: number\n}>()\n\nconst log = ref<string>('')\nconst scrollPanelRef = ref<InstanceType<typeof ScrollPanel> | null>(null)\n/**\n * Whether the user has scrolled to the bottom of the terminal.\n * This is used to prevent the terminal from scrolling to the bottom\n * when new logs are fetched.\n */\nconst scrolledToBottom = ref(false)\n\nlet intervalId: number = 0\n\nonMounted(async () => {\n  const element = scrollPanelRef.value?.$el\n  const scrollContainer = element?.querySelector('.p-scrollpanel-content')\n\n  if (scrollContainer) {\n    scrollContainer.addEventListener('scroll', () => {\n      scrolledToBottom.value =\n        scrollContainer.scrollTop + scrollContainer.clientHeight ===\n        scrollContainer.scrollHeight\n    })\n  }\n\n  const scrollToBottom = () => {\n    if (scrollContainer) {\n      scrollContainer.scrollTop = scrollContainer.scrollHeight\n    }\n  }\n\n  watch(log, () => {\n    if (scrolledToBottom.value) {\n      scrollToBottom()\n    }\n  })\n\n  const fetchLogs = async () => {\n    log.value = await props.fetchLogs()\n  }\n\n  await fetchLogs()\n  scrollToBottom()\n  intervalId = window.setInterval(fetchLogs, props.fetchInterval)\n})\n\nonBeforeUnmount(() => {\n  window.clearInterval(intervalId)\n})\n</script>\n","<template>\n  <div\n    
class=\"font-sans flex flex-col justify-center items-center h-screen m-0 text-neutral-300 bg-neutral-900 dark-theme pointer-events-auto\"\n  >\n    <h2 class=\"text-2xl font-bold\">{{ ProgressMessages[status] }}</h2>\n    <LogTerminal :fetch-logs=\"fetchLogs\" :fetch-interval=\"500\" />\n  </div>\n</template>\n\n<script setup lang=\"ts\">\nimport { ref, onMounted } from 'vue'\nimport LogTerminal from '@/components/common/LogTerminal.vue'\nimport {\n  ProgressStatus,\n  ProgressMessages\n} from '@comfyorg/comfyui-electron-types'\nimport { electronAPI } from '@/utils/envUtil'\n\nconst electron = electronAPI()\n\nconst status = ref<ProgressStatus>(ProgressStatus.INITIAL_STATE)\nconst logs = ref<string[]>([])\n\nconst updateProgress = ({ status: newStatus }: { status: ProgressStatus }) => {\n  status.value = newStatus\n  logs.value = [] // Clear logs when status changes\n}\n\nconst addLogMessage = (message: string) => {\n  logs.value = [...logs.value, message]\n}\n\nconst fetchLogs = async () => {\n  return logs.value.join('\\n')\n}\n\nonMounted(() => {\n  electron.sendReady()\n  electron.onProgressUpdate(updateProgress)\n  electron.onLogMessage((message: string) => {\n    addLogMessage(message)\n  
})\n})\n</script>\n"],"names":["ProgressStatus"],"mappings":";;;;;;;;;;;;;AAaA,UAAM,QAAQ;AAKR,UAAA,MAAM,IAAY,EAAE;AACpB,UAAA,iBAAiB,IAA6C,IAAI;AAMlE,UAAA,mBAAmB,IAAI,KAAK;AAElC,QAAI,aAAqB;AAEzB,cAAU,YAAY;AACd,YAAA,UAAU,eAAe,OAAO;AAChC,YAAA,kBAAkB,SAAS,cAAc,wBAAwB;AAEvE,UAAI,iBAAiB;AACH,wBAAA,iBAAiB,UAAU,MAAM;AAC/C,2BAAiB,QACf,gBAAgB,YAAY,gBAAgB,iBAC5C,gBAAgB;AAAA,QAAA,CACnB;AAAA,MACH;AAEA,YAAM,iBAAiB,6BAAM;AAC3B,YAAI,iBAAiB;AACnB,0BAAgB,YAAY,gBAAgB;AAAA,QAC9C;AAAA,MAAA,GAHqB;AAMvB,YAAM,KAAK,MAAM;AACf,YAAI,iBAAiB,OAAO;AACX;QACjB;AAAA,MAAA,CACD;AAED,YAAM,YAAY,mCAAY;AACxB,YAAA,QAAQ,MAAM,MAAM,UAAU;AAAA,MAAA,GADlB;AAIlB,YAAM,UAAU;AACD;AACf,mBAAa,OAAO,YAAY,WAAW,MAAM,aAAa;AAAA,IAAA,CAC/D;AAED,oBAAgB,MAAM;AACpB,aAAO,cAAc,UAAU;AAAA,IAAA,CAChC;;;;;;;;;;;;;;;;;;;;;;AC9CD,UAAM,WAAW;AAEX,UAAA,SAAS,IAAoBA,EAAe,aAAa;AACzD,UAAA,OAAO,IAAc,CAAA,CAAE;AAE7B,UAAM,iBAAiB,wBAAC,EAAE,QAAQ,gBAA4C;AAC5E,aAAO,QAAQ;AACf,WAAK,QAAQ;IAAC,GAFO;AAKjB,UAAA,gBAAgB,wBAAC,YAAoB;AACzC,WAAK,QAAQ,CAAC,GAAG,KAAK,OAAO,OAAO;AAAA,IAAA,GADhB;AAItB,UAAM,YAAY,mCAAY;AACrB,aAAA,KAAK,MAAM,KAAK,IAAI;AAAA,IAAA,GADX;AAIlB,cAAU,MAAM;AACd,eAAS,UAAU;AACnB,eAAS,iBAAiB,cAAc;AAC/B,eAAA,aAAa,CAAC,YAAoB;AACzC,sBAAc,OAAO;AAAA,MAAA,CACtB;AAAA,IAAA,CACF;;;;;;;;;;;;"}
--- a/web/assets/WelcomeView-DQQgHnsr.css
+++ b/web/assets/WelcomeView-DQQgHnsr.css
@@ -0,0 +1,36 @@
+
+.animated-gradient-text[data-v-12b8b11b] {
+  font-weight: 700;
+  font-size: clamp(2rem, 8vw, 4rem);
+  background: linear-gradient(to right, #12c2e9, #c471ed, #f64f59, #12c2e9);
+  background-size: 300% auto;
+  background-clip: text;
+  -webkit-background-clip: text;
+  -webkit-text-fill-color: transparent;
+  animation: gradient-12b8b11b 8s linear infinite;
+}
+.text-glow[data-v-12b8b11b] {
+  filter: drop-shadow(0 0 8px rgba(255, 255, 255, 0.3));
+}
+@keyframes gradient-12b8b11b {
+0% {
+    background-position: 0% center;
+}
+100% {
+    background-position: 300% center;
+}
+}
+.fade-in-up[data-v-12b8b11b] {
+  animation: fadeInUp-12b8b11b 1.5s ease-out;
+  animation-fill-mode: both;
+}
+@keyframes fadeInUp-12b8b11b {
+0% {
+    opacity: 0;
+    transform: translateY(20px);
+}
+100% {
+    opacity: 1;
+    transform: translateY(0);
+}
+}
--- a/web/assets/WelcomeView-DT4bj-QV.js
+++ b/web/assets/WelcomeView-DT4bj-QV.js
@@ -0,0 +1,33 @@
+var __defProp = Object.defineProperty;
+var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
+import { d as defineComponent, g as openBlock, h as createElementBlock, A as createBaseVNode, a6 as toDisplayString, i as createVNode, z as unref, D as script, P as pushScopeId, Q as popScopeId, _ as _export_sfc } from "./index-B6dYHNhg.js";
+const _withScopeId = /* @__PURE__ */ __name((n) => (pushScopeId("data-v-12b8b11b"), n = n(), popScopeId(), n), "_withScopeId");
+const _hoisted_1 = { class: "font-sans flex flex-col justify-center items-center h-screen m-0 text-neutral-300 bg-neutral-900 dark-theme pointer-events-auto" };
+const _hoisted_2 = { class: "flex flex-col items-center justify-center gap-8 p-8" };
+const _hoisted_3 = { class: "animated-gradient-text text-glow select-none" };
+const _sfc_main = /* @__PURE__ */ defineComponent({
+  __name: "WelcomeView",
+  setup(__props) {
+    return (_ctx, _cache) => {
+      return openBlock(), createElementBlock("div", _hoisted_1, [
+        createBaseVNode("div", _hoisted_2, [
+          createBaseVNode("h1", _hoisted_3, toDisplayString(_ctx.$t("welcome.title")), 1),
+          createVNode(unref(script), {
+            label: _ctx.$t("welcome.getStarted"),
+            icon: "pi pi-arrow-right",
+            iconPos: "right",
+            size: "large",
+            rounded: "",
+            onClick: _cache[0] || (_cache[0] = ($event) => _ctx.$router.push("/install")),
+            class: "p-4 text-lg fade-in-up"
+          }, null, 8, ["label"])
+        ])
+      ]);
+    };
+  }
+});
+const WelcomeView = /* @__PURE__ */ _export_sfc(_sfc_main, [["__scopeId", "data-v-12b8b11b"]]);
+export {
+  WelcomeView as default
+};
+//# sourceMappingURL=WelcomeView-DT4bj-QV.js.map
--- a/web/assets/WelcomeView-DT4bj-QV.js.map
+++ b/web/assets/WelcomeView-DT4bj-QV.js.map
@@ -0,0 +1 @@
+{"version":3,"file":"WelcomeView-DT4bj-QV.js","sources":[],"sourcesContent":[],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;"}
--- a/web/assets/colorPalette-D5oi2-2V.js
+++ b/web/assets/colorPalette-D5oi2-2V.js
@@ -1,865 +0,0 @@
-var __defProp = Object.defineProperty;
-var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
-import { k as app, aP as LGraphCanvas, bO as useToastStore, ca as $el, z as LiteGraph } from "./index-DGAbdBYF.js";
-const colorPalettes = {
-  dark: {
-    id: "dark",
-    name: "Dark (Default)",
-    colors: {
-      node_slot: {
-        CLIP: "#FFD500",
-        // bright yellow
-        CLIP_VISION: "#A8DADC",
-        // light blue-gray
-        CLIP_VISION_OUTPUT: "#ad7452",
-        // rusty brown-orange
-        CONDITIONING: "#FFA931",
-        // vibrant orange-yellow
-        CONTROL_NET: "#6EE7B7",
-        // soft mint green
-        IMAGE: "#64B5F6",
-        // bright sky blue
-        LATENT: "#FF9CF9",
-        // light pink-purple
-        MASK: "#81C784",
-        // muted green
-        MODEL: "#B39DDB",
-        // light lavender-purple
-        STYLE_MODEL: "#C2FFAE",
-        // light green-yellow
-        VAE: "#FF6E6E",
-        // bright red
-        NOISE: "#B0B0B0",
-        // gray
-        GUIDER: "#66FFFF",
-        // cyan
-        SAMPLER: "#ECB4B4",
-        // very soft red
-        SIGMAS: "#CDFFCD",
-        // soft lime green
-        TAESD: "#DCC274"
-        // cheesecake
-      },
-      litegraph_base: {
-        BACKGROUND_IMAGE: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAGQAAABkCAIAAAD/gAIDAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAQBJREFUeNrs1rEKwjAUhlETUkj3vP9rdmr1Ysammk2w5wdxuLgcMHyptfawuZX4pJSWZTnfnu/lnIe/jNNxHHGNn//HNbbv+4dr6V+11uF527arU7+u63qfa/bnmh8sWLBgwYJlqRf8MEptXPBXJXa37BSl3ixYsGDBMliwFLyCV/DeLIMFCxYsWLBMwSt4Be/NggXLYMGCBUvBK3iNruC9WbBgwYJlsGApeAWv4L1ZBgsWLFiwYJmCV/AK3psFC5bBggULloJX8BpdwXuzYMGCBctgwVLwCl7Be7MMFixYsGDBsu8FH1FaSmExVfAxBa/gvVmwYMGCZbBg/W4vAQYA5tRF9QYlv/QAAAAASUVORK5CYII=",
-        CLEAR_BACKGROUND_COLOR: "#222",
-        NODE_TITLE_COLOR: "#999",
-        NODE_SELECTED_TITLE_COLOR: "#FFF",
-        NODE_TEXT_SIZE: 14,
-        NODE_TEXT_COLOR: "#AAA",
-        NODE_SUBTEXT_SIZE: 12,
-        NODE_DEFAULT_COLOR: "#333",
-        NODE_DEFAULT_BGCOLOR: "#353535",
-        NODE_DEFAULT_BOXCOLOR: "#666",
-        NODE_DEFAULT_SHAPE: "box",
-        NODE_BOX_OUTLINE_COLOR: "#FFF",
-        NODE_BYPASS_BGCOLOR: "#FF00FF",
-        DEFAULT_SHADOW_COLOR: "rgba(0,0,0,0.5)",
-        DEFAULT_GROUP_FONT: 24,
-        WIDGET_BGCOLOR: "#222",
-        WIDGET_OUTLINE_COLOR: "#666",
-        WIDGET_TEXT_COLOR: "#DDD",
-        WIDGET_SECONDARY_TEXT_COLOR: "#999",
-        LINK_COLOR: "#9A9",
-        EVENT_LINK_COLOR: "#A86",
-        CONNECTING_LINK_COLOR: "#AFA",
-        BADGE_FG_COLOR: "#FFF",
-        BADGE_BG_COLOR: "#0F1F0F"
-      },
-      comfy_base: {
-        "fg-color": "#fff",
-        "bg-color": "#202020",
-        "comfy-menu-bg": "#353535",
-        "comfy-input-bg": "#222",
-        "input-text": "#ddd",
-        "descrip-text": "#999",
-        "drag-text": "#ccc",
-        "error-text": "#ff4444",
-        "border-color": "#4e4e4e",
-        "tr-even-bg-color": "#222",
-        "tr-odd-bg-color": "#353535",
-        "content-bg": "#4e4e4e",
-        "content-fg": "#fff",
-        "content-hover-bg": "#222",
-        "content-hover-fg": "#fff"
-      }
-    }
-  },
-  light: {
-    id: "light",
-    name: "Light",
-    colors: {
-      node_slot: {
-        CLIP: "#FFA726",
-        // orange
-        CLIP_VISION: "#5C6BC0",
-        // indigo
-        CLIP_VISION_OUTPUT: "#8D6E63",
-        // brown
-        CONDITIONING: "#EF5350",
-        // red
-        CONTROL_NET: "#66BB6A",
-        // green
-        IMAGE: "#42A5F5",
-        // blue
-        LATENT: "#AB47BC",
-        // purple
-        MASK: "#9CCC65",
-        // light green
-        MODEL: "#7E57C2",
-        // deep purple
-        STYLE_MODEL: "#D4E157",
-        // lime
-        VAE: "#FF7043"
-        // deep orange
-      },
-      litegraph_base: {
-        BACKGROUND_IMAGE: "data:image/gif;base64,R0lGODlhZABkALMAAAAAAP///+vr6+rq6ujo6Ofn5+bm5uXl5d3d3f///wAAAAAAAAAAAAAAAAAAAAAAACH5BAEAAAkALAAAAABkAGQAAAT/UMhJq7046827HkcoHkYxjgZhnGG6si5LqnIM0/fL4qwwIMAg0CAsEovBIxKhRDaNy2GUOX0KfVFrssrNdpdaqTeKBX+dZ+jYvEaTf+y4W66mC8PUdrE879f9d2mBeoNLfH+IhYBbhIx2jkiHiomQlGKPl4uZe3CaeZifnnijgkESBqipqqusra6vsLGys62SlZO4t7qbuby7CLa+wqGWxL3Gv3jByMOkjc2lw8vOoNSi0czAncXW3Njdx9Pf48/Z4Kbbx+fQ5evZ4u3k1fKR6cn03vHlp7T9/v8A/8Gbp4+gwXoFryXMB2qgwoMMHyKEqA5fxX322FG8tzBcRnMW/zlulPbRncmQGidKjMjyYsOSKEF2FBlJQMCbOHP6c9iSZs+UnGYCdbnSo1CZI5F64kn0p1KnTH02nSoV3dGTV7FFHVqVq1dtWcMmVQZTbNGu72zqXMuW7danVL+6e4t1bEy6MeueBYLXrNO5Ze36jQtWsOG97wIj1vt3St/DjTEORss4nNq2mDP3e7w4r1bFkSET5hy6s2TRlD2/mSxXtSHQhCunXo26NevCpmvD/UU6tuullzULH76q92zdZG/Ltv1a+W+osI/nRmyc+fRi1Xdbh+68+0vv10dH3+77KD/i6IdnX669/frn5Zsjh4/2PXju8+8bzc9/6fj27LFnX11/+IUnXWl7BJfegm79FyB9JOl3oHgSklefgxAC+FmFGpqHIYcCfkhgfCohSKKJVo044YUMttggiBkmp6KFXw1oII24oYhjiDByaKOOHcp3Y5BD/njikSkO+eBREQAAOw==",
-        CLEAR_BACKGROUND_COLOR: "lightgray",
-        NODE_TITLE_COLOR: "#222",
-        NODE_SELECTED_TITLE_COLOR: "#000",
-        NODE_TEXT_SIZE: 14,
-        NODE_TEXT_COLOR: "#444",
-        NODE_SUBTEXT_SIZE: 12,
-        NODE_DEFAULT_COLOR: "#F7F7F7",
-        NODE_DEFAULT_BGCOLOR: "#F5F5F5",
-        NODE_DEFAULT_BOXCOLOR: "#CCC",
-        NODE_DEFAULT_SHAPE: "box",
-        NODE_BOX_OUTLINE_COLOR: "#000",
-        NODE_BYPASS_BGCOLOR: "#FF00FF",
-        DEFAULT_SHADOW_COLOR: "rgba(0,0,0,0.1)",
-        DEFAULT_GROUP_FONT: 24,
-        WIDGET_BGCOLOR: "#D4D4D4",
-        WIDGET_OUTLINE_COLOR: "#999",
-        WIDGET_TEXT_COLOR: "#222",
-        WIDGET_SECONDARY_TEXT_COLOR: "#555",
-        LINK_COLOR: "#4CAF50",
-        EVENT_LINK_COLOR: "#FF9800",
-        CONNECTING_LINK_COLOR: "#2196F3",
-        BADGE_FG_COLOR: "#000",
-        BADGE_BG_COLOR: "#FFF"
-      },
-      comfy_base: {
-        "fg-color": "#222",
-        "bg-color": "#DDD",
-        "comfy-menu-bg": "#F5F5F5",
-        "comfy-input-bg": "#C9C9C9",
-        "input-text": "#222",
-        "descrip-text": "#444",
-        "drag-text": "#555",
-        "error-text": "#F44336",
-        "border-color": "#888",
-        "tr-even-bg-color": "#f9f9f9",
-        "tr-odd-bg-color": "#fff",
-        "content-bg": "#e0e0e0",
-        "content-fg": "#222",
-        "content-hover-bg": "#adadad",
-        "content-hover-fg": "#222"
-      }
-    }
-  },
-  solarized: {
-    id: "solarized",
-    name: "Solarized",
-    colors: {
-      node_slot: {
-        CLIP: "#2AB7CA",
-        // light blue
-        CLIP_VISION: "#6c71c4",
-        // blue violet
-        CLIP_VISION_OUTPUT: "#859900",
-        // olive green
-        CONDITIONING: "#d33682",
-        // magenta
-        CONTROL_NET: "#d1ffd7",
-        // light mint green
-        IMAGE: "#5940bb",
-        // deep blue violet
-        LATENT: "#268bd2",
-        // blue
-        MASK: "#CCC9E7",
-        // light purple-gray
-        MODEL: "#dc322f",
-        // red
-        STYLE_MODEL: "#1a998a",
-        // teal
-        UPSCALE_MODEL: "#054A29",
-        // dark green
-        VAE: "#facfad"
-        // light pink-orange
-      },
-      litegraph_base: {
-        NODE_TITLE_COLOR: "#fdf6e3",
-        // Base3
-        NODE_SELECTED_TITLE_COLOR: "#A9D400",
-        NODE_TEXT_SIZE: 14,
-        NODE_TEXT_COLOR: "#657b83",
-        // Base00
-        NODE_SUBTEXT_SIZE: 12,
-        NODE_DEFAULT_COLOR: "#094656",
-        NODE_DEFAULT_BGCOLOR: "#073642",
-        // Base02
-        NODE_DEFAULT_BOXCOLOR: "#839496",
-        // Base0
-        NODE_DEFAULT_SHAPE: "box",
-        NODE_BOX_OUTLINE_COLOR: "#fdf6e3",
-        // Base3
-        NODE_BYPASS_BGCOLOR: "#FF00FF",
-        DEFAULT_SHADOW_COLOR: "rgba(0,0,0,0.5)",
-        DEFAULT_GROUP_FONT: 24,
-        WIDGET_BGCOLOR: "#002b36",
-        // Base03
-        WIDGET_OUTLINE_COLOR: "#839496",
-        // Base0
-        WIDGET_TEXT_COLOR: "#fdf6e3",
-        // Base3
-        WIDGET_SECONDARY_TEXT_COLOR: "#93a1a1",
-        // Base1
-        LINK_COLOR: "#2aa198",
-        // Solarized Cyan
-        EVENT_LINK_COLOR: "#268bd2",
-        // Solarized Blue
-        CONNECTING_LINK_COLOR: "#859900"
-        // Solarized Green
-      },
-      comfy_base: {
-        "fg-color": "#fdf6e3",
-        // Base3
-        "bg-color": "#002b36",
-        // Base03
-        "comfy-menu-bg": "#073642",
-        // Base02
-        "comfy-input-bg": "#002b36",
-        // Base03
-        "input-text": "#93a1a1",
-        // Base1
-        "descrip-text": "#586e75",
-        // Base01
-        "drag-text": "#839496",
-        // Base0
-        "error-text": "#dc322f",
-        // Solarized Red
-        "border-color": "#657b83",
-        // Base00
-        "tr-even-bg-color": "#002b36",
-        "tr-odd-bg-color": "#073642",
-        "content-bg": "#657b83",
-        "content-fg": "#fdf6e3",
-        "content-hover-bg": "#002b36",
-        "content-hover-fg": "#fdf6e3"
-      }
-    }
-  },
-  arc: {
-    id: "arc",
-    name: "Arc",
-    colors: {
-      node_slot: {
-        BOOLEAN: "",
-        CLIP: "#eacb8b",
-        CLIP_VISION: "#A8DADC",
-        CLIP_VISION_OUTPUT: "#ad7452",
-        CONDITIONING: "#cf876f",
-        CONTROL_NET: "#00d78d",
-        CONTROL_NET_WEIGHTS: "",
-        FLOAT: "",
-        GLIGEN: "",
-        IMAGE: "#80a1c0",
-        IMAGEUPLOAD: "",
-        INT: "",
-        LATENT: "#b38ead",
-        LATENT_KEYFRAME: "",
-        MASK: "#a3bd8d",
-        MODEL: "#8978a7",
-        SAMPLER: "",
-        SIGMAS: "",
-        STRING: "",
-        STYLE_MODEL: "#C2FFAE",
-        T2I_ADAPTER_WEIGHTS: "",
-        TAESD: "#DCC274",
-        TIMESTEP_KEYFRAME: "",
-        UPSCALE_MODEL: "",
-        VAE: "#be616b"
-      },
-      litegraph_base: {
-        BACKGROUND_IMAGE: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAGQAAABkCAYAAABw4pVUAAAACXBIWXMAAAsTAAALEwEAmpwYAAABcklEQVR4nO3YMUoDARgF4RfxBqZI6/0vZqFn0MYtrLIQMFN8U6V4LAtD+Jm9XG/v30OGl2e/AP7yevz4+vx45nvgF/+QGITEICQGITEIiUFIjNNC3q43u3/YnRJyPOzeQ+0e220nhRzReC8e7R7bbdvl+Jal1Bs46jEIiUFIDEJiEBKDkBhKPbZT6qHdptRTu02p53DUYxASg5AYhMQgJAYhMZR6bKfUQ7tNqad2m1LP4ajHICQGITEIiUFIDEJiKPXYTqmHdptST+02pZ7DUY9BSAxCYhASg5AYhMRQ6rGdUg/tNqWe2m1KPYejHoOQGITEICQGITEIiaHUYzulHtptSj2125R6Dkc9BiExCIlBSAxCYhASQ6nHdko9tNuUemq3KfUcjnoMQmIQEoOQGITEICSGUo/tlHpotyn11G5T6jkc9RiExCAkBiExCIlBSAylHtsp9dBuU+qp3abUczjqMQiJQUgMQmIQEoOQGITE+AHFISNQrFTGuwAAAABJRU5ErkJggg==",
-        CLEAR_BACKGROUND_COLOR: "#2b2f38",
-        NODE_TITLE_COLOR: "#b2b7bd",
-        NODE_SELECTED_TITLE_COLOR: "#FFF",
-        NODE_TEXT_SIZE: 14,
-        NODE_TEXT_COLOR: "#AAA",
-        NODE_SUBTEXT_SIZE: 12,
-        NODE_DEFAULT_COLOR: "#2b2f38",
-        NODE_DEFAULT_BGCOLOR: "#242730",
-        NODE_DEFAULT_BOXCOLOR: "#6e7581",
-        NODE_DEFAULT_SHAPE: "box",
-        NODE_BOX_OUTLINE_COLOR: "#FFF",
-        NODE_BYPASS_BGCOLOR: "#FF00FF",
-        DEFAULT_SHADOW_COLOR: "rgba(0,0,0,0.5)",
-        DEFAULT_GROUP_FONT: 22,
-        WIDGET_BGCOLOR: "#2b2f38",
-        WIDGET_OUTLINE_COLOR: "#6e7581",
-        WIDGET_TEXT_COLOR: "#DDD",
-        WIDGET_SECONDARY_TEXT_COLOR: "#b2b7bd",
-        LINK_COLOR: "#9A9",
-        EVENT_LINK_COLOR: "#A86",
-        CONNECTING_LINK_COLOR: "#AFA"
-      },
-      comfy_base: {
-        "fg-color": "#fff",
-        "bg-color": "#2b2f38",
-        "comfy-menu-bg": "#242730",
-        "comfy-input-bg": "#2b2f38",
-        "input-text": "#ddd",
-        "descrip-text": "#b2b7bd",
-        "drag-text": "#ccc",
-        "error-text": "#ff4444",
-        "border-color": "#6e7581",
-        "tr-even-bg-color": "#2b2f38",
-        "tr-odd-bg-color": "#242730",
-        "content-bg": "#6e7581",
-        "content-fg": "#fff",
-        "content-hover-bg": "#2b2f38",
-        "content-hover-fg": "#fff"
-      }
-    }
-  },
-  nord: {
-    id: "nord",
-    name: "Nord",
-    colors: {
-      node_slot: {
-        BOOLEAN: "",
-        CLIP: "#eacb8b",
-        CLIP_VISION: "#A8DADC",
-        CLIP_VISION_OUTPUT: "#ad7452",
-        CONDITIONING: "#cf876f",
-        CONTROL_NET: "#00d78d",
-        CONTROL_NET_WEIGHTS: "",
-        FLOAT: "",
-        GLIGEN: "",
-        IMAGE: "#80a1c0",
-        IMAGEUPLOAD: "",
-        INT: "",
-        LATENT: "#b38ead",
-        LATENT_KEYFRAME: "",
-        MASK: "#a3bd8d",
-        MODEL: "#8978a7",
-        SAMPLER: "",
-        SIGMAS: "",
-        STRING: "",
-        STYLE_MODEL: "#C2FFAE",
-        T2I_ADAPTER_WEIGHTS: "",
-        TAESD: "#DCC274",
-        TIMESTEP_KEYFRAME: "",
-        UPSCALE_MODEL: "",
-        VAE: "#be616b"
-      },
-      litegraph_base: {
-        BACKGROUND_IMAGE: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAGQAAABkCAIAAAD/gAIDAAAACXBIWXMAAAsTAAALEwEAmpwYAAAFu2lUWHRYTUw6Y29tLmFkb2JlLnhtcAAAAAAAPD94cGFja2V0IGJlZ2luPSLvu78iIGlkPSJXNU0wTXBDZWhpSHpyZVN6TlRjemtjOWQiPz4gPHg6eG1wbWV0YSB4bWxuczp4PSJhZG9iZTpuczptZXRhLyIgeDp4bXB0az0iQWRvYmUgWE1QIENvcmUgOS4xLWMwMDEgNzkuMTQ2Mjg5OSwgMjAyMy8wNi8yNS0yMDowMTo1NSAgICAgICAgIj4gPHJkZjpSREYgeG1sbnM6cmRmPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5LzAyLzIyLXJkZi1zeW50YXgtbnMjIj4gPHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIgeG1sbnM6eG1wPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvIiB4bWxuczpkYz0iaHR0cDovL3B1cmwub3JnL2RjL2VsZW1lbnRzLzEuMS8iIHhtbG5zOnBob3Rvc2hvcD0iaHR0cDovL25zLmFkb2JlLmNvbS9waG90b3Nob3AvMS4wLyIgeG1sbnM6eG1wTU09Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC9tbS8iIHhtbG5zOnN0RXZ0PSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvc1R5cGUvUmVzb3VyY2VFdmVudCMiIHhtcDpDcmVhdG9yVG9vbD0iQWRvYmUgUGhvdG9zaG9wIDI1LjEgKFdpbmRvd3MpIiB4bXA6Q3JlYXRlRGF0ZT0iMjAyMy0xMS0xM1QwMDoxODowMiswMTowMCIgeG1wOk1vZGlmeURhdGU9IjIwMjMtMTEtMTVUMDE6MjA6NDUrMDE6MDAiIHhtcDpNZXRhZGF0YURhdGU9IjIwMjMtMTEtMTVUMDE6MjA6NDUrMDE6MDAiIGRjOmZvcm1hdD0iaW1hZ2UvcG5nIiBwaG90b3Nob3A6Q29sb3JNb2RlPSIzIiB4bXBNTTpJbnN0YW5jZUlEPSJ4bXAuaWlkOjUwNDFhMmZjLTEzNzQtMTk0ZC1hZWY4LTYxMzM1MTVmNjUwMCIgeG1wTU06RG9jdW1lbnRJRD0ieG1wLmRpZDoyMzFiMTBiMC1iNGZiLTAyNGUtYjEyZS0zMDUzMDNjZDA3YzgiIHhtcE1NOk9yaWdpbmFsRG9jdW1lbnRJRD0ieG1wLmRpZDoyMzFiMTBiMC1iNGZiLTAyNGUtYjEyZS0zMDUzMDNjZDA3YzgiPiA8eG1wTU06SGlzdG9yeT4gPHJkZjpTZXE+IDxyZGY6bGkgc3RFdnQ6YWN0aW9uPSJjcmVhdGVkIiBzdEV2dDppbnN0YW5jZUlEPSJ4bXAuaWlkOjIzMWIxMGIwLWI0ZmItMDI0ZS1iMTJlLTMwNTMwM2NkMDdjOCIgc3RFdnQ6d2hlbj0iMjAyMy0xMS0xM1QwMDoxODowMiswMTowMCIgc3RFdnQ6c29mdHdhcmVBZ2VudD0iQWRvYmUgUGhvdG9zaG9wIDI1LjEgKFdpbmRvd3MpIi8+IDxyZGY6bGkgc3RFdnQ6YWN0aW9uPSJzYXZlZCIgc3RFdnQ6aW5zdGFuY2VJRD0ieG1wLmlpZDo1MDQxYTJmYy0xMzc0LTE5NGQtYWVmOC02MTMzNTE1ZjY1MDAiIHN0RXZ0OndoZW49IjIwMjMtMTEtMTVUMDE6MjA6NDUrMDE6MDAiIHN0RXZ0OnNvZnR3YXJlQWdlbnQ9IkFkb2JlIFBob3Rvc2hvcCAyNS4xIChXaW5kb3dzKSIgc3RFdnQ6Y2hhbmdlZD0iLyIvPiA8L3JkZjpTZXE+IDwveG1wTU06SGlzdG
9yeT4gPC9yZGY6RGVzY3JpcHRpb24+IDwvcmRmOlJERj4gPC94OnhtcG1ldGE+IDw/eHBhY2tldCBlbmQ9InIiPz73jWg/AAAAyUlEQVR42u3WKwoAIBRFQRdiMb1idv9Lsxn9gEFw4Dbb8JCTojbbXEJwjJVL2HKwYMGCBQuWLbDmjr+9zrBGjHl1WVcvy2DBggULFizTWQpewSt4HzwsgwULFiwFr7MUvMtS8D54WLBgGSxYCl7BK3iXZbBgwYIFC5bpLAWv4BW8Dx6WwYIFC5aC11kK3mUpeB88LFiwDBYsBa/gFbzLMliwYMGCBct0loJX8AreBw/LYMGCBUvB6ywF77IUvA8eFixYBgsWrNfWAZPltufdad+1AAAAAElFTkSuQmCC",
-        CLEAR_BACKGROUND_COLOR: "#212732",
-        NODE_TITLE_COLOR: "#999",
-        NODE_SELECTED_TITLE_COLOR: "#e5eaf0",
-        NODE_TEXT_SIZE: 14,
-        NODE_TEXT_COLOR: "#bcc2c8",
-        NODE_SUBTEXT_SIZE: 12,
-        NODE_DEFAULT_COLOR: "#2e3440",
-        NODE_DEFAULT_BGCOLOR: "#161b22",
-        NODE_DEFAULT_BOXCOLOR: "#545d70",
-        NODE_DEFAULT_SHAPE: "box",
-        NODE_BOX_OUTLINE_COLOR: "#e5eaf0",
-        NODE_BYPASS_BGCOLOR: "#FF00FF",
-        DEFAULT_SHADOW_COLOR: "rgba(0,0,0,0.5)",
-        DEFAULT_GROUP_FONT: 24,
-        WIDGET_BGCOLOR: "#2e3440",
-        WIDGET_OUTLINE_COLOR: "#545d70",
-        WIDGET_TEXT_COLOR: "#bcc2c8",
-        WIDGET_SECONDARY_TEXT_COLOR: "#999",
-        LINK_COLOR: "#9A9",
-        EVENT_LINK_COLOR: "#A86",
-        CONNECTING_LINK_COLOR: "#AFA"
-      },
-      comfy_base: {
-        "fg-color": "#e5eaf0",
-        "bg-color": "#2e3440",
-        "comfy-menu-bg": "#161b22",
-        "comfy-input-bg": "#2e3440",
-        "input-text": "#bcc2c8",
-        "descrip-text": "#999",
-        "drag-text": "#ccc",
-        "error-text": "#ff4444",
-        "border-color": "#545d70",
-        "tr-even-bg-color": "#2e3440",
-        "tr-odd-bg-color": "#161b22",
-        "content-bg": "#545d70",
-        "content-fg": "#e5eaf0",
-        "content-hover-bg": "#2e3440",
-        "content-hover-fg": "#e5eaf0"
-      }
-    }
-  },
-  github: {
-    id: "github",
-    name: "Github",
-    colors: {
-      node_slot: {
-        BOOLEAN: "",
-        CLIP: "#eacb8b",
-        CLIP_VISION: "#A8DADC",
-        CLIP_VISION_OUTPUT: "#ad7452",
-        CONDITIONING: "#cf876f",
-        CONTROL_NET: "#00d78d",
-        CONTROL_NET_WEIGHTS: "",
-        FLOAT: "",
-        GLIGEN: "",
-        IMAGE: "#80a1c0",
-        IMAGEUPLOAD: "",
-        INT: "",
-        LATENT: "#b38ead",
-        LATENT_KEYFRAME: "",
-        MASK: "#a3bd8d",
-        MODEL: "#8978a7",
-        SAMPLER: "",
-        SIGMAS: "",
-        STRING: "",
-        STYLE_MODEL: "#C2FFAE",
-        T2I_ADAPTER_WEIGHTS: "",
-        TAESD: "#DCC274",
-        TIMESTEP_KEYFRAME: "",
-        UPSCALE_MODEL: "",
-        VAE: "#be616b"
-      },
-      litegraph_base: {
-        BACKGROUND_IMAGE: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAGQAAABkCAIAAAD/gAIDAAAACXBIWXMAAAsTAAALEwEAmpwYAAAGlmlUWHRYTUw6Y29tLmFkb2JlLnhtcAAAAAAAPD94cGFja2V0IGJlZ2luPSLvu78iIGlkPSJXNU0wTXBDZWhpSHpyZVN6TlRjemtjOWQiPz4gPHg6eG1wbWV0YSB4bWxuczp4PSJhZG9iZTpuczptZXRhLyIgeDp4bXB0az0iQWRvYmUgWE1QIENvcmUgOS4xLWMwMDEgNzkuMTQ2Mjg5OSwgMjAyMy8wNi8yNS0yMDowMTo1NSAgICAgICAgIj4gPHJkZjpSREYgeG1sbnM6cmRmPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5LzAyLzIyLXJkZi1zeW50YXgtbnMjIj4gPHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIgeG1sbnM6eG1wPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvIiB4bWxuczpkYz0iaHR0cDovL3B1cmwub3JnL2RjL2VsZW1lbnRzLzEuMS8iIHhtbG5zOnBob3Rvc2hvcD0iaHR0cDovL25zLmFkb2JlLmNvbS9waG90b3Nob3AvMS4wLyIgeG1sbnM6eG1wTU09Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC9tbS8iIHhtbG5zOnN0RXZ0PSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvc1R5cGUvUmVzb3VyY2VFdmVudCMiIHhtcDpDcmVhdG9yVG9vbD0iQWRvYmUgUGhvdG9zaG9wIDI1LjEgKFdpbmRvd3MpIiB4bXA6Q3JlYXRlRGF0ZT0iMjAyMy0xMS0xM1QwMDoxODowMiswMTowMCIgeG1wOk1vZGlmeURhdGU9IjIwMjMtMTEtMTVUMDI6MDQ6NTkrMDE6MDAiIHhtcDpNZXRhZGF0YURhdGU9IjIwMjMtMTEtMTVUMDI6MDQ6NTkrMDE6MDAiIGRjOmZvcm1hdD0iaW1hZ2UvcG5nIiBwaG90b3Nob3A6Q29sb3JNb2RlPSIzIiB4bXBNTTpJbnN0YW5jZUlEPSJ4bXAuaWlkOmIyYzRhNjA5LWJmYTctYTg0MC1iOGFlLTk3MzE2ZjM1ZGIyNyIgeG1wTU06RG9jdW1lbnRJRD0iYWRvYmU6ZG9jaWQ6cGhvdG9zaG9wOjk0ZmNlZGU4LTE1MTctZmQ0MC04ZGU3LWYzOTgxM2E3ODk5ZiIgeG1wTU06T3JpZ2luYWxEb2N1bWVudElEPSJ4bXAuZGlkOjIzMWIxMGIwLWI0ZmItMDI0ZS1iMTJlLTMwNTMwM2NkMDdjOCI+IDx4bXBNTTpIaXN0b3J5PiA8cmRmOlNlcT4gPHJkZjpsaSBzdEV2dDphY3Rpb249ImNyZWF0ZWQiIHN0RXZ0Omluc3RhbmNlSUQ9InhtcC5paWQ6MjMxYjEwYjAtYjRmYi0wMjRlLWIxMmUtMzA1MzAzY2QwN2M4IiBzdEV2dDp3aGVuPSIyMDIzLTExLTEzVDAwOjE4OjAyKzAxOjAwIiBzdEV2dDpzb2Z0d2FyZUFnZW50PSJBZG9iZSBQaG90b3Nob3AgMjUuMSAoV2luZG93cykiLz4gPHJkZjpsaSBzdEV2dDphY3Rpb249InNhdmVkIiBzdEV2dDppbnN0YW5jZUlEPSJ4bXAuaWlkOjQ4OWY1NzlmLTJkNjUtZWQ0Zi04OTg0LTA4NGE2MGE1ZTMzNSIgc3RFdnQ6d2hlbj0iMjAyMy0xMS0xNVQwMjowNDo1OSswMTowMCIgc3RFdnQ6c29mdHdhcmVBZ2VudD0iQWRvYmUgUGhvdG9zaG9wIDI1LjEgKFdpbmRvd3MpIiBzdEV2dDpjaGFuZ2VkPSIvIi8+IDxyZGY6bGkgc3
RFdnQ6YWN0aW9uPSJzYXZlZCIgc3RFdnQ6aW5zdGFuY2VJRD0ieG1wLmlpZDpiMmM0YTYwOS1iZmE3LWE4NDAtYjhhZS05NzMxNmYzNWRiMjciIHN0RXZ0OndoZW49IjIwMjMtMTEtMTVUMDI6MDQ6NTkrMDE6MDAiIHN0RXZ0OnNvZnR3YXJlQWdlbnQ9IkFkb2JlIFBob3Rvc2hvcCAyNS4xIChXaW5kb3dzKSIgc3RFdnQ6Y2hhbmdlZD0iLyIvPiA8L3JkZjpTZXE+IDwveG1wTU06SGlzdG9yeT4gPC9yZGY6RGVzY3JpcHRpb24+IDwvcmRmOlJERj4gPC94OnhtcG1ldGE+IDw/eHBhY2tldCBlbmQ9InIiPz4OTe6GAAAAx0lEQVR42u3WMQoAIQxFwRzJys77X8vSLiRgITif7bYbgrwYc/mKXyBoY4VVBgsWLFiwYFmOlTv+9jfDOjHmr8u6eVkGCxYsWLBgmc5S8ApewXvgYRksWLBgKXidpeBdloL3wMOCBctgwVLwCl7BuyyDBQsWLFiwTGcpeAWv4D3wsAwWLFiwFLzOUvAuS8F74GHBgmWwYCl4Ba/gXZbBggULFixYprMUvIJX8B54WAYLFixYCl5nKXiXpeA98LBgwTJYsGC9tg1o8f4TTtqzNQAAAABJRU5ErkJggg==",
-        CLEAR_BACKGROUND_COLOR: "#040506",
-        NODE_TITLE_COLOR: "#999",
-        NODE_SELECTED_TITLE_COLOR: "#e5eaf0",
-        NODE_TEXT_SIZE: 14,
-        NODE_TEXT_COLOR: "#bcc2c8",
-        NODE_SUBTEXT_SIZE: 12,
-        NODE_DEFAULT_COLOR: "#161b22",
-        NODE_DEFAULT_BGCOLOR: "#13171d",
-        NODE_DEFAULT_BOXCOLOR: "#30363d",
-        NODE_DEFAULT_SHAPE: "box",
-        NODE_BOX_OUTLINE_COLOR: "#e5eaf0",
-        NODE_BYPASS_BGCOLOR: "#FF00FF",
-        DEFAULT_SHADOW_COLOR: "rgba(0,0,0,0.5)",
-        DEFAULT_GROUP_FONT: 24,
-        WIDGET_BGCOLOR: "#161b22",
-        WIDGET_OUTLINE_COLOR: "#30363d",
-        WIDGET_TEXT_COLOR: "#bcc2c8",
-        WIDGET_SECONDARY_TEXT_COLOR: "#999",
-        LINK_COLOR: "#9A9",
-        EVENT_LINK_COLOR: "#A86",
-        CONNECTING_LINK_COLOR: "#AFA"
-      },
-      comfy_base: {
-        "fg-color": "#e5eaf0",
-        "bg-color": "#161b22",
-        "comfy-menu-bg": "#13171d",
-        "comfy-input-bg": "#161b22",
-        "input-text": "#bcc2c8",
-        "descrip-text": "#999",
-        "drag-text": "#ccc",
-        "error-text": "#ff4444",
-        "border-color": "#30363d",
-        "tr-even-bg-color": "#161b22",
-        "tr-odd-bg-color": "#13171d",
-        "content-bg": "#30363d",
-        "content-fg": "#e5eaf0",
-        "content-hover-bg": "#161b22",
-        "content-hover-fg": "#e5eaf0"
-      }
-    }
-  }
-};
-const id = "Comfy.ColorPalette";
-const idCustomColorPalettes = "Comfy.CustomColorPalettes";
-const defaultColorPaletteId = "dark";
-const els = {
-  select: null
-};
-const getCustomColorPalettes = /* @__PURE__ */ __name(() => {
-  return app.ui.settings.getSettingValue(idCustomColorPalettes, {});
-}, "getCustomColorPalettes");
-const setCustomColorPalettes = /* @__PURE__ */ __name((customColorPalettes) => {
-  return app.ui.settings.setSettingValue(
-    idCustomColorPalettes,
-    customColorPalettes
-  );
-}, "setCustomColorPalettes");
-const defaultColorPalette = colorPalettes[defaultColorPaletteId];
-const getColorPalette = /* @__PURE__ */ __name((colorPaletteId) => {
-  if (!colorPaletteId) {
-    colorPaletteId = app.ui.settings.getSettingValue(id, defaultColorPaletteId);
-  }
-  if (colorPaletteId.startsWith("custom_")) {
-    colorPaletteId = colorPaletteId.substr(7);
-    let customColorPalettes = getCustomColorPalettes();
-    if (customColorPalettes[colorPaletteId]) {
-      return customColorPalettes[colorPaletteId];
-    }
-  }
-  return colorPalettes[colorPaletteId];
-}, "getColorPalette");
-const setColorPalette = /* @__PURE__ */ __name((colorPaletteId) => {
-  app.ui.settings.setSettingValue(id, colorPaletteId);
-}, "setColorPalette");
-app.registerExtension({
-  name: id,
-  init() {
-    LGraphCanvas.prototype.updateBackground = function(image, clearBackgroundColor) {
-      this._bg_img = new Image();
-      this._bg_img.name = image;
-      this._bg_img.src = image;
-      this._bg_img.onload = () => {
-        this.draw(true, true);
-      };
-      this.background_image = image;
-      this.clear_background = true;
-      this.clear_background_color = clearBackgroundColor;
-      this._pattern = null;
-    };
-  },
-  addCustomNodeDefs(node_defs) {
-    const sortObjectKeys = /* @__PURE__ */ __name((unordered) => {
-      return Object.keys(unordered).sort().reduce((obj, key) => {
-        obj[key] = unordered[key];
-        return obj;
-      }, {});
-    }, "sortObjectKeys");
-    function getSlotTypes() {
-      var types = [];
-      const defs = node_defs;
-      for (const nodeId in defs) {
-        const nodeData = defs[nodeId];
-        var inputs = nodeData["input"]["required"];
-        if (nodeData["input"]["optional"] !== void 0) {
-          inputs = Object.assign(
-            {},
-            nodeData["input"]["required"],
-            nodeData["input"]["optional"]
-          );
-        }
-        for (const inputName in inputs) {
-          const inputData = inputs[inputName];
-          const type = inputData[0];
-          if (!Array.isArray(type)) {
-            types.push(type);
-          }
-        }
-        for (const o in nodeData["output"]) {
-          const output = nodeData["output"][o];
-          types.push(output);
-        }
-      }
-      return types;
-    }
-    __name(getSlotTypes, "getSlotTypes");
-    function completeColorPalette(colorPalette) {
-      var types = getSlotTypes();
-      for (const type of types) {
-        if (!colorPalette.colors.node_slot[type]) {
-          colorPalette.colors.node_slot[type] = "";
-        }
-      }
-      colorPalette.colors.node_slot = sortObjectKeys(
-        colorPalette.colors.node_slot
-      );
-      return colorPalette;
-    }
-    __name(completeColorPalette, "completeColorPalette");
-    const getColorPaletteTemplate = /* @__PURE__ */ __name(async () => {
-      let colorPalette = {
-        id: "my_color_palette_unique_id",
-        name: "My Color Palette",
-        colors: {
-          node_slot: {},
-          litegraph_base: {},
-          comfy_base: {}
-        }
-      };
-      const defaultColorPalette2 = colorPalettes[defaultColorPaletteId];
-      for (const key in defaultColorPalette2.colors.litegraph_base) {
-        if (!colorPalette.colors.litegraph_base[key]) {
-          colorPalette.colors.litegraph_base[key] = "";
-        }
-      }
-      for (const key in defaultColorPalette2.colors.comfy_base) {
-        if (!colorPalette.colors.comfy_base[key]) {
-          colorPalette.colors.comfy_base[key] = "";
-        }
-      }
-      return completeColorPalette(colorPalette);
-    }, "getColorPaletteTemplate");
-    const addCustomColorPalette = /* @__PURE__ */ __name(async (colorPalette) => {
-      if (typeof colorPalette !== "object") {
-        useToastStore().addAlert("Invalid color palette.");
-        return;
-      }
-      if (!colorPalette.id) {
-        useToastStore().addAlert("Color palette missing id.");
-        return;
-      }
-      if (!colorPalette.name) {
-        useToastStore().addAlert("Color palette missing name.");
-        return;
-      }
-      if (!colorPalette.colors) {
-        useToastStore().addAlert("Color palette missing colors.");
-        return;
-      }
-      if (colorPalette.colors.node_slot && typeof colorPalette.colors.node_slot !== "object") {
-        useToastStore().addAlert("Invalid color palette colors.node_slot.");
-        return;
-      }
-      const customColorPalettes = getCustomColorPalettes();
-      customColorPalettes[colorPalette.id] = colorPalette;
-      setCustomColorPalettes(customColorPalettes);
-      for (const option of els.select.childNodes) {
-        if (option.value === "custom_" + colorPalette.id) {
-          els.select.removeChild(option);
-        }
-      }
-      els.select.append(
-        $el("option", {
-          textContent: colorPalette.name + " (custom)",
-          value: "custom_" + colorPalette.id,
-          selected: true
-        })
-      );
-      setColorPalette("custom_" + colorPalette.id);
-      await loadColorPalette(colorPalette);
-    }, "addCustomColorPalette");
-    const deleteCustomColorPalette = /* @__PURE__ */ __name(async (colorPaletteId) => {
-      const customColorPalettes = getCustomColorPalettes();
-      delete customColorPalettes[colorPaletteId];
-      setCustomColorPalettes(customColorPalettes);
-      for (const opt of els.select.childNodes) {
-        const option = opt;
-        if (option.value === defaultColorPaletteId) {
-          option.selected = true;
-        }
-        if (option.value === "custom_" + colorPaletteId) {
-          els.select.removeChild(option);
-        }
-      }
-      setColorPalette(defaultColorPaletteId);
-      await loadColorPalette(getColorPalette());
-    }, "deleteCustomColorPalette");
-    const loadColorPalette = /* @__PURE__ */ __name(async (colorPalette) => {
-      colorPalette = await completeColorPalette(colorPalette);
-      if (colorPalette.colors) {
-        if (colorPalette.colors.node_slot) {
-          Object.assign(
-            app.canvas.default_connection_color_byType,
-            colorPalette.colors.node_slot
-          );
-          Object.assign(
-            LGraphCanvas.link_type_colors,
-            colorPalette.colors.node_slot
-          );
-        }
-        if (colorPalette.colors.litegraph_base) {
-          app.canvas.node_title_color = colorPalette.colors.litegraph_base.NODE_TITLE_COLOR;
-          app.canvas.default_link_color = colorPalette.colors.litegraph_base.LINK_COLOR;
-          for (const key in colorPalette.colors.litegraph_base) {
-            if (colorPalette.colors.litegraph_base.hasOwnProperty(key) && LiteGraph.hasOwnProperty(key)) {
-              LiteGraph[key] = colorPalette.colors.litegraph_base[key];
-            }
-          }
-        }
-        if (colorPalette.colors.comfy_base) {
-          const rootStyle = document.documentElement.style;
-          for (const key in colorPalette.colors.comfy_base) {
-            rootStyle.setProperty(
-              "--" + key,
-              colorPalette.colors.comfy_base[key]
-            );
-          }
-        }
-        if (colorPalette.colors.litegraph_base.NODE_BYPASS_BGCOLOR) {
-          app.bypassBgColor = colorPalette.colors.litegraph_base.NODE_BYPASS_BGCOLOR;
-        }
-        app.canvas.draw(true, true);
-      }
-    }, "loadColorPalette");
-    const fileInput = $el("input", {
-      type: "file",
-      accept: ".json",
-      style: { display: "none" },
-      parent: document.body,
-      onchange: /* @__PURE__ */ __name(() => {
-        const file = fileInput.files[0];
-        if (file.type === "application/json" || file.name.endsWith(".json")) {
-          const reader = new FileReader();
-          reader.onload = async () => {
-            await addCustomColorPalette(JSON.parse(reader.result));
-          };
-          reader.readAsText(file);
-        }
-      }, "onchange")
-    });
-    app.ui.settings.addSetting({
-      id,
-      category: ["Comfy", "ColorPalette"],
-      name: "Color Palette",
-      type: /* @__PURE__ */ __name((name, setter, value) => {
-        const options = [
-          ...Object.values(colorPalettes).map(
-            (c) => $el("option", {
-              textContent: c.name,
-              value: c.id,
-              selected: c.id === value
-            })
-          ),
-          ...Object.values(getCustomColorPalettes()).map(
-            (c) => $el("option", {
-              textContent: `${c.name} (custom)`,
-              value: `custom_${c.id}`,
-              selected: `custom_${c.id}` === value
-            })
-          )
-        ];
-        els.select = $el(
-          "select",
-          {
-            style: {
-              marginBottom: "0.15rem",
-              width: "100%"
-            },
-            onchange: /* @__PURE__ */ __name((e) => {
-              setter(e.target.value);
-            }, "onchange")
-          },
-          options
-        );
-        return $el("tr", [
-          $el("td", [
-            els.select,
-            $el(
-              "div",
-              {
-                style: {
-                  display: "grid",
-                  gap: "4px",
-                  gridAutoFlow: "column"
-                }
-              },
-              [
-                $el("input", {
-                  type: "button",
-                  value: "Export",
-                  onclick: /* @__PURE__ */ __name(async () => {
-                    const colorPaletteId = app.ui.settings.getSettingValue(
-                      id,
-                      defaultColorPaletteId
-                    );
-                    const colorPalette = await completeColorPalette(
-                      getColorPalette(colorPaletteId)
-                    );
-                    const json = JSON.stringify(colorPalette, null, 2);
-                    const blob = new Blob([json], { type: "application/json" });
-                    const url = URL.createObjectURL(blob);
-                    const a = $el("a", {
-                      href: url,
-                      download: colorPaletteId + ".json",
-                      style: { display: "none" },
-                      parent: document.body
-                    });
-                    a.click();
-                    setTimeout(function() {
-                      a.remove();
-                      window.URL.revokeObjectURL(url);
-                    }, 0);
-                  }, "onclick")
-                }),
-                $el("input", {
-                  type: "button",
-                  value: "Import",
-                  onclick: /* @__PURE__ */ __name(() => {
-                    fileInput.click();
-                  }, "onclick")
-                }),
-                $el("input", {
-                  type: "button",
-                  value: "Template",
-                  onclick: /* @__PURE__ */ __name(async () => {
-                    const colorPalette = await getColorPaletteTemplate();
-                    const json = JSON.stringify(colorPalette, null, 2);
-                    const blob = new Blob([json], { type: "application/json" });
-                    const url = URL.createObjectURL(blob);
-                    const a = $el("a", {
-                      href: url,
-                      download: "color_palette.json",
-                      style: { display: "none" },
-                      parent: document.body
-                    });
-                    a.click();
-                    setTimeout(function() {
-                      a.remove();
-                      window.URL.revokeObjectURL(url);
-                    }, 0);
-                  }, "onclick")
-                }),
-                $el("input", {
-                  type: "button",
-                  value: "Delete",
-                  onclick: /* @__PURE__ */ __name(async () => {
-                    let colorPaletteId = app.ui.settings.getSettingValue(
-                      id,
-                      defaultColorPaletteId
-                    );
-                    if (colorPalettes[colorPaletteId]) {
-                      useToastStore().addAlert(
-                        "You cannot delete a built-in color palette."
-                      );
-                      return;
-                    }
-                    if (colorPaletteId.startsWith("custom_")) {
-                      colorPaletteId = colorPaletteId.substr(7);
-                    }
-                    await deleteCustomColorPalette(colorPaletteId);
-                  }, "onclick")
-                })
-              ]
-            )
-          ])
-        ]);
-      }, "type"),
-      defaultValue: defaultColorPaletteId,
-      async onChange(value) {
-        if (!value) {
-          return;
-        }
-        let palette = colorPalettes[value];
-        if (palette) {
-          await loadColorPalette(palette);
-        } else if (value.startsWith("custom_")) {
-          value = value.substr(7);
-          let customColorPalettes = getCustomColorPalettes();
-          if (customColorPalettes[value]) {
-            palette = customColorPalettes[value];
-            await loadColorPalette(customColorPalettes[value]);
-          }
-        }
-        let { BACKGROUND_IMAGE, CLEAR_BACKGROUND_COLOR } = palette.colors.litegraph_base;
-        if (BACKGROUND_IMAGE === void 0 || CLEAR_BACKGROUND_COLOR === void 0) {
-          const base = colorPalettes["dark"].colors.litegraph_base;
-          BACKGROUND_IMAGE = base.BACKGROUND_IMAGE;
-          CLEAR_BACKGROUND_COLOR = base.CLEAR_BACKGROUND_COLOR;
-        }
-        app.canvas.updateBackground(BACKGROUND_IMAGE, CLEAR_BACKGROUND_COLOR);
-      }
-    });
-  }
-});
-window.comfyAPI = window.comfyAPI || {};
-window.comfyAPI.colorPalette = window.comfyAPI.colorPalette || {};
-window.comfyAPI.colorPalette.defaultColorPalette = defaultColorPalette;
-window.comfyAPI.colorPalette.getColorPalette = getColorPalette;
-export {
-  defaultColorPalette as d,
-  getColorPalette as g
-};
-//# sourceMappingURL=colorPalette-D5oi2-2V.js.map
--- a/web/assets/colorPalette-D5oi2-2V.js.map
+++ b/web/assets/colorPalette-D5oi2-2V.js.map
--- a/web/assets/index-B1vRdV2i.js
+++ b/web/assets/index-B1vRdV2i.js
@@ -1,8 +1,7 @@
 var __defProp = Object.defineProperty;
 var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
-import { c9 as ComfyDialog, ca as $el, cb as ComfyApp, k as app, z as LiteGraph, aP as LGraphCanvas, cc as DraggableList, bO as useToastStore, aq as useNodeDefStore, b4 as api, L as LGraphGroup, cd as KeyComboImpl, aT as useKeybindingStore, aL as useCommandStore, l as LGraphNode, ce as ComfyWidgets, cf as applyTextReplacements, aA as NodeSourceType, cg as NodeBadgeMode, h as useSettingStore, F as computed, w as watch, ch as BadgePosition, aR as LGraphBadge, au as _ } from "./index-DGAbdBYF.js";
-import { g as getColorPalette, d as defaultColorPalette } from "./colorPalette-D5oi2-2V.js";
-import { mergeIfValid, getWidgetConfig, setWidgetConfig } from "./widgetInputs-DdoWwzg5.js";
+import { bV as ComfyDialog, bW as $el, bX as ComfyApp, c as app, k as LiteGraph, b2 as LGraphCanvas, bY as DraggableList, bf as useToastStore, bZ as serialise, aE as useNodeDefStore, b_ as deserialiseAndCreate, bH as api, L as LGraphGroup, b$ as KeyComboImpl, M as useKeybindingStore, F as useCommandStore, e as LGraphNode, c0 as ComfyWidgets, c1 as applyTextReplacements } from "./index-B6dYHNhg.js";
+import { mergeIfValid, getWidgetConfig, setWidgetConfig } from "./widgetInputs-BJ21PG7d.js";
 class ClipspaceDialog extends ComfyDialog {
  static {
    __name(this, "ClipspaceDialog");
@@ -38,7 +37,9 @@ class ClipspaceDialog extends ComfyDialog {
        ...self.createButtons()
      ]);
      if (self.element) {
-        self.element.removeChild(self.element.firstChild);
+        if (self.element.firstChild) {
+          self.element.removeChild(self.element.firstChild);
+        }
        self.element.appendChild(children);
      } else {
        self.element = $el("div.comfy-modal", { parent: document.body }, [
@@ -77,7 +78,7 @@ class ClipspaceDialog extends ComfyDialog {
    return buttons;
  }
  createImgSettings() {
-    if (ComfyApp.clipspace.imgs) {
+    if (ComfyApp.clipspace?.imgs) {
      const combo_items = [];
      const imgs = ComfyApp.clipspace.imgs;
      for (let i = 0; i < imgs.length; i++) {
@@ -88,8 +89,10 @@ class ClipspaceDialog extends ComfyDialog {
        {
          id: "clipspace_img_selector",
          onchange: /* @__PURE__ */ __name((event) => {
-            ComfyApp.clipspace["selectedIndex"] = event.target.selectedIndex;
-            ClipspaceDialog.invalidatePreview();
+            if (event.target && ComfyApp.clipspace) {
+              ComfyApp.clipspace["selectedIndex"] = event.target.selectedIndex;
+              ClipspaceDialog.invalidatePreview();
+            }
          }, "onchange")
        },
        combo_items
@@ -103,7 +106,9 @@ class ClipspaceDialog extends ComfyDialog {
        {
          id: "clipspace_img_paste_mode",
          onchange: /* @__PURE__ */ __name((event) => {
-            ComfyApp.clipspace["img_paste_mode"] = event.target.value;
+            if (event.target && ComfyApp.clipspace) {
+              ComfyApp.clipspace["img_paste_mode"] = event.target.value;
+            }
          }, "onchange")
        },
        [
@@ -128,7 +133,7 @@ class ClipspaceDialog extends ComfyDialog {
    }
  }
  createImgPreview() {
-    if (ComfyApp.clipspace.imgs) {
+    if (ComfyApp.clipspace?.imgs) {
      return $el("img", { id: "clipspace_preview", ondragstart: /* @__PURE__ */ __name(() => false, "ondragstart") });
    } else return [];
  }
@@ -155,7 +160,7 @@ app.registerExtension({
 window.comfyAPI = window.comfyAPI || {};
 window.comfyAPI.clipspace = window.comfyAPI.clipspace || {};
 window.comfyAPI.clipspace.ClipspaceDialog = ClipspaceDialog;
-const ext$2 = {
+const ext$1 = {
  name: "Comfy.ContextMenuFilter",
  init() {
    const ctxMenu = LiteGraph.ContextMenu;
@@ -173,10 +178,10 @@ const ext$2 = {
        let itemCount = displayedItems.length;
        requestAnimationFrame(() => {
          const currentNode = LGraphCanvas.active_canvas.current_node;
-          const clickedComboValue = currentNode.widgets?.filter(
-            (w) => w.type === "combo" && w.options.values.length === values.length
+          const clickedComboValue = currentNode?.widgets?.filter(
+            (w) => w.type === "combo" && w.options.values?.length === values.length
          ).find(
-            (w) => w.options.values.every((v, i) => v === values[i])
+            (w) => w.options.values?.every((v, i) => v === values[i])
          )?.value;
          let selectedIndex = clickedComboValue ? values.findIndex((v) => v === clickedComboValue) : 0;
          if (selectedIndex < 0) {
@@ -245,7 +250,7 @@ const ext$2 = {
          filter.addEventListener("input", () => {
            const term = filter.value.toLocaleLowerCase();
            displayedItems = items.filter((item) => {
-              const isVisible = !term || item.textContent.toLocaleLowerCase().includes(term);
+              const isVisible = !term || item.textContent?.toLocaleLowerCase().includes(term);
              item.style.display = isVisible ? "block" : "none";
              return isVisible;
            });
@@ -279,7 +284,7 @@ const ext$2 = {
    LiteGraph.ContextMenu.prototype = ctxMenu.prototype;
  }
 };
-app.registerExtension(ext$2);
+app.registerExtension(ext$1);
 function stripComments(str) {
  return str.replace(/\/\*[\s\S]*?\*\/|\/\/.*/g, "");
 }
@@ -339,7 +344,7 @@ app.registerExtension({
        if (text[start] === "(") openCount++;
        if (text[start] === ")") closeCount++;
      }
-      if (start < 0) return false;
+      if (start < 0) return null;
      openCount = 0;
      closeCount = 0;
      while (end < text.length) {
@@ -348,7 +353,7 @@ app.registerExtension({
        if (text[end] === ")") closeCount++;
        end++;
      }
-      if (end === text.length) return false;
+      if (end === text.length) return null;
      return { start: start + 1, end };
    }
    __name(findNearestEnclosure, "findNearestEnclosure");
@@ -961,17 +966,13 @@ class GroupNodeBuilder {
        }
      }
    }, "storeExternalLinks");
-    const backup = localStorage.getItem("litegrapheditor_clipboard");
    try {
-      app.canvas.copyToClipboard(this.nodes);
-      const config = JSON.parse(
-        localStorage.getItem("litegrapheditor_clipboard")
-      );
+      const serialised = serialise(this.nodes, app.canvas.graph);
+      const config = JSON.parse(serialised);
      storeLinkTypes(config);
      storeExternalLinks(config);
      return config;
    } finally {
-      localStorage.setItem("litegrapheditor_clipboard", backup);
    }
  }
 }
@@ -1225,7 +1226,7 @@ class GroupNodeConfig {
  checkPrimitiveConnection(link, inputName, inputs) {
    const sourceNode = this.nodeData.nodes[link[0]];
    if (sourceNode.type === "PrimitiveNode") {
-      const [sourceNodeId, _2, targetNodeId, __] = link;
+      const [sourceNodeId, _, targetNodeId, __] = link;
      const primitiveDef = this.primitiveDefs[sourceNodeId];
      const targetWidget = inputs[inputName];
      const primitiveConfig = primitiveDef.input.required.value;
@@ -1512,7 +1513,6 @@ class GroupNodeHandler {
    };
    this.node.convertToNodes = () => {
      const addInnerNodes = /* @__PURE__ */ __name(() => {
-        const backup = localStorage.getItem("litegrapheditor_clipboard");
        const c = { ...this.groupData.nodeData };
        c.nodes = [...c.nodes];
        const innerNodes = this.node.getInnerNodes();
@@ -1526,9 +1526,7 @@ class GroupNodeHandler {
          }
          c.nodes[i] = { ...c.nodes[i], id: id2 };
        }
-        localStorage.setItem("litegrapheditor_clipboard", JSON.stringify(c));
-        app.canvas.pasteFromClipboard();
-        localStorage.setItem("litegrapheditor_clipboard", backup);
+        deserialiseAndCreate(JSON.stringify(c), app.canvas);
        const [x, y] = this.node.pos;
        let top;
        let left;
@@ -1575,10 +1573,8 @@ class GroupNodeHandler {
          }
        }
        for (const newNode of newNodes2) {
-          newNode.pos = [
-            newNode.pos[0] - (left - x),
-            newNode.pos[1] - (top - y)
-          ];
+          newNode.pos[0] -= left - x;
+          newNode.pos[1] -= top - y;
        }
        return { newNodes: newNodes2, selectedIds: selectedIds2 };
      }, "addInnerNodes");
@@ -1613,14 +1609,16 @@ class GroupNodeHandler {
          }
        }
      }, "reconnectOutputs");
+      app.canvas.emitBeforeChange();
      const { newNodes, selectedIds } = addInnerNodes();
      reconnectInputs(selectedIds);
      reconnectOutputs(selectedIds);
      app.graph.remove(this.node);
+      app.canvas.emitAfterChange();
      return newNodes;
    };
    const getExtraMenuOptions = this.node.getExtraMenuOptions;
-    this.node.getExtraMenuOptions = function(_2, options) {
+    this.node.getExtraMenuOptions = function(_, options) {
      getExtraMenuOptions?.apply(this, arguments);
      let optionIndex = options.findIndex((o) => o.content === "Outputs");
      if (optionIndex === -1) optionIndex = options.length;
@@ -1637,9 +1635,7 @@ class GroupNodeHandler {
        },
        {
          content: "Manage Group Node",
-          callback: /* @__PURE__ */ __name(() => {
-            new ManageGroupDialog(app).show(this.type);
-          }, "callback")
+          callback: manageGroupNodes
        }
      );
    };
@@ -1796,7 +1792,7 @@ class GroupNodeHandler {
      } else if (innerNode.type === "Reroute") {
        const rerouteLinks = this.groupData.linksFrom[old.node.index];
        if (rerouteLinks) {
-          for (const [_2, , targetNodeId, targetSlot] of rerouteLinks["0"]) {
+          for (const [_, , targetNodeId, targetSlot] of rerouteLinks["0"]) {
            const node = this.innerNodes[targetNodeId];
            const input = node.inputs[targetSlot];
            if (input.widget) {
@@ -1960,9 +1956,7 @@ function addConvertToGroupOptions() {
    options.splice(index + 1, null, {
      content: `Convert to Group Node`,
      disabled,
-      callback: /* @__PURE__ */ __name(async () => {
-        return await GroupNodeHandler.fromNodes(selected);
-      }, "callback")
+      callback: convertSelectedNodesToGroupNode
    });
  }
  __name(addConvertOption, "addConvertOption");
@@ -1972,9 +1966,7 @@ function addConvertToGroupOptions() {
    options.splice(index + 1, null, {
      content: `Manage Group Nodes`,
      disabled,
-      callback: /* @__PURE__ */ __name(() => {
-        new ManageGroupDialog(app).show();
-      }, "callback")
+      callback: manageGroupNodes
    });
  }
  __name(addManageOption, "addManageOption");
@@ -2004,10 +1996,77 @@ const replaceLegacySeparators = /* @__PURE__ */ __name((nodes) => {
    }
  }
 }, "replaceLegacySeparators");
-const id$3 = "Comfy.GroupNode";
+async function convertSelectedNodesToGroupNode() {
+  const nodes = Object.values(app.canvas.selected_nodes ?? {});
+  if (nodes.length === 0) {
+    throw new Error("No nodes selected");
+  }
+  if (nodes.length === 1) {
+    throw new Error("Please select multiple nodes to convert to group node");
+  }
+  if (nodes.some((n) => GroupNodeHandler.isGroupNode(n))) {
+    throw new Error("Selected nodes contain a group node");
+  }
+  return await GroupNodeHandler.fromNodes(nodes);
+}
+__name(convertSelectedNodesToGroupNode, "convertSelectedNodesToGroupNode");
+function ungroupSelectedGroupNodes() {
+  const nodes = Object.values(app.canvas.selected_nodes ?? {});
+  for (const node of nodes) {
+    if (GroupNodeHandler.isGroupNode(node)) {
+      node["convertToNodes"]?.();
+    }
+  }
+}
+__name(ungroupSelectedGroupNodes, "ungroupSelectedGroupNodes");
+function manageGroupNodes() {
+  new ManageGroupDialog(app).show();
+}
+__name(manageGroupNodes, "manageGroupNodes");
+const id$2 = "Comfy.GroupNode";
 let globalDefs;
-const ext$1 = {
-  name: id$3,
+const ext = {
+  name: id$2,
+  commands: [
+    {
+      id: "Comfy.GroupNode.ConvertSelectedNodesToGroupNode",
+      label: "Convert selected nodes to group node",
+      icon: "pi pi-sitemap",
+      versionAdded: "1.3.17",
+      function: convertSelectedNodesToGroupNode
+    },
+    {
+      id: "Comfy.GroupNode.UngroupSelectedGroupNodes",
+      label: "Ungroup selected group nodes",
+      icon: "pi pi-sitemap",
+      versionAdded: "1.3.17",
+      function: ungroupSelectedGroupNodes
+    },
+    {
+      id: "Comfy.GroupNode.ManageGroupNodes",
+      label: "Manage group nodes",
+      icon: "pi pi-cog",
+      versionAdded: "1.3.17",
+      function: manageGroupNodes
+    }
+  ],
+  keybindings: [
+    {
+      commandId: "Comfy.GroupNode.ConvertSelectedNodesToGroupNode",
+      combo: {
+        alt: true,
+        key: "g"
+      }
+    },
+    {
+      commandId: "Comfy.GroupNode.UngroupSelectedGroupNodes",
+      combo: {
+        alt: true,
+        shift: true,
+        key: "G"
+      }
+    }
+  ],
  setup() {
    addConvertToGroupOptions();
  },
@@ -2037,56 +2096,18 @@ const ext$1 = {
    }
  }
 };
-app.registerExtension(ext$1);
+app.registerExtension(ext);
 window.comfyAPI = window.comfyAPI || {};
 window.comfyAPI.groupNode = window.comfyAPI.groupNode || {};
 window.comfyAPI.groupNode.GroupNodeConfig = GroupNodeConfig;
 window.comfyAPI.groupNode.GroupNodeHandler = GroupNodeHandler;
 function setNodeMode(node, mode) {
  node.mode = mode;
-  node.graph.change();
+  node.graph?.change();
 }
 __name(setNodeMode, "setNodeMode");
-function addNodesToGroup(group, nodes = []) {
-  var x1, y1, x2, y2;
-  var nx1, ny1, nx2, ny2;
-  var node;
-  x1 = y1 = x2 = y2 = -1;
-  nx1 = ny1 = nx2 = ny2 = -1;
-  for (var n of [group.nodes, nodes]) {
-    for (var i in n) {
-      node = n[i];
-      nx1 = node.pos[0];
-      ny1 = node.pos[1];
-      nx2 = node.pos[0] + node.size[0];
-      ny2 = node.pos[1] + node.size[1];
-      if (node.type != "Reroute") {
-        ny1 -= LiteGraph.NODE_TITLE_HEIGHT;
-      }
-      if (node.flags?.collapsed) {
-        ny2 = ny1 + LiteGraph.NODE_TITLE_HEIGHT;
-        if (node?._collapsed_width) {
-          nx2 = nx1 + Math.round(node._collapsed_width);
-        }
-      }
-      if (x1 == -1 || nx1 < x1) {
-        x1 = nx1;
-      }
-      if (y1 == -1 || ny1 < y1) {
-        y1 = ny1;
-      }
-      if (x2 == -1 || nx2 > x2) {
-        x2 = nx2;
-      }
-      if (y2 == -1 || ny2 > y2) {
-        y2 = ny2;
-      }
-    }
-  }
-  var padding = 10;
-  y1 = y1 - Math.round(group.font_size * 1.4);
-  group.pos = [x1 - padding, y1 - padding];
-  group.size = [x2 - x1 + padding * 2, y2 - y1 + padding * 2];
+function addNodesToGroup(group, items) {
+  group.resizeTo([...group.children, ...items]);
 }
 __name(addNodesToGroup, "addNodesToGroup");
 app.registerExtension({
@@ -2102,11 +2123,11 @@ app.registerExtension({
      if (!group) {
        options.push({
          content: "Add Group For Selected Nodes",
-          disabled: !Object.keys(app.canvas.selected_nodes || {}).length,
+          disabled: !this.selectedItems?.size,
          callback: /* @__PURE__ */ __name(() => {
            const group2 = new LGraphGroup();
-            addNodesToGroup(group2, this.selected_nodes);
-            app.canvas.graph.add(group2);
+            addNodesToGroup(group2, this.selectedItems);
+            this.graph.add(group2);
            this.graph.change();
          }, "callback")
        });
@@ -2116,9 +2137,9 @@ app.registerExtension({
      const nodesInGroup = group.nodes;
      options.push({
        content: "Add Selected Nodes To Group",
-        disabled: !Object.keys(app.canvas.selected_nodes || {}).length,
+        disabled: !this.selectedItems?.size,
        callback: /* @__PURE__ */ __name(() => {
-          addNodesToGroup(group, this.selected_nodes);
+          addNodesToGroup(group, this.selectedItems);
          this.graph.change();
        }, "callback")
      });
@@ -2137,7 +2158,8 @@ app.registerExtension({
      options.push({
        content: "Fit Group To Nodes",
        callback: /* @__PURE__ */ __name(() => {
-          addNodesToGroup(group);
+          group.recomputeInsideNodes();
+          group.resizeTo(group.children);
          this.graph.change();
        }, "callback")
      });
@@ -2263,9 +2285,9 @@ app.registerExtension({
    };
  }
 });
-const id$2 = "Comfy.InvertMenuScrolling";
+const id$1 = "Comfy.InvertMenuScrolling";
 app.registerExtension({
-  name: id$2,
+  name: id$1,
  init() {
    const ctxMenu = LiteGraph.ContextMenu;
    const replace = /* @__PURE__ */ __name(() => {
@@ -2281,7 +2303,7 @@ app.registerExtension({
      LiteGraph.ContextMenu.prototype = ctxMenu.prototype;
    }, "replace");
    app.ui.settings.addSetting({
-      id: id$2,
+      id: id$1,
      category: ["Comfy", "Graph", "InvertMenuScrolling"],
      name: "Invert Context Menu Scrolling",
      type: "boolean",
@@ -2313,8 +2335,8 @@ app.registerExtension({
      const commandStore = useCommandStore();
      const keybinding = keybindingStore.getKeybinding(keyCombo);
      if (keybinding && keybinding.targetSelector !== "#graph-canvas") {
-        await commandStore.execute(keybinding.commandId);
        event.preventDefault();
+        await commandStore.execute(keybinding.commandId);
        return;
      }
      if (event.ctrlKey || event.altKey || event.metaKey) {
@@ -2337,35 +2359,6 @@ app.registerExtension({
    window.addEventListener("keydown", keybindListener);
  }
 });
-const id$1 = "Comfy.LinkRenderMode";
-const ext = {
-  name: id$1,
-  async setup(app2) {
-    app2.ui.settings.addSetting({
-      id: id$1,
-      category: ["Comfy", "Graph", "LinkRenderMode"],
-      name: "Link Render Mode",
-      defaultValue: 2,
-      type: "combo",
-      options: [
-        { value: LiteGraph.STRAIGHT_LINK, text: "Straight" },
-        { value: LiteGraph.LINEAR_LINK, text: "Linear" },
-        { value: LiteGraph.SPLINE_LINK, text: "Spline" },
-        { value: LiteGraph.HIDDEN_LINK, text: "Hidden" }
-      ],
-      onChange(value) {
-        app2.canvas.links_render_mode = +value;
-        app2.canvas.setDirty(
-          /* fg */
-          false,
-          /* bg */
-          true
-        );
-      }
-    });
-  }
-};
-app.registerExtension(ext);
 function dataURLToBlob(dataURL) {
  const parts = dataURL.split(";base64,");
  const contentType = parts[0].split(":")[1];
@@ -3648,8 +3641,12 @@ app.registerExtension({
            clipboardAction(async () => {
              const data = JSON.parse(t.data);
              await GroupNodeConfig.registerFromWorkflow(data.groupNodes, {});
-              localStorage.setItem("litegrapheditor_clipboard", t.data);
-              app.canvas.pasteFromClipboard();
+              if (!data.reroutes) {
+                deserialiseAndCreate(t.data, app.canvas);
+              } else {
+                localStorage.setItem("litegrapheditor_clipboard", t.data);
+                app.canvas.pasteFromClipboard();
+              }
            });
          }, "callback")
        };
@@ -3874,7 +3871,7 @@ app.registerExtension({
        };
        this.isVirtualNode = true;
      }
-      getExtraMenuOptions(_2, options) {
+      getExtraMenuOptions(_, options) {
        options.unshift(
          {
            content: (this.properties.showOutputText ? "Hide" : "Show") + " Type",
@@ -3983,9 +3980,10 @@ let touchCount = 0;
 app.registerExtension({
  name: "Comfy.SimpleTouchSupport",
  setup() {
-    let zoomPos;
+    let touchDist;
    let touchTime;
    let lastTouch;
+    let lastScale;
    function getMultiTouchPos(e) {
      return Math.hypot(
        e.touches[0].clientX - e.touches[1].clientX,
@@ -3993,63 +3991,90 @@ app.registerExtension({
      );
    }
    __name(getMultiTouchPos, "getMultiTouchPos");
-    app.canvasEl.addEventListener(
+    function getMultiTouchCenter(e) {
+      return {
+        clientX: (e.touches[0].clientX + e.touches[1].clientX) / 2,
+        clientY: (e.touches[0].clientY + e.touches[1].clientY) / 2
+      };
+    }
+    __name(getMultiTouchCenter, "getMultiTouchCenter");
+    app.canvasEl.parentElement.addEventListener(
      "touchstart",
      (e) => {
        touchCount++;
        lastTouch = null;
+        lastScale = null;
        if (e.touches?.length === 1) {
          touchTime = /* @__PURE__ */ new Date();
          lastTouch = e.touches[0];
        } else {
          touchTime = null;
          if (e.touches?.length === 2) {
-            zoomPos = getMultiTouchPos(e);
+            lastScale = app.canvas.ds.scale;
+            lastTouch = getMultiTouchCenter(e);
+            touchDist = getMultiTouchPos(e);
            app.canvas.pointer_is_down = false;
          }
        }
      },
      true
    );
-    app.canvasEl.addEventListener("touchend", (e) => {
-      touchZooming = false;
-      touchCount = e.touches?.length ?? touchCount - 1;
+    app.canvasEl.parentElement.addEventListener("touchend", (e) => {
+      touchCount--;
+      if (e.touches?.length !== 1) touchZooming = false;
      if (touchTime && !e.touches?.length) {
        if ((/* @__PURE__ */ new Date()).getTime() - touchTime > 600) {
-          try {
-            e.constructor = CustomEvent;
-          } catch (error) {
+          if (e.target === app.canvasEl) {
+            app.canvasEl.dispatchEvent(
+              new PointerEvent("pointerdown", {
+                button: 2,
+                clientX: e.changedTouches[0].clientX,
+                clientY: e.changedTouches[0].clientY
+              })
+            );
+            e.preventDefault();
          }
-          e.clientX = lastTouch.clientX;
-          e.clientY = lastTouch.clientY;
-          app.canvas.pointer_is_down = true;
-          app.canvas._mousedown_callback(e);
        }
        touchTime = null;
      }
    });
-    app.canvasEl.addEventListener(
+    app.canvasEl.parentElement.addEventListener(
      "touchmove",
      (e) => {
        touchTime = null;
-        if (e.touches?.length === 2) {
+        if (e.touches?.length === 2 && lastTouch && !e.ctrlKey && !e.shiftKey) {
+          e.preventDefault();
          app.canvas.pointer_is_down = false;
          touchZooming = true;
-          LiteGraph.closeAllContextMenus();
+          LiteGraph.closeAllContextMenus(window);
          app.canvas.search_box?.close();
-          const newZoomPos = getMultiTouchPos(e);
-          const midX = (e.touches[0].clientX + e.touches[1].clientX) / 2;
-          const midY = (e.touches[0].clientY + e.touches[1].clientY) / 2;
-          let scale = app.canvas.ds.scale;
-          const diff = zoomPos - newZoomPos;
-          if (diff > 0.5) {
-            scale *= 1 / 1.07;
-          } else if (diff < -0.5) {
-            scale *= 1.07;
+          const newTouchDist = getMultiTouchPos(e);
+          const center = getMultiTouchCenter(e);
+          let scale = lastScale * newTouchDist / touchDist;
+          const newX = (center.clientX - lastTouch.clientX) / scale;
+          const newY = (center.clientY - lastTouch.clientY) / scale;
+          if (scale < app.canvas.ds.min_scale) {
+            scale = app.canvas.ds.min_scale;
+          } else if (scale > app.canvas.ds.max_scale) {
+            scale = app.canvas.ds.max_scale;
          }
-          app.canvas.ds.changeScale(scale, [midX, midY]);
+          const oldScale = app.canvas.ds.scale;
+          app.canvas.ds.scale = scale;
+          if (Math.abs(app.canvas.ds.scale - 1) < 0.01) {
+            app.canvas.ds.scale = 1;
+          }
+          const newScale = app.canvas.ds.scale;
+          const convertScaleToOffset = /* @__PURE__ */ __name((scale2) => [
+            center.clientX / scale2 - app.canvas.ds.offset[0],
+            center.clientY / scale2 - app.canvas.ds.offset[1]
+          ], "convertScaleToOffset");
+          var oldCenter = convertScaleToOffset(oldScale);
+          var newCenter = convertScaleToOffset(newScale);
+          app.canvas.ds.offset[0] += newX + newCenter[0] - oldCenter[0];
+          app.canvas.ds.offset[1] += newY + newCenter[1] - oldCenter[1];
+          lastTouch.clientX = center.clientX;
+          lastTouch.clientY = center.clientY;
          app.canvas.setDirty(true, true);
-          zoomPos = newZoomPos;
        }
      },
      true
@@ -4061,6 +4086,7 @@ LGraphCanvas.prototype.processMouseDown = function(e) {
  if (touchZooming || touchCount) {
    return;
  }
+  app.canvas.pointer_is_down = false;
  return processMouseDown.apply(this, arguments);
 };
 const processMouseMove = LGraphCanvas.prototype.processMouseMove;
@@ -4097,7 +4123,7 @@ app.registerExtension({
  slot_types_default_in: {},
  async beforeRegisterNodeDef(nodeType, nodeData, app2) {
    var nodeId = nodeData.name;
-    const inputs = nodeData["input"]["required"];
+    const inputs = nodeData["input"]?.["required"];
    for (const inputKey in inputs) {
      var input = inputs[inputKey];
      if (typeof input[0] !== "string") continue;
@@ -4119,19 +4145,19 @@ app.registerExtension({
        nodeType.comfyClass
      );
    }
-    var outputs = nodeData["output"];
-    for (const key in outputs) {
-      var type = outputs[key];
-      if (!(type in this.slot_types_default_in)) {
-        this.slot_types_default_in[type] = ["Reroute"];
+    var outputs = nodeData["output"] ?? [];
+    for (const el of outputs) {
+      const type2 = el;
+      if (!(type2 in this.slot_types_default_in)) {
+        this.slot_types_default_in[type2] = ["Reroute"];
      }
-      this.slot_types_default_in[type].push(nodeId);
-      if (!(type in LiteGraph.registered_slot_out_types)) {
-        LiteGraph.registered_slot_out_types[type] = { nodes: [] };
+      this.slot_types_default_in[type2].push(nodeId);
+      if (!(type2 in LiteGraph.registered_slot_out_types)) {
+        LiteGraph.registered_slot_out_types[type2] = { nodes: [] };
      }
-      LiteGraph.registered_slot_out_types[type].nodes.push(nodeType.comfyClass);
-      if (!LiteGraph.slot_types_out.includes(type)) {
-        LiteGraph.slot_types_out.push(type);
+      LiteGraph.registered_slot_out_types[type2].nodes.push(nodeType.comfyClass);
+      if (!LiteGraph.slot_types_out.includes(type2)) {
+        LiteGraph.slot_types_out.push(type2);
      }
    }
    var maxNum = this.suggestionsNumber.value;
@@ -4172,10 +4198,19 @@ app.registerExtension({
        LiteGraph.CANVAS_GRID_SIZE = +value || 10;
      }
    });
+    const alwaysSnapToGrid = app.ui.settings.addSetting({
+      id: "pysssss.SnapToGrid",
+      category: ["Comfy", "Graph", "AlwaysSnapToGrid"],
+      name: "Always snap to grid",
+      type: "boolean",
+      defaultValue: false,
+      versionAdded: "1.3.13"
+    });
+    const shouldSnapToGrid = /* @__PURE__ */ __name(() => app.shiftDown || alwaysSnapToGrid.value, "shouldSnapToGrid");
    const onNodeMoved = app.canvas.onNodeMoved;
    app.canvas.onNodeMoved = function(node) {
      const r = onNodeMoved?.apply(this, arguments);
-      if (app.shiftDown) {
+      if (shouldSnapToGrid()) {
        for (const id2 in this.selected_nodes) {
          this.selected_nodes[id2].alignToGrid();
        }
@@ -4186,7 +4221,7 @@ app.registerExtension({
    app.graph.onNodeAdded = function(node) {
      const onResize = node.onResize;
      node.onResize = function() {
-        if (app.shiftDown) {
+        if (shouldSnapToGrid()) {
          roundVectorToGrid(node.size);
        }
        return onResize?.apply(this, arguments);
@@ -4195,7 +4230,7 @@ app.registerExtension({
    };
    const origDrawNode = LGraphCanvas.prototype.drawNode;
    LGraphCanvas.prototype.drawNode = function(node, ctx) {
-      if (app.shiftDown && this.node_dragged && node.id in this.selected_nodes) {
+      if (shouldSnapToGrid() && this.node_dragged && node.id in this.selected_nodes) {
        const [x, y] = roundVectorToGrid([...node.pos]);
        const shiftX = x - node.pos[0];
        let shiftY = y - node.pos[1];
@@ -4207,7 +4242,7 @@ app.registerExtension({
        } else {
          w = node.size[0];
          h = node.size[1];
-          let titleMode = node.constructor.title_mode;
+          const titleMode = node.constructor.title_mode;
          if (titleMode !== LiteGraph.TRANSPARENT_TITLE && titleMode !== LiteGraph.NO_TITLE) {
            h += LiteGraph.NODE_TITLE_HEIGHT;
            shiftY -= LiteGraph.NODE_TITLE_HEIGHT;
@@ -4227,7 +4262,7 @@ app.registerExtension({
      if (!selectedAndMovingGroup && app.canvas.selected_group === this && (deltax || deltay)) {
        selectedAndMovingGroup = this;
      }
-      if (app.canvas.last_mouse_dragging === false && app.shiftDown) {
+      if (app.canvas.last_mouse_dragging === false && shouldSnapToGrid()) {
        this.recomputeInsideNodes();
        for (const node of this.nodes) {
          node.alignToGrid();
@@ -4238,7 +4273,7 @@ app.registerExtension({
    };
    const drawGroups = LGraphCanvas.prototype.drawGroups;
    LGraphCanvas.prototype.drawGroups = function(canvas, ctx) {
-      if (this.selected_group && app.shiftDown) {
+      if (this.selected_group && shouldSnapToGrid()) {
        if (this.selected_group_resizing) {
          roundVectorToGrid(this.selected_group.size);
        } else if (selectedAndMovingGroup) {
@@ -4261,7 +4296,7 @@ app.registerExtension({
    const onGroupAdd = LGraphCanvas.onGroupAdd;
    LGraphCanvas.onGroupAdd = function() {
      const v = onGroupAdd.apply(app.canvas, arguments);
-      if (app.shiftDown) {
+      if (shouldSnapToGrid()) {
        const lastGroup = app.graph.groups[app.graph.groups.length - 1];
        if (lastGroup) {
          roundVectorToGrid(lastGroup.pos);
@@ -4274,7 +4309,7 @@ app.registerExtension({
 });
 app.registerExtension({
  name: "Comfy.UploadImage",
-  async beforeRegisterNodeDef(nodeType, nodeData, app2) {
+  beforeRegisterNodeDef(nodeType, nodeData) {
    if (nodeData?.input?.required?.image?.[1]?.image_upload === true) {
      nodeData.input.required.upload = ["IMAGEUPLOAD"];
    }
@@ -4464,7 +4499,9 @@ app.registerExtension({
          /* name=*/
          "audioUI",
          audio,
-          { serialize: false }
+          {
+            serialize: false
+          }
        );
        const isOutputNode = node.constructor.nodeData.output_node;
        if (isOutputNode) {
@@ -4558,108 +4595,4 @@ app.registerExtension({
    };
  }
 });
-function getNodeSource(node) {
-  const nodeDef = node.constructor.nodeData;
-  if (!nodeDef) {
-    return null;
-  }
-  const nodeDefStore = useNodeDefStore();
-  return nodeDefStore.nodeDefsByName[nodeDef.name]?.nodeSource ?? null;
-}
-__name(getNodeSource, "getNodeSource");
-function isCoreNode(node) {
-  return getNodeSource(node)?.type === NodeSourceType.Core;
-}
-__name(isCoreNode, "isCoreNode");
-function badgeTextVisible(node, badgeMode) {
-  return badgeMode === NodeBadgeMode.None || isCoreNode(node) && badgeMode === NodeBadgeMode.HideBuiltIn;
-}
-__name(badgeTextVisible, "badgeTextVisible");
-function getNodeIdBadgeText(node, nodeIdBadgeMode) {
-  return badgeTextVisible(node, nodeIdBadgeMode) ? "" : `#${node.id}`;
-}
-__name(getNodeIdBadgeText, "getNodeIdBadgeText");
-function getNodeSourceBadgeText(node, nodeSourceBadgeMode) {
-  const nodeSource = getNodeSource(node);
-  return badgeTextVisible(node, nodeSourceBadgeMode) ? "" : nodeSource?.badgeText ?? "";
-}
-__name(getNodeSourceBadgeText, "getNodeSourceBadgeText");
-function getNodeLifeCycleBadgeText(node, nodeLifeCycleBadgeMode) {
-  let text = "";
-  const nodeDef = node.constructor.nodeData;
-  if (!nodeDef) {
-    return "";
-  }
-  if (nodeDef.deprecated) {
-    text = "[DEPR]";
-  }
-  if (nodeDef.experimental) {
-    text = "[BETA]";
-  }
-  return badgeTextVisible(node, nodeLifeCycleBadgeMode) ? "" : text;
-}
-__name(getNodeLifeCycleBadgeText, "getNodeLifeCycleBadgeText");
-class NodeBadgeExtension {
-  static {
-    __name(this, "NodeBadgeExtension");
-  }
-  constructor(nodeIdBadgeMode = null, nodeSourceBadgeMode = null, nodeLifeCycleBadgeMode = null, colorPalette = null) {
-    this.nodeIdBadgeMode = nodeIdBadgeMode;
-    this.nodeSourceBadgeMode = nodeSourceBadgeMode;
-    this.nodeLifeCycleBadgeMode = nodeLifeCycleBadgeMode;
-    this.colorPalette = colorPalette;
-  }
-  name = "Comfy.NodeBadge";
-  init(app2) {
-    const settingStore = useSettingStore();
-    this.nodeSourceBadgeMode = computed(
-      () => settingStore.get("Comfy.NodeBadge.NodeSourceBadgeMode")
-    );
-    this.nodeIdBadgeMode = computed(
-      () => settingStore.get("Comfy.NodeBadge.NodeIdBadgeMode")
-    );
-    this.nodeLifeCycleBadgeMode = computed(
-      () => settingStore.get(
-        "Comfy.NodeBadge.NodeLifeCycleBadgeMode"
-      )
-    );
-    this.colorPalette = computed(
-      () => getColorPalette(settingStore.get("Comfy.ColorPalette"))
-    );
-    watch(this.nodeSourceBadgeMode, () => {
-      app2.graph.setDirtyCanvas(true, true);
-    });
-    watch(this.nodeIdBadgeMode, () => {
-      app2.graph.setDirtyCanvas(true, true);
-    });
-    watch(this.nodeLifeCycleBadgeMode, () => {
-      app2.graph.setDirtyCanvas(true, true);
-    });
-  }
-  nodeCreated(node, app2) {
-    node.badgePosition = BadgePosition.TopRight;
-    node.badge_enabled = true;
-    const badge = computed(
-      () => new LGraphBadge({
-        text: _.truncate(
-          [
-            getNodeIdBadgeText(node, this.nodeIdBadgeMode.value),
-            getNodeLifeCycleBadgeText(
-              node,
-              this.nodeLifeCycleBadgeMode.value
-            ),
-            getNodeSourceBadgeText(node, this.nodeSourceBadgeMode.value)
-          ].filter((s) => s.length > 0).join(" "),
-          {
-            length: 31
-          }
-        ),
-        fgColor: this.colorPalette.value.colors.litegraph_base?.BADGE_FG_COLOR || defaultColorPalette.colors.litegraph_base.BADGE_FG_COLOR,
-        bgColor: this.colorPalette.value.colors.litegraph_base?.BADGE_BG_COLOR || defaultColorPalette.colors.litegraph_base.BADGE_BG_COLOR
-      })
-    );
-    node.badges.push(() => badge.value);
-  }
-}
-app.registerExtension(new NodeBadgeExtension());
-//# sourceMappingURL=index-BMC1ey-i.js.map
+//# sourceMappingURL=index-B1vRdV2i.js.map
--- a/web/assets/index-B1vRdV2i.js.map
+++ b/web/assets/index-B1vRdV2i.js.map
--- a/web/assets/index-B4gmhi99.js
+++ b/web/assets/index-B4gmhi99.js
@@ -0,0 +1,62 @@
+const o = {
+  LOADING_PROGRESS: "loading-progress",
+  IS_PACKAGED: "is-packaged",
+  RENDERER_READY: "renderer-ready",
+  RESTART_APP: "restart-app",
+  REINSTALL: "reinstall",
+  LOG_MESSAGE: "log-message",
+  OPEN_DIALOG: "open-dialog",
+  DOWNLOAD_PROGRESS: "download-progress",
+  START_DOWNLOAD: "start-download",
+  PAUSE_DOWNLOAD: "pause-download",
+  RESUME_DOWNLOAD: "resume-download",
+  CANCEL_DOWNLOAD: "cancel-download",
+  DELETE_MODEL: "delete-model",
+  GET_ALL_DOWNLOADS: "get-all-downloads",
+  GET_ELECTRON_VERSION: "get-electron-version",
+  SEND_ERROR_TO_SENTRY: "send-error-to-sentry",
+  GET_BASE_PATH: "get-base-path",
+  GET_MODEL_CONFIG_PATH: "get-model-config-path",
+  OPEN_PATH: "open-path",
+  OPEN_LOGS_PATH: "open-logs-path",
+  OPEN_DEV_TOOLS: "open-dev-tools",
+  IS_FIRST_TIME_SETUP: "is-first-time-setup",
+  GET_SYSTEM_PATHS: "get-system-paths",
+  VALIDATE_INSTALL_PATH: "validate-install-path",
+  VALIDATE_COMFYUI_SOURCE: "validate-comfyui-source",
+  SHOW_DIRECTORY_PICKER: "show-directory-picker",
+  INSTALL_COMFYUI: "install-comfyui"
+};
+var t = /* @__PURE__ */ ((e) => (e.INITIAL_STATE = "initial-state", e.PYTHON_SETUP = "python-setup", e.STARTING_SERVER = "starting-server", e.READY = "ready", e.ERROR = "error", e.ERROR_INSTALL_PATH = "error-install-path", e))(t || {});
+const s = {
+  "initial-state": "Loading...",
+  "python-setup": "Setting up Python Environment...",
+  "starting-server": "Starting ComfyUI server...",
+  ready: "Finishing...",
+  error: "Was not able to start ComfyUI. Please check the logs for more details. You can open it from the Help menu. Please report issues to: https://forum.comfy.org",
+  "error-install-path": "Installation path does not exist. Please reset the installation location."
+}, a = "electronAPI", n = "https://942cadba58d247c9cab96f45221aa813@o4507954455314432.ingest.us.sentry.io/4508007940685824", r = [
+  {
+    id: "user_files",
+    label: "User Files",
+    description: "Settings and user-created workflows"
+  },
+  {
+    id: "models",
+    label: "Models",
+    description: "Reference model files from existing ComfyUI installations. (No copy)"
+  }
+  // TODO: Decide whether we want to auto-migrate custom nodes, and install their dependencies.
+  // huchenlei: This is a very essential thing for migration experience.
+  // {
+  //   id: 'custom_nodes',
+  //   label: 'Custom Nodes',
+  //   description: 'Reference custom node files from existing ComfyUI installations. (No copy)',
+  // },
+];
+export {
+  r,
+  s,
+  t
+};
+//# sourceMappingURL=index-B4gmhi99.js.map
--- a/web/assets/index-B4gmhi99.js.map
+++ b/web/assets/index-B4gmhi99.js.map
@@ -0,0 +1 @@
+{"version":3,"file":"index-B4gmhi99.js","sources":["../../node_modules/@comfyorg/comfyui-electron-types/index.mjs"],"sourcesContent":["const o = {\n  LOADING_PROGRESS: \"loading-progress\",\n  IS_PACKAGED: \"is-packaged\",\n  RENDERER_READY: \"renderer-ready\",\n  RESTART_APP: \"restart-app\",\n  REINSTALL: \"reinstall\",\n  LOG_MESSAGE: \"log-message\",\n  OPEN_DIALOG: \"open-dialog\",\n  DOWNLOAD_PROGRESS: \"download-progress\",\n  START_DOWNLOAD: \"start-download\",\n  PAUSE_DOWNLOAD: \"pause-download\",\n  RESUME_DOWNLOAD: \"resume-download\",\n  CANCEL_DOWNLOAD: \"cancel-download\",\n  DELETE_MODEL: \"delete-model\",\n  GET_ALL_DOWNLOADS: \"get-all-downloads\",\n  GET_ELECTRON_VERSION: \"get-electron-version\",\n  SEND_ERROR_TO_SENTRY: \"send-error-to-sentry\",\n  GET_BASE_PATH: \"get-base-path\",\n  GET_MODEL_CONFIG_PATH: \"get-model-config-path\",\n  OPEN_PATH: \"open-path\",\n  OPEN_LOGS_PATH: \"open-logs-path\",\n  OPEN_DEV_TOOLS: \"open-dev-tools\",\n  IS_FIRST_TIME_SETUP: \"is-first-time-setup\",\n  GET_SYSTEM_PATHS: \"get-system-paths\",\n  VALIDATE_INSTALL_PATH: \"validate-install-path\",\n  VALIDATE_COMFYUI_SOURCE: \"validate-comfyui-source\",\n  SHOW_DIRECTORY_PICKER: \"show-directory-picker\",\n  INSTALL_COMFYUI: \"install-comfyui\"\n};\nvar t = /* @__PURE__ */ ((e) => (e.INITIAL_STATE = \"initial-state\", e.PYTHON_SETUP = \"python-setup\", e.STARTING_SERVER = \"starting-server\", e.READY = \"ready\", e.ERROR = \"error\", e.ERROR_INSTALL_PATH = \"error-install-path\", e))(t || {});\nconst s = {\n  \"initial-state\": \"Loading...\",\n  \"python-setup\": \"Setting up Python Environment...\",\n  \"starting-server\": \"Starting ComfyUI server...\",\n  ready: \"Finishing...\",\n  error: \"Was not able to start ComfyUI. Please check the logs for more details. You can open it from the Help menu. Please report issues to: https://forum.comfy.org\",\n  \"error-install-path\": \"Installation path does not exist. 
Please reset the installation location.\"\n}, a = \"electronAPI\", n = \"https://942cadba58d247c9cab96f45221aa813@o4507954455314432.ingest.us.sentry.io/4508007940685824\", r = [\n  {\n    id: \"user_files\",\n    label: \"User Files\",\n    description: \"Settings and user-created workflows\"\n  },\n  {\n    id: \"models\",\n    label: \"Models\",\n    description: \"Reference model files from existing ComfyUI installations. (No copy)\"\n  }\n  // TODO: Decide whether we want to auto-migrate custom nodes, and install their dependencies.\n  // huchenlei: This is a very essential thing for migration experience.\n  // {\n  //   id: 'custom_nodes',\n  //   label: 'Custom Nodes',\n  //   description: 'Reference custom node files from existing ComfyUI installations. (No copy)',\n  // },\n];\nexport {\n  a as ELECTRON_BRIDGE_API,\n  o as IPC_CHANNELS,\n  r as MigrationItems,\n  s as ProgressMessages,\n  t as ProgressStatus,\n  n as SENTRY_URL_ENDPOINT\n};\n"],"names":[],"mappings":"AAAA,MAAM,IAAI;AAAA,EACR,kBAAkB;AAAA,EAClB,aAAa;AAAA,EACb,gBAAgB;AAAA,EAChB,aAAa;AAAA,EACb,WAAW;AAAA,EACX,aAAa;AAAA,EACb,aAAa;AAAA,EACb,mBAAmB;AAAA,EACnB,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,iBAAiB;AAAA,EACjB,iBAAiB;AAAA,EACjB,cAAc;AAAA,EACd,mBAAmB;AAAA,EACnB,sBAAsB;AAAA,EACtB,sBAAsB;AAAA,EACtB,eAAe;AAAA,EACf,uBAAuB;AAAA,EACvB,WAAW;AAAA,EACX,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,qBAAqB;AAAA,EACrB,kBAAkB;AAAA,EAClB,uBAAuB;AAAA,EACvB,yBAAyB;AAAA,EACzB,uBAAuB;AAAA,EACvB,iBAAiB;AACnB;AACG,IAAC,IAAqB,kBAAC,OAAO,EAAE,gBAAgB,iBAAiB,EAAE,eAAe,gBAAgB,EAAE,kBAAkB,mBAAmB,EAAE,QAAQ,SAAS,EAAE,QAAQ,SAAS,EAAE,qBAAqB,sBAAsB,IAAI,KAAK,CAAA,CAAE;AACrO,MAAC,IAAI;AAAA,EACR,iBAAiB;AAAA,EACjB,gBAAgB;AAAA,EAChB,mBAAmB;AAAA,EACnB,OAAO;AAAA,EACP,OAAO;AAAA,EACP,sBAAsB;AACxB,GAAG,IAAI,eAAe,IAAI,mGAAmG,IAAI;AAAA,EAC/H;AAAA,IACE,IAAI;AAAA,IACJ,OAAO;AAAA,IACP,aAAa;AAAA,EACd;AAAA,EACD;AAAA,IACE,IAAI;AAAA,IACJ,OAAO;AAAA,IACP,aAAa;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAQH;","x_google_ignoreList":[0]}
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
comfyanonymous	bc6be6c11e	Some fixes to the lowvram system.	2024-11-22 16:40:04 -05:00
comfyanonymous	94323a26a7	Remove prints.	2024-11-22 10:51:31 -05:00
comfyanonymous	5818f6cf51	Remove print.	2024-11-22 10:49:15 -05:00
comfyanonymous	0b734de449	Add LTX-Video support to the Readme.	2024-11-22 09:24:20 -05:00
comfyanonymous	5e16f1d24b	Support Lightricks LTX-Video model.	2024-11-22 08:46:39 -05:00
comfyanonymous	2fd9c1308a	Fix mask issue in some attention functions.	2024-11-22 02:10:09 -05:00
comfyanonymous	8f0009aad0	Support new flux model variants.	2024-11-21 08:38:23 -05:00
comfyanonymous	41444b5236	Add some new weight patching functionality. Add a way to reshape lora weights. Allow weight patches to all weight not just .weight and .bias Add a way for a lora to set a weight to a specific value.	2024-11-21 07:19:17 -05:00
comfyanonymous	772e620e32	Update readme.	2024-11-20 20:42:51 -05:00
comfyanonymous	07f6eeaa13	Fix mask issue with attention_xformers.	2024-11-20 17:07:46 -05:00
comfyanonymous	22535d0589	Skip layer guidance now works on stable audio model.	2024-11-20 07:33:06 -05:00
comfyanonymous	898615122f	Rename add_noise_mask -> noise_mask.	2024-11-19 15:31:09 -05:00
comfyanonymous	156a28786b	Add boolean to InpaintModelConditioning to disable the noise mask.	2024-11-19 07:31:29 -05:00
Yoland Yan	f498d855ba	Add terminal size fallback (#5623 )	2024-11-19 03:34:20 -05:00
comfyanonymous	b699a15062	Refactor inpaint/ip2p code.	2024-11-19 03:25:25 -05:00
Chenlei Hu	9cc90ee3eb	Update UI screenshot in README (#5666 ) * Update UI ScreenShot in README * Remove legacy UI screenshot file * nit * nit	2024-11-18 16:50:34 -05:00
comfyanonymous	9a0a5d32ee	Add a skip layer guidance node that can also skip single layers. This one should work for skipping the single layers of models like Flux and Auraflow. If you want to see how these models work and how many double/single layers they have see the "ModelMerge*" nodes for the specific model.	2024-11-18 02:20:43 -05:00
comfyanonymous	d9f90965c8	Support block replace patches in auraflow.	2024-11-17 08:19:59 -05:00
comfyanonymous	41886af138	Add transformer options blocks replace patch to mochi.	2024-11-16 20:48:14 -05:00
Chenlei Hu	22a1d7ce78	Fix 3.8 compatibility in user_manager.py (#5645 )	2024-11-16 20:42:21 -05:00
Chenlei Hu	4ac401af2b	Update web content to release v1.3.44 (#5620 ) * Update web content to release v1.3.44 * nit	2024-11-15 20:17:15 -05:00
comfyanonymous	5fb59c8475	Add a node to block merge auraflow models.	2024-11-15 12:47:55 -05:00
comfyanonymous	122c9ca1ce	Add advanced model merging node for mochi.	2024-11-14 07:51:20 -05:00
comfyanonymous	3b9a6cf2b1	Fix issue with 3d masks.	2024-11-13 07:18:30 -05:00
comfyanonymous	3748e7ef7a	Fix regression.	2024-11-13 04:24:48 -05:00
comfyanonymous	8ebf2d8831	Add block replace transformer_options to flux.	2024-11-12 08:00:39 -05:00
Bratzmeister	a72d152b0c	fix --cuda-device arg for AMD/HIP devices (#5586 ) * fix --cuda-device arg for AMD/HIP devices CUDA_VISIBLE_DEVICES is ignored for HIP devices/backend. Instead it uses HIP_VISIBLE_DEVICES. Setting this environment variable has no side effect for CUDA/NVIDIA so it can safely be set in any case and vice versa. * deleted accidental if	2024-11-12 06:53:36 -05:00
comfyanonymous	eb476e6ea9	Allow 1D masks for 1D latents.	2024-11-11 14:44:52 -05:00
Dr.Lt.Data	2d28b0b479	improve: add descriptions for clip loaders (#5576 )	2024-11-11 05:37:23 -05:00
comfyanonymous	8b275ce5be	Support auto detecting some zsnr anime checkpoints.	2024-11-11 05:34:11 -05:00
comfyanonymous	2a18e98ccf	Refactor so that zsnr can be set in the sampling_settings.	2024-11-11 04:55:56 -05:00
comfyanonymous	8a5281006f	Fix some custom nodes.	2024-11-10 22:41:00 -05:00
comfyanonymous	bdeb1c171c	Fast previews for mochi.	2024-11-10 03:39:35 -05:00
comfyanonymous	9c1ed58ef2	proper fix for sag.	2024-11-10 00:10:45 -05:00
comfyanonymous	8b90e50979	Properly handle and reshape masks when used on 3d latents.	2024-11-09 15:30:19 -05:00
pythongosssss	6ee066a14f	Live terminal output (#5396 ) * Add /logs/raw and /logs/subscribe for getting logs on frontend Hijacks stderr/stdout to send all output data to the client on flush * Use existing send sync method * Fix get_logs should return string * Fix bug * pass no server * fix tests * Fix output flush on linux	2024-11-08 19:13:34 -05:00
DenOfEquity	dd5b57e3d7	fix for SAG with Kohya HRFix/ Deep Shrink (#5546 ) now works with arbitrary downscale factors	2024-11-08 18:16:29 -05:00
comfyanonymous	75a818c720	Move mochi latent node to: latent/video.	2024-11-08 08:33:44 -05:00
comfyanonymous	2865f913f7	Free memory before doing tiled decode.	2024-11-07 04:01:24 -05:00
comfyanonymous	b49616f951	Make VAEDecodeTiled node work with video VAEs.	2024-11-07 03:47:12 -05:00
comfyanonymous	5e29e7a488	Remove scaled_fp8 key after reading it to silence warning.	2024-11-06 04:56:42 -05:00
comfyanonymous	8afb97cd3f	Fix unknown VAE being detected as the mochi VAE.	2024-11-05 03:43:27 -05:00
contentis	69694f40b3	fix dynamic shape export (#5490 )	2024-11-04 14:59:28 -05:00
Chenlei Hu	c49025f01b	Allow POST `/userdata/{file}` endpoint to return full file info (#5446 ) * Refactor listuserdata * Full info param * Add tests * Fix mock * Add full_info support for move user file	2024-11-04 13:57:21 -05:00
comfyanonymous	696672905f	Add mochi support to readme.	2024-11-04 04:55:07 -05:00
comfyanonymous	6c9dbde7de	Fix mochi all in one checkpoint t5xxl key names.	2024-11-03 01:40:42 -05:00
comfyanonymous	ee8abf0cff	Update folder paths: "clip" -> "text_encoders" You can still use models/clip but the folder might get removed eventually on new installs of ComfyUI.	2024-11-02 15:35:38 -04:00
comfyanonymous	fabf449feb	Mochi VAE encoder.	2024-11-01 17:33:09 -04:00
Uriel Deveaud	cc9cf6d1bd	Rename some nodes in Display Name Mappings (nodes.py) (#5439 ) * Update nodes_images.py Nodes menu has inconsistency in names, some with spaces between words, other not. * Update nodes.py Include the node mapping name line for Image Crop Node * Update nodes_images.py * Rename image nodes add space between words for consistency > Display name mappings	2024-10-31 15:18:05 -04:00
Aarni Koskela	1c8286a44b	Avoid SyntaxWarning in UniPC docstring (#5442 )	2024-10-31 15:17:26 -04:00
comfyanonymous	1af4a47fd1	Bump up mac version for attention upcast bug workaround.	2024-10-31 15:15:31 -04:00
Uriel Deveaud	f2aaa0a475	Rename `ImageCrop` to `Image Crop` (#5424 ) * Update nodes_images.py Nodes menu has inconsistency in names, some with spaces between words, other not. * Update nodes.py Include the node mapping name line for Image Crop Node * Update nodes_images.py	2024-10-31 00:35:34 -04:00
comfyanonymous	daa1565b93	Fix diffusers flux controlnet regression.	2024-10-30 13:11:34 -04:00
comfyanonymous	09fdb2b269	Support SD3.5 medium diffusers format weights and loras.	2024-10-30 04:24:00 -04:00
Chenlei Hu	65a8659182	Update web content to release v1.3.26 (#5413 ) * Update web content to release v1.3.26 * nit	2024-10-29 14:14:06 -04:00
comfyanonymous	770ab200f2	Cleanup SkipLayerGuidanceSD3 node.	2024-10-29 10:11:46 -04:00
Dango233	954683d0db	SLG first implementation for SD3.5 (#5404 ) * SLG first implementation for SD3.5 * * Simplify and align with comfy style	2024-10-29 09:59:21 -04:00
comfyanonymous	30c0c81351	Add a way to patch blocks in SD3.	2024-10-29 00:48:32 -04:00
comfyanonymous	13b0ff8a6f	Update SD3 code.	2024-10-28 21:58:52 -04:00
comfyanonymous	c320801187	Remove useless line.	2024-10-28 17:41:12 -04:00
Chenlei Hu	c0b0cfaeec	Update web content to release v1.3.21 (#5351 ) * Update web content to release v1.3.21 * nit	2024-10-28 14:29:38 -04:00
comfyanonymous	669d9e4c67	Set default shift on mochi to 6.0	2024-10-27 22:21:04 -04:00
comfyanonymous	9ee0a6553a	float16 inference is a bit broken on mochi.	2024-10-27 04:56:40 -04:00
comfyanonymous	5cbb01bc2f	Basic Genmo Mochi video model support. To use: "Load CLIP" node with t5xxl + type mochi "Load Diffusion Model" node with the mochi dit file. "Load VAE" with the mochi vae file. EmptyMochiLatentVideo node for the latent. euler + linear_quadratic in the KSampler node.	2024-10-26 06:54:00 -04:00
comfyanonymous	c3ffbae067	Make LatentUpscale nodes work on 3d latents.	2024-10-26 01:50:51 -04:00
comfyanonymous	d605677b33	Make euler_ancestral work on flow models (credit: Ashen).	2024-10-25 19:53:44 -04:00
Chenlei Hu	ce759b7db6	Revert download to .tmp in frontend_management (#5369 )	2024-10-25 19:26:13 -04:00
comfyanonymous	52810907e2	Add a model merge node for SD3.5 large.	2024-10-24 16:46:21 -04:00
PsychoLogicAu	af8cf79a2d	support SimpleTuner lycoris lora for SD3 (#5340 )	2024-10-24 01:18:32 -04:00
comfyanonymous	66b0961a46	Fix ControlLora issue with last commit.	2024-10-23 17:02:40 -04:00
comfyanonymous	754597c8a9	Clean up some controlnet code. Remove self.device which was useless.	2024-10-23 14:19:05 -04:00
comfyanonymous	915fdb5745	Fix lowvram edge case.	2024-10-22 16:34:50 -04:00
contentis	5a8a48931a	remove attention abstraction (#5324 )	2024-10-22 14:02:38 -04:00
comfyanonymous	8ce2a1052c	Optimizations to --fast and scaled fp8.	2024-10-22 02:12:28 -04:00
comfyanonymous	f82314fcfc	Fix duplicate sigmas on beta scheduler.	2024-10-21 20:19:45 -04:00
comfyanonymous	0075c6d096	Mixed precision diffusion models with scaled fp8. This change allows supports for diffusion models where all the linears are scaled fp8 while the other weights are the original precision.	2024-10-21 18:12:51 -04:00
comfyanonymous	83ca891118	Support scaled fp8 t5xxl model.	2024-10-20 22:27:00 -04:00
comfyanonymous	f9f9faface	Fixed model merging issue with scaled fp8.	2024-10-20 06:24:31 -04:00
comfyanonymous	471cd3eace	fp8 casting is fast on GPUs that support fp8 compute.	2024-10-20 00:54:47 -04:00
comfyanonymous	a68bbafddb	Support diffusion models with scaled fp8 weights.	2024-10-19 23:47:42 -04:00
comfyanonymous	73e3a9e676	Clamp output when rounding weight to prevent Nan.	2024-10-19 19:07:10 -04:00
comfyanonymous	518c0dc2fe	Add tooltips to LoraSave node.	2024-10-18 06:01:09 -04:00
comfyanonymous	ce0542e10b	Add a note that python 3.13 is not yet supported to the README.	2024-10-17 19:27:37 -04:00
comfyanonymous	8473019d40	Pytorch can be shipped with numpy 2 now.	2024-10-17 19:15:17 -04:00
Xiaodong Xie	89f15894dd	Ignore more network related errors during websocket communication. (#5269 ) Intermittent network issues during websocket communication should not crash ComfyUi process. Co-authored-by: Xiaodong Xie <xie.xiaodong@frever.com>	2024-10-17 18:31:45 -04:00
comfyanonymous	67158994a4	Use the lowvram cast_to function for everything.	2024-10-17 17:25:56 -04:00
comfyanonymous	7390ff3b1e	Add missing import.	2024-10-16 14:58:30 -04:00
comfyanonymous	0bedfb26af	Revert "Fix Transformers FutureWarning (#5140 )" This reverts commit `95b7cf9bbe`.	2024-10-16 12:36:19 -04:00
comfyanonymous	f71cfd2687	Add an experimental node to sharpen latents. Can be used with LatentApplyOperationCFG for interesting results.	2024-10-16 05:25:31 -04:00
Alex "mcmonkey" Goodwin	c695c4af7f	Frontend Manager: avoid redundant gh calls for static versions (#5152 ) * Frontend Manager: avoid redundant gh calls for static versions * actually, removing old tmpdir isn't needed I tested - downloader code handles this case well already (also rmdir was wrong func anyway, needed shutil.rmtree if it had content) * add code comment	2024-10-16 03:35:37 -04:00
comfyanonymous	0dbba9f751	Add some latent operation nodes. This is a port of the ModelSamplerTonemapNoiseTest from the experiments repo. To replicate that node use LatentOperationTonemapReinhard and LatentApplyOperationCFG together.	2024-10-15 15:00:36 -04:00
comfyanonymous	f584758271	Cleanup some useless lines.	2024-10-14 21:02:39 -04:00
svdc	95b7cf9bbe	Fix Transformers FutureWarning (#5140 ) * Update sd1_clip.py Fix Transformers FutureWarning * Update sd1_clip.py Fix comment	2024-10-14 20:12:20 -04:00
comfyanonymous	191a0d56b4	Switch default packaging workflows to python 3.12	2024-10-13 06:59:31 -04:00
comfyanonymous	3c60ecd7a8	Fix fp8 ops staying enabled.	2024-10-12 14:10:13 -04:00
comfyanonymous	7ae6626723	Remove useless argument.	2024-10-12 07:16:21 -04:00
comfyanonymous	6632365e16	model_options consistency between functions. weight_dtype -> dtype	2024-10-11 20:51:19 -04:00
Kadir Nar	ad07796777	🐛 Add device to variable c (#5210 )	2024-10-11 20:37:50 -04:00
comfyanonymous	1b80895285	Make clip loader nodes support loading sd3 t5xxl in lower precision. Add attention mask support in the SD3 text encoder code.	2024-10-10 15:06:15 -04:00
				`@@ -0,0 +1 @@`
				{"version":3,"file":"ExtensionPanel-CfMfcLgI.js","sources":["../../src/components/dialog/content/setting/ExtensionPanel.vue"],"sourcesContent":["<template>\n <div class=\"extension-panel\">\n <DataTable :value=\"extensionStore.extensions\" stripedRows size=\"small\">\n <Column field=\"name\" :header=\"$t('extensionName')\" sortable></Column>\n <Column\n :pt=\"{\n bodyCell: 'flex items-center justify-end'\n }\"\n >\n <template #body=\"slotProps\">\n <ToggleSwitch\n v-model=\"editingEnabledExtensions[slotProps.data.name]\"\n @change=\"updateExtensionStatus\"\n />\n </template>\n </Column>\n </DataTable>\n <div class=\"mt-4\">\n <Message v-if=\"hasChanges\" severity=\"info\">\n <ul>\n <li v-for=\"ext in changedExtensions\" :key=\"ext.name\">\n <span>\n {{ extensionStore.isExtensionEnabled(ext.name) ? '[-]' : '[+]' }}\n </span>\n {{ ext.name }}\n </li>\n </ul>\n </Message>\n <Button\n :label=\"$t('reloadToApplyChanges')\"\n icon=\"pi pi-refresh\"\n @click=\"applyChanges\"\n :disabled=\"!hasChanges\"\n text\n fluid\n severity=\"danger\"\n />\n </div>\n </div>\n</template>\n\n<script setup lang=\"ts\">\nimport { ref, computed, onMounted } from 'vue'\nimport { useExtensionStore } from '@/stores/extensionStore'\nimport { useSettingStore } from '@/stores/settingStore'\nimport DataTable from 'primevue/datatable'\nimport Column from 'primevue/column'\nimport ToggleSwitch from 'primevue/toggleswitch'\nimport Button from 'primevue/button'\nimport Message from 'primevue/message'\n\nconst extensionStore = useExtensionStore()\nconst settingStore = useSettingStore()\n\nconst editingEnabledExtensions = ref<Record<string, boolean>>({})\n\nonMounted(() => {\n extensionStore.extensions.forEach((ext) => {\n editingEnabledExtensions.value[ext.name] =\n extensionStore.isExtensionEnabled(ext.name)\n })\n})\n\nconst changedExtensions = computed(() => {\n return extensionStore.extensions.filter(\n (ext) =>\n editingEnabledExtensions.value[ext.name] !==\n 
extensionStore.isExtensionEnabled(ext.name)\n )\n})\n\nconst hasChanges = computed(() => {\n return changedExtensions.value.length > 0\n})\n\nconst updateExtensionStatus = () => {\n const editingDisabledExtensionNames = Object.entries(\n editingEnabledExtensions.value\n )\n .filter(([_, enabled]) => !enabled)\n .map(([name]) => name)\n\n settingStore.set('Comfy.Extension.Disabled', [\n ...extensionStore.inactiveDisabledExtensionNames,\n ...editingDisabledExtensionNames\n ])\n}\n\nconst applyChanges = () => {\n // Refresh the page to apply changes\n window.location.reload()\n}\n</script>\n"],"names":[],"mappings":";;;;;;;;;;AAmDA,UAAM,iBAAiB;AACvB,UAAM,eAAe;AAEf,UAAA,2BAA2B,IAA6B,CAAA,CAAE;AAEhE,cAAU,MAAM;AACC,qBAAA,WAAW,QAAQ,CAAC,QAAQ;AACzC,iCAAyB,MAAM,IAAI,IAAI,IACrC,eAAe,mBAAmB,IAAI,IAAI;AAAA,MAAA,CAC7C;AAAA,IAAA,CACF;AAEK,UAAA,oBAAoB,SAAS,MAAM;AACvC,aAAO,eAAe,WAAW;AAAA,QAC/B,CAAC,QACC,yBAAyB,MAAM,IAAI,IAAI,MACvC,eAAe,mBAAmB,IAAI,IAAI;AAAA,MAAA;AAAA,IAC9C,CACD;AAEK,UAAA,aAAa,SAAS,MAAM;AACzB,aAAA,kBAAkB,MAAM,SAAS;AAAA,IAAA,CACzC;AAED,UAAM,wBAAwB,6BAAM;AAClC,YAAM,gCAAgC,OAAO;AAAA,QAC3C,yBAAyB;AAAA,MAExB,EAAA,OAAO,CAAC,CAAC,GAAG,OAAO,MAAM,CAAC,OAAO,EACjC,IAAI,CAAC,CAAC,IAAI,MAAM,IAAI;AAEvB,mBAAa,IAAI,4BAA4B;AAAA,QAC3C,GAAG,eAAe;AAAA,QAClB,GAAG;AAAA,MAAA,CACJ;AAAA,IAAA,GAV2B;AAa9B,UAAM,eAAe,6BAAM;AAEzB,aAAO,SAAS;IAAO,GAFJ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;"}
				`@@ -0,0 +1 @@`
				{"version":3,"file":"ServerStartView-e57oVZ6V.js","sources":["../../src/components/common/LogTerminal.vue","../../src/views/ServerStartView.vue"],"sourcesContent":["<!-- A simple read-only terminal component that displays logs. -->\n<template>\n <div class=\"p-terminal rounded-none h-full w-full\">\n <ScrollPanel class=\"h-full w-full\" ref=\"scrollPanelRef\">\n <pre class=\"px-4 whitespace-pre-wrap\">{{ log }}</pre>\n </ScrollPanel>\n </div>\n</template>\n\n<script setup lang=\"ts\">\nimport ScrollPanel from 'primevue/scrollpanel'\nimport { onBeforeUnmount, onMounted, ref, watch } from 'vue'\n\nconst props = defineProps<{\n fetchLogs: () => Promise<string>\n fetchInterval: number\n}>()\n\nconst log = ref<string>('')\nconst scrollPanelRef = ref<InstanceType<typeof ScrollPanel> \| null>(null)\n/*\n Whether the user has scrolled to the bottom of the terminal.\n * This is used to prevent the terminal from scrolling to the bottom\n * when new logs are fetched.\n */\nconst scrolledToBottom = ref(false)\n\nlet intervalId: number = 0\n\nonMounted(async () => {\n const element = scrollPanelRef.value?.$el\n const scrollContainer = element?.querySelector('.p-scrollpanel-content')\n\n if (scrollContainer) {\n scrollContainer.addEventListener('scroll', () => {\n scrolledToBottom.value =\n scrollContainer.scrollTop + scrollContainer.clientHeight ===\n scrollContainer.scrollHeight\n })\n }\n\n const scrollToBottom = () => {\n if (scrollContainer) {\n scrollContainer.scrollTop = scrollContainer.scrollHeight\n }\n }\n\n watch(log, () => {\n if (scrolledToBottom.value) {\n scrollToBottom()\n }\n })\n\n const fetchLogs = async () => {\n log.value = await props.fetchLogs()\n }\n\n await fetchLogs()\n scrollToBottom()\n intervalId = window.setInterval(fetchLogs, props.fetchInterval)\n})\n\nonBeforeUnmount(() => {\n window.clearInterval(intervalId)\n})\n</script>\n","<template>\n <div\n class=\"font-sans flex flex-col justify-center items-center h-screen m-0 text-neutral-300 
bg-neutral-900 dark-theme pointer-events-auto\"\n >\n <h2 class=\"text-2xl font-bold\">{{ ProgressMessages[status] }}</h2>\n <LogTerminal :fetch-logs=\"fetchLogs\" :fetch-interval=\"500\" />\n </div>\n</template>\n\n<script setup lang=\"ts\">\nimport { ref, onMounted } from 'vue'\nimport LogTerminal from '@/components/common/LogTerminal.vue'\nimport {\n ProgressStatus,\n ProgressMessages\n} from '@comfyorg/comfyui-electron-types'\nimport { electronAPI } from '@/utils/envUtil'\n\nconst electron = electronAPI()\n\nconst status = ref<ProgressStatus>(ProgressStatus.INITIAL_STATE)\nconst logs = ref<string[]>([])\n\nconst updateProgress = ({ status: newStatus }: { status: ProgressStatus }) => {\n status.value = newStatus\n logs.value = [] // Clear logs when status changes\n}\n\nconst addLogMessage = (message: string) => {\n logs.value = [...logs.value, message]\n}\n\nconst fetchLogs = async () => {\n return logs.value.join('\\n')\n}\n\nonMounted(() => {\n electron.sendReady()\n electron.onProgressUpdate(updateProgress)\n electron.onLogMessage((message: string) => {\n addLogMessage(message)\n 
})\n})\n</script>\n"],"names":["ProgressStatus"],"mappings":";;;;;;;;;;;;;AAaA,UAAM,QAAQ;AAKR,UAAA,MAAM,IAAY,EAAE;AACpB,UAAA,iBAAiB,IAA6C,IAAI;AAMlE,UAAA,mBAAmB,IAAI,KAAK;AAElC,QAAI,aAAqB;AAEzB,cAAU,YAAY;AACd,YAAA,UAAU,eAAe,OAAO;AAChC,YAAA,kBAAkB,SAAS,cAAc,wBAAwB;AAEvE,UAAI,iBAAiB;AACH,wBAAA,iBAAiB,UAAU,MAAM;AAC/C,2BAAiB,QACf,gBAAgB,YAAY,gBAAgB,iBAC5C,gBAAgB;AAAA,QAAA,CACnB;AAAA,MACH;AAEA,YAAM,iBAAiB,6BAAM;AAC3B,YAAI,iBAAiB;AACnB,0BAAgB,YAAY,gBAAgB;AAAA,QAC9C;AAAA,MAAA,GAHqB;AAMvB,YAAM,KAAK,MAAM;AACf,YAAI,iBAAiB,OAAO;AACX;QACjB;AAAA,MAAA,CACD;AAED,YAAM,YAAY,mCAAY;AACxB,YAAA,QAAQ,MAAM,MAAM,UAAU;AAAA,MAAA,GADlB;AAIlB,YAAM,UAAU;AACD;AACf,mBAAa,OAAO,YAAY,WAAW,MAAM,aAAa;AAAA,IAAA,CAC/D;AAED,oBAAgB,MAAM;AACpB,aAAO,cAAc,UAAU;AAAA,IAAA,CAChC;;;;;;;;;;;;;;;;;;;;;;AC9CD,UAAM,WAAW;AAEX,UAAA,SAAS,IAAoBA,EAAe,aAAa;AACzD,UAAA,OAAO,IAAc,CAAA,CAAE;AAE7B,UAAM,iBAAiB,wBAAC,EAAE,QAAQ,gBAA4C;AAC5E,aAAO,QAAQ;AACf,WAAK,QAAQ;IAAC,GAFO;AAKjB,UAAA,gBAAgB,wBAAC,YAAoB;AACzC,WAAK,QAAQ,CAAC,GAAG,KAAK,OAAO,OAAO;AAAA,IAAA,GADhB;AAItB,UAAM,YAAY,mCAAY;AACrB,aAAA,KAAK,MAAM,KAAK,IAAI;AAAA,IAAA,GADX;AAIlB,cAAU,MAAM;AACd,eAAS,UAAU;AACnB,eAAS,iBAAiB,cAAc;AAC/B,eAAA,aAAa,CAAC,YAAoB;AACzC,sBAAc,OAAO;AAAA,MAAA,CACtB;AAAA,IAAA,CACF;;;;;;;;;;;;"}
				`@@ -0,0 +1 @@`
				`{"version":3,"file":"WelcomeView-DT4bj-QV.js","sources":[],"sourcesContent":[],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;"}`
				`@@ -0,0 +1 @@`
				{"version":3,"file":"index-B4gmhi99.js","sources":["../../node_modules/@comfyorg/comfyui-electron-types/index.mjs"],"sourcesContent":["const o = {\n LOADING_PROGRESS: \"loading-progress\",\n IS_PACKAGED: \"is-packaged\",\n RENDERER_READY: \"renderer-ready\",\n RESTART_APP: \"restart-app\",\n REINSTALL: \"reinstall\",\n LOG_MESSAGE: \"log-message\",\n OPEN_DIALOG: \"open-dialog\",\n DOWNLOAD_PROGRESS: \"download-progress\",\n START_DOWNLOAD: \"start-download\",\n PAUSE_DOWNLOAD: \"pause-download\",\n RESUME_DOWNLOAD: \"resume-download\",\n CANCEL_DOWNLOAD: \"cancel-download\",\n DELETE_MODEL: \"delete-model\",\n GET_ALL_DOWNLOADS: \"get-all-downloads\",\n GET_ELECTRON_VERSION: \"get-electron-version\",\n SEND_ERROR_TO_SENTRY: \"send-error-to-sentry\",\n GET_BASE_PATH: \"get-base-path\",\n GET_MODEL_CONFIG_PATH: \"get-model-config-path\",\n OPEN_PATH: \"open-path\",\n OPEN_LOGS_PATH: \"open-logs-path\",\n OPEN_DEV_TOOLS: \"open-dev-tools\",\n IS_FIRST_TIME_SETUP: \"is-first-time-setup\",\n GET_SYSTEM_PATHS: \"get-system-paths\",\n VALIDATE_INSTALL_PATH: \"validate-install-path\",\n VALIDATE_COMFYUI_SOURCE: \"validate-comfyui-source\",\n SHOW_DIRECTORY_PICKER: \"show-directory-picker\",\n INSTALL_COMFYUI: \"install-comfyui\"\n};\nvar t = /* @__PURE__ */ ((e) => (e.INITIAL_STATE = \"initial-state\", e.PYTHON_SETUP = \"python-setup\", e.STARTING_SERVER = \"starting-server\", e.READY = \"ready\", e.ERROR = \"error\", e.ERROR_INSTALL_PATH = \"error-install-path\", e))(t \|\| {});\nconst s = {\n \"initial-state\": \"Loading...\",\n \"python-setup\": \"Setting up Python Environment...\",\n \"starting-server\": \"Starting ComfyUI server...\",\n ready: \"Finishing...\",\n error: \"Was not able to start ComfyUI. Please check the logs for more details. You can open it from the Help menu. Please report issues to: https://forum.comfy.org\",\n \"error-install-path\": \"Installation path does not exist. 
Please reset the installation location.\"\n}, a = \"electronAPI\", n = \"https://942cadba58d247c9cab96f45221aa813@o4507954455314432.ingest.us.sentry.io/4508007940685824\", r = [\n {\n id: \"user_files\",\n label: \"User Files\",\n description: \"Settings and user-created workflows\"\n },\n {\n id: \"models\",\n label: \"Models\",\n description: \"Reference model files from existing ComfyUI installations. (No copy)\"\n }\n // TODO: Decide whether we want to auto-migrate custom nodes, and install their dependencies.\n // huchenlei: This is a very essential thing for migration experience.\n // {\n // id: 'custom_nodes',\n // label: 'Custom Nodes',\n // description: 'Reference custom node files from existing ComfyUI installations. (No copy)',\n // },\n];\nexport {\n a as ELECTRON_BRIDGE_API,\n o as IPC_CHANNELS,\n r as MigrationItems,\n s as ProgressMessages,\n t as ProgressStatus,\n n as SENTRY_URL_ENDPOINT\n};\n"],"names":[],"mappings":"AAAA,MAAM,IAAI;AAAA,EACR,kBAAkB;AAAA,EAClB,aAAa;AAAA,EACb,gBAAgB;AAAA,EAChB,aAAa;AAAA,EACb,WAAW;AAAA,EACX,aAAa;AAAA,EACb,aAAa;AAAA,EACb,mBAAmB;AAAA,EACnB,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,iBAAiB;AAAA,EACjB,iBAAiB;AAAA,EACjB,cAAc;AAAA,EACd,mBAAmB;AAAA,EACnB,sBAAsB;AAAA,EACtB,sBAAsB;AAAA,EACtB,eAAe;AAAA,EACf,uBAAuB;AAAA,EACvB,WAAW;AAAA,EACX,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,qBAAqB;AAAA,EACrB,kBAAkB;AAAA,EAClB,uBAAuB;AAAA,EACvB,yBAAyB;AAAA,EACzB,uBAAuB;AAAA,EACvB,iBAAiB;AACnB;AACG,IAAC,IAAqB,kBAAC,OAAO,EAAE,gBAAgB,iBAAiB,EAAE,eAAe,gBAAgB,EAAE,kBAAkB,mBAAmB,EAAE,QAAQ,SAAS,EAAE,QAAQ,SAAS,EAAE,qBAAqB,sBAAsB,IAAI,KAAK,CAAA,CAAE;AACrO,MAAC,IAAI;AAAA,EACR,iBAAiB;AAAA,EACjB,gBAAgB;AAAA,EAChB,mBAAmB;AAAA,EACnB,OAAO;AAAA,EACP,OAAO;AAAA,EACP,sBAAsB;AACxB,GAAG,IAAI,eAAe,IAAI,mGAAmG,IAAI;AAAA,EAC/H;AAAA,IACE,IAAI;AAAA,IACJ,OAAO;AAAA,IACP,aAAa;AAAA,EACd;AAAA,EACD;AAAA,IACE,IAAI;AAAA,IACJ,OAAO;AAAA,IACP,aAAa;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAQH;","x_google_ignoreList":[0]}