diff --git a/.ci/update_windows/update.py b/.ci/update_windows/update.py index 731b6bc5..51a26320 100755 --- a/.ci/update_windows/update.py +++ b/.ci/update_windows/update.py @@ -63,7 +63,12 @@ except: print("checking out master branch") # noqa: T201 branch = repo.lookup_branch('master') if branch is None: - ref = repo.lookup_reference('refs/remotes/origin/master') + try: + ref = repo.lookup_reference('refs/remotes/origin/master') + except: + print("pulling.") # noqa: T201 + pull(repo) + ref = repo.lookup_reference('refs/remotes/origin/master') repo.checkout(ref) branch = repo.lookup_branch('master') if branch is None: diff --git a/.ci/windows_nightly_base_files/run_nvidia_gpu_fast_fp16_accumulation.bat b/.ci/windows_base_files/run_nvidia_gpu_fast_fp16_accumulation.bat similarity index 100% rename from .ci/windows_nightly_base_files/run_nvidia_gpu_fast_fp16_accumulation.bat rename to .ci/windows_base_files/run_nvidia_gpu_fast_fp16_accumulation.bat diff --git a/.github/workflows/stable-release.yml b/.github/workflows/stable-release.yml index f7d30a9a..a046ff9e 100644 --- a/.github/workflows/stable-release.yml +++ b/.github/workflows/stable-release.yml @@ -12,7 +12,7 @@ on: description: 'CUDA version' required: true type: string - default: "126" + default: "128" python_minor: description: 'Python minor version' required: true @@ -22,7 +22,7 @@ on: description: 'Python patch version' required: true type: string - default: "9" + default: "10" jobs: @@ -36,7 +36,7 @@ jobs: - uses: actions/checkout@v4 with: ref: ${{ inputs.git_tag }} - fetch-depth: 0 + fetch-depth: 150 persist-credentials: false - uses: actions/cache/restore@v4 id: cache @@ -70,7 +70,7 @@ jobs: cd .. git clone --depth 1 https://github.com/comfyanonymous/taesd - cp taesd/*.pth ./ComfyUI_copy/models/vae_approx/ + cp taesd/*.safetensors ./ComfyUI_copy/models/vae_approx/ mkdir ComfyUI_windows_portable mv python_embeded ComfyUI_windows_portable @@ -85,12 +85,14 @@ jobs: cd .. 
- "C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma2 -mx=8 -mfb=64 -md=32m -ms=on -mf=BCJ2 ComfyUI_windows_portable.7z ComfyUI_windows_portable + "C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma2 -mx=9 -mfb=128 -md=512m -ms=on -mf=BCJ2 ComfyUI_windows_portable.7z ComfyUI_windows_portable mv ComfyUI_windows_portable.7z ComfyUI/ComfyUI_windows_portable_nvidia.7z cd ComfyUI_windows_portable python_embeded/python.exe -s ComfyUI/main.py --quick-test-for-ci --cpu + python_embeded/python.exe -s ./update/update.py ComfyUI/ + ls - name: Upload binaries to release diff --git a/.github/workflows/test-launch.yml b/.github/workflows/test-launch.yml index c56283c2..1735fd83 100644 --- a/.github/workflows/test-launch.yml +++ b/.github/workflows/test-launch.yml @@ -17,7 +17,7 @@ jobs: path: "ComfyUI" - uses: actions/setup-python@v4 with: - python-version: '3.9' + python-version: '3.10' - name: Install requirements run: | python -m pip install --upgrade pip diff --git a/.github/workflows/update-api-stubs.yml b/.github/workflows/update-api-stubs.yml new file mode 100644 index 00000000..c99ec9fc --- /dev/null +++ b/.github/workflows/update-api-stubs.yml @@ -0,0 +1,56 @@ +name: Generate Pydantic Stubs from api.comfy.org + +on: + schedule: + - cron: '0 0 * * 1' + workflow_dispatch: + +jobs: + generate-models: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install 'datamodel-code-generator[http]' + npm install @redocly/cli + + - name: Download OpenAPI spec + run: | + curl -o openapi.yaml https://api.comfy.org/openapi + + - name: Filter OpenAPI spec with Redocly + run: | + npx @redocly/cli bundle openapi.yaml --output filtered-openapi.yaml --config comfy_api_nodes/redocly.yaml --remove-unused-components + + - name: Generate API models + run: | + datamodel-codegen --use-subclass-enum --input filtered-openapi.yaml --output comfy_api_nodes/apis --output-model-type pydantic_v2.BaseModel + + - name: Check for changes + id: git-check + run: | + git diff --exit-code comfy_api_nodes/apis || echo "changes=true" >> $GITHUB_OUTPUT + + - name: Create Pull Request + if: steps.git-check.outputs.changes == 'true' + uses: peter-evans/create-pull-request@v5 + with: + commit-message: 'chore: update API models from OpenAPI spec' + title: 'Update API models from api.comfy.org' + body: | + This PR updates the API models based on the latest api.comfy.org OpenAPI specification. + + Generated automatically by a GitHub workflow.
+ branch: update-api-stubs + delete-branch: true + base: master diff --git a/.github/workflows/windows_release_dependencies.yml b/.github/workflows/windows_release_dependencies.yml index 7a8ec578..dfdb96d5 100644 --- a/.github/workflows/windows_release_dependencies.yml +++ b/.github/workflows/windows_release_dependencies.yml @@ -17,7 +17,7 @@ on: description: 'cuda version' required: true type: string - default: "126" + default: "128" python_minor: description: 'python minor version' @@ -29,7 +29,7 @@ on: description: 'python patch version' required: true type: string - default: "9" + default: "10" # push: # branches: # - master diff --git a/.github/workflows/windows_release_nightly_pytorch.yml b/.github/workflows/windows_release_nightly_pytorch.yml index 24599249..eb5ed9c9 100644 --- a/.github/workflows/windows_release_nightly_pytorch.yml +++ b/.github/workflows/windows_release_nightly_pytorch.yml @@ -56,7 +56,7 @@ jobs: cd .. git clone --depth 1 https://github.com/comfyanonymous/taesd - cp taesd/*.pth ./ComfyUI_copy/models/vae_approx/ + cp taesd/*.safetensors ./ComfyUI_copy/models/vae_approx/ mkdir ComfyUI_windows_portable_nightly_pytorch mv python_embeded ComfyUI_windows_portable_nightly_pytorch diff --git a/.github/workflows/windows_release_package.yml b/.github/workflows/windows_release_package.yml index 416544f7..3926a65f 100644 --- a/.github/workflows/windows_release_package.yml +++ b/.github/workflows/windows_release_package.yml @@ -7,7 +7,7 @@ on: description: 'cuda version' required: true type: string - default: "126" + default: "128" python_minor: description: 'python minor version' @@ -19,7 +19,7 @@ on: description: 'python patch version' required: true type: string - default: "9" + default: "10" # push: # branches: # - master @@ -50,7 +50,7 @@ jobs: - uses: actions/checkout@v4 with: - fetch-depth: 0 + fetch-depth: 150 persist-credentials: false - shell: bash run: | @@ -67,7 +67,7 @@ jobs: cd .. git clone --depth 1 https://github.com/comfyanonymous/taesd - cp taesd/*.pth ./ComfyUI_copy/models/vae_approx/ + cp taesd/*.safetensors ./ComfyUI_copy/models/vae_approx/ mkdir ComfyUI_windows_portable mv python_embeded ComfyUI_windows_portable @@ -82,12 +82,14 @@ jobs: cd .. - "C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma2 -mx=8 -mfb=64 -md=32m -ms=on -mf=BCJ2 ComfyUI_windows_portable.7z ComfyUI_windows_portable + "C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma2 -mx=9 -mfb=128 -md=512m -ms=on -mf=BCJ2 ComfyUI_windows_portable.7z ComfyUI_windows_portable mv ComfyUI_windows_portable.7z ComfyUI/new_ComfyUI_windows_portable_nvidia_cu${{ inputs.cu }}_or_cpu.7z cd ComfyUI_windows_portable python_embeded/python.exe -s ComfyUI/main.py --quick-test-for-ci --cpu + python_embeded/python.exe -s ./update/update.py ComfyUI/ + ls - name: Upload binaries to release diff --git a/.gitignore b/.gitignore index 61881b8a..4e8cea71 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,6 @@ venv/ *.log web_custom_versions/ .DS_Store +openapi.yaml +filtered-openapi.yaml +uv.lock diff --git a/README.md b/README.md index cf6df7e5..deee70c6 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,6 @@ Supports all operating systems and GPU types (NVIDIA, AMD, Intel, Apple Silicon, ## [Examples](https://comfyanonymous.github.io/ComfyUI_examples/) See what ComfyUI can do with the [example workflows](https://comfyanonymous.github.io/ComfyUI_examples/). - ## Features - Nodes/graph/flowchart interface to experiment and create complex Stable Diffusion workflows without needing to code anything. 
- Image Models @@ -70,9 +69,11 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith - [Hunyuan Video](https://comfyanonymous.github.io/ComfyUI_examples/hunyuan_video/) - [Nvidia Cosmos](https://comfyanonymous.github.io/ComfyUI_examples/cosmos/) - [Wan 2.1](https://comfyanonymous.github.io/ComfyUI_examples/wan/) +- Audio Models + - [Stable Audio](https://comfyanonymous.github.io/ComfyUI_examples/audio/) + - [ACE Step](https://comfyanonymous.github.io/ComfyUI_examples/audio/) - 3D Models - [Hunyuan3D 2.0](https://docs.comfy.org/tutorials/3d/hunyuan3D-2) -- [Stable Audio](https://comfyanonymous.github.io/ComfyUI_examples/audio/) - Asynchronous Queue system - Many optimizations: Only re-executes the parts of the workflow that changes between executions. - Smart memory management: can automatically run models on GPUs with as low as 1GB vram. @@ -99,6 +100,23 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith Workflow examples can be found on the [Examples page](https://comfyanonymous.github.io/ComfyUI_examples/) +## Release Process + +ComfyUI follows a weekly release cycle every Friday, with three interconnected repositories: + +1. **[ComfyUI Core](https://github.com/comfyanonymous/ComfyUI)** + - Releases a new stable version (e.g., v0.7.0) + - Serves as the foundation for the desktop release + +2. **[ComfyUI Desktop](https://github.com/Comfy-Org/desktop)** + - Builds a new release using the latest stable core version + - Version numbers match the core release (e.g., Desktop v1.7.0 uses Core v1.7.0) + +3. **[ComfyUI Frontend](https://github.com/Comfy-Org/ComfyUI_frontend)** + - Weekly frontend updates are merged into the core repository + - Features are frozen for the upcoming core release + - Development continues for the next release cycle + ## Shortcuts | Keybind | Explanation | @@ -149,8 +167,6 @@ Simply download, extract with [7-Zip](https://7-zip.org) and run. Make sure you If you have trouble extracting it, right click the file -> properties -> unblock -If you have a 50 series Blackwell card like a 5090 or 5080 see [this discussion thread](https://github.com/comfyanonymous/ComfyUI/discussions/6643) - #### How do I share models between another UI and ComfyUI? See the [Config file](extra_model_paths.yaml.example) to set the search paths for models. In the standalone windows build you can find this file in the ComfyUI directory. Rename this file to extra_model_paths.yaml and edit it with your favorite text editor. @@ -216,9 +232,9 @@ Additional discussion and help can be found [here](https://github.com/comfyanony Nvidia users should install stable pytorch using this command: -```pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu126``` +```pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu128``` -This is the command to install pytorch nightly instead which supports the new blackwell 50xx series GPUs and might have performance improvements. +This is the command to install pytorch nightly instead which might have performance improvements. 
```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128``` diff --git a/app/custom_node_manager.py b/app/custom_node_manager.py index 42b0d75b..281febca 100644 --- a/app/custom_node_manager.py +++ b/app/custom_node_manager.py @@ -93,16 +93,20 @@ class CustomNodeManager: def add_routes(self, routes, webapp, loadedModules): + example_workflow_folder_names = ["example_workflows", "example", "examples", "workflow", "workflows"] + @routes.get("/workflow_templates") async def get_workflow_templates(request): """Returns a web response that contains the map of custom_nodes names and their associated workflow templates. The ones without templates are omitted.""" - files = [ - file - for folder in folder_paths.get_folder_paths("custom_nodes") - for file in glob.glob( - os.path.join(folder, "*/example_workflows/*.json") - ) - ] + + files = [] + + for folder in folder_paths.get_folder_paths("custom_nodes"): + for folder_name in example_workflow_folder_names: + pattern = os.path.join(folder, f"*/{folder_name}/*.json") + matched_files = glob.glob(pattern) + files.extend(matched_files) + workflow_templates_dict = ( {} ) # custom_nodes folder name -> example workflow names @@ -118,15 +122,22 @@ class CustomNodeManager: # Serve workflow templates from custom nodes. for module_name, module_dir in loadedModules: - workflows_dir = os.path.join(module_dir, "example_workflows") - if os.path.exists(workflows_dir): - webapp.add_routes( - [ - web.static( - "/api/workflow_templates/" + module_name, workflows_dir - ) - ] - ) + for folder_name in example_workflow_folder_names: + workflows_dir = os.path.join(module_dir, folder_name) + + if os.path.exists(workflows_dir): + if folder_name != "example_workflows": + logging.debug( + "Found example workflow folder '%s' for custom node '%s', consider renaming it to 'example_workflows'", + folder_name, module_name) + + webapp.add_routes( + [ + web.static( + "/api/workflow_templates/" + module_name, workflows_dir + ) + ] + ) @routes.get("/i18n") async def get_i18n(request): diff --git a/app/user_manager.py b/app/user_manager.py index e7381e62..d31da5b9 100644 --- a/app/user_manager.py +++ b/app/user_manager.py @@ -197,6 +197,112 @@ class UserManager(): return web.json_response(results) + @routes.get("/v2/userdata") + async def list_userdata_v2(request): + """ + List files and directories in a user's data directory. + + This endpoint provides a structured listing of contents within a specified + subdirectory of the user's data storage. + + Query Parameters: + - path (optional): The relative path within the user's data directory + to list. Defaults to the root (''). + + Returns: + - 400: If the requested path is invalid, outside the user's data directory, or is not a directory. + - 404: If the requested path does not exist. + - 403: If the user is invalid. + - 500: If there is an error reading the directory contents. + - 200: JSON response containing a list of file and directory objects. + Each object includes: + - name: The name of the file or directory. + - type: 'file' or 'directory'. + - path: The relative path from the user's data root. + - size (for files): The size in bytes. + - modified (for files): The last modified timestamp (Unix epoch). 
+ """ + requested_rel_path = request.rel_url.query.get('path', '') + + # URL-decode the path parameter + try: + requested_rel_path = parse.unquote(requested_rel_path) + except Exception as e: + logging.warning(f"Failed to decode path parameter: {requested_rel_path}, Error: {e}") + return web.Response(status=400, text="Invalid characters in path parameter") + + + # Check user validity and get the absolute path for the requested directory + try: + base_user_path = self.get_request_user_filepath(request, None, create_dir=False) + + if requested_rel_path: + target_abs_path = self.get_request_user_filepath(request, requested_rel_path, create_dir=False) + else: + target_abs_path = base_user_path + + except KeyError as e: + # Invalid user detected by get_request_user_id inside get_request_user_filepath + logging.warning(f"Access denied for user: {e}") + return web.Response(status=403, text="Invalid user specified in request") + + + if not target_abs_path: + # Path traversal or other issue detected by get_request_user_filepath + return web.Response(status=400, text="Invalid path requested") + + # Handle cases where the user directory or target path doesn't exist + if not os.path.exists(target_abs_path): + # Check if it's the base user directory that's missing (new user case) + if target_abs_path == base_user_path: + # It's okay if the base user directory doesn't exist yet, return empty list + return web.json_response([]) + else: + # A specific subdirectory was requested but doesn't exist + return web.Response(status=404, text="Requested path not found") + + if not os.path.isdir(target_abs_path): + return web.Response(status=400, text="Requested path is not a directory") + + results = [] + try: + for root, dirs, files in os.walk(target_abs_path, topdown=True): + # Process directories + for dir_name in dirs: + dir_path = os.path.join(root, dir_name) + rel_path = os.path.relpath(dir_path, base_user_path).replace(os.sep, '/') + results.append({ + "name": dir_name, + "path": rel_path, + "type": "directory" + }) + + # Process files + for file_name in files: + file_path = os.path.join(root, file_name) + rel_path = os.path.relpath(file_path, base_user_path).replace(os.sep, '/') + entry_info = { + "name": file_name, + "path": rel_path, + "type": "file" + } + try: + stats = os.stat(file_path) # Use os.stat for potentially better performance with os.walk + entry_info["size"] = stats.st_size + entry_info["modified"] = stats.st_mtime + except OSError as stat_error: + logging.warning(f"Could not stat file {file_path}: {stat_error}") + pass # Include file with available info + results.append(entry_info) + except OSError as e: + logging.error(f"Error listing directory {target_abs_path}: {e}") + return web.Response(status=500, text="Error reading directory contents") + + # Sort results alphabetically, directories first then files + results.sort(key=lambda x: (x['type'] != 'directory', x['name'].lower())) + + return web.json_response(results) + def get_user_data_path(request, check_exists = False, param = "file"): file = request.match_info.get(param, None) if not file: diff --git a/comfy/cli_args.py b/comfy/cli_args.py index 723c19ef..d5ecdf89 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -128,6 +128,7 @@ vram_group.add_argument("--cpu", action="store_true", help="To use the CPU for e parser.add_argument("--reserve-vram", type=float, default=None, help="Set the amount of vram in GB you want to reserve for use by your OS/other software. 
By default some amount is reserved depending on your OS.") +parser.add_argument("--async-offload", action="store_true", help="Use async weight offloading.") parser.add_argument("--default-hashing-function", type=str, choices=['md5', 'sha1', 'sha256', 'sha512'], default='sha256', help="Allows you to choose the hash function to use for duplicate filename / contents comparison. Default is sha256.") @@ -141,12 +142,15 @@ class PerformanceFeature(enum.Enum): parser.add_argument("--fast", nargs="*", type=PerformanceFeature, help="Enable some untested and potentially quality deteriorating optimizations. --fast with no arguments enables everything. You can pass a list specific optimizations if you only want to enable specific ones. Current valid optimizations: fp16_accumulation fp8_matrix_mult cublas_ops") +parser.add_argument("--mmap-torch-files", action="store_true", help="Use mmap when loading ckpt/pt files.") + parser.add_argument("--dont-print-server", action="store_true", help="Don't print server output.") parser.add_argument("--quick-test-for-ci", action="store_true", help="Quick test for CI.") parser.add_argument("--windows-standalone-build", action="store_true", help="Windows standalone build: Enable convenient things that most people using the standalone windows build will probably enjoy (like auto opening the page on startup).") parser.add_argument("--disable-metadata", action="store_true", help="Disable saving prompt metadata in files.") parser.add_argument("--disable-all-custom-nodes", action="store_true", help="Disable loading all custom nodes.") +parser.add_argument("--disable-api-nodes", action="store_true", help="Disable loading all api nodes.") parser.add_argument("--multi-user", action="store_true", help="Enables per-user storage.") @@ -191,6 +195,13 @@ parser.add_argument("--user-directory", type=is_valid_directory, default=None, h parser.add_argument("--enable-compress-response-body", action="store_true", help="Enable compressing response body.") +parser.add_argument( + "--comfy-api-base", + type=str, + default="https://api.comfy.org", + help="Set the base URL for the ComfyUI API. (default: https://api.comfy.org)", +) + if comfy.options.args_parsing: args = parser.parse_args() else: diff --git a/comfy/clip_vision.py b/comfy/clip_vision.py index 11bc5778..00aab916 100644 --- a/comfy/clip_vision.py +++ b/comfy/clip_vision.py @@ -18,6 +18,7 @@ class Output: setattr(self, key, item) def clip_preprocess(image, size=224, mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711], crop=True): + image = image[:, :, :, :3] if image.shape[3] > 3 else image mean = torch.tensor(mean, device=image.device, dtype=image.dtype) std = torch.tensor(std, device=image.device, dtype=image.dtype) image = image.movedim(-1, 1) diff --git a/comfy/comfy_types/node_typing.py b/comfy/comfy_types/node_typing.py index a348791a..2ffc9c02 100644 --- a/comfy/comfy_types/node_typing.py +++ b/comfy/comfy_types/node_typing.py @@ -1,7 +1,7 @@ """Comfy-specific type hinting""" from __future__ import annotations -from typing import Literal, TypedDict +from typing import Literal, TypedDict, Optional from typing_extensions import NotRequired from abc import ABC, abstractmethod from enum import Enum @@ -48,6 +48,7 @@ class IO(StrEnum): FACE_ANALYSIS = "FACE_ANALYSIS" BBOX = "BBOX" SEGS = "SEGS" + VIDEO = "VIDEO" ANY = "*" """Always matches any type, but at a price. 
@@ -120,6 +121,10 @@ class InputTypeOptions(TypedDict): Available from frontend v1.17.5 Ref: https://github.com/Comfy-Org/ComfyUI_frontend/pull/3548 """ + widgetType: NotRequired[str] + """Specifies a type to be used for widget initialization if different from the input type. + Available from frontend v1.18.0 + https://github.com/Comfy-Org/ComfyUI_frontend/pull/3550""" # class InputTypeNumber(InputTypeOptions): # default: float | int min: NotRequired[float] @@ -229,6 +234,8 @@ class ComfyNodeABC(ABC): """Flags a node as experimental, informing users that it may change or not work as expected.""" DEPRECATED: bool """Flags a node as deprecated, indicating to users that they should find alternatives to this node.""" + API_NODE: Optional[bool] + """Flags a node as an API node.""" @classmethod @abstractmethod @@ -267,7 +274,7 @@ class ComfyNodeABC(ABC): Comfy Docs: https://docs.comfy.org/custom-nodes/backend/lists#list-processing """ - OUTPUT_IS_LIST: tuple[bool] + OUTPUT_IS_LIST: tuple[bool, ...] """A tuple indicating which node outputs are lists, but will be connected to nodes that expect individual items. Connected nodes that do not implement `INPUT_IS_LIST` will be executed once for every item in the list. @@ -286,7 +293,7 @@ class ComfyNodeABC(ABC): Comfy Docs: https://docs.comfy.org/custom-nodes/backend/lists#list-processing """ - RETURN_TYPES: tuple[IO] + RETURN_TYPES: tuple[IO, ...] """A tuple representing the outputs of this node. Usage:: @@ -295,12 +302,12 @@ class ComfyNodeABC(ABC): Comfy Docs: https://docs.comfy.org/custom-nodes/backend/server_overview#return-types """ - RETURN_NAMES: tuple[str] + RETURN_NAMES: tuple[str, ...] """The output slot names for each item in `RETURN_TYPES`, e.g. ``RETURN_NAMES = ("count", "filter_string")`` Comfy Docs: https://docs.comfy.org/custom-nodes/backend/server_overview#return-names """ - OUTPUT_TOOLTIPS: tuple[str] + OUTPUT_TOOLTIPS: tuple[str, ...] """A tuple of strings to use as tooltips for node outputs, one for each item in `RETURN_TYPES`.""" FUNCTION: str """The name of the function to execute as a literal string, e.g. `FUNCTION = "execute"` diff --git a/comfy/image_encoders/dino2.py b/comfy/image_encoders/dino2.py index 130ed6fd..976f98c6 100644 --- a/comfy/image_encoders/dino2.py +++ b/comfy/image_encoders/dino2.py @@ -116,7 +116,7 @@ class Dino2Embeddings(torch.nn.Module): def forward(self, pixel_values): x = self.patch_embeddings(pixel_values) # TODO: mask_token? 
- x = torch.cat((self.cls_token.expand(x.shape[0], -1, -1), x), dim=1) + x = torch.cat((self.cls_token.to(device=x.device, dtype=x.dtype).expand(x.shape[0], -1, -1), x), dim=1) x = x + comfy.model_management.cast_to_device(self.position_embeddings, x.device, x.dtype) return x diff --git a/comfy/k_diffusion/sampling.py b/comfy/k_diffusion/sampling.py index 6388d3fa..fbdf6f55 100644 --- a/comfy/k_diffusion/sampling.py +++ b/comfy/k_diffusion/sampling.py @@ -1277,6 +1277,7 @@ def res_multistep(model, x, sigmas, extra_args=None, callback=None, disable=None phi1_fn = lambda t: torch.expm1(t) / t phi2_fn = lambda t: (phi1_fn(t) - 1.0) / t + old_sigma_down = None old_denoised = None uncond_denoised = None def post_cfg_function(args): @@ -1304,9 +1305,9 @@ def res_multistep(model, x, sigmas, extra_args=None, callback=None, disable=None x = x + d * dt else: # Second order multistep method in https://arxiv.org/pdf/2308.02157 - t, t_next, t_prev = t_fn(sigmas[i]), t_fn(sigma_down), t_fn(sigmas[i - 1]) + t, t_old, t_next, t_prev = t_fn(sigmas[i]), t_fn(old_sigma_down), t_fn(sigma_down), t_fn(sigmas[i - 1]) h = t_next - t - c2 = (t_prev - t) / h + c2 = (t_prev - t_old) / h phi1_val, phi2_val = phi1_fn(-h), phi2_fn(-h) b1 = torch.nan_to_num(phi1_val - phi2_val / c2, nan=0.0) @@ -1326,6 +1327,7 @@ def res_multistep(model, x, sigmas, extra_args=None, callback=None, disable=None old_denoised = uncond_denoised else: old_denoised = denoised + old_sigma_down = sigma_down return x @torch.no_grad() @@ -1345,28 +1347,52 @@ def sample_res_multistep_ancestral_cfg_pp(model, x, sigmas, extra_args=None, cal return res_multistep(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, s_noise=s_noise, noise_sampler=noise_sampler, eta=eta, cfg_pp=True) @torch.no_grad() -def sample_gradient_estimation(model, x, sigmas, extra_args=None, callback=None, disable=None, ge_gamma=2.): +def sample_gradient_estimation(model, x, sigmas, extra_args=None, callback=None, disable=None, ge_gamma=2., cfg_pp=False): """Gradient-estimation sampler. 
Paper: https://openreview.net/pdf?id=o2ND9v0CeK""" extra_args = {} if extra_args is None else extra_args s_in = x.new_ones([x.shape[0]]) old_d = None + uncond_denoised = None + def post_cfg_function(args): + nonlocal uncond_denoised + uncond_denoised = args["uncond_denoised"] + return args["denoised"] + + if cfg_pp: + model_options = extra_args.get("model_options", {}).copy() + extra_args["model_options"] = comfy.model_patcher.set_model_options_post_cfg_function(model_options, post_cfg_function, disable_cfg1_optimization=True) + for i in trange(len(sigmas) - 1, disable=disable): denoised = model(x, sigmas[i] * s_in, **extra_args) - d = to_d(x, sigmas[i], denoised) + if cfg_pp: + d = to_d(x, sigmas[i], uncond_denoised) + else: + d = to_d(x, sigmas[i], denoised) if callback is not None: callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) dt = sigmas[i + 1] - sigmas[i] if i == 0: # Euler method - x = x + d * dt + if cfg_pp: + x = denoised + d * sigmas[i + 1] + else: + x = x + d * dt else: # Gradient estimation - d_bar = ge_gamma * d + (1 - ge_gamma) * old_d - x = x + d_bar * dt + if cfg_pp: + d_bar = (ge_gamma - 1) * (d - old_d) + x = denoised + d * sigmas[i + 1] + d_bar * dt + else: + d_bar = ge_gamma * d + (1 - ge_gamma) * old_d + x = x + d_bar * dt old_d = d return x +@torch.no_grad() +def sample_gradient_estimation_cfg_pp(model, x, sigmas, extra_args=None, callback=None, disable=None, ge_gamma=2.): + return sample_gradient_estimation(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, ge_gamma=ge_gamma, cfg_pp=True) + @torch.no_grad() def sample_er_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, s_noise=1., noise_sampler=None, noise_scaler=None, max_stage=3): """ diff --git a/comfy/latent_formats.py b/comfy/latent_formats.py index 556c3951..82d9f9bb 100644 --- a/comfy/latent_formats.py +++ b/comfy/latent_formats.py @@ -466,3 +466,7 @@ class Hunyuan3Dv2mini(LatentFormat): latent_channels = 64 latent_dimensions = 1 scale_factor = 1.0188137142395404 + +class ACEAudio(LatentFormat): + latent_channels = 8 + latent_dimensions = 2 diff --git a/comfy/ldm/ace/attention.py b/comfy/ldm/ace/attention.py new file mode 100644 index 00000000..f20a0166 --- /dev/null +++ b/comfy/ldm/ace/attention.py @@ -0,0 +1,761 @@ +# Original from: https://github.com/ace-step/ACE-Step/blob/main/models/attention.py +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from typing import Tuple, Union, Optional + +import torch +import torch.nn.functional as F +from torch import nn + +import comfy.model_management +from comfy.ldm.modules.attention import optimized_attention + +class Attention(nn.Module): + def __init__( + self, + query_dim: int, + cross_attention_dim: Optional[int] = None, + heads: int = 8, + kv_heads: Optional[int] = None, + dim_head: int = 64, + dropout: float = 0.0, + bias: bool = False, + qk_norm: Optional[str] = None, + added_kv_proj_dim: Optional[int] = None, + added_proj_bias: Optional[bool] = True, + out_bias: bool = True, + scale_qk: bool = True, + only_cross_attention: bool = False, + eps: float = 1e-5, + rescale_output_factor: float = 1.0, + residual_connection: bool = False, + processor=None, + out_dim: int = None, + out_context_dim: int = None, + context_pre_only=None, + pre_only=False, + elementwise_affine: bool = True, + is_causal: bool = False, + dtype=None, device=None, operations=None + ): + super().__init__() + + self.inner_dim = out_dim if out_dim is not None else dim_head * heads + self.inner_kv_dim = self.inner_dim if kv_heads is None else dim_head * kv_heads + self.query_dim = query_dim + self.use_bias = bias + self.is_cross_attention = cross_attention_dim is not None + self.cross_attention_dim = cross_attention_dim if cross_attention_dim is not None else query_dim + self.rescale_output_factor = rescale_output_factor + self.residual_connection = residual_connection + self.dropout = dropout + self.fused_projections = False + self.out_dim = out_dim if out_dim is not None else query_dim + self.out_context_dim = out_context_dim if out_context_dim is not None else query_dim + self.context_pre_only = context_pre_only + self.pre_only = pre_only + self.is_causal = is_causal + + self.scale_qk = scale_qk + self.scale = dim_head**-0.5 if self.scale_qk else 1.0 + + self.heads = out_dim // dim_head if out_dim is not None else heads + # for slice_size > 0 the attention score computation + # is split across the batch axis to save memory + # You can set slice_size with `set_attention_slice` + self.sliceable_head_dim = heads + + self.added_kv_proj_dim = added_kv_proj_dim + self.only_cross_attention = only_cross_attention + + if self.added_kv_proj_dim is None and self.only_cross_attention: + raise ValueError( + "`only_cross_attention` can only be set to True if `added_kv_proj_dim` is not None. Make sure to set either `only_cross_attention=False` or define `added_kv_proj_dim`." 
+ ) + + self.group_norm = None + self.spatial_norm = None + + self.norm_q = None + self.norm_k = None + + self.norm_cross = None + self.to_q = operations.Linear(query_dim, self.inner_dim, bias=bias, dtype=dtype, device=device) + + if not self.only_cross_attention: + # only relevant for the `AddedKVProcessor` classes + self.to_k = operations.Linear(self.cross_attention_dim, self.inner_kv_dim, bias=bias, dtype=dtype, device=device) + self.to_v = operations.Linear(self.cross_attention_dim, self.inner_kv_dim, bias=bias, dtype=dtype, device=device) + else: + self.to_k = None + self.to_v = None + + self.added_proj_bias = added_proj_bias + if self.added_kv_proj_dim is not None: + self.add_k_proj = operations.Linear(added_kv_proj_dim, self.inner_kv_dim, bias=added_proj_bias, dtype=dtype, device=device) + self.add_v_proj = operations.Linear(added_kv_proj_dim, self.inner_kv_dim, bias=added_proj_bias, dtype=dtype, device=device) + if self.context_pre_only is not None: + self.add_q_proj = operations.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias, dtype=dtype, device=device) + else: + self.add_q_proj = None + self.add_k_proj = None + self.add_v_proj = None + + if not self.pre_only: + self.to_out = nn.ModuleList([]) + self.to_out.append(operations.Linear(self.inner_dim, self.out_dim, bias=out_bias, dtype=dtype, device=device)) + self.to_out.append(nn.Dropout(dropout)) + else: + self.to_out = None + + if self.context_pre_only is not None and not self.context_pre_only: + self.to_add_out = operations.Linear(self.inner_dim, self.out_context_dim, bias=out_bias, dtype=dtype, device=device) + else: + self.to_add_out = None + + self.norm_added_q = None + self.norm_added_k = None + self.processor = processor + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: Optional[torch.Tensor] = None, + attention_mask: Optional[torch.Tensor] = None, + **cross_attention_kwargs, + ) -> torch.Tensor: + return self.processor( + self, + hidden_states, + encoder_hidden_states=encoder_hidden_states, + attention_mask=attention_mask, + **cross_attention_kwargs, + ) + + +class CustomLiteLAProcessor2_0: + """Attention processor used typically in processing the SD3-like self-attention projections. add rms norm for query and key and apply RoPE""" + + def __init__(self): + self.kernel_func = nn.ReLU(inplace=False) + self.eps = 1e-15 + self.pad_val = 1.0 + + def apply_rotary_emb( + self, + x: torch.Tensor, + freqs_cis: Union[torch.Tensor, Tuple[torch.Tensor]], + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Apply rotary embeddings to input tensors using the given frequency tensor. This function applies rotary embeddings + to the given query or key 'x' tensors using the provided frequency tensor 'freqs_cis'. The input tensors are + reshaped as complex numbers, and the frequency tensor is reshaped for broadcasting compatibility. The resulting + tensors contain rotary embeddings and are returned as real tensors. + + Args: + x (`torch.Tensor`): + Query or key tensor to apply rotary embeddings. [B, H, S, D] xk (torch.Tensor): Key tensor to apply + freqs_cis (`Tuple[torch.Tensor]`): Precomputed frequency tensor for complex exponentials. ([S, D], [S, D],) + + Returns: + Tuple[torch.Tensor, torch.Tensor]: Tuple of modified query tensor and key tensor with rotary embeddings. 
""" + cos, sin = freqs_cis # [S, D] + cos = cos[None, None] + sin = sin[None, None] + cos, sin = cos.to(x.device), sin.to(x.device) + + x_real, x_imag = x.reshape(*x.shape[:-1], -1, 2).unbind(-1) # [B, S, H, D//2] + x_rotated = torch.stack([-x_imag, x_real], dim=-1).flatten(3) + out = (x.float() * cos + x_rotated.float() * sin).to(x.dtype) + + return out + + def __call__( + self, + attn: Attention, + hidden_states: torch.FloatTensor, + encoder_hidden_states: torch.FloatTensor = None, + attention_mask: Optional[torch.FloatTensor] = None, + encoder_attention_mask: Optional[torch.FloatTensor] = None, + rotary_freqs_cis: Union[torch.Tensor, Tuple[torch.Tensor]] = None, + rotary_freqs_cis_cross: Union[torch.Tensor, Tuple[torch.Tensor]] = None, + *args, + **kwargs, + ) -> torch.FloatTensor: + hidden_states_len = hidden_states.shape[1] + + input_ndim = hidden_states.ndim + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + if encoder_hidden_states is not None: + context_input_ndim = encoder_hidden_states.ndim + if context_input_ndim == 4: + batch_size, channel, height, width = encoder_hidden_states.shape + encoder_hidden_states = encoder_hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + batch_size = hidden_states.shape[0] + + # `sample` projections. + dtype = hidden_states.dtype + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + # `context` projections. + has_encoder_hidden_state_proj = hasattr(attn, "add_q_proj") and hasattr(attn, "add_k_proj") and hasattr(attn, "add_v_proj") + if encoder_hidden_states is not None and has_encoder_hidden_state_proj: + encoder_hidden_states_query_proj = attn.add_q_proj(encoder_hidden_states) + encoder_hidden_states_key_proj = attn.add_k_proj(encoder_hidden_states) + encoder_hidden_states_value_proj = attn.add_v_proj(encoder_hidden_states) + + # attention + if not attn.is_cross_attention: + query = torch.cat([query, encoder_hidden_states_query_proj], dim=1) + key = torch.cat([key, encoder_hidden_states_key_proj], dim=1) + value = torch.cat([value, encoder_hidden_states_value_proj], dim=1) + else: + query = hidden_states + key = encoder_hidden_states + value = encoder_hidden_states + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.transpose(-1, -2).reshape(batch_size, attn.heads, head_dim, -1) + key = key.transpose(-1, -2).reshape(batch_size, attn.heads, head_dim, -1).transpose(-1, -2) + value = value.transpose(-1, -2).reshape(batch_size, attn.heads, head_dim, -1) + + # RoPE expects input of shape [B, H, S, D] + # query is currently [B, H, D, S]; it must be converted to [B, H, S, D] before RoPE can be applied + query = query.permute(0, 1, 3, 2) # [B, H, S, D] (from [B, H, D, S]) + + # Apply query and key normalization if needed + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # Apply RoPE if needed + if rotary_freqs_cis is not None: + query = self.apply_rotary_emb(query, rotary_freqs_cis) + if not attn.is_cross_attention: + key = self.apply_rotary_emb(key, rotary_freqs_cis) + elif rotary_freqs_cis_cross is not None and has_encoder_hidden_state_proj: + key = self.apply_rotary_emb(key, rotary_freqs_cis_cross) + + # query is now [B, H, S, D]; convert it back to [B, H, D, S] + query = query.permute(0, 1, 3, 2) # [B, H, D, S] + + if attention_mask is not None: + # attention_mask: [B, S] -> [B, 1, S, 1] + attention_mask = attention_mask[:, None, :,
None].to(key.dtype) # [B, 1, S, 1] + query = query * attention_mask.permute(0, 1, 3, 2) # [B, H, S, D] * [B, 1, S, 1] + if not attn.is_cross_attention: + key = key * attention_mask # key: [B, h, S, D] multiplied by mask [B, 1, S, 1] + value = value * attention_mask.permute(0, 1, 3, 2) # if value is [B, h, D, S], the mask must be adjusted to match the S dimension + + if attn.is_cross_attention and encoder_attention_mask is not None and has_encoder_hidden_state_proj: + encoder_attention_mask = encoder_attention_mask[:, None, :, None].to(key.dtype) # [B, 1, S_enc, 1] + # at this point key: [B, h, S_enc, D], value: [B, h, D, S_enc] + key = key * encoder_attention_mask # [B, h, S_enc, D] * [B, 1, S_enc, 1] + value = value * encoder_attention_mask.permute(0, 1, 3, 2) # [B, h, D, S_enc] * [B, 1, 1, S_enc] + + query = self.kernel_func(query) + key = self.kernel_func(key) + + query, key, value = query.float(), key.float(), value.float() + + value = F.pad(value, (0, 0, 0, 1), mode="constant", value=self.pad_val) + + vk = torch.matmul(value, key) + + hidden_states = torch.matmul(vk, query) + + if hidden_states.dtype in [torch.float16, torch.bfloat16]: + hidden_states = hidden_states.float() + + hidden_states = hidden_states[:, :, :-1] / (hidden_states[:, :, -1:] + self.eps) + + hidden_states = hidden_states.view(batch_size, attn.heads * head_dim, -1).permute(0, 2, 1) + + hidden_states = hidden_states.to(dtype) + if encoder_hidden_states is not None: + encoder_hidden_states = encoder_hidden_states.to(dtype) + + # Split the attention outputs. + if encoder_hidden_states is not None and not attn.is_cross_attention and has_encoder_hidden_state_proj: + hidden_states, encoder_hidden_states = ( + hidden_states[:, : hidden_states_len], + hidden_states[:, hidden_states_len:], + ) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + if encoder_hidden_states is not None and not attn.context_pre_only and not attn.is_cross_attention and hasattr(attn, "to_add_out"): + encoder_hidden_states = attn.to_add_out(encoder_hidden_states) + + if input_ndim == 4: + hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) + if encoder_hidden_states is not None and context_input_ndim == 4: + encoder_hidden_states = encoder_hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) + + if torch.get_autocast_gpu_dtype() == torch.float16: + hidden_states = hidden_states.clip(-65504, 65504) + if encoder_hidden_states is not None: + encoder_hidden_states = encoder_hidden_states.clip(-65504, 65504) + + return hidden_states, encoder_hidden_states + + +class CustomerAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). + """ + + def apply_rotary_emb( + self, + x: torch.Tensor, + freqs_cis: Union[torch.Tensor, Tuple[torch.Tensor]], + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Apply rotary embeddings to input tensors using the given frequency tensor. This function applies rotary embeddings + to the given query or key 'x' tensors using the provided frequency tensor 'freqs_cis'. The input tensors are + reshaped as complex numbers, and the frequency tensor is reshaped for broadcasting compatibility. The resulting + tensors contain rotary embeddings and are returned as real tensors. + + Args: + x (`torch.Tensor`): + Query or key tensor to apply rotary embeddings.
[B, H, S, D] xk (torch.Tensor): Key tensor to apply + freqs_cis (`Tuple[torch.Tensor]`): Precomputed frequency tensor for complex exponentials. ([S, D], [S, D],) + + Returns: + Tuple[torch.Tensor, torch.Tensor]: Tuple of modified query tensor and key tensor with rotary embeddings. + """ + cos, sin = freqs_cis # [S, D] + cos = cos[None, None] + sin = sin[None, None] + cos, sin = cos.to(x.device), sin.to(x.device) + + x_real, x_imag = x.reshape(*x.shape[:-1], -1, 2).unbind(-1) # [B, S, H, D//2] + x_rotated = torch.stack([-x_imag, x_real], dim=-1).flatten(3) + out = (x.float() * cos + x_rotated.float() * sin).to(x.dtype) + + return out + + def __call__( + self, + attn: Attention, + hidden_states: torch.FloatTensor, + encoder_hidden_states: torch.FloatTensor = None, + attention_mask: Optional[torch.FloatTensor] = None, + encoder_attention_mask: Optional[torch.FloatTensor] = None, + rotary_freqs_cis: Union[torch.Tensor, Tuple[torch.Tensor]] = None, + rotary_freqs_cis_cross: Union[torch.Tensor, Tuple[torch.Tensor]] = None, + *args, + **kwargs, + ) -> torch.Tensor: + + residual = hidden_states + input_ndim = hidden_states.ndim + + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + has_encoder_hidden_state_proj = hasattr(attn, "add_q_proj") and hasattr(attn, "add_k_proj") and hasattr(attn, "add_v_proj") + + if attn.group_norm is not None: + hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) + + query = attn.to_q(hidden_states) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + elif attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # Apply RoPE if needed + if rotary_freqs_cis is not None: + query = self.apply_rotary_emb(query, rotary_freqs_cis) + if not attn.is_cross_attention: + key = self.apply_rotary_emb(key, rotary_freqs_cis) + elif rotary_freqs_cis_cross is not None and has_encoder_hidden_state_proj: + key = self.apply_rotary_emb(key, rotary_freqs_cis_cross) + + if attn.is_cross_attention and encoder_attention_mask is not None and has_encoder_hidden_state_proj: + # attention_mask: N x S1 + # encoder_attention_mask: N x S2 + # cross attention: merge attention_mask and encoder_attention_mask + combined_mask = attention_mask[:, :, None] * encoder_attention_mask[:, None, :] + attention_mask = torch.where(combined_mask == 1, 0.0, -torch.inf) + attention_mask = attention_mask[:, None, :, :].expand(-1, attn.heads, -1, -1).to(query.dtype) + + elif not attn.is_cross_attention and attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + # scaled_dot_product_attention expects attention_mask shape to be + # (batch, heads, source_length, target_length) + attention_mask = attention_mask.view(batch_size,
attn.heads, -1, attention_mask.shape[-1]) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + hidden_states = optimized_attention( + query, key, value, heads=query.shape[1], mask=attention_mask, skip_reshape=True, + ).to(query.dtype) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if input_ndim == 4: + hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + +def val2list(x: list or tuple or any, repeat_time=1) -> list: # type: ignore + """Repeat `val` for `repeat_time` times and return the list or val if list/tuple.""" + if isinstance(x, (list, tuple)): + return list(x) + return [x for _ in range(repeat_time)] + + +def val2tuple(x: list or tuple or any, min_len: int = 1, idx_repeat: int = -1) -> tuple: # type: ignore + """Return tuple with min_len by repeating element at idx_repeat.""" + # convert to list first + x = val2list(x) + + # repeat elements if necessary + if len(x) > 0: + x[idx_repeat:idx_repeat] = [x[idx_repeat] for _ in range(min_len - len(x))] + + return tuple(x) + + +def t2i_modulate(x, shift, scale): + return x * (1 + scale) + shift + + +def get_same_padding(kernel_size: Union[int, Tuple[int, ...]]) -> Union[int, Tuple[int, ...]]: + if isinstance(kernel_size, tuple): + return tuple([get_same_padding(ks) for ks in kernel_size]) + else: + assert kernel_size % 2 > 0, f"kernel size {kernel_size} should be odd number" + return kernel_size // 2 + +class ConvLayer(nn.Module): + def __init__( + self, + in_dim: int, + out_dim: int, + kernel_size=3, + stride=1, + dilation=1, + groups=1, + padding: Union[int, None] = None, + use_bias=False, + norm=None, + act=None, + dtype=None, device=None, operations=None + ): + super().__init__() + if padding is None: + padding = get_same_padding(kernel_size) + padding *= dilation + + self.in_dim = in_dim + self.out_dim = out_dim + self.kernel_size = kernel_size + self.stride = stride + self.dilation = dilation + self.groups = groups + self.padding = padding + self.use_bias = use_bias + + self.conv = operations.Conv1d( + in_dim, + out_dim, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=use_bias, + device=device, + dtype=dtype + ) + if norm is not None: + self.norm = operations.RMSNorm(out_dim, elementwise_affine=False, dtype=dtype, device=device) + else: + self.norm = None + if act is not None: + self.act = nn.SiLU(inplace=True) + else: + self.act = None + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.conv(x) + if self.norm: + x = self.norm(x) + if self.act: + x = self.act(x) + return x + + +class GLUMBConv(nn.Module): + def __init__( + self, + in_features: int, + hidden_features: int, + out_feature=None, + kernel_size=3, + stride=1, + padding: Union[int, None] = None, + use_bias=False, + norm=(None, None, None), + act=("silu", "silu", None), + dilation=1, + dtype=None, device=None, operations=None + ): + out_feature = out_feature or in_features + super().__init__() + use_bias = val2tuple(use_bias, 3) + norm = val2tuple(norm, 3) + act = val2tuple(act, 3) + + self.glu_act = nn.SiLU(inplace=False) + self.inverted_conv = ConvLayer( + in_features, + hidden_features * 2, + 1, + use_bias=use_bias[0], + norm=norm[0], + act=act[0], + dtype=dtype, + device=device, + 
operations=operations, + ) + self.depth_conv = ConvLayer( + hidden_features * 2, + hidden_features * 2, + kernel_size, + stride=stride, + groups=hidden_features * 2, + padding=padding, + use_bias=use_bias[1], + norm=norm[1], + act=None, + dilation=dilation, + dtype=dtype, + device=device, + operations=operations, + ) + self.point_conv = ConvLayer( + hidden_features, + out_feature, + 1, + use_bias=use_bias[2], + norm=norm[2], + act=act[2], + dtype=dtype, + device=device, + operations=operations, + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = x.transpose(1, 2) + x = self.inverted_conv(x) + x = self.depth_conv(x) + + x, gate = torch.chunk(x, 2, dim=1) + gate = self.glu_act(gate) + x = x * gate + + x = self.point_conv(x) + x = x.transpose(1, 2) + + return x + + +class LinearTransformerBlock(nn.Module): + """ + A Sana block with global shared adaptive layer norm (adaLN-single) conditioning. + """ + def __init__( + self, + dim, + num_attention_heads, + attention_head_dim, + use_adaln_single=True, + cross_attention_dim=None, + added_kv_proj_dim=None, + context_pre_only=False, + mlp_ratio=4.0, + add_cross_attention=False, + add_cross_attention_dim=None, + qk_norm=None, + dtype=None, device=None, operations=None + ): + super().__init__() + + self.norm1 = operations.RMSNorm(dim, elementwise_affine=False, eps=1e-6) + self.attn = Attention( + query_dim=dim, + cross_attention_dim=cross_attention_dim, + added_kv_proj_dim=added_kv_proj_dim, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=dim, + bias=True, + qk_norm=qk_norm, + processor=CustomLiteLAProcessor2_0(), + dtype=dtype, + device=device, + operations=operations, + ) + + self.add_cross_attention = add_cross_attention + self.context_pre_only = context_pre_only + + if add_cross_attention and add_cross_attention_dim is not None: + self.cross_attn = Attention( + query_dim=dim, + cross_attention_dim=add_cross_attention_dim, + added_kv_proj_dim=add_cross_attention_dim, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=dim, + context_pre_only=context_pre_only, + bias=True, + qk_norm=qk_norm, + processor=CustomerAttnProcessor2_0(), + dtype=dtype, + device=device, + operations=operations, + ) + + self.norm2 = operations.RMSNorm(dim, 1e-06, elementwise_affine=False) + + self.ff = GLUMBConv( + in_features=dim, + hidden_features=int(dim * mlp_ratio), + use_bias=(True, True, False), + norm=(None, None, None), + act=("silu", "silu", None), + dtype=dtype, + device=device, + operations=operations, + ) + self.use_adaln_single = use_adaln_single + if use_adaln_single: + self.scale_shift_table = nn.Parameter(torch.empty(6, dim, dtype=dtype, device=device)) + + def forward( + self, + hidden_states: torch.FloatTensor, + encoder_hidden_states: torch.FloatTensor = None, + attention_mask: torch.FloatTensor = None, + encoder_attention_mask: torch.FloatTensor = None, + rotary_freqs_cis: Union[torch.Tensor, Tuple[torch.Tensor]] = None, + rotary_freqs_cis_cross: Union[torch.Tensor, Tuple[torch.Tensor]] = None, + temb: torch.FloatTensor = None, + ): + + N = hidden_states.shape[0] + + # step 1: AdaLN single + if self.use_adaln_single: + shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = ( + comfy.model_management.cast_to(self.scale_shift_table[None], dtype=temb.dtype, device=temb.device) + temb.reshape(N, 6, -1) + ).chunk(6, dim=1) + + norm_hidden_states = self.norm1(hidden_states) + if self.use_adaln_single: + norm_hidden_states = norm_hidden_states * (1 + scale_msa) + shift_msa + + # step 2: attention 
+ if not self.add_cross_attention: + attn_output, encoder_hidden_states = self.attn( + hidden_states=norm_hidden_states, + attention_mask=attention_mask, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask, + rotary_freqs_cis=rotary_freqs_cis, + rotary_freqs_cis_cross=rotary_freqs_cis_cross, + ) + else: + attn_output, _ = self.attn( + hidden_states=norm_hidden_states, + attention_mask=attention_mask, + encoder_hidden_states=None, + encoder_attention_mask=None, + rotary_freqs_cis=rotary_freqs_cis, + rotary_freqs_cis_cross=None, + ) + + if self.use_adaln_single: + attn_output = gate_msa * attn_output + hidden_states = attn_output + hidden_states + + if self.add_cross_attention: + attn_output = self.cross_attn( + hidden_states=hidden_states, + attention_mask=attention_mask, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask, + rotary_freqs_cis=rotary_freqs_cis, + rotary_freqs_cis_cross=rotary_freqs_cis_cross, + ) + hidden_states = attn_output + hidden_states + + # step 3: add norm + norm_hidden_states = self.norm2(hidden_states) + if self.use_adaln_single: + norm_hidden_states = norm_hidden_states * (1 + scale_mlp) + shift_mlp + + # step 4: feed forward + ff_output = self.ff(norm_hidden_states) + if self.use_adaln_single: + ff_output = gate_mlp * ff_output + + hidden_states = hidden_states + ff_output + + return hidden_states diff --git a/comfy/ldm/ace/lyric_encoder.py b/comfy/ldm/ace/lyric_encoder.py new file mode 100644 index 00000000..ff4359b2 --- /dev/null +++ b/comfy/ldm/ace/lyric_encoder.py @@ -0,0 +1,1067 @@ +# Original from: https://github.com/ace-step/ACE-Step/blob/main/models/lyrics_utils/lyric_encoder.py +from typing import Optional, Tuple, Union +import math +import torch +from torch import nn + +import comfy.model_management + +class ConvolutionModule(nn.Module): + """ConvolutionModule in Conformer model.""" + + def __init__(self, + channels: int, + kernel_size: int = 15, + activation: nn.Module = nn.ReLU(), + norm: str = "batch_norm", + causal: bool = False, + bias: bool = True, + dtype=None, device=None, operations=None): + """Construct an ConvolutionModule object. + Args: + channels (int): The number of channels of conv layers. + kernel_size (int): Kernel size of conv layers. + causal (int): Whether use causal convolution or not + """ + super().__init__() + + self.pointwise_conv1 = operations.Conv1d( + channels, + 2 * channels, + kernel_size=1, + stride=1, + padding=0, + bias=bias, + dtype=dtype, device=device + ) + # self.lorder is used to distinguish if it's a causal convolution, + # if self.lorder > 0: it's a causal convolution, the input will be + # padded with self.lorder frames on the left in forward. 
+ # else: it's a symmetrical convolution + if causal: + padding = 0 + self.lorder = kernel_size - 1 + else: + # kernel_size should be an odd number for none causal convolution + assert (kernel_size - 1) % 2 == 0 + padding = (kernel_size - 1) // 2 + self.lorder = 0 + self.depthwise_conv = operations.Conv1d( + channels, + channels, + kernel_size, + stride=1, + padding=padding, + groups=channels, + bias=bias, + dtype=dtype, device=device + ) + + assert norm in ['batch_norm', 'layer_norm'] + if norm == "batch_norm": + self.use_layer_norm = False + self.norm = nn.BatchNorm1d(channels) + else: + self.use_layer_norm = True + self.norm = operations.LayerNorm(channels, dtype=dtype, device=device) + + self.pointwise_conv2 = operations.Conv1d( + channels, + channels, + kernel_size=1, + stride=1, + padding=0, + bias=bias, + dtype=dtype, device=device + ) + self.activation = activation + + def forward( + self, + x: torch.Tensor, + mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), + cache: torch.Tensor = torch.zeros((0, 0, 0)), + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Compute convolution module. + Args: + x (torch.Tensor): Input tensor (#batch, time, channels). + mask_pad (torch.Tensor): used for batch padding (#batch, 1, time), + (0, 0, 0) means fake mask. + cache (torch.Tensor): left context cache, it is only + used in causal convolution (#batch, channels, cache_t), + (0, 0, 0) meas fake cache. + Returns: + torch.Tensor: Output tensor (#batch, time, channels). + """ + # exchange the temporal dimension and the feature dimension + x = x.transpose(1, 2) # (#batch, channels, time) + + # mask batch padding + if mask_pad.size(2) > 0: # time > 0 + x.masked_fill_(~mask_pad, 0.0) + + if self.lorder > 0: + if cache.size(2) == 0: # cache_t == 0 + x = nn.functional.pad(x, (self.lorder, 0), 'constant', 0.0) + else: + assert cache.size(0) == x.size(0) # equal batch + assert cache.size(1) == x.size(1) # equal channel + x = torch.cat((cache, x), dim=2) + assert (x.size(2) > self.lorder) + new_cache = x[:, :, -self.lorder:] + else: + # It's better we just return None if no cache is required, + # However, for JIT export, here we just fake one tensor instead of + # None. + new_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device) + + # GLU mechanism + x = self.pointwise_conv1(x) # (batch, 2*channel, dim) + x = nn.functional.glu(x, dim=1) # (batch, channel, dim) + + # 1D Depthwise Conv + x = self.depthwise_conv(x) + if self.use_layer_norm: + x = x.transpose(1, 2) + x = self.activation(self.norm(x)) + if self.use_layer_norm: + x = x.transpose(1, 2) + x = self.pointwise_conv2(x) + # mask batch padding + if mask_pad.size(2) > 0: # time > 0 + x.masked_fill_(~mask_pad, 0.0) + + return x.transpose(1, 2), new_cache + +class PositionwiseFeedForward(torch.nn.Module): + """Positionwise feed forward layer. + + FeedForward are appied on each position of the sequence. + The output dim is same with the input dim. + + Args: + idim (int): Input dimenstion. + hidden_units (int): The number of hidden units. + dropout_rate (float): Dropout rate. 
+ activation (torch.nn.Module): Activation function + """ + + def __init__( + self, + idim: int, + hidden_units: int, + dropout_rate: float, + activation: torch.nn.Module = torch.nn.ReLU(), + dtype=None, device=None, operations=None + ): + """Construct a PositionwiseFeedForward object.""" + super(PositionwiseFeedForward, self).__init__() + self.w_1 = operations.Linear(idim, hidden_units, dtype=dtype, device=device) + self.activation = activation + self.dropout = torch.nn.Dropout(dropout_rate) + self.w_2 = operations.Linear(hidden_units, idim, dtype=dtype, device=device) + + def forward(self, xs: torch.Tensor) -> torch.Tensor: + """Forward function. + + Args: + xs: input tensor (B, L, D) + Returns: + output tensor, (B, L, D) + """ + return self.w_2(self.dropout(self.activation(self.w_1(xs)))) + +class Swish(torch.nn.Module): + """Construct an Swish object.""" + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """Return Swish activation function.""" + return x * torch.sigmoid(x) + +class MultiHeadedAttention(nn.Module): + """Multi-Head Attention layer. + + Args: + n_head (int): The number of heads. + n_feat (int): The number of features. + dropout_rate (float): Dropout rate. + + """ + + def __init__(self, + n_head: int, + n_feat: int, + dropout_rate: float, + key_bias: bool = True, + dtype=None, device=None, operations=None): + """Construct an MultiHeadedAttention object.""" + super().__init__() + assert n_feat % n_head == 0 + # We assume d_v always equals d_k + self.d_k = n_feat // n_head + self.h = n_head + self.linear_q = operations.Linear(n_feat, n_feat, dtype=dtype, device=device) + self.linear_k = operations.Linear(n_feat, n_feat, bias=key_bias, dtype=dtype, device=device) + self.linear_v = operations.Linear(n_feat, n_feat, dtype=dtype, device=device) + self.linear_out = operations.Linear(n_feat, n_feat, dtype=dtype, device=device) + self.dropout = nn.Dropout(p=dropout_rate) + + def forward_qkv( + self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """Transform query, key and value. + + Args: + query (torch.Tensor): Query tensor (#batch, time1, size). + key (torch.Tensor): Key tensor (#batch, time2, size). + value (torch.Tensor): Value tensor (#batch, time2, size). + + Returns: + torch.Tensor: Transformed query tensor, size + (#batch, n_head, time1, d_k). + torch.Tensor: Transformed key tensor, size + (#batch, n_head, time2, d_k). + torch.Tensor: Transformed value tensor, size + (#batch, n_head, time2, d_k). + + """ + n_batch = query.size(0) + q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) + k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) + v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) + q = q.transpose(1, 2) # (batch, head, time1, d_k) + k = k.transpose(1, 2) # (batch, head, time2, d_k) + v = v.transpose(1, 2) # (batch, head, time2, d_k) + return q, k, v + + def forward_attention( + self, + value: torch.Tensor, + scores: torch.Tensor, + mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool) + ) -> torch.Tensor: + """Compute attention context vector. + + Args: + value (torch.Tensor): Transformed value, size + (#batch, n_head, time2, d_k). + scores (torch.Tensor): Attention score, size + (#batch, n_head, time1, time2). + mask (torch.Tensor): Mask, size (#batch, 1, time2) or + (#batch, time1, time2), (0, 0, 0) means fake mask. + + Returns: + torch.Tensor: Transformed value (#batch, time1, d_model) + weighted by the attention score (#batch, time1, time2). 
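+
+        Note:
+            When the fake default mask torch.ones((0, 0, 0), dtype=torch.bool)
+            is passed, time2 == 0, so no masking is applied and the scores are
+            softmaxed directly.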
+ + """ + n_batch = value.size(0) + + if mask is not None and mask.size(2) > 0: # time2 > 0 + mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) + # For last chunk, time2 might be larger than scores.size(-1) + mask = mask[:, :, :, :scores.size(-1)] # (batch, 1, *, time2) + scores = scores.masked_fill(mask, -float('inf')) + attn = torch.softmax(scores, dim=-1).masked_fill( + mask, 0.0) # (batch, head, time1, time2) + + else: + attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) + + p_attn = self.dropout(attn) + x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) + x = (x.transpose(1, 2).contiguous().view(n_batch, -1, + self.h * self.d_k) + ) # (batch, time1, d_model) + + return self.linear_out(x) # (batch, time1, d_model) + + def forward( + self, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), + pos_emb: torch.Tensor = torch.empty(0), + cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Compute scaled dot product attention. + + Args: + query (torch.Tensor): Query tensor (#batch, time1, size). + key (torch.Tensor): Key tensor (#batch, time2, size). + value (torch.Tensor): Value tensor (#batch, time2, size). + mask (torch.Tensor): Mask tensor (#batch, 1, time2) or + (#batch, time1, time2). + 1.When applying cross attention between decoder and encoder, + the batch padding mask for input is in (#batch, 1, T) shape. + 2.When applying self attention of encoder, + the mask is in (#batch, T, T) shape. + 3.When applying self attention of decoder, + the mask is in (#batch, L, L) shape. + 4.If the different position in decoder see different block + of the encoder, such as Mocha, the passed in mask could be + in (#batch, L, T) shape. But there is no such case in current + CosyVoice. + cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), + where `cache_t == chunk_size * num_decoding_left_chunks` + and `head * d_k == size` + + + Returns: + torch.Tensor: Output tensor (#batch, time1, d_model). + torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) + where `cache_t == chunk_size * num_decoding_left_chunks` + and `head * d_k == size` + + """ + q, k, v = self.forward_qkv(query, key, value) + if cache.size(0) > 0: + key_cache, value_cache = torch.split(cache, + cache.size(-1) // 2, + dim=-1) + k = torch.cat([key_cache, k], dim=2) + v = torch.cat([value_cache, v], dim=2) + new_cache = torch.cat((k, v), dim=-1) + + scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) + return self.forward_attention(v, scores, mask), new_cache + + +class RelPositionMultiHeadedAttention(MultiHeadedAttention): + """Multi-Head Attention layer with relative position encoding. + Paper: https://arxiv.org/abs/1901.02860 + Args: + n_head (int): The number of heads. + n_feat (int): The number of features. + dropout_rate (float): Dropout rate. 
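+
+        Compared to the plain MultiHeadedAttention, this layer adds a bias-free
+        projection of the positional embeddings (linear_pos) and the two
+        learnable biases pos_bias_u / pos_bias_v used for matrices c and d of
+        https://arxiv.org/abs/1901.02860 Section 3.3.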
+ """ + + def __init__(self, + n_head: int, + n_feat: int, + dropout_rate: float, + key_bias: bool = True, + dtype=None, device=None, operations=None): + """Construct an RelPositionMultiHeadedAttention object.""" + super().__init__(n_head, n_feat, dropout_rate, key_bias, dtype=dtype, device=device, operations=operations) + # linear transformation for positional encoding + self.linear_pos = operations.Linear(n_feat, n_feat, bias=False, dtype=dtype, device=device) + # these two learnable bias are used in matrix c and matrix d + # as described in https://arxiv.org/abs/1901.02860 Section 3.3 + self.pos_bias_u = nn.Parameter(torch.empty(self.h, self.d_k, dtype=dtype, device=device)) + self.pos_bias_v = nn.Parameter(torch.empty(self.h, self.d_k, dtype=dtype, device=device)) + # torch.nn.init.xavier_uniform_(self.pos_bias_u) + # torch.nn.init.xavier_uniform_(self.pos_bias_v) + + def rel_shift(self, x: torch.Tensor) -> torch.Tensor: + """Compute relative positional encoding. + + Args: + x (torch.Tensor): Input tensor (batch, head, time1, 2*time1-1). + time1 means the length of query vector. + + Returns: + torch.Tensor: Output tensor. + + """ + zero_pad = torch.zeros((x.size()[0], x.size()[1], x.size()[2], 1), + device=x.device, + dtype=x.dtype) + x_padded = torch.cat([zero_pad, x], dim=-1) + + x_padded = x_padded.view(x.size()[0], + x.size()[1], + x.size(3) + 1, x.size(2)) + x = x_padded[:, :, 1:].view_as(x)[ + :, :, :, : x.size(-1) // 2 + 1 + ] # only keep the positions from 0 to time2 + return x + + def forward( + self, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + mask: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), + pos_emb: torch.Tensor = torch.empty(0), + cache: torch.Tensor = torch.zeros((0, 0, 0, 0)) + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Compute 'Scaled Dot Product Attention' with rel. positional encoding. + Args: + query (torch.Tensor): Query tensor (#batch, time1, size). + key (torch.Tensor): Key tensor (#batch, time2, size). + value (torch.Tensor): Value tensor (#batch, time2, size). + mask (torch.Tensor): Mask tensor (#batch, 1, time2) or + (#batch, time1, time2), (0, 0, 0) means fake mask. + pos_emb (torch.Tensor): Positional embedding tensor + (#batch, time2, size). + cache (torch.Tensor): Cache tensor (1, head, cache_t, d_k * 2), + where `cache_t == chunk_size * num_decoding_left_chunks` + and `head * d_k == size` + Returns: + torch.Tensor: Output tensor (#batch, time1, d_model). + torch.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2) + where `cache_t == chunk_size * num_decoding_left_chunks` + and `head * d_k == size` + """ + q, k, v = self.forward_qkv(query, key, value) + q = q.transpose(1, 2) # (batch, time1, head, d_k) + + if cache.size(0) > 0: + key_cache, value_cache = torch.split(cache, + cache.size(-1) // 2, + dim=-1) + k = torch.cat([key_cache, k], dim=2) + v = torch.cat([value_cache, v], dim=2) + # NOTE(xcsong): We do cache slicing in encoder.forward_chunk, since it's + # non-trivial to calculate `next_cache_start` here. 
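+        # The cache packs keys and values along the last dimension, giving a
+        # (1, head, cache_t + time1, d_k * 2) tensor: the two halves are
+        # recovered above with torch.split(cache, cache.size(-1) // 2, dim=-1)
+        # and the updated pair is re-packed the same way here.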
+ new_cache = torch.cat((k, v), dim=-1) + + n_batch_pos = pos_emb.size(0) + p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) + p = p.transpose(1, 2) # (batch, head, time1, d_k) + + # (batch, head, time1, d_k) + q_with_bias_u = (q + comfy.model_management.cast_to(self.pos_bias_u, dtype=q.dtype, device=q.device)).transpose(1, 2) + # (batch, head, time1, d_k) + q_with_bias_v = (q + comfy.model_management.cast_to(self.pos_bias_v, dtype=q.dtype, device=q.device)).transpose(1, 2) + + # compute attention score + # first compute matrix a and matrix c + # as described in https://arxiv.org/abs/1901.02860 Section 3.3 + # (batch, head, time1, time2) + matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) + + # compute matrix b and matrix d + # (batch, head, time1, time2) + matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) + # NOTE(Xiang Lyu): Keep rel_shift since espnet rel_pos_emb is used + if matrix_ac.shape != matrix_bd.shape: + matrix_bd = self.rel_shift(matrix_bd) + + scores = (matrix_ac + matrix_bd) / math.sqrt( + self.d_k) # (batch, head, time1, time2) + + return self.forward_attention(v, scores, mask), new_cache + + + +def subsequent_mask( + size: int, + device: torch.device = torch.device("cpu"), +) -> torch.Tensor: + """Create mask for subsequent steps (size, size). + + This mask is used only in decoder which works in an auto-regressive mode. + This means the current step could only do attention with its left steps. + + In encoder, fully attention is used when streaming is not necessary and + the sequence is not long. In this case, no attention mask is needed. + + When streaming is need, chunk-based attention is used in encoder. See + subsequent_chunk_mask for the chunk-based attention mask. + + Args: + size (int): size of mask + str device (str): "cpu" or "cuda" or torch.Tensor.device + dtype (torch.device): result dtype + + Returns: + torch.Tensor: mask + + Examples: + >>> subsequent_mask(3) + [[1, 0, 0], + [1, 1, 0], + [1, 1, 1]] + """ + arange = torch.arange(size, device=device) + mask = arange.expand(size, size) + arange = arange.unsqueeze(-1) + mask = mask <= arange + return mask + + +def subsequent_chunk_mask( + size: int, + chunk_size: int, + num_left_chunks: int = -1, + device: torch.device = torch.device("cpu"), + ) -> torch.Tensor: + """Create mask for subsequent steps (size, size) with chunk size, + this is for streaming encoder + + Args: + size (int): size of mask + chunk_size (int): size of chunk + num_left_chunks (int): number of left chunks + <0: use full chunk + >=0: use num_left_chunks + device (torch.device): "cpu" or "cuda" or torch.Tensor.device + + Returns: + torch.Tensor: mask + + Examples: + >>> subsequent_chunk_mask(4, 2) + [[1, 1, 0, 0], + [1, 1, 0, 0], + [1, 1, 1, 1], + [1, 1, 1, 1]] + """ + ret = torch.zeros(size, size, device=device, dtype=torch.bool) + for i in range(size): + if num_left_chunks < 0: + start = 0 + else: + start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) + ending = min((i // chunk_size + 1) * chunk_size, size) + ret[i, start:ending] = True + return ret + +def add_optional_chunk_mask(xs: torch.Tensor, + masks: torch.Tensor, + use_dynamic_chunk: bool, + use_dynamic_left_chunk: bool, + decoding_chunk_size: int, + static_chunk_size: int, + num_decoding_left_chunks: int, + enable_full_context: bool = True): + """ Apply optional mask for encoder. 
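+
+    Depending on the configuration this either samples a (possibly random)
+    chunk mask and combines it with the padding mask, applies a fixed
+    static_chunk_size mask, or falls back to the plain padding mask.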
+ + Args: + xs (torch.Tensor): padded input, (B, L, D), L for max length + mask (torch.Tensor): mask for xs, (B, 1, L) + use_dynamic_chunk (bool): whether to use dynamic chunk or not + use_dynamic_left_chunk (bool): whether to use dynamic left chunk for + training. + decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's + 0: default for training, use random dynamic chunk. + <0: for decoding, use full chunk. + >0: for decoding, use fixed chunk size as set. + static_chunk_size (int): chunk size for static chunk training/decoding + if it's greater than 0, if use_dynamic_chunk is true, + this parameter will be ignored + num_decoding_left_chunks: number of left chunks, this is for decoding, + the chunk size is decoding_chunk_size. + >=0: use num_decoding_left_chunks + <0: use all left chunks + enable_full_context (bool): + True: chunk size is either [1, 25] or full context(max_len) + False: chunk size ~ U[1, 25] + + Returns: + torch.Tensor: chunk mask of the input xs. + """ + # Whether to use chunk mask or not + if use_dynamic_chunk: + max_len = xs.size(1) + if decoding_chunk_size < 0: + chunk_size = max_len + num_left_chunks = -1 + elif decoding_chunk_size > 0: + chunk_size = decoding_chunk_size + num_left_chunks = num_decoding_left_chunks + else: + # chunk size is either [1, 25] or full context(max_len). + # Since we use 4 times subsampling and allow up to 1s(100 frames) + # delay, the maximum frame is 100 / 4 = 25. + chunk_size = torch.randint(1, max_len, (1, )).item() + num_left_chunks = -1 + if chunk_size > max_len // 2 and enable_full_context: + chunk_size = max_len + else: + chunk_size = chunk_size % 25 + 1 + if use_dynamic_left_chunk: + max_left_chunks = (max_len - 1) // chunk_size + num_left_chunks = torch.randint(0, max_left_chunks, + (1, )).item() + chunk_masks = subsequent_chunk_mask(xs.size(1), chunk_size, + num_left_chunks, + xs.device) # (L, L) + chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) + chunk_masks = masks & chunk_masks # (B, L, L) + elif static_chunk_size > 0: + num_left_chunks = num_decoding_left_chunks + chunk_masks = subsequent_chunk_mask(xs.size(1), static_chunk_size, + num_left_chunks, + xs.device) # (L, L) + chunk_masks = chunk_masks.unsqueeze(0) # (1, L, L) + chunk_masks = masks & chunk_masks # (B, L, L) + else: + chunk_masks = masks + return chunk_masks + + +class ConformerEncoderLayer(nn.Module): + """Encoder layer module. + Args: + size (int): Input dimension. + self_attn (torch.nn.Module): Self-attention module instance. + `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` + instance can be used as the argument. + feed_forward (torch.nn.Module): Feed-forward module instance. + `PositionwiseFeedForward` instance can be used as the argument. + feed_forward_macaron (torch.nn.Module): Additional feed-forward module + instance. + `PositionwiseFeedForward` instance can be used as the argument. + conv_module (torch.nn.Module): Convolution module instance. + `ConvlutionModule` instance can be used as the argument. + dropout_rate (float): Dropout rate. + normalize_before (bool): + True: use layer_norm before each sub-block. + False: use layer_norm after each sub-block. 
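+
+        When feed_forward_macaron is given, the layer runs a half-weighted
+        feed-forward, then self-attention, then the optional convolution
+        module, then another half-weighted feed-forward (ff_scale = 0.5);
+        otherwise a single full-weight feed-forward (ff_scale = 1.0) follows
+        the attention and convolution modules.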
+ """ + + def __init__( + self, + size: int, + self_attn: torch.nn.Module, + feed_forward: Optional[nn.Module] = None, + feed_forward_macaron: Optional[nn.Module] = None, + conv_module: Optional[nn.Module] = None, + dropout_rate: float = 0.1, + normalize_before: bool = True, + dtype=None, device=None, operations=None + ): + """Construct an EncoderLayer object.""" + super().__init__() + self.self_attn = self_attn + self.feed_forward = feed_forward + self.feed_forward_macaron = feed_forward_macaron + self.conv_module = conv_module + self.norm_ff = operations.LayerNorm(size, eps=1e-5, dtype=dtype, device=device) # for the FNN module + self.norm_mha = operations.LayerNorm(size, eps=1e-5, dtype=dtype, device=device) # for the MHA module + if feed_forward_macaron is not None: + self.norm_ff_macaron = operations.LayerNorm(size, eps=1e-5, dtype=dtype, device=device) + self.ff_scale = 0.5 + else: + self.ff_scale = 1.0 + if self.conv_module is not None: + self.norm_conv = operations.LayerNorm(size, eps=1e-5, dtype=dtype, device=device) # for the CNN module + self.norm_final = operations.LayerNorm( + size, eps=1e-5, dtype=dtype, device=device) # for the final output of the block + self.dropout = nn.Dropout(dropout_rate) + self.size = size + self.normalize_before = normalize_before + + def forward( + self, + x: torch.Tensor, + mask: torch.Tensor, + pos_emb: torch.Tensor, + mask_pad: torch.Tensor = torch.ones((0, 0, 0), dtype=torch.bool), + att_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), + cnn_cache: torch.Tensor = torch.zeros((0, 0, 0, 0)), + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """Compute encoded features. + + Args: + x (torch.Tensor): (#batch, time, size) + mask (torch.Tensor): Mask tensor for the input (#batch, time,time), + (0, 0, 0) means fake mask. + pos_emb (torch.Tensor): positional encoding, must not be None + for ConformerEncoderLayer. + mask_pad (torch.Tensor): batch padding mask used for conv module. + (#batch, 1,time), (0, 0, 0) means fake mask. + att_cache (torch.Tensor): Cache tensor of the KEY & VALUE + (#batch=1, head, cache_t1, d_k * 2), head * d_k == size. + cnn_cache (torch.Tensor): Convolution cache in conformer layer + (#batch=1, size, cache_t2) + Returns: + torch.Tensor: Output tensor (#batch, time, size). + torch.Tensor: Mask tensor (#batch, time, time). + torch.Tensor: att_cache tensor, + (#batch=1, head, cache_t1 + time, d_k * 2). + torch.Tensor: cnn_cahce tensor (#batch, size, cache_t2). 
+        """
+
+        # whether to use macaron style
+        if self.feed_forward_macaron is not None:
+            residual = x
+            if self.normalize_before:
+                x = self.norm_ff_macaron(x)
+            x = residual + self.ff_scale * self.dropout(
+                self.feed_forward_macaron(x))
+            if not self.normalize_before:
+                x = self.norm_ff_macaron(x)
+
+        # multi-headed self-attention module
+        residual = x
+        if self.normalize_before:
+            x = self.norm_mha(x)
+        x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb,
+                                              att_cache)
+        x = residual + self.dropout(x_att)
+        if not self.normalize_before:
+            x = self.norm_mha(x)
+
+        # convolution module
+        # Fake new cnn cache here, and then change it in conv_module
+        new_cnn_cache = torch.zeros((0, 0, 0), dtype=x.dtype, device=x.device)
+        if self.conv_module is not None:
+            residual = x
+            if self.normalize_before:
+                x = self.norm_conv(x)
+            x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache)
+            x = residual + self.dropout(x)
+
+            if not self.normalize_before:
+                x = self.norm_conv(x)
+
+        # feed forward module
+        residual = x
+        if self.normalize_before:
+            x = self.norm_ff(x)
+
+        x = residual + self.ff_scale * self.dropout(self.feed_forward(x))
+        if not self.normalize_before:
+            x = self.norm_ff(x)
+
+        if self.conv_module is not None:
+            x = self.norm_final(x)
+
+        return x, mask, new_att_cache, new_cnn_cache
+
+
+
+class EspnetRelPositionalEncoding(torch.nn.Module):
+    """Relative positional encoding module (new implementation).
+
+    Details can be found in https://github.com/espnet/espnet/pull/2816.
+
+    See : Appendix B in https://arxiv.org/abs/1901.02860
+
+    Args:
+        d_model (int): Embedding dimension.
+        dropout_rate (float): Dropout rate.
+        max_len (int): Maximum input length.
+
+    """
+
+    def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000):
+        """Construct an PositionalEncoding object."""
+        super(EspnetRelPositionalEncoding, self).__init__()
+        self.d_model = d_model
+        self.xscale = math.sqrt(self.d_model)
+        self.dropout = torch.nn.Dropout(p=dropout_rate)
+        self.pe = None
+        self.extend_pe(torch.tensor(0.0).expand(1, max_len))
+
+    def extend_pe(self, x: torch.Tensor):
+        """Reset the positional encodings."""
+        if self.pe is not None:
+            # self.pe contains both positive and negative parts
+            # the length of self.pe is 2 * input_len - 1
+            if self.pe.size(1) >= x.size(1) * 2 - 1:
+                if self.pe.dtype != x.dtype or self.pe.device != x.device:
+                    self.pe = self.pe.to(dtype=x.dtype, device=x.device)
+                return
+        # Suppose `i` means to the position of query vector and `j` means the
+        # position of key vector. We use positive relative positions when keys
+        # are to the left (i>j) and negative relative positions otherwise (i<j).
+        pe_positive = torch.zeros(x.size(1), self.d_model)
+        pe_negative = torch.zeros(x.size(1), self.d_model)
+        position = torch.arange(0, x.size(1), dtype=torch.int64).float().unsqueeze(1)
+        div_term = torch.exp(
+            torch.arange(0, self.d_model, 2, dtype=torch.int64).float()
+            * -(math.log(10000.0) / self.d_model)
+        )
+        pe_positive[:, 0::2] = torch.sin(position * div_term)
+        pe_positive[:, 1::2] = torch.cos(position * div_term)
+        pe_negative[:, 0::2] = torch.sin(-1 * position * div_term)
+        pe_negative[:, 1::2] = torch.cos(-1 * position * div_term)
+
+        # Reverse the order of positive indices and concat both positive and
+        # negative indices. This is used to support the shifting trick
+        # as in https://arxiv.org/abs/1901.02860
+        pe_positive = torch.flip(pe_positive, [0]).unsqueeze(0)
+        pe_negative = pe_negative[1:].unsqueeze(0)
+        pe = torch.cat([pe_positive, pe_negative], dim=1)
+        self.pe = pe
+
+    def forward(self, x: torch.Tensor,
+                offset: Union[int, torch.Tensor] = 0) -> Tuple[torch.Tensor, torch.Tensor]:
+        """Add positional encoding.
+
+        Args:
+            x (torch.Tensor): Input tensor (batch, time, `*`).
+
+        Returns:
+            torch.Tensor: Encoded tensor (batch, time, `*`).
+
+        """
+        self.extend_pe(x)
+        x = x * self.xscale
+        pos_emb = self.position_encoding(size=x.size(1), offset=offset)
+        return self.dropout(x), self.dropout(pos_emb)
+
+    def position_encoding(self,
+                          offset: Union[int, torch.Tensor],
+                          size: int) -> torch.Tensor:
+        """ For getting encoding in a streaming fashion
+
+        Attention!!!!!
+        we apply dropout only once at the whole utterance level in a none
+        streaming way, but will call this function several times with
+        increasing input size in a streaming scenario, so the dropout will
+        be applied several times.
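+
+        The returned slice is centred on relative position 0, so for a query
+        of length `size` it covers the 2 * size - 1 relative positions from
+        (size - 1) down to -(size - 1).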
+ + Args: + offset (int or torch.tensor): start offset + size (int): required size of position encoding + + Returns: + torch.Tensor: Corresponding encoding + """ + pos_emb = self.pe[ + :, + self.pe.size(1) // 2 - size + 1: self.pe.size(1) // 2 + size, + ] + return pos_emb + + + +class LinearEmbed(torch.nn.Module): + """Linear transform the input without subsampling + + Args: + idim (int): Input dimension. + odim (int): Output dimension. + dropout_rate (float): Dropout rate. + + """ + + def __init__(self, idim: int, odim: int, dropout_rate: float, + pos_enc_class: torch.nn.Module, dtype=None, device=None, operations=None): + """Construct an linear object.""" + super().__init__() + self.out = torch.nn.Sequential( + operations.Linear(idim, odim, dtype=dtype, device=device), + operations.LayerNorm(odim, eps=1e-5, dtype=dtype, device=device), + torch.nn.Dropout(dropout_rate), + ) + self.pos_enc = pos_enc_class #rel_pos_espnet + + def position_encoding(self, offset: Union[int, torch.Tensor], + size: int) -> torch.Tensor: + return self.pos_enc.position_encoding(offset, size) + + def forward( + self, + x: torch.Tensor, + offset: Union[int, torch.Tensor] = 0 + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """Input x. + + Args: + x (torch.Tensor): Input tensor (#batch, time, idim). + x_mask (torch.Tensor): Input mask (#batch, 1, time). + + Returns: + torch.Tensor: linear input tensor (#batch, time', odim), + where time' = time . + torch.Tensor: linear input mask (#batch, 1, time'), + where time' = time . + + """ + x = self.out(x) + x, pos_emb = self.pos_enc(x, offset) + return x, pos_emb + + +ATTENTION_CLASSES = { + "selfattn": MultiHeadedAttention, + "rel_selfattn": RelPositionMultiHeadedAttention, +} + +ACTIVATION_CLASSES = { + "hardtanh": torch.nn.Hardtanh, + "tanh": torch.nn.Tanh, + "relu": torch.nn.ReLU, + "selu": torch.nn.SELU, + "swish": getattr(torch.nn, "SiLU", Swish), + "gelu": torch.nn.GELU, +} + + +def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: + """Make mask tensor containing indices of padded part. + + See description of make_non_pad_mask. + + Args: + lengths (torch.Tensor): Batch of lengths (B,). + Returns: + torch.Tensor: Mask tensor containing indices of padded part. 
+ + Examples: + >>> lengths = [5, 3, 2] + >>> make_pad_mask(lengths) + masks = [[0, 0, 0, 0 ,0], + [0, 0, 0, 1, 1], + [0, 0, 1, 1, 1]] + """ + batch_size = lengths.size(0) + max_len = max_len if max_len > 0 else lengths.max().item() + seq_range = torch.arange(0, + max_len, + dtype=torch.int64, + device=lengths.device) + seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) + seq_length_expand = lengths.unsqueeze(-1) + mask = seq_range_expand >= seq_length_expand + return mask + +#https://github.com/FunAudioLLM/CosyVoice/blob/main/examples/magicdata-read/cosyvoice/conf/cosyvoice.yaml +class ConformerEncoder(torch.nn.Module): + """Conformer encoder module.""" + + def __init__( + self, + input_size: int, + output_size: int = 1024, + attention_heads: int = 16, + linear_units: int = 4096, + num_blocks: int = 6, + dropout_rate: float = 0.1, + positional_dropout_rate: float = 0.1, + attention_dropout_rate: float = 0.0, + input_layer: str = 'linear', + pos_enc_layer_type: str = 'rel_pos_espnet', + normalize_before: bool = True, + static_chunk_size: int = 1, # 1: causal_mask; 0: full_mask + use_dynamic_chunk: bool = False, + use_dynamic_left_chunk: bool = False, + positionwise_conv_kernel_size: int = 1, + macaron_style: bool =False, + selfattention_layer_type: str = "rel_selfattn", + activation_type: str = "swish", + use_cnn_module: bool = False, + cnn_module_kernel: int = 15, + causal: bool = False, + cnn_module_norm: str = "batch_norm", + key_bias: bool = True, + dtype=None, device=None, operations=None + ): + """Construct ConformerEncoder + + Args: + input_size to use_dynamic_chunk, see in BaseEncoder + positionwise_conv_kernel_size (int): Kernel size of positionwise + conv1d layer. + macaron_style (bool): Whether to use macaron style for + positionwise layer. + selfattention_layer_type (str): Encoder attention layer type, + the parameter has no effect now, it's just for configure + compatibility. #'rel_selfattn' + activation_type (str): Encoder activation function type. + use_cnn_module (bool): Whether to use convolution module. + cnn_module_kernel (int): Kernel size of convolution module. + causal (bool): whether to use causal convolution or not. + key_bias: whether use bias in attention.linear_k, False for whisper models. 
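+            static_chunk_size (int): with use_dynamic_chunk disabled, 1 yields
+                a causal mask (each frame attends to itself and everything to
+                its left) and 0 disables chunk masking so full attention is
+                used; see add_optional_chunk_mask.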
+ """ + super().__init__() + self.output_size = output_size + self.embed = LinearEmbed(input_size, output_size, dropout_rate, + EspnetRelPositionalEncoding(output_size, positional_dropout_rate), dtype=dtype, device=device, operations=operations) + self.normalize_before = normalize_before + self.after_norm = operations.LayerNorm(output_size, eps=1e-5, dtype=dtype, device=device) + self.use_dynamic_chunk = use_dynamic_chunk + + self.static_chunk_size = static_chunk_size + self.use_dynamic_chunk = use_dynamic_chunk + self.use_dynamic_left_chunk = use_dynamic_left_chunk + activation = ACTIVATION_CLASSES[activation_type]() + + # self-attention module definition + encoder_selfattn_layer_args = ( + attention_heads, + output_size, + attention_dropout_rate, + key_bias, + ) + # feed-forward module definition + positionwise_layer_args = ( + output_size, + linear_units, + dropout_rate, + activation, + ) + # convolution module definition + convolution_layer_args = (output_size, cnn_module_kernel, activation, + cnn_module_norm, causal) + + self.encoders = torch.nn.ModuleList([ + ConformerEncoderLayer( + output_size, + RelPositionMultiHeadedAttention( + *encoder_selfattn_layer_args, dtype=dtype, device=device, operations=operations), + PositionwiseFeedForward(*positionwise_layer_args, dtype=dtype, device=device, operations=operations), + PositionwiseFeedForward( + *positionwise_layer_args, dtype=dtype, device=device, operations=operations) if macaron_style else None, + ConvolutionModule( + *convolution_layer_args, dtype=dtype, device=device, operations=operations) if use_cnn_module else None, + dropout_rate, + normalize_before, dtype=dtype, device=device, operations=operations + ) for _ in range(num_blocks) + ]) + + def forward_layers(self, xs: torch.Tensor, chunk_masks: torch.Tensor, + pos_emb: torch.Tensor, + mask_pad: torch.Tensor) -> torch.Tensor: + for layer in self.encoders: + xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad) + return xs + + def forward( + self, + xs: torch.Tensor, + pad_mask: torch.Tensor, + decoding_chunk_size: int = 0, + num_decoding_left_chunks: int = -1, + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Embed positions in tensor. + + Args: + xs: padded input tensor (B, T, D) + xs_lens: input length (B) + decoding_chunk_size: decoding chunk size for dynamic chunk + 0: default for training, use random dynamic chunk. + <0: for decoding, use full chunk. + >0: for decoding, use fixed chunk size as set. + num_decoding_left_chunks: number of left chunks, this is for decoding, + the chunk size is decoding_chunk_size. + >=0: use num_decoding_left_chunks + <0: use all left chunks + Returns: + encoder output tensor xs, and subsampled masks + xs: padded output tensor (B, T' ~= T/subsample_rate, D) + masks: torch.Tensor batch padding mask after subsample + (B, 1, T' ~= T/subsample_rate) + NOTE(xcsong): + We pass the `__call__` method of the modules instead of `forward` to the + checkpointing API because `__call__` attaches all the hooks of the module. 
+ https://discuss.pytorch.org/t/any-different-between-model-input-and-model-forward-input/3690/2 + """ + masks = None + if pad_mask is not None: + masks = pad_mask.to(torch.bool).unsqueeze(1) # (B, 1, T) + xs, pos_emb = self.embed(xs) + mask_pad = masks # (B, 1, T/subsample_rate) + chunk_masks = add_optional_chunk_mask(xs, masks, + self.use_dynamic_chunk, + self.use_dynamic_left_chunk, + decoding_chunk_size, + self.static_chunk_size, + num_decoding_left_chunks) + + xs = self.forward_layers(xs, chunk_masks, pos_emb, mask_pad) + if self.normalize_before: + xs = self.after_norm(xs) + # Here we assume the mask is not changed in encoder layers, so just + # return the masks before encoder layers, and the masks will be used + # for cross attention with decoder later + return xs, masks + diff --git a/comfy/ldm/ace/model.py b/comfy/ldm/ace/model.py new file mode 100644 index 00000000..12c52470 --- /dev/null +++ b/comfy/ldm/ace/model.py @@ -0,0 +1,385 @@ +# Original from: https://github.com/ace-step/ACE-Step/blob/main/models/ace_step_transformer.py + +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional, List, Union + +import torch +from torch import nn + +import comfy.model_management + +from comfy.ldm.lightricks.model import TimestepEmbedding, Timesteps +from .attention import LinearTransformerBlock, t2i_modulate +from .lyric_encoder import ConformerEncoder as LyricEncoder + + +def cross_norm(hidden_states, controlnet_input): + # input N x T x c + mean_hidden_states, std_hidden_states = hidden_states.mean(dim=(1,2), keepdim=True), hidden_states.std(dim=(1,2), keepdim=True) + mean_controlnet_input, std_controlnet_input = controlnet_input.mean(dim=(1,2), keepdim=True), controlnet_input.std(dim=(1,2), keepdim=True) + controlnet_input = (controlnet_input - mean_controlnet_input) * (std_hidden_states / (std_controlnet_input + 1e-12)) + mean_hidden_states + return controlnet_input + + +# Copied from transformers.models.mixtral.modeling_mixtral.MixtralRotaryEmbedding with Mixtral->Qwen2 +class Qwen2RotaryEmbedding(nn.Module): + def __init__(self, dim, max_position_embeddings=2048, base=10000, dtype=None, device=None): + super().__init__() + + self.dim = dim + self.max_position_embeddings = max_position_embeddings + self.base = base + inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2, dtype=torch.int64, device=device).float() / self.dim)) + self.register_buffer("inv_freq", inv_freq, persistent=False) + + # Build here to make `torch.jit.trace` work. 
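+        # The cos/sin tables are precomputed up to max_position_embeddings and
+        # stored as buffers; forward() rebuilds them on the fly if a longer
+        # sequence is requested.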
+ self._set_cos_sin_cache( + seq_len=max_position_embeddings, device=self.inv_freq.device, dtype=torch.float32 + ) + + def _set_cos_sin_cache(self, seq_len, device, dtype): + self.max_seq_len_cached = seq_len + t = torch.arange(self.max_seq_len_cached, device=device, dtype=torch.int64).type_as(self.inv_freq) + + freqs = torch.outer(t, self.inv_freq) + # Different from paper, but it uses a different permutation in order to obtain the same calculation + emb = torch.cat((freqs, freqs), dim=-1) + self.register_buffer("cos_cached", emb.cos().to(dtype), persistent=False) + self.register_buffer("sin_cached", emb.sin().to(dtype), persistent=False) + + def forward(self, x, seq_len=None): + # x: [bs, num_attention_heads, seq_len, head_size] + if seq_len > self.max_seq_len_cached: + self._set_cos_sin_cache(seq_len=seq_len, device=x.device, dtype=x.dtype) + + return ( + self.cos_cached[:seq_len].to(dtype=x.dtype), + self.sin_cached[:seq_len].to(dtype=x.dtype), + ) + + +class T2IFinalLayer(nn.Module): + """ + The final layer of Sana. + """ + + def __init__(self, hidden_size, patch_size=[16, 1], out_channels=256, dtype=None, device=None, operations=None): + super().__init__() + self.norm_final = operations.RMSNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) + self.linear = operations.Linear(hidden_size, patch_size[0] * patch_size[1] * out_channels, bias=True, dtype=dtype, device=device) + self.scale_shift_table = nn.Parameter(torch.empty(2, hidden_size, dtype=dtype, device=device)) + self.out_channels = out_channels + self.patch_size = patch_size + + def unpatchfy( + self, + hidden_states: torch.Tensor, + width: int, + ): + # 4 unpatchify + new_height, new_width = 1, hidden_states.size(1) + hidden_states = hidden_states.reshape( + shape=(hidden_states.shape[0], new_height, new_width, self.patch_size[0], self.patch_size[1], self.out_channels) + ).contiguous() + hidden_states = torch.einsum("nhwpqc->nchpwq", hidden_states) + output = hidden_states.reshape( + shape=(hidden_states.shape[0], self.out_channels, new_height * self.patch_size[0], new_width * self.patch_size[1]) + ).contiguous() + if width > new_width: + output = torch.nn.functional.pad(output, (0, width - new_width, 0, 0), 'constant', 0) + elif width < new_width: + output = output[:, :, :, :width] + return output + + def forward(self, x, t, output_length): + shift, scale = (comfy.model_management.cast_to(self.scale_shift_table[None], device=t.device, dtype=t.dtype) + t[:, None]).chunk(2, dim=1) + x = t2i_modulate(self.norm_final(x), shift, scale) + x = self.linear(x) + # unpatchify + output = self.unpatchfy(x, output_length) + return output + + +class PatchEmbed(nn.Module): + """2D Image to Patch Embedding""" + + def __init__( + self, + height=16, + width=4096, + patch_size=(16, 1), + in_channels=8, + embed_dim=1152, + bias=True, + dtype=None, device=None, operations=None + ): + super().__init__() + patch_size_h, patch_size_w = patch_size + self.early_conv_layers = nn.Sequential( + operations.Conv2d(in_channels, in_channels*256, kernel_size=patch_size, stride=patch_size, padding=0, bias=bias, dtype=dtype, device=device), + operations.GroupNorm(num_groups=32, num_channels=in_channels*256, eps=1e-6, affine=True, dtype=dtype, device=device), + operations.Conv2d(in_channels*256, embed_dim, kernel_size=1, stride=1, padding=0, bias=bias, dtype=dtype, device=device) + ) + self.patch_size = patch_size + self.height, self.width = height // patch_size_h, width // patch_size_w + self.base_size = self.width + + def 
forward(self, latent): + # early convolutions, N x C x H x W -> N x 256 * sqrt(patch_size) x H/patch_size x W/patch_size + latent = self.early_conv_layers(latent) + latent = latent.flatten(2).transpose(1, 2) # BCHW -> BNC + return latent + + +class ACEStepTransformer2DModel(nn.Module): + # _supports_gradient_checkpointing = True + + def __init__( + self, + in_channels: Optional[int] = 8, + num_layers: int = 28, + inner_dim: int = 1536, + attention_head_dim: int = 64, + num_attention_heads: int = 24, + mlp_ratio: float = 4.0, + out_channels: int = 8, + max_position: int = 32768, + rope_theta: float = 1000000.0, + speaker_embedding_dim: int = 512, + text_embedding_dim: int = 768, + ssl_encoder_depths: List[int] = [9, 9], + ssl_names: List[str] = ["mert", "m-hubert"], + ssl_latent_dims: List[int] = [1024, 768], + lyric_encoder_vocab_size: int = 6681, + lyric_hidden_size: int = 1024, + patch_size: List[int] = [16, 1], + max_height: int = 16, + max_width: int = 4096, + audio_model=None, + dtype=None, device=None, operations=None + + ): + super().__init__() + + self.dtype = dtype + self.num_attention_heads = num_attention_heads + self.attention_head_dim = attention_head_dim + inner_dim = num_attention_heads * attention_head_dim + self.inner_dim = inner_dim + self.out_channels = out_channels + self.max_position = max_position + self.patch_size = patch_size + + self.rope_theta = rope_theta + + self.rotary_emb = Qwen2RotaryEmbedding( + dim=self.attention_head_dim, + max_position_embeddings=self.max_position, + base=self.rope_theta, + dtype=dtype, + device=device, + ) + + # 2. Define input layers + self.in_channels = in_channels + + self.num_layers = num_layers + # 3. Define transformers blocks + self.transformer_blocks = nn.ModuleList( + [ + LinearTransformerBlock( + dim=self.inner_dim, + num_attention_heads=self.num_attention_heads, + attention_head_dim=attention_head_dim, + mlp_ratio=mlp_ratio, + add_cross_attention=True, + add_cross_attention_dim=self.inner_dim, + dtype=dtype, + device=device, + operations=operations, + ) + for i in range(self.num_layers) + ] + ) + + self.time_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0) + self.timestep_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=self.inner_dim, dtype=dtype, device=device, operations=operations) + self.t_block = nn.Sequential(nn.SiLU(), operations.Linear(self.inner_dim, 6 * self.inner_dim, bias=True, dtype=dtype, device=device)) + + # speaker + self.speaker_embedder = operations.Linear(speaker_embedding_dim, self.inner_dim, dtype=dtype, device=device) + + # genre + self.genre_embedder = operations.Linear(text_embedding_dim, self.inner_dim, dtype=dtype, device=device) + + # lyric + self.lyric_embs = operations.Embedding(lyric_encoder_vocab_size, lyric_hidden_size, dtype=dtype, device=device) + self.lyric_encoder = LyricEncoder(input_size=lyric_hidden_size, static_chunk_size=0, dtype=dtype, device=device, operations=operations) + self.lyric_proj = operations.Linear(lyric_hidden_size, self.inner_dim, dtype=dtype, device=device) + + projector_dim = 2 * self.inner_dim + + self.projectors = nn.ModuleList([ + nn.Sequential( + operations.Linear(self.inner_dim, projector_dim, dtype=dtype, device=device), + nn.SiLU(), + operations.Linear(projector_dim, projector_dim, dtype=dtype, device=device), + nn.SiLU(), + operations.Linear(projector_dim, ssl_dim, dtype=dtype, device=device), + ) for ssl_dim in ssl_latent_dims + ]) + + self.proj_in = PatchEmbed( + height=max_height, + width=max_width, + 
patch_size=patch_size, + embed_dim=self.inner_dim, + bias=True, + dtype=dtype, + device=device, + operations=operations, + ) + + self.final_layer = T2IFinalLayer(self.inner_dim, patch_size=patch_size, out_channels=out_channels, dtype=dtype, device=device, operations=operations) + + def forward_lyric_encoder( + self, + lyric_token_idx: Optional[torch.LongTensor] = None, + lyric_mask: Optional[torch.LongTensor] = None, + out_dtype=None, + ): + # N x T x D + lyric_embs = self.lyric_embs(lyric_token_idx, out_dtype=out_dtype) + prompt_prenet_out, _mask = self.lyric_encoder(lyric_embs, lyric_mask, decoding_chunk_size=1, num_decoding_left_chunks=-1) + prompt_prenet_out = self.lyric_proj(prompt_prenet_out) + return prompt_prenet_out + + def encode( + self, + encoder_text_hidden_states: Optional[torch.Tensor] = None, + text_attention_mask: Optional[torch.LongTensor] = None, + speaker_embeds: Optional[torch.FloatTensor] = None, + lyric_token_idx: Optional[torch.LongTensor] = None, + lyric_mask: Optional[torch.LongTensor] = None, + lyrics_strength=1.0, + ): + + bs = encoder_text_hidden_states.shape[0] + device = encoder_text_hidden_states.device + + # speaker embedding + encoder_spk_hidden_states = self.speaker_embedder(speaker_embeds).unsqueeze(1) + + # genre embedding + encoder_text_hidden_states = self.genre_embedder(encoder_text_hidden_states) + + # lyric + encoder_lyric_hidden_states = self.forward_lyric_encoder( + lyric_token_idx=lyric_token_idx, + lyric_mask=lyric_mask, + out_dtype=encoder_text_hidden_states.dtype, + ) + + encoder_lyric_hidden_states *= lyrics_strength + + encoder_hidden_states = torch.cat([encoder_spk_hidden_states, encoder_text_hidden_states, encoder_lyric_hidden_states], dim=1) + + encoder_hidden_mask = None + if text_attention_mask is not None: + speaker_mask = torch.ones(bs, 1, device=device) + encoder_hidden_mask = torch.cat([speaker_mask, text_attention_mask, lyric_mask], dim=1) + + return encoder_hidden_states, encoder_hidden_mask + + def decode( + self, + hidden_states: torch.Tensor, + attention_mask: torch.Tensor, + encoder_hidden_states: torch.Tensor, + encoder_hidden_mask: torch.Tensor, + timestep: Optional[torch.Tensor], + output_length: int = 0, + block_controlnet_hidden_states: Optional[Union[List[torch.Tensor], torch.Tensor]] = None, + controlnet_scale: Union[float, torch.Tensor] = 1.0, + ): + embedded_timestep = self.timestep_embedder(self.time_proj(timestep).to(dtype=hidden_states.dtype)) + temb = self.t_block(embedded_timestep) + + hidden_states = self.proj_in(hidden_states) + + # controlnet logic + if block_controlnet_hidden_states is not None: + control_condi = cross_norm(hidden_states, block_controlnet_hidden_states) + hidden_states = hidden_states + control_condi * controlnet_scale + + # inner_hidden_states = [] + + rotary_freqs_cis = self.rotary_emb(hidden_states, seq_len=hidden_states.shape[1]) + encoder_rotary_freqs_cis = self.rotary_emb(encoder_hidden_states, seq_len=encoder_hidden_states.shape[1]) + + for index_block, block in enumerate(self.transformer_blocks): + hidden_states = block( + hidden_states=hidden_states, + attention_mask=attention_mask, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_hidden_mask, + rotary_freqs_cis=rotary_freqs_cis, + rotary_freqs_cis_cross=encoder_rotary_freqs_cis, + temb=temb, + ) + + output = self.final_layer(hidden_states, embedded_timestep, output_length) + return output + + def forward( + self, + x, + timestep, + attention_mask=None, + context: Optional[torch.Tensor] = None, + 
text_attention_mask: Optional[torch.LongTensor] = None, + speaker_embeds: Optional[torch.FloatTensor] = None, + lyric_token_idx: Optional[torch.LongTensor] = None, + lyric_mask: Optional[torch.LongTensor] = None, + block_controlnet_hidden_states: Optional[Union[List[torch.Tensor], torch.Tensor]] = None, + controlnet_scale: Union[float, torch.Tensor] = 1.0, + lyrics_strength=1.0, + **kwargs + ): + hidden_states = x + encoder_text_hidden_states = context + encoder_hidden_states, encoder_hidden_mask = self.encode( + encoder_text_hidden_states=encoder_text_hidden_states, + text_attention_mask=text_attention_mask, + speaker_embeds=speaker_embeds, + lyric_token_idx=lyric_token_idx, + lyric_mask=lyric_mask, + lyrics_strength=lyrics_strength, + ) + + output_length = hidden_states.shape[-1] + + output = self.decode( + hidden_states=hidden_states, + attention_mask=attention_mask, + encoder_hidden_states=encoder_hidden_states, + encoder_hidden_mask=encoder_hidden_mask, + timestep=timestep, + output_length=output_length, + block_controlnet_hidden_states=block_controlnet_hidden_states, + controlnet_scale=controlnet_scale, + ) + + return output diff --git a/comfy/ldm/ace/vae/autoencoder_dc.py b/comfy/ldm/ace/vae/autoencoder_dc.py new file mode 100644 index 00000000..e7b1d480 --- /dev/null +++ b/comfy/ldm/ace/vae/autoencoder_dc.py @@ -0,0 +1,644 @@ +# Rewritten from diffusers +import torch +import torch.nn as nn +import torch.nn.functional as F +from typing import Tuple, Union + +import comfy.model_management +import comfy.ops +ops = comfy.ops.disable_weight_init + + +class RMSNorm(ops.RMSNorm): + def __init__(self, dim, eps=1e-5, elementwise_affine=True, bias=False): + super().__init__(dim, eps=eps, elementwise_affine=elementwise_affine) + if elementwise_affine: + self.bias = nn.Parameter(torch.empty(dim)) if bias else None + + def forward(self, x): + x = super().forward(x) + if self.elementwise_affine: + if self.bias is not None: + x = x + comfy.model_management.cast_to(self.bias, dtype=x.dtype, device=x.device) + return x + + +def get_normalization(norm_type, num_features, num_groups=32, eps=1e-5): + if norm_type == "batch_norm": + return nn.BatchNorm2d(num_features) + elif norm_type == "group_norm": + return ops.GroupNorm(num_groups, num_features) + elif norm_type == "layer_norm": + return ops.LayerNorm(num_features) + elif norm_type == "rms_norm": + return RMSNorm(num_features, eps=eps, elementwise_affine=True, bias=True) + else: + raise ValueError(f"Unknown normalization type: {norm_type}") + + +def get_activation(activation_type): + if activation_type == "relu": + return nn.ReLU() + elif activation_type == "relu6": + return nn.ReLU6() + elif activation_type == "silu": + return nn.SiLU() + elif activation_type == "leaky_relu": + return nn.LeakyReLU(0.2) + else: + raise ValueError(f"Unknown activation type: {activation_type}") + + +class ResBlock(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + norm_type: str = "batch_norm", + act_fn: str = "relu6", + ) -> None: + super().__init__() + + self.norm_type = norm_type + self.nonlinearity = get_activation(act_fn) if act_fn is not None else nn.Identity() + self.conv1 = ops.Conv2d(in_channels, in_channels, 3, 1, 1) + self.conv2 = ops.Conv2d(in_channels, out_channels, 3, 1, 1, bias=False) + self.norm = get_normalization(norm_type, out_channels) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + residual = hidden_states + hidden_states = self.conv1(hidden_states) + hidden_states = 
self.nonlinearity(hidden_states) + hidden_states = self.conv2(hidden_states) + + if self.norm_type == "rms_norm": + # move channel to the last dimension so we apply RMSnorm across channel dimension + hidden_states = self.norm(hidden_states.movedim(1, -1)).movedim(-1, 1) + else: + hidden_states = self.norm(hidden_states) + + return hidden_states + residual + +class SanaMultiscaleAttentionProjection(nn.Module): + def __init__( + self, + in_channels: int, + num_attention_heads: int, + kernel_size: int, + ) -> None: + super().__init__() + + channels = 3 * in_channels + self.proj_in = ops.Conv2d( + channels, + channels, + kernel_size, + padding=kernel_size // 2, + groups=channels, + bias=False, + ) + self.proj_out = ops.Conv2d(channels, channels, 1, 1, 0, groups=3 * num_attention_heads, bias=False) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.proj_in(hidden_states) + hidden_states = self.proj_out(hidden_states) + return hidden_states + +class SanaMultiscaleLinearAttention(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + num_attention_heads: int = None, + attention_head_dim: int = 8, + mult: float = 1.0, + norm_type: str = "batch_norm", + kernel_sizes: tuple = (5,), + eps: float = 1e-15, + residual_connection: bool = False, + ): + super().__init__() + + self.eps = eps + self.attention_head_dim = attention_head_dim + self.norm_type = norm_type + self.residual_connection = residual_connection + + num_attention_heads = ( + int(in_channels // attention_head_dim * mult) + if num_attention_heads is None + else num_attention_heads + ) + inner_dim = num_attention_heads * attention_head_dim + + self.to_q = ops.Linear(in_channels, inner_dim, bias=False) + self.to_k = ops.Linear(in_channels, inner_dim, bias=False) + self.to_v = ops.Linear(in_channels, inner_dim, bias=False) + + self.to_qkv_multiscale = nn.ModuleList() + for kernel_size in kernel_sizes: + self.to_qkv_multiscale.append( + SanaMultiscaleAttentionProjection(inner_dim, num_attention_heads, kernel_size) + ) + + self.nonlinearity = nn.ReLU() + self.to_out = ops.Linear(inner_dim * (1 + len(kernel_sizes)), out_channels, bias=False) + self.norm_out = get_normalization(norm_type, out_channels) + + def apply_linear_attention(self, query, key, value): + value = F.pad(value, (0, 0, 0, 1), mode="constant", value=1) + scores = torch.matmul(value, key.transpose(-1, -2)) + hidden_states = torch.matmul(scores, query) + + hidden_states = hidden_states.to(dtype=torch.float32) + hidden_states = hidden_states[:, :, :-1] / (hidden_states[:, :, -1:] + self.eps) + return hidden_states + + def apply_quadratic_attention(self, query, key, value): + scores = torch.matmul(key.transpose(-1, -2), query) + scores = scores.to(dtype=torch.float32) + scores = scores / (torch.sum(scores, dim=2, keepdim=True) + self.eps) + hidden_states = torch.matmul(value, scores.to(value.dtype)) + return hidden_states + + def forward(self, hidden_states): + height, width = hidden_states.shape[-2:] + if height * width > self.attention_head_dim: + use_linear_attention = True + else: + use_linear_attention = False + + residual = hidden_states + + batch_size, _, height, width = list(hidden_states.size()) + original_dtype = hidden_states.dtype + + hidden_states = hidden_states.movedim(1, -1) + query = self.to_q(hidden_states) + key = self.to_k(hidden_states) + value = self.to_v(hidden_states) + hidden_states = torch.cat([query, key, value], dim=3) + hidden_states = hidden_states.movedim(-1, 1) + + multi_scale_qkv = 
[hidden_states] + for block in self.to_qkv_multiscale: + multi_scale_qkv.append(block(hidden_states)) + + hidden_states = torch.cat(multi_scale_qkv, dim=1) + + if use_linear_attention: + # for linear attention upcast hidden_states to float32 + hidden_states = hidden_states.to(dtype=torch.float32) + + hidden_states = hidden_states.reshape(batch_size, -1, 3 * self.attention_head_dim, height * width) + + query, key, value = hidden_states.chunk(3, dim=2) + query = self.nonlinearity(query) + key = self.nonlinearity(key) + + if use_linear_attention: + hidden_states = self.apply_linear_attention(query, key, value) + hidden_states = hidden_states.to(dtype=original_dtype) + else: + hidden_states = self.apply_quadratic_attention(query, key, value) + + hidden_states = torch.reshape(hidden_states, (batch_size, -1, height, width)) + hidden_states = self.to_out(hidden_states.movedim(1, -1)).movedim(-1, 1) + + if self.norm_type == "rms_norm": + hidden_states = self.norm_out(hidden_states.movedim(1, -1)).movedim(-1, 1) + else: + hidden_states = self.norm_out(hidden_states) + + if self.residual_connection: + hidden_states = hidden_states + residual + + return hidden_states + + +class EfficientViTBlock(nn.Module): + def __init__( + self, + in_channels: int, + mult: float = 1.0, + attention_head_dim: int = 32, + qkv_multiscales: tuple = (5,), + norm_type: str = "batch_norm", + ) -> None: + super().__init__() + + self.attn = SanaMultiscaleLinearAttention( + in_channels=in_channels, + out_channels=in_channels, + mult=mult, + attention_head_dim=attention_head_dim, + norm_type=norm_type, + kernel_sizes=qkv_multiscales, + residual_connection=True, + ) + + self.conv_out = GLUMBConv( + in_channels=in_channels, + out_channels=in_channels, + norm_type="rms_norm", + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.attn(x) + x = self.conv_out(x) + return x + + +class GLUMBConv(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + expand_ratio: float = 4, + norm_type: str = None, + residual_connection: bool = True, + ) -> None: + super().__init__() + + hidden_channels = int(expand_ratio * in_channels) + self.norm_type = norm_type + self.residual_connection = residual_connection + + self.nonlinearity = nn.SiLU() + self.conv_inverted = ops.Conv2d(in_channels, hidden_channels * 2, 1, 1, 0) + self.conv_depth = ops.Conv2d(hidden_channels * 2, hidden_channels * 2, 3, 1, 1, groups=hidden_channels * 2) + self.conv_point = ops.Conv2d(hidden_channels, out_channels, 1, 1, 0, bias=False) + + self.norm = None + if norm_type == "rms_norm": + self.norm = RMSNorm(out_channels, eps=1e-5, elementwise_affine=True, bias=True) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + if self.residual_connection: + residual = hidden_states + + hidden_states = self.conv_inverted(hidden_states) + hidden_states = self.nonlinearity(hidden_states) + + hidden_states = self.conv_depth(hidden_states) + hidden_states, gate = torch.chunk(hidden_states, 2, dim=1) + hidden_states = hidden_states * self.nonlinearity(gate) + + hidden_states = self.conv_point(hidden_states) + + if self.norm_type == "rms_norm": + # move channel to the last dimension so we apply RMSnorm across channel dimension + hidden_states = self.norm(hidden_states.movedim(1, -1)).movedim(-1, 1) + + if self.residual_connection: + hidden_states = hidden_states + residual + + return hidden_states + + +def get_block( + block_type: str, + in_channels: int, + out_channels: int, + attention_head_dim: int, + norm_type: str, + act_fn: str, 
+ qkv_mutliscales: tuple = (), +): + if block_type == "ResBlock": + block = ResBlock(in_channels, out_channels, norm_type, act_fn) + elif block_type == "EfficientViTBlock": + block = EfficientViTBlock( + in_channels, + attention_head_dim=attention_head_dim, + norm_type=norm_type, + qkv_multiscales=qkv_mutliscales + ) + else: + raise ValueError(f"Block with {block_type=} is not supported.") + + return block + + +class DCDownBlock2d(nn.Module): + def __init__(self, in_channels: int, out_channels: int, downsample: bool = False, shortcut: bool = True) -> None: + super().__init__() + + self.downsample = downsample + self.factor = 2 + self.stride = 1 if downsample else 2 + self.group_size = in_channels * self.factor**2 // out_channels + self.shortcut = shortcut + + out_ratio = self.factor**2 + if downsample: + assert out_channels % out_ratio == 0 + out_channels = out_channels // out_ratio + + self.conv = ops.Conv2d( + in_channels, + out_channels, + kernel_size=3, + stride=self.stride, + padding=1, + ) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + x = self.conv(hidden_states) + if self.downsample: + x = F.pixel_unshuffle(x, self.factor) + + if self.shortcut: + y = F.pixel_unshuffle(hidden_states, self.factor) + y = y.unflatten(1, (-1, self.group_size)) + y = y.mean(dim=2) + hidden_states = x + y + else: + hidden_states = x + + return hidden_states + + +class DCUpBlock2d(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + interpolate: bool = False, + shortcut: bool = True, + interpolation_mode: str = "nearest", + ) -> None: + super().__init__() + + self.interpolate = interpolate + self.interpolation_mode = interpolation_mode + self.shortcut = shortcut + self.factor = 2 + self.repeats = out_channels * self.factor**2 // in_channels + + out_ratio = self.factor**2 + if not interpolate: + out_channels = out_channels * out_ratio + + self.conv = ops.Conv2d(in_channels, out_channels, 3, 1, 1) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + if self.interpolate: + x = F.interpolate(hidden_states, scale_factor=self.factor, mode=self.interpolation_mode) + x = self.conv(x) + else: + x = self.conv(hidden_states) + x = F.pixel_shuffle(x, self.factor) + + if self.shortcut: + y = hidden_states.repeat_interleave(self.repeats, dim=1, output_size=hidden_states.shape[1] * self.repeats) + y = F.pixel_shuffle(y, self.factor) + hidden_states = x + y + else: + hidden_states = x + + return hidden_states + + +class Encoder(nn.Module): + def __init__( + self, + in_channels: int, + latent_channels: int, + attention_head_dim: int = 32, + block_type: str or tuple = "ResBlock", + block_out_channels: tuple = (128, 256, 512, 512, 1024, 1024), + layers_per_block: tuple = (2, 2, 2, 2, 2, 2), + qkv_multiscales: tuple = ((), (), (), (5,), (5,), (5,)), + downsample_block_type: str = "pixel_unshuffle", + out_shortcut: bool = True, + ): + super().__init__() + + num_blocks = len(block_out_channels) + + if isinstance(block_type, str): + block_type = (block_type,) * num_blocks + + if layers_per_block[0] > 0: + self.conv_in = ops.Conv2d( + in_channels, + block_out_channels[0] if layers_per_block[0] > 0 else block_out_channels[1], + kernel_size=3, + stride=1, + padding=1, + ) + else: + self.conv_in = DCDownBlock2d( + in_channels=in_channels, + out_channels=block_out_channels[0] if layers_per_block[0] > 0 else block_out_channels[1], + downsample=downsample_block_type == "pixel_unshuffle", + shortcut=False, + ) + + down_blocks = [] + for i, (out_channel, num_layers) in 
enumerate(zip(block_out_channels, layers_per_block)): + down_block_list = [] + + for _ in range(num_layers): + block = get_block( + block_type[i], + out_channel, + out_channel, + attention_head_dim=attention_head_dim, + norm_type="rms_norm", + act_fn="silu", + qkv_mutliscales=qkv_multiscales[i], + ) + down_block_list.append(block) + + if i < num_blocks - 1 and num_layers > 0: + downsample_block = DCDownBlock2d( + in_channels=out_channel, + out_channels=block_out_channels[i + 1], + downsample=downsample_block_type == "pixel_unshuffle", + shortcut=True, + ) + down_block_list.append(downsample_block) + + down_blocks.append(nn.Sequential(*down_block_list)) + + self.down_blocks = nn.ModuleList(down_blocks) + + self.conv_out = ops.Conv2d(block_out_channels[-1], latent_channels, 3, 1, 1) + + self.out_shortcut = out_shortcut + if out_shortcut: + self.out_shortcut_average_group_size = block_out_channels[-1] // latent_channels + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.conv_in(hidden_states) + for down_block in self.down_blocks: + hidden_states = down_block(hidden_states) + + if self.out_shortcut: + x = hidden_states.unflatten(1, (-1, self.out_shortcut_average_group_size)) + x = x.mean(dim=2) + hidden_states = self.conv_out(hidden_states) + x + else: + hidden_states = self.conv_out(hidden_states) + + return hidden_states + + +class Decoder(nn.Module): + def __init__( + self, + in_channels: int, + latent_channels: int, + attention_head_dim: int = 32, + block_type: str or tuple = "ResBlock", + block_out_channels: tuple = (128, 256, 512, 512, 1024, 1024), + layers_per_block: tuple = (2, 2, 2, 2, 2, 2), + qkv_multiscales: tuple = ((), (), (), (5,), (5,), (5,)), + norm_type: str or tuple = "rms_norm", + act_fn: str or tuple = "silu", + upsample_block_type: str = "pixel_shuffle", + in_shortcut: bool = True, + ): + super().__init__() + + num_blocks = len(block_out_channels) + + if isinstance(block_type, str): + block_type = (block_type,) * num_blocks + if isinstance(norm_type, str): + norm_type = (norm_type,) * num_blocks + if isinstance(act_fn, str): + act_fn = (act_fn,) * num_blocks + + self.conv_in = ops.Conv2d(latent_channels, block_out_channels[-1], 3, 1, 1) + + self.in_shortcut = in_shortcut + if in_shortcut: + self.in_shortcut_repeats = block_out_channels[-1] // latent_channels + + up_blocks = [] + for i, (out_channel, num_layers) in reversed(list(enumerate(zip(block_out_channels, layers_per_block)))): + up_block_list = [] + + if i < num_blocks - 1 and num_layers > 0: + upsample_block = DCUpBlock2d( + block_out_channels[i + 1], + out_channel, + interpolate=upsample_block_type == "interpolate", + shortcut=True, + ) + up_block_list.append(upsample_block) + + for _ in range(num_layers): + block = get_block( + block_type[i], + out_channel, + out_channel, + attention_head_dim=attention_head_dim, + norm_type=norm_type[i], + act_fn=act_fn[i], + qkv_mutliscales=qkv_multiscales[i], + ) + up_block_list.append(block) + + up_blocks.insert(0, nn.Sequential(*up_block_list)) + + self.up_blocks = nn.ModuleList(up_blocks) + + channels = block_out_channels[0] if layers_per_block[0] > 0 else block_out_channels[1] + + self.norm_out = RMSNorm(channels, 1e-5, elementwise_affine=True, bias=True) + self.conv_act = nn.ReLU() + self.conv_out = None + + if layers_per_block[0] > 0: + self.conv_out = ops.Conv2d(channels, in_channels, 3, 1, 1) + else: + self.conv_out = DCUpBlock2d( + channels, in_channels, interpolate=upsample_block_type == "interpolate", shortcut=False + ) + + def 
forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + if self.in_shortcut: + x = hidden_states.repeat_interleave( + self.in_shortcut_repeats, dim=1, output_size=hidden_states.shape[1] * self.in_shortcut_repeats + ) + hidden_states = self.conv_in(hidden_states) + x + else: + hidden_states = self.conv_in(hidden_states) + + for up_block in reversed(self.up_blocks): + hidden_states = up_block(hidden_states) + + hidden_states = self.norm_out(hidden_states.movedim(1, -1)).movedim(-1, 1) + hidden_states = self.conv_act(hidden_states) + hidden_states = self.conv_out(hidden_states) + return hidden_states + + +class AutoencoderDC(nn.Module): + def __init__( + self, + in_channels: int = 2, + latent_channels: int = 8, + attention_head_dim: int = 32, + encoder_block_types: Union[str, Tuple[str]] = ["ResBlock", "ResBlock", "ResBlock", "EfficientViTBlock"], + decoder_block_types: Union[str, Tuple[str]] = ["ResBlock", "ResBlock", "ResBlock", "EfficientViTBlock"], + encoder_block_out_channels: Tuple[int, ...] = (128, 256, 512, 1024), + decoder_block_out_channels: Tuple[int, ...] = (128, 256, 512, 1024), + encoder_layers_per_block: Tuple[int] = (2, 2, 3, 3), + decoder_layers_per_block: Tuple[int] = (3, 3, 3, 3), + encoder_qkv_multiscales: Tuple[Tuple[int, ...], ...] = ((), (), (5,), (5,)), + decoder_qkv_multiscales: Tuple[Tuple[int, ...], ...] = ((), (), (5,), (5,)), + upsample_block_type: str = "interpolate", + downsample_block_type: str = "Conv", + decoder_norm_types: Union[str, Tuple[str]] = "rms_norm", + decoder_act_fns: Union[str, Tuple[str]] = "silu", + scaling_factor: float = 0.41407, + ) -> None: + super().__init__() + + self.encoder = Encoder( + in_channels=in_channels, + latent_channels=latent_channels, + attention_head_dim=attention_head_dim, + block_type=encoder_block_types, + block_out_channels=encoder_block_out_channels, + layers_per_block=encoder_layers_per_block, + qkv_multiscales=encoder_qkv_multiscales, + downsample_block_type=downsample_block_type, + ) + + self.decoder = Decoder( + in_channels=in_channels, + latent_channels=latent_channels, + attention_head_dim=attention_head_dim, + block_type=decoder_block_types, + block_out_channels=decoder_block_out_channels, + layers_per_block=decoder_layers_per_block, + qkv_multiscales=decoder_qkv_multiscales, + norm_type=decoder_norm_types, + act_fn=decoder_act_fns, + upsample_block_type=upsample_block_type, + ) + + self.scaling_factor = scaling_factor + self.spatial_compression_ratio = 2 ** (len(encoder_block_out_channels) - 1) + + def encode(self, x: torch.Tensor) -> torch.Tensor: + """Internal encoding function.""" + encoded = self.encoder(x) + return encoded * self.scaling_factor + + def decode(self, z: torch.Tensor) -> torch.Tensor: + # Scale the latents back + z = z / self.scaling_factor + decoded = self.decoder(z) + return decoded + + def forward(self, x: torch.Tensor) -> torch.Tensor: + z = self.encode(x) + return self.decode(z) + diff --git a/comfy/ldm/ace/vae/music_dcae_pipeline.py b/comfy/ldm/ace/vae/music_dcae_pipeline.py new file mode 100644 index 00000000..af81280e --- /dev/null +++ b/comfy/ldm/ace/vae/music_dcae_pipeline.py @@ -0,0 +1,109 @@ +# Original from: https://github.com/ace-step/ACE-Step/blob/main/music_dcae/music_dcae_pipeline.py +import torch +from .autoencoder_dc import AutoencoderDC +import logging +try: + import torchaudio +except: + logging.warning("torchaudio missing, ACE model will be broken") + +import torchvision.transforms as transforms +from .music_vocoder import ADaMoSHiFiGANV1 + + +class 
MusicDCAE(torch.nn.Module): + def __init__(self, source_sample_rate=None, dcae_config={}, vocoder_config={}): + super(MusicDCAE, self).__init__() + + self.dcae = AutoencoderDC(**dcae_config) + self.vocoder = ADaMoSHiFiGANV1(**vocoder_config) + + if source_sample_rate is None: + self.source_sample_rate = 48000 + else: + self.source_sample_rate = source_sample_rate + + # self.resampler = torchaudio.transforms.Resample(source_sample_rate, 44100) + + self.transform = transforms.Compose([ + transforms.Normalize(0.5, 0.5), + ]) + self.min_mel_value = -11.0 + self.max_mel_value = 3.0 + self.audio_chunk_size = int(round((1024 * 512 / 44100 * 48000))) + self.mel_chunk_size = 1024 + self.time_dimention_multiple = 8 + self.latent_chunk_size = self.mel_chunk_size // self.time_dimention_multiple + self.scale_factor = 0.1786 + self.shift_factor = -1.9091 + + def load_audio(self, audio_path): + audio, sr = torchaudio.load(audio_path) + return audio, sr + + def forward_mel(self, audios): + mels = [] + for i in range(len(audios)): + image = self.vocoder.mel_transform(audios[i]) + mels.append(image) + mels = torch.stack(mels) + return mels + + @torch.no_grad() + def encode(self, audios, audio_lengths=None, sr=None): + if audio_lengths is None: + audio_lengths = torch.tensor([audios.shape[2]] * audios.shape[0]) + audio_lengths = audio_lengths.to(audios.device) + + if sr is None: + sr = self.source_sample_rate + + if sr != 44100: + audios = torchaudio.functional.resample(audios, sr, 44100) + + max_audio_len = audios.shape[-1] + if max_audio_len % (8 * 512) != 0: + audios = torch.nn.functional.pad(audios, (0, 8 * 512 - max_audio_len % (8 * 512))) + + mels = self.forward_mel(audios) + mels = (mels - self.min_mel_value) / (self.max_mel_value - self.min_mel_value) + mels = self.transform(mels) + latents = [] + for mel in mels: + latent = self.dcae.encoder(mel.unsqueeze(0)) + latents.append(latent) + latents = torch.cat(latents, dim=0) + # latent_lengths = (audio_lengths / sr * 44100 / 512 / self.time_dimention_multiple).long() + latents = (latents - self.shift_factor) * self.scale_factor + return latents + # return latents, latent_lengths + + @torch.no_grad() + def decode(self, latents, audio_lengths=None, sr=None): + latents = latents / self.scale_factor + self.shift_factor + + pred_wavs = [] + + for latent in latents: + mels = self.dcae.decoder(latent.unsqueeze(0)) + mels = mels * 0.5 + 0.5 + mels = mels * (self.max_mel_value - self.min_mel_value) + self.min_mel_value + wav = self.vocoder.decode(mels[0]).squeeze(1) + + if sr is not None: + # resampler = torchaudio.transforms.Resample(44100, sr).to(latents.device).to(latents.dtype) + wav = torchaudio.functional.resample(wav, 44100, sr) + # wav = resampler(wav) + else: + sr = 44100 + pred_wavs.append(wav) + + if audio_lengths is not None: + pred_wavs = [wav[:, :length].cpu() for wav, length in zip(pred_wavs, audio_lengths)] + return torch.stack(pred_wavs) + # return sr, pred_wavs + + def forward(self, audios, audio_lengths=None, sr=None): + latents, latent_lengths = self.encode(audios=audios, audio_lengths=audio_lengths, sr=sr) + sr, pred_wavs = self.decode(latents=latents, audio_lengths=audio_lengths, sr=sr) + return sr, pred_wavs, latents, latent_lengths diff --git a/comfy/ldm/ace/vae/music_log_mel.py b/comfy/ldm/ace/vae/music_log_mel.py new file mode 100755 index 00000000..9c584eb7 --- /dev/null +++ b/comfy/ldm/ace/vae/music_log_mel.py @@ -0,0 +1,113 @@ +# Original from: https://github.com/ace-step/ACE-Step/blob/main/music_dcae/music_log_mel.py +import torch 
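+# Rough shape sketch (assuming the default settings below: 44.1 kHz audio, n_fft=2048,
+# hop_length=512, n_mels=128): LogMelSpectrogram maps a (B, T) waveform to a log-mel
+# tensor of approximately (B, 128, T // 512). This is the front-end that music_vocoder.py
+# exposes as mel_transform.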
+import torch.nn as nn +from torch import Tensor +import logging +try: + from torchaudio.transforms import MelScale +except: + logging.warning("torchaudio missing, ACE model will be broken") + +import comfy.model_management + +class LinearSpectrogram(nn.Module): + def __init__( + self, + n_fft=2048, + win_length=2048, + hop_length=512, + center=False, + mode="pow2_sqrt", + ): + super().__init__() + + self.n_fft = n_fft + self.win_length = win_length + self.hop_length = hop_length + self.center = center + self.mode = mode + + self.register_buffer("window", torch.hann_window(win_length)) + + def forward(self, y: Tensor) -> Tensor: + if y.ndim == 3: + y = y.squeeze(1) + + y = torch.nn.functional.pad( + y.unsqueeze(1), + ( + (self.win_length - self.hop_length) // 2, + (self.win_length - self.hop_length + 1) // 2, + ), + mode="reflect", + ).squeeze(1) + dtype = y.dtype + spec = torch.stft( + y.float(), + self.n_fft, + hop_length=self.hop_length, + win_length=self.win_length, + window=comfy.model_management.cast_to(self.window, dtype=torch.float32, device=y.device), + center=self.center, + pad_mode="reflect", + normalized=False, + onesided=True, + return_complex=True, + ) + spec = torch.view_as_real(spec) + + if self.mode == "pow2_sqrt": + spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) + spec = spec.to(dtype) + return spec + + +class LogMelSpectrogram(nn.Module): + def __init__( + self, + sample_rate=44100, + n_fft=2048, + win_length=2048, + hop_length=512, + n_mels=128, + center=False, + f_min=0.0, + f_max=None, + ): + super().__init__() + + self.sample_rate = sample_rate + self.n_fft = n_fft + self.win_length = win_length + self.hop_length = hop_length + self.center = center + self.n_mels = n_mels + self.f_min = f_min + self.f_max = f_max or sample_rate // 2 + + self.spectrogram = LinearSpectrogram(n_fft, win_length, hop_length, center) + self.mel_scale = MelScale( + self.n_mels, + self.sample_rate, + self.f_min, + self.f_max, + self.n_fft // 2 + 1, + "slaney", + "slaney", + ) + + def compress(self, x: Tensor) -> Tensor: + return torch.log(torch.clamp(x, min=1e-5)) + + def decompress(self, x: Tensor) -> Tensor: + return torch.exp(x) + + def forward(self, x: Tensor, return_linear: bool = False) -> Tensor: + linear = self.spectrogram(x) + x = self.mel_scale(linear) + x = self.compress(x) + # print(x.shape) + if return_linear: + return x, self.compress(linear) + + return x diff --git a/comfy/ldm/ace/vae/music_vocoder.py b/comfy/ldm/ace/vae/music_vocoder.py new file mode 100755 index 00000000..2f989fa8 --- /dev/null +++ b/comfy/ldm/ace/vae/music_vocoder.py @@ -0,0 +1,538 @@ +# Original from: https://github.com/ace-step/ACE-Step/blob/main/music_dcae/music_vocoder.py +import torch +from torch import nn + +from functools import partial +from math import prod +from typing import Callable, Tuple, List + +import numpy as np +import torch.nn.functional as F +from torch.nn.utils.parametrize import remove_parametrizations as remove_weight_norm + +from .music_log_mel import LogMelSpectrogram + +import comfy.model_management +import comfy.ops +ops = comfy.ops.disable_weight_init + + +def drop_path( + x, drop_prob: float = 0.0, training: bool = False, scale_by_keep: bool = True +): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + + This is the same as the DropConnect impl I created for EfficientNet, etc networks, however, + the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... 
+ See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for + changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use + 'survival rate' as the argument. + + """ # noqa: E501 + + if drop_prob == 0.0 or not training: + return x + keep_prob = 1 - drop_prob + shape = (x.shape[0],) + (1,) * ( + x.ndim - 1 + ) # work with diff dim tensors, not just 2D ConvNets + random_tensor = x.new_empty(shape).bernoulli_(keep_prob) + if keep_prob > 0.0 and scale_by_keep: + random_tensor.div_(keep_prob) + return x * random_tensor + + +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" # noqa: E501 + + def __init__(self, drop_prob: float = 0.0, scale_by_keep: bool = True): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + self.scale_by_keep = scale_by_keep + + def forward(self, x): + return drop_path(x, self.drop_prob, self.training, self.scale_by_keep) + + def extra_repr(self): + return f"drop_prob={round(self.drop_prob,3):0.3f}" + + +class LayerNorm(nn.Module): + r"""LayerNorm that supports two data formats: channels_last (default) or channels_first. + The ordering of the dimensions in the inputs. channels_last corresponds to inputs with + shape (batch_size, height, width, channels) while channels_first corresponds to inputs + with shape (batch_size, channels, height, width). + """ # noqa: E501 + + def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"): + super().__init__() + self.weight = nn.Parameter(torch.ones(normalized_shape)) + self.bias = nn.Parameter(torch.zeros(normalized_shape)) + self.eps = eps + self.data_format = data_format + if self.data_format not in ["channels_last", "channels_first"]: + raise NotImplementedError + self.normalized_shape = (normalized_shape,) + + def forward(self, x): + if self.data_format == "channels_last": + return F.layer_norm( + x, self.normalized_shape, comfy.model_management.cast_to(self.weight, dtype=x.dtype, device=x.device), comfy.model_management.cast_to(self.bias, dtype=x.dtype, device=x.device), self.eps + ) + elif self.data_format == "channels_first": + u = x.mean(1, keepdim=True) + s = (x - u).pow(2).mean(1, keepdim=True) + x = (x - u) / torch.sqrt(s + self.eps) + x = comfy.model_management.cast_to(self.weight[:, None], dtype=x.dtype, device=x.device) * x + comfy.model_management.cast_to(self.bias[:, None], dtype=x.dtype, device=x.device) + return x + + +class ConvNeXtBlock(nn.Module): + r"""ConvNeXt Block. There are two equivalent implementations: + (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W) + (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back + We use (2) as we find it slightly faster in PyTorch + + Args: + dim (int): Number of input channels. + drop_path (float): Stochastic depth rate. Default: 0.0 + layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4.0. + kernel_size (int): Kernel size for depthwise conv. Default: 7. + dilation (int): Dilation for depthwise conv. Default: 1. 
+ """ # noqa: E501 + + def __init__( + self, + dim: int, + drop_path: float = 0.0, + layer_scale_init_value: float = 1e-6, + mlp_ratio: float = 4.0, + kernel_size: int = 7, + dilation: int = 1, + ): + super().__init__() + + self.dwconv = ops.Conv1d( + dim, + dim, + kernel_size=kernel_size, + padding=int(dilation * (kernel_size - 1) / 2), + groups=dim, + ) # depthwise conv + self.norm = LayerNorm(dim, eps=1e-6) + self.pwconv1 = ops.Linear( + dim, int(mlp_ratio * dim) + ) # pointwise/1x1 convs, implemented with linear layers + self.act = nn.GELU() + self.pwconv2 = ops.Linear(int(mlp_ratio * dim), dim) + self.gamma = ( + nn.Parameter(torch.empty((dim)), requires_grad=False) + if layer_scale_init_value > 0 + else None + ) + self.drop_path = DropPath( + drop_path) if drop_path > 0.0 else nn.Identity() + + def forward(self, x, apply_residual: bool = True): + input = x + + x = self.dwconv(x) + x = x.permute(0, 2, 1) # (N, C, L) -> (N, L, C) + x = self.norm(x) + x = self.pwconv1(x) + x = self.act(x) + x = self.pwconv2(x) + + if self.gamma is not None: + x = comfy.model_management.cast_to(self.gamma, dtype=x.dtype, device=x.device) * x + + x = x.permute(0, 2, 1) # (N, L, C) -> (N, C, L) + x = self.drop_path(x) + + if apply_residual: + x = input + x + + return x + + +class ParallelConvNeXtBlock(nn.Module): + def __init__(self, kernel_sizes: List[int], *args, **kwargs): + super().__init__() + self.blocks = nn.ModuleList( + [ + ConvNeXtBlock(kernel_size=kernel_size, *args, **kwargs) + for kernel_size in kernel_sizes + ] + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return torch.stack( + [block(x, apply_residual=False) for block in self.blocks] + [x], + dim=1, + ).sum(dim=1) + + +class ConvNeXtEncoder(nn.Module): + def __init__( + self, + input_channels=3, + depths=[3, 3, 9, 3], + dims=[96, 192, 384, 768], + drop_path_rate=0.0, + layer_scale_init_value=1e-6, + kernel_sizes: Tuple[int] = (7,), + ): + super().__init__() + assert len(depths) == len(dims) + + self.channel_layers = nn.ModuleList() + stem = nn.Sequential( + ops.Conv1d( + input_channels, + dims[0], + kernel_size=7, + padding=3, + padding_mode="replicate", + ), + LayerNorm(dims[0], eps=1e-6, data_format="channels_first"), + ) + self.channel_layers.append(stem) + + for i in range(len(depths) - 1): + mid_layer = nn.Sequential( + LayerNorm(dims[i], eps=1e-6, data_format="channels_first"), + ops.Conv1d(dims[i], dims[i + 1], kernel_size=1), + ) + self.channel_layers.append(mid_layer) + + block_fn = ( + partial(ConvNeXtBlock, kernel_size=kernel_sizes[0]) + if len(kernel_sizes) == 1 + else partial(ParallelConvNeXtBlock, kernel_sizes=kernel_sizes) + ) + + self.stages = nn.ModuleList() + drop_path_rates = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) + ] + + cur = 0 + for i in range(len(depths)): + stage = nn.Sequential( + *[ + block_fn( + dim=dims[i], + drop_path=drop_path_rates[cur + j], + layer_scale_init_value=layer_scale_init_value, + ) + for j in range(depths[i]) + ] + ) + self.stages.append(stage) + cur += depths[i] + + self.norm = LayerNorm(dims[-1], eps=1e-6, data_format="channels_first") + + def forward( + self, + x: torch.Tensor, + ) -> torch.Tensor: + for channel_layer, stage in zip(self.channel_layers, self.stages): + x = channel_layer(x) + x = stage(x) + + return self.norm(x) + + +def get_padding(kernel_size, dilation=1): + return (kernel_size * dilation - dilation) // 2 + + +class ResBlock1(torch.nn.Module): + def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)): + super().__init__() + + 
self.convs1 = nn.ModuleList( + [ + torch.nn.utils.parametrizations.weight_norm( + ops.Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[0], + padding=get_padding(kernel_size, dilation[0]), + ) + ), + torch.nn.utils.parametrizations.weight_norm( + ops.Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[1], + padding=get_padding(kernel_size, dilation[1]), + ) + ), + torch.nn.utils.parametrizations.weight_norm( + ops.Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[2], + padding=get_padding(kernel_size, dilation[2]), + ) + ), + ] + ) + + self.convs2 = nn.ModuleList( + [ + torch.nn.utils.parametrizations.weight_norm( + ops.Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=1, + padding=get_padding(kernel_size, 1), + ) + ), + torch.nn.utils.parametrizations.weight_norm( + ops.Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=1, + padding=get_padding(kernel_size, 1), + ) + ), + torch.nn.utils.parametrizations.weight_norm( + ops.Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=1, + padding=get_padding(kernel_size, 1), + ) + ), + ] + ) + + def forward(self, x): + for c1, c2 in zip(self.convs1, self.convs2): + xt = F.silu(x) + xt = c1(xt) + xt = F.silu(xt) + xt = c2(xt) + x = xt + x + return x + + def remove_weight_norm(self): + for conv in self.convs1: + remove_weight_norm(conv) + for conv in self.convs2: + remove_weight_norm(conv) + + +class HiFiGANGenerator(nn.Module): + def __init__( + self, + *, + hop_length: int = 512, + upsample_rates: Tuple[int] = (8, 8, 2, 2, 2), + upsample_kernel_sizes: Tuple[int] = (16, 16, 8, 2, 2), + resblock_kernel_sizes: Tuple[int] = (3, 7, 11), + resblock_dilation_sizes: Tuple[Tuple[int]] = ( + (1, 3, 5), (1, 3, 5), (1, 3, 5)), + num_mels: int = 128, + upsample_initial_channel: int = 512, + use_template: bool = True, + pre_conv_kernel_size: int = 7, + post_conv_kernel_size: int = 7, + post_activation: Callable = partial(nn.SiLU, inplace=True), + ): + super().__init__() + + assert ( + prod(upsample_rates) == hop_length + ), f"hop_length must be {prod(upsample_rates)}" + + self.conv_pre = torch.nn.utils.parametrizations.weight_norm( + ops.Conv1d( + num_mels, + upsample_initial_channel, + pre_conv_kernel_size, + 1, + padding=get_padding(pre_conv_kernel_size), + ) + ) + + self.num_upsamples = len(upsample_rates) + self.num_kernels = len(resblock_kernel_sizes) + + self.noise_convs = nn.ModuleList() + self.use_template = use_template + self.ups = nn.ModuleList() + + for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)): + c_cur = upsample_initial_channel // (2 ** (i + 1)) + self.ups.append( + torch.nn.utils.parametrizations.weight_norm( + ops.ConvTranspose1d( + upsample_initial_channel // (2**i), + upsample_initial_channel // (2 ** (i + 1)), + k, + u, + padding=(k - u) // 2, + ) + ) + ) + + if not use_template: + continue + + if i + 1 < len(upsample_rates): + stride_f0 = np.prod(upsample_rates[i + 1:]) + self.noise_convs.append( + ops.Conv1d( + 1, + c_cur, + kernel_size=stride_f0 * 2, + stride=stride_f0, + padding=stride_f0 // 2, + ) + ) + else: + self.noise_convs.append(ops.Conv1d(1, c_cur, kernel_size=1)) + + self.resblocks = nn.ModuleList() + for i in range(len(self.ups)): + ch = upsample_initial_channel // (2 ** (i + 1)) + for k, d in zip(resblock_kernel_sizes, resblock_dilation_sizes): + self.resblocks.append(ResBlock1(ch, k, d)) + + self.activation_post = post_activation() + self.conv_post = torch.nn.utils.parametrizations.weight_norm( + 
ops.Conv1d( + ch, + 1, + post_conv_kernel_size, + 1, + padding=get_padding(post_conv_kernel_size), + ) + ) + + def forward(self, x, template=None): + x = self.conv_pre(x) + + for i in range(self.num_upsamples): + x = F.silu(x, inplace=True) + x = self.ups[i](x) + + if self.use_template: + x = x + self.noise_convs[i](template) + + xs = None + + for j in range(self.num_kernels): + if xs is None: + xs = self.resblocks[i * self.num_kernels + j](x) + else: + xs += self.resblocks[i * self.num_kernels + j](x) + + x = xs / self.num_kernels + + x = self.activation_post(x) + x = self.conv_post(x) + x = torch.tanh(x) + + return x + + def remove_weight_norm(self): + for up in self.ups: + remove_weight_norm(up) + for block in self.resblocks: + block.remove_weight_norm() + remove_weight_norm(self.conv_pre) + remove_weight_norm(self.conv_post) + + +class ADaMoSHiFiGANV1(nn.Module): + def __init__( + self, + input_channels: int = 128, + depths: List[int] = [3, 3, 9, 3], + dims: List[int] = [128, 256, 384, 512], + drop_path_rate: float = 0.0, + kernel_sizes: Tuple[int] = (7,), + upsample_rates: Tuple[int] = (4, 4, 2, 2, 2, 2, 2), + upsample_kernel_sizes: Tuple[int] = (8, 8, 4, 4, 4, 4, 4), + resblock_kernel_sizes: Tuple[int] = (3, 7, 11, 13), + resblock_dilation_sizes: Tuple[Tuple[int]] = ( + (1, 3, 5), (1, 3, 5), (1, 3, 5), (1, 3, 5)), + num_mels: int = 512, + upsample_initial_channel: int = 1024, + use_template: bool = False, + pre_conv_kernel_size: int = 13, + post_conv_kernel_size: int = 13, + sampling_rate: int = 44100, + n_fft: int = 2048, + win_length: int = 2048, + hop_length: int = 512, + f_min: int = 40, + f_max: int = 16000, + n_mels: int = 128, + ): + super().__init__() + + self.backbone = ConvNeXtEncoder( + input_channels=input_channels, + depths=depths, + dims=dims, + drop_path_rate=drop_path_rate, + kernel_sizes=kernel_sizes, + ) + + self.head = HiFiGANGenerator( + hop_length=hop_length, + upsample_rates=upsample_rates, + upsample_kernel_sizes=upsample_kernel_sizes, + resblock_kernel_sizes=resblock_kernel_sizes, + resblock_dilation_sizes=resblock_dilation_sizes, + num_mels=num_mels, + upsample_initial_channel=upsample_initial_channel, + use_template=use_template, + pre_conv_kernel_size=pre_conv_kernel_size, + post_conv_kernel_size=post_conv_kernel_size, + ) + self.sampling_rate = sampling_rate + self.mel_transform = LogMelSpectrogram( + sample_rate=sampling_rate, + n_fft=n_fft, + win_length=win_length, + hop_length=hop_length, + f_min=f_min, + f_max=f_max, + n_mels=n_mels, + ) + self.eval() + + @torch.no_grad() + def decode(self, mel): + y = self.backbone(mel) + y = self.head(y) + return y + + @torch.no_grad() + def encode(self, x): + return self.mel_transform(x) + + def forward(self, mel): + y = self.backbone(mel) + y = self.head(y) + return y diff --git a/comfy/ldm/audio/autoencoder.py b/comfy/ldm/audio/autoencoder.py index 9e7e7c87..78ed6ffa 100644 --- a/comfy/ldm/audio/autoencoder.py +++ b/comfy/ldm/audio/autoencoder.py @@ -75,16 +75,10 @@ class SnakeBeta(nn.Module): return x def WNConv1d(*args, **kwargs): - try: - return torch.nn.utils.parametrizations.weight_norm(ops.Conv1d(*args, **kwargs)) - except: - return torch.nn.utils.weight_norm(ops.Conv1d(*args, **kwargs)) #support pytorch 2.1 and older + return torch.nn.utils.parametrizations.weight_norm(ops.Conv1d(*args, **kwargs)) def WNConvTranspose1d(*args, **kwargs): - try: - return torch.nn.utils.parametrizations.weight_norm(ops.ConvTranspose1d(*args, **kwargs)) - except: - return torch.nn.utils.weight_norm(ops.ConvTranspose1d(*args, 
**kwargs)) #support pytorch 2.1 and older + return torch.nn.utils.parametrizations.weight_norm(ops.ConvTranspose1d(*args, **kwargs)) def get_activation(activation: Literal["elu", "snake", "none"], antialias=False, channels=None) -> nn.Module: if activation == "elu": diff --git a/comfy/ldm/chroma/layers.py b/comfy/ldm/chroma/layers.py new file mode 100644 index 00000000..35da91ee --- /dev/null +++ b/comfy/ldm/chroma/layers.py @@ -0,0 +1,183 @@ +import torch +from torch import Tensor, nn + +from comfy.ldm.flux.math import attention +from comfy.ldm.flux.layers import ( + MLPEmbedder, + RMSNorm, + QKNorm, + SelfAttention, + ModulationOut, +) + + + +class ChromaModulationOut(ModulationOut): + @classmethod + def from_offset(cls, tensor: torch.Tensor, offset: int = 0) -> ModulationOut: + return cls( + shift=tensor[:, offset : offset + 1, :], + scale=tensor[:, offset + 1 : offset + 2, :], + gate=tensor[:, offset + 2 : offset + 3, :], + ) + + + + +class Approximator(nn.Module): + def __init__(self, in_dim: int, out_dim: int, hidden_dim: int, n_layers = 5, dtype=None, device=None, operations=None): + super().__init__() + self.in_proj = operations.Linear(in_dim, hidden_dim, bias=True, dtype=dtype, device=device) + self.layers = nn.ModuleList([MLPEmbedder(hidden_dim, hidden_dim, dtype=dtype, device=device, operations=operations) for x in range( n_layers)]) + self.norms = nn.ModuleList([RMSNorm(hidden_dim, dtype=dtype, device=device, operations=operations) for x in range( n_layers)]) + self.out_proj = operations.Linear(hidden_dim, out_dim, dtype=dtype, device=device) + + @property + def device(self): + # Get the device of the module (assumes all parameters are on the same device) + return next(self.parameters()).device + + def forward(self, x: Tensor) -> Tensor: + x = self.in_proj(x) + + for layer, norms in zip(self.layers, self.norms): + x = x + layer(norms(x)) + + x = self.out_proj(x) + + return x + + +class DoubleStreamBlock(nn.Module): + def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, flipped_img_txt=False, dtype=None, device=None, operations=None): + super().__init__() + + mlp_hidden_dim = int(hidden_size * mlp_ratio) + self.num_heads = num_heads + self.hidden_size = hidden_size + self.img_norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) + self.img_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, dtype=dtype, device=device, operations=operations) + + self.img_norm2 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) + self.img_mlp = nn.Sequential( + operations.Linear(hidden_size, mlp_hidden_dim, bias=True, dtype=dtype, device=device), + nn.GELU(approximate="tanh"), + operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device), + ) + + self.txt_norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) + self.txt_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, dtype=dtype, device=device, operations=operations) + + self.txt_norm2 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) + self.txt_mlp = nn.Sequential( + operations.Linear(hidden_size, mlp_hidden_dim, bias=True, dtype=dtype, device=device), + nn.GELU(approximate="tanh"), + operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device), + ) + self.flipped_img_txt = flipped_img_txt + + def 
forward(self, img: Tensor, txt: Tensor, pe: Tensor, vec: Tensor, attn_mask=None):
+        (img_mod1, img_mod2), (txt_mod1, txt_mod2) = vec
+
+        # prepare image for attention
+        img_modulated = self.img_norm1(img)
+        img_modulated = (1 + img_mod1.scale) * img_modulated + img_mod1.shift
+        img_qkv = self.img_attn.qkv(img_modulated)
+        img_q, img_k, img_v = img_qkv.view(img_qkv.shape[0], img_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
+        img_q, img_k = self.img_attn.norm(img_q, img_k, img_v)
+
+        # prepare txt for attention
+        txt_modulated = self.txt_norm1(txt)
+        txt_modulated = (1 + txt_mod1.scale) * txt_modulated + txt_mod1.shift
+        txt_qkv = self.txt_attn.qkv(txt_modulated)
+        txt_q, txt_k, txt_v = txt_qkv.view(txt_qkv.shape[0], txt_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
+        txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v)
+
+        # run actual attention
+        attn = attention(torch.cat((txt_q, img_q), dim=2),
+                         torch.cat((txt_k, img_k), dim=2),
+                         torch.cat((txt_v, img_v), dim=2),
+                         pe=pe, mask=attn_mask)
+
+        txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1] :]
+
+        # calculate the img blocks
+        img = img + img_mod1.gate * self.img_attn.proj(img_attn)
+        img = img + img_mod2.gate * self.img_mlp((1 + img_mod2.scale) * self.img_norm2(img) + img_mod2.shift)
+
+        # calculate the txt blocks
+        txt += txt_mod1.gate * self.txt_attn.proj(txt_attn)
+        txt += txt_mod2.gate * self.txt_mlp((1 + txt_mod2.scale) * self.txt_norm2(txt) + txt_mod2.shift)
+
+        if txt.dtype == torch.float16:
+            txt = torch.nan_to_num(txt, nan=0.0, posinf=65504, neginf=-65504)
+
+        return img, txt
+
+
+class SingleStreamBlock(nn.Module):
+    """
+    A DiT block with parallel linear layers as described in
+    https://arxiv.org/abs/2302.05442 and adapted modulation interface.
+ """ + + def __init__( + self, + hidden_size: int, + num_heads: int, + mlp_ratio: float = 4.0, + qk_scale: float = None, + dtype=None, + device=None, + operations=None + ): + super().__init__() + self.hidden_dim = hidden_size + self.num_heads = num_heads + head_dim = hidden_size // num_heads + self.scale = qk_scale or head_dim**-0.5 + + self.mlp_hidden_dim = int(hidden_size * mlp_ratio) + # qkv and mlp_in + self.linear1 = operations.Linear(hidden_size, hidden_size * 3 + self.mlp_hidden_dim, dtype=dtype, device=device) + # proj and mlp_out + self.linear2 = operations.Linear(hidden_size + self.mlp_hidden_dim, hidden_size, dtype=dtype, device=device) + + self.norm = QKNorm(head_dim, dtype=dtype, device=device, operations=operations) + + self.hidden_size = hidden_size + self.pre_norm = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) + + self.mlp_act = nn.GELU(approximate="tanh") + + def forward(self, x: Tensor, pe: Tensor, vec: Tensor, attn_mask=None) -> Tensor: + mod = vec + x_mod = (1 + mod.scale) * self.pre_norm(x) + mod.shift + qkv, mlp = torch.split(self.linear1(x_mod), [3 * self.hidden_size, self.mlp_hidden_dim], dim=-1) + + q, k, v = qkv.view(qkv.shape[0], qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) + q, k = self.norm(q, k, v) + + # compute attention + attn = attention(q, k, v, pe=pe, mask=attn_mask) + # compute activation in mlp stream, cat again and run second linear layer + output = self.linear2(torch.cat((attn, self.mlp_act(mlp)), 2)) + x += mod.gate * output + if x.dtype == torch.float16: + x = torch.nan_to_num(x, nan=0.0, posinf=65504, neginf=-65504) + return x + + +class LastLayer(nn.Module): + def __init__(self, hidden_size: int, patch_size: int, out_channels: int, dtype=None, device=None, operations=None): + super().__init__() + self.norm_final = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) + self.linear = operations.Linear(hidden_size, out_channels, bias=True, dtype=dtype, device=device) + + def forward(self, x: Tensor, vec: Tensor) -> Tensor: + shift, scale = vec + shift = shift.squeeze(1) + scale = scale.squeeze(1) + x = (1 + scale[:, None, :]) * self.norm_final(x) + shift[:, None, :] + x = self.linear(x) + return x diff --git a/comfy/ldm/chroma/model.py b/comfy/ldm/chroma/model.py new file mode 100644 index 00000000..636748fc --- /dev/null +++ b/comfy/ldm/chroma/model.py @@ -0,0 +1,271 @@ +#Original code can be found on: https://github.com/black-forest-labs/flux + +from dataclasses import dataclass + +import torch +from torch import Tensor, nn +from einops import rearrange, repeat +import comfy.ldm.common_dit + +from comfy.ldm.flux.layers import ( + EmbedND, + timestep_embedding, +) + +from .layers import ( + DoubleStreamBlock, + LastLayer, + SingleStreamBlock, + Approximator, + ChromaModulationOut, +) + + +@dataclass +class ChromaParams: + in_channels: int + out_channels: int + context_in_dim: int + hidden_size: int + mlp_ratio: float + num_heads: int + depth: int + depth_single_blocks: int + axes_dim: list + theta: int + patch_size: int + qkv_bias: bool + in_dim: int + out_dim: int + hidden_dim: int + n_layers: int + + + + +class Chroma(nn.Module): + """ + Transformer model for flow matching on sequences. 
+ """ + + def __init__(self, image_model=None, final_layer=True, dtype=None, device=None, operations=None, **kwargs): + super().__init__() + self.dtype = dtype + params = ChromaParams(**kwargs) + self.params = params + self.patch_size = params.patch_size + self.in_channels = params.in_channels + self.out_channels = params.out_channels + if params.hidden_size % params.num_heads != 0: + raise ValueError( + f"Hidden size {params.hidden_size} must be divisible by num_heads {params.num_heads}" + ) + pe_dim = params.hidden_size // params.num_heads + if sum(params.axes_dim) != pe_dim: + raise ValueError(f"Got {params.axes_dim} but expected positional dim {pe_dim}") + self.hidden_size = params.hidden_size + self.num_heads = params.num_heads + self.in_dim = params.in_dim + self.out_dim = params.out_dim + self.hidden_dim = params.hidden_dim + self.n_layers = params.n_layers + self.pe_embedder = EmbedND(dim=pe_dim, theta=params.theta, axes_dim=params.axes_dim) + self.img_in = operations.Linear(self.in_channels, self.hidden_size, bias=True, dtype=dtype, device=device) + self.txt_in = operations.Linear(params.context_in_dim, self.hidden_size, dtype=dtype, device=device) + # set as nn identity for now, will overwrite it later. + self.distilled_guidance_layer = Approximator( + in_dim=self.in_dim, + hidden_dim=self.hidden_dim, + out_dim=self.out_dim, + n_layers=self.n_layers, + dtype=dtype, device=device, operations=operations + ) + + + self.double_blocks = nn.ModuleList( + [ + DoubleStreamBlock( + self.hidden_size, + self.num_heads, + mlp_ratio=params.mlp_ratio, + qkv_bias=params.qkv_bias, + dtype=dtype, device=device, operations=operations + ) + for _ in range(params.depth) + ] + ) + + self.single_blocks = nn.ModuleList( + [ + SingleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio, dtype=dtype, device=device, operations=operations) + for _ in range(params.depth_single_blocks) + ] + ) + + if final_layer: + self.final_layer = LastLayer(self.hidden_size, 1, self.out_channels, dtype=dtype, device=device, operations=operations) + + self.skip_mmdit = [] + self.skip_dit = [] + self.lite = False + + def get_modulations(self, tensor: torch.Tensor, block_type: str, *, idx: int = 0): + # This function slices up the modulations tensor which has the following layout: + # single : num_single_blocks * 3 elements + # double_img : num_double_blocks * 6 elements + # double_txt : num_double_blocks * 6 elements + # final : 2 elements + if block_type == "final": + return (tensor[:, -2:-1, :], tensor[:, -1:, :]) + single_block_count = self.params.depth_single_blocks + double_block_count = self.params.depth + offset = 3 * idx + if block_type == "single": + return ChromaModulationOut.from_offset(tensor, offset) + # Double block modulations are 6 elements so we double 3 * idx. + offset *= 2 + if block_type in {"double_img", "double_txt"}: + # Advance past the single block modulations. + offset += 3 * single_block_count + if block_type == "double_txt": + # Advance past the double block img modulations. 
+ offset += 6 * double_block_count + return ( + ChromaModulationOut.from_offset(tensor, offset), + ChromaModulationOut.from_offset(tensor, offset + 3), + ) + raise ValueError("Bad block_type") + + + def forward_orig( + self, + img: Tensor, + img_ids: Tensor, + txt: Tensor, + txt_ids: Tensor, + timesteps: Tensor, + guidance: Tensor = None, + control = None, + transformer_options={}, + attn_mask: Tensor = None, + ) -> Tensor: + patches_replace = transformer_options.get("patches_replace", {}) + if img.ndim != 3 or txt.ndim != 3: + raise ValueError("Input img and txt tensors must have 3 dimensions.") + + # running on sequences img + img = self.img_in(img) + + # distilled vector guidance + mod_index_length = 344 + distill_timestep = timestep_embedding(timesteps.detach().clone(), 16).to(img.device, img.dtype) + # guidance = guidance * + distil_guidance = timestep_embedding(guidance.detach().clone(), 16).to(img.device, img.dtype) + + # get all modulation index + modulation_index = timestep_embedding(torch.arange(mod_index_length), 32).to(img.device, img.dtype) + # we need to broadcast the modulation index here so each batch has all of the index + modulation_index = modulation_index.unsqueeze(0).repeat(img.shape[0], 1, 1).to(img.device, img.dtype) + # and we need to broadcast timestep and guidance along too + timestep_guidance = torch.cat([distill_timestep, distil_guidance], dim=1).unsqueeze(1).repeat(1, mod_index_length, 1).to(img.dtype).to(img.device, img.dtype) + # then and only then we could concatenate it together + input_vec = torch.cat([timestep_guidance, modulation_index], dim=-1).to(img.device, img.dtype) + + mod_vectors = self.distilled_guidance_layer(input_vec) + + txt = self.txt_in(txt) + + ids = torch.cat((txt_ids, img_ids), dim=1) + pe = self.pe_embedder(ids) + + blocks_replace = patches_replace.get("dit", {}) + for i, block in enumerate(self.double_blocks): + if i not in self.skip_mmdit: + double_mod = ( + self.get_modulations(mod_vectors, "double_img", idx=i), + self.get_modulations(mod_vectors, "double_txt", idx=i), + ) + if ("double_block", i) in blocks_replace: + def block_wrap(args): + out = {} + out["img"], out["txt"] = block(img=args["img"], + txt=args["txt"], + vec=args["vec"], + pe=args["pe"], + attn_mask=args.get("attn_mask")) + return out + + out = blocks_replace[("double_block", i)]({"img": img, + "txt": txt, + "vec": double_mod, + "pe": pe, + "attn_mask": attn_mask}, + {"original_block": block_wrap}) + txt = out["txt"] + img = out["img"] + else: + img, txt = block(img=img, + txt=txt, + vec=double_mod, + pe=pe, + attn_mask=attn_mask) + + if control is not None: # Controlnet + control_i = control.get("input") + if i < len(control_i): + add = control_i[i] + if add is not None: + img += add + + img = torch.cat((txt, img), 1) + + for i, block in enumerate(self.single_blocks): + if i not in self.skip_dit: + single_mod = self.get_modulations(mod_vectors, "single", idx=i) + if ("single_block", i) in blocks_replace: + def block_wrap(args): + out = {} + out["img"] = block(args["img"], + vec=args["vec"], + pe=args["pe"], + attn_mask=args.get("attn_mask")) + return out + + out = blocks_replace[("single_block", i)]({"img": img, + "vec": single_mod, + "pe": pe, + "attn_mask": attn_mask}, + {"original_block": block_wrap}) + img = out["img"] + else: + img = block(img, vec=single_mod, pe=pe, attn_mask=attn_mask) + + if control is not None: # Controlnet + control_o = control.get("output") + if i < len(control_o): + add = control_o[i] + if add is not None: + img[:, txt.shape[1] :, ...] 
+= add + + img = img[:, txt.shape[1] :, ...] + final_mod = self.get_modulations(mod_vectors, "final") + img = self.final_layer(img, vec=final_mod) # (N, T, patch_size ** 2 * out_channels) + return img + + def forward(self, x, timestep, context, guidance, control=None, transformer_options={}, **kwargs): + bs, c, h, w = x.shape + patch_size = 2 + x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size)) + + img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size) + + h_len = ((h + (patch_size // 2)) // patch_size) + w_len = ((w + (patch_size // 2)) // patch_size) + img_ids = torch.zeros((h_len, w_len, 3), device=x.device, dtype=x.dtype) + img_ids[:, :, 1] = img_ids[:, :, 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype).unsqueeze(1) + img_ids[:, :, 2] = img_ids[:, :, 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0) + img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs) + + txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype) + out = self.forward_orig(img, img_ids, context, txt_ids, timestep, guidance, control, transformer_options, attn_mask=kwargs.get("attention_mask", None)) + return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h,:w] diff --git a/comfy/ldm/cosmos/blocks.py b/comfy/ldm/cosmos/blocks.py index 84fd6d83..a12f892d 100644 --- a/comfy/ldm/cosmos/blocks.py +++ b/comfy/ldm/cosmos/blocks.py @@ -23,7 +23,6 @@ from einops import rearrange, repeat from einops.layers.torch import Rearrange from torch import nn -from comfy.ldm.modules.diffusionmodules.mmdit import RMSNorm from comfy.ldm.modules.attention import optimized_attention @@ -37,11 +36,11 @@ def apply_rotary_pos_emb( return t_out -def get_normalization(name: str, channels: int, weight_args={}): +def get_normalization(name: str, channels: int, weight_args={}, operations=None): if name == "I": return nn.Identity() elif name == "R": - return RMSNorm(channels, elementwise_affine=True, eps=1e-6, **weight_args) + return operations.RMSNorm(channels, elementwise_affine=True, eps=1e-6, **weight_args) else: raise ValueError(f"Normalization {name} not found") @@ -120,15 +119,15 @@ class Attention(nn.Module): self.to_q = nn.Sequential( operations.Linear(query_dim, inner_dim, bias=qkv_bias, **weight_args), - get_normalization(qkv_norm[0], norm_dim), + get_normalization(qkv_norm[0], norm_dim, weight_args=weight_args, operations=operations), ) self.to_k = nn.Sequential( operations.Linear(context_dim, inner_dim, bias=qkv_bias, **weight_args), - get_normalization(qkv_norm[1], norm_dim), + get_normalization(qkv_norm[1], norm_dim, weight_args=weight_args, operations=operations), ) self.to_v = nn.Sequential( operations.Linear(context_dim, inner_dim, bias=qkv_bias, **weight_args), - get_normalization(qkv_norm[2], norm_dim), + get_normalization(qkv_norm[2], norm_dim, weight_args=weight_args, operations=operations), ) self.to_out = nn.Sequential( diff --git a/comfy/ldm/cosmos/model.py b/comfy/ldm/cosmos/model.py index 06d0baef..4836e0b6 100644 --- a/comfy/ldm/cosmos/model.py +++ b/comfy/ldm/cosmos/model.py @@ -27,8 +27,6 @@ from torchvision import transforms from enum import Enum import logging -from comfy.ldm.modules.diffusionmodules.mmdit import RMSNorm - from .blocks import ( FinalLayer, GeneralDITTransformerBlock, @@ -195,7 +193,7 @@ class GeneralDIT(nn.Module): if self.affline_emb_norm: logging.debug("Building affine embedding normalization layer") - 
self.affline_norm = RMSNorm(model_channels, elementwise_affine=True, eps=1e-6) + self.affline_norm = operations.RMSNorm(model_channels, elementwise_affine=True, eps=1e-6, device=device, dtype=dtype) else: self.affline_norm = nn.Identity() diff --git a/comfy/ldm/genmo/joint_model/asymm_models_joint.py b/comfy/ldm/genmo/joint_model/asymm_models_joint.py index 2c46c24b..366a8b71 100644 --- a/comfy/ldm/genmo/joint_model/asymm_models_joint.py +++ b/comfy/ldm/genmo/joint_model/asymm_models_joint.py @@ -13,7 +13,6 @@ from comfy.ldm.modules.attention import optimized_attention from .layers import ( FeedForward, PatchEmbed, - RMSNorm, TimestepEmbedder, ) @@ -90,10 +89,10 @@ class AsymmetricAttention(nn.Module): # Query and key normalization for stability. assert qk_norm - self.q_norm_x = RMSNorm(self.head_dim, device=device, dtype=dtype) - self.k_norm_x = RMSNorm(self.head_dim, device=device, dtype=dtype) - self.q_norm_y = RMSNorm(self.head_dim, device=device, dtype=dtype) - self.k_norm_y = RMSNorm(self.head_dim, device=device, dtype=dtype) + self.q_norm_x = operations.RMSNorm(self.head_dim, eps=1e-5, device=device, dtype=dtype) + self.k_norm_x = operations.RMSNorm(self.head_dim, eps=1e-5, device=device, dtype=dtype) + self.q_norm_y = operations.RMSNorm(self.head_dim, eps=1e-5, device=device, dtype=dtype) + self.k_norm_y = operations.RMSNorm(self.head_dim, eps=1e-5, device=device, dtype=dtype) # Output layers. y features go back down from dim_x -> dim_y. self.proj_x = operations.Linear(dim_x, dim_x, bias=out_bias, device=device, dtype=dtype) diff --git a/comfy/ldm/genmo/joint_model/layers.py b/comfy/ldm/genmo/joint_model/layers.py index 51d97955..e310bd71 100644 --- a/comfy/ldm/genmo/joint_model/layers.py +++ b/comfy/ldm/genmo/joint_model/layers.py @@ -151,14 +151,3 @@ class PatchEmbed(nn.Module): x = self.norm(x) return x - - -class RMSNorm(torch.nn.Module): - def __init__(self, hidden_size, eps=1e-5, device=None, dtype=None): - super().__init__() - self.eps = eps - self.weight = torch.nn.Parameter(torch.empty(hidden_size, device=device, dtype=dtype)) - self.register_parameter("bias", None) - - def forward(self, x): - return comfy.ldm.common_dit.rms_norm(x, self.weight, self.eps) diff --git a/comfy/ldm/hidream/model.py b/comfy/ldm/hidream/model.py index fcb5a9c5..0305747b 100644 --- a/comfy/ldm/hidream/model.py +++ b/comfy/ldm/hidream/model.py @@ -699,10 +699,13 @@ class HiDreamImageTransformer2DModel(nn.Module): y: Optional[torch.Tensor] = None, context: Optional[torch.Tensor] = None, encoder_hidden_states_llama3=None, + image_cond=None, control = None, transformer_options = {}, ) -> torch.Tensor: bs, c, h, w = x.shape + if image_cond is not None: + x = torch.cat([x, image_cond], dim=-1) hidden_states = comfy.ldm.common_dit.pad_to_patch_size(x, (self.patch_size, self.patch_size)) timesteps = t pooled_embeds = y diff --git a/comfy/ldm/hydit/models.py b/comfy/ldm/hydit/models.py index 359f6a96..5ba2b76e 100644 --- a/comfy/ldm/hydit/models.py +++ b/comfy/ldm/hydit/models.py @@ -3,7 +3,7 @@ import torch import torch.nn as nn import comfy.ops -from comfy.ldm.modules.diffusionmodules.mmdit import Mlp, TimestepEmbedder, PatchEmbed, RMSNorm +from comfy.ldm.modules.diffusionmodules.mmdit import Mlp, TimestepEmbedder, PatchEmbed from comfy.ldm.modules.diffusionmodules.util import timestep_embedding from torch.utils import checkpoint @@ -51,7 +51,7 @@ class HunYuanDiTBlock(nn.Module): if norm_type == "layer": norm_layer = operations.LayerNorm elif norm_type == "rms": - norm_layer = RMSNorm + norm_layer = 
operations.RMSNorm else: raise ValueError(f"Unknown norm_type: {norm_type}") diff --git a/comfy/ldm/lightricks/model.py b/comfy/ldm/lightricks/model.py index 6e8e0618..056e101a 100644 --- a/comfy/ldm/lightricks/model.py +++ b/comfy/ldm/lightricks/model.py @@ -1,7 +1,6 @@ import torch from torch import nn import comfy.ldm.modules.attention -from comfy.ldm.genmo.joint_model.layers import RMSNorm import comfy.ldm.common_dit from einops import rearrange import math @@ -262,8 +261,8 @@ class CrossAttention(nn.Module): self.heads = heads self.dim_head = dim_head - self.q_norm = RMSNorm(inner_dim, dtype=dtype, device=device) - self.k_norm = RMSNorm(inner_dim, dtype=dtype, device=device) + self.q_norm = operations.RMSNorm(inner_dim, dtype=dtype, device=device) + self.k_norm = operations.RMSNorm(inner_dim, dtype=dtype, device=device) self.to_q = operations.Linear(query_dim, inner_dim, bias=True, dtype=dtype, device=device) self.to_k = operations.Linear(context_dim, inner_dim, bias=True, dtype=dtype, device=device) diff --git a/comfy/ldm/lumina/model.py b/comfy/ldm/lumina/model.py index ccd5d2c0..f8dc4d7d 100644 --- a/comfy/ldm/lumina/model.py +++ b/comfy/ldm/lumina/model.py @@ -8,7 +8,7 @@ import torch.nn as nn import torch.nn.functional as F import comfy.ldm.common_dit -from comfy.ldm.modules.diffusionmodules.mmdit import TimestepEmbedder, RMSNorm +from comfy.ldm.modules.diffusionmodules.mmdit import TimestepEmbedder from comfy.ldm.modules.attention import optimized_attention_masked from comfy.ldm.flux.layers import EmbedND @@ -64,8 +64,8 @@ class JointAttention(nn.Module): ) if qk_norm: - self.q_norm = RMSNorm(self.head_dim, elementwise_affine=True, **operation_settings) - self.k_norm = RMSNorm(self.head_dim, elementwise_affine=True, **operation_settings) + self.q_norm = operation_settings.get("operations").RMSNorm(self.head_dim, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) + self.k_norm = operation_settings.get("operations").RMSNorm(self.head_dim, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) else: self.q_norm = self.k_norm = nn.Identity() @@ -242,11 +242,11 @@ class JointTransformerBlock(nn.Module): operation_settings=operation_settings, ) self.layer_id = layer_id - self.attention_norm1 = RMSNorm(dim, eps=norm_eps, elementwise_affine=True, **operation_settings) - self.ffn_norm1 = RMSNorm(dim, eps=norm_eps, elementwise_affine=True, **operation_settings) + self.attention_norm1 = operation_settings.get("operations").RMSNorm(dim, eps=norm_eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) + self.ffn_norm1 = operation_settings.get("operations").RMSNorm(dim, eps=norm_eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) - self.attention_norm2 = RMSNorm(dim, eps=norm_eps, elementwise_affine=True, **operation_settings) - self.ffn_norm2 = RMSNorm(dim, eps=norm_eps, elementwise_affine=True, **operation_settings) + self.attention_norm2 = operation_settings.get("operations").RMSNorm(dim, eps=norm_eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) + self.ffn_norm2 = operation_settings.get("operations").RMSNorm(dim, eps=norm_eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) self.modulation = modulation if modulation: @@ -431,7 +431,7 @@ class NextDiT(nn.Module): 
self.t_embedder = TimestepEmbedder(min(dim, 1024), **operation_settings) self.cap_embedder = nn.Sequential( - RMSNorm(cap_feat_dim, eps=norm_eps, elementwise_affine=True, **operation_settings), + operation_settings.get("operations").RMSNorm(cap_feat_dim, eps=norm_eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")), operation_settings.get("operations").Linear( cap_feat_dim, dim, @@ -457,7 +457,7 @@ class NextDiT(nn.Module): for layer_id in range(n_layers) ] ) - self.norm_final = RMSNorm(dim, eps=norm_eps, elementwise_affine=True, **operation_settings) + self.norm_final = operation_settings.get("operations").RMSNorm(dim, eps=norm_eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) self.final_layer = FinalLayer(dim, patch_size, self.out_channels, operation_settings=operation_settings) assert (dim // n_heads) == sum(axes_dims) diff --git a/comfy/ldm/wan/model.py b/comfy/ldm/wan/model.py index b8eec3af..fc5ff40c 100644 --- a/comfy/ldm/wan/model.py +++ b/comfy/ldm/wan/model.py @@ -9,7 +9,6 @@ from einops import repeat from comfy.ldm.modules.attention import optimized_attention from comfy.ldm.flux.layers import EmbedND from comfy.ldm.flux.math import apply_rope -from comfy.ldm.modules.diffusionmodules.mmdit import RMSNorm import comfy.ldm.common_dit import comfy.model_management @@ -49,8 +48,8 @@ class WanSelfAttention(nn.Module): self.k = operation_settings.get("operations").Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) self.v = operation_settings.get("operations").Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) self.o = operation_settings.get("operations").Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) - self.norm_q = RMSNorm(dim, eps=eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) if qk_norm else nn.Identity() - self.norm_k = RMSNorm(dim, eps=eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) if qk_norm else nn.Identity() + self.norm_q = operation_settings.get("operations").RMSNorm(dim, eps=eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) if qk_norm else nn.Identity() + self.norm_k = operation_settings.get("operations").RMSNorm(dim, eps=eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) if qk_norm else nn.Identity() def forward(self, x, freqs): r""" @@ -114,7 +113,7 @@ class WanI2VCrossAttention(WanSelfAttention): self.k_img = operation_settings.get("operations").Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) self.v_img = operation_settings.get("operations").Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) # self.alpha = nn.Parameter(torch.zeros((1, ))) - self.norm_k_img = RMSNorm(dim, eps=eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) if qk_norm else nn.Identity() + self.norm_k_img = operation_settings.get("operations").RMSNorm(dim, eps=eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) if qk_norm else nn.Identity() def forward(self, x, context, context_img_len): r""" @@ -631,6 
+630,7 @@ class VaceWanModel(WanModel): if ii is not None: c_skip, c = self.vace_blocks[ii](c, x=x_orig, e=e0, freqs=freqs, context=context, context_img_len=context_img_len) x += c_skip * vace_strength + del c_skip # head x = self.head(x, e) diff --git a/comfy/lora.py b/comfy/lora.py index 8760a21f..fff524be 100644 --- a/comfy/lora.py +++ b/comfy/lora.py @@ -279,6 +279,13 @@ def model_lora_keys_unet(model, key_map={}): key_map["transformer.{}".format(key_lora)] = k key_map["diffusion_model.{}".format(key_lora)] = k # Old loras + if isinstance(model, comfy.model_base.HiDream): + for k in sdk: + if k.startswith("diffusion_model."): + if k.endswith(".weight"): + key_lora = k[len("diffusion_model."):-len(".weight")].replace(".", "_") + key_map["lycoris_{}".format(key_lora)] = k #SimpleTuner lycoris format + return key_map diff --git a/comfy/model_base.py b/comfy/model_base.py index b0c6a465..6d27930d 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -38,6 +38,8 @@ import comfy.ldm.lumina.model import comfy.ldm.wan.model import comfy.ldm.hunyuan3d.model import comfy.ldm.hidream.model +import comfy.ldm.chroma.model +import comfy.ldm.ace.model import comfy.model_management import comfy.patcher_extension @@ -786,8 +788,8 @@ class PixArt(BaseModel): return out class Flux(BaseModel): - def __init__(self, model_config, model_type=ModelType.FLUX, device=None): - super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.flux.model.Flux) + def __init__(self, model_config, model_type=ModelType.FLUX, device=None, unet_model=comfy.ldm.flux.model.Flux): + super().__init__(model_config, model_type, device=device, unet_model=unet_model) def concat_cond(self, **kwargs): try: @@ -1104,4 +1106,38 @@ class HiDream(BaseModel): conditioning_llama3 = kwargs.get("conditioning_llama3", None) if conditioning_llama3 is not None: out['encoder_hidden_states_llama3'] = comfy.conds.CONDRegular(conditioning_llama3) + image_cond = kwargs.get("concat_latent_image", None) + if image_cond is not None: + out['image_cond'] = comfy.conds.CONDNoiseShape(self.process_latent_in(image_cond)) + return out + +class Chroma(Flux): + def __init__(self, model_config, model_type=ModelType.FLUX, device=None): + super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.chroma.model.Chroma) + + def extra_conds(self, **kwargs): + out = super().extra_conds(**kwargs) + + guidance = kwargs.get("guidance", 0) + if guidance is not None: + out['guidance'] = comfy.conds.CONDRegular(torch.FloatTensor([guidance])) + return out + +class ACEStep(BaseModel): + def __init__(self, model_config, model_type=ModelType.FLOW, device=None): + super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.ace.model.ACEStepTransformer2DModel) + + def extra_conds(self, **kwargs): + out = super().extra_conds(**kwargs) + noise = kwargs.get("noise", None) + + cross_attn = kwargs.get("cross_attn", None) + if cross_attn is not None: + out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn) + + conditioning_lyrics = kwargs.get("conditioning_lyrics", None) + if conditioning_lyrics is not None: + out['lyric_token_idx'] = comfy.conds.CONDRegular(conditioning_lyrics) + out['speaker_embeds'] = comfy.conds.CONDRegular(torch.zeros(noise.shape[0], 512, device=noise.device, dtype=noise.dtype)) + out['lyrics_strength'] = comfy.conds.CONDConstant(kwargs.get("lyrics_strength", 1.0)) return out diff --git a/comfy/model_detection.py b/comfy/model_detection.py index 76de78a8..28c58638 100644 --- a/comfy/model_detection.py +++
b/comfy/model_detection.py @@ -164,7 +164,9 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): if in_key in state_dict_keys: dit_config["in_channels"] = state_dict[in_key].shape[1] // (patch_size * patch_size) dit_config["out_channels"] = 16 - dit_config["vec_in_dim"] = 768 + vec_in_key = '{}vector_in.in_layer.weight'.format(key_prefix) + if vec_in_key in state_dict_keys: + dit_config["vec_in_dim"] = state_dict[vec_in_key].shape[1] dit_config["context_in_dim"] = 4096 dit_config["hidden_size"] = 3072 dit_config["mlp_ratio"] = 4.0 @@ -174,7 +176,16 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): dit_config["axes_dim"] = [16, 56, 56] dit_config["theta"] = 10000 dit_config["qkv_bias"] = True - dit_config["guidance_embed"] = "{}guidance_in.in_layer.weight".format(key_prefix) in state_dict_keys + if '{}distilled_guidance_layer.0.norms.0.scale'.format(key_prefix) in state_dict_keys or '{}distilled_guidance_layer.norms.0.scale'.format(key_prefix) in state_dict_keys: #Chroma + dit_config["image_model"] = "chroma" + dit_config["in_channels"] = 64 + dit_config["out_channels"] = 64 + dit_config["in_dim"] = 64 + dit_config["out_dim"] = 3072 + dit_config["hidden_dim"] = 5120 + dit_config["n_layers"] = 5 + else: + dit_config["guidance_embed"] = "{}guidance_in.in_layer.weight".format(key_prefix) in state_dict_keys return dit_config if '{}t5_yproj.weight'.format(key_prefix) in state_dict_keys: #Genmo mochi preview @@ -211,10 +222,39 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): if '{}adaln_single.emb.timestep_embedder.linear_1.bias'.format(key_prefix) in state_dict_keys: #Lightricks ltxv dit_config = {} dit_config["image_model"] = "ltxv" + dit_config["num_layers"] = count_blocks(state_dict_keys, '{}transformer_blocks.'.format(key_prefix) + '{}.') + shape = state_dict['{}transformer_blocks.0.attn2.to_k.weight'.format(key_prefix)].shape + dit_config["attention_head_dim"] = shape[0] // 32 + dit_config["cross_attention_dim"] = shape[1] if metadata is not None and "config" in metadata: dit_config.update(json.loads(metadata["config"]).get("transformer", {})) return dit_config + if '{}genre_embedder.weight'.format(key_prefix) in state_dict_keys: #ACE-Step model + dit_config = {} + dit_config["audio_model"] = "ace" + dit_config["attention_head_dim"] = 128 + dit_config["in_channels"] = 8 + dit_config["inner_dim"] = 2560 + dit_config["max_height"] = 16 + dit_config["max_position"] = 32768 + dit_config["max_width"] = 32768 + dit_config["mlp_ratio"] = 2.5 + dit_config["num_attention_heads"] = 20 + dit_config["num_layers"] = 24 + dit_config["out_channels"] = 8 + dit_config["patch_size"] = [16, 1] + dit_config["rope_theta"] = 1000000.0 + dit_config["speaker_embedding_dim"] = 512 + dit_config["text_embedding_dim"] = 768 + + dit_config["ssl_encoder_depths"] = [8, 8] + dit_config["ssl_latent_dims"] = [1024, 768] + dit_config["ssl_names"] = ["mert", "m-hubert"] + dit_config["lyric_encoder_vocab_size"] = 6693 + dit_config["lyric_hidden_size"] = 1024 + return dit_config + if '{}t_block.1.weight'.format(key_prefix) in state_dict_keys: # PixArt patch_size = 2 dit_config = {} diff --git a/comfy/model_management.py b/comfy/model_management.py index 3db32ad8..c2b41ce0 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -967,15 +967,61 @@ def force_channels_last(): #TODO return False -def cast_to(weight, dtype=None, device=None, non_blocking=False, copy=False): + +STREAMS = {} +NUM_STREAMS = 1 +if args.async_offload: + NUM_STREAMS = 2 + logging.info("Using 
async weight offloading with {} streams".format(NUM_STREAMS)) + +stream_counters = {} +def get_offload_stream(device): + stream_counter = stream_counters.get(device, 0) + if NUM_STREAMS <= 1: + return None + + if device in STREAMS: + ss = STREAMS[device] + s = ss[stream_counter] + stream_counter = (stream_counter + 1) % len(ss) + if is_device_cuda(device): + ss[stream_counter].wait_stream(torch.cuda.current_stream()) + stream_counters[device] = stream_counter + return s + elif is_device_cuda(device): + ss = [] + for k in range(NUM_STREAMS): + ss.append(torch.cuda.Stream(device=device, priority=0)) + STREAMS[device] = ss + s = ss[stream_counter] + stream_counter = (stream_counter + 1) % len(ss) + stream_counters[device] = stream_counter + return s + return None + +def sync_stream(device, stream): + if stream is None: + return + if is_device_cuda(device): + torch.cuda.current_stream().wait_stream(stream) + +def cast_to(weight, dtype=None, device=None, non_blocking=False, copy=False, stream=None): if device is None or weight.device == device: if not copy: if dtype is None or weight.dtype == dtype: return weight + if stream is not None: + with stream: + return weight.to(dtype=dtype, copy=copy) return weight.to(dtype=dtype, copy=copy) - r = torch.empty_like(weight, dtype=dtype, device=device) - r.copy_(weight, non_blocking=non_blocking) + if stream is not None: + with stream: + r = torch.empty_like(weight, dtype=dtype, device=device) + r.copy_(weight, non_blocking=non_blocking) + else: + r = torch.empty_like(weight, dtype=dtype, device=device) + r.copy_(weight, non_blocking=non_blocking) return r def cast_to_device(tensor, device, dtype, copy=False): diff --git a/comfy/model_sampling.py b/comfy/model_sampling.py index b79af1e9..7e729147 100644 --- a/comfy/model_sampling.py +++ b/comfy/model_sampling.py @@ -111,13 +111,14 @@ class ModelSamplingDiscrete(torch.nn.Module): self.num_timesteps = int(timesteps) self.linear_start = linear_start self.linear_end = linear_end + self.zsnr = zsnr # self.register_buffer('betas', torch.tensor(betas, dtype=torch.float32)) # self.register_buffer('alphas_cumprod', torch.tensor(alphas_cumprod, dtype=torch.float32)) # self.register_buffer('alphas_cumprod_prev', torch.tensor(alphas_cumprod_prev, dtype=torch.float32)) sigmas = ((1 - alphas_cumprod) / alphas_cumprod) ** 0.5 - if zsnr: + if self.zsnr: sigmas = rescale_zero_terminal_snr_sigmas(sigmas) self.set_sigmas(sigmas) diff --git a/comfy/ops.py b/comfy/ops.py index aae6cafa..431c8f89 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -22,6 +22,7 @@ import comfy.model_management from comfy.cli_args import args, PerformanceFeature import comfy.float import comfy.rmsnorm +import contextlib cast_to = comfy.model_management.cast_to #TODO: remove once no more references @@ -37,20 +38,31 @@ def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None): if device is None: device = input.device + offload_stream = comfy.model_management.get_offload_stream(device) + if offload_stream is not None: + wf_context = offload_stream + else: + wf_context = contextlib.nullcontext() + bias = None non_blocking = comfy.model_management.device_supports_non_blocking(device) if s.bias is not None: has_function = len(s.bias_function) > 0 - bias = comfy.model_management.cast_to(s.bias, bias_dtype, device, non_blocking=non_blocking, copy=has_function) + bias = comfy.model_management.cast_to(s.bias, bias_dtype, device, non_blocking=non_blocking, copy=has_function, stream=offload_stream) + if has_function: - for f in 
s.bias_function: - bias = f(bias) + with wf_context: + for f in s.bias_function: + bias = f(bias) has_function = len(s.weight_function) > 0 - weight = comfy.model_management.cast_to(s.weight, dtype, device, non_blocking=non_blocking, copy=has_function) + weight = comfy.model_management.cast_to(s.weight, dtype, device, non_blocking=non_blocking, copy=has_function, stream=offload_stream) if has_function: - for f in s.weight_function: - weight = f(weight) + with wf_context: + for f in s.weight_function: + weight = f(weight) + + comfy.model_management.sync_stream(device, offload_stream) return weight, bias class CastWeightBiasOp: @@ -296,10 +308,10 @@ def fp8_linear(self, input): if scale_input is None: scale_input = torch.ones((), device=input.device, dtype=torch.float32) input = torch.clamp(input, min=-448, max=448, out=input) - input = input.reshape(-1, input_shape[2]).to(dtype) + input = input.reshape(-1, input_shape[2]).to(dtype).contiguous() else: scale_input = scale_input.to(input.device) - input = (input * (1.0 / scale_input).to(input_dtype)).reshape(-1, input_shape[2]).to(dtype) + input = (input * (1.0 / scale_input).to(input_dtype)).reshape(-1, input_shape[2]).to(dtype).contiguous() if bias is not None: o = torch._scaled_mm(input, w, out_dtype=input_dtype, bias=bias, scale_a=scale_input, scale_b=scale_weight) diff --git a/comfy/samplers.py b/comfy/samplers.py index b76e75de..26052766 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -903,7 +903,7 @@ KSAMPLER_NAMES = ["euler", "euler_cfg_pp", "euler_ancestral", "euler_ancestral_c "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_2s_ancestral_cfg_pp", "dpmpp_sde", "dpmpp_sde_gpu", "dpmpp_2m", "dpmpp_2m_cfg_pp", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "lcm", "ipndm", "ipndm_v", "deis", "res_multistep", "res_multistep_cfg_pp", "res_multistep_ancestral", "res_multistep_ancestral_cfg_pp", - "gradient_estimation", "er_sde", "seeds_2", "seeds_3"] + "gradient_estimation", "gradient_estimation_cfg_pp", "er_sde", "seeds_2", "seeds_3"] class KSAMPLER(Sampler): def __init__(self, sampler_function, extra_options={}, inpaint_options={}): diff --git a/comfy/sd.py b/comfy/sd.py index 8aba5d65..e98a3aa8 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -15,6 +15,7 @@ import comfy.ldm.lightricks.vae.causal_video_autoencoder import comfy.ldm.cosmos.vae import comfy.ldm.wan.vae import comfy.ldm.hunyuan3d.vae +import comfy.ldm.ace.vae.music_dcae_pipeline import yaml import math @@ -42,6 +43,7 @@ import comfy.text_encoders.cosmos import comfy.text_encoders.lumina2 import comfy.text_encoders.wan import comfy.text_encoders.hidream +import comfy.text_encoders.ace import comfy.model_patcher import comfy.lora @@ -120,6 +122,7 @@ class CLIP: self.layer_idx = None self.use_clip_schedule = False logging.info("CLIP/text encoder model load device: {}, offload device: {}, current: {}, dtype: {}".format(load_device, offload_device, params['device'], dtype)) + self.tokenizer_options = {} def clone(self): n = CLIP(no_init=True) @@ -127,6 +130,7 @@ class CLIP: n.cond_stage_model = self.cond_stage_model n.tokenizer = self.tokenizer n.layer_idx = self.layer_idx + n.tokenizer_options = self.tokenizer_options.copy() n.use_clip_schedule = self.use_clip_schedule n.apply_hooks_to_conds = self.apply_hooks_to_conds return n @@ -134,10 +138,18 @@ class CLIP: def add_patches(self, patches, strength_patch=1.0, strength_model=1.0): return self.patcher.add_patches(patches, strength_patch, strength_model) + def 
set_tokenizer_option(self, option_name, value): + self.tokenizer_options[option_name] = value + def clip_layer(self, layer_idx): self.layer_idx = layer_idx def tokenize(self, text, return_word_ids=False, **kwargs): + tokenizer_options = kwargs.get("tokenizer_options", {}) + if len(self.tokenizer_options) > 0: + tokenizer_options = {**self.tokenizer_options, **tokenizer_options} + if len(tokenizer_options) > 0: + kwargs["tokenizer_options"] = tokenizer_options return self.tokenizer.tokenize_with_weights(text, return_word_ids, **kwargs) def add_hooks_to_dict(self, pooled_dict: dict[str]): @@ -270,6 +282,7 @@ class VAE: self.downscale_index_formula = None self.upscale_index_formula = None + self.extra_1d_channel = None if config is None: if "decoder.mid.block_1.mix_factor" in sd: @@ -427,6 +440,20 @@ class VAE: ddconfig = {"embed_dim": 64, "num_freqs": 8, "include_pi": False, "heads": 16, "width": 1024, "num_decoder_layers": 16, "qkv_bias": False, "qk_norm": True, "geo_decoder_mlp_expand_ratio": mlp_expand, "geo_decoder_downsample_ratio": downsample_ratio, "geo_decoder_ln_post": ln_post} self.first_stage_model = comfy.ldm.hunyuan3d.vae.ShapeVAE(**ddconfig) self.working_dtypes = [torch.float16, torch.bfloat16, torch.float32] + elif "vocoder.backbone.channel_layers.0.0.bias" in sd: #Ace Step Audio + self.first_stage_model = comfy.ldm.ace.vae.music_dcae_pipeline.MusicDCAE(source_sample_rate=44100) + self.memory_used_encode = lambda shape, dtype: (shape[2] * 330) * model_management.dtype_size(dtype) + self.memory_used_decode = lambda shape, dtype: (shape[2] * shape[3] * 87000) * model_management.dtype_size(dtype) + self.latent_channels = 8 + self.output_channels = 2 + self.upscale_ratio = 4096 + self.downscale_ratio = 4096 + self.latent_dim = 2 + self.process_output = lambda audio: audio + self.process_input = lambda audio: audio + self.working_dtypes = [torch.bfloat16, torch.float16, torch.float32] + self.disable_offload = True + self.extra_1d_channel = 16 else: logging.warning("WARNING: No VAE weights detected, VAE not initalized.") self.first_stage_model = None @@ -485,7 +512,13 @@ class VAE: return output def decode_tiled_1d(self, samples, tile_x=128, overlap=32): - decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).float() + if samples.ndim == 3: + decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).float() + else: + og_shape = samples.shape + samples = samples.reshape((og_shape[0], og_shape[1] * og_shape[2], -1)) + decode_fn = lambda a: self.first_stage_model.decode(a.reshape((-1, og_shape[1], og_shape[2], a.shape[-1])).to(self.vae_dtype).to(self.device)).float() + return self.process_output(comfy.utils.tiled_scale_multidim(samples, decode_fn, tile=(tile_x,), overlap=overlap, upscale_amount=self.upscale_ratio, out_channels=self.output_channels, output_device=self.output_device)) def decode_tiled_3d(self, samples, tile_t=999, tile_x=32, tile_y=32, overlap=(1, 8, 8)): @@ -505,9 +538,24 @@ class VAE: samples /= 3.0 return samples - def encode_tiled_1d(self, samples, tile_x=128 * 2048, overlap=32 * 2048): - encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).float() - return comfy.utils.tiled_scale_multidim(samples, encode_fn, tile=(tile_x,), overlap=overlap, upscale_amount=(1/self.downscale_ratio), out_channels=self.latent_channels, output_device=self.output_device) + def encode_tiled_1d(self, samples, tile_x=256 * 2048, overlap=64 * 2048): + if 
self.latent_dim == 1: + encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).float() + out_channels = self.latent_channels + upscale_amount = 1 / self.downscale_ratio + else: + extra_channel_size = self.extra_1d_channel + out_channels = self.latent_channels * extra_channel_size + tile_x = tile_x // extra_channel_size + overlap = overlap // extra_channel_size + upscale_amount = 1 / self.downscale_ratio + encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).reshape(1, out_channels, -1).float() + + out = comfy.utils.tiled_scale_multidim(samples, encode_fn, tile=(tile_x,), overlap=overlap, upscale_amount=upscale_amount, out_channels=out_channels, output_device=self.output_device) + if self.latent_dim == 1: + return out + else: + return out.reshape(samples.shape[0], self.latent_channels, extra_channel_size, -1) def encode_tiled_3d(self, samples, tile_t=9999, tile_x=512, tile_y=512, overlap=(1, 64, 64)): encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).float() @@ -532,7 +580,7 @@ class VAE: except model_management.OOM_EXCEPTION: logging.warning("Warning: Ran out of memory when regular VAE decoding, retrying with tiled VAE decoding.") dims = samples_in.ndim - 2 - if dims == 1: + if dims == 1 or self.extra_1d_channel is not None: pixel_samples = self.decode_tiled_1d(samples_in) elif dims == 2: pixel_samples = self.decode_tiled_(samples_in) @@ -599,7 +647,7 @@ class VAE: tile = 256 overlap = tile // 4 samples = self.encode_tiled_3d(pixel_samples, tile_x=tile, tile_y=tile, overlap=(1, overlap, overlap)) - elif self.latent_dim == 1: + elif self.latent_dim == 1 or self.extra_1d_channel is not None: samples = self.encode_tiled_1d(pixel_samples) else: samples = self.encode_tiled_(pixel_samples) @@ -704,6 +752,8 @@ class CLIPType(Enum): LUMINA2 = 12 WAN = 13 HIDREAM = 14 + CHROMA = 15 + ACE = 16 def load_clip(ckpt_paths, embedding_directory=None, clip_type=CLIPType.STABLE_DIFFUSION, model_options={}): @@ -808,7 +858,7 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip elif clip_type == CLIPType.LTXV: clip_target.clip = comfy.text_encoders.lt.ltxv_te(**t5xxl_detect(clip_data)) clip_target.tokenizer = comfy.text_encoders.lt.LTXVT5Tokenizer - elif clip_type == CLIPType.PIXART: + elif clip_type == CLIPType.PIXART or clip_type == CLIPType.CHROMA: clip_target.clip = comfy.text_encoders.pixart_t5.pixart_te(**t5xxl_detect(clip_data)) clip_target.tokenizer = comfy.text_encoders.pixart_t5.PixArtTokenizer elif clip_type == CLIPType.WAN: @@ -829,8 +879,13 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip clip_target.clip = comfy.text_encoders.aura_t5.AuraT5Model clip_target.tokenizer = comfy.text_encoders.aura_t5.AuraT5Tokenizer elif te_model == TEModel.T5_BASE: - clip_target.clip = comfy.text_encoders.sa_t5.SAT5Model - clip_target.tokenizer = comfy.text_encoders.sa_t5.SAT5Tokenizer + if clip_type == CLIPType.ACE or "spiece_model" in clip_data[0]: + clip_target.clip = comfy.text_encoders.ace.AceT5Model + clip_target.tokenizer = comfy.text_encoders.ace.AceT5Tokenizer + tokenizer_data["spiece_model"] = clip_data[0].get("spiece_model", None) + else: + clip_target.clip = comfy.text_encoders.sa_t5.SAT5Model + clip_target.tokenizer = comfy.text_encoders.sa_t5.SAT5Tokenizer elif te_model == TEModel.GEMMA_2_2B: clip_target.clip = 
comfy.text_encoders.lumina2.te(**llama_detect(clip_data)) clip_target.tokenizer = comfy.text_encoders.lumina2.LuminaTokenizer diff --git a/comfy/sd1_clip.py b/comfy/sd1_clip.py index 2ca5ed9b..ac61babe 100644 --- a/comfy/sd1_clip.py +++ b/comfy/sd1_clip.py @@ -457,13 +457,14 @@ def load_embed(embedding_name, embedding_directory, embedding_size, embed_key=No return embed_out class SDTokenizer: - def __init__(self, tokenizer_path=None, max_length=77, pad_with_end=True, embedding_directory=None, embedding_size=768, embedding_key='clip_l', tokenizer_class=CLIPTokenizer, has_start_token=True, has_end_token=True, pad_to_max_length=True, min_length=None, pad_token=None, end_token=None, tokenizer_data={}, tokenizer_args={}): + def __init__(self, tokenizer_path=None, max_length=77, pad_with_end=True, embedding_directory=None, embedding_size=768, embedding_key='clip_l', tokenizer_class=CLIPTokenizer, has_start_token=True, has_end_token=True, pad_to_max_length=True, min_length=None, pad_token=None, end_token=None, min_padding=None, tokenizer_data={}, tokenizer_args={}): if tokenizer_path is None: tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd1_tokenizer") self.tokenizer = tokenizer_class.from_pretrained(tokenizer_path, **tokenizer_args) self.max_length = tokenizer_data.get("{}_max_length".format(embedding_key), max_length) self.min_length = min_length self.end_token = None + self.min_padding = min_padding empty = self.tokenizer('')["input_ids"] self.tokenizer_adds_end_token = has_end_token @@ -518,13 +519,15 @@ class SDTokenizer: return (embed, leftover) - def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): + def tokenize_with_weights(self, text:str, return_word_ids=False, tokenizer_options={}, **kwargs): ''' Takes a prompt and converts it to a list of (token, weight, word id) elements. Tokens can both be integer tokens and pre computed CLIP tensors. Word id values are unique per word and embedding, where the id 0 is reserved for non word tokens. 
Returned list has the dimensions NxM where M is the input size of CLIP ''' + min_length = tokenizer_options.get("{}_min_length".format(self.embedding_key), self.min_length) + min_padding = tokenizer_options.get("{}_min_padding".format(self.embedding_key), self.min_padding) text = escape_important(text) parsed_weights = token_weights(text, 1.0) @@ -603,10 +606,12 @@ class SDTokenizer: #fill last batch if self.end_token is not None: batch.append((self.end_token, 1.0, 0)) - if self.pad_to_max_length: + if min_padding is not None: + batch.extend([(self.pad_token, 1.0, 0)] * min_padding) + if self.pad_to_max_length and len(batch) < self.max_length: batch.extend([(self.pad_token, 1.0, 0)] * (self.max_length - len(batch))) - if self.min_length is not None and len(batch) < self.min_length: - batch.extend([(self.pad_token, 1.0, 0)] * (self.min_length - len(batch))) + if min_length is not None and len(batch) < min_length: + batch.extend([(self.pad_token, 1.0, 0)] * (min_length - len(batch))) if not return_word_ids: batched_tokens = [[(t, w) for t, w,_ in x] for x in batched_tokens] @@ -634,7 +639,7 @@ class SD1Tokenizer: def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): out = {} - out[self.clip_name] = getattr(self, self.clip).tokenize_with_weights(text, return_word_ids) + out[self.clip_name] = getattr(self, self.clip).tokenize_with_weights(text, return_word_ids, **kwargs) return out def untokenize(self, token_weight_pair): diff --git a/comfy/sdxl_clip.py b/comfy/sdxl_clip.py index ea7f5d10..c8cef14e 100644 --- a/comfy/sdxl_clip.py +++ b/comfy/sdxl_clip.py @@ -28,8 +28,8 @@ class SDXLTokenizer: def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): out = {} - out["g"] = self.clip_g.tokenize_with_weights(text, return_word_ids) - out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids) + out["g"] = self.clip_g.tokenize_with_weights(text, return_word_ids, **kwargs) + out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids, **kwargs) return out def untokenize(self, token_weight_pair): diff --git a/comfy/supported_models.py b/comfy/supported_models.py index 5e55035c..fef25eb2 100644 --- a/comfy/supported_models.py +++ b/comfy/supported_models.py @@ -17,6 +17,7 @@ import comfy.text_encoders.hunyuan_video import comfy.text_encoders.cosmos import comfy.text_encoders.lumina2 import comfy.text_encoders.wan +import comfy.text_encoders.ace from . import supported_models_base from . 
import latent_formats @@ -785,6 +786,10 @@ class LTXV(supported_models_base.BASE): vae_key_prefix = ["vae."] text_encoder_key_prefix = ["text_encoders."] + def __init__(self, unet_config): + super().__init__(unet_config) + self.memory_usage_factor = (unet_config.get("cross_attention_dim", 2048) / 2048) * 5.5 + def get_model(self, state_dict, prefix="", device=None): out = model_base.LTXV(self, device=device) return out @@ -993,6 +998,10 @@ class WAN21_Vace(WAN21_T2V): "model_type": "vace", } + def __init__(self, unet_config): + super().__init__(unet_config) + self.memory_usage_factor = 1.2 * self.memory_usage_factor + def get_model(self, state_dict, prefix="", device=None): out = model_base.WAN21_Vace(self, image_to_video=False, device=device) return out @@ -1064,7 +1073,62 @@ class HiDream(supported_models_base.BASE): def clip_target(self, state_dict={}): return None # TODO +class Chroma(supported_models_base.BASE): + unet_config = { + "image_model": "chroma", + } -models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, Lumina2, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, Hunyuan3Dv2mini, Hunyuan3Dv2, HiDream] + unet_extra_config = { + } + + sampling_settings = { + "multiplier": 1.0, + } + + latent_format = comfy.latent_formats.Flux + + memory_usage_factor = 3.2 + + supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32] + + + def get_model(self, state_dict, prefix="", device=None): + out = model_base.Chroma(self, device=device) + return out + + def clip_target(self, state_dict={}): + pref = self.text_encoder_key_prefix[0] + t5_detect = comfy.text_encoders.sd3_clip.t5_xxl_detect(state_dict, "{}t5xxl.transformer.".format(pref)) + return supported_models_base.ClipTarget(comfy.text_encoders.pixart_t5.PixArtTokenizer, comfy.text_encoders.pixart_t5.pixart_te(**t5_detect)) + +class ACEStep(supported_models_base.BASE): + unet_config = { + "audio_model": "ace", + } + + unet_extra_config = { + } + + sampling_settings = { + "shift": 3.0, + } + + latent_format = comfy.latent_formats.ACEAudio + + memory_usage_factor = 0.5 + + supported_inference_dtypes = [torch.bfloat16, torch.float32] + + vae_key_prefix = ["vae."] + text_encoder_key_prefix = ["text_encoders."] + + def get_model(self, state_dict, prefix="", device=None): + out = model_base.ACEStep(self, device=device) + return out + + def clip_target(self, state_dict={}): + return supported_models_base.ClipTarget(comfy.text_encoders.ace.AceT5Tokenizer, comfy.text_encoders.ace.AceT5Model) + +models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, Lumina2, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, Hunyuan3Dv2mini, Hunyuan3Dv2, HiDream, Chroma, ACEStep] models += [SVD_img2vid] diff --git a/comfy/text_encoders/ace.py 
b/comfy/text_encoders/ace.py new file mode 100644 index 00000000..d650bb10 --- /dev/null +++ b/comfy/text_encoders/ace.py @@ -0,0 +1,153 @@ +from comfy import sd1_clip +from .spiece_tokenizer import SPieceTokenizer +import comfy.text_encoders.t5 +import os +import re +import torch +import logging + +from tokenizers import Tokenizer +from .ace_text_cleaners import multilingual_cleaners, japanese_to_romaji + +SUPPORT_LANGUAGES = { + "en": 259, "de": 260, "fr": 262, "es": 284, "it": 285, + "pt": 286, "pl": 294, "tr": 295, "ru": 267, "cs": 293, + "nl": 297, "ar": 5022, "zh": 5023, "ja": 5412, "hu": 5753, + "ko": 6152, "hi": 6680 +} + +structure_pattern = re.compile(r"\[.*?\]") + +DEFAULT_VOCAB_FILE = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "ace_lyrics_tokenizer"), "vocab.json") + + +class VoiceBpeTokenizer: + def __init__(self, vocab_file=DEFAULT_VOCAB_FILE): + self.tokenizer = None + if vocab_file is not None: + self.tokenizer = Tokenizer.from_file(vocab_file) + + def preprocess_text(self, txt, lang): + txt = multilingual_cleaners(txt, lang) + return txt + + def encode(self, txt, lang='en'): + # lang = lang.split("-")[0] # remove the region + # self.check_input_length(txt, lang) + txt = self.preprocess_text(txt, lang) + lang = "zh-cn" if lang == "zh" else lang + txt = f"[{lang}]{txt}" + txt = txt.replace(" ", "[SPACE]") + return self.tokenizer.encode(txt).ids + + def get_lang(self, line): + if line.startswith("[") and line[3:4] == ']': + lang = line[1:3].lower() + if lang in SUPPORT_LANGUAGES: + return lang, line[4:] + return "en", line + + def __call__(self, string): + lines = string.split("\n") + lyric_token_idx = [261] + for line in lines: + line = line.strip() + if not line: + lyric_token_idx += [2] + continue + + lang, line = self.get_lang(line) + + if lang not in SUPPORT_LANGUAGES: + lang = "en" + if "zh" in lang: + lang = "zh" + if "spa" in lang: + lang = "es" + + try: + line_out = japanese_to_romaji(line) + if line_out != line: + lang = "ja" + line = line_out + except: + pass + + try: + if structure_pattern.match(line): + token_idx = self.encode(line, "en") + else: + token_idx = self.encode(line, lang) + lyric_token_idx = lyric_token_idx + token_idx + [2] + except Exception as e: + logging.warning("tokenize error {} for line {} major_language {}".format(e, line, lang)) + return {"input_ids": lyric_token_idx} + + @staticmethod + def from_pretrained(path, **kwargs): + return VoiceBpeTokenizer(path, **kwargs) + + def get_vocab(self): + return {} + + +class UMT5BaseModel(sd1_clip.SDClipModel): + def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, model_options={}): + textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "umt5_config_base.json") + super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"end": 1, "pad": 0}, model_class=comfy.text_encoders.t5.T5, enable_attention_masks=True, zero_out_masked=False, model_options=model_options) + +class UMT5BaseTokenizer(sd1_clip.SDTokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}): + tokenizer = tokenizer_data.get("spiece_model", None) + super().__init__(tokenizer, pad_with_end=False, embedding_size=768, embedding_key='umt5base', tokenizer_class=SPieceTokenizer, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, pad_token=0, tokenizer_data=tokenizer_data) + + def state_dict(self): + return {"spiece_model": 
self.tokenizer.serialize_model()} + +class LyricsTokenizer(sd1_clip.SDTokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}): + tokenizer = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "ace_lyrics_tokenizer"), "vocab.json") + super().__init__(tokenizer, pad_with_end=False, embedding_size=1024, embedding_key='lyrics', tokenizer_class=VoiceBpeTokenizer, has_start_token=True, pad_to_max_length=False, max_length=99999999, min_length=1, pad_token=2, has_end_token=False, tokenizer_data=tokenizer_data) + +class AceT5Tokenizer: + def __init__(self, embedding_directory=None, tokenizer_data={}): + self.voicebpe = LyricsTokenizer(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data) + self.umt5base = UMT5BaseTokenizer(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data) + + def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): + out = {} + out["lyrics"] = self.voicebpe.tokenize_with_weights(kwargs.get("lyrics", ""), return_word_ids, **kwargs) + out["umt5base"] = self.umt5base.tokenize_with_weights(text, return_word_ids, **kwargs) + return out + + def untokenize(self, token_weight_pair): + return self.umt5base.untokenize(token_weight_pair) + + def state_dict(self): + return self.umt5base.state_dict() + +class AceT5Model(torch.nn.Module): + def __init__(self, device="cpu", dtype=None, model_options={}, **kwargs): + super().__init__() + self.umt5base = UMT5BaseModel(device=device, dtype=dtype, model_options=model_options) + self.dtypes = set() + if dtype is not None: + self.dtypes.add(dtype) + + def set_clip_options(self, options): + self.umt5base.set_clip_options(options) + + def reset_clip_options(self): + self.umt5base.reset_clip_options() + + def encode_token_weights(self, token_weight_pairs): + token_weight_pairs_umt5base = token_weight_pairs["umt5base"] + token_weight_pairs_lyrics = token_weight_pairs["lyrics"] + + t5_out, t5_pooled = self.umt5base.encode_token_weights(token_weight_pairs_umt5base) + + lyrics_embeds = torch.tensor(list(map(lambda a: a[0], token_weight_pairs_lyrics[0]))).unsqueeze(0) + return t5_out, None, {"conditioning_lyrics": lyrics_embeds} + + def load_sd(self, sd): + return self.umt5base.load_sd(sd) diff --git a/comfy/text_encoders/ace_lyrics_tokenizer/vocab.json b/comfy/text_encoders/ace_lyrics_tokenizer/vocab.json new file mode 100644 index 00000000..519ed340 --- /dev/null +++ b/comfy/text_encoders/ace_lyrics_tokenizer/vocab.json @@ -0,0 +1,15535 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "special": true, + "content": "[STOP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 1, + "special": true, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 2, + "special": true, + "content": "[SPACE]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 259, + "special": true, + "content": "[en]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 260, + "special": true, + "content": "[de]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 261, + "special": true, + "content": "[START]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 262, + "special": true, + "content": "[fr]", + "single_word": 
false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 284, + "special": true, + "content": "[es]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 285, + "special": true, + "content": "[it]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 286, + "special": true, + "content": "[pt]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 294, + "special": true, + "content": "[pl]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 295, + "special": true, + "content": "[tr]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 267, + "special": true, + "content": "[ru]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 293, + "special": true, + "content": "[cs]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 297, + "special": true, + "content": "[nl]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 5022, + "special": true, + "content": "[ar]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 5023, + "special": true, + "content": "[zh-cn]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 5412, + "special": true, + "content": "[ja]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 5753, + "special": true, + "content": "[hu]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 6152, + "special": true, + "content": "[ko]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 6680, + "special": true, + "content": "[hi]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 6681, + "special": true, + "content": "[start]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 6682, + "special": true, + "content": "[intro]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 6683, + "special": true, + "content": "[verse]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 6684, + "special": true, + "content": "[chorus]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 6685, + "special": true, + "content": "[bridge]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 6686, + "special": true, + "content": "[outro]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 6687, + "special": true, + "content": "[end]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 6688, + "special": true, + "content": "[inst]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 6689, + "special": true, + "content": "[solo]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 6690, + "special": true, + "content": "[hook]", + "single_word": 
false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 6691, + "special": true, + "content": "[pre-chorus]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 6692, + "special": true, + "content": "[break]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "Whitespace" + }, + "post_processor": null, + "decoder": null, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "[UNK]", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "vocab": { + "[STOP]": 0, + "[UNK]": 1, + "[SPACE]": 2, + "!": 3, + "'": 4, + "(": 5, + ")": 6, + ",": 7, + "-": 8, + ".": 9, + "/": 10, + ":": 11, + ";": 12, + "?": 13, + "a": 14, + "b": 15, + "c": 16, + "d": 17, + "e": 18, + "f": 19, + "g": 20, + "h": 21, + "i": 22, + "j": 23, + "k": 24, + "l": 25, + "m": 26, + "n": 27, + "o": 28, + "p": 29, + "q": 30, + "r": 31, + "s": 32, + "t": 33, + "u": 34, + "v": 35, + "w": 36, + "x": 37, + "y": 38, + "z": 39, + "th": 40, + "in": 41, + "the": 42, + "an": 43, + "er": 44, + "ou": 45, + "re": 46, + "on": 47, + "at": 48, + "ed": 49, + "en": 50, + "to": 51, + "ing": 52, + "and": 53, + "is": 54, + "as": 55, + "al": 56, + "or": 57, + "of": 58, + "ar": 59, + "it": 60, + "es": 61, + "he": 62, + "st": 63, + "le": 64, + "om": 65, + "se": 66, + "be": 67, + "ad": 68, + "ow": 69, + "ly": 70, + "ch": 71, + "wh": 72, + "that": 73, + "you": 74, + "li": 75, + "ve": 76, + "ac": 77, + "ti": 78, + "ld": 79, + "me": 80, + "was": 81, + "gh": 82, + "id": 83, + "ll": 84, + "wi": 85, + "ent": 86, + "for": 87, + "ay": 88, + "ro": 89, + "ver": 90, + "ic": 91, + "her": 92, + "ke": 93, + "his": 94, + "no": 95, + "ut": 96, + "un": 97, + "ir": 98, + "lo": 99, + "we": 100, + "ri": 101, + "ha": 102, + "with": 103, + "ght": 104, + "out": 105, + "im": 106, + "ion": 107, + "all": 108, + "ab": 109, + "one": 110, + "ne": 111, + "ge": 112, + "ould": 113, + "ter": 114, + "mo": 115, + "had": 116, + "ce": 117, + "she": 118, + "go": 119, + "sh": 120, + "ur": 121, + "am": 122, + "so": 123, + "pe": 124, + "my": 125, + "de": 126, + "are": 127, + "but": 128, + "ome": 129, + "fr": 130, + "ther": 131, + "fe": 132, + "su": 133, + "do": 134, + "con": 135, + "te": 136, + "ain": 137, + "ere": 138, + "po": 139, + "if": 140, + "they": 141, + "us": 142, + "ag": 143, + "tr": 144, + "now": 145, + "oun": 146, + "this": 147, + "have": 148, + "not": 149, + "sa": 150, + "il": 151, + "up": 152, + "thing": 153, + "from": 154, + "ap": 155, + "him": 156, + "ack": 157, + "ation": 158, + "ant": 159, + "our": 160, + "op": 161, + "like": 162, + "ust": 163, + "ess": 164, + "bo": 165, + "ok": 166, + "ul": 167, + "ind": 168, + "ex": 169, + "com": 170, + "some": 171, + "there": 172, + "ers": 173, + "co": 174, + "res": 175, + "man": 176, + "ard": 177, + "pl": 178, + "wor": 179, + "way": 180, + "tion": 181, + "fo": 182, + "ca": 183, + "were": 184, + "by": 185, + "ate": 186, + "pro": 187, + "ted": 188, + "ound": 189, + "own": 190, + "would": 191, + "ts": 192, + "what": 193, + "qu": 194, + "ally": 195, + "ight": 196, + "ck": 197, + "gr": 198, + "when": 199, + "ven": 200, + "can": 201, + "ough": 202, + "ine": 203, + "end": 204, + "per": 205, + "ous": 206, + "od": 207, + "ide": 208, + "know": 209, + "ty": 210, + "very": 211, + "si": 212, + "ak": 213, + "who": 214, + "about": 215, + "ill": 216, + "them": 217, + "est": 218, + "red": 219, + "ye": 220, + "could": 221, + "ong": 222, + 
"your": 223, + "their": 224, + "em": 225, + "just": 226, + "other": 227, + "into": 228, + "any": 229, + "whi": 230, + "um": 231, + "tw": 232, + "ast": 233, + "der": 234, + "did": 235, + "ie": 236, + "been": 237, + "ace": 238, + "ink": 239, + "ity": 240, + "back": 241, + "ting": 242, + "br": 243, + "more": 244, + "ake": 245, + "pp": 246, + "then": 247, + "sp": 248, + "el": 249, + "use": 250, + "bl": 251, + "said": 252, + "over": 253, + "get": 254, + "ß": 255, + "ä": 256, + "ö": 257, + "ü": 258, + "[en]": 259, + "[de]": 260, + "[START]": 261, + "[fr]": 262, + "œ": 263, + "ï": 264, + "ê": 265, + "â": 266, + "[ru]": 267, + "ÿ": 268, + "è": 269, + "à": 270, + "ë": 271, + "ù": 272, + "î": 273, + "ç": 274, + "æ": 275, + "ô": 276, + "û": 277, + "á": 278, + "é": 279, + "í": 280, + "ó": 281, + "ú": 282, + "ñ": 283, + "[es]": 284, + "[it]": 285, + "[pt]": 286, + "ń": 287, + "ś": 288, + "ę": 289, + "ą": 290, + "ż": 291, + "ć": 292, + "[cs]": 293, + "[pl]": 294, + "[tr]": 295, + "ã": 296, + "[nl]": 297, + "ş": 298, + "ğ": 299, + "ı": 300, + "ò": 301, + "ì": 302, + "¿": 303, + "…": 304, + "i̇": 305, + "õ": 306, + "\"": 307, + "´": 308, + "ø": 309, + "č": 310, + "ō": 311, + "š": 312, + "ž": 313, + "̇": 314, + "ei": 315, + "ich": 316, + "ein": 317, + "au": 318, + "sch": 319, + "und": 320, + "die": 321, + "da": 322, + "den": 323, + "gen": 324, + "zu": 325, + "hr": 326, + "ten": 327, + "mi": 328, + "sie": 329, + "das": 330, + "eine": 331, + "icht": 332, + "ber": 333, + "ach": 334, + "auf": 335, + "lich": 336, + "nicht": 337, + "mm": 338, + "ben": 339, + "war": 340, + "mit": 341, + "sich": 342, + "ig": 343, + "aus": 344, + "ist": 345, + "wie": 346, + "och": 347, + "ung": 348, + "ann": 349, + "ür": 350, + "hn": 351, + "ihr": 352, + "sen": 353, + "tz": 354, + "dem": 355, + "eit": 356, + "hat": 357, + "wir": 358, + "von": 359, + "wei": 360, + "ier": 361, + "ra": 362, + "einen": 363, + "vor": 364, + "als": 365, + "wo": 366, + "rei": 367, + "ste": 368, + "lie": 369, + "auch": 370, + "du": 371, + "des": 372, + "ko": 373, + "über": 374, + "bei": 375, + "hen": 376, + "hm": 377, + "lei": 378, + "aber": 379, + "wen": 380, + "hl": 381, + "ger": 382, + "nach": 383, + "ft": 384, + "imm": 385, + "je": 386, + "schen": 387, + "wer": 388, + "ser": 389, + "än": 390, + "sein": 391, + "ol": 392, + "cht": 393, + "für": 394, + "kl": 395, + "ff": 396, + "einem": 397, + "nen": 398, + "ja": 399, + "noch": 400, + "hatte": 401, + "pf": 402, + "hin": 403, + "di": 404, + "chen": 405, + "rü": 406, + "iel": 407, + "sel": 408, + "dass": 409, + "ihn": 410, + "mir": 411, + "schl": 412, + "ön": 413, + "gan": 414, + "gt": 415, + "einer": 416, + "sten": 417, + "mich": 418, + "wenn": 419, + "ell": 420, + "gte": 421, + "mal": 422, + "gel": 423, + "ken": 424, + "nur": 425, + "mmen": 426, + "fü": 427, + "ern": 428, + "ör": 429, + "unter": 430, + "ander": 431, + "dur": 432, + "uch": 433, + "ta": 434, + "men": 435, + "mach": 436, + "doch": 437, + "durch": 438, + "os": 439, + "gl": 440, + "hal": 441, + "ihre": 442, + "wä": 443, + "immer": 444, + "ihm": 445, + "kann": 446, + "ort": 447, + "dann": 448, + "lan": 449, + "tzt": 450, + "oder": 451, + "hren": 452, + "et": 453, + "kön": 454, + "ick": 455, + "fa": 456, + "wieder": 457, + "daß": 458, + "mein": 459, + "fen": 460, + "ganz": 461, + "diese": 462, + "ster": 463, + "dar": 464, + "wa": 465, + "ges": 466, + "na": 467, + "fl": 468, + "igen": 469, + "sche": 470, + "ungen": 471, + "mehr": 472, + "ßen": 473, + "ot": 474, + "kon": 475, + "gew": 476, + "haben": 477, + "geh": 478, + "ät": 479, + "sind": 480, 
+ "dr": 481, + "wel": 482, + "uns": 483, + "vo": 484, + "ma": 485, + "ute": 486, + "schon": 487, + "bes": 488, + "gesch": 489, + "bt": 490, + "che": 491, + "son": 492, + "ob": 493, + "la": 494, + "rück": 495, + "seine": 496, + "kr": 497, + "fre": 498, + "eil": 499, + "zum": 500, + "hier": 501, + "kt": 502, + "ige": 503, + "spr": 504, + "leben": 505, + "bst": 506, + "zeit": 507, + "gro": 508, + "denn": 509, + "ho": 510, + "scha": 511, + "bar": 512, + "alle": 513, + "gegen": 514, + "wür": 515, + "mü": 516, + "ze": 517, + "werden": 518, + "jetzt": 519, + "kommen": 520, + "nie": 521, + "sei": 522, + "heit": 523, + "soll": 524, + "glei": 525, + "meine": 526, + "woll": 527, + "ner": 528, + "habe": 529, + "wur": 530, + "lichen": 531, + "assen": 532, + "nte": 533, + "sehen": 534, + "wird": 535, + "bis": 536, + "gar": 537, + "ien": 538, + "mus": 539, + "uß": 540, + "är": 541, + "stell": 542, + "keit": 543, + "zwei": 544, + "selbst": 545, + "sta": 546, + "pa": 547, + "sagte": 548, + "tet": 549, + "kam": 550, + "ssen": 551, + "viel": 552, + "ug": 553, + "zen": 554, + "hei": 555, + "mann": 556, + "will": 557, + "geb": 558, + "waren": 559, + "ück": 560, + "äch": 561, + "mer": 562, + "ru": 563, + "hau": 564, + "eigen": 565, + "ang": 566, + "weg": 567, + "blick": 568, + "fra": 569, + "alles": 570, + "ka": 571, + "augen": 572, + "fin": 573, + "liche": 574, + "unser": 575, + "dern": 576, + "herr": 577, + "nun": 578, + "vie": 579, + "chte": 580, + "wohl": 581, + "fall": 582, + "ht": 583, + "ün": 584, + "etwas": 585, + "stand": 586, + "äu": 587, + "mö": 588, + "tel": 589, + "rie": 590, + "dich": 591, + "dies": 592, + "hand": 593, + "bin": 594, + "ffen": 595, + "nichts": 596, + "dan": 597, + "hne": 598, + "ihnen": 599, + "esen": 600, + "dieser": 601, + "frau": 602, + "art": 603, + "dir": 604, + "isch": 605, + "erst": 606, + "gleich": 607, + "komm": 608, + "hör": 609, + "ße": 610, + "dig": 611, + "sehr": 612, + "zei": 613, + "sam": 614, + "aum": 615, + "hät": 616, + "ingen": 617, + "gut": 618, + "mut": 619, + "cken": 620, + "konnte": 621, + "stimm": 622, + "zur": 623, + "itz": 624, + "weil": 625, + "würde": 626, + "fä": 627, + "können": 628, + "keine": 629, + "fer": 630, + "ischen": 631, + "voll": 632, + "eines": 633, + "setz": 634, + "zie": 635, + "del": 636, + "tete": 637, + "seiner": 638, + "ieren": 639, + "gest": 640, + "zurück": 641, + "wurde": 642, + "schn": 643, + "pr": 644, + "ließ": 645, + "tra": 646, + "mä": 647, + "gend": 648, + "fol": 649, + "ik": 650, + "schla": 651, + "schaft": 652, + "ater": 653, + "weiß": 654, + "seinen": 655, + "lassen": 656, + "lu": 657, + "unden": 658, + "teil": 659, + "neu": 660, + "iert": 661, + "menschen": 662, + "hmen": 663, + "str": 664, + "gi": 665, + "sah": 666, + "ihren": 667, + "eln": 668, + "weiter": 669, + "gehen": 670, + "iger": 671, + "macht": 672, + "tag": 673, + "also": 674, + "halten": 675, + "nis": 676, + "acht": 677, + "geben": 678, + "og": 679, + "nat": 680, + "mar": 681, + "det": 682, + "ohne": 683, + "haus": 684, + "tro": 685, + "ange": 686, + "lau": 687, + "spiel": 688, + "tre": 689, + "schr": 690, + "inn": 691, + "los": 692, + "machen": 693, + "hätte": 694, + "beg": 695, + "wirk": 696, + "alt": 697, + "glich": 698, + "tes": 699, + "richt": 700, + "freund": 701, + "ihrer": 702, + "fel": 703, + "bel": 704, + "sol": 705, + "einmal": 706, + "eben": 707, + "hol": 708, + "hän": 709, + "tern": 710, + "hö": 711, + "schw": 712, + "recht": 713, + "wahr": 714, + "seinem": 715, + "stehen": 716, + "hlen": 717, + "ins": 718, + "ging": 719, + "wollte": 720, + 
"wissen": 721, + "ungs": 722, + "ald": 723, + "ass": 724, + "jahr": 725, + "mor": 726, + "welt": 727, + "under": 728, + "zusa": 729, + "kopf": 730, + "lang": 731, + "hinter": 732, + "atz": 733, + "stra": 734, + "angen": 735, + "ank": 736, + "ade": 737, + "glau": 738, + "fach": 739, + "hatten": 740, + "fort": 741, + "eicht": 742, + "iff": 743, + "ler": 744, + "mei": 745, + "diesem": 746, + "kein": 747, + "frei": 748, + "führ": 749, + "vom": 750, + "β": 751, + "ai": 752, + "ait": 753, + "que": 754, + "les": 755, + "av": 756, + "ais": 757, + "oi": 758, + "eu": 759, + "lle": 760, + "par": 761, + "ans": 762, + "ment": 763, + "ét": 764, + "une": 765, + "pas": 766, + "qui": 767, + "elle": 768, + "dé": 769, + "pour": 770, + "dans": 771, + "ré": 772, + "tou": 773, + "vous": 774, + "vi": 775, + "ouv": 776, + "mon": 777, + "sur": 778, + "ci": 779, + "plu": 780, + "ère": 781, + "mais": 782, + "ois": 783, + "plus": 784, + "ée": 785, + "aient": 786, + "mp": 787, + "lui": 788, + "ave": 789, + "était": 790, + "ses": 791, + "tout": 792, + "oir": 793, + "avait": 794, + "és": 795, + "mes": 796, + "nous": 797, + "eux": 798, + "bi": 799, + "ons": 800, + "pu": 801, + "ces": 802, + "tu": 803, + "leur": 804, + "don": 805, + "eur": 806, + "ette": 807, + "aire": 808, + "avec": 809, + "dit": 810, + "té": 811, + "ille": 812, + "comme": 813, + "cr": 814, + "ux": 815, + "ès": 816, + "aux": 817, + "jour": 818, + "ils": 819, + "bien": 820, + "cou": 821, + "quel": 822, + "peu": 823, + "cette": 824, + "cu": 825, + "mê": 826, + "fait": 827, + "gu": 828, + "être": 829, + "ité": 830, + "ens": 831, + "ni": 832, + "lé": 833, + "dis": 834, + "ble": 835, + "né": 836, + "puis": 837, + "même": 838, + "ques": 839, + "fi": 840, + "age": 841, + "moi": 842, + "ence": 843, + "ont": 844, + "main": 845, + "ors": 846, + "aut": 847, + "ance": 848, + "mé": 849, + "sans": 850, + "sé": 851, + "lon": 852, + "hom": 853, + "car": 854, + "able": 855, + "cher": 856, + "deux": 857, + "enf": 858, + "où": 859, + "ph": 860, + "ure": 861, + "temp": 862, + "pos": 863, + "rent": 864, + "pé": 865, + "faire": 866, + "pi": 867, + "tres": 868, + "ça": 869, + "endre": 870, + "bon": 871, + "sou": 872, + "int": 873, + "pré": 874, + "sent": 875, + "tant": 876, + "cer": 877, + "là": 878, + "lais": 879, + "près": 880, + "bre": 881, + "cour": 882, + "pet": 883, + "comp": 884, + "lait": 885, + "trouv": 886, + "entre": 887, + "sont": 888, + "dev": 889, + "nu": 890, + "temps": 891, + "dou": 892, + "rait": 893, + "bou": 894, + "quand": 895, + "jours": 896, + "avoir": 897, + "été": 898, + "ale": 899, + "pre": 900, + "fois": 901, + "orte": 902, + "vé": 903, + "non": 904, + "tous": 905, + "jus": 906, + "coup": 907, + "homme": 908, + "ête": 909, + "aussi": 910, + "urs": 911, + "seu": 912, + "ord": 913, + "min": 914, + "gé": 915, + "core": 916, + "va": 917, + "vre": 918, + "encore": 919, + "sem": 920, + "ite": 921, + "autre": 922, + "pris": 923, + "peut": 924, + "ue": 925, + "ante": 926, + "gn": 927, + "rép": 928, + "hu": 929, + "sion": 930, + "votre": 931, + "dire": 932, + "ez": 933, + "fem": 934, + "leurs": 935, + "met": 936, + "cri": 937, + "mis": 938, + "tour": 939, + "rai": 940, + "jam": 941, + "regar": 942, + "rien": 943, + "vers": 944, + "suis": 945, + "pouv": 946, + "vis": 947, + "grand": 948, + "ants": 949, + "cor": 950, + "rer": 951, + "cé": 952, + "tent": 953, + "pres": 954, + "vou": 955, + "alors": 956, + "sieur": 957, + "aine": 958, + "quoi": 959, + "fon": 960, + "endant": 961, + "arri": 962, + "eure": 963, + "après": 964, + "donc": 965, + "itu": 966, + "lè": 
967, + "sait": 968, + "toi": 969, + "cha": 970, + "ail": 971, + "asse": 972, + "imp": 973, + "voy": 974, + "conn": 975, + "pla": 976, + "petit": 977, + "avant": 978, + "nom": 979, + "tin": 980, + "dont": 981, + "sous": 982, + "emp": 983, + "person": 984, + "elles": 985, + "beau": 986, + "parti": 987, + "cho": 988, + "prit": 989, + "toujours": 990, + "rais": 991, + "jamais": 992, + "trav": 993, + "tions": 994, + "très": 995, + "voi": 996, + "ren": 997, + "yeux": 998, + "voir": 999, + "premi": 1000, + "gne": 1001, + "heure": 1002, + "rou": 1003, + "eff": 1004, + "notre": 1005, + "ments": 1006, + "ton": 1007, + "fais": 1008, + "cela": 1009, + "répon": 1010, + "cons": 1011, + "air": 1012, + "ôt": 1013, + "pendant": 1014, + "ici": 1015, + "toute": 1016, + "jet": 1017, + "port": 1018, + "étaient": 1019, + "pen": 1020, + "hé": 1021, + "autres": 1022, + "père": 1023, + "oc": 1024, + "quelques": 1025, + "ique": 1026, + "lis": 1027, + "femme": 1028, + "jou": 1029, + "teur": 1030, + "monde": 1031, + "nes": 1032, + "dre": 1033, + "aff": 1034, + "rap": 1035, + "part": 1036, + "lement": 1037, + "cla": 1038, + "fut": 1039, + "quelque": 1040, + "prendre": 1041, + "rê": 1042, + "aille": 1043, + "sais": 1044, + "ches": 1045, + "let": 1046, + "char": 1047, + "ères": 1048, + "ents": 1049, + "moins": 1050, + "eau": 1051, + "aî": 1052, + "jeu": 1053, + "heur": 1054, + "ées": 1055, + "tri": 1056, + "point": 1057, + "mom": 1058, + "vent": 1059, + "nouv": 1060, + "gran": 1061, + "trois": 1062, + "sant": 1063, + "toutes": 1064, + "contre": 1065, + "èrent": 1066, + "chez": 1067, + "avez": 1068, + "ût": 1069, + "att": 1070, + "pau": 1071, + "porte": 1072, + "ouver": 1073, + "lit": 1074, + "prés": 1075, + "chose": 1076, + "vit": 1077, + "monsieur": 1078, + "hab": 1079, + "tête": 1080, + "ju": 1081, + "tement": 1082, + "ction": 1083, + "vrai": 1084, + "lar": 1085, + "cet": 1086, + "regard": 1087, + "lant": 1088, + "som": 1089, + "moment": 1090, + "illes": 1091, + "ple": 1092, + "ps": 1093, + "mère": 1094, + "cl": 1095, + "sour": 1096, + "ys": 1097, + "trop": 1098, + "enne": 1099, + "jusqu": 1100, + "avaient": 1101, + "avais": 1102, + "jeune": 1103, + "depuis": 1104, + "personne": 1105, + "fit": 1106, + "cert": 1107, + "jo": 1108, + "oui": 1109, + "rest": 1110, + "semb": 1111, + "cap": 1112, + "mat": 1113, + "mu": 1114, + "long": 1115, + "fran": 1116, + "faut": 1117, + "iti": 1118, + "bli": 1119, + "chev": 1120, + "pri": 1121, + "ente": 1122, + "ainsi": 1123, + "cham": 1124, + "lors": 1125, + "cas": 1126, + "ili": 1127, + "bé": 1128, + "nos": 1129, + "sui": 1130, + "rit": 1131, + "cro": 1132, + "gue": 1133, + "ía": 1134, + "por": 1135, + "las": 1136, + "ón": 1137, + "una": 1138, + "aba": 1139, + "dos": 1140, + "era": 1141, + "mb": 1142, + "para": 1143, + "ás": 1144, + "mos": 1145, + "ando": 1146, + "como": 1147, + "más": 1148, + "ción": 1149, + "tan": 1150, + "dad": 1151, + "ado": 1152, + "fu": 1153, + "cia": 1154, + "mente": 1155, + "sus": 1156, + "tar": 1157, + "za": 1158, + "ba": 1159, + "pero": 1160, + "sin": 1161, + "lla": 1162, + "án": 1163, + "ia": 1164, + "ran": 1165, + "ga": 1166, + "yo": 1167, + "tos": 1168, + "cos": 1169, + "ya": 1170, + "ones": 1171, + "había": 1172, + "hi": 1173, + "esta": 1174, + "mas": 1175, + "tor": 1176, + "aban": 1177, + "dor": 1178, + "ían": 1179, + "tas": 1180, + "én": 1181, + "endo": 1182, + "aque": 1183, + "ero": 1184, + "io": 1185, + "qué": 1186, + "cab": 1187, + "tal": 1188, + "señ": 1189, + "ora": 1190, + "todo": 1191, + "sal": 1192, + "cuando": 1193, + "gun": 1194, + "bu": 
1195, + "ras": 1196, + "esto": 1197, + "pare": 1198, + "él": 1199, + "tras": 1200, + "jos": 1201, + "mien": 1202, + "pue": 1203, + "cre": 1204, + "pon": 1205, + "día": 1206, + "tros": 1207, + "sab": 1208, + "sobre": 1209, + "ese": 1210, + "mbre": 1211, + "eron": 1212, + "añ": 1213, + "ido": 1214, + "porque": 1215, + "ella": 1216, + "cen": 1217, + "muy": 1218, + "cal": 1219, + "este": 1220, + "has": 1221, + "có": 1222, + "gra": 1223, + "ros": 1224, + "aquel": 1225, + "dijo": 1226, + "cía": 1227, + "zo": 1228, + "ciones": 1229, + "mbi": 1230, + "elo": 1231, + "tó": 1232, + "ina": 1233, + "todos": 1234, + "tien": 1235, + "estaba": 1236, + "deci": 1237, + "cio": 1238, + "ño": 1239, + "lor": 1240, + "nues": 1241, + "medi": 1242, + "len": 1243, + "vida": 1244, + "ali": 1245, + "pues": 1246, + "ales": 1247, + "vol": 1248, + "mí": 1249, + "rar": 1250, + "cion": 1251, + "hasta": 1252, + "señor": 1253, + "cono": 1254, + "ah": 1255, + "dios": 1256, + "esa": 1257, + "ún": 1258, + "var": 1259, + "san": 1260, + "gui": 1261, + "otros": 1262, + "tado": 1263, + "buen": 1264, + "ña": 1265, + "tiemp": 1266, + "hacer": 1267, + "jer": 1268, + "vu": 1269, + "ana": 1270, + "así": 1271, + "antes": 1272, + "vez": 1273, + "miento": 1274, + "jar": 1275, + "lab": 1276, + "casa": 1277, + "eso": 1278, + "ego": 1279, + "dió": 1280, + "está": 1281, + "encia": 1282, + "eli": 1283, + "ías": 1284, + "tiempo": 1285, + "zar": 1286, + "van": 1287, + "mun": 1288, + "erta": 1289, + "tambi": 1290, + "sí": 1291, + "aun": 1292, + "mismo": 1293, + "entes": 1294, + "mano": 1295, + "ele": 1296, + "nada": 1297, + "segu": 1298, + "mej": 1299, + "erra": 1300, + "tir": 1301, + "uno": 1302, + "donde": 1303, + "toda": 1304, + "desde": 1305, + "también": 1306, + "cuer": 1307, + "hombre": 1308, + "otro": 1309, + "lib": 1310, + "trar": 1311, + "cual": 1312, + "hay": 1313, + "cada": 1314, + "taba": 1315, + "mento": 1316, + "tenía": 1317, + "quer": 1318, + "eran": 1319, + "siemp": 1320, + "siempre": 1321, + "erto": 1322, + "quí": 1323, + "gos": 1324, + "pués": 1325, + "ellos": 1326, + "después": 1327, + "nue": 1328, + "llo": 1329, + "inter": 1330, + "cómo": 1331, + "ahora": 1332, + "uste": 1333, + "traba": 1334, + "lado": 1335, + "ino": 1336, + "poco": 1337, + "erte": 1338, + "mujer": 1339, + "quier": 1340, + "algun": 1341, + "fue": 1342, + "ojos": 1343, + "enton": 1344, + "vos": 1345, + "esper": 1346, + "much": 1347, + "otra": 1348, + "az": 1349, + "eza": 1350, + "aquí": 1351, + "cias": 1352, + "gua": 1353, + "mucho": 1354, + "decir": 1355, + "esti": 1356, + "idad": 1357, + "algo": 1358, + "ocu": 1359, + "entonces": 1360, + "dido": 1361, + "entos": 1362, + "gri": 1363, + "dado": 1364, + "ios": 1365, + "dose": 1366, + "usted": 1367, + "quien": 1368, + "ami": 1369, + "unto": 1370, + "mejor": 1371, + "bas": 1372, + "solo": 1373, + "pregun": 1374, + "tur": 1375, + "alg": 1376, + "todas": 1377, + "parte": 1378, + "emb": 1379, + "cto": 1380, + "mundo": 1381, + "tiene": 1382, + "tante": 1383, + "palab": 1384, + "tran": 1385, + "aquella": 1386, + "cios": 1387, + "aunque": 1388, + "cuen": 1389, + "tener": 1390, + "fun": 1391, + "respon": 1392, + "allí": 1393, + "xi": 1394, + "han": 1395, + "pens": 1396, + "contra": 1397, + "tura": 1398, + "val": 1399, + "dio": 1400, + "tanto": 1401, + "camin": 1402, + "mó": 1403, + "esp": 1404, + "ada": 1405, + "ío": 1406, + "hacia": 1407, + "dej": 1408, + "estar": 1409, + "ión": 1410, + "gas": 1411, + "vas": 1412, + "noche": 1413, + "ér": 1414, + "años": 1415, + "padre": 1416, + "gus": 1417, + "ár": 1418, + "sino": 
1419, + "manos": 1420, + "cido": 1421, + "estu": 1422, + "hubi": 1423, + "vir": 1424, + "bri": 1425, + "raz": 1426, + "chi": 1427, + "puede": 1428, + "menos": 1429, + "habi": 1430, + "homb": 1431, + "neces": 1432, + "may": 1433, + "eros": 1434, + "ría": 1435, + "hecho": 1436, + "escu": 1437, + "lti": 1438, + "ándo": 1439, + "bus": 1440, + "cosas": 1441, + "tú": 1442, + "espa": 1443, + "reci": 1444, + "ctor": 1445, + "prim": 1446, + "dia": 1447, + "dese": 1448, + "mientras": 1449, + "hor": 1450, + "fuer": 1451, + "ida": 1452, + "posi": 1453, + "lante": 1454, + "ano": 1455, + "estas": 1456, + "pli": 1457, + "luego": 1458, + "sión": 1459, + "cin": 1460, + "tierra": 1461, + "guar": 1462, + "cado": 1463, + "encon": 1464, + "pren": 1465, + "mayor": 1466, + "fal": 1467, + "ð": 1468, + "ħ": 1469, + "ň": 1470, + "ə": 1471, + "θ": 1472, + "’": 1473, + "“": 1474, + "”": 1475, + "zi": 1476, + "gli": 1477, + "tto": 1478, + "ono": 1479, + "nel": 1480, + "tti": 1481, + "della": 1482, + "zione": 1483, + "tta": 1484, + "tà": 1485, + "uo": 1486, + "come": 1487, + "alla": 1488, + "oni": 1489, + "ggi": 1490, + "ssi": 1491, + "più": 1492, + "ini": 1493, + "bb": 1494, + "sto": 1495, + "sono": 1496, + "eri": 1497, + "sse": 1498, + "sc": 1499, + "sul": 1500, + "vano": 1501, + "sti": 1502, + "suo": 1503, + "cchi": 1504, + "zza": 1505, + "anche": 1506, + "tte": 1507, + "sci": 1508, + "col": 1509, + "sso": 1510, + "ssa": 1511, + "dei": 1512, + "aveva": 1513, + "zz": 1514, + "amo": 1515, + "gno": 1516, + "sua": 1517, + "ria": 1518, + "sì": 1519, + "ché": 1520, + "dal": 1521, + "ona": 1522, + "spe": 1523, + "gni": 1524, + "tt": 1525, + "delle": 1526, + "questo": 1527, + "nella": 1528, + "dere": 1529, + "anno": 1530, + "dell": 1531, + "uni": 1532, + "bbe": 1533, + "anti": 1534, + "ene": 1535, + "gio": 1536, + "uto": 1537, + "qual": 1538, + "glia": 1539, + "quando": 1540, + "tutto": 1541, + "glio": 1542, + "zioni": 1543, + "cam": 1544, + "esso": 1545, + "ss": 1546, + "mol": 1547, + "loro": 1548, + "perché": 1549, + "cosa": 1550, + "due": 1551, + "poi": 1552, + "sco": 1553, + "cco": 1554, + "gna": 1555, + "tem": 1556, + "prima": 1557, + "così": 1558, + "essere": 1559, + "ani": 1560, + "bra": 1561, + "rio": 1562, + "anco": 1563, + "cui": 1564, + "spi": 1565, + "via": 1566, + "gior": 1567, + "bile": 1568, + "ggio": 1569, + "mai": 1570, + "tare": 1571, + "indi": 1572, + "rebbe": 1573, + "senza": 1574, + "zio": 1575, + "tutti": 1576, + "stato": 1577, + "zia": 1578, + "dalla": 1579, + "mia": 1580, + "vita": 1581, + "quella": 1582, + "qua": 1583, + "dove": 1584, + "allo": 1585, + "sempre": 1586, + "zzo": 1587, + "sia": 1588, + "dopo": 1589, + "porta": 1590, + "ccia": 1591, + "erano": 1592, + "anni": 1593, + "chia": 1594, + "enza": 1595, + "propri": 1596, + "anda": 1597, + "cca": 1598, + "occhi": 1599, + "questa": 1600, + "ffi": 1601, + "ron": 1602, + "mio": 1603, + "ris": 1604, + "ogni": 1605, + "rin": 1606, + "far": 1607, + "menti": 1608, + "ancora": 1609, + "fatto": 1610, + "mani": 1611, + "senti": 1612, + "pra": 1613, + "tempo": 1614, + "essi": 1615, + "bbi": 1616, + "lare": 1617, + "pers": 1618, + "sor": 1619, + "anza": 1620, + "pie": 1621, + "verso": 1622, + "altro": 1623, + "tato": 1624, + "cato": 1625, + "ato": 1626, + "volta": 1627, + "cc": 1628, + "fare": 1629, + "ciò": 1630, + "bili": 1631, + "nuo": 1632, + "quello": 1633, + "colo": 1634, + "ppo": 1635, + "trova": 1636, + "ore": 1637, + "rono": 1638, + "molto": 1639, + "almente": 1640, + "sca": 1641, + "vole": 1642, + "tali": 1643, + "sulla": 1644, + "sce": 1645, + 
"meno": 1646, + "anto": 1647, + "pun": 1648, + "stu": 1649, + "capi": 1650, + "giu": 1651, + "mini": 1652, + "pia": 1653, + "lavo": 1654, + "vero": 1655, + "rsi": 1656, + "altri": 1657, + "scia": 1658, + "suoi": 1659, + "glie": 1660, + "sotto": 1661, + "bene": 1662, + "scri": 1663, + "tale": 1664, + "degli": 1665, + "alc": 1666, + "uomo": 1667, + "pel": 1668, + "pote": 1669, + "essa": 1670, + "scu": 1671, + "signo": 1672, + "stro": 1673, + "uti": 1674, + "sione": 1675, + "gre": 1676, + "fini": 1677, + "lun": 1678, + "esi": 1679, + "passa": 1680, + "rà": 1681, + "mentre": 1682, + "hanno": 1683, + "usci": 1684, + "gia": 1685, + "già": 1686, + "mina": 1687, + "tica": 1688, + "giorno": 1689, + "esse": 1690, + "modo": 1691, + "spa": 1692, + "proprio": 1693, + "ori": 1694, + "contro": 1695, + "stru": 1696, + "diven": 1697, + "disse": 1698, + "rato": 1699, + "noi": 1700, + "vere": 1701, + "può": 1702, + "dice": 1703, + "cci": 1704, + "secon": 1705, + "ccio": 1706, + "qualche": 1707, + "tutta": 1708, + "gg": 1709, + "mondo": 1710, + "forma": 1711, + "mma": 1712, + "pensa": 1713, + "deva": 1714, + "fosse": 1715, + "sopra": 1716, + "tamente": 1717, + "ness": 1718, + "quanto": 1719, + "raga": 1720, + "unque": 1721, + "care": 1722, + "stre": 1723, + "grande": 1724, + "picco": 1725, + "guarda": 1726, + "nell": 1727, + "possi": 1728, + "presen": 1729, + "rò": 1730, + "paro": 1731, + "tua": 1732, + "vin": 1733, + "ane": 1734, + "stesso": 1735, + "dav": 1736, + "nei": 1737, + "nelle": 1738, + "ghi": 1739, + "pio": 1740, + "lato": 1741, + "sid": 1742, + "fine": 1743, + "fuo": 1744, + "quasi": 1745, + "ulti": 1746, + "ito": 1747, + "sue": 1748, + "fil": 1749, + "allora": 1750, + "veni": 1751, + "tano": 1752, + "ello": 1753, + "ão": 1754, + "não": 1755, + "uma": 1756, + "ela": 1757, + "lh": 1758, + "ção": 1759, + "cê": 1760, + "inha": 1761, + "você": 1762, + "ec": 1763, + "dade": 1764, + "ao": 1765, + "ram": 1766, + "vel": 1767, + "ém": 1768, + "pode": 1769, + "estava": 1770, + "isso": 1771, + "mui": 1772, + "faz": 1773, + "ões": 1774, + "pes": 1775, + "ix": 1776, + "sim": 1777, + "olh": 1778, + "isa": 1779, + "ên": 1780, + "tinha": 1781, + "meu": 1782, + "são": 1783, + "minha": 1784, + "muito": 1785, + "foi": 1786, + "bem": 1787, + "diz": 1788, + "parec": 1789, + "ço": 1790, + "pesso": 1791, + "pois": 1792, + "mesmo": 1793, + "ções": 1794, + "seus": 1795, + "até": 1796, + "ência": 1797, + "lhe": 1798, + "tiv": 1799, + "mã": 1800, + "só": 1801, + "tão": 1802, + "tudo": 1803, + "então": 1804, + "inda": 1805, + "bal": 1806, + "indo": 1807, + "ndo": 1808, + "já": 1809, + "vam": 1810, + "eito": 1811, + "depois": 1812, + "mel": 1813, + "lha": 1814, + "ainda": 1815, + "fazer": 1816, + "pou": 1817, + "pergun": 1818, + "deix": 1819, + "tamb": 1820, + "ala": 1821, + "pelo": 1822, + "também": 1823, + "fica": 1824, + "prec": 1825, + "eles": 1826, + "havia": 1827, + "lá": 1828, + "nas": 1829, + "gem": 1830, + "mem": 1831, + "ós": 1832, + "deu": 1833, + "eiro": 1834, + "..": 1835, + "assim": 1836, + "ior": 1837, + "har": 1838, + "aqui": 1839, + "cul": 1840, + "sar": 1841, + "outra": 1842, + "olhos": 1843, + "ima": 1844, + "mim": 1845, + "ago": 1846, + "pessoas": 1847, + "eram": 1848, + "eira": 1849, + "pela": 1850, + "coisa": 1851, + "mão": 1852, + "conh": 1853, + "agora": 1854, + "iam": 1855, + "há": 1856, + "suas": 1857, + "guém": 1858, + "cabe": 1859, + "nem": 1860, + "ível": 1861, + "consegu": 1862, + "trabal": 1863, + "lev": 1864, + "lem": 1865, + "vai": 1866, + "tei": 1867, + "pró": 1868, + "quem": 1869, + "onde": 
1870, + "cabeça": 1871, + "nunca": 1872, + "mentos": 1873, + "hum": 1874, + "dele": 1875, + "verdade": 1876, + "tá": 1877, + "hos": 1878, + "algum": 1879, + "dizer": 1880, + "penas": 1881, + "nós": 1882, + "enquanto": 1883, + "outro": 1884, + "lho": 1885, + "melhor": 1886, + "primei": 1887, + "iu": 1888, + "apenas": 1889, + "estou": 1890, + "conte": 1891, + "homem": 1892, + "dois": 1893, + "ças": 1894, + "pouco": 1895, + "senhor": 1896, + "tando": 1897, + "espera": 1898, + "pai": 1899, + "rios": 1900, + "baix": 1901, + "ase": 1902, + "isas": 1903, + "hora": 1904, + "ficar": 1905, + "seja": 1906, + "ân": 1907, + "clar": 1908, + "inc": 1909, + "fos": 1910, + "ouvi": 1911, + "vem": 1912, + "tava": 1913, + "ário": 1914, + "sos": 1915, + "inho": 1916, + "rando": 1917, + "ês": 1918, + "coisas": 1919, + "aconte": 1920, + "lher": 1921, + "anos": 1922, + "talvez": 1923, + "estão": 1924, + "liv": 1925, + "outros": 1926, + "qualquer": 1927, + "gou": 1928, + "lí": 1929, + "tivesse": 1930, + "rado": 1931, + "precisa": 1932, + "mãe": 1933, + "dela": 1934, + "entra": 1935, + "maior": 1936, + "noite": 1937, + "tiva": 1938, + "pala": 1939, + "ração": 1940, + "deus": 1941, + "sas": 1942, + "inte": 1943, + "fei": 1944, + "palav": 1945, + "trás": 1946, + "cidade": 1947, + "lugar": 1948, + "vezes": 1949, + "encontra": 1950, + "tru": 1951, + "eci": 1952, + "ın": 1953, + "bir": 1954, + "yor": 1955, + "ek": 1956, + "dı": 1957, + "ey": 1958, + "tı": 1959, + "mı": 1960, + "iz": 1961, + "ır": 1962, + "gö": 1963, + "sı": 1964, + "bil": 1965, + "lı": 1966, + "üz": 1967, + "iç": 1968, + "iy": 1969, + "ım": 1970, + "uz": 1971, + "cak": 1972, + "iş": 1973, + "ını": 1974, + "iyor": 1975, + "baş": 1976, + "dü": 1977, + "değ": 1978, + "kar": 1979, + "ev": 1980, + "öy": 1981, + "bun": 1982, + "yap": 1983, + "sun": 1984, + "gör": 1985, + "yı": 1986, + "ki": 1987, + "ara": 1988, + "alı": 1989, + "onu": 1990, + "çı": 1991, + "şey": 1992, + "sın": 1993, + "kı": 1994, + "kad": 1995, + "ağ": 1996, + "değil": 1997, + "ük": 1998, + "çok": 1999, + "şı": 2000, + "ül": 2001, + "için": 2002, + "eye": 2003, + "oldu": 2004, + "mış": 2005, + "kal": 2006, + "mek": 2007, + "öyle": 2008, + "yordu": 2009, + "yüz": 2010, + "miş": 2011, + "mak": 2012, + "ola": 2013, + "yan": 2014, + "cek": 2015, + "yorum": 2016, + "bak": 2017, + "üm": 2018, + "ları": 2019, + "oğ": 2020, + "kadar": 2021, + "arı": 2022, + "ında": 2023, + "gün": 2024, + "yok": 2025, + "yer": 2026, + "dım": 2027, + "daha": 2028, + "ına": 2029, + "dim": 2030, + "bilir": 2031, + "iki": 2032, + "siz": 2033, + "diğ": 2034, + "bü": 2035, + "düş": 2036, + "üç": 2037, + "unu": 2038, + "aman": 2039, + "fak": 2040, + "ede": 2041, + "sonra": 2042, + "hiç": 2043, + "aki": 2044, + "ğı": 2045, + "bul": 2046, + "maz": 2047, + "anla": 2048, + "bura": 2049, + "geç": 2050, + "maya": 2051, + "konu": 2052, + "din": 2053, + "tek": 2054, + "zaman": 2055, + "eler": 2056, + "öz": 2057, + "dır": 2058, + "gibi": 2059, + "şa": 2060, + "leri": 2061, + "kim": 2062, + "ku": 2063, + "fakat": 2064, + "yar": 2065, + "göz": 2066, + "cı": 2067, + "yorsun": 2068, + "bek": 2069, + "inde": 2070, + "pek": 2071, + "bunu": 2072, + "lik": 2073, + "iler": 2074, + "edi": 2075, + "öl": 2076, + "sür": 2077, + "sır": 2078, + "çık": 2079, + "sıl": 2080, + "alar": 2081, + "kes": 2082, + "yak": 2083, + "çek": 2084, + "yıl": 2085, + "ecek": 2086, + "ız": 2087, + "git": 2088, + "kap": 2089, + "ama": 2090, + "ıl": 2091, + "ların": 2092, + "biz": 2093, + "tır": 2094, + "oy": 2095, + "ancak": 2096, + "doğ": 2097, + "bana": 2098, + 
"şim": 2099, + "başla": 2100, + "lü": 2101, + "madı": 2102, + "beni": 2103, + "yük": 2104, + "lık": 2105, + "beş": 2106, + "nasıl": 2107, + "tık": 2108, + "tür": 2109, + "daki": 2110, + "ceğ": 2111, + "zı": 2112, + "iyi": 2113, + "dok": 2114, + "benim": 2115, + "cağ": 2116, + "yen": 2117, + "şu": 2118, + "mez": 2119, + "düşün": 2120, + "kendi": 2121, + "şimdi": 2122, + "yol": 2123, + "yu": 2124, + "iste": 2125, + "sek": 2126, + "mam": 2127, + "söyle": 2128, + "dik": 2129, + "kur": 2130, + "olduğ": 2131, + "sını": 2132, + "biliyor": 2133, + "kan": 2134, + "yal": 2135, + "meye": 2136, + "muş": 2137, + "kaç": 2138, + "iye": 2139, + "tü": 2140, + "ef": 2141, + "tım": 2142, + "evet": 2143, + "yet": 2144, + "burada": 2145, + "tim": 2146, + "biraz": 2147, + "kor": 2148, + "doğru": 2149, + "inin": 2150, + "kız": 2151, + "diye": 2152, + "dör": 2153, + "etti": 2154, + "onun": 2155, + "isti": 2156, + "ği": 2157, + "sana": 2158, + "üş": 2159, + "arka": 2160, + "hayır": 2161, + "karşı": 2162, + "ile": 2163, + "hak": 2164, + "ıyor": 2165, + "neden": 2166, + "sev": 2167, + "sız": 2168, + "çocu": 2169, + "çalı": 2170, + "olur": 2171, + "bır": 2172, + "gir": 2173, + "ise": 2174, + "ih": 2175, + "kır": 2176, + "dön": 2177, + "böyle": 2178, + "seni": 2179, + "!\"": 2180, + "dört": 2181, + "söy": 2182, + "oş": 2183, + "musun": 2184, + "laş": 2185, + "ip": 2186, + "kay": 2187, + "hem": 2188, + "büyük": 2189, + "aç": 2190, + "bırak": 2191, + "misin": 2192, + "söz": 2193, + "değiş": 2194, + "ünü": 2195, + "gül": 2196, + "kö": 2197, + "karı": 2198, + "tamam": 2199, + "olu": 2200, + "yeni": 2201, + "lam": 2202, + "mıştı": 2203, + "yaş": 2204, + "iniz": 2205, + "kadın": 2206, + "bunun": 2207, + "mey": 2208, + "altı": 2209, + "yi": 2210, + "inden": 2211, + "senin": 2212, + "yat": 2213, + "top": 2214, + "isi": 2215, + "dün": 2216, + "hiçbir": 2217, + "yon": 2218, + "dın": 2219, + "tün": 2220, + "başka": 2221, + "hep": 2222, + "irmi": 2223, + "devam": 2224, + "olacak": 2225, + "artık": 2226, + "durum": 2227, + "imiz": 2228, + "üzel": 2229, + "lerini": 2230, + "sağ": 2231, + "gerek": 2232, + "yirmi": 2233, + "şek": 2234, + "bağ": 2235, + "lara": 2236, + "yür": 2237, + "ması": 2238, + "katı": 2239, + "dedi": 2240, + "gü": 2241, + "sorun": 2242, + "üne": 2243, + "mız": 2244, + "yapı": 2245, + "mil": 2246, + "ğını": 2247, + "tara": 2248, + "vardı": 2249, + "konuş": 2250, + "arak": 2251, + "larak": 2252, + "çocuk": 2253, + "bütün": 2254, + "ley": 2255, + "dür": 2256, + "güzel": 2257, + "ayı": 2258, + "yapa": 2259, + "nı": 2260, + "ayr": 2261, + "öne": 2262, + "yordum": 2263, + "ban": 2264, + "i̇ş": 2265, + "dum": 2266, + "yorlar": 2267, + "larını": 2268, + "çıkar": 2269, + "zan": 2270, + "seç": 2271, + "liyor": 2272, + "tak": 2273, + "şık": 2274, + "tekrar": 2275, + "aş": 2276, + "eş": 2277, + "mişti": 2278, + "kin": 2279, + "imi": 2280, + "eğ": 2281, + "gidi": 2282, + "leş": 2283, + "başladı": 2284, + "gide": 2285, + "otur": 2286, + "dde": 2287, + "ından": 2288, + "üzer": 2289, + "ının": 2290, + "nız": 2291, + "uy": 2292, + "yedi": 2293, + "kat": 2294, + "olarak": 2295, + "ladı": 2296, + "yalnız": 2297, + "bah": 2298, + "iyet": 2299, + "sak": 2300, + "açık": 2301, + "sında": 2302, + "...": 2303, + "insan": 2304, + "aynı": 2305, + "eder": 2306, + "istan": 2307, + "uzun": 2308, + "geri": 2309, + "erek": 2310, + "olan": 2311, + "gerçek": 2312, + "alan": 2313, + "dış": 2314, + "alık": 2315, + "fark": 2316, + "üst": 2317, + "sade": 2318, + "kiş": 2319, + "ldı": 2320, + "zor": 2321, + "etir": 2322, + "herkes": 2323, + "ömer": 
2324, + "unda": 2325, + "haf": 2326, + "buna": 2327, + "ydı": 2328, + "peki": 2329, + "adam": 2330, + "haz": 2331, + "sına": 2332, + "kapı": 2333, + "görüş": 2334, + "sadece": 2335, + "aldı": 2336, + "geldi": 2337, + "rz": 2338, + "sz": 2339, + "cz": 2340, + "ię": 2341, + "dz": 2342, + "ał": 2343, + "się": 2344, + "rze": 2345, + "że": 2346, + "wy": 2347, + "rzy": 2348, + "ła": 2349, + "ło": 2350, + "ny": 2351, + "dzie": 2352, + "dzi": 2353, + "czy": 2354, + "cie": 2355, + "prze": 2356, + "dy": 2357, + "kie": 2358, + "ry": 2359, + "ją": 2360, + "ów": 2361, + "przy": 2362, + "mie": 2363, + "szy": 2364, + "cze": 2365, + "bie": 2366, + "cy": 2367, + "nia": 2368, + "ści": 2369, + "sze": 2370, + "jest": 2371, + "ży": 2372, + "ną": 2373, + "któ": 2374, + "ała": 2375, + "mnie": 2376, + "ły": 2377, + "cza": 2378, + "jak": 2379, + "roz": 2380, + "ró": 2381, + "zna": 2382, + "łu": 2383, + "ść": 2384, + "wia": 2385, + "wszy": 2386, + "spo": 2387, + "gdy": 2388, + "wał": 2389, + "wię": 2390, + "łem": 2391, + "ję": 2392, + "sk": 2393, + "rę": 2394, + "dob": 2395, + "już": 2396, + "bę": 2397, + "ałem": 2398, + "sza": 2399, + "pod": 2400, + "dla": 2401, + "pan": 2402, + "nę": 2403, + "może": 2404, + "śli": 2405, + "ało": 2406, + "lko": 2407, + "nych": 2408, + "powie": 2409, + "cię": 2410, + "tylko": 2411, + "naj": 2412, + "tego": 2413, + "ski": 2414, + "nego": 2415, + "wszyst": 2416, + "szcze": 2417, + "jed": 2418, + "jej": 2419, + "two": 2420, + "ąd": 2421, + "śmy": 2422, + "czę": 2423, + "wać": 2424, + "jego": 2425, + "ża": 2426, + "sy": 2427, + "praw": 2428, + "tym": 2429, + "który": 2430, + "ały": 2431, + "trze": 2432, + "niej": 2433, + "nym": 2434, + "gło": 2435, + "jąc": 2436, + "mówi": 2437, + "ska": 2438, + "nej": 2439, + "słu": 2440, + "wła": 2441, + "będzie": 2442, + "dę": 2443, + "pó": 2444, + "bez": 2445, + "nic": 2446, + "pła": 2447, + "ście": 2448, + "są": 2449, + "trzy": 2450, + "kiem": 2451, + "był": 2452, + "mog": 2453, + "robi": 2454, + "tam": 2455, + "mię": 2456, + "zy": 2457, + "pew": 2458, + "myś": 2459, + "przed": 2460, + "sko": 2461, + "które": 2462, + "lę": 2463, + "wsze": 2464, + "ąc": 2465, + "było": 2466, + "sobie": 2467, + "py": 2468, + "cią": 2469, + "jeszcze": 2470, + "tę": 2471, + "czas": 2472, + "szę": 2473, + "gł": 2474, + "kę": 2475, + "czu": 2476, + "przez": 2477, + "sło": 2478, + "wz": 2479, + "kto": 2480, + "ków": 2481, + "czo": 2482, + "liśmy": 2483, + "więc": 2484, + "rą": 2485, + "wó": 2486, + "rza": 2487, + "ności": 2488, + "wet": 2489, + "nął": 2490, + "śmie": 2491, + "nawet": 2492, + "musi": 2493, + "swo": 2494, + "tej": 2495, + "wą": 2496, + "wu": 2497, + "wią": 2498, + "niu": 2499, + "czą": 2500, + "dzo": 2501, + "skie": 2502, + "jeśli": 2503, + "czego": 2504, + "chy": 2505, + "dł": 2506, + "tych": 2507, + "bym": 2508, + "żo": 2509, + "eś": 2510, + "sią": 2511, + "kiedy": 2512, + "wró": 2513, + "dze": 2514, + "dro": 2515, + "rów": 2516, + "pani": 2517, + "kul": 2518, + "nad": 2519, + "chwi": 2520, + "nim": 2521, + "być": 2522, + "chodzi": 2523, + "nio": 2524, + "dobrze": 2525, + "teraz": 2526, + "wokul": 2527, + "coś": 2528, + "kł": 2529, + "pier": 2530, + "gdzie": 2531, + "dzy": 2532, + "pię": 2533, + "dź": 2534, + "ką": 2535, + "gó": 2536, + "zda": 2537, + "chce": 2538, + "stę": 2539, + "świa": 2540, + "wszystko": 2541, + "peł": 2542, + "wiem": 2543, + "wiel": 2544, + "każ": 2545, + "rzu": 2546, + "sły": 2547, + "jedna": 2548, + "myśl": 2549, + "mój": 2550, + "jestem": 2551, + "óż": 2552, + "miej": 2553, + "moż": 2554, + "kła": 2555, + "resz": 2556, + "dłu": 
2557, + "stwo": 2558, + "nię": 2559, + "masz": 2560, + "żeby": 2561, + "niem": 2562, + "jakie": 2563, + "sty": 2564, + "nią": 2565, + "wej": 2566, + "oj": 2567, + "sła": 2568, + "ność": 2569, + "zło": 2570, + "szczę": 2571, + "lej": 2572, + "wego": 2573, + "cał": 2574, + "dział": 2575, + "kich": 2576, + "dza": 2577, + "dzię": 2578, + "oczy": 2579, + "zosta": 2580, + "czło": 2581, + "nam": 2582, + "kil": 2583, + "szu": 2584, + "wę": 2585, + "miał": 2586, + "strze": 2587, + "cej": 2588, + "ej": 2589, + "znaj": 2590, + "dać": 2591, + "miejs": 2592, + "kró": 2593, + "kry": 2594, + "bardzo": 2595, + "śnie": 2596, + "lą": 2597, + "gie": 2598, + "ciebie": 2599, + "dni": 2600, + "potrze": 2601, + "wokulski": 2602, + "uwa": 2603, + "umie": 2604, + "jednak": 2605, + "kra": 2606, + "wróci": 2607, + "człowie": 2608, + "czyć": 2609, + "była": 2610, + "żeli": 2611, + "mę": 2612, + "cę": 2613, + "zrobi": 2614, + "mogę": 2615, + "prowa": 2616, + "rem": 2617, + "niech": 2618, + "cznie": 2619, + "kro": 2620, + "tą": 2621, + "chci": 2622, + "bro": 2623, + "dzieć": 2624, + "szą": 2625, + "pad": 2626, + "trz": 2627, + "jem": 2628, + "tów": 2629, + "dru": 2630, + "taj": 2631, + "rzekł": 2632, + "niego": 2633, + "takie": 2634, + "wała": 2635, + "towa": 2636, + "kapła": 2637, + "widzi": 2638, + "podob": 2639, + "dzę": 2640, + "tał": 2641, + "stęp": 2642, + "bą": 2643, + "poko": 2644, + "wem": 2645, + "gę": 2646, + "aby": 2647, + "albo": 2648, + "spra": 2649, + "zno": 2650, + "smo": 2651, + "jesz": 2652, + "księ": 2653, + "jesteś": 2654, + "poz": 2655, + "nigdy": 2656, + "ksią": 2657, + "cóż": 2658, + "ws": 2659, + "pow": 2660, + "tka": 2661, + "świe": 2662, + "szka": 2663, + "samo": 2664, + "sł": 2665, + "rzę": 2666, + "nale": 2667, + "chcesz": 2668, + "nik": 2669, + "pę": 2670, + "chyba": 2671, + "ciąg": 2672, + "jący": 2673, + "woj": 2674, + "nasze": 2675, + "mniej": 2676, + "więcej": 2677, + "zwy": 2678, + "osta": 2679, + "waż": 2680, + "śmier": 2681, + "wier": 2682, + "dzą": 2683, + "zaś": 2684, + "gdyby": 2685, + "jaki": 2686, + "wol": 2687, + "win": 2688, + "dą": 2689, + "ścia": 2690, + "rozma": 2691, + "wal": 2692, + "panie": 2693, + "star": 2694, + "kaz": 2695, + "jeżeli": 2696, + "wra": 2697, + "koń": 2698, + "siebie": 2699, + "znowu": 2700, + "czem": 2701, + "stwa": 2702, + "isto": 2703, + "pół": 2704, + "dał": 2705, + "kobie": 2706, + "ałam": 2707, + "wych": 2708, + "cesa": 2709, + "nich": 2710, + "zawsze": 2711, + "dzić": 2712, + "też": 2713, + "lepie": 2714, + "proszę": 2715, + "kre": 2716, + "twa": 2717, + "łą": 2718, + "chu": 2719, + "cą": 2720, + "prz": 2721, + "łe": 2722, + "szedł": 2723, + "odpowie": 2724, + "myśli": 2725, + "świą": 2726, + "ź": 2727, + "ł": 2728, + "&": 2729, + "=": 2730, + "ă": 2731, + "đ": 2732, + "ţ": 2733, + "–": 2734, + "‘": 2735, + "ij": 2736, + "aa": 2737, + "een": 2738, + "het": 2739, + "aar": 2740, + "oor": 2741, + "ijn": 2742, + "dat": 2743, + "oe": 2744, + "ijk": 2745, + "aan": 2746, + "voor": 2747, + "iet": 2748, + "zijn": 2749, + "niet": 2750, + "oo": 2751, + "moet": 2752, + "heb": 2753, + "uit": 2754, + "wij": 2755, + "aat": 2756, + "lijk": 2757, + "sl": 2758, + "daar": 2759, + "deze": 2760, + "worden": 2761, + "moeten": 2762, + "onder": 2763, + "hebben": 2764, + "ook": 2765, + "ct": 2766, + "nog": 2767, + "aal": 2768, + "eer": 2769, + "bij": 2770, + "mijn": 2771, + "kom": 2772, + "atie": 2773, + "eft": 2774, + "kel": 2775, + "rij": 2776, + "heid": 2777, + "af": 2778, + "stel": 2779, + "maar": 2780, + "wee": 2781, + "heeft": 2782, + "waar": 2783, + "eren": 2784, + 
"wat": 2785, + "wil": 2786, + "aag": 2787, + "bet": 2788, + "hij": 2789, + "kun": 2790, + "uw": 2791, + "dt": 2792, + "door": 2793, + "tij": 2794, + "ond": 2795, + "geen": 2796, + "gev": 2797, + "veel": 2798, + "naar": 2799, + "aten": 2800, + "kunnen": 2801, + "echt": 2802, + "goe": 2803, + "twee": 2804, + "delijk": 2805, + "uur": 2806, + "toe": 2807, + "meer": 2808, + "onze": 2809, + "tijd": 2810, + "hoe": 2811, + "tot": 2812, + "zou": 2813, + "aak": 2814, + "amen": 2815, + "woor": 2816, + "wordt": 2817, + "gelijk": 2818, + "gaan": 2819, + "ker": 2820, + "eld": 2821, + "hou": 2822, + "zel": 2823, + "tegen": 2824, + "komen": 2825, + "werk": 2826, + "goed": 2827, + "zal": 2828, + "zij": 2829, + "slag": 2830, + "zien": 2831, + "echter": 2832, + "itie": 2833, + "tie": 2834, + "elijk": 2835, + "ische": 2836, + "belan": 2837, + "haar": 2838, + "vr": 2839, + "grijk": 2840, + "doen": 2841, + "land": 2842, + "belangrijk": 2843, + "open": 2844, + "ctie": 2845, + "zelf": 2846, + "mij": 2847, + "iteit": 2848, + "stem": 2849, + "mee": 2850, + "aren": 2851, + "dien": 2852, + "gaat": 2853, + "prob": 2854, + "moe": 2855, + "ullen": 2856, + "zich": 2857, + "daarom": 2858, + "orm": 2859, + "staat": 2860, + "zit": 2861, + "dui": 2862, + "dus": 2863, + "ds": 2864, + "verslag": 2865, + "kelijk": 2866, + "proble": 2867, + "schap": 2868, + "gd": 2869, + "hun": 2870, + "erd": 2871, + "zet": 2872, + "staan": 2873, + "maal": 2874, + "inder": 2875, + "eid": 2876, + "kken": 2877, + "ged": 2878, + "zullen": 2879, + "mensen": 2880, + "jaar": 2881, + "regel": 2882, + "ieder": 2883, + "volgen": 2884, + "geven": 2885, + "even": 2886, + "blij": 2887, + "ië": 2888, + "uwe": 2889, + "maken": 2890, + "oek": 2891, + "nieuwe": 2892, + "baar": 2893, + "andere": 2894, + "ruik": 2895, + "agen": 2896, + "ouw": 2897, + "willen": 2898, + "aakt": 2899, + "hoo": 2900, + "anden": 2901, + "lig": 2902, + "samen": 2903, + "zeer": 2904, + "duidelijk": 2905, + "antwoor": 2906, + "heel": 2907, + "punt": 2908, + "houden": 2909, + "vraag": 2910, + "gele": 2911, + "eens": 2912, + "besch": 2913, + "omen": 2914, + "erg": 2915, + "doel": 2916, + "dag": 2917, + "uren": 2918, + "ings": 2919, + "oren": 2920, + "delen": 2921, + "steun": 2922, + "innen": 2923, + "pol": 2924, + "oon": 2925, + "sn": 2926, + "zonder": 2927, + "nodig": 2928, + "alleen": 2929, + "mid": 2930, + "ragen": 2931, + "iets": 2932, + "versch": 2933, + "gebruik": 2934, + "rouw": 2935, + "stellen": 2936, + "menten": 2937, + "eerste": 2938, + "laat": 2939, + "groot": 2940, + "ood": 2941, + "toch": 2942, + "laten": 2943, + "aard": 2944, + "sle": 2945, + "deel": 2946, + "plaat": 2947, + "ree": 2948, + "betre": 2949, + "lid": 2950, + "uiten": 2951, + "racht": 2952, + "beleid": 2953, + "stie": 2954, + "staten": 2955, + "ggen": 2956, + "reken": 2957, + "alen": 2958, + "ming": 2959, + "mogelijk": 2960, + "grote": 2961, + "altijd": 2962, + "enkel": 2963, + "wik": 2964, + "politie": 2965, + "elk": 2966, + "handel": 2967, + "kwe": 2968, + "maat": 2969, + "elen": 2970, + "vrij": 2971, + "jes": 2972, + "aam": 2973, + "huis": 2974, + "weer": 2975, + "lidstaten": 2976, + "king": 2977, + "kle": 2978, + "bed": 2979, + "geval": 2980, + "wikkel": 2981, + "kwestie": 2982, + "stee": 2983, + "hel": 2984, + "komst": 2985, + "iden": 2986, + "eerd": 2987, + "tweede": 2988, + "probleem": 2989, + "ussen": 2990, + "snel": 2991, + "tig": 2992, + "ult": 2993, + "nemen": 2994, + "commis": 2995, + "verschil": 2996, + "zoek": 2997, + "krij": 2998, + "graag": 2999, + "denk": 3000, + "landen": 3001, + "reden": 3002, 
+ "besl": 3003, + "oeg": 3004, + "beter": 3005, + "heden": 3006, + "mag": 3007, + "boven": 3008, + "cont": 3009, + "fd": 3010, + "hele": 3011, + "vier": 3012, + "gez": 3013, + "kw": 3014, + "aas": 3015, + "ontwikkel": 3016, + "drie": 3017, + "vaak": 3018, + "plaats": 3019, + "gang": 3020, + "ijf": 3021, + "natuur": 3022, + "tussen": 3023, + "bat": 3024, + "komt": 3025, + "wacht": 3026, + "aad": 3027, + "achter": 3028, + "gebie": 3029, + "verk": 3030, + "ligt": 3031, + "nieuw": 3032, + "vand": 3033, + "ý": 3034, + "ď": 3035, + "ě": 3036, + "ř": 3037, + "ť": 3038, + "ů": 3039, + "„": 3040, + "ní": 3041, + "ně": 3042, + "ře": 3043, + "ná": 3044, + "vě": 3045, + "vá": 3046, + "rá": 3047, + "vy": 3048, + "mě": 3049, + "ři": 3050, + "ří": 3051, + "že": 3052, + "jí": 3053, + "vý": 3054, + "ji": 3055, + "dě": 3056, + "če": 3057, + "tě": 3058, + "ky": 3059, + "še": 3060, + "ké": 3061, + "ší": 3062, + "pře": 3063, + "ví": 3064, + "ný": 3065, + "ži": 3066, + "má": 3067, + "cí": 3068, + "zá": 3069, + "ské": 3070, + "dá": 3071, + "byl": 3072, + "tí": 3073, + "pří": 3074, + "při": 3075, + "či": 3076, + "vní": 3077, + "ča": 3078, + "dí": 3079, + "dní": 3080, + "ká": 3081, + "nou": 3082, + "vět": 3083, + "pě": 3084, + "kou": 3085, + "ých": 3086, + "bě": 3087, + "prá": 3088, + "jako": 3089, + "ží": 3090, + "zí": 3091, + "jsou": 3092, + "jsem": 3093, + "lní": 3094, + "cké": 3095, + "vat": 3096, + "před": 3097, + "hla": 3098, + "stá": 3099, + "čí": 3100, + "ši": 3101, + "kla": 3102, + "ště": 3103, + "lou": 3104, + "mů": 3105, + "chá": 3106, + "pů": 3107, + "také": 3108, + "dů": 3109, + "nost": 3110, + "tře": 3111, + "sku": 3112, + "vše": 3113, + "tní": 3114, + "byla": 3115, + "ční": 3116, + "jeho": 3117, + "bý": 3118, + "vání": 3119, + "ných": 3120, + "tři": 3121, + "vz": 3122, + "stře": 3123, + "dva": 3124, + "hle": 3125, + "čá": 3126, + "nosti": 3127, + "vš": 3128, + "hra": 3129, + "jen": 3130, + "slo": 3131, + "však": 3132, + "kdy": 3133, + "bylo": 3134, + "bude": 3135, + "jší": 3136, + "vých": 3137, + "ním": 3138, + "sm": 3139, + "koli": 3140, + "rů": 3141, + "může": 3142, + "není": 3143, + "hod": 3144, + "bí": 3145, + "tý": 3146, + "stě": 3147, + "uje": 3148, + "sá": 3149, + "pět": 3150, + "krá": 3151, + "tom": 3152, + "ství": 3153, + "vně": 3154, + "sed": 3155, + "své": 3156, + "pí": 3157, + "musí": 3158, + "už": 3159, + "tím": 3160, + "jící": 3161, + "jedno": 3162, + "čas": 3163, + "čty": 3164, + "ský": 3165, + "evro": 3166, + "toho": 3167, + "hy": 3168, + "kter": 3169, + "rní": 3170, + "stí": 3171, + "svě": 3172, + "pak": 3173, + "všech": 3174, + "ků": 3175, + "ng": 3176, + "ád": 3177, + "chází": 3178, + "být": 3179, + "první": 3180, + "mno": 3181, + "ského": 3182, + "pá": 3183, + "nebo": 3184, + "kem": 3185, + "sla": 3186, + "ného": 3187, + "zde": 3188, + "další": 3189, + "řa": 3190, + "čtyři": 3191, + "hrá": 3192, + "druh": 3193, + "lně": 3194, + "vla": 3195, + "ských": 3196, + "ško": 3197, + "půso": 3198, + "proto": 3199, + "vů": 3200, + "ská": 3201, + "šest": 3202, + "dně": 3203, + "ještě": 3204, + "mezi": 3205, + "několi": 3206, + "již": 3207, + "čně": 3208, + "slu": 3209, + "zná": 3210, + "sedm": 3211, + "vlá": 3212, + "osm": 3213, + "byly": 3214, + "vám": 3215, + "cký": 3216, + "tech": 3217, + "ději": 3218, + "velmi": 3219, + "leži": 3220, + "vala": 3221, + "lý": 3222, + "tvo": 3223, + "spole": 3224, + "stup": 3225, + "mož": 3226, + "evrop": 3227, + "stal": 3228, + "jde": 3229, + "rodi": 3230, + "její": 3231, + "poli": 3232, + "devět": 3233, + "sme": 3234, + "až": 3235, + "této": 3236, + 
"tento": 3237, + "kaž": 3238, + "nula": 3239, + "bych": 3240, + "moc": 3241, + "stou": 3242, + "kdo": 3243, + "zd": 3244, + "praco": 3245, + "tomu": 3246, + "ným": 3247, + "živo": 3248, + "zem": 3249, + "násle": 3250, + "sky": 3251, + "jich": 3252, + "měl": 3253, + "děla": 3254, + "jsme": 3255, + "nice": 3256, + "stej": 3257, + "stní": 3258, + "náro": 3259, + "nit": 3260, + "později": 3261, + "tako": 3262, + "nce": 3263, + "čer": 3264, + "ším": 3265, + "něco": 3266, + "vál": 3267, + "řej": 3268, + "krát": 3269, + "ální": 3270, + "asi": 3271, + "které": 3272, + "stav": 3273, + "mají": 3274, + "mys": 3275, + "době": 3276, + "sně": 3277, + "zku": 3278, + "tů": 3279, + "chod": 3280, + "spě": 3281, + "jejich": 3282, + "součas": 3283, + "vali": 3284, + "kte": 3285, + "prů": 3286, + "zení": 3287, + "pat": 3288, + "potře": 3289, + "dnes": 3290, + "zemí": 3291, + "znam": 3292, + "mám": 3293, + "tedy": 3294, + "hlavní": 3295, + "použí": 3296, + "bní": 3297, + "vede": 3298, + "lep": 3299, + "jek": 3300, + "prav": 3301, + "politi": 3302, + "dne": 3303, + "čení": 3304, + "než": 3305, + "děl": 3306, + "čo": 3307, + "cích": 3308, + "sté": 3309, + "dlou": 3310, + "několik": 3311, + "vyu": 3312, + "ckých": 3313, + "nové": 3314, + "čin": 3315, + "dělá": 3316, + "ký": 3317, + "obla": 3318, + "podle": 3319, + "důleži": 3320, + "poku": 3321, + "kone": 3322, + "dý": 3323, + "dvě": 3324, + "žád": 3325, + "nout": 3326, + "tku": 3327, + "tvr": 3328, + "ckého": 3329, + "rov": 3330, + "tele": 3331, + "psa": 3332, + "svět": 3333, + "tivní": 3334, + "dosta": 3335, + "šel": 3336, + "druhé": 3337, + "skou": 3338, + "žo": 3339, + "jedná": 3340, + "význam": 3341, + "problé": 3342, + "publi": 3343, + "ván": 3344, + "odpo": 3345, + "podpo": 3346, + "dle": 3347, + "jaké": 3348, + "šení": 3349, + "vím": 3350, + "během": 3351, + "nachází": 3352, + "slou": 3353, + "pouze": 3354, + "otá": 3355, + "plo": 3356, + "tové": 3357, + "větši": 3358, + "komi": 3359, + "vají": 3360, + "tyto": 3361, + "zápa": 3362, + "změ": 3363, + "moh": 3364, + "více": 3365, + "společ": 3366, + "auto": 3367, + "proti": 3368, + "dět": 3369, + "cháze": 3370, + "žel": 3371, + "«": 3372, + "»": 3373, + "а": 3374, + "б": 3375, + "в": 3376, + "г": 3377, + "д": 3378, + "е": 3379, + "ж": 3380, + "з": 3381, + "и": 3382, + "й": 3383, + "к": 3384, + "л": 3385, + "м": 3386, + "н": 3387, + "о": 3388, + "п": 3389, + "р": 3390, + "с": 3391, + "т": 3392, + "у": 3393, + "ф": 3394, + "х": 3395, + "ц": 3396, + "ч": 3397, + "ш": 3398, + "щ": 3399, + "ъ": 3400, + "ы": 3401, + "ь": 3402, + "э": 3403, + "ю": 3404, + "я": 3405, + "ё": 3406, + "‑": 3407, + "−": 3408, + "ст": 3409, + "ен": 3410, + "но": 3411, + "на": 3412, + "пр": 3413, + "то": 3414, + "по": 3415, + "ра": 3416, + "го": 3417, + "ко": 3418, + "не": 3419, + "во": 3420, + "ва": 3421, + "ет": 3422, + "ер": 3423, + "ни": 3424, + "ел": 3425, + "ит": 3426, + "ны": 3427, + "за": 3428, + "ро": 3429, + "ени": 3430, + "ка": 3431, + "ли": 3432, + "ем": 3433, + "да": 3434, + "об": 3435, + "ла": 3436, + "до": 3437, + "ся": 3438, + "ть": 3439, + "от": 3440, + "ло": 3441, + "ль": 3442, + "ед": 3443, + "со": 3444, + "ми": 3445, + "ре": 3446, + "мо": 3447, + "ци": 3448, + "про": 3449, + "та": 3450, + "это": 3451, + "ки": 3452, + "ру": 3453, + "при": 3454, + "ти": 3455, + "се": 3456, + "ста": 3457, + "вы": 3458, + "мы": 3459, + "ви": 3460, + "бы": 3461, + "ма": 3462, + "ес": 3463, + "ля": 3464, + "сти": 3465, + "ле": 3466, + "что": 3467, + "ме": 3468, + "ри": 3469, + "ча": 3470, + "од": 3471, + "ей": 3472, + "ель": 3473, + "ения": 
3474, + "га": 3475, + "ну": 3476, + "си": 3477, + "па": 3478, + "раз": 3479, + "бо": 3480, + "сто": 3481, + "су": 3482, + "са": 3483, + "ду": 3484, + "его": 3485, + "ест": 3486, + "ин": 3487, + "ить": 3488, + "из": 3489, + "же": 3490, + "му": 3491, + "пер": 3492, + "под": 3493, + "ение": 3494, + "сь": 3495, + "ку": 3496, + "пред": 3497, + "ного": 3498, + "ных": 3499, + "вер": 3500, + "те": 3501, + "ной": 3502, + "ции": 3503, + "де": 3504, + "ры": 3505, + "дел": 3506, + "лю": 3507, + "ве": 3508, + "он": 3509, + "мен": 3510, + "ги": 3511, + "ня": 3512, + "бу": 3513, + "пра": 3514, + "все": 3515, + "ется": 3516, + "сть": 3517, + "жа": 3518, + "дол": 3519, + "жи": 3520, + "бе": 3521, + "кон": 3522, + "сл": 3523, + "ши": 3524, + "ди": 3525, + "ств": 3526, + "ско": 3527, + "ные": 3528, + "чи": 3529, + "ют": 3530, + "дер": 3531, + "стра": 3532, + "ты": 3533, + "ход": 3534, + "щи": 3535, + "зо": 3536, + "зна": 3537, + "ности": 3538, + "чес": 3539, + "вля": 3540, + "вать": 3541, + "ор": 3542, + "пол": 3543, + "вет": 3544, + "так": 3545, + "ша": 3546, + "ту": 3547, + "сво": 3548, + "пре": 3549, + "она": 3550, + "итель": 3551, + "ный": 3552, + "сло": 3553, + "как": 3554, + "вл": 3555, + "ность": 3556, + "хо": 3557, + "мож": 3558, + "пе": 3559, + "для": 3560, + "ния": 3561, + "ное": 3562, + "рас": 3563, + "долж": 3564, + "дар": 3565, + "тель": 3566, + "ска": 3567, + "пу": 3568, + "ство": 3569, + "кото": 3570, + "раб": 3571, + "ее": 3572, + "род": 3573, + "эти": 3574, + "соб": 3575, + "ору": 3576, + "жен": 3577, + "ным": 3578, + "ити": 3579, + "ние": 3580, + "ком": 3581, + "дет": 3582, + "сту": 3583, + "гу": 3584, + "пи": 3585, + "меж": 3586, + "ению": 3587, + "тер": 3588, + "работ": 3589, + "воз": 3590, + "ция": 3591, + "кой": 3592, + "щест": 3593, + "гра": 3594, + "зи": 3595, + "ря": 3596, + "между": 3597, + "ства": 3598, + "вс": 3599, + "ело": 3600, + "ше": 3601, + "мер": 3602, + "ба": 3603, + "зы": 3604, + "лу": 3605, + "аль": 3606, + "дей": 3607, + "гла": 3608, + "народ": 3609, + "кти": 3610, + "предста": 3611, + "лся": 3612, + "явля": 3613, + "ски": 3614, + "нов": 3615, + "един": 3616, + "ров": 3617, + "ис": 3618, + "нима": 3619, + "рем": 3620, + "ходи": 3621, + "также": 3622, + "дру": 3623, + "ать": 3624, + "след": 3625, + "гово": 3626, + "ная": 3627, + "ющи": 3628, + "ень": 3629, + "которы": 3630, + "хот": 3631, + "ву": 3632, + "их": 3633, + "ему": 3634, + "чит": 3635, + "важ": 3636, + "орга": 3637, + "чески": 3638, + "ще": 3639, + "ке": 3640, + "ха": 3641, + "пос": 3642, + "том": 3643, + "боль": 3644, + "мне": 3645, + "пас": 3646, + "объ": 3647, + "прав": 3648, + "конф": 3649, + "слу": 3650, + "поддер": 3651, + "стви": 3652, + "наш": 3653, + "лько": 3654, + "стоя": 3655, + "ную": 3656, + "лем": 3657, + "енных": 3658, + "кра": 3659, + "ды": 3660, + "международ": 3661, + "гда": 3662, + "необ": 3663, + "госу": 3664, + "ству": 3665, + "ении": 3666, + "государ": 3667, + "кто": 3668, + "им": 3669, + "чест": 3670, + "рет": 3671, + "вопро": 3672, + "лен": 3673, + "ели": 3674, + "рова": 3675, + "ций": 3676, + "нам": 3677, + "этой": 3678, + "жения": 3679, + "необходи": 3680, + "меня": 3681, + "было": 3682, + "сили": 3683, + "фи": 3684, + "вя": 3685, + "шь": 3686, + "этого": 3687, + "они": 3688, + "органи": 3689, + "безо": 3690, + "проб": 3691, + "име": 3692, + "реш": 3693, + "би": 3694, + "безопас": 3695, + "ются": 3696, + "оста": 3697, + "енно": 3698, + "год": 3699, + "ела": 3700, + "представ": 3701, + "ться": 3702, + "слово": 3703, + "организа": 3704, + "должны": 3705, + "этом": 3706, + "бла": 3707, + 
"че": 3708, + "чу": 3709, + "благо": 3710, + "этому": 3711, + "врем": 3712, + "спе": 3713, + "ном": 3714, + "ений": 3715, + "спо": 3716, + "нас": 3717, + "нет": 3718, + "зу": 3719, + "вед": 3720, + "еще": 3721, + "сказа": 3722, + "сей": 3723, + "ерен": 3724, + "дан": 3725, + "сам": 3726, + "еля": 3727, + "ран": 3728, + "зыва": 3729, + "является": 3730, + "будет": 3731, + "ктив": 3732, + "тре": 3733, + "деле": 3734, + "мот": 3735, + "конферен": 3736, + "лась": 3737, + "час": 3738, + "сторо": 3739, + "кого": 3740, + "ез": 3741, + "ней": 3742, + "ос": 3743, + "лись": 3744, + "разору": 3745, + "пере": 3746, + "сси": 3747, + "ными": 3748, + "проц": 3749, + "голо": 3750, + "чело": 3751, + "боле": 3752, + "челове": 3753, + "сер": 3754, + "пл": 3755, + "чет": 3756, + "стран": 3757, + "пя": 3758, + "был": 3759, + "кла": 3760, + "тов": 3761, + "жд": 3762, + "дела": 3763, + "ера": 3764, + "уже": 3765, + "совет": 3766, + "ген": 3767, + "безопасности": 3768, + "ца": 3769, + "седа": 3770, + "поз": 3771, + "ответ": 3772, + "проблем": 3773, + "нако": 3774, + "тем": 3775, + "доста": 3776, + "пы": 3777, + "ща": 3778, + "вой": 3779, + "сущест": 3780, + "необходимо": 3781, + "быть": 3782, + "может": 3783, + "дем": 3784, + "чтобы": 3785, + "ек": 3786, + "чер": 3787, + "усили": 3788, + "рес": 3789, + "руд": 3790, + "единенных": 3791, + "доб": 3792, + "дости": 3793, + "ствен": 3794, + "ядер": 3795, + "годня": 3796, + "каза": 3797, + "сегодня": 3798, + "сейчас": 3799, + "только": 3800, + "вод": 3801, + "есь": 3802, + "много": 3803, + "буду": 3804, + "ев": 3805, + "есть": 3806, + "три": 3807, + "общест": 3808, + "явл": 3809, + "высту": 3810, + "ред": 3811, + "счит": 3812, + "сит": 3813, + "делега": 3814, + "лож": 3815, + "этот": 3816, + "фор": 3817, + "клю": 3818, + "возмож": 3819, + "вания": 3820, + "бли": 3821, + "или": 3822, + "вз": 3823, + "наций": 3824, + "ского": 3825, + "приня": 3826, + "пла": 3827, + "оч": 3828, + "иться": 3829, + "сте": 3830, + "наши": 3831, + "которые": 3832, + "ар": 3833, + "имеет": 3834, + "сот": 3835, + "знач": 3836, + "перь": 3837, + "следу": 3838, + "ены": 3839, + "таки": 3840, + "объединенных": 3841, + "стро": 3842, + "теперь": 3843, + "бле": 3844, + "благодар": 3845, + "разв": 3846, + "ан": 3847, + "жива": 3848, + "очень": 3849, + "ят": 3850, + "без": 3851, + "обес": 3852, + "гро": 3853, + "лось": 3854, + "сы": 3855, + "организации": 3856, + "член": 3857, + "того": 3858, + "ональ": 3859, + "жда": 3860, + "всех": 3861, + "свя": 3862, + "более": 3863, + "сов": 3864, + "когда": 3865, + "вот": 3866, + "кре": 3867, + "кры": 3868, + "поэтому": 3869, + "воль": 3870, + "ой": 3871, + "генера": 3872, + "чем": 3873, + "лы": 3874, + "полити": 3875, + "вен": 3876, + "конференции": 3877, + "процес": 3878, + "бя": 3879, + "ите": 3880, + "отно": 3881, + "развити": 3882, + "аф": 3883, + "ющ": 3884, + "вно": 3885, + "мир": 3886, + "нии": 3887, + "кая": 3888, + "ас": 3889, + "ительно": 3890, + "вто": 3891, + "ением": 3892, + "генераль": 3893, + "прот": 3894, + "всем": 3895, + "самбле": 3896, + "ассамбле": 3897, + "ом": 3898, + "зд": 3899, + "смот": 3900, + "реги": 3901, + "чего": 3902, + "однако": 3903, + "усилия": 3904, + "действи": 3905, + "чно": 3906, + "уча": 3907, + "образ": 3908, + "вос": 3909, + "эта": 3910, + "перего": 3911, + "говор": 3912, + "вам": 3913, + "моло": 3914, + "время": 3915, + "дь": 3916, + "хотел": 3917, + "гру": 3918, + "заявл": 3919, + "предоста": 3920, + "поль": 3921, + "нее": 3922, + "резо": 3923, + "перегово": 3924, + "резолю": 3925, + "крет": 3926, + "поддерж": 3927, + 
"обеспе": 3928, + "него": 3929, + "представит": 3930, + "наде": 3931, + "кри": 3932, + "чь": 3933, + "проек": 3934, + "лет": 3935, + "други": 3936, + "_": 3937, + "،": 3938, + "؛": 3939, + "؟": 3940, + "ء": 3941, + "آ": 3942, + "أ": 3943, + "ؤ": 3944, + "إ": 3945, + "ئ": 3946, + "ا": 3947, + "ب": 3948, + "ة": 3949, + "ت": 3950, + "ث": 3951, + "ج": 3952, + "ح": 3953, + "خ": 3954, + "د": 3955, + "ذ": 3956, + "ر": 3957, + "ز": 3958, + "س": 3959, + "ش": 3960, + "ص": 3961, + "ض": 3962, + "ط": 3963, + "ظ": 3964, + "ع": 3965, + "غ": 3966, + "ـ": 3967, + "ف": 3968, + "ق": 3969, + "ك": 3970, + "ل": 3971, + "م": 3972, + "ن": 3973, + "ه": 3974, + "و": 3975, + "ى": 3976, + "ي": 3977, + "ً": 3978, + "ٌ": 3979, + "ٍ": 3980, + "َ": 3981, + "ُ": 3982, + "ِ": 3983, + "ّ": 3984, + "ْ": 3985, + "ٰ": 3986, + "چ": 3987, + "ڨ": 3988, + "ک": 3989, + "ھ": 3990, + "ی": 3991, + "ۖ": 3992, + "ۗ": 3993, + "ۘ": 3994, + "ۚ": 3995, + "ۛ": 3996, + "—": 3997, + "☭": 3998, + "ﺃ": 3999, + "ﻻ": 4000, + "ال": 4001, + "َا": 4002, + "وَ": 4003, + "َّ": 4004, + "ِي": 4005, + "أَ": 4006, + "لَ": 4007, + "نَ": 4008, + "الْ": 4009, + "هُ": 4010, + "ُو": 4011, + "ما": 4012, + "نْ": 4013, + "من": 4014, + "عَ": 4015, + "نا": 4016, + "لا": 4017, + "مَ": 4018, + "تَ": 4019, + "فَ": 4020, + "أن": 4021, + "لي": 4022, + "مِ": 4023, + "ان": 4024, + "في": 4025, + "رَ": 4026, + "يَ": 4027, + "هِ": 4028, + "مْ": 4029, + "قَ": 4030, + "بِ": 4031, + "لى": 4032, + "ين": 4033, + "إِ": 4034, + "لِ": 4035, + "وا": 4036, + "كَ": 4037, + "ها": 4038, + "ًا": 4039, + "مُ": 4040, + "ون": 4041, + "الم": 4042, + "بَ": 4043, + "يا": 4044, + "ذا": 4045, + "سا": 4046, + "الل": 4047, + "مي": 4048, + "يْ": 4049, + "را": 4050, + "ري": 4051, + "لك": 4052, + "مَا": 4053, + "نَّ": 4054, + "لم": 4055, + "إن": 4056, + "ست": 4057, + "وم": 4058, + "َّا": 4059, + "لَا": 4060, + "هم": 4061, + "ِّ": 4062, + "كُ": 4063, + "كان": 4064, + "سَ": 4065, + "با": 4066, + "دي": 4067, + "حَ": 4068, + "عْ": 4069, + "بي": 4070, + "الأ": 4071, + "ول": 4072, + "فِي": 4073, + "رِ": 4074, + "دا": 4075, + "مِنْ": 4076, + "ُونَ": 4077, + "وْ": 4078, + "هَا": 4079, + "ُّ": 4080, + "الس": 4081, + "الَ": 4082, + "ني": 4083, + "لْ": 4084, + "تُ": 4085, + "هل": 4086, + "رة": 4087, + "دَ": 4088, + "سْ": 4089, + "تِ": 4090, + "نَا": 4091, + "رْ": 4092, + "اللَّ": 4093, + "سامي": 4094, + "كن": 4095, + "كل": 4096, + "هَ": 4097, + "عَلَ": 4098, + "على": 4099, + "مع": 4100, + "إلى": 4101, + "قد": 4102, + "الر": 4103, + "ُوا": 4104, + "ير": 4105, + "عن": 4106, + "يُ": 4107, + "نِ": 4108, + "بْ": 4109, + "الح": 4110, + "هُمْ": 4111, + "قا": 4112, + "ذه": 4113, + "الت": 4114, + "ِينَ": 4115, + "جَ": 4116, + "هذا": 4117, + "عد": 4118, + "الع": 4119, + "دْ": 4120, + "قَالَ": 4121, + "رُ": 4122, + "يم": 4123, + "ية": 4124, + "نُ": 4125, + "خَ": 4126, + "رب": 4127, + "الك": 4128, + "وَا": 4129, + "أنا": 4130, + "ةِ": 4131, + "الن": 4132, + "حد": 4133, + "عِ": 4134, + "تا": 4135, + "هو": 4136, + "فا": 4137, + "عا": 4138, + "الش": 4139, + "لُ": 4140, + "يت": 4141, + "ذَا": 4142, + "يع": 4143, + "الذ": 4144, + "حْ": 4145, + "الص": 4146, + "إِنَّ": 4147, + "جا": 4148, + "علي": 4149, + "كَا": 4150, + "بُ": 4151, + "تع": 4152, + "وق": 4153, + "مل": 4154, + "لَّ": 4155, + "يد": 4156, + "أخ": 4157, + "رف": 4158, + "تي": 4159, + "الِ": 4160, + "ّا": 4161, + "ذلك": 4162, + "أَنْ": 4163, + "سِ": 4164, + "توم": 4165, + "مر": 4166, + "مَنْ": 4167, + "بل": 4168, + "الق": 4169, + "الله": 4170, + "ِيَ": 4171, + "كم": 4172, + "ذَ": 4173, + "عل": 4174, + "حب": 4175, + "سي": 4176, + "عُ": 4177, + "الج": 4178, + "الد": 4179, + 
"شَ": 4180, + "تك": 4181, + "فْ": 4182, + "صَ": 4183, + "لل": 4184, + "دِ": 4185, + "بر": 4186, + "فِ": 4187, + "ته": 4188, + "أع": 4189, + "تْ": 4190, + "قْ": 4191, + "الْأَ": 4192, + "ئِ": 4193, + "عَنْ": 4194, + "ور": 4195, + "حا": 4196, + "الَّ": 4197, + "مت": 4198, + "فر": 4199, + "دُ": 4200, + "هنا": 4201, + "وَأَ": 4202, + "تب": 4203, + "ةُ": 4204, + "أي": 4205, + "سب": 4206, + "ريد": 4207, + "وج": 4208, + "كُمْ": 4209, + "حِ": 4210, + "كْ": 4211, + "در": 4212, + "َاء": 4213, + "هذه": 4214, + "الط": 4215, + "الْمُ": 4216, + "دة": 4217, + "قل": 4218, + "غَ": 4219, + "يوم": 4220, + "الَّذ": 4221, + "كر": 4222, + "تر": 4223, + "كِ": 4224, + "كي": 4225, + "عَلَى": 4226, + "رَب": 4227, + "عة": 4228, + "قُ": 4229, + "جْ": 4230, + "فض": 4231, + "لة": 4232, + "هْ": 4233, + "رَا": 4234, + "وَلَ": 4235, + "الْمَ": 4236, + "أَنَّ": 4237, + "يَا": 4238, + "أُ": 4239, + "شي": 4240, + "اللَّهُ": 4241, + "لَى": 4242, + "قِ": 4243, + "أت": 4244, + "عَلَيْ": 4245, + "اللَّهِ": 4246, + "الب": 4247, + "ضَ": 4248, + "ةً": 4249, + "قي": 4250, + "ار": 4251, + "بد": 4252, + "خْ": 4253, + "سْتَ": 4254, + "طَ": 4255, + "قَدْ": 4256, + "ذهب": 4257, + "أم": 4258, + "ماذا": 4259, + "وَإِ": 4260, + "ةٌ": 4261, + "ونَ": 4262, + "ليلى": 4263, + "ولا": 4264, + "حُ": 4265, + "هي": 4266, + "صل": 4267, + "الخ": 4268, + "ود": 4269, + "ليس": 4270, + "لدي": 4271, + "قال": 4272, + "كَانَ": 4273, + "مَّ": 4274, + "حي": 4275, + "تم": 4276, + "لن": 4277, + "وَلَا": 4278, + "بع": 4279, + "يمكن": 4280, + "سُ": 4281, + "ةَ": 4282, + "حت": 4283, + "رًا": 4284, + "كا": 4285, + "شا": 4286, + "هِمْ": 4287, + "لَهُ": 4288, + "زَ": 4289, + "داً": 4290, + "مس": 4291, + "كث": 4292, + "الْعَ": 4293, + "جِ": 4294, + "صْ": 4295, + "فَا": 4296, + "له": 4297, + "وي": 4298, + "عَا": 4299, + "هُوَ": 4300, + "بِي": 4301, + "بَا": 4302, + "أس": 4303, + "ثَ": 4304, + "لِي": 4305, + "رض": 4306, + "الرَّ": 4307, + "لِكَ": 4308, + "تَّ": 4309, + "فُ": 4310, + "قة": 4311, + "فعل": 4312, + "مِن": 4313, + "الآ": 4314, + "ثُ": 4315, + "سم": 4316, + "مَّا": 4317, + "بِهِ": 4318, + "تق": 4319, + "خر": 4320, + "لقد": 4321, + "خل": 4322, + "شر": 4323, + "أنت": 4324, + "لَّا": 4325, + "سن": 4326, + "السَّ": 4327, + "الذي": 4328, + "سَا": 4329, + "وما": 4330, + "زل": 4331, + "وب": 4332, + "أْ": 4333, + "إذا": 4334, + "رِي": 4335, + "حة": 4336, + "نِي": 4337, + "الْحَ": 4338, + "وَقَالَ": 4339, + "به": 4340, + "ةٍ": 4341, + "سأ": 4342, + "رٌ": 4343, + "بال": 4344, + "مة": 4345, + "شْ": 4346, + "وت": 4347, + "عند": 4348, + "فس": 4349, + "بَعْ": 4350, + "هر": 4351, + "قط": 4352, + "أح": 4353, + "إنه": 4354, + "وع": 4355, + "فت": 4356, + "غا": 4357, + "هناك": 4358, + "بت": 4359, + "مِنَ": 4360, + "سر": 4361, + "ذَلِكَ": 4362, + "رس": 4363, + "حدث": 4364, + "غْ": 4365, + "ِّي": 4366, + "الإ": 4367, + "وَيَ": 4368, + "جل": 4369, + "است": 4370, + "قِي": 4371, + "عب": 4372, + "وس": 4373, + "يش": 4374, + "الَّذِينَ": 4375, + "تاب": 4376, + "دِي": 4377, + "جب": 4378, + "كون": 4379, + "بن": 4380, + "الث": 4381, + "لَيْ": 4382, + "بعد": 4383, + "وَالْ": 4384, + "فَأَ": 4385, + "عم": 4386, + "هُم": 4387, + "تن": 4388, + "ذْ": 4389, + "أص": 4390, + "أين": 4391, + "رَبِّ": 4392, + "الذين": 4393, + "إِن": 4394, + "بين": 4395, + "جُ": 4396, + "عَلَيْهِ": 4397, + "حَا": 4398, + "لو": 4399, + "ستط": 4400, + "ظر": 4401, + "لَمْ": 4402, + "ءِ": 4403, + "كُل": 4404, + "طل": 4405, + "تَا": 4406, + "ضُ": 4407, + "كنت": 4408, + "لًا": 4409, + "مٌ": 4410, + "قبل": 4411, + "ــ": 4412, + "ذِ": 4413, + "قَوْ": 4414, + "صِ": 4415, + "مًا": 4416, + "كانت": 4417, + "صا": 4418, + "يق": 
4419, + "الف": 4420, + "النا": 4421, + "مٍ": 4422, + "إِنْ": 4423, + "النَّ": 4424, + "جد": 4425, + "وَمَا": 4426, + "تت": 4427, + "بح": 4428, + "مكان": 4429, + "كيف": 4430, + "ّة": 4431, + "الا": 4432, + "جَا": 4433, + "أو": 4434, + "ساعد": 4435, + "ضِ": 4436, + "إلا": 4437, + "راً": 4438, + "قَا": 4439, + "رأ": 4440, + "عت": 4441, + "أحد": 4442, + "هد": 4443, + "ضا": 4444, + "طر": 4445, + "أق": 4446, + "ماء": 4447, + "دَّ": 4448, + "البا": 4449, + "مُو": 4450, + "أَوْ": 4451, + "طا": 4452, + "قُو": 4453, + "خِ": 4454, + "تل": 4455, + "ستطيع": 4456, + "دَا": 4457, + "النَّا": 4458, + "إلَى": 4459, + "وَتَ": 4460, + "هَذَا": 4461, + "بة": 4462, + "عليك": 4463, + "جر": 4464, + "المن": 4465, + "زا": 4466, + "رٍ": 4467, + "دع": 4468, + "ًّا": 4469, + "سة": 4470, + "ثُمَّ": 4471, + "شيء": 4472, + "الغ": 4473, + "تح": 4474, + "رُونَ": 4475, + "اليوم": 4476, + "مِي": 4477, + "نُوا": 4478, + "أر": 4479, + "تُمْ": 4480, + "عر": 4481, + "يف": 4482, + "أب": 4483, + "دًا": 4484, + "صَا": 4485, + "التَّ": 4486, + "أريد": 4487, + "الز": 4488, + "يَوْ": 4489, + "إلي": 4490, + "جي": 4491, + "يَعْ": 4492, + "فضل": 4493, + "الإن": 4494, + "أنه": 4495, + "1": 4496, + "2": 4497, + "3": 4498, + "4": 4499, + "5": 4500, + "·": 4501, + "×": 4502, + "̃": 4503, + "̌": 4504, + "ε": 4505, + "λ": 4506, + "μ": 4507, + "•": 4508, + "‧": 4509, + "─": 4510, + "□": 4511, + "、": 4512, + "。": 4513, + "〈": 4514, + "〉": 4515, + "《": 4516, + "》": 4517, + "「": 4518, + "」": 4519, + "『": 4520, + "』": 4521, + "ア": 4522, + "オ": 4523, + "カ": 4524, + "チ": 4525, + "ド": 4526, + "ベ": 4527, + "ャ": 4528, + "ヤ": 4529, + "ン": 4530, + "・": 4531, + "ー": 4532, + "ㄟ": 4533, + "!": 4534, + "(": 4535, + ")": 4536, + ",": 4537, + "-": 4538, + "/": 4539, + ":": 4540, + ";": 4541, + "?": 4542, + "p": 4543, + "i4": 4544, + "zh": 4545, + "i2": 4546, + "ng1": 4547, + "u4": 4548, + "i1": 4549, + "ng2": 4550, + "u3": 4551, + "de5": 4552, + "e4": 4553, + "i3": 4554, + "ng4": 4555, + "an4": 4556, + "shi4": 4557, + "an2": 4558, + "u2": 4559, + "u1": 4560, + "ng3": 4561, + "a1": 4562, + "an1": 4563, + "e2": 4564, + "a4": 4565, + "ei4": 4566, + "ong1": 4567, + "ai4": 4568, + "ao4": 4569, + "ang1": 4570, + "an3": 4571, + "wei4": 4572, + "uo2": 4573, + "n1": 4574, + "en2": 4575, + "ao3": 4576, + "e1": 4577, + "qi": 4578, + "eng2": 4579, + "zho": 4580, + "ang3": 4581, + "ang4": 4582, + "ang2": 4583, + "uo4": 4584, + "ge4": 4585, + "yi1": 4586, + "guo2": 4587, + "a3": 4588, + "he2": 4589, + "e3": 4590, + "yi2": 4591, + "di4": 4592, + "zhong1": 4593, + "bu4": 4594, + "ai2": 4595, + "n2": 4596, + "zai4": 4597, + "shi2": 4598, + "eng1": 4599, + "ren2": 4600, + "ong2": 4601, + "xian4": 4602, + "xu": 4603, + "n4": 4604, + "li4": 4605, + "en4": 4606, + "yu2": 4607, + "ei2": 4608, + "yi2ge4": 4609, + "ou4": 4610, + "ei3": 4611, + "ui4": 4612, + "a2": 4613, + "you3": 4614, + "ao1": 4615, + "da4": 4616, + "cheng2": 4617, + "en1": 4618, + "eng4": 4619, + "yi4": 4620, + "si1": 4621, + "zhi4": 4622, + "jia1": 4623, + "yuan2": 4624, + "ta1": 4625, + "de5yi2ge4": 4626, + "ke1": 4627, + "shu3": 4628, + "xi1": 4629, + "ji2": 4630, + "ao2": 4631, + "ou3": 4632, + "ong4": 4633, + "xia4": 4634, + "ai1": 4635, + "gong1": 4636, + "zhi1": 4637, + "en3": 4638, + "wei2": 4639, + "xue2": 4640, + "qu1": 4641, + "zhou1": 4642, + "er3": 4643, + "ming2": 4644, + "zhong3": 4645, + "li3": 4646, + "wu4": 4647, + "yi3": 4648, + "uo1": 4649, + "e5": 4650, + "ji4": 4651, + "xing2": 4652, + "jian4": 4653, + "hua4": 4654, + "yu3": 4655, + "uo3": 4656, + "ji1": 4657, + "ai3": 4658, + "zuo4": 4659, + 
"hou4": 4660, + "hui4": 4661, + "ei1": 4662, + "nian2": 4663, + "qi2": 4664, + "dao4": 4665, + "sheng1": 4666, + "de2": 4667, + "dai4": 4668, + "uan2": 4669, + "zhe4": 4670, + "zheng4": 4671, + "ben3": 4672, + "shang4": 4673, + "zhu3": 4674, + "bei4": 4675, + "ye4": 4676, + "chu1": 4677, + "zhan4": 4678, + "le5": 4679, + "lai2": 4680, + "shi3": 4681, + "nan2": 4682, + "ren4": 4683, + "you2": 4684, + "ke4": 4685, + "ba1": 4686, + "fu4": 4687, + "dui4": 4688, + "ya4": 4689, + "mei3": 4690, + "zi4": 4691, + "xin1": 4692, + "jing1": 4693, + "zhu": 4694, + "n3": 4695, + "yong4": 4696, + "mu4": 4697, + "jiao4": 4698, + "ye3": 4699, + "jin4": 4700, + "bian4": 4701, + "lu4": 4702, + "qi1": 4703, + "she4": 4704, + "xiang1": 4705, + "ong3": 4706, + "shu4": 4707, + "dong4": 4708, + "suo3": 4709, + "guan1": 4710, + "san1": 4711, + "te4": 4712, + "duo1": 4713, + "fu2": 4714, + "min2": 4715, + "la1": 4716, + "zhi2": 4717, + "zhen4": 4718, + "ou1": 4719, + "wu3": 4720, + "ma3": 4721, + "i5": 4722, + "zi5": 4723, + "ju4": 4724, + "er4": 4725, + "yao4": 4726, + "xia4de5yi2ge4": 4727, + "si4": 4728, + "tu2": 4729, + "shan1": 4730, + "zui4": 4731, + "yin1": 4732, + "er2": 4733, + "tong2": 4734, + "dong1": 4735, + "yu4": 4736, + "yan2": 4737, + "qian2": 4738, + "shu3xia4de5yi2ge4": 4739, + "jun1": 4740, + "ke3": 4741, + "wen2": 4742, + "fa3": 4743, + "luo2": 4744, + "zhu4": 4745, + "xi4": 4746, + "kou3": 4747, + "bei3": 4748, + "jian1": 4749, + "fa1": 4750, + "dian4": 4751, + "jiang1": 4752, + "wei4yu2": 4753, + "xiang4": 4754, + "zhi3": 4755, + "eng3": 4756, + "fang1": 4757, + "lan2": 4758, + "shu": 4759, + "ri4": 4760, + "lian2": 4761, + "shou3": 4762, + "qiu2": 4763, + "jin1": 4764, + "huo4": 4765, + "shu3xia4de5yi2ge4zhong3": 4766, + "fen1": 4767, + "nei4": 4768, + "gai1": 4769, + "mei3guo2": 4770, + "un2": 4771, + "ge2": 4772, + "bao3": 4773, + "qing1": 4774, + "gao1": 4775, + "tai2": 4776, + "xiao3": 4777, + "jie2": 4778, + "tian1": 4779, + "chang2": 4780, + "quan2": 4781, + "lie4": 4782, + "hai3": 4783, + "fei1": 4784, + "ti3": 4785, + "jue2": 4786, + "ou2": 4787, + "ci3": 4788, + "zu2": 4789, + "ni2": 4790, + "biao3": 4791, + "zhong1guo2": 4792, + "du4": 4793, + "yue4": 4794, + "xing4": 4795, + "sheng4": 4796, + "che1": 4797, + "dan1": 4798, + "jie1": 4799, + "lin2": 4800, + "ping2": 4801, + "fu3": 4802, + "gu3": 4803, + "jie4": 4804, + "v3": 4805, + "sheng3": 4806, + "na4": 4807, + "yuan4": 4808, + "zhang3": 4809, + "guan3": 4810, + "dao3": 4811, + "zu3": 4812, + "ding4": 4813, + "dian3": 4814, + "ceng2": 4815, + "ren2kou3": 4816, + "tai4": 4817, + "tong1": 4818, + "guo4": 4819, + "neng2": 4820, + "chang3": 4821, + "hua2": 4822, + "liu2": 4823, + "ying1": 4824, + "xiao4": 4825, + "ci4": 4826, + "bian4hua4": 4827, + "liang3": 4828, + "gong4": 4829, + "zhong4": 4830, + "de5yi1": 4831, + "se4": 4832, + "kai1": 4833, + "wang2": 4834, + "jiu4": 4835, + "shi1": 4836, + "shou4": 4837, + "mei2": 4838, + "feng1": 4839, + "ze2": 4840, + "tu2shi4": 4841, + "ti2": 4842, + "qi4": 4843, + "jiu3": 4844, + "shen1": 4845, + "zhe3": 4846, + "ren2kou3bian4hua4": 4847, + "ren2kou3bian4hua4tu2shi4": 4848, + "di4qu1": 4849, + "yang2": 4850, + "men5": 4851, + "long2": 4852, + "bing4": 4853, + "chan3": 4854, + "zhu1": 4855, + "wei3": 4856, + "wai4": 4857, + "xing1": 4858, + "bo1": 4859, + "bi3": 4860, + "tang2": 4861, + "hua1": 4862, + "bo2": 4863, + "shui3": 4864, + "shu1": 4865, + "dou1": 4866, + "sai4": 4867, + "chao2": 4868, + "bi4": 4869, + "ling2": 4870, + "lei4": 4871, + "da4xue2": 4872, + "fen4": 4873, + "shu3de5": 
4874, + "mu3": 4875, + "jiao1": 4876, + "dang1": 4877, + "cheng1": 4878, + "tong3": 4879, + "nv3": 4880, + "qi3": 4881, + "yan3": 4882, + "mian4": 4883, + "luo4": 4884, + "jing4": 4885, + "ge1": 4886, + "ru4": 4887, + "dan4": 4888, + "ri4ben3": 4889, + "pu3": 4890, + "yun4": 4891, + "huang2": 4892, + "wo3": 4893, + "lv": 4894, + "hai2": 4895, + "shi4yi1": 4896, + "xie1": 4897, + "ying3": 4898, + "wu2": 4899, + "shen2": 4900, + "wang3": 4901, + "guang3": 4902, + "liu4": 4903, + "su4": 4904, + "shi4zhen4": 4905, + "can1": 4906, + "cao3": 4907, + "xia2": 4908, + "ka3": 4909, + "da2": 4910, + "hu4": 4911, + "ban4": 4912, + "dang3": 4913, + "hu2": 4914, + "zong3": 4915, + "deng3": 4916, + "de5yi2ge4shi4zhen4": 4917, + "chuan2": 4918, + "mo4": 4919, + "zhang1": 4920, + "ban1": 4921, + "mo2": 4922, + "cha2": 4923, + "ce4": 4924, + "zhu3yao4": 4925, + "tou2": 4926, + "ju2": 4927, + "shi4wei4yu2": 4928, + "sa4": 4929, + "un1": 4930, + "ke3yi3": 4931, + "du1": 4932, + "han4": 4933, + "liang4": 4934, + "sha1": 4935, + "jia3": 4936, + "zi1": 4937, + "lv4": 4938, + "fu1": 4939, + "xian1": 4940, + "xu4": 4941, + "guang1": 4942, + "meng2": 4943, + "bao4": 4944, + "you4": 4945, + "rong2": 4946, + "zhi1yi1": 4947, + "wei1": 4948, + "mao2": 4949, + "guo2jia1": 4950, + "cong2": 4951, + "gou4": 4952, + "tie3": 4953, + "zhen1": 4954, + "du2": 4955, + "bian1": 4956, + "ci2": 4957, + "qu3": 4958, + "fan4": 4959, + "xiang3": 4960, + "men2": 4961, + "ju1": 4962, + "hong2": 4963, + "zi3": 4964, + "ta1men5": 4965, + "ji3": 4966, + "zong1": 4967, + "zhou1de5yi2ge4shi4zhen4": 4968, + "tuan2": 4969, + "jing3": 4970, + "gong1si1": 4971, + "xie4": 4972, + "li2": 4973, + "li4shi3": 4974, + "bao1": 4975, + "gang3": 4976, + "gui1": 4977, + "zheng1": 4978, + "zhi2wu4": 4979, + "ta1de5": 4980, + "pin3": 4981, + "zhuan1": 4982, + "chong2": 4983, + "shi3yong4": 4984, + "wa3": 4985, + "shuo1": 4986, + "chuan1": 4987, + "lei2": 4988, + "wan1": 4989, + "huo2": 4990, + "su1": 4991, + "zao3": 4992, + "gai3": 4993, + "qu4": 4994, + "gu4": 4995, + "xi2": 4996, + "hang2": 4997, + "ying4": 4998, + "cun1": 4999, + "gen1": 5000, + "ying2": 5001, + "ting2": 5002, + "cheng2shi4": 5003, + "jiang3": 5004, + "ling3": 5005, + "lun2": 5006, + "bu4fen4": 5007, + "deng1": 5008, + "xuan3": 5009, + "dong4wu4": 5010, + "de2guo2": 5011, + "xian3": 5012, + "fan3": 5013, + "zhe5": 5014, + "han2": 5015, + "hao4": 5016, + "mi4": 5017, + "ran2": 5018, + "qin1": 5019, + "tiao2": 5020, + "zhan3": 5021, + "[ar]": 5022, + "[zh-cn]": 5023, + "¡": 5024, + "é": 5025, + "shi": 5026, + "tsu": 5027, + "teki": 5028, + "nai": 5029, + "aru": 5030, + "uu": 5031, + "kai": 5032, + "shite": 5033, + "mono": 5034, + "koto": 5035, + "kara": 5036, + "shita": 5037, + "suru": 5038, + "masu": 5039, + "tai": 5040, + "ware": 5041, + "shin": 5042, + "oku": 5043, + "yuu": 5044, + "iru": 5045, + "jiko": 5046, + "desu": 5047, + "rare": 5048, + "shou": 5049, + "sha": 5050, + "sekai": 5051, + "kyou": 5052, + "mashita": 5053, + "nara": 5054, + "kei": 5055, + "ita": 5056, + "ari": 5057, + "itsu": 5058, + "kono": 5059, + "naka": 5060, + "chou": 5061, + "sore": 5062, + "naru": 5063, + "gaku": 5064, + "reba": 5065, + "hito": 5066, + "sai": 5067, + "nan": 5068, + "dai": 5069, + "tsuku": 5070, + "shiki": 5071, + "sare": 5072, + "naku": 5073, + "jun": 5074, + "kaku": 5075, + "zai": 5076, + "wata": 5077, + "shuu": 5078, + "ii": 5079, + "kare": 5080, + "shii": 5081, + "made": 5082, + "sho": 5083, + "kereba": 5084, + "shika": 5085, + "ichi": 5086, + "deki": 5087, + "nin": 5088, + "wareware": 5089, 
+ "nakereba": 5090, + "oite": 5091, + "yaku": 5092, + "mujun": 5093, + "yoku": 5094, + "butsu": 5095, + "omo": 5096, + "gae": 5097, + "naranai": 5098, + "tachi": 5099, + "chuu": 5100, + "kangae": 5101, + "toki": 5102, + "koro": 5103, + "mujunteki": 5104, + "naga": 5105, + "jin": 5106, + "shima": 5107, + "iku": 5108, + "imasu": 5109, + "hon": 5110, + "kae": 5111, + "kore": 5112, + "kita": 5113, + "datta": 5114, + "jitsu": 5115, + "mae": 5116, + "toku": 5117, + "douitsu": 5118, + "ritsu": 5119, + "kyuu": 5120, + "hyou": 5121, + "rareta": 5122, + "keisei": 5123, + "kkan": 5124, + "rareru": 5125, + "mou": 5126, + "doko": 5127, + "ryou": 5128, + "dake": 5129, + "nakatta": 5130, + "soko": 5131, + "tabe": 5132, + "hana": 5133, + "fuku": 5134, + "yasu": 5135, + "wataku": 5136, + "yama": 5137, + "kyo": 5138, + "genzai": 5139, + "boku": 5140, + "ata": 5141, + "kawa": 5142, + "masen": 5143, + "juu": 5144, + "natte": 5145, + "watakushi": 5146, + "yotte": 5147, + "hai": 5148, + "jishin": 5149, + "rete": 5150, + "oka": 5151, + "kagaku": 5152, + "natta": 5153, + "karu": 5154, + "nari": 5155, + "mata": 5156, + "kuru": 5157, + "gai": 5158, + "kari": 5159, + "shakai": 5160, + "koui": 5161, + "yori": 5162, + "setsu": 5163, + "reru": 5164, + "tokoro": 5165, + "jutsu": 5166, + "saku": 5167, + "ttai": 5168, + "ningen": 5169, + "tame": 5170, + "kankyou": 5171, + "ooku": 5172, + "watashi": 5173, + "tsukuru": 5174, + "sugi": 5175, + "jibun": 5176, + "shitsu": 5177, + "keru": 5178, + "kishi": 5179, + "shikashi": 5180, + "moto": 5181, + "mari": 5182, + "itte": 5183, + "deshita": 5184, + "nde": 5185, + "arimasu": 5186, + "koe": 5187, + "zettai": 5188, + "kkanteki": 5189, + "rekishi": 5190, + "dekiru": 5191, + "tsuka": 5192, + "itta": 5193, + "kobutsu": 5194, + "miru": 5195, + "shoku": 5196, + "shimasu": 5197, + "gijutsu": 5198, + "gyou": 5199, + "joushiki": 5200, + "atta": 5201, + "hodo": 5202, + "koko": 5203, + "tsukurareta": 5204, + "zoku": 5205, + "hitei": 5206, + "koku": 5207, + "rekishiteki": 5208, + "kete": 5209, + "kako": 5210, + "nagara": 5211, + "kakaru": 5212, + "shutai": 5213, + "haji": 5214, + "taku": 5215, + "douitsuteki": 5216, + "mete": 5217, + "tsuu": 5218, + "sarete": 5219, + "genjitsu": 5220, + "bai": 5221, + "nawa": 5222, + "jikan": 5223, + "waru": 5224, + "rt": 5225, + "atsu": 5226, + "soku": 5227, + "kouiteki": 5228, + "kata": 5229, + "tetsu": 5230, + "gawa": 5231, + "kedo": 5232, + "reta": 5233, + "sayou": 5234, + "tteru": 5235, + "tori": 5236, + "kimi": 5237, + "mura": 5238, + "sareru": 5239, + "machi": 5240, + "kya": 5241, + "osa": 5242, + "konna": 5243, + "aku": 5244, + "sareta": 5245, + "ipp": 5246, + "shiku": 5247, + "uchi": 5248, + "hitotsu": 5249, + "hatara": 5250, + "tachiba": 5251, + "shiro": 5252, + "katachi": 5253, + "tomo": 5254, + "ete": 5255, + "meru": 5256, + "nichi": 5257, + "dare": 5258, + "katta": 5259, + "eru": 5260, + "suki": 5261, + "ooki": 5262, + "maru": 5263, + "moku": 5264, + "oko": 5265, + "kangaerareru": 5266, + "oto": 5267, + "tanni": 5268, + "tada": 5269, + "taiteki": 5270, + "motte": 5271, + "kinou": 5272, + "shinai": 5273, + "kki": 5274, + "tari": 5275, + "ranai": 5276, + "kkou": 5277, + "mirai": 5278, + "ppon": 5279, + "goto": 5280, + "hitsu": 5281, + "teru": 5282, + "mochi": 5283, + "katsu": 5284, + "nyuu": 5285, + "zuka": 5286, + "tsuite": 5287, + "nomi": 5288, + "sugu": 5289, + "kuda": 5290, + "tetsugaku": 5291, + "ika": 5292, + "ronri": 5293, + "oki": 5294, + "nippon": 5295, + "shimashita": 5296, + "chishiki": 5297, + "chokkanteki": 5298, + "suko": 5299, + 
"kuu": 5300, + "arou": 5301, + "katte": 5302, + "kuri": 5303, + "inai": 5304, + "hyougen": 5305, + "ishiki": 5306, + "doku": 5307, + "atte": 5308, + "atara": 5309, + "wari": 5310, + "kao": 5311, + "seisan": 5312, + "hanashi": 5313, + "kake": 5314, + "naji": 5315, + "sunawa": 5316, + "sunawachi": 5317, + "ugo": 5318, + "suu": 5319, + "bara": 5320, + "hiro": 5321, + "iwa": 5322, + "betsu": 5323, + "yoi": 5324, + "seru": 5325, + "shiteru": 5326, + "rarete": 5327, + "toshi": 5328, + "seki": 5329, + "tairitsu": 5330, + "wakara": 5331, + "tokyo": 5332, + "kka": 5333, + "kyoku": 5334, + "iro": 5335, + "mite": 5336, + "saki": 5337, + "kanji": 5338, + "mita": 5339, + "sube": 5340, + "ryoku": 5341, + "matta": 5342, + "kudasai": 5343, + "omoi": 5344, + "wareru": 5345, + "hitsuyou": 5346, + "kashi": 5347, + "renai": 5348, + "kankei": 5349, + "gatte": 5350, + "ochi": 5351, + "motsu": 5352, + "sonzai": 5353, + "taishite": 5354, + "ame": 5355, + "seimei": 5356, + "kano": 5357, + "giri": 5358, + "kangaeru": 5359, + "yue": 5360, + "asa": 5361, + "onaji": 5362, + "yoru": 5363, + "niku": 5364, + "osaka": 5365, + "sukoshi": 5366, + "tama": 5367, + "kanojo": 5368, + "kite": 5369, + "mondai": 5370, + "amari": 5371, + "eki": 5372, + "kojin": 5373, + "haya": 5374, + "dete": 5375, + "atarashii": 5376, + "awa": 5377, + "gakkou": 5378, + "tsuzu": 5379, + "shukan": 5380, + "imashita": 5381, + "atae": 5382, + "darou": 5383, + "hataraku": 5384, + "gata": 5385, + "dachi": 5386, + "matsu": 5387, + "arimasen": 5388, + "seibutsu": 5389, + "mitsu": 5390, + "heya": 5391, + "yasui": 5392, + "deni": 5393, + "noko": 5394, + "haha": 5395, + "domo": 5396, + "kami": 5397, + "sudeni": 5398, + "nao": 5399, + "raku": 5400, + "ike": 5401, + "meta": 5402, + "kodomo": 5403, + "soshite": 5404, + "game": 5405, + "bakari": 5406, + "tote": 5407, + "hatsu": 5408, + "mise": 5409, + "mokuteki": 5410, + "dakara": 5411, + "[ja]": 5412, + "ő": 5413, + "ű": 5414, + "そ": 5415, + "な": 5416, + "ん": 5417, + "포": 5418, + "�": 5419, + "gy": 5420, + "eg": 5421, + "cs": 5422, + "ál": 5423, + "egy": 5424, + "át": 5425, + "ott": 5426, + "ett": 5427, + "meg": 5428, + "hogy": 5429, + "ég": 5430, + "ól": 5431, + "nek": 5432, + "volt": 5433, + "ág": 5434, + "nk": 5435, + "ék": 5436, + "ít": 5437, + "ák": 5438, + "ud": 5439, + "szer": 5440, + "mind": 5441, + "oz": 5442, + "ép": 5443, + "ért": 5444, + "mond": 5445, + "szt": 5446, + "nak": 5447, + "ől": 5448, + "csak": 5449, + "oly": 5450, + "áll": 5451, + "ány": 5452, + "mint": 5453, + "már": 5454, + "ött": 5455, + "nagy": 5456, + "ész": 5457, + "azt": 5458, + "elő": 5459, + "tud": 5460, + "ény": 5461, + "áz": 5462, + "még": 5463, + "köz": 5464, + "ely": 5465, + "ség": 5466, + "hoz": 5467, + "uk": 5468, + "kez": 5469, + "ám": 5470, + "aj": 5471, + "unk": 5472, + "vagy": 5473, + "szem": 5474, + "ember": 5475, + "fog": 5476, + "mert": 5477, + "ös": 5478, + "ság": 5479, + "leg": 5480, + "ünk": 5481, + "hát": 5482, + "ony": 5483, + "ezt": 5484, + "minden": 5485, + "ült": 5486, + "jó": 5487, + "kis": 5488, + "áj": 5489, + "úgy": 5490, + "most": 5491, + "ír": 5492, + "itt": 5493, + "elt": 5494, + "mondta": 5495, + "kell": 5496, + "ált": 5497, + "érd": 5498, + "tö": 5499, + "vár": 5500, + "lát": 5501, + "ők": 5502, + "vet": 5503, + "után": 5504, + "két": 5505, + "nap": 5506, + "ív": 5507, + "ály": 5508, + "vég": 5509, + "ök": 5510, + "dul": 5511, + "néz": 5512, + "ában": 5513, + "kül": 5514, + "akkor": 5515, + "szél": 5516, + "új": 5517, + "olyan": 5518, + "ked": 5519, + "hely": 5520, + "tör": 5521, + "ból": 5522, + 
"elm": 5523, + "ára": 5524, + "ló": 5525, + "volna": 5526, + "lehet": 5527, + "ebb": 5528, + "sok": 5529, + "olt": 5530, + "eket": 5531, + "bor": 5532, + "fej": 5533, + "gond": 5534, + "akar": 5535, + "fél": 5536, + "úl": 5537, + "otta": 5538, + "valami": 5539, + "jel": 5540, + "éd": 5541, + "arc": 5542, + "hall": 5543, + "föl": 5544, + "ába": 5545, + "olg": 5546, + "kir": 5547, + "old": 5548, + "kérd": 5549, + "jár": 5550, + "úr": 5551, + "zs": 5552, + "élet": 5553, + "ját": 5554, + "ov": 5555, + "éz": 5556, + "vil": 5557, + "őr": 5558, + "ög": 5559, + "lesz": 5560, + "koz": 5561, + "ább": 5562, + "király": 5563, + "eng": 5564, + "igaz": 5565, + "haj": 5566, + "kod": 5567, + "ról": 5568, + "több": 5569, + "szó": 5570, + "ében": 5571, + "öt": 5572, + "nyi": 5573, + "szól": 5574, + "gondol": 5575, + "egész": 5576, + "így": 5577, + "ős": 5578, + "obb": 5579, + "osan": 5580, + "ből": 5581, + "abb": 5582, + "őt": 5583, + "nál": 5584, + "kép": 5585, + "aztán": 5586, + "tart": 5587, + "beszél": 5588, + "előtt": 5589, + "aszt": 5590, + "maj": 5591, + "kör": 5592, + "hang": 5593, + "íz": 5594, + "incs": 5595, + "év": 5596, + "ód": 5597, + "ók": 5598, + "hozz": 5599, + "okat": 5600, + "nagyon": 5601, + "ház": 5602, + "ped": 5603, + "ezte": 5604, + "etlen": 5605, + "neki": 5606, + "majd": 5607, + "szony": 5608, + "ának": 5609, + "felé": 5610, + "egyszer": 5611, + "adt": 5612, + "gyer": 5613, + "amikor": 5614, + "foly": 5615, + "szak": 5616, + "őd": 5617, + "hú": 5618, + "ász": 5619, + "amely": 5620, + "ére": 5621, + "ilyen": 5622, + "oda": 5623, + "ják": 5624, + "tár": 5625, + "ával": 5626, + "lak": 5627, + "gyan": 5628, + "ély": 5629, + "út": 5630, + "kezd": 5631, + "mell": 5632, + "mikor": 5633, + "hez": 5634, + "való": 5635, + "szeret": 5636, + "rend": 5637, + "vissza": 5638, + "fő": 5639, + "asszony": 5640, + "ről": 5641, + "pedig": 5642, + "szép": 5643, + "ták": 5644, + "öv": 5645, + "világ": 5646, + "maga": 5647, + "szik": 5648, + "éj": 5649, + "ént": 5650, + "jött": 5651, + "szí": 5652, + "gat": 5653, + "ettem": 5654, + "hány": 5655, + "ást": 5656, + "ahol": 5657, + "őket": 5658, + "hár": 5659, + "nő": 5660, + "csi": 5661, + "talál": 5662, + "elte": 5663, + "látt": 5664, + "tört": 5665, + "hagy": 5666, + "esz": 5667, + "nél": 5668, + "kut": 5669, + "lány": 5670, + "amit": 5671, + "ső": 5672, + "ellen": 5673, + "magát": 5674, + "ugyan": 5675, + "külön": 5676, + "asz": 5677, + "mindig": 5678, + "lép": 5679, + "talán": 5680, + "szor": 5681, + "illan": 5682, + "nincs": 5683, + "vagyok": 5684, + "telen": 5685, + "ismer": 5686, + "isten": 5687, + "ított": 5688, + "jobb": 5689, + "ves": 5690, + "dult": 5691, + "juk": 5692, + "szen": 5693, + "öm": 5694, + "lett": 5695, + "egyik": 5696, + "bár": 5697, + "szi": 5698, + "szív": 5699, + "azon": 5700, + "eszt": 5701, + "föld": 5702, + "kuty": 5703, + "pillan": 5704, + "fér": 5705, + "től": 5706, + "tű": 5707, + "ébe": 5708, + "tött": 5709, + "barát": 5710, + "íg": 5711, + "ahogy": 5712, + "eh": 5713, + "ep": 5714, + "jelent": 5715, + "tat": 5716, + "szeg": 5717, + "mintha": 5718, + "egyen": 5719, + "szab": 5720, + "bizony": 5721, + "jon": 5722, + "öreg": 5723, + "dolg": 5724, + "csap": 5725, + "tiszt": 5726, + "állt": 5727, + "ancs": 5728, + "idő": 5729, + "ügy": 5730, + "miért": 5731, + "ót": 5732, + "csin": 5733, + "ének": 5734, + "vér": 5735, + "jól": 5736, + "alatt": 5737, + "mely": 5738, + "semmi": 5739, + "nyug": 5740, + "vág": 5741, + "követ": 5742, + "össze": 5743, + "mad": 5744, + "acs": 5745, + "fiú": 5746, + "másik": 5747, + "jön": 5748, + 
"szám": 5749, + "rész": 5750, + "kér": 5751, + "ével": 5752, + "[hu]": 5753, + "%": 5754, + "0": 5755, + "6": 5756, + "7": 5757, + "8": 5758, + "9": 5759, + "A": 5760, + "B": 5761, + "C": 5762, + "D": 5763, + "E": 5764, + "F": 5765, + "G": 5766, + "H": 5767, + "I": 5768, + "J": 5769, + "K": 5770, + "L": 5771, + "M": 5772, + "N": 5773, + "O": 5774, + "P": 5775, + "Q": 5776, + "R": 5777, + "S": 5778, + "T": 5779, + "U": 5780, + "V": 5781, + "W": 5782, + "X": 5783, + "Y": 5784, + "Z": 5785, + "Ł": 5786, + "α": 5787, + "ς": 5788, + "♥": 5789, + "か": 5790, + "ズ": 5791, + "因": 5792, + "国": 5793, + "怎": 5794, + "抱": 5795, + "推": 5796, + "有": 5797, + "樣": 5798, + "為": 5799, + "群": 5800, + "麼": 5801, + "eo": 5802, + "eul": 5803, + "eun": 5804, + "eon": 5805, + "ae": 5806, + "yeon": 5807, + "yeo": 5808, + "ui": 5809, + "hae": 5810, + "geo": 5811, + "neun": 5812, + "ssda": 5813, + "seo": 5814, + "eong": 5815, + "kk": 5816, + "jeo": 5817, + "deul": 5818, + "eum": 5819, + "yeong": 5820, + "geos": 5821, + "hag": 5822, + "aneun": 5823, + "iss": 5824, + "dae": 5825, + "eob": 5826, + "eol": 5827, + "geu": 5828, + "jeong": 5829, + "sae": 5830, + "doe": 5831, + "geul": 5832, + "eulo": 5833, + "bn": 5834, + "sang": 5835, + "bnida": 5836, + "haneun": 5837, + "jeog": 5838, + "saeng": 5839, + "ineun": 5840, + "anh": 5841, + "salam": 5842, + "eom": 5843, + "nae": 5844, + "gwa": 5845, + "yeol": 5846, + "eseo": 5847, + "myeon": 5848, + "ttae": 5849, + "hw": 5850, + "eobs": 5851, + "jang": 5852, + "gw": 5853, + "ileul": 5854, + "yeog": 5855, + "jeon": 5856, + "sig": 5857, + "jag": 5858, + "hago": 5859, + "deun": 5860, + "seong": 5861, + "gag": 5862, + "ham": 5863, + "dang": 5864, + "leul": 5865, + "sil": 5866, + "dong": 5867, + "handa": 5868, + "eossda": 5869, + "aeg": 5870, + "seon": 5871, + "haessda": 5872, + "issda": 5873, + "ege": 5874, + "mul": 5875, + "jung": 5876, + "jig": 5877, + "issneun": 5878, + "geun": 5879, + "seubnida": 5880, + "won": 5881, + "daneun": 5882, + "eoh": 5883, + "deo": 5884, + "gam": 5885, + "jal": 5886, + "haeng": 5887, + "yang": 5888, + "bang": 5889, + "jae": 5890, + "saenggag": 5891, + "hage": 5892, + "sog": 5893, + "eoss": 5894, + "jasin": 5895, + "jil": 5896, + "eog": 5897, + "gyeong": 5898, + "gong": 5899, + "deon": 5900, + "haess": 5901, + "eung": 5902, + "joh": 5903, + "nal": 5904, + "myeong": 5905, + "eona": 5906, + "igo": 5907, + "gyeol": 5908, + "yag": 5909, + "gwan": 5910, + "uli": 5911, + "yong": 5912, + "lyeo": 5913, + "jog": 5914, + "eohge": 5915, + "bog": 5916, + "tong": 5917, + "manh": 5918, + "jeol": 5919, + "geol": 5920, + "aga": 5921, + "naneun": 5922, + "uneun": 5923, + "cheol": 5924, + "dol": 5925, + "bad": 5926, + "hamyeon": 5927, + "yeossda": 5928, + "ibnida": 5929, + "gye": 5930, + "eos": 5931, + "hwal": 5932, + "salamdeul": 5933, + "jiman": 5934, + "dangsin": 5935, + "jib": 5936, + "ttaemun": 5937, + "ib": 5938, + "eneun": 5939, + "eug": 5940, + "jeom": 5941, + "geuleon": 5942, + "hwa": 5943, + "assda": 5944, + "beob": 5945, + "bae": 5946, + "yeoss": 5947, + "chin": 5948, + "chaeg": 5949, + "geon": 5950, + "naega": 5951, + "iga": 5952, + "sigan": 5953, + "gil": 5954, + "hyeon": 5955, + "lyeog": 5956, + "gug": 5957, + "pyeon": 5958, + "wae": 5959, + "jul": 5960, + "seul": 5961, + "deung": 5962, + "hajiman": 5963, + "eumyeon": 5964, + "pil": 5965, + "nyeon": 5966, + "tae": 5967, + "pyo": 5968, + "jineun": 5969, + "beon": 5970, + "hada": 5971, + "seol": 5972, + "sip": 5973, + "daleun": 5974, + "salm": 5975, + "gyo": 5976, + "cheon": 5977, + "hagi": 5978, + 
"cheoleom": 5979, + "gal": 5980, + "ila": 5981, + "kkaji": 5982, + "anhneun": 5983, + "habnida": 5984, + "tteon": 5985, + "haeseo": 5986, + "doenda": 5987, + "ttal": 5988, + "ilo": 5989, + "seub": 5990, + "byeon": 5991, + "myeo": 5992, + "beol": 5993, + "jeung": 5994, + "chim": 5995, + "hwang": 5996, + "euneun": 5997, + "jong": 5998, + "boda": 5999, + "nol": 6000, + "neom": 6001, + "buteo": 6002, + "jigeum": 6003, + "eobsda": 6004, + "daelo": 6005, + "yul": 6006, + "pyeong": 6007, + "seoneun": 6008, + "salang": 6009, + "seut": 6010, + "heom": 6011, + "hyang": 6012, + "gwang": 6013, + "eobsneun": 6014, + "hwag": 6015, + "gess": 6016, + "jagi": 6017, + "ileon": 6018, + "wihae": 6019, + "daehan": 6020, + "gaji": 6021, + "meog": 6022, + "jyeo": 6023, + "chaj": 6024, + "byeong": 6025, + "eod": 6026, + "gyeo": 6027, + "eoji": 6028, + "gul": 6029, + "modeun": 6030, + "insaeng": 6031, + "geulae": 6032, + "sasil": 6033, + "sib": 6034, + "chal": 6035, + "ilago": 6036, + "geum": 6037, + "doeneun": 6038, + "bol": 6039, + "gajang": 6040, + "geuligo": 6041, + "hyeong": 6042, + "haengbog": 6043, + "chul": 6044, + "chae": 6045, + "mang": 6046, + "dam": 6047, + "choe": 6048, + "sijag": 6049, + "cheong": 6050, + "ilaneun": 6051, + "ulineun": 6052, + "aen": 6053, + "kke": 6054, + "munje": 6055, + "teu": 6056, + "geuneun": 6057, + "bge": 6058, + "cheo": 6059, + "baeg": 6060, + "jug": 6061, + "sangdae": 6062, + "geugeos": 6063, + "dog": 6064, + "eus": 6065, + "jab": 6066, + "hyeo": 6067, + "tteohge": 6068, + "chil": 6069, + "swi": 6070, + "jileul": 6071, + "chang": 6072, + "ganeun": 6073, + "iji": 6074, + "dago": 6075, + "yohan": 6076, + "teug": 6077, + "ppun": 6078, + "aleul": 6079, + "haengdong": 6080, + "sesang": 6081, + "edo": 6082, + "mandeul": 6083, + "amyeon": 6084, + "kkae": 6085, + "bag": 6086, + "ideul": 6087, + "pum": 6088, + "meol": 6089, + "neul": 6090, + "hamkke": 6091, + "chung": 6092, + "dab": 6093, + "yug": 6094, + "sag": 6095, + "gwangye": 6096, + "ileohge": 6097, + "balo": 6098, + "neunde": 6099, + "hamyeo": 6100, + "geuleoh": 6101, + "anila": 6102, + "bangbeob": 6103, + "dasi": 6104, + "byeol": 6105, + "gyeon": 6106, + "gamjeong": 6107, + "oneul": 6108, + "janeun": 6109, + "yeom": 6110, + "lago": 6111, + "igi": 6112, + "hwan": 6113, + "teul": 6114, + "eoseo": 6115, + "sik": 6116, + "jaga": 6117, + "geuleom": 6118, + "geuleona": 6119, + "jeongdo": 6120, + "gyeog": 6121, + "geuleohge": 6122, + "geudeul": 6123, + "eut": 6124, + "imyeon": 6125, + "jjae": 6126, + "keun": 6127, + "isang": 6128, + "malhaessda": 6129, + "euge": 6130, + "nop": 6131, + "ingan": 6132, + "bomyeon": 6133, + "taeg": 6134, + "dwi": 6135, + "saneun": 6136, + "wan": 6137, + "anhgo": 6138, + "nugu": 6139, + "sung": 6140, + "damyeon": 6141, + "adeul": 6142, + "peul": 6143, + "ttala": 6144, + "geosdo": 6145, + "aji": 6146, + "meon": 6147, + "eumyeo": 6148, + "dolog": 6149, + "neung": 6150, + "modu": 6151, + "[ko]": 6152, + "\u0014": 6153, + "\u0016": 6154, + "$": 6155, + "*": 6156, + "|": 6157, + "°": 6158, + "º": 6159, + "ँ": 6160, + "ं": 6161, + "ः": 6162, + "अ": 6163, + "आ": 6164, + "इ": 6165, + "ई": 6166, + "उ": 6167, + "ऊ": 6168, + "ऋ": 6169, + "ऎ": 6170, + "ए": 6171, + "ऐ": 6172, + "ऑ": 6173, + "ऒ": 6174, + "ओ": 6175, + "औ": 6176, + "क": 6177, + "ख": 6178, + "ग": 6179, + "घ": 6180, + "ङ": 6181, + "च": 6182, + "छ": 6183, + "ज": 6184, + "झ": 6185, + "ञ": 6186, + "ट": 6187, + "ठ": 6188, + "ड": 6189, + "ढ": 6190, + "ण": 6191, + "त": 6192, + "थ": 6193, + "द": 6194, + "ध": 6195, + "न": 6196, + "ऩ": 6197, + "प": 6198, + "फ": 
6199, + "ब": 6200, + "भ": 6201, + "म": 6202, + "य": 6203, + "र": 6204, + "ऱ": 6205, + "ल": 6206, + "ळ": 6207, + "व": 6208, + "श": 6209, + "ष": 6210, + "स": 6211, + "ह": 6212, + "़": 6213, + "ा": 6214, + "ि": 6215, + "ी": 6216, + "ु": 6217, + "ू": 6218, + "ृ": 6219, + "ॄ": 6220, + "ॅ": 6221, + "ॆ": 6222, + "े": 6223, + "ै": 6224, + "ॉ": 6225, + "ॊ": 6226, + "ो": 6227, + "ौ": 6228, + "्": 6229, + "ॐ": 6230, + "ॖ": 6231, + "क़": 6232, + "ख़": 6233, + "ग़": 6234, + "ज़": 6235, + "ड़": 6236, + "ढ़": 6237, + "फ़": 6238, + "य़": 6239, + "ॠ": 6240, + "।": 6241, + "॥": 6242, + "०": 6243, + "१": 6244, + "२": 6245, + "३": 6246, + "४": 6247, + "५": 6248, + "६": 6249, + "७": 6250, + "८": 6251, + "९": 6252, + "॰": 6253, + "ॲ": 6254, + "​": 6255, + "‌": 6256, + "‍": 6257, + "‎": 6258, + "₹": 6259, + "के": 6260, + "है": 6261, + "ें": 6262, + "्र": 6263, + "ार": 6264, + "ने": 6265, + "या": 6266, + "में": 6267, + "से": 6268, + "की": 6269, + "का": 6270, + "ों": 6271, + "ता": 6272, + "कर": 6273, + "स्": 6274, + "कि": 6275, + "को": 6276, + "र्": 6277, + "ना": 6278, + "क्": 6279, + "ही": 6280, + "और": 6281, + "पर": 6282, + "ते": 6283, + "हो": 6284, + "प्र": 6285, + "ान": 6286, + "्य": 6287, + "ला": 6288, + "वा": 6289, + "ले": 6290, + "सा": 6291, + "हैं": 6292, + "लि": 6293, + "जा": 6294, + "हा": 6295, + "भी": 6296, + "वि": 6297, + "इस": 6298, + "ती": 6299, + "न्": 6300, + "रा": 6301, + "मा": 6302, + "दे": 6303, + "दि": 6304, + "बा": 6305, + "ति": 6306, + "था": 6307, + "नि": 6308, + "कार": 6309, + "एक": 6310, + "हीं": 6311, + "हु": 6312, + "ंग": 6313, + "ैं": 6314, + "नी": 6315, + "सी": 6316, + "अप": 6317, + "त्": 6318, + "नहीं": 6319, + "री": 6320, + "मे": 6321, + "मु": 6322, + "ित": 6323, + "तो": 6324, + "पा": 6325, + "ली": 6326, + "लिए": 6327, + "गा": 6328, + "ल्": 6329, + "रह": 6330, + "रे": 6331, + "क्ष": 6332, + "मैं": 6333, + "सम": 6334, + "उस": 6335, + "जि": 6336, + "त्र": 6337, + "मि": 6338, + "चा": 6339, + "ोग": 6340, + "सं": 6341, + "द्": 6342, + "सि": 6343, + "आप": 6344, + "तु": 6345, + "दा": 6346, + "कु": 6347, + "यों": 6348, + "वे": 6349, + "जी": 6350, + "्या": 6351, + "उन": 6352, + "िक": 6353, + "ये": 6354, + "भा": 6355, + "्ट": 6356, + "हम": 6357, + "स्ट": 6358, + "शा": 6359, + "ड़": 6360, + "ंद": 6361, + "खा": 6362, + "म्": 6363, + "श्": 6364, + "यह": 6365, + "सक": 6366, + "पू": 6367, + "किया": 6368, + "अपने": 6369, + "रू": 6370, + "सु": 6371, + "मी": 6372, + "हि": 6373, + "जो": 6374, + "थे": 6375, + "रि": 6376, + "दी": 6377, + "थी": 6378, + "गी": 6379, + "लोग": 6380, + "गया": 6381, + "तर": 6382, + "न्ह": 6383, + "च्": 6384, + "वार": 6385, + "बी": 6386, + "प्": 6387, + "दो": 6388, + "टी": 6389, + "शि": 6390, + "करने": 6391, + "गे": 6392, + "ैसे": 6393, + "इन": 6394, + "ंड": 6395, + "साथ": 6396, + "पु": 6397, + "बे": 6398, + "बार": 6399, + "वी": 6400, + "अन": 6401, + "हर": 6402, + "उन्ह": 6403, + "होता": 6404, + "जब": 6405, + "कुछ": 6406, + "मान": 6407, + "क्र": 6408, + "बि": 6409, + "पह": 6410, + "फि": 6411, + "सर": 6412, + "ारी": 6413, + "रो": 6414, + "दू": 6415, + "कहा": 6416, + "तक": 6417, + "शन": 6418, + "ब्": 6419, + "स्थ": 6420, + "वह": 6421, + "बाद": 6422, + "ओं": 6423, + "गु": 6424, + "ज्": 6425, + "्रे": 6426, + "गर": 6427, + "रहे": 6428, + "वर्": 6429, + "हू": 6430, + "ार्": 6431, + "पी": 6432, + "बहु": 6433, + "मुझ": 6434, + "्रा": 6435, + "दिया": 6436, + "सब": 6437, + "करते": 6438, + "अपनी": 6439, + "बहुत": 6440, + "कह": 6441, + "टे": 6442, + "हुए": 6443, + "किसी": 6444, + "रहा": 6445, + "ष्ट": 6446, + "ज़": 6447, + "बना": 6448, + "सो": 6449, + "डि": 6450, + "कोई": 6451, + "व्य": 
6452, + "बात": 6453, + "रु": 6454, + "वो": 6455, + "मुझे": 6456, + "द्ध": 6457, + "चार": 6458, + "मेरे": 6459, + "वर": 6460, + "्री": 6461, + "जाता": 6462, + "नों": 6463, + "प्रा": 6464, + "देख": 6465, + "टा": 6466, + "क्या": 6467, + "अध": 6468, + "लग": 6469, + "लो": 6470, + "पि": 6471, + "यु": 6472, + "चे": 6473, + "जिस": 6474, + "ंत": 6475, + "ानी": 6476, + "पै": 6477, + "जन": 6478, + "ारे": 6479, + "ची": 6480, + "मिल": 6481, + "दु": 6482, + "देश": 6483, + "च्छ": 6484, + "ष्": 6485, + "सू": 6486, + "खे": 6487, + "चु": 6488, + "िया": 6489, + "लगा": 6490, + "बु": 6491, + "उनके": 6492, + "ज्ञ": 6493, + "क्षा": 6494, + "तरह": 6495, + "्यादा": 6496, + "वाले": 6497, + "पूर्": 6498, + "मैंने": 6499, + "काम": 6500, + "रूप": 6501, + "होती": 6502, + "उप": 6503, + "जान": 6504, + "प्रकार": 6505, + "भार": 6506, + "मन": 6507, + "हुआ": 6508, + "टर": 6509, + "हूँ": 6510, + "परि": 6511, + "पास": 6512, + "अनु": 6513, + "राज": 6514, + "लोगों": 6515, + "अब": 6516, + "समझ": 6517, + "डी": 6518, + "मौ": 6519, + "शु": 6520, + "चि": 6521, + "पे": 6522, + "कृ": 6523, + "सकते": 6524, + "मह": 6525, + "योग": 6526, + "दर्": 6527, + "उसे": 6528, + "ंध": 6529, + "डा": 6530, + "जाए": 6531, + "बो": 6532, + "ूल": 6533, + "मो": 6534, + "ोंने": 6535, + "ंस": 6536, + "तुम": 6537, + "पहले": 6538, + "बता": 6539, + "तथा": 6540, + "यो": 6541, + "गई": 6542, + "उत्": 6543, + "सकता": 6544, + "कम": 6545, + "ज्यादा": 6546, + "रख": 6547, + "समय": 6548, + "ारा": 6549, + "अगर": 6550, + "स्त": 6551, + "चल": 6552, + "फिर": 6553, + "वारा": 6554, + "करना": 6555, + "शी": 6556, + "गए": 6557, + "बन": 6558, + "ौर": 6559, + "होने": 6560, + "चाह": 6561, + "खु": 6562, + "हाँ": 6563, + "उन्हें": 6564, + "उन्होंने": 6565, + "छो": 6566, + "म्ह": 6567, + "प्रति": 6568, + "निक": 6569, + "वन": 6570, + "्यू": 6571, + "रही": 6572, + "तुम्ह": 6573, + "जैसे": 6574, + "ियों": 6575, + "क्यों": 6576, + "लों": 6577, + "फ़": 6578, + "ंत्र": 6579, + "होते": 6580, + "क्ति": 6581, + "त्य": 6582, + "कर्": 6583, + "कई": 6584, + "वं": 6585, + "किन": 6586, + "पो": 6587, + "कारण": 6588, + "ड़ी": 6589, + "भि": 6590, + "इसके": 6591, + "बर": 6592, + "उसके": 6593, + "द्वारा": 6594, + "शे": 6595, + "कॉ": 6596, + "दिन": 6597, + "न्न": 6598, + "ड़ा": 6599, + "स्व": 6600, + "निर्": 6601, + "मुख": 6602, + "लिया": 6603, + "टि": 6604, + "ज्ञान": 6605, + "क्त": 6606, + "द्र": 6607, + "ग्": 6608, + "क्स": 6609, + "मै": 6610, + "गो": 6611, + "जे": 6612, + "ट्र": 6613, + "मार": 6614, + "त्व": 6615, + "धार": 6616, + "भाव": 6617, + "करता": 6618, + "खि": 6619, + "कं": 6620, + "चाहि": 6621, + "यर": 6622, + "प्त": 6623, + "कों": 6624, + "ंच": 6625, + "जु": 6626, + "मत": 6627, + "अच्छ": 6628, + "हुई": 6629, + "कभी": 6630, + "लेकिन": 6631, + "भू": 6632, + "अपना": 6633, + "दूस": 6634, + "चाहिए": 6635, + "यू": 6636, + "घर": 6637, + "सबसे": 6638, + "मेरी": 6639, + "नाम": 6640, + "ढ़": 6641, + "ंट": 6642, + "ेंगे": 6643, + "बै": 6644, + "फा": 6645, + "एवं": 6646, + "यी": 6647, + "ग्र": 6648, + "क्षे": 6649, + "आज": 6650, + "आपको": 6651, + "भाग": 6652, + "ठा": 6653, + "कै": 6654, + "भारत": 6655, + "उनकी": 6656, + "पहु": 6657, + "सभी": 6658, + "धा": 6659, + "णा": 6660, + "सान": 6661, + "होगा": 6662, + "तब": 6663, + "संग": 6664, + "पर्": 6665, + "अव": 6666, + "तना": 6667, + "गि": 6668, + "यन": 6669, + "स्था": 6670, + "चित": 6671, + "ट्": 6672, + "छा": 6673, + "जाने": 6674, + "क्षेत्र": 6675, + "वाली": 6676, + "पूर्ण": 6677, + "समा": 6678, + "कारी": 6679, + "[hi]": 6680 + }, + "merges": [ + "t h", + "i n", + "th e", + "a n", + "e r", + "o u", + "r e", + "o n", + "a t", + "e d", + "e n", + "t o", + 
"in g", + "an d", + "i s", + "a s", + "a l", + "o r", + "o f", + "a r", + "i t", + "e s", + "h e", + "s t", + "l e", + "o m", + "s e", + "b e", + "a d", + "o w", + "l y", + "c h", + "w h", + "th at", + "y ou", + "l i", + "v e", + "a c", + "t i", + "l d", + "m e", + "w as", + "g h", + "i d", + "l l", + "w i", + "en t", + "f or", + "a y", + "r o", + "v er", + "i c", + "h er", + "k e", + "h is", + "n o", + "u t", + "u n", + "i r", + "l o", + "w e", + "r i", + "h a", + "wi th", + "gh t", + "ou t", + "i m", + "i on", + "al l", + "a b", + "on e", + "n e", + "g e", + "ou ld", + "t er", + "m o", + "h ad", + "c e", + "s he", + "g o", + "s h", + "u r", + "a m", + "s o", + "p e", + "m y", + "d e", + "a re", + "b ut", + "om e", + "f r", + "the r", + "f e", + "s u", + "d o", + "c on", + "t e", + "a in", + "er e", + "p o", + "i f", + "the y", + "u s", + "a g", + "t r", + "n ow", + "ou n", + "th is", + "ha ve", + "no t", + "s a", + "i l", + "u p", + "th ing", + "fr om", + "a p", + "h im", + "ac k", + "at ion", + "an t", + "ou r", + "o p", + "li ke", + "u st", + "es s", + "b o", + "o k", + "u l", + "in d", + "e x", + "c om", + "s ome", + "the re", + "er s", + "c o", + "re s", + "m an", + "ar d", + "p l", + "w or", + "w ay", + "ti on", + "f o", + "c a", + "w ere", + "b y", + "at e", + "p ro", + "t ed", + "oun d", + "ow n", + "w ould", + "t s", + "wh at", + "q u", + "al ly", + "i ght", + "c k", + "g r", + "wh en", + "v en", + "c an", + "ou gh", + "in e", + "en d", + "p er", + "ou s", + "o d", + "id e", + "k now", + "t y", + "ver y", + "s i", + "a k", + "wh o", + "ab out", + "i ll", + "the m", + "es t", + "re d", + "y e", + "c ould", + "on g", + "you r", + "the ir", + "e m", + "j ust", + "o ther", + "in to", + "an y", + "wh i", + "u m", + "t w", + "as t", + "d er", + "d id", + "i e", + "be en", + "ac e", + "in k", + "it y", + "b ack", + "t ing", + "b r", + "mo re", + "a ke", + "p p", + "the n", + "s p", + "e l", + "u se", + "b l", + "sa id", + "o ver", + "ge t", + "e n", + "e r", + "c h", + "e i", + "i e", + "u n", + "i ch", + "ei n", + "s t", + "a n", + "t e", + "g e", + "a u", + "i n", + "s ch", + "d er", + "un d", + "d ie", + "d a", + "e s", + "a l", + "d en", + "a r", + "g en", + "z u", + "d e", + "h r", + "o n", + "t en", + "e l", + "o r", + "m i", + "s ie", + "da s", + "a t", + "b e", + "ein e", + "ich t", + "b er", + "l e", + "a ch", + "v er", + "s e", + "au f", + "w i", + "s o", + "t er", + "l ich", + "c k", + "u r", + "n icht", + "m m", + "b en", + "a s", + "w ar", + "r e", + "mi t", + "s ich", + "i g", + "l l", + "au s", + "i st", + "w ie", + "o ch", + "un g", + "an n", + "ü r", + "h n", + "i hr", + "s a", + "s en", + "t z", + "de m", + "ei t", + "u m", + "h at", + "wi r", + "v on", + "h a", + "s p", + "w ei", + "i er", + "r o", + "h er", + "r a", + "ein en", + "n e", + "v or", + "al s", + "an d", + "al l", + "w as", + "w o", + "r ei", + "st e", + "l ie", + "au ch", + "d u", + "d es", + "k o", + "ü ber", + "a m", + "b ei", + "h en", + "h m", + "l ei", + "a ber", + "w en", + "h l", + "g er", + "i m", + "u t", + "n ach", + "h e", + "i s", + "b r", + "f t", + "en t", + "i mm", + "j e", + "sch en", + "w er", + "s er", + "a b", + "ä n", + "m e", + "s ein", + "i t", + "o l", + "ch t", + "f ür", + "k l", + "f f", + "eine m", + "n en", + "w e", + "j a", + "u s", + "n och", + "hat te", + "t r", + "p f", + "h in", + "d i", + "ch en", + "b l", + "m an", + "r ü", + "ie l", + "s el", + "das s", + "i hn", + "mi r", + "sch l", + "ö n", + "g an", + "g t", + "ein er", + "st en", + "m ich", + "wen n", + "el l", + "g 
te", + "in d", + "m al", + "ge l", + "k en", + "n ur", + "mm en", + "f ü", + "er n", + "ö r", + "un ter", + "f r", + "an der", + "g r", + "i l", + "d ur", + "u ch", + "f e", + "t a", + "m en", + "m ach", + "d och", + "t i", + "dur ch", + "o s", + "g l", + "h al", + "ihr e", + "w ä", + "imm er", + "i hm", + "k ann", + "or t", + "d ann", + "l an", + "tz t", + "o der", + "hr en", + "e t", + "k ön", + "i ck", + "f a", + "in g", + "i r", + "wie der", + "da ß", + "m ein", + "f en", + "gan z", + "die se", + "st er", + "da r", + "w a", + "ge s", + "n a", + "f l", + "i gen", + "sch e", + "un gen", + "me hr", + "ß en", + "o t", + "k on", + "ge w", + "ha ben", + "ge h", + "ä t", + "s ind", + "d r", + "w el", + "un s", + "v o", + "m a", + "u te", + "sch on", + "b es", + "ge sch", + "b t", + "ch e", + "s on", + "o b", + "l a", + "p p", + "rü ck", + "s eine", + "k r", + "f re", + "ei l", + "zu m", + "u l", + "h ier", + "k t", + "i ge", + "sp r", + "k e", + "le ben", + "b st", + "z eit", + "i on", + "g ro", + "den n", + "h o", + "sch a", + "b ar", + "al le", + "ge gen", + "w ür", + "m ü", + "z e", + "wer den", + "je tzt", + "ko mmen", + "n ie", + "s ei", + "h eit", + "so ll", + "g lei", + "m eine", + "wo ll", + "n er", + "ha be", + "w ur", + "lich en", + "p er", + "as sen", + "n te", + "se hen", + "wir d", + "b is", + "g ar", + "i en", + "m us", + "u ß", + "ä r", + "st ell", + "k eit", + "z wei", + "sel bst", + "st a", + "p a", + "sa gte", + "te t", + "k am", + "s sen", + "v iel", + "u g", + "z en", + "h ei", + "m ann", + "wi ll", + "ge b", + "war en", + "ü ck", + "ä ch", + "m er", + "r u", + "w or", + "h au", + "ei gen", + "an g", + "we g", + "bl ick", + "f ra", + "all es", + "k a", + "au gen", + "f in", + "lich e", + "t o", + "un ser", + "der n", + "her r", + "n un", + "v ie", + "ch te", + "wo hl", + "f all", + "h t", + "ü n", + "et was", + "st and", + "en d", + "ä u", + "e m", + "m ö", + "te l", + "r ie", + "d ich", + "die s", + "h and", + "b in", + "ff en", + "nicht s", + "d an", + "p l", + "hn e", + "ihn en", + "es en", + "die ser", + "fr au", + "an t", + "ar t", + "di r", + "i sch", + "er st", + "glei ch", + "ko mm", + "h ör", + "ß e", + "d ig", + "se hr", + "z ei", + "sa m", + "au m", + "h ät", + "in gen", + "g ut", + "b o", + "m ut", + "ck en", + "kon nte", + "st imm", + "p ro", + "zu r", + "i tz", + "wei l", + "wür de", + "f ä", + "kön nen", + "k eine", + "f er", + "i schen", + "vo ll", + "ein es", + "se tz", + "z ie", + "de l", + "te te", + "sein er", + "ier en", + "ge st", + "zu rück", + "wur de", + "sch n", + "p r", + "lie ß", + "t ra", + "m ä", + "gen d", + "f ol", + "i k", + "schl a", + "scha ft", + "at er", + "wei ß", + "s einen", + "l assen", + "l u", + "und en", + "t eil", + "ne u", + "ier t", + "men schen", + "hm en", + "st r", + "g i", + "sa h", + "ihr en", + "el n", + "wei ter", + "ge hen", + "ig er", + "mach t", + "ta g", + "al so", + "hal ten", + "n is", + "ach t", + "ge ben", + "f or", + "o g", + "n at", + "m ar", + "de t", + "o hne", + "h aus", + "t ro", + "an ge", + "l au", + "sp iel", + "t re", + "sch r", + "in n", + "s u", + "l os", + "mach en", + "hät te", + "be g", + "wir k", + "al t", + "g lich", + "te s", + "r icht", + "fre und", + "m o", + "ihr er", + "f el", + "b el", + "so l", + "ein mal", + "e ben", + "h ol", + "h än", + "q u", + "ter n", + "h ö", + "sch w", + "re cht", + "wa hr", + "s einem", + "ste hen", + "hl en", + "in s", + "g ing", + "woll te", + "wi ssen", + "ung s", + "al d", + "as s", + "ja hr", + "m or", + "wel t", + "un der", + "zu sa", + "at ion", + "ko pf", 
+ "lan g", + "hin ter", + "at z", + "st ra", + "an gen", + "an k", + "a de", + "gl au", + "f ach", + "hat ten", + "l o", + "f ort", + "ei cht", + "i ff", + "l er", + "m ei", + "diese m", + "k ein", + "f rei", + "fü hr", + "vo m", + "e s", + "e n", + "a i", + "o u", + "o n", + "l e", + "d e", + "r e", + "q u", + "a n", + "e r", + "en t", + "e t", + "l a", + "n e", + "i l", + "a r", + "i s", + "ai t", + "t e", + "a u", + "i n", + "qu e", + "i t", + "u r", + "s e", + "l es", + "c h", + "c e", + "m e", + "o r", + "ou r", + "a s", + "p r", + "a v", + "o m", + "ai s", + "u n", + "an t", + "ou s", + "t r", + "t i", + "l u", + "o i", + "e u", + "l le", + "s i", + "p ar", + "d es", + "an s", + "m ent", + "é t", + "es t", + "j e", + "u ne", + "a l", + "p as", + "t re", + "qu i", + "d u", + "r i", + "c on", + "s on", + "c om", + "e lle", + "d é", + "p our", + "d ans", + "l i", + "s a", + "r é", + "t ou", + "v ous", + "d i", + "v i", + "a g", + "a m", + "a t", + "ou v", + "a p", + "ti on", + "m on", + "s ur", + "c i", + "o s", + "p lu", + "s u", + "en d", + "a b", + "è re", + "ai n", + "m ais", + "o is", + "r es", + "plu s", + "é e", + "ai ent", + "m p", + "ch e", + "lu i", + "av e", + "ét ait", + "m a", + "s es", + "tou t", + "i r", + "v o", + "a c", + "s er", + "an d", + "f f", + "oi r", + "g r", + "av ait", + "é s", + "m es", + "n ous", + "eu x", + "b i", + "t er", + "c o", + "on s", + "p u", + "c es", + "g e", + "t u", + "le ur", + "pr o", + "d on", + "e ur", + "et te", + "ai re", + "ave c", + "d it", + "t é", + "i e", + "u s", + "il le", + "p er", + "com me", + "c r", + "or t", + "m i", + "e x", + "u x", + "v er", + "m o", + "è s", + "v e", + "au x", + "r a", + "j our", + "il s", + "bi en", + "c ou", + "p e", + "que l", + "p eu", + "c ette", + "t es", + "p o", + "in s", + "c u", + "m ê", + "s o", + "f ait", + "g u", + "m ar", + "ê tre", + "l o", + "it é", + "f r", + "a tion", + "en s", + "b r", + "n i", + "l é", + "d is", + "b le", + "m an", + "n é", + "pu is", + "mê me", + "qu es", + "f i", + "e l", + "ag e", + "g ar", + "m oi", + "en ce", + "on t", + "m ain", + "or s", + "au t", + "an ce", + "v en", + "m é", + "s ans", + "e m", + "s é", + "l on", + "h om", + "r o", + "u t", + "c ar", + "ab le", + "i m", + "de r", + "ch er", + "n o", + "vi e", + "au s", + "b e", + "de ux", + "en f", + "o ù", + "t en", + "p h", + "u re", + "te mp", + "p os", + "r ent", + "p é", + "f aire", + "p i", + "tr es", + "ç a", + "an g", + "end re", + "f or", + "p a", + "b on", + "s ou", + "in t", + "pr é", + "s ent", + "t ant", + "n er", + "c er", + "l à", + "l ais", + "pr ès", + "b re", + "c our", + "p et", + "i on", + "i ne", + "com p", + "l ait", + "tr ouv", + "t a", + "ent re", + "son t", + "de v", + "n u", + "temp s", + "d ou", + "r ait", + "b ou", + "qu and", + "jour s", + "l an", + "er s", + "av oir", + "ét é", + "a le", + "p re", + "f ois", + "or te", + "v é", + "m er", + "n on", + "t ous", + "j us", + "cou p", + "t s", + "hom me", + "ê te", + "a d", + "aus si", + "ur s", + "se u", + "or d", + "o b", + "m in", + "g é", + "co re", + "v a", + "v re", + "en core", + "se m", + "i te", + "au tre", + "pr is", + "peu t", + "u e", + "an te", + "m al", + "g n", + "ré p", + "h u", + "si on", + "vo tre", + "di re", + "e z", + "f em", + "leur s", + "m et", + "f in", + "c ri", + "m is", + "t our", + "r ai", + "j am", + "re gar", + "ri en", + "ver s", + "su is", + "p ouv", + "o p", + "v is", + "gr and", + "ant s", + "c or", + "re r", + "ar d", + "c é", + "t ent", + "pr es", + "v ou", + "f a", + "al ors", + "si eur", + "ai ne", 
+ "le r", + "qu oi", + "f on", + "end ant", + "ar ri", + "eu re", + "a près", + "don c", + "it u", + "l è", + "s ait", + "t oi", + "ch a", + "ai l", + "as se", + "i mp", + "vo y", + "con n", + "p la", + "pet it", + "av ant", + "n om", + "t in", + "don t", + "d a", + "s ous", + "e mp", + "per son", + "el les", + "be au", + "par ti", + "ch o", + "pr it", + "tou jours", + "m en", + "r ais", + "jam ais", + "tr av", + "tion s", + "tr ès", + "v oi", + "r en", + "y eux", + "f er", + "v oir", + "pre mi", + "c a", + "g ne", + "h eure", + "r ou", + "e ff", + "no tre", + "ment s", + "t on", + "f ais", + "ce la", + "i er", + "rép on", + "con s", + "ai r", + "ô t", + "p endant", + "i ci", + "tou te", + "j et", + "p ort", + "ét aient", + "p en", + "h é", + "au tres", + "p ère", + "o c", + "quel ques", + "i que", + "l is", + "fem me", + "j ou", + "te ur", + "mon de", + "u se", + "n es", + "d re", + "a ff", + "r ap", + "par t", + "le ment", + "c la", + "f ut", + "quel que", + "pr endre", + "r ê", + "ai lle", + "s ais", + "ch es", + "le t", + "ch ar", + "è res", + "ent s", + "b er", + "g er", + "mo ins", + "e au", + "a î", + "j eu", + "h eur", + "é es", + "tr i", + "po int", + "m om", + "v ent", + "n ouv", + "gr an", + "tr ois", + "s ant", + "tout es", + "con tre", + "è rent", + "che z", + "ave z", + "û t", + "a lle", + "at t", + "p au", + "p orte", + "ouv er", + "b ar", + "l it", + "f ort", + "o t", + "as s", + "pr és", + "cho se", + "v it", + "mon sieur", + "h ab", + "t ête", + "j u", + "te ment", + "c tion", + "v rai", + "la r", + "c et", + "regar d", + "l ant", + "de m", + "s om", + "mom ent", + "il les", + "p le", + "p s", + "b es", + "m ère", + "c l", + "s our", + "y s", + "tr op", + "en ne", + "jus qu", + "av aient", + "av ais", + "jeu ne", + "de puis", + "person ne", + "f it", + "cer t", + "j o", + "g es", + "ou i", + "r est", + "sem b", + "c ap", + "m at", + "m u", + "lon g", + "fr an", + "f aut", + "it i", + "b li", + "che v", + "pr i", + "ent e", + "ain si", + "ch am", + "l ors", + "c as", + "d o", + "il i", + "b é", + "n os", + "an ge", + "su i", + "r it", + "cr o", + "gu e", + "d e", + "e n", + "e s", + "o s", + "l a", + "e r", + "q u", + "a r", + "a n", + "o n", + "qu e", + "a s", + "o r", + "e l", + "d o", + "a l", + "c i", + "u n", + "r e", + "a b", + "i n", + "t e", + "t o", + "s e", + "d i", + "t r", + "d a", + "c on", + "t a", + "s u", + "m i", + "c o", + "t i", + "l e", + "l os", + "n o", + "l o", + "í a", + "c u", + "c a", + "s i", + "v i", + "m e", + "p or", + "m o", + "p ar", + "r a", + "r i", + "la s", + "c h", + "r o", + "m a", + "p er", + "ó n", + "m en", + "de s", + "un a", + "m p", + "s o", + "ab a", + "p u", + "d os", + "t u", + "g u", + "er a", + "de l", + "h a", + "m u", + "l i", + "en t", + "m b", + "h ab", + "es t", + "g o", + "p a", + "r es", + "par a", + "p o", + "á s", + "m os", + "tr a", + "t en", + "an do", + "p i", + "qu i", + "b i", + "m an", + "co mo", + "v e", + "m ás", + "j o", + "ci ón", + "i s", + "t an", + "v o", + "da d", + "c e", + "a do", + "v er", + "f u", + "ci a", + "c er", + "p e", + "c as", + "c ar", + "men te", + "n i", + "su s", + "t ar", + "n a", + "f i", + "t er", + "z a", + "p ro", + "tr o", + "s a", + "l u", + "b a", + "per o", + "s er", + "c es", + "d as", + "d u", + "s in", + "e mp", + "m ar", + "l la", + "e x", + "á n", + "c or", + "i a", + "v a", + "r an", + "ch o", + "g a", + "y o", + "t os", + "c os", + "mi s", + "l es", + "t es", + "v en", + "h o", + "y a", + "en te", + "on es", + "hab ía", + "n u", + "u s", + "p as", + "h i", + "n os", + 
"es ta", + "la n", + "m as", + "t or", + "l le", + "h e", + "s on", + "b re", + "p re", + "ab an", + "d or", + "í an", + "i r", + "t as", + "é n", + "r u", + "en do", + "a que", + "er o", + "i o", + "qu é", + "m in", + "c ab", + "j a", + "de r", + "t al", + "é s", + "se ñ", + "or a", + "to do", + "la r", + "d on", + "g ar", + "s al", + "p r", + "cu ando", + "j e", + "h u", + "g un", + "b u", + "g i", + "d ar", + "n e", + "r as", + "de n", + "es to", + "par e", + "p en", + "é l", + "tr as", + "c an", + "b o", + "j os", + "mi en", + "pu e", + "c re", + "co mp", + "p on", + "d ía", + "tr os", + "s ab", + "so bre", + "es e", + "mb re", + "er on", + "a ñ", + "m or", + "f or", + "i do", + "por que", + "el la", + "p ri", + "g ran", + "f a", + "c en", + "di s", + "c ri", + "mu y", + "ch a", + "c al", + "es te", + "h as", + "c ó", + "g ra", + "r os", + "p os", + "o b", + "al l", + "aque l", + "j u", + "p res", + "m er", + "di jo", + "c ía", + "ent re", + "z o", + "ci ones", + "bi en", + "mb i", + "el o", + "t ó", + "in a", + "to dos", + "g en", + "ti en", + "est aba", + "de ci", + "ci o", + "h er", + "ñ o", + "l or", + "nu es", + "me di", + "l en", + "vi da", + "f e", + "al i", + "m on", + "c la", + "d re", + "pu es", + "al es", + "vo l", + "m í", + "r ar", + "b le", + "ci on", + "has ta", + "señ or", + "con o", + "a h", + "di os", + "s en", + "es a", + "ú n", + "v ar", + "s an", + "gu i", + "a c", + "o tros", + "ta do", + "bu en", + "ñ a", + "ti emp", + "ha cer", + "j er", + "f er", + "v u", + "f in", + "an a", + "as í", + "an tes", + "t in", + "ve z", + "mien to", + "j ar", + "la b", + "ch e", + "cas a", + "d r", + "es o", + "e go", + "di ó", + "an te", + "est á", + "m al", + "en cia", + "el i", + "í as", + "tiemp o", + "z ar", + "v an", + "m un", + "er ta", + "ta mbi", + "s í", + "b ar", + "a un", + "al e", + "mis mo", + "ent es", + "vi s", + "man o", + "el e", + "na da", + "se gu", + "me j", + "er ra", + "ab le", + "b e", + "ti r", + "un o", + "don de", + "to da", + "des de", + "r en", + "tambi én", + "cu er", + "per son", + "ho mbre", + "o tro", + "li b", + "tr ar", + "cu al", + "ha y", + "a u", + "ca da", + "t aba", + "i mp", + "men to", + "ten ía", + "qu er", + "er an", + "si emp", + "siemp re", + "er to", + "qu í", + "g os", + "pu és", + "el los", + "des pués", + "nu e", + "g an", + "l lo", + "in ter", + "có mo", + "tr i", + "ah ora", + "us te", + "tr aba", + "la do", + "in o", + "po co", + "er te", + "mu jer", + "i m", + "qui er", + "al gun", + "fu e", + "o jos", + "ent on", + "v os", + "es per", + "mu ch", + "o tra", + "a z", + "a d", + "in g", + "e za", + "a quí", + "ci as", + "gu a", + "mu cho", + "deci r", + "es ti", + "i dad", + "al go", + "e z", + "o cu", + "enton ces", + "di do", + "ent os", + "g ri", + "da do", + "i os", + "so l", + "dos e", + "uste d", + "qui en", + "a mi", + "un to", + "f r", + "mi r", + "mej or", + "b as", + "so lo", + "pre gun", + "tu r", + "al g", + "p la", + "to das", + "par te", + "e mb", + "c to", + "mun do", + "tien e", + "tan te", + "pa lab", + "tr an", + "aque lla", + "ci os", + "aun que", + "a y", + "cu en", + "ten er", + "f un", + "res pon", + "all í", + "x i", + "h an", + "pen s", + "con tra", + "tu ra", + "v al", + "di o", + "tr es", + "t re", + "tan to", + "ca min", + "m ó", + "es p", + "a da", + "í o", + "in s", + "ha cia", + "de j", + "est ar", + "i ón", + "g as", + "b er", + "v as", + "no che", + "é r", + "añ os", + "pa dre", + "gu s", + "á r", + "sin o", + "man os", + "ci do", + "es tu", + "a de", + "hu bi", + "vi r", + "b ri", + "ra z", + "ch 
i", + "pue de", + "men os", + "hab i", + "ho mb", + "ne ces", + "ma y", + "er os", + "r ía", + "he cho", + "es cu", + "l ti", + "án do", + "b us", + "cos as", + "t ú", + "es pa", + "re ci", + "c tor", + "pri m", + "di a", + "de se", + "mien tras", + "h or", + "fu er", + "i da", + "pos i", + "lan te", + "t on", + "an o", + "est as", + "p li", + "ch ar", + "lu ego", + "si ón", + "ci n", + "ti erra", + "m es", + "gu ar", + "ca do", + "en con", + "pr en", + "may or", + "f al", + "e r", + "o n", + "a n", + "t o", + "d i", + "r e", + "l a", + "i n", + "e n", + "a l", + "t a", + "c h", + "e l", + "r i", + "c o", + "t i", + "t e", + "s i", + "r a", + "u n", + "l e", + "l i", + "ch e", + "r o", + "c i", + "c a", + "s e", + "q u", + "m a", + "p o", + "s o", + "i l", + "d o", + "e s", + "v a", + "p er", + "l o", + "c on", + "d el", + "p a", + "m o", + "s a", + "p i", + "d a", + "m i", + "g i", + "s u", + "d e", + "v i", + "z i", + "m e", + "g li", + "n o", + "m en", + "v o", + "t u", + "n on", + "v e", + "t to", + "s t", + "on e", + "an o", + "ch i", + "er a", + "er e", + "f a", + "c e", + "z a", + "un a", + "b i", + "p re", + "s ta", + "o r", + "a r", + "f i", + "on o", + "t ra", + "n a", + "n el", + "n e", + "p ro", + "t ro", + "al e", + "v er", + "n i", + "c u", + "t ti", + "men te", + "del la", + "t er", + "zi one", + "g u", + "p e", + "t ta", + "an do", + "t à", + "al i", + "u o", + "qu el", + "co m", + "s en", + "co me", + "b a", + "al la", + "p ri", + "d u", + "qu es", + "l u", + "on i", + "g gi", + "pa r", + "s si", + "v en", + "in a", + "g a", + "pi ù", + "ci a", + "i m", + "co r", + "m an", + "in o", + "in i", + "t en", + "r an", + "b b", + "g o", + "s to", + "t re", + "a ve", + "a v", + "s ono", + "er i", + "a c", + "s se", + "er o", + "h a", + "s c", + "su l", + "f or", + "v ano", + "po r", + "s ti", + "su o", + "c chi", + "t an", + "z za", + "an che", + "p u", + "i o", + "t te", + "vo l", + "es s", + "s ci", + "co l", + "r u", + "p en", + "f u", + "al l", + "s so", + "s te", + "se m", + "s sa", + "d en", + "a d", + "t ri", + "de i", + "in e", + "ave va", + "men to", + "z z", + "a mo", + "g no", + "f o", + "un o", + "su a", + "g en", + "ri a", + "g e", + "st ra", + "s ì", + "c er", + "ch é", + "b u", + "a p", + "c en", + "d al", + "on a", + "s pe", + "g ni", + "b o", + "t t", + "del le", + "ques to", + "nel la", + "f f", + "d ere", + "an no", + "del l", + "un i", + "bb e", + "an ti", + "g ra", + "s p", + "en e", + "gi o", + "u to", + "qu al", + "gli a", + "qu ando", + "tu tto", + "c an", + "gli o", + "zi oni", + "ca m", + "h o", + "es so", + "s s", + "mo l", + "a t", + "lo ro", + "per ché", + "co sa", + "du e", + "po i", + "ca r", + "s co", + "ci o", + "to r", + "c co", + "c re", + "a m", + "g na", + "te m", + "pri ma", + "lu i", + "co sì", + "qu e", + "gu ar", + "ess ere", + "an i", + "con o", + "b ra", + "al le", + "m on", + "ri o", + "an co", + "cu i", + "s pi", + "vi a", + "g ran", + "gi or", + "a i", + "bi le", + "u l", + "ggi o", + "f e", + "an te", + "ma i", + "ta re", + "in ter", + "in di", + "re bbe", + "sen za", + "so lo", + "zi o", + "e d", + "en te", + "tu tti", + "sta to", + "zi a", + "d alla", + "tu ra", + "mi a", + "vi ta", + "quel la", + "qu a", + "ma r", + "do ve", + "g h", + "al lo", + "sem pre", + "zz o", + "si a", + "mo r", + "do po", + "por ta", + "d re", + "c cia", + "er ano", + "an ni", + "di o", + "chi a", + "en za", + "pro pri", + "qu i", + "m u", + "m b", + "an da", + "c ca", + "o cchi", + "ques ta", + "f fi", + "le i", + "par te", + "d on", + "r on", + "mi o", + 
"tan to", + "ri s", + "o gni", + "di s", + "r in", + "fa r", + "men ti", + "t el", + "anco ra", + "f ra", + "fa tto", + "man i", + "sen ti", + "p ra", + "tem po", + "es si", + "b bi", + "f in", + "a re", + "la re", + "per s", + "f on", + "b el", + "so r", + "d er", + "pre n", + "an za", + "di re", + "pi e", + "o ra", + "ver so", + "se gu", + "al tro", + "ta to", + "ca to", + "a to", + "vol ta", + "c c", + "fa re", + "pa re", + "ci ò", + "li b", + "bi li", + "n uo", + "s er", + "quel lo", + "co lo", + "p po", + "ca sa", + "tro va", + "o re", + "f er", + "r ono", + "d es", + "mol to", + "al mente", + "s ca", + "vo le", + "t ali", + "sul la", + "s ce", + "men o", + "an to", + "p un", + "s tu", + "ca pi", + "so l", + "gi u", + "m ini", + "m ano", + "z e", + "pi a", + "par ti", + "s al", + "la vo", + "ver o", + "r si", + "al tri", + "es ti", + "s cia", + "suo i", + "gli e", + "so tto", + "b ene", + "sc ri", + "t ale", + "de gli", + "n u", + "al c", + "uo mo", + "p el", + "f re", + "po te", + "es sa", + "s cu", + "si gno", + "el e", + "st ro", + "u ti", + "di a", + "si one", + "g re", + "f ini", + "ar ri", + "l un", + "c ri", + "e si", + "pa ssa", + "r à", + "men tre", + "an d", + "h anno", + "el o", + "u sci", + "gi a", + "gi à", + "di e", + "m ina", + "b e", + "ti ca", + "gior no", + "t in", + "es se", + "mo do", + "c al", + "s pa", + "propri o", + "l en", + "o ri", + "con tro", + "st ru", + "di ven", + "di sse", + "ra to", + "no i", + "v ere", + "pu ò", + "di ce", + "s an", + "es a", + "c ci", + "se con", + "re n", + "c cio", + "qual che", + "tu tta", + "g g", + "mon do", + "for ma", + "p li", + "m ma", + "pen sa", + "de va", + "tu r", + "fo sse", + "so pra", + "ta mente", + "n ess", + "qu anto", + "ra ga", + "un que", + "ca re", + "st re", + "gran de", + "pi cco", + "guar da", + "b en", + "nel l", + "a ff", + "po ssi", + "pre sen", + "r ò", + "pa ro", + "tu a", + "v in", + "an e", + "a s", + "ste sso", + "da v", + "ne i", + "nel le", + "gh i", + "pi o", + "ta r", + "an a", + "la to", + "si d", + "f ine", + "f uo", + "m er", + "z o", + "qua si", + "ul ti", + "i to", + "su e", + "si e", + "f il", + "allo ra", + "m in", + "ven i", + "t ano", + "el lo", + "d e", + "r a", + "e s", + "d o", + "e n", + "q u", + "c o", + "a s", + "o s", + "e r", + "a r", + "s e", + "qu e", + "a n", + "i n", + "i s", + "t o", + "ã o", + "t e", + "d a", + "m a", + "e l", + "t a", + "o r", + "i a", + "r e", + "e m", + "a l", + "co m", + "p a", + "o u", + "c a", + "u m", + "r o", + "v a", + "t i", + "s o", + "m en", + "n ão", + "h a", + "co n", + "m e", + "r i", + "pa ra", + "p o", + "d i", + "s a", + "v o", + "u ma", + "c i", + "n a", + "p or", + "n o", + "g u", + "s u", + "h o", + "an do", + "t ra", + "e i", + "v i", + "e u", + "i m", + "do s", + "el e", + "r es", + "m o", + "en t", + "f i", + "l a", + "e ra", + "l e", + "de s", + "el a", + "men te", + "l h", + "p er", + "l i", + "ç ão", + "m as", + "t er", + "m u", + "es t", + "v e", + "g o", + "l o", + "u s", + "ma is", + "v er", + "c ê", + "in ha", + "vo cê", + "f a", + "t u", + "c u", + "p ar", + "com o", + "p ro", + "s i", + "m os", + "e c", + "p re", + "d as", + "ç a", + "es ta", + "s er", + "u n", + "da de", + "d is", + "f o", + "e x", + "c h", + "i r", + "ra n", + "t ar", + "en te", + "g a", + "t r", + "p e", + "t os", + "b o", + "c ia", + "p en", + "c ar", + "s en", + "su a", + "se m", + "c as", + "f or", + "to u", + "n os", + "te m", + "r ia", + "m es", + "se u", + "co r", + "o n", + "a o", + "p os", + "ra m", + "v el", + "é m", + "t en", + "po de", + "t 
es", + "esta va", + "c e", + "b a", + "qu ando", + "m i", + "qu er", + "men to", + "se gu", + "t as", + "is so", + "mu i", + "g ar", + "t ro", + "d u", + "fa z", + "õ es", + "p es", + "an to", + "l u", + "p i", + "i x", + "ve z", + "s im", + "j a", + "p r", + "m in", + "b e", + "ra s", + "m an", + "p res", + "est á", + "c er", + "b re", + "p as", + "d ia", + "m b", + "dis se", + "n i", + "r os", + "es se", + "v ia", + "o lh", + "is a", + "an te", + "ê n", + "z a", + "qu i", + "b i", + "t inha", + "me u", + "s ão", + "m inha", + "a c", + "ri o", + "m ar", + "a t", + "p el", + "mui to", + "ta l", + "to r", + "fo i", + "h or", + "j o", + "b em", + "g i", + "f al", + "vo l", + "po n", + "di z", + "l ar", + "gu n", + "m or", + "r u", + "par ec", + "ç o", + "do r", + "pes so", + "n e", + "f er", + "b er", + "p u", + "po is", + "in a", + "es p", + "d ar", + "en do", + "de n", + "so bre", + "co s", + "p ri", + "al i", + "mes mo", + "ç ões", + "g ra", + "se us", + "me i", + "b ra", + "vi da", + "an tes", + "b ri", + "at é", + "ên cia", + "lh e", + "ti v", + "m ã", + "al g", + "qu anto", + "s ó", + "g os", + "de r", + "t ão", + "tu do", + "ent ão", + "r ou", + "es s", + "in da", + "b al", + "in do", + "ci o", + "n do", + "j á", + "va m", + "re i", + "l es", + "ei to", + "v is", + "tem po", + "de pois", + "c ha", + "m el", + "ch e", + "l ha", + "a inda", + "faz er", + "con tra", + "p ou", + "per gun", + "de ix", + "ta mb", + "ra r", + "al a", + "v en", + "t in", + "pel o", + "tamb ém", + "fi ca", + "pre c", + "el es", + "tra n", + "ha via", + "l á", + "to dos", + "j u", + "qu al", + "c an", + "ta do", + "cas a", + "es sa", + "n as", + "g em", + "m em", + "se i", + "na da", + "sen ti", + "c ri", + "ó s", + "de u", + "ei ro", + ". .", + "f un", + "as sim", + "s ou", + "ent re", + "com e", + "i or", + "h ar", + "f e", + "por que", + "s or", + "f in", + "ta mente", + "a qui", + "cu l", + "t ó", + "for ma", + "s ar", + "ou tra", + "olh os", + "i ma", + "m im", + "a go", + "in s", + "co u", + "g ran", + "v al", + "pesso as", + "era m", + "ei ra", + "a que", + "com p", + "de i", + "p ela", + "co isa", + "m ão", + "con h", + "ca da", + "ago ra", + "ia m", + "h á", + "con s", + "su as", + "gu ém", + "o b", + "l an", + "es ti", + "á s", + "la do", + "in ter", + "ca be", + "por ta", + "n em", + "í vel", + "r is", + "j e", + "n un", + "sem pre", + "con segu", + "h as", + "tra bal", + "f u", + "le v", + "l em", + "l as", + "va i", + "tr os", + "t ante", + "te i", + "pr ó", + "que m", + "tu ra", + "on de", + "cabe ça", + "nun ca", + "men tos", + "h um", + "de le", + "ver dade", + "t á", + "h os", + "el i", + "ent es", + "m er", + "alg um", + "diz er", + "s in", + "pen as", + "n ós", + "en quanto", + "ou tro", + "l ho", + "es te", + "mel hor", + "est ar", + "g an", + "b ar", + "pri mei", + "a u", + "i u", + "pen sa", + "a penas", + "p ra", + "es tou", + "con te", + "res pon", + "ho mem", + "do is", + "a do", + "c al", + "a b", + "l os", + "ç as", + "pou co", + "sen hor", + "t ando", + "esp era", + "pa i", + "ri os", + "no i", + "i da", + "ba ix", + "as e", + "is as", + "f r", + "ho ra", + "mu ndo", + "pas sa", + "fi car", + "to do", + "se ja", + "al mente", + "â n", + "c lar", + "a d", + "in c", + "f os", + "lo n", + "g ri", + "ou vi", + "v em", + "g e", + "ta va", + "á rio", + "mo n", + "s os", + "in ho", + "ma l", + "t an", + "t re", + "gran de", + "ran do", + "b u", + "v ou", + "ê s", + "co isas", + "a conte", + "lh er", + "g en", + "ci on", + "an os", + "i do", + "tal vez", + "est ão", + "li v", + "sa b", + "su 
r", + "ou tros", + "c re", + "qual quer", + "g ou", + "t ri", + "l í", + "tiv esse", + "ra do", + "prec isa", + "mã e", + "su s", + "t anto", + "de la", + "men os", + "s al", + "en tra", + "p é", + "ma ior", + "noi te", + "ti va", + "p ala", + "so n", + "ra ção", + "de us", + "s as", + "un i", + "l or", + "u l", + "in te", + "f ei", + "an o", + "par ti", + "pala v", + "tr ás", + "par te", + "b el", + "ci dade", + "lu gar", + "v os", + "vez es", + "do u", + "en contra", + "tr u", + "e ci", + "a r", + "e r", + "a n", + "e n", + "i n", + "i r", + "o r", + "d e", + "a k", + "ı n", + "a l", + "d i", + "d a", + "b u", + "b ir", + "y or", + "i l", + "e k", + "y a", + "m a", + "l a", + "e l", + "u n", + "k a", + "l ar", + "i m", + "d ı", + "e t", + "o n", + "d u", + "o l", + "e y", + "t ı", + "m i", + "h a", + "b a", + "l er", + "ü n", + "m ı", + "i z", + "l e", + "ı r", + "m e", + "i s", + "n e", + "o k", + "t a", + "s a", + "u m", + "r a", + "g ö", + "i k", + "s ı", + "d en", + "e s", + "b il", + "t i", + "l ı", + "ü z", + "i ç", + "ü r", + "g i", + "u r", + "t e", + "b en", + "d an", + "i y", + "ı m", + "u z", + "v e", + "c ak", + "a y", + "c e", + "i ş", + "ın ı", + "i yor", + "ba ş", + "d ü", + "a t", + "a m", + "g el", + "de ğ", + "k ar", + "i ̇", + "m u", + "e v", + "ö y", + "bu n", + "v ar", + "ya p", + "s en", + "an a", + "s un", + "in i", + "gö r", + "y ı", + "k i", + "l i", + "ar a", + "al ı", + "on u", + "ç ı", + "ş ey", + "s ın", + "k ı", + "ka d", + "s e", + "t an", + "a ğ", + "değ il", + "s in", + "ü k", + "a z", + "ç ok", + "s on", + "ş ı", + "b i", + "ü l", + "t u", + "v er", + "iç in", + "g e", + "k en", + "ey e", + "ol du", + "mı ş", + "y e", + "k al", + "m ek", + "l an", + "öy le", + "yor du", + "er i", + "y üz", + "mi ş", + "b e", + "m ak", + "o la", + "in e", + "y an", + "h er", + "c ek", + "yor um", + "b ak", + "ü m", + "ö n", + "lar ı", + "o ğ", + "d er", + "kad ar", + "h al", + "ar ı", + "s t", + "s an", + "ın da", + "du r", + "g ün", + "v a", + "y ok", + "y er", + "dı m", + "k o", + "da ha", + "l u", + "ın a", + "di m", + "e m", + "bil ir", + "ik i", + "s iz", + "s i", + "n a", + "di ğ", + "s u", + "b ü", + "ha y", + "s or", + "dü ş", + "ü ç", + "un u", + "ö r", + "d ir", + "m ü", + "c a", + "am an", + "f ak", + "a da", + "e de", + "son ra", + "h iç", + "ak i", + "ğ ı", + "bu l", + "r u", + "ma z", + "an la", + "bu ra", + "ge ç", + "ma ya", + "l en", + "k onu", + "c i", + "c u", + "d in", + "t ek", + "z aman", + "el er", + "ö z", + "dı r", + "gi bi", + "o t", + "ş a", + "g er", + "ler i", + "k im", + "k u", + "fak at", + "y ar", + "gö z", + "c ı", + "yor sun", + "b ek", + "in de", + "r o", + "p ek", + "bun u", + "l ik", + "m an", + "il er", + "e di", + "ö l", + "s ür", + "b in", + "s ır", + "çı k", + "sı l", + "al ar", + "k es", + "y ak", + "ç ek", + "yı l", + "e cek", + "ı z", + "gi t", + "ka p", + "a ma", + "ı l", + "lar ın", + "b iz", + "tı r", + "o y", + "an cak", + "d oğ", + "ç a", + "b ana", + "ş im", + "baş la", + "l ü", + "ma dı", + "ben i", + "t ir", + "y ük", + "lı k", + "be ş", + "b el", + "b er", + "m er", + "na sıl", + "tı k", + "k e", + "t ür", + "a v", + ". 
.", + "d aki", + "p ar", + "t er", + "ce ğ", + "t en", + "z ı", + "iy i", + "d ok", + "ben im", + "c ağ", + "n er", + "y en", + "ş u", + "me z", + "düş ün", + "ken di", + "şim di", + "y ol", + "y u", + "de v", + "is te", + "s ek", + "ma m", + "s öyle", + "di k", + "t o", + "k ur", + "oldu ğ", + "s ını", + "t ar", + "bil iyor", + "k an", + "y al", + "m eye", + "mu ş", + "f a", + "ka ç", + "bil e", + "iy e", + "t ü", + "e f", + "tı m", + "ev et", + "ç o", + "y et", + "g en", + "bura da", + "t im", + "bir az", + "es i", + "k or", + "doğ ru", + "in in", + "kı z", + "di ye", + "d ör", + "et ti", + "on un", + "is ti", + "ğ i", + "h e", + "s ana", + "ü ş", + "ar ka", + "hay ır", + "kar şı", + "h ar", + "il e", + "h ak", + "ı yor", + "ne den", + "s ev", + "sı z", + "ço cu", + "me m", + "ç alı", + "ol ur", + "b ır", + "g ir", + "is e", + "i h", + "c an", + "k ır", + "d ön", + "b öyle", + "sen i", + "! \"", + "al t", + "dör t", + "s öy", + "o ş", + "mu sun", + "la ş", + "h an", + "i p", + "ka y", + "h em", + "bü yük", + "a ç", + "bır ak", + "mi sin", + "s öz", + "u l", + "değ iş", + "ün ü", + "g ül", + "k ö", + "kar ı", + "ta mam", + "ol u", + "r ar", + "yen i", + "la m", + "mış tı", + "ya ş", + "al a", + "in iz", + "kad ın", + "bun un", + "m ey", + "al tı", + "y i", + "s o", + "in den", + "sen in", + "ya t", + "to p", + "s er", + "is i", + "d ün", + "s es", + "hiç bir", + "y on", + "d ın", + "t ün", + "baş ka", + "a s", + "he p", + "i t", + "ir mi", + "dev am", + "ola cak", + "ar tık", + "r e", + "dur um", + "im iz", + "üz el", + "ler ini", + "sa ğ", + "p ro", + "ger ek", + "y irmi", + "ş ek", + "ba ğ", + "me di", + "lar a", + "a h", + "t ur", + "y ür", + "ma sı", + "ka tı", + "de di", + "g ü", + "sor un", + "el i", + "ün e", + "mı z", + "yap ı", + "m il", + "ğ ını", + "t ara", + "m en", + "ha t", + "var dı", + "m et", + "konu ş", + "ar ak", + "lar ak", + "çocu k", + "bü tün", + "l ey", + "d ür", + "g üzel", + "ay ı", + "yap a", + "n ı", + "ay r", + "ö ne", + "yordu m", + "b an", + "i̇ ş", + "du m", + "un a", + "on a", + "yor lar", + "lar ını", + "çı kar", + "z an", + "se ç", + "l iyor", + "t ak", + "şı k", + "tek rar", + "a ş", + "e ş", + "miş ti", + "f ar", + "k in", + "im i", + "i f", + "e ğ", + "gi di", + "le ş", + "başla dı", + "gi de", + "ot ur", + "d de", + "ın dan", + "üz er", + "ın ın", + "n ız", + "u y", + "ye di", + "ka t", + "o larak", + "la dı", + "yal nız", + "ba h", + "iy et", + "m al", + "s ak", + "a çık", + "sın da", + ".. 
.", + "in san", + "ay nı", + "e der", + "is tan", + "uz un", + "sa h", + "d o", + "g eri", + "er ek", + "ol an", + "ger çek", + "f en", + "al an", + "dı ş", + "alı k", + "far k", + "ü st", + "sa de", + "r i", + "k iş", + "l dı", + "z or", + "et ir", + "her kes", + "s al", + "ö mer", + "s el", + "un da", + "ha f", + "bun a", + "y dı", + "pek i", + "ada m", + "ha z", + "sın a", + "kap ı", + "gör üş", + "sade ce", + "al dı", + "gel di", + "i e", + "n ie", + "n a", + "r z", + "s z", + "c z", + "p o", + "s t", + "c h", + "i ę", + "d z", + "n i", + "a ł", + "r a", + "j e", + "r o", + "d o", + "s ię", + "z a", + "g o", + "e m", + "w i", + "c i", + "rz e", + "k o", + "l e", + "l i", + "w a", + "t o", + "k a", + "m i", + "ż e", + "t a", + "w ie", + "b y", + "m o", + "w y", + "rz y", + "ł a", + "j a", + "n o", + "ł o", + "w o", + "p a", + "m a", + "t e", + "t y", + "n y", + "k i", + "d a", + "n e", + "dz ie", + "dz i", + "cz y", + "c ie", + "m y", + "p rze", + "d y", + "o d", + "l a", + "k ie", + "r y", + "st a", + "j ą", + "ó w", + "c e", + "p rzy", + "c o", + "k u", + "m ie", + "sz y", + "cz e", + "r e", + "b a", + "s i", + "b ie", + "m u", + "w e", + "c y", + "ni a", + "ś ci", + "sz e", + "je st", + "k t", + "s a", + "b o", + "t u", + "ż y", + "n ą", + "b i", + "r u", + "a le", + "kt ó", + "p ra", + "ał a", + "m nie", + "p ie", + "ł y", + "cz a", + "ja k", + "ro z", + "r ó", + "l u", + "z na", + "g a", + "ra z", + "ł u", + "ta k", + "j u", + "p i", + "ś ć", + "s o", + "wi a", + "m ó", + "ch o", + "w szy", + "p e", + "s po", + "c a", + "g dy", + "w ał", + "w ię", + "d e", + "b e", + "p ro", + "ł em", + "j ę", + "s k", + "z e", + "l o", + "g i", + "r ę", + "do b", + "d u", + "ju ż", + "st o", + "b ę", + "ał em", + "sz a", + "m e", + "po d", + "d la", + "pa n", + "n ę", + "z o", + "mo że", + "ś li", + "s ie", + "ał o", + "t em", + "l ko", + "ny ch", + "po wie", + "c ię", + "s u", + "ty lko", + "i n", + "b u", + "na j", + "ch a", + "te go", + "p u", + "s ki", + "ne go", + "wszy st", + "sz cze", + "je d", + "je j", + "t wo", + "ą d", + "ś my", + "cz ę", + "wa ć", + "je go", + "ż a", + "i m", + "s y", + "pra w", + "ty m", + "któ ry", + "ał y", + "t rze", + "nie j", + "s e", + "ny m", + "i ch", + "o b", + ". 
.", + "g ło", + "ją c", + "mó wi", + "s ka", + "o n", + "ne j", + "s łu", + "w ła", + "bę dzie", + "d ę", + "p ó", + "be z", + "ni c", + "p ła", + "ś cie", + "mi a", + "s ą", + "t rzy", + "kie m", + "by ł", + "mo g", + "ro bi", + "ta m", + "c u", + "te n", + "m ię", + "z y", + "pe w", + "ci a", + "my ś", + "prze d", + "s ko", + "n u", + "któ re", + "a l", + "l ę", + "w sze", + "ą c", + "by ło", + "so bie", + "p y", + "ci ą", + "ba r", + "je szcze", + "h a", + "t ę", + "b ra", + "cza s", + "sz ę", + "g ł", + "k ę", + "ma r", + "cz u", + "prze z", + "f i", + "s ło", + "w z", + "k to", + "k ów", + "cz o", + "li śmy", + "st ra", + "wię c", + "r ą", + "ma m", + "w ó", + "rz a", + "g ro", + "no ści", + "f a", + "we t", + "ną ł", + "ś mie", + "na wet", + "mu si", + "s wo", + "te j", + "w ą", + "w u", + "wi ą", + "ni u", + "cz ą", + "b li", + "dz o", + "s kie", + "n em", + "je śli", + "cze go", + "ch y", + "d ł", + "ty ch", + "by m", + "ż o", + "e ś", + "si ą", + "kie dy", + "na s", + "w ró", + "dz e", + "d ro", + "t ra", + "r ów", + "pa ni", + "z ie", + "ku l", + "na d", + "ch wi", + "ni m", + "t ro", + "by ć", + "cho dzi", + "ni o", + "dob rze", + "te raz", + "wo kul", + "co ś", + "k ł", + "pie r", + "h e", + "g dzie", + "dz y", + "p ię", + "d ź", + "k ą", + "g ó", + "z da", + "ch ce", + "st ę", + "o r", + "ś wia", + "wszyst ko", + "st ro", + "pe ł", + "wie m", + "wie l", + "ka ż", + "ki m", + "rz u", + "s ły", + "jed na", + "z u", + "myś l", + "mó j", + "g u", + "wa r", + "jest em", + "ó ż", + "mie j", + "mo ż", + "k ła", + "re sz", + "d łu", + "st wo", + "n ię", + "ma sz", + "że by", + "nie m", + "ja kie", + "st y", + "ni ą", + "we j", + "o j", + "g ra", + "s ła", + "no ść", + "z ło", + "sz czę", + ".. .", + "r i", + "le j", + "we go", + "c ał", + "dzi ał", + "ki ch", + "dz a", + "dz ię", + "o czy", + "zo sta", + "cz ło", + "na m", + "ki l", + "o na", + "sz u", + "w ę", + "pa r", + "mi ał", + "st rze", + "ce j", + "e j", + "zna j", + "da ć", + "miej s", + "k ró", + "k ry", + "bar dzo", + "si a", + "z i", + "ś nie", + "l ą", + "g ie", + "cie bie", + "d ni", + "st u", + "po trze", + "wokul ski", + "u wa", + "u mie", + "jedna k", + "k ra", + "wró ci", + "czło wie", + "czy ć", + "by ła", + "że li", + "m ę", + "c ę", + "z robi", + "mog ę", + "pro wa", + "r em", + "nie ch", + "cz nie", + "k ro", + "t ą", + "ch ci", + "b ro", + "dzie ć", + "sz ą", + "pa d", + "t rz", + "t ru", + "je m", + "a ni", + "t ów", + "a r", + "d ru", + "ta j", + "rze kł", + "sa m", + "st e", + "nie go", + "ta kie", + "w ała", + "to wa", + "ka pła", + "wi dzi", + "po dob", + "dz ę", + "t ał", + "stę p", + "b ą", + "po ko", + "w em", + "g ę", + "a by", + "g e", + "al bo", + "s pra", + "z no", + "de n", + "s mo", + "je sz", + "k się", + "jest eś", + "po z", + "ni gdy", + "k sią", + "c óż", + "w s", + "po w", + "t ka", + "ś wie", + "sz ka", + "sa mo", + "s ł", + "rz ę", + "na le", + "chce sz", + "ni k", + "p ę", + "chy ba", + "cią g", + "ją cy", + "wo j", + "na sze", + "mnie j", + "wię cej", + "z wy", + "o sta", + "f e", + "wa ż", + "h o", + "se r", + "śmie r", + "wie r", + "dz ą", + "za ś", + "gdy by", + "ja ki", + "wo l", + "wi n", + "d ą", + "ści a", + "roz ma", + "wa l", + "pa nie", + "sta r", + "ka z", + "je żeli", + "d em", + "w ra", + "ko ń", + "sie bie", + "zno wu", + "p ró", + "cz em", + "st wa", + "i sto", + "pó ł", + "d ał", + "ko bie", + "ała m", + "wy ch", + "ce sa", + "ni ch", + "za wsze", + "dzi ć", + "te ż", + "le pie", + "pro szę", + "k re", + "t wa", + "o t", + "ł ą", + "ch u", + "c ą", + "p rz", + "ł e", + "sze 
dł", + "od powie", + "my śli", + "ś wią", + "e n", + "e r", + "d e", + "a n", + "e t", + "i j", + "i n", + "e l", + "a a", + "s t", + "o r", + "g e", + "i s", + "a t", + "i e", + "c h", + "o n", + "e en", + "h et", + "i t", + "v er", + "aa r", + "a l", + "o or", + "g en", + "v an", + "o p", + "d en", + "h e", + "o m", + "t e", + "w e", + "i k", + "r e", + "z e", + "ij n", + "d at", + "b e", + "d er", + "in g", + "o e", + "ij k", + "a an", + "ch t", + "v oor", + "l e", + "i et", + "r o", + "m o", + "k en", + "z ijn", + "m en", + "i g", + "j e", + "n iet", + "a r", + "o o", + "i d", + "u n", + "i l", + "s ch", + "mo et", + "st e", + "u r", + "o l", + "he b", + "u it", + "g el", + "w ij", + "a s", + "m e", + "t en", + "w or", + "o u", + "v en", + "l en", + "aa t", + "d it", + "m et", + "r a", + "b en", + "s p", + "o ver", + "d ie", + "n o", + "w er", + "l ijk", + "f t", + "s l", + "an d", + "v e", + "t er", + "i er", + "i en", + "t o", + "d aar", + "g r", + "b el", + "de ze", + "d u", + "a g", + "k an", + "wor den", + "in gen", + "moet en", + "n en", + "on der", + "heb ben", + "r u", + "oo k", + "s en", + "c t", + "k t", + "no g", + "aa l", + "w as", + "u l", + "e er", + "b ij", + "m ijn", + "p ro", + "v ol", + "d o", + "k om", + "at ie", + "e ft", + "k el", + "al s", + "r ij", + "he id", + "a f", + "st el", + "m aar", + "a p", + "we e", + "a d", + "he eft", + "w aar", + "i cht", + "d an", + "er en", + "n e", + "w el", + "w at", + "w il", + "a cht", + "aa g", + "ge b", + "c on", + "z o", + "k e", + "b et", + "h ij", + "d ig", + "k un", + "u w", + "d t", + "d oor", + "t ij", + "a m", + "an g", + "on d", + "er s", + "is ch", + "ge en", + "i ge", + "ge v", + "ve el", + "n u", + "m a", + "on s", + "o f", + "b l", + "n aar", + "g ro", + "p l", + "an der", + "at en", + "kun nen", + "e cht", + "h ier", + "g oe", + "an t", + "u s", + "t wee", + "on t", + "de lijk", + "el e", + "u ur", + "al le", + "t oe", + "me er", + "i st", + "n a", + "n ie", + "on ze", + "l o", + "i m", + "p en", + "h ad", + "tij d", + "h oe", + "to t", + "z ou", + "a k", + "aa k", + "a men", + "d r", + "w oor", + "s e", + "wor dt", + "o t", + "gel ijk", + "g aan", + "i c", + "g er", + "k er", + "el d", + "e m", + "h ou", + "de l", + "z en", + "z el", + "te gen", + "b o", + "kom en", + "c om", + "i gen", + "e it", + "wer k", + "goe d", + "z al", + "z ij", + "sl ag", + "e s", + "z ien", + "a st", + "echt er", + "it ie", + "t ie", + "el ijk", + "m is", + "isch e", + "bel an", + "h aar", + "i ch", + "b er", + "h an", + "v r", + "al e", + "c i", + "gr ijk", + "in d", + "do en", + "l and", + "belan grijk", + "p un", + "op en", + "ct ie", + "zel f", + "m ij", + "it eit", + "ste m", + "me e", + "ar en", + "al l", + "b r", + "re cht", + "d ien", + "h u", + "g aat", + "pro b", + "m oe", + "p er", + "a u", + "ul len", + "z ich", + "daar om", + "or m", + "k l", + "v o", + "en t", + "st aat", + "z it", + "du i", + "n at", + "du s", + "d s", + "ver slag", + "kel ijk", + "prob le", + "w et", + "ge m", + "c r", + "i on", + "p r", + "sch ap", + "g d", + "h un", + "z a", + "er d", + "z et", + "st aan", + "st r", + "m aal", + "in der", + "e id", + "st en", + "p ar", + "k ken", + "ge d", + "z ullen", + "re s", + "men sen", + "j aar", + "re gel", + "ie der", + "vol gen", + "ge ven", + "e ven", + "l u", + "bl ij", + "i ë", + "k o", + "u we", + "m an", + "ma ken", + "l ie", + "g a", + "oe k", + "nie uwe", + "b aar", + "h o", + "h er", + "in ter", + "ander e", + "ru ik", + "s u", + "a gen", + "or t", + "m er", + "ou w", + "st er", + "wil len", + "aa 
kt", + "h oo", + "an den", + "f f", + "l ig", + "t re", + "s amen", + "ze er", + "dui delijk", + "ant woor", + "he el", + "men t", + "pun t", + "hou den", + "we g", + "vr aag", + "gel e", + "een s", + "be sch", + "om en", + "er g", + "do el", + "d ag", + "sp e", + "ur en", + "ing s", + "or en", + "l ang", + "de len", + "m ar", + "ste un", + "in nen", + "p ol", + "o on", + "i de", + "s n", + "s ie", + "r icht", + "z onder", + "no dig", + "all een", + "m id", + "ra gen", + "iet s", + "ver sch", + "geb ruik", + "st u", + "ro uw", + "stel len", + "be g", + "men ten", + "v in", + "eer ste", + "l aat", + "gro ot", + "oo d", + "to ch", + "l aten", + "aar d", + "s le", + "de el", + "st and", + "pl aat", + "re e", + "bet re", + "d i", + "l id", + "uit en", + "ra cht", + "bel eid", + "g et", + "ar t", + "st ie", + "st aten", + "g gen", + "re ken", + "e in", + "al en", + "m ing", + "mo gelijk", + "gro te", + "al tijd", + "z or", + "en kel", + "w ik", + "pol itie", + "e igen", + "el k", + "han del", + "g t", + "k we", + "m aat", + "el en", + "i p", + "v rij", + "s om", + "je s", + "aa m", + "hu is", + "v al", + "we er", + "lid staten", + "k ing", + "k le", + "be d", + "gev al", + "stel l", + "a i", + "wik kel", + "kwe stie", + "t al", + "ste e", + "a b", + "h el", + "kom st", + "p as", + "s s", + "it u", + "i den", + "eer d", + "m in", + "c e", + "p o", + "twee de", + "proble em", + "w aren", + "us sen", + "sn el", + "t ig", + "ge w", + "j u", + "ul t", + "ne men", + "com mis", + "versch il", + "k on", + "z oek", + "k rij", + "gr aag", + "den k", + "l anden", + "re den", + "be sl", + "oe g", + "bet er", + "he den", + "m ag", + "p e", + "bo ven", + "a c", + "con t", + "f d", + "h ele", + "k r", + "v ier", + "w in", + "ge z", + "k w", + "m il", + "v or", + "he m", + "ra m", + "aa s", + "ont wikkel", + "dr ie", + "v aak", + "plaat s", + "l a", + "g ang", + "ij f", + "f in", + "nat uur", + "t ussen", + "u g", + "in e", + "d a", + "b at", + "kom t", + "w acht", + "aa d", + "u t", + "é n", + "acht er", + "geb ie", + "ver k", + "lig t", + "c es", + "nie uw", + "van d", + "s t", + "n í", + "j e", + "p o", + "c h", + "r o", + "n a", + "s e", + "t o", + "n e", + "l e", + "k o", + "l a", + "d o", + "r a", + "n o", + "t e", + "h o", + "n ě", + "v a", + "l i", + "l o", + "ř e", + "c e", + "d e", + "v e", + "b y", + "n i", + "s k", + "t a", + "n á", + "z a", + "p ro", + "v o", + "v ě", + "m e", + "v á", + "s o", + "k a", + "r á", + "v y", + "z e", + "m i", + "p a", + "t i", + "st a", + "m ě", + "n é", + "ř i", + "ř í", + "m o", + "ž e", + "m a", + "j í", + "v ý", + "j i", + "d ě", + "r e", + "d a", + "k u", + "j a", + "c i", + "r u", + "č e", + "o b", + "t ě", + "m u", + "k y", + "d i", + "š e", + "k é", + "š í", + "t u", + "v i", + "p ře", + "v í", + "s i", + "n ý", + "o d", + "so u", + "v é", + "n y", + "r i", + "d y", + "b u", + "b o", + "t y", + "l á", + "l u", + "n u", + "ž i", + "m á", + "st i", + "c í", + "z á", + "p ra", + "sk é", + "m í", + "c o", + "d u", + "d á", + "by l", + "st o", + "s a", + "t í", + "je d", + "p ří", + "p ři", + "t é", + "s í", + "č i", + "v ní", + "č a", + "d í", + "z i", + "st u", + "p e", + "b a", + "d ní", + "ro z", + "va l", + "l í", + "s po", + "k á", + "b e", + "p i", + "no u", + "ta k", + "st e", + "r y", + "l é", + "vě t", + "se m", + "p ě", + "ko n", + "ne j", + "l y", + "ko u", + "ý ch", + "b ě", + "p r", + "f i", + "p rá", + "a le", + "ja ko", + "po d", + "ž í", + "z í", + "j sou", + "j sem", + "ch o", + "l ní", + "c ké", + "t á", + "m y", + "a k", + "h u", + "va t", + 
"pře d", + "h la", + "k e", + "st á", + "č í", + "š i", + "s le", + "k la", + "š tě", + "lo u", + "m ů", + "z na", + "ch á", + "o r", + "p ů", + "h a", + "b i", + "ta ké", + "d ů", + "no st", + "t ře", + "te r", + "p u", + "i n", + "v r", + "ve l", + "sk u", + "v še", + "t ní", + "do b", + "by la", + "č ní", + "ja k", + "v u", + "je ho", + "b ý", + "vá ní", + "ný ch", + "po u", + "te n", + "t ři", + "v z", + "st ře", + "d va", + "h le", + "č á", + "no sti", + "c k", + "v š", + "vo u", + "s u", + "h e", + "h ra", + "je n", + "s y", + "da l", + "po z", + "s lo", + "te l", + "d ru", + "de n", + "vš ak", + "g i", + "k dy", + "by lo", + "bu de", + "st ra", + "j ší", + "m é", + "me n", + "vý ch", + "ní m", + "s m", + "ko li", + "r ů", + "t ra", + "mů že", + "ne ní", + "ho d", + "b í", + "do u", + "sk a", + "t ý", + "st ě", + "u je", + "s á", + "pě t", + "ne s", + "k rá", + "to m", + "st ví", + "v ně", + "se d", + "s vé", + "p í", + "z o", + "mu sí", + "u ž", + "tí m", + "jí cí", + "jed no", + "t r", + "ča s", + "e v", + "č ty", + "sk ý", + "ni c", + "ev ro", + "to ho", + "h y", + "k ter", + "r ní", + "st í", + "s vě", + "pa k", + "vše ch", + "k ů", + "n g", + "á d", + "chá zí", + "a ni", + "a r", + "jed na", + "bý t", + "t ro", + "k ra", + "pr vní", + "m no", + "ské ho", + "p á", + "p la", + "le m", + "ne bo", + "ke m", + "st ro", + "s la", + "né ho", + "z de", + "dal ší", + "ř a", + "čty ři", + "h rá", + "dru h", + "l ně", + "v la", + "sk ých", + "š ko", + "pů so", + "pro to", + "v ů", + "sk á", + "ve n", + "še st", + "d ně", + "je ště", + "me zi", + "te k", + "s ko", + "ch a", + "ně koli", + "be z", + "g ra", + "ji ž", + "č ně", + "j á", + "s lu", + "z ná", + "ve r", + "sed m", + "k ro", + "ta m", + "a no", + "v lá", + "o sm", + "byl y", + "vá m", + "ck ý", + "te ch", + "dě ji", + "vel mi", + "le ži", + "va la", + "l ý", + "t vo", + "spo le", + "ch u", + "stu p", + "mo ž", + "evro p", + "g e", + "sta l", + "j de", + "ch y", + "ro di", + "je jí", + "po li", + "de vět", + "s me", + "a ž", + "té to", + "re m", + "d é", + "f or", + "u ni", + "f o", + "ten to", + "a u", + "ka ž", + "nu la", + "na d", + "by ch", + "mo c", + "sto u", + "e x", + "le n", + "k do", + "z d", + "pra co", + "to mu", + "ný m", + "ži vo", + "ze m", + "f e", + "f u", + "ná sle", + "j o", + "sk y", + "ji ch", + "h á", + "mě l", + "dě la", + "j sme", + "p re", + "ni ce", + "ste j", + "ne m", + "st ní", + "he m", + "ná ro", + "z u", + "b li", + "ni t", + "pa r", + "a l", + "poz ději", + "ta ko", + "n ce", + "če r", + "ší m", + "ně co", + "vá l", + "ře j", + "krá t", + "á lní", + "u r", + ". 
.", + "a si", + "kter é", + "sta v", + "ma jí", + "my s", + "do bě", + "s ně", + "ce n", + "z y", + "z ku", + "t ů", + "ch od", + "s pě", + "je jich", + "sou čas", + "d r", + "va li", + "ri e", + "k te", + "pr ů", + "ze ní", + "pa t", + "a n", + "po tře", + "de m", + "d nes", + "ze mí", + "sa mo", + "zna m", + "b ra", + "má m", + "te dy", + "g o", + "hla vní", + "pou ží", + "b ní", + "ve de", + "le p", + "je k", + "pra v", + "poli ti", + "d ne", + "je m", + "le t", + "če ní", + "pro b", + "ne ž", + "dě l", + "fi l", + "č o", + "cí ch", + "st é", + "d lou", + "h i", + "a by", + "to u", + "několi k", + "d la", + "vy u", + "vi t", + "ho u", + "ck ých", + "no vé", + "či n", + "st y", + "dě lá", + "k ý", + "ob la", + "pod le", + "ra n", + "dů leži", + "ta to", + "po ku", + "ko ne", + "d ý", + "d vě", + "ž ád", + "nou t", + "t ku", + "t vr", + "cké ho", + "ro v", + "r é", + "te le", + "p sa", + "s vět", + "ti vní", + "do sta", + "te m", + "še l", + "druh é", + "s kou", + "ž o", + "jed ná", + "vý znam", + "prob lé", + "pu bli", + "vá n", + "od po", + "pod po", + "d le", + "ja ké", + "še ní", + "ví m", + "bě hem", + "na chází", + "s lou", + "pou ze", + "o tá", + "p lo", + "to vé", + "vět ši", + "ko mi", + "va jí", + "ty to", + "zá pa", + "z mě", + "mo h", + "ví ce", + "spole č", + "au to", + "pro ti", + "st ru", + "dě t", + "chá ze", + "že l", + "с т", + "е н", + "н о", + "н а", + "п р", + "т о", + "п о", + "р а", + "г о", + "к о", + "н е", + "в о", + "в а", + "е т", + "е р", + "н и", + "е л", + "и т", + "н ы", + "з а", + "р о", + "ен и", + "к а", + "л и", + "е м", + "д а", + "о б", + "л а", + "д о", + "с я", + "т ь", + "о т", + "л о", + "л ь", + "е д", + "с о", + "м и", + "р е", + "м о", + "ц и", + "пр о", + "т а", + "э то", + "к и", + "р у", + "пр и", + "т и", + "с е", + "ст а", + "в ы", + "м ы", + "в и", + "б ы", + "м а", + "е с", + "л я", + "ст и", + "л е", + "ч то", + "м е", + "р и", + "ч а", + "о д", + "е й", + "ел ь", + "ени я", + "г а", + "н у", + "с и", + "п а", + "ра з", + "б о", + "ст о", + "с у", + "с а", + "д у", + "е го", + "е ст", + "и н", + "ит ь", + "и з", + "ж е", + "м у", + "п ер", + "по д", + "ени е", + "с ь", + "к у", + "пр ед", + "но го", + "ны х", + "в ер", + "т е", + "но й", + "ци и", + "д е", + "р ы", + "д ел", + "л ю", + "в е", + "о н", + "м ен", + "г и", + "н я", + "б у", + "пр а", + "в се", + "ет ся", + "ст ь", + "ж а", + "до л", + "ж и", + "б е", + "ко н", + "с л", + "ш и", + "д и", + "ст в", + "с ко", + "ны е", + "ч и", + "ю т", + "д ер", + "ст ра", + "т ы", + "х од", + "щ и", + "з о", + "з на", + "но сти", + "ч ес", + "в ля", + "ва ть", + "о р", + "по л", + "в ет", + "та к", + "ш а", + "т у", + "с во", + "пр е", + "о на", + "ит ель", + "ны й", + "с ло", + "ка к", + "в л", + "но сть", + "х о", + "мо ж", + "п е", + "д ля", + "ни я", + "но е", + "ра с", + "дол ж", + "да р", + "т ель", + "с ка", + "п у", + "ст во", + "ко то", + "ра б", + "е е", + "ро д", + "э ти", + "с об", + "о ру", + "ж ен", + "ны м", + "ит и", + "ни е", + "ко м", + "д ет", + "ст у", + "г у", + "п и", + "ме ж", + "ени ю", + "т ер", + "раб от", + "во з", + "ци я", + "ко й", + "щ ест", + "г ра", + "з и", + "р я", + "меж ду", + "ст ва", + "в с", + "ел о", + "ш е", + "м ер", + "б а", + "з ы", + "л у", + "а ль", + "д ей", + "г ла", + "на род", + "к ти", + "пред ста", + "л ся", + "я вля", + "с ки", + "но в", + "ед ин", + "ро в", + "и с", + "ни ма", + "р ем", + "ход и", + "так же", + "д ру", + "а ть", + "сл ед", + "го во", + "на я", + "ю щи", + "ен ь", + "кото ры", + "х от", + "в у", + "и х", + "ем у", + "ч 
ит", + "ва ж", + "ор га", + "чес ки", + "щ е", + "к е", + "х а", + "по с", + "то м", + "бо ль", + "м не", + "па с", + "об ъ", + "пра в", + "кон ф", + "сл у", + "под дер", + "ст ви", + "на ш", + "ль ко", + "сто я", + "ну ю", + "л ем", + "ен ных", + "к ра", + "д ы", + "между народ", + "г да", + "не об", + "го су", + "ств у", + "ени и", + "госу дар", + "к то", + "и м", + "ч ест", + "р ет", + "во про", + "л ен", + "ел и", + "ро ва", + "ци й", + "на м", + "это й", + "ж ения", + "необ ходи", + "мен я", + "бы ло", + "си ли", + "ф и", + "в я", + "ш ь", + "это го", + "о ни", + "орга ни", + "бе зо", + "пр об", + "и ме", + "ре ш", + "б и", + "безо пас", + "ют ся", + "о ста", + "ен но", + "го д", + "ел а", + "предста в", + "ть ся", + "сло во", + "органи за", + "долж ны", + "это м", + "б ла", + "ч е", + "ч у", + "бла го", + "это му", + "в рем", + "с пе", + "но м", + "ени й", + "с по", + "на с", + "не т", + "з у", + "в ед", + "е ще", + "ска за", + "се й", + "ер ен", + "да н", + "са м", + "ел я", + "ра н", + "зы ва", + "явля ется", + "бу дет", + "кти в", + "т ре", + "дел е", + "м от", + "конф ерен", + "ла сь", + "ча с", + "сто ро", + "ко го", + "е з", + "не й", + "о с", + "ли сь", + "раз ору", + "пер е", + "с си", + "ны ми", + "про ц", + "го ло", + "ч ело", + "бо ле", + "чело ве", + "с ер", + "п л", + "ч ет", + "стра н", + "п я", + "бы л", + "к ла", + "то в", + "ж д", + "дел а", + "е ра", + "у же", + "со вет", + "г ен", + "безопас ности", + "ц а", + "се да", + "по з", + "от вет", + "проб лем", + "на ко", + "т ем", + "до ста", + "п ы", + "щ а", + "во й", + "су щест", + "необходи мо", + "бы ть", + "мож ет", + "д ем", + "что бы", + "е к", + "ч ер", + "у сили", + "ре с", + "ру д", + "един енных", + "д об", + "до сти", + "ств ен", + "я дер", + "год ня", + "ка за", + "се годня", + "сей час", + "то лько", + "во д", + "ес ь", + "м ного", + "бу ду", + "е в", + "ест ь", + "т ри", + "об щест", + ". 
.", + "я вл", + "вы сту", + "р ед", + "с чит", + "с ит", + "деле га", + "ло ж", + "это т", + "ф ор", + "к лю", + "воз мож", + "ва ния", + "б ли", + "и ли", + "в з", + "на ций", + "ско го", + "при ня", + "п ла", + "о ч", + "ить ся", + "ст е", + "на ши", + "которы е", + "а р", + "име ет", + "с от", + "зна ч", + "пер ь", + "след у", + "ен ы", + "та ки", + "объ единенных", + "ст ро", + "те перь", + "б ле", + "благо дар", + "раз в", + "а н", + "жи ва", + "оч ень", + "я т", + "бе з", + "об ес", + "г ро", + "ло сь", + "с ы", + "организа ции", + "ч лен", + "то го", + "она ль", + "ж да", + "все х", + "с вя", + "боле е", + "со в", + "ко гда", + "во т", + "к ре", + "к ры", + "по этому", + "во ль", + "о й", + "ген ера", + "ч ем", + "л ы", + "пол ити", + "в ен", + "конферен ции", + "проц ес", + "б я", + "ит е", + "от но", + "разв ити", + "а ф", + "ю щ", + "в но", + "ми р", + "ни и", + "ка я", + "а с", + "итель но", + "в то", + "ени ем", + "генера ль", + "пр от", + "вс ем", + "сам бле", + "ас самбле", + "о м", + "з д", + "с мот", + "ре ги", + "ч его", + "од нако", + "усили я", + "дей стви", + "ч но", + "у ча", + "об раз", + "во с", + "э та", + "пер его", + "гово р", + "ва м", + "мо ло", + "врем я", + "д ь", + "хот ел", + "г ру", + "за явл", + "пре доста", + "по ль", + "не е", + "ре зо", + "перего во", + "резо лю", + "к рет", + "поддер ж", + "обес пе", + "не го", + "представ ит", + "на де", + "к ри", + "ч ь", + "про ек", + "л ет", + "дру ги", + "ا ل", + "َ ا", + "و َ", + "ّ َ", + "ِ ي", + "أ َ", + "ل َ", + "ن َ", + "ال ْ", + "ه ُ", + "ُ و", + "م ا", + "ن ْ", + "م ن", + "ع َ", + "ن ا", + "ل ا", + "م َ", + "ت َ", + "ف َ", + "أ ن", + "ل ي", + "م ِ", + "ا ن", + "ف ي", + "ر َ", + "ي َ", + "ه ِ", + "م ْ", + "ق َ", + "ب ِ", + "ل ى", + "ي ن", + "إ ِ", + "ل ِ", + "و ا", + "ك َ", + "ه ا", + "ً ا", + "م ُ", + "و ن", + "ال م", + "ب َ", + "ي ا", + "ذ ا", + "س ا", + "ال ل", + "م ي", + "ي ْ", + "ر ا", + "ر ي", + "ل ك", + "م َا", + "ن َّ", + "ل م", + "إ ن", + "س ت", + "و م", + "ّ َا", + "ل َا", + "ه م", + "ّ ِ", + "ك ُ", + "ك ان", + "س َ", + "ب ا", + "د ي", + "ح َ", + "ع ْ", + "ب ي", + "ال أ", + "و ل", + "ف ِي", + "ر ِ", + "د ا", + "مِ نْ", + "ُو نَ", + "و ْ", + "ه َا", + "ّ ُ", + "ال س", + "ال َ", + "ن ي", + "ل ْ", + "ت ُ", + "ه ل", + "ر ة", + "د َ", + "س ْ", + "ت ِ", + "ن َا", + "ر ْ", + "الل َّ", + "سا مي", + "ك ن", + "ك ل", + "ه َ", + "عَ لَ", + "ع لى", + "م ع", + "إ لى", + "ق د", + "ال ر", + "ُو ا", + "ي ر", + "ع ن", + "ي ُ", + "ن ِ", + "ب ْ", + "ال ح", + "هُ مْ", + "ق ا", + "ذ ه", + "ال ت", + "ِي نَ", + "ج َ", + "ه ذا", + "ع د", + "ال ع", + "د ْ", + "قَ الَ", + "ر ُ", + "ي م", + "ي ة", + "ن ُ", + "خ َ", + "ر ب", + "ال ك", + "و َا", + "أ نا", + "ة ِ", + "ال ن", + "ح د", + "ع ِ", + "ت ا", + "ه و", + "ف ا", + "ع ا", + "ال ش", + "ل ُ", + "ي ت", + "ذ َا", + "ي ع", + "ال ذ", + "ح ْ", + "ال ص", + "إِ نَّ", + "ج ا", + "ع لي", + "ك َا", + "ب ُ", + "ت ع", + "و ق", + "م ل", + "ل َّ", + "ي د", + "أ خ", + "ر ف", + "ت ي", + "ال ِ", + "ّ ا", + "ذ لك", + "أَ نْ", + "س ِ", + "ت وم", + "م ر", + "مَ نْ", + "ب ل", + "ال ق", + "الل ه", + "ِي َ", + "ك م", + "ذ َ", + "ع ل", + "ح ب", + "س ي", + "ع ُ", + "ال ج", + "ال د", + "ش َ", + "ت ك", + "ف ْ", + "ص َ", + "ل ل", + "د ِ", + "ب ر", + "ف ِ", + "ت ه", + "أ ع", + "ت ْ", + "ق ْ", + "الْ أَ", + "ئ ِ", + "عَ نْ", + "و ر", + "ح ا", + "ال َّ", + "م ت", + "ف ر", + "د ُ", + "ه نا", + "وَ أَ", + "ت ب", + "ة ُ", + "أ ي", + "س ب", + "ري د", + "و ج", + "كُ مْ", + "ح ِ", + "ك ْ", + "د ر", + "َا ء", + "ه ذه", + "ال ط", + "الْ مُ", + "د ة", + "ق ل", + "غ َ", + "ي وم", + "الَّ ذ", + "ك ر", + "ت ر", + "ك 
ِ", + "ك ي", + "عَلَ ى", + "رَ ب", + "ع ة", + "ق ُ", + "ج ْ", + "ف ض", + "ل ة", + "ه ْ", + "ر َا", + "وَ لَ", + "الْ مَ", + "أَ نَّ", + "ي َا", + "أ ُ", + "ش ي", + "اللَّ هُ", + "لَ ى", + "ق ِ", + "أ ت", + "عَلَ يْ", + "اللَّ هِ", + "ال ب", + "ض َ", + "ة ً", + "ق ي", + "ا ر", + "ب د", + "خ ْ", + "سْ تَ", + "ط َ", + "قَ دْ", + "ذه ب", + "أ م", + "ما ذا", + "وَ إِ", + "ة ٌ", + "و نَ", + "لي لى", + "و لا", + "ح ُ", + "ه ي", + "ص ل", + "ال خ", + "و د", + "لي س", + "ل دي", + "ق ال", + "كَا نَ", + "م َّ", + "ح ي", + "ت م", + "ل ن", + "وَ لَا", + "ب ع", + "يم كن", + "س ُ", + "ة َ", + "ح ت", + "ر ًا", + "ك ا", + "ش ا", + "هِ مْ", + "لَ هُ", + "ز َ", + "دا ً", + "م س", + "ك ث", + "الْ عَ", + "ج ِ", + "ص ْ", + "ف َا", + "ل ه", + "و ي", + "ع َا", + "هُ وَ", + "ب ِي", + "ب َا", + "أ س", + "ث َ", + "ل ِي", + "ر ض", + "الر َّ", + "لِ كَ", + "ت َّ", + "ف ُ", + "ق ة", + "ف عل", + "مِ ن", + "ال آ", + "ث ُ", + "س م", + "م َّا", + "بِ هِ", + "ت ق", + "خ ر", + "ل قد", + "خ ل", + "ش ر", + "أن ت", + "ل َّا", + "س ن", + "الس َّ", + "الذ ي", + "س َا", + "و ما", + "ز ل", + "و ب", + "أ ْ", + "إ ذا", + "ر ِي", + "ح ة", + "ن ِي", + "الْ حَ", + "وَ قَالَ", + "ب ه", + "ة ٍ", + "س أ", + "ر ٌ", + "ب ال", + "م ة", + "ش ْ", + "و ت", + "عن د", + "ف س", + "بَ عْ", + "ه ر", + "ق ط", + "أ ح", + "إن ه", + "و ع", + "ف ت", + "غ ا", + "هنا ك", + "ب ت", + "مِ نَ", + "س ر", + "ذَ لِكَ", + "ر س", + "حد ث", + "غ ْ", + "ّ ِي", + "ال إ", + "وَ يَ", + "ج ل", + "ا ست", + "ق ِي", + "ع ب", + "و س", + "ي ش", + "الَّذ ِينَ", + "تا ب", + "د ِي", + "ج ب", + "ك ون", + "ب ن", + "ال ث", + "لَ يْ", + "ب عد", + "وَ الْ", + "فَ أَ", + "ع م", + "هُ م", + "ت ن", + "ذ ْ", + "أ ص", + "أ ين", + "رَب ِّ", + "الذ ين", + "إِ ن", + "ب ين", + "ج ُ", + "عَلَيْ هِ", + "ح َا", + "ل و", + "ست ط", + "ظ ر", + "لَ مْ", + "ء ِ", + "كُ ل", + "ط ل", + "ت َا", + "ض ُ", + "كن ت", + "ل ًا", + "م ٌ", + "ق بل", + "ـ ـ", + "ذ ِ", + "قَ وْ", + "ص ِ", + "م ًا", + "كان ت", + "ص ا", + "ي ق", + "ال ف", + "ال نا", + "م ٍ", + "إِ نْ", + "ال نَّ", + "ج د", + "وَ مَا", + "ت ت", + "ب ح", + "م كان", + "كي ف", + "ّ ة", + "ال ا", + "ج َا", + "أ و", + "سا عد", + "ض ِ", + "إ لا", + "را ً", + "ق َا", + "ر أ", + "ع ت", + "أ حد", + "ه د", + "ض ا", + "ط ر", + "أ ق", + "ما ء", + "د َّ", + "ال با", + "م ُو", + "أَ وْ", + "ط ا", + "ق ُو", + "خ ِ", + "ت ل", + "ستط يع", + "د َا", + "الن َّا", + "إ لَى", + "وَ تَ", + "هَ ذَا", + "ب ة", + "علي ك", + "ج ر", + "ال من", + "ز ا", + "ر ٍ", + "د ع", + "ّ ًا", + "س ة", + "ثُ مَّ", + "شي ء", + "ال غ", + "ت ح", + "ر ُونَ", + "ال يوم", + "م ِي", + "ن ُوا", + "أ ر", + "تُ مْ", + "ع ر", + "ي ف", + "أ ب", + "د ًا", + "ص َا", + "الت َّ", + "أ ريد", + "ال ز", + "يَ وْ", + "إ لي", + "ج ي", + "يَ عْ", + "فض ل", + "ال إن", + "أن ه", + "n g", + "i 4", + "a n", + "s h", + "z h", + "i 2", + "ng 1", + "u 4", + "i 1", + "ng 2", + "d e", + "j i", + "a o", + "x i", + "u 3", + "de 5", + "e 4", + "i 3", + "ng 4", + "an 4", + "e n", + "u o", + "sh i4", + "an 2", + "u 2", + "c h", + "u 1", + "ng 3", + "a 1", + "an 1", + "e 2", + "a 4", + "e i4", + "o ng1", + "a i4", + "ao 4", + "h u", + "a ng1", + "l i", + "y o", + "an 3", + "w ei4", + "uo 2", + "n 1", + "en 2", + "ao 3", + "e 1", + "y u", + "q i", + "e ng2", + "zh o", + "a ng3", + "a ng4", + "a ng2", + "uo 4", + "m i", + "g e4", + "y i1", + "g uo2", + "e r", + "b i", + "a 3", + "h e2", + "e 3", + "y i2", + "d i4", + "zh ong1", + "b u4", + "g u", + "a i2", + "n 2", + "z ai4", + "sh i2", + "e ng1", + "r en2", + "o ng2", + "xi an4", + "y i", + "x u", + "n 4", + "l i4", + "en 4", + "y u2", + "e i2", + "yi2 ge4", + "o u4", + "e i3", 
+ "d i", + "u i4", + "a 2", + "yo u3", + "ao 1", + "d a4", + "ch eng2", + "en 1", + "e ng4", + "y i4", + "s i1", + "zh i4", + "ji a1", + "yu an2", + "n i", + "t a1", + "de5 yi2ge4", + "k e1", + "sh u3", + "x i1", + "j i2", + "ao 2", + "t i", + "o u3", + "o ng4", + "xi a4", + "a i1", + "g ong1", + "zh i1", + "en 3", + "w ei2", + "j u", + "xu e2", + "q u1", + "zho u1", + "er 3", + "mi ng2", + "zho ng3", + "l i3", + "w u4", + "y i3", + "uo 1", + "e 5", + "j i4", + "xi ng2", + "ji an4", + "hu a4", + "y u3", + "uo 3", + "j i1", + "a i3", + "z uo4", + "h ou4", + "hu i4", + "e i1", + "ni an2", + "q i2", + "p i", + "d ao4", + "sh eng1", + "de 2", + "d ai4", + "u an2", + "zh e4", + "zh eng4", + "b en3", + "sh ang4", + "zh u3", + "b ei4", + "y e4", + "ch u1", + "zh an4", + "l e5", + "l ai2", + "sh i3", + "n an2", + "r en4", + "yo u2", + "k e4", + "b a1", + "f u4", + "d ui4", + "y a4", + "m ei3", + "z i4", + "xi n1", + "ji ng1", + "zh u", + "n 3", + "yo ng4", + "m u4", + "ji ao4", + "y e3", + "ji n4", + "bi an4", + "l u4", + "q i1", + "sh e4", + "xi ang1", + "o ng3", + "sh u4", + "d ong4", + "s uo3", + "gu an1", + "s an1", + "b o", + "t e4", + "d uo1", + "f u2", + "mi n2", + "l a1", + "zh i2", + "zh en4", + "o u1", + "w u3", + "m a3", + "i 5", + "z i5", + "j u4", + "er 4", + "y ao4", + "xia4 de5yi2ge4", + "s i4", + "t u2", + "sh an1", + "z ui4", + "ch u", + "yi n1", + "er 2", + "t ong2", + "d ong1", + "y u4", + "y an2", + "qi an2", + "shu3 xia4de5yi2ge4", + "ju n1", + "k e3", + "w en2", + "f a3", + "l uo2", + "zh u4", + "x i4", + "k ou3", + "b ei3", + "ji an1", + "f a1", + "di an4", + "ji ang1", + "wei4 yu2", + "xi ang4", + "zh i3", + "e ng3", + "f ang1", + "l an2", + "sh u", + "r i4", + "li an2", + "sh ou3", + "m o", + "qi u2", + "ji n1", + "h uo4", + "shu3xia4de5yi2ge4 zhong3", + "f en1", + "n ei4", + "g ai1", + "mei3 guo2", + "u n2", + "g e2", + "b ao3", + "qi ng1", + "g ao1", + "t ai2", + "d u", + "xi ao3", + "ji e2", + "ti an1", + "ch ang2", + "q uan2", + "li e4", + "h ai3", + "f ei1", + "t i3", + "ju e2", + "o u2", + "c i3", + "z u2", + "n i2", + "bi ao3", + "zhong1 guo2", + "d u4", + "yu e4", + "xi ng4", + "sh eng4", + "ch e1", + "d an1", + "ji e1", + "li n2", + "pi ng2", + "f u3", + "g u3", + "ji e4", + "w o", + "v 3", + "sh eng3", + "n a4", + "yu an4", + "zh ang3", + "gu an3", + "d ao3", + "z u3", + "di ng4", + "di an3", + "c eng2", + "ren2 kou3", + "t ai4", + "t ong1", + "g uo4", + "n eng2", + "ch ang3", + "hu a2", + "li u2", + "yi ng1", + "xi ao4", + "c i4", + "bian4 hua4", + "li ang3", + "g ong4", + "zho ng4", + "de5 yi1", + "s e4", + "k ai1", + "w ang2", + "ji u4", + "sh i1", + "sh ou4", + "m ei2", + "k u", + "s u", + "f eng1", + "z e2", + "tu2 shi4", + "t i2", + "q i4", + "ji u3", + "sh en1", + "zh e3", + "ren2kou3 bian4hua4", + "ren2kou3bian4hua4 tu2shi4", + "di4 qu1", + "y ang2", + "m en", + "men 5", + "l ong2", + "bi ng4", + "ch an3", + "zh u1", + "w ei3", + "w ai4", + "xi ng1", + "bo 1", + "b i3", + "t ang2", + "hu a1", + "bo 2", + "shu i3", + "sh u1", + "d ou1", + "s ai4", + "ch ao2", + "b i4", + "li ng2", + "l ei4", + "da4 xue2", + "f en4", + "shu3 de5", + "m u3", + "ji ao1", + "d ang1", + "ch eng1", + "t ong3", + "n v3", + "q i3", + "y an3", + "mi an4", + "l uo4", + "ji ng4", + "g e1", + "r u4", + "d an4", + "ri4 ben3", + "p u3", + "yu n4", + "hu ang2", + "wo 3", + "l v", + "h ai2", + "shi4 yi1", + "xi e1", + "yi ng3", + "w u2", + "sh en2", + "w ang3", + "gu ang3", + "li u4", + "s u4", + "shi4 zhen4", + "c an1", + "c ao3", + "xi a2", + "k a3", + "d a2", + "h u4", + "b an4", + "d 
ang3", + "h u2", + "z ong3", + "de ng3", + "de5yi2ge4 shi4zhen4", + "ch uan2", + "mo 4", + "zh ang1", + "b an1", + "mo 2", + "ch a2", + "c e4", + "zhu3 yao4", + "t ou2", + "j u2", + "shi4 wei4yu2", + "s a4", + "u n1", + "ke3 yi3", + "d u1", + "h an4", + "li ang4", + "sh a1", + "ji a3", + "z i1", + "lv 4", + "f u1", + "xi an1", + "x u4", + "gu ang1", + "m eng2", + "b ao4", + "yo u4", + "r ong2", + "zhi1 yi1", + "w ei1", + "m ao2", + "guo2 jia1", + "c ong2", + "g ou4", + "ti e3", + "zh en1", + "d u2", + "bi an1", + "c i2", + "q u3", + "f an4", + "xi ang3", + "m en2", + "j u1", + "h ong2", + "z i3", + "ta1 men5", + "ji 3", + "z ong1", + "zhou1 de5yi2ge4shi4zhen4", + "t uan2", + "ji ng3", + "gong1 si1", + "xi e4", + "l i2", + "li4 shi3", + "b ao1", + "g ang3", + "gu i1", + "zh eng1", + "zhi2 wu4", + "ta1 de5", + "pi n3", + "zhu an1", + "ch ong2", + "shi3 yong4", + "w a3", + "sh uo1", + "chu an1", + "l ei2", + "w an1", + "h uo2", + "q u", + "s u1", + "z ao3", + "g ai3", + "q u4", + "g u4", + "l u", + "x i2", + "h ang2", + "yi ng4", + "c un1", + "g en1", + "yi ng2", + "ti ng2", + "cheng2 shi4", + "ji ang3", + "li ng3", + "l un2", + "bu4 fen4", + "de ng1", + "xu an3", + "dong4 wu4", + "de2 guo2", + "xi an3", + "f an3", + "zh e5", + "h an2", + "h ao4", + "m i4", + "r an2", + "qi n1", + "ti ao2", + "zh an3", + "h i", + "k a", + "n o", + "t e", + "s u", + "s hi", + "t a", + "t o", + "n a", + "w a", + "o u", + "r u", + "n i", + "k u", + "k i", + "g a", + "d e", + "k o", + "m a", + "r e", + "r a", + "m o", + "t su", + "w o", + "e n", + "r i", + "s a", + "d a", + "s e", + "j i", + "h a", + "c hi", + "k e", + "te ki", + "m i", + "y ou", + "s h", + "s o", + "y o", + "y a", + "na i", + "t te", + "a ru", + "b a", + "u u", + "t ta", + "ka i", + "ka n", + "shi te", + "m e", + "d o", + "mo no", + "se i", + "r o", + "ko to", + "ka ra", + "shi ta", + "b u", + "m u", + "c h", + "su ru", + "k ou", + "g o", + "ma su", + "ta i", + "f u", + "k en", + "i u", + "g en", + "wa re", + "shi n", + "z u", + "a i", + "o n", + "o ku", + "g i", + "d ou", + "n e", + "y uu", + "i ru", + "i te", + "ji ko", + "de su", + "j u", + "ra re", + "sh u", + "b e", + "sh ou", + "s ha", + "se kai", + "s ou", + "k you", + "ma shita", + "s en", + "na ra", + "sa n", + "ke i", + "i ta", + "a ri", + "i tsu", + "ko no", + "j ou", + "na ka", + "ch ou", + "so re", + "g u", + "na ru", + "ga ku", + "re ba", + "g e", + "h o", + "i n", + "hi to", + "sa i", + "na n", + "da i", + "tsu ku", + "shi ki", + "sa re", + "na ku", + "p p", + "bu n", + "ju n", + "so no", + "ka ku", + "z ai", + "b i", + "to u", + "wa ta", + "sh uu", + "i i", + "te i", + "ka re", + "y u", + "shi i", + "ma de", + "sh o", + "a n", + "ke reba", + "shi ka", + "i chi", + "ha n", + "de ki", + "ni n", + "ware ware", + "na kereba", + "o ite", + "h ou", + "ya ku", + "ra i", + "mu jun", + "l e", + "yo ku", + "bu tsu", + "o o", + "ko n", + "o mo", + "ga e", + "nara nai", + "ta chi", + "z en", + "ch uu", + "kan gae", + "ta ra", + "to ki", + "ko ro", + "mujun teki", + "z e", + "na ga", + "ji n", + "shi ma", + "te n", + "i ki", + "i ku", + "no u", + "i masu", + "r ou", + "h on", + "ka e", + "t to", + "ko re", + "ta n", + "ki ta", + "i s", + "da tta", + "ji tsu", + "ma e", + "i e", + "me i", + "da n", + "h e", + "to ku", + "dou itsu", + "ri tsu", + "k yuu", + "h you", + "rare ta", + "kei sei", + "k kan", + "rare ru", + "m ou", + "do ko", + "r you", + "da ke", + "naka tta", + "so ko", + "ta be", + "e r", + "ha na", + "c o", + "fu ku", + "p a", + "so n", + "ya su", + "ch o", + "wata ku", + "ya ma", 
+ "z a", + "k yo", + "gen zai", + "b oku", + "a ta", + "j a", + "ka wa", + "ma sen", + "j uu", + "ro n", + "b o", + "na tte", + "wataku shi", + "yo tte", + "ma i", + "g ou", + "ha i", + "mo n", + "ba n", + "ji shin", + "c a", + "re te", + "n en", + "o ka", + "ka gaku", + "na tta", + "p o", + "ka ru", + "na ri", + "m en", + "ma ta", + "e i", + "ku ru", + "ga i", + "ka ri", + "sha kai", + "kou i", + "yo ri", + "se tsu", + "j o", + "re ru", + "to koro", + "ju tsu", + "i on", + "sa ku", + "tta i", + "c ha", + "nin gen", + "n u", + "c e", + "ta me", + "kan kyou", + "de n", + "o oku", + "i ma", + "wata shi", + "tsuku ru", + "su gi", + "b en", + "ji bun", + "shi tsu", + "ke ru", + "ki n", + "ki shi", + "shika shi", + "mo to", + "ma ri", + "i tte", + "de shita", + "n de", + "ari masu", + "te r", + "z ou", + "ko e", + "ze ttai", + "kkan teki", + "h en", + "re kishi", + "deki ru", + "tsu ka", + "l a", + "i tta", + "o i", + "ko butsu", + "mi ru", + "sh oku", + "shi masu", + "gi jutsu", + "g you", + "jou shiki", + "a tta", + "ho do", + "ko ko", + "tsuku rareta", + "z oku", + "hi tei", + "ko ku", + "rekishi teki", + "ke te", + "o ri", + "i mi", + "ka ko", + "naga ra", + "ka karu", + "shu tai", + "ha ji", + "ma n", + "ta ku", + "ra n", + "douitsu teki", + "z o", + "me te", + "re i", + "tsu u", + "sare te", + "gen jitsu", + "p e", + "s t", + "ba i", + "na wa", + "ji kan", + "wa ru", + "r t", + "a tsu", + "so ku", + "koui teki", + "a ra", + "u ma", + "a no", + "i de", + "ka ta", + "te tsu", + "ga wa", + "ke do", + "re ta", + "mi n", + "sa you", + "tte ru", + "to ri", + "p u", + "ki mi", + "b ou", + "mu ra", + "sare ru", + "ma chi", + "k ya", + "o sa", + "kon na", + "a ku", + "a l", + "sare ta", + "i pp", + "shi ku", + "u chi", + "hito tsu", + "ha tara", + "tachi ba", + "shi ro", + "ka tachi", + "to mo", + "e te", + "me ru", + "ni chi", + "da re", + "ka tta", + "e ru", + "su ki", + "a ge", + "oo ki", + "ma ru", + "mo ku", + "o ko", + "kangae rareru", + "o to", + "tan ni", + "ta da", + "tai teki", + "mo tte", + "ki nou", + "shi nai", + "k ki", + "u e", + "ta ri", + "l i", + "ra nai", + "k kou", + "mi rai", + "pp on", + "go to", + "hi n", + "hi tsu", + "te ru", + "mo chi", + "ka tsu", + "re n", + "n yuu", + "su i", + "zu ka", + "tsu ite", + "no mi", + "su gu", + "ku da", + "tetsu gaku", + "i ka", + "ron ri", + "o ki", + "ni ppon", + "p er", + "shi mashita", + "chi shiki", + "cho kkanteki", + "su ko", + "t ion", + "ku u", + "a na", + "a rou", + "ka tte", + "ku ri", + "i nai", + "hyou gen", + "i shiki", + "do ku", + "a tte", + "a tara", + "to n", + "wa ri", + "ka o", + "sei san", + "hana shi", + "s i", + "ka ke", + "na ji", + "su nawa", + "sunawa chi", + "u go", + "su u", + "ba ra", + "le v", + "hi ro", + "i wa", + "be tsu", + "yo i", + "se ru", + "shite ru", + "rare te", + "to shi", + "se ki", + "tai ritsu", + "wa kara", + "to kyo", + "k ka", + "k yoku", + "u n", + "i ro", + "mi te", + "sa ki", + "kan ji", + "mi ta", + "su be", + "r yoku", + "ma tta", + "kuda sai", + "omo i", + "ta no", + "ware ru", + "co m", + "hitsu you", + "ka shi", + "re nai", + "kan kei", + "a to", + "ga tte", + "o chi", + "mo tsu", + "in g", + "son zai", + "l l", + "o re", + "tai shite", + "a me", + "sei mei", + "ka no", + "gi ri", + "kangae ru", + "yu e", + "a sa", + "o naji", + "yo ru", + "ni ku", + "osa ka", + "suko shi", + "c k", + "ta ma", + "kano jo", + "ki te", + "mon dai", + "a mari", + "e ki", + "ko jin", + "ha ya", + "i t", + "de te", + "atara shii", + "a wa", + "ga kkou", + "tsu zu", + "shu kan", + "i mashita", + "mi na", + 
"ata e", + "da rou", + "hatara ku", + "ga ta", + "da chi", + "ma tsu", + "ari masen", + "sei butsu", + "mi tsu", + "he ya", + "yasu i", + "d i", + "de ni", + "no ko", + "ha ha", + "do mo", + "ka mi", + "su deni", + "na o", + "ra ku", + "i ke", + "a ki", + "me ta", + "l o", + "ko domo", + "so shite", + "ga me", + "ba kari", + "to te", + "ha tsu", + "mi se", + "moku teki", + "da kara", + "s z", + "e l", + "g y", + "e n", + "t t", + "e m", + "a n", + "a k", + "e r", + "a z", + "a l", + "e t", + "o l", + "e g", + "e k", + "m i", + "o n", + "é s", + "c s", + "a t", + "á r", + "h o", + "e z", + "á l", + "i s", + "á n", + "o r", + "a r", + "e gy", + "e s", + "é r", + "á t", + "o tt", + "e tt", + "m eg", + "t a", + "o k", + "o s", + "ho gy", + "n em", + "é g", + "n y", + "k i", + "é l", + "h a", + "á s", + "ü l", + "i n", + "mi n", + "n a", + "e d", + "o m", + "i k", + "k ö", + "m a", + "n i", + "v a", + "v ol", + "é t", + "b b", + "f el", + "i g", + "l e", + "r a", + "é n", + "t e", + "d e", + "a d", + "ó l", + "b e", + "on d", + "j a", + "r e", + "u l", + "b en", + "n ek", + "u t", + "vol t", + "b an", + "ö r", + "o g", + "a p", + "o d", + "á g", + "n k", + "é k", + "v al", + "k or", + "a m", + "i l", + "í t", + "á k", + "b a", + "u d", + "sz er", + "min d", + "o z", + "é p", + "el l", + "ér t", + "m ond", + "i t", + "sz t", + "n ak", + "a mi", + "n e", + "ő l", + "cs ak", + "n é", + "ma g", + "ol y", + "m er", + "ál l", + "án y", + "ö n", + "ö l", + "min t", + "m ár", + "ö tt", + "na gy", + "é sz", + "az t", + "el ő", + "t ud", + "o t", + "é ny", + "á z", + "m ég", + "kö z", + "el y", + "s ég", + "en t", + "s em", + "ta m", + "h et", + "h al", + "f i", + "a s", + "v an", + "ho z", + "v e", + "u k", + "k ez", + "á m", + "v el", + "b er", + "a j", + "u nk", + "i z", + "va gy", + "m os", + "sz em", + "em ber", + "f og", + "mer t", + "ü k", + "l en", + "ö s", + "e j", + "t al", + "h at", + "t ak", + "h i", + "m ás", + "s ág", + "ett e", + "l eg", + "ü nk", + "h át", + "sz a", + "on y", + "ez t", + "mind en", + "en d", + "ül t", + "h an", + "j ó", + "k is", + "á j", + "in t", + "ú gy", + "i d", + "mos t", + "ar t", + "í r", + "k er", + "i tt", + "a tt", + "el t", + "mond ta", + "k ell", + "l á", + "ak i", + "ál t", + "ér d", + "t ö", + "l an", + "v ár", + "h ol", + "t el", + "l át", + "ő k", + "v et", + "s e", + "ut án", + "k ét", + "na p", + "í v", + "ál y", + "v ég", + "ö k", + "i r", + "d ul", + "v is", + "né z", + "t er", + "á ban", + "k ül", + "ak kor", + "k ap", + "sz él", + "y en", + "ú j", + "i m", + "oly an", + "es en", + "k ed", + "h ely", + "t ör", + "b ól", + "el m", + "r á", + "ár a", + "r ó", + "l ó", + "vol na", + "t an", + "le het", + "e bb", + "t en", + "t ek", + "s ok", + "k al", + "f or", + "u g", + "ol t", + "k a", + "ek et", + "b or", + "f ej", + "g ond", + "a g", + "ak ar", + "f él", + "ú l", + "b el", + "ott a", + "mi t", + "val ami", + "j el", + "é d", + "ar c", + "u r", + "hal l", + "t i", + "f öl", + "á ba", + "ol g", + "ki r", + "ol d", + "m ar", + "k érd", + "j ár", + "ú r", + "sz e", + "z s", + "él et", + "j át", + "o v", + "u s", + "é z", + "v il", + "v er", + "ő r", + "á d", + "ö g", + "le sz", + "on t", + "b iz", + "k oz", + "á bb", + "kir ály", + "es t", + "a b", + "en g", + "ig az", + "b ar", + "ha j", + "d i", + "o b", + "k od", + "r ól", + "v ez", + "tö bb", + "sz ó", + "é ben", + "ö t", + "ny i", + "t á", + "sz ól", + "gond ol", + "eg ész", + "í gy", + "ő s", + "o bb", + "os an", + "b ől", + "a bb", + "c i", + "ő t", + "n ál", + "k ép", + "azt án", + "v i", + "t 
art", + "be szél", + "m en", + "elő tt", + "a szt", + "ma j", + "kö r", + "han g", + "í z", + "in cs", + "a i", + "é v", + "ó d", + "ó k", + "hoz z", + "t em", + "ok at", + "an y", + "nagy on", + "h áz", + "p er", + "p ed", + "ez te", + "et len", + "nek i", + "maj d", + "sz ony", + "án ak", + "fel é", + "egy szer", + "j e", + "ad t", + "gy er", + "ami kor", + "f oly", + "sz ak", + "ő d", + "h ú", + "á sz", + "am ely", + "h ar", + "ér e", + "il yen", + "od a", + "j ák", + "t ár", + "á val", + "l ak", + "t ó", + "m ent", + "gy an", + "él y", + "ú t", + "v ar", + "kez d", + "m ell", + "mi kor", + "h ez", + "val ó", + "k o", + "m es", + "szer et", + "r end", + "l et", + "vis sza", + "ig en", + "f ő", + "va s", + "as szony", + "r ől", + "ped ig", + "p i", + "sz ép", + "t ák", + "ö v", + "an i", + "vil ág", + "p en", + "mag a", + "t et", + "sz ik", + "é j", + "én t", + "j ött", + "s an", + "sz í", + "i de", + "g at", + "ett em", + "ul t", + "h ány", + "ás t", + "a hol", + "ők et", + "h ár", + "k el", + "n ő", + "cs i", + "tal ál", + "el te", + "lá tt", + "tör t", + "ha gy", + "e sz", + "s en", + "n él", + "p ar", + "v ál", + "k ut", + "l ány", + "ami t", + "s ő", + "ell en", + "mag át", + "in k", + "u gyan", + "kül ön", + "a sz", + "mind ig", + "l ép", + "tal án", + "u n", + "sz or", + "k e", + "il lan", + "n incs", + "z et", + "vagy ok", + "tel en", + "is mer", + "s or", + "is ten", + "ít ott", + "j obb", + "v es", + "dul t", + "j uk", + "sz en", + "r o", + "ö m", + "l ett", + "k ar", + "egy ik", + "b ár", + "sz i", + "sz ív", + "az on", + "e szt", + "föl d", + "kut y", + "p illan", + "f ér", + "k om", + "t ől", + "t ű", + "é be", + "t ött", + "bar át", + "í g", + "a hogy", + "e h", + "e p", + "s o", + "v en", + "jel ent", + "t at", + "sz eg", + "mint ha", + "f al", + "egy en", + "mi l", + "sza b", + "r i", + "é m", + "biz ony", + "j on", + "ör eg", + "d olg", + "cs ap", + "ti szt", + "áll t", + "an cs", + "id ő", + "k at", + "ü gy", + "mi ért", + "ó t", + "ü r", + "cs in", + "h az", + "b et", + "én ek", + "v ér", + "j ól", + "al att", + "m ely", + "l o", + "sem mi", + "ny ug", + "v ág", + "kö vet", + "ös sze", + "ma d", + "l i", + "a cs", + "fi ú", + "kö n", + "más ik", + "j ön", + "sz ám", + "g er", + "s ó", + "r ész", + "k ér", + "z el", + "é vel", + "e o", + "e u", + "a n", + "eu l", + "eu n", + "eo n", + "a e", + "d a", + "a l", + "s s", + "i n", + "i l", + "a g", + "an g", + "y eon", + "y eo", + "d o", + "c h", + "n g", + "j i", + "h an", + "g a", + "g o", + "u i", + "h ae", + "a m", + "u l", + "u n", + "g eo", + "s i", + "n eun", + "ss da", + "s eo", + "eon g", + "y o", + "i da", + "t t", + "k k", + "j eo", + "d eul", + "w a", + "eu m", + "g e", + "o n", + "o g", + "s al", + "m an", + "yeon g", + "geo s", + "h ag", + "an eun", + "j a", + "g i", + "s u", + "i ss", + "o l", + "d ae", + "eo b", + "h a", + "j u", + "eo l", + "g eu", + "j eong", + "s ae", + "do e", + "g eul", + "s eu", + "s in", + "eul o", + "b n", + "s ang", + "bn ida", + "h al", + "b o", + "han eun", + "m al", + "i m", + "m o", + "b u", + "jeo g", + "sae ng", + "in eun", + "an h", + "m a", + "sal am", + "j o", + "s a", + "eo m", + "n ae", + "w i", + "l o", + "g wa", + "yeo l", + "n a", + "e seo", + "y e", + "m yeon", + "tt ae", + "h w", + "j e", + "eob s", + "j ang", + "g u", + "g w", + "il eul", + "yeo g", + "j eon", + "si g", + "j ag", + "j in", + "y u", + "o e", + "s e", + "hag o", + "d eun", + "y a", + "m un", + "s eong", + "g ag", + "h am", + "d ang", + "b a", + "l eul", + "s il", + "do ng", + "kk a", + "b al", + "da 
l", + "han da", + "eo ssda", + "ae g", + "l i", + "ha ji", + "s eon", + "o ng", + "hae ssda", + "d e", + "i ssda", + "e ge", + "b un", + "m ul", + "ju ng", + "ji g", + "m u", + "iss neun", + "b i", + "g eun", + "seu bnida", + "w on", + "p p", + "d aneun", + "eo h", + "d eo", + "ga m", + "j al", + "hae ng", + "ag o", + "y ang", + "b ul", + "b ang", + "u m", + "s o", + "h i", + "j ae", + "si m", + "saeng gag", + "hag e", + "s og", + "eo ss", + "d an", + "ja sin", + "j il", + "eo g", + "g yeong", + "doe n", + "go ng", + "m i", + "ch i", + "d eu", + "d eon", + "hae ss", + "d u", + "n am", + "eun g", + "jo h", + "n al", + "m yeong", + "w o", + "eon a", + "i go", + "g yeol", + "y ag", + "gw an", + "ul i", + "yo ng", + "n o", + "l yeo", + "j og", + "eoh ge", + "ga t", + "b og", + "mo s", + "t ong", + "ch a", + "man h", + "jeo l", + "geo l", + "h oe", + "ag a", + "n aneun", + "g an", + "un eun", + "ch eol", + "ch e", + "do l", + "b on", + "b an", + "ba d", + "ch u", + "ham yeon", + "yeo ssda", + "i bnida", + "g ye", + "eo s", + "hw al", + "salam deul", + "ji man", + "dang sin", + "ji b", + "ttae mun", + "m ae", + "i b", + "e neun", + "eu g", + "jeo m", + "geul eon", + "h wa", + "a ssda", + "b eob", + "bu t", + "b ae", + "yeo ss", + "ch in", + "ch aeg", + "g eon", + "g ae", + "nae ga", + "i ga", + "m og", + "sig an", + "g il", + "h yeon", + "l yeog", + "gu g", + "p yeon", + "s an", + "w ae", + "j ul", + "s eul", + "deun g", + "haji man", + "eum yeon", + "p il", + "m ol", + "n eu", + "a ss", + "n yeon", + "t ae", + "h u", + "p yo", + "s ul", + "g ang", + "j ineun", + "b eon", + "ha da", + "seo l", + "si p", + "dal eun", + "a p", + "sal m", + "g yo", + "ch eon", + "hag i", + "in a", + "cheol eom", + "g al", + "il a", + "kka ji", + "anh neun", + "ha bnida", + "tt eon", + "n u", + "hae seo", + "doen da", + "s ol", + "tt al", + "l a", + "il o", + "seu b", + "b yeon", + "m yeo", + "b eol", + "s on", + "n un", + "j un", + "j am", + "j eung", + "tt o", + "e n", + "mo m", + "h o", + "ch im", + "hw ang", + "eun eun", + "jo ng", + "bo da", + "n ol", + "n eom", + "but eo", + "jig eum", + "eobs da", + "dae lo", + "i g", + "y ul", + "p yeong", + "seon eun", + "sal ang", + "seu t", + "h im", + "n an", + "h eom", + "h yang", + "p i", + "gw ang", + "eobs neun", + "hw ag", + "ge ss", + "jag i", + "il eon", + "wi hae", + "dae han", + "ga ji", + "m eog", + "j yeo", + "cha j", + "b yeong", + "eo d", + "g yeo", + "do n", + "eo ji", + "g ul", + "mo deun", + "j on", + "in saeng", + "geul ae", + "h ang", + "sa sil", + "si b", + "ch al", + "il ago", + "doe l", + "g eum", + "doe neun", + "b ol", + "ga jang", + "geul igo", + "e l", + "h yeong", + "haeng bog", + "ch ul", + "h on", + "ch ae", + "s am", + "m ang", + "in da", + "da m", + "w ol", + "ch oe", + "d ul", + "si jag", + "ch eong", + "il aneun", + "ul ineun", + "ae n", + "kk e", + "mun je", + "a do", + "t eu", + "g un", + "geun eun", + "b ge", + "ch eo", + "b aeg", + "ju g", + "t a", + "sang dae", + "geu geos", + "do g", + "eu s", + "deu s", + "ja b", + "h yeo", + "tt eohge", + "u g", + "ma j", + "ch il", + "s wi", + "j ileul", + "ch ang", + "g aneun", + "m ag", + "i ji", + "da go", + "m in", + "yo han", + "t eug", + "pp un", + "al eul", + "haeng dong", + "p o", + "m il", + "ch am", + "se sang", + "e do", + "p an", + "man deul", + "am yeon", + "a b", + "kk ae", + "b ag", + "i deul", + "p um", + "m eol", + "s un", + "n eul", + "ham kke", + "chu ng", + "da b", + "yu g", + "s ag", + "gwang ye", + "il eohge", + "bal o", + "neun de", + "ham yeo", + "go s", + "geul eoh", + "an 
ila", + "bang beob", + "da si", + "b yeol", + "g yeon", + "gam jeong", + "on eul", + "j aneun", + "yeo m", + "l ago", + "i gi", + "hw an", + "t eul", + "eo seo", + "si k", + "ch o", + "jag a", + "geul eom", + "geul eona", + "jeong do", + "g yeog", + "geul eohge", + "geu deul", + "eu t", + "im yeon", + "j jae", + "k eun", + "i sang", + "mal haessda", + "eu ge", + "no p", + "in gan", + "bo myeon", + "t aeg", + "seu s", + "d wi", + "s aneun", + "w an", + "anh go", + "t an", + "nu gu", + "su ng", + "da myeon", + "a deul", + "p eul", + "ttal a", + "d i", + "geos do", + "a ji", + "m eon", + "eum yeo", + "dol og", + "neun g", + "mo du", + "क े", + "ह ै", + "े ं", + "् र", + "ा र", + "न े", + "य ा", + "म ें", + "स े", + "क ी", + "क ा", + "ो ं", + "त ा", + "क र", + "स ्", + "क ि", + "क ो", + "र ्", + "न ा", + "क ्", + "ह ी", + "औ र", + "प र", + "त े", + "ह ो", + "प ्र", + "ा न", + "् य", + "ल ा", + "व ा", + "ल े", + "स ा", + "है ं", + "ल ि", + "ज ा", + "ह ा", + "भ ी", + "व ि", + "इ स", + "त ी", + "न ्", + "र ा", + "म ा", + "द े", + "द ि", + "ब ा", + "त ि", + "थ ा", + "न ि", + "क ार", + "ए क", + "ही ं", + "ह ु", + "ं ग", + "ै ं", + "न ी", + "स ी", + "अ प", + "त ्", + "न हीं", + "र ी", + "म े", + "म ु", + "ि त", + "त ो", + "प ा", + "ल ी", + "लि ए", + "ग ा", + "ल ्", + "र ह", + "र े", + "क् ष", + "म ैं", + "स म", + "उ स", + "ज ि", + "त ्र", + "म ि", + "च ा", + "ो ग", + "स ं", + "द ्", + "स ि", + "आ प", + "त ु", + "द ा", + "क ु", + "य ों", + "व े", + "ज ी", + "् या", + "उ न", + "ि क", + "य े", + "भ ा", + "् ट", + "ह म", + "स् ट", + "श ा", + "ड ़", + "ं द", + "ख ा", + "म ्", + "श ्", + "य ह", + "स क", + "प ू", + "कि या", + "अप ने", + "र ू", + "स ु", + "म ी", + "ह ि", + "ज ो", + "थ े", + "र ि", + "द ी", + "थ ी", + "ग ी", + "ल ोग", + "ग या", + "त र", + "न् ह", + "च ्", + "व ार", + "ब ी", + "प ्", + "द ो", + "ट ी", + "श ि", + "कर ने", + "ग े", + "ै से", + "इ न", + "ं ड", + "सा थ", + "प ु", + "ब े", + "ब ार", + "व ी", + "अ न", + "ह र", + "उ न्ह", + "हो ता", + "ज ब", + "कु छ", + "म ान", + "क ्र", + "ब ि", + "प ह", + "फ ि", + "स र", + "ार ी", + "र ो", + "द ू", + "क हा", + "त क", + "श न", + "ब ्", + "स् थ", + "व ह", + "बा द", + "ओ ं", + "ग ु", + "ज ्", + "्र े", + "ग र", + "रह े", + "व र्", + "ह ू", + "ार ्", + "प ी", + "ब हु", + "मु झ", + "्र ा", + "दि या", + "स ब", + "कर ते", + "अप नी", + "बहु त", + "क ह", + "ट े", + "हु ए", + "कि सी", + "र हा", + "ष ्ट", + "ज ़", + "ब ना", + "स ो", + "ड ि", + "को ई", + "व ्य", + "बा त", + "र ु", + "व ो", + "मुझ े", + "द् ध", + "च ार", + "मे रे", + "व र", + "्र ी", + "जा ता", + "न ों", + "प्र ा", + "दे ख", + "ट ा", + "क् या", + "अ ध", + "ल ग", + "ल ो", + "प ि", + "य ु", + "च े", + "जि स", + "ं त", + "ान ी", + "प ै", + "ज न", + "ार े", + "च ी", + "मि ल", + "द ु", + "दे श", + "च् छ", + "ष ्", + "स ू", + "ख े", + "च ु", + "ि या", + "ल गा", + "ब ु", + "उन के", + "ज् ञ", + "क्ष ा", + "त रह", + "्या दा", + "वा ले", + "पू र्", + "मैं ने", + "का म", + "रू प", + "हो ती", + "उ प", + "ज ान", + "प्र कार", + "भ ार", + "म न", + "हु आ", + "ट र", + "हू ँ", + "पर ि", + "पा स", + "अन ु", + "रा ज", + "लोग ों", + "अ ब", + "सम झ", + "ड ी", + "म ौ", + "श ु", + "च ि", + "प े", + "क ृ", + "सक ते", + "म ह", + "य ोग", + "द र्", + "उ से", + "ं ध", + "ड ा", + "जा ए", + "ब ो", + "ू ल", + "म ो", + "ों ने", + "ं स", + "तु म", + "पह ले", + "ब ता", + "त था", + "य ो", + "ग ई", + "उ त्", + "सक ता", + "क म", + "ज ्यादा", + "र ख", + "सम य", + "ार ा", + "अ गर", + "स् त", + "च ल", + "फि र", + "वार ा", + "कर ना", + "श ी", + "ग ए", + "ब न", + "ौ र", + "हो ने", + "चा ह", + "ख ु", + "हा ँ", + "उन्ह ें", + "उन्ह 
ोंने", + "छ ो", + "म् ह", + "प्र ति", + "नि क", + "व न", + "्य ू", + "र ही", + "तु म्ह", + "ज ैसे", + "ि यों", + "क् यों", + "ल ों", + "फ ़", + "ं त्र", + "हो ते", + "क् ति", + "त ्य", + "कर ्", + "क ई", + "व ं", + "कि न", + "प ो", + "कार ण", + "ड़ ी", + "भ ि", + "इस के", + "ब र", + "उस के", + "द् वारा", + "श े", + "क ॉ", + "दि न", + "न् न", + "ड़ ा", + "स् व", + "नि र्", + "मु ख", + "लि या", + "ट ि", + "ज्ञ ान", + "क् त", + "द ्र", + "ग ्", + "क् स", + "म ै", + "ग ो", + "ज े", + "ट ्र", + "म ार", + "त् व", + "ध ार", + "भा व", + "कर ता", + "ख ि", + "क ं", + "चा हि", + "य र", + "प् त", + "क ों", + "ं च", + "ज ु", + "म त", + "अ च्छ", + "हु ई", + "क भी", + "ले किन", + "भ ू", + "अप ना", + "दू स", + "चाहि ए", + "य ू", + "घ र", + "सब से", + "मे री", + "ना म", + "ढ ़", + "ं ट", + "ें गे", + "ब ै", + "फ ा", + "ए वं", + "य ी", + "ग ्र", + "क्ष े", + "आ ज", + "आप को", + "भा ग", + "ठ ा", + "क ै", + "भार त", + "उन की", + "प हु", + "स भी", + "ध ा", + "ण ा", + "स ान", + "हो गा", + "त ब", + "स ंग", + "प र्", + "अ व", + "त ना", + "ग ि", + "य न", + "स् था", + "च ित", + "ट ्", + "छ ा", + "जा ने", + "क्षे त्र", + "वा ली", + "पूर् ण", + "स मा", + "कार ी" + ] + } +} \ No newline at end of file diff --git a/comfy/text_encoders/ace_text_cleaners.py b/comfy/text_encoders/ace_text_cleaners.py new file mode 100644 index 00000000..cd31d8d8 --- /dev/null +++ b/comfy/text_encoders/ace_text_cleaners.py @@ -0,0 +1,395 @@ +# basic text cleaners for the ACE step model +# I didn't copy the ones from the reference code because I didn't want to deal with the dependencies +# TODO: more languages than english? + +import re + +def japanese_to_romaji(japanese_text): + """ + Convert Japanese hiragana and katakana to romaji (Latin alphabet representation). + + Args: + japanese_text (str): Text containing hiragana and/or katakana characters + + Returns: + str: The romaji (Latin alphabet) equivalent + """ + # Dictionary mapping kana characters to their romaji equivalents + kana_map = { + # Katakana characters + 'ア': 'a', 'イ': 'i', 'ウ': 'u', 'エ': 'e', 'オ': 'o', + 'カ': 'ka', 'キ': 'ki', 'ク': 'ku', 'ケ': 'ke', 'コ': 'ko', + 'サ': 'sa', 'シ': 'shi', 'ス': 'su', 'セ': 'se', 'ソ': 'so', + 'タ': 'ta', 'チ': 'chi', 'ツ': 'tsu', 'テ': 'te', 'ト': 'to', + 'ナ': 'na', 'ニ': 'ni', 'ヌ': 'nu', 'ネ': 'ne', 'ノ': 'no', + 'ハ': 'ha', 'ヒ': 'hi', 'フ': 'fu', 'ヘ': 'he', 'ホ': 'ho', + 'マ': 'ma', 'ミ': 'mi', 'ム': 'mu', 'メ': 'me', 'モ': 'mo', + 'ヤ': 'ya', 'ユ': 'yu', 'ヨ': 'yo', + 'ラ': 'ra', 'リ': 'ri', 'ル': 'ru', 'レ': 're', 'ロ': 'ro', + 'ワ': 'wa', 'ヲ': 'wo', 'ン': 'n', + + # Katakana voiced consonants + 'ガ': 'ga', 'ギ': 'gi', 'グ': 'gu', 'ゲ': 'ge', 'ゴ': 'go', + 'ザ': 'za', 'ジ': 'ji', 'ズ': 'zu', 'ゼ': 'ze', 'ゾ': 'zo', + 'ダ': 'da', 'ヂ': 'ji', 'ヅ': 'zu', 'デ': 'de', 'ド': 'do', + 'バ': 'ba', 'ビ': 'bi', 'ブ': 'bu', 'ベ': 'be', 'ボ': 'bo', + 'パ': 'pa', 'ピ': 'pi', 'プ': 'pu', 'ペ': 'pe', 'ポ': 'po', + + # Katakana combinations + 'キャ': 'kya', 'キュ': 'kyu', 'キョ': 'kyo', + 'シャ': 'sha', 'シュ': 'shu', 'ショ': 'sho', + 'チャ': 'cha', 'チュ': 'chu', 'チョ': 'cho', + 'ニャ': 'nya', 'ニュ': 'nyu', 'ニョ': 'nyo', + 'ヒャ': 'hya', 'ヒュ': 'hyu', 'ヒョ': 'hyo', + 'ミャ': 'mya', 'ミュ': 'myu', 'ミョ': 'myo', + 'リャ': 'rya', 'リュ': 'ryu', 'リョ': 'ryo', + 'ギャ': 'gya', 'ギュ': 'gyu', 'ギョ': 'gyo', + 'ジャ': 'ja', 'ジュ': 'ju', 'ジョ': 'jo', + 'ビャ': 'bya', 'ビュ': 'byu', 'ビョ': 'byo', + 'ピャ': 'pya', 'ピュ': 'pyu', 'ピョ': 'pyo', + + # Katakana small characters and special cases + 'ッ': '', # Small tsu (doubles the following consonant) + 'ャ': 'ya', 'ュ': 'yu', 'ョ': 'yo', + + # Katakana extras + 'ヴ': 'vu', 'ファ': 'fa', 'フィ': 'fi', 'フェ': 'fe', 'フォ': 'fo', + 'ウィ': 
'wi', 'ウェ': 'we', 'ウォ': 'wo', + + # Hiragana characters + 'あ': 'a', 'い': 'i', 'う': 'u', 'え': 'e', 'お': 'o', + 'か': 'ka', 'き': 'ki', 'く': 'ku', 'け': 'ke', 'こ': 'ko', + 'さ': 'sa', 'し': 'shi', 'す': 'su', 'せ': 'se', 'そ': 'so', + 'た': 'ta', 'ち': 'chi', 'つ': 'tsu', 'て': 'te', 'と': 'to', + 'な': 'na', 'に': 'ni', 'ぬ': 'nu', 'ね': 'ne', 'の': 'no', + 'は': 'ha', 'ひ': 'hi', 'ふ': 'fu', 'へ': 'he', 'ほ': 'ho', + 'ま': 'ma', 'み': 'mi', 'む': 'mu', 'め': 'me', 'も': 'mo', + 'や': 'ya', 'ゆ': 'yu', 'よ': 'yo', + 'ら': 'ra', 'り': 'ri', 'る': 'ru', 'れ': 're', 'ろ': 'ro', + 'わ': 'wa', 'を': 'wo', 'ん': 'n', + + # Hiragana voiced consonants + 'が': 'ga', 'ぎ': 'gi', 'ぐ': 'gu', 'げ': 'ge', 'ご': 'go', + 'ざ': 'za', 'じ': 'ji', 'ず': 'zu', 'ぜ': 'ze', 'ぞ': 'zo', + 'だ': 'da', 'ぢ': 'ji', 'づ': 'zu', 'で': 'de', 'ど': 'do', + 'ば': 'ba', 'び': 'bi', 'ぶ': 'bu', 'べ': 'be', 'ぼ': 'bo', + 'ぱ': 'pa', 'ぴ': 'pi', 'ぷ': 'pu', 'ぺ': 'pe', 'ぽ': 'po', + + # Hiragana combinations + 'きゃ': 'kya', 'きゅ': 'kyu', 'きょ': 'kyo', + 'しゃ': 'sha', 'しゅ': 'shu', 'しょ': 'sho', + 'ちゃ': 'cha', 'ちゅ': 'chu', 'ちょ': 'cho', + 'にゃ': 'nya', 'にゅ': 'nyu', 'にょ': 'nyo', + 'ひゃ': 'hya', 'ひゅ': 'hyu', 'ひょ': 'hyo', + 'みゃ': 'mya', 'みゅ': 'myu', 'みょ': 'myo', + 'りゃ': 'rya', 'りゅ': 'ryu', 'りょ': 'ryo', + 'ぎゃ': 'gya', 'ぎゅ': 'gyu', 'ぎょ': 'gyo', + 'じゃ': 'ja', 'じゅ': 'ju', 'じょ': 'jo', + 'びゃ': 'bya', 'びゅ': 'byu', 'びょ': 'byo', + 'ぴゃ': 'pya', 'ぴゅ': 'pyu', 'ぴょ': 'pyo', + + # Hiragana small characters and special cases + 'っ': '', # Small tsu (doubles the following consonant) + 'ゃ': 'ya', 'ゅ': 'yu', 'ょ': 'yo', + + # Common punctuation and spaces + ' ': ' ', # Japanese space + '、': ', ', '。': '. ', + } + + result = [] + i = 0 + + while i < len(japanese_text): + # Check for small tsu (doubling the following consonant) + if i < len(japanese_text) - 1 and (japanese_text[i] == 'っ' or japanese_text[i] == 'ッ'): + if i < len(japanese_text) - 1 and japanese_text[i+1] in kana_map: + next_romaji = kana_map[japanese_text[i+1]] + if next_romaji and next_romaji[0] not in 'aiueon': + result.append(next_romaji[0]) # Double the consonant + i += 1 + continue + + # Check for combinations with small ya, yu, yo + if i < len(japanese_text) - 1 and japanese_text[i+1] in ('ゃ', 'ゅ', 'ょ', 'ャ', 'ュ', 'ョ'): + combo = japanese_text[i:i+2] + if combo in kana_map: + result.append(kana_map[combo]) + i += 2 + continue + + # Regular character + if japanese_text[i] in kana_map: + result.append(kana_map[japanese_text[i]]) + else: + # If it's not in our map, keep it as is (might be kanji, romaji, etc.) + result.append(japanese_text[i]) + + i += 1 + + return ''.join(result) + +def number_to_text(num, ordinal=False): + """ + Convert a number (int or float) to its text representation. 
+ + Args: + num: The number to convert + + Returns: + str: Text representation of the number + """ + + if not isinstance(num, (int, float)): + return "Input must be a number" + + # Handle special case of zero + if num == 0: + return "zero" + + # Handle negative numbers + negative = num < 0 + num = abs(num) + + # Handle floats + if isinstance(num, float): + # Split into integer and decimal parts + int_part = int(num) + + # Convert both parts + int_text = _int_to_text(int_part) + + # Handle decimal part (convert to string and remove '0.') + decimal_str = str(num).split('.')[1] + decimal_text = " point " + " ".join(_digit_to_text(int(digit)) for digit in decimal_str) + + result = int_text + decimal_text + else: + # Handle integers + result = _int_to_text(num) + + # Add 'negative' prefix for negative numbers + if negative: + result = "negative " + result + + return result + + +def _int_to_text(num): + """Helper function to convert an integer to text""" + + ones = ["", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", + "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", + "seventeen", "eighteen", "nineteen"] + + tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"] + + if num < 20: + return ones[num] + + if num < 100: + return tens[num // 10] + (" " + ones[num % 10] if num % 10 != 0 else "") + + if num < 1000: + return ones[num // 100] + " hundred" + (" " + _int_to_text(num % 100) if num % 100 != 0 else "") + + if num < 1000000: + return _int_to_text(num // 1000) + " thousand" + (" " + _int_to_text(num % 1000) if num % 1000 != 0 else "") + + if num < 1000000000: + return _int_to_text(num // 1000000) + " million" + (" " + _int_to_text(num % 1000000) if num % 1000000 != 0 else "") + + return _int_to_text(num // 1000000000) + " billion" + (" " + _int_to_text(num % 1000000000) if num % 1000000000 != 0 else "") + + +def _digit_to_text(digit): + """Convert a single digit to text""" + digits = ["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"] + return digits[digit] + + +_whitespace_re = re.compile(r"\s+") + + +# List of (regular expression, replacement) pairs for abbreviations: +_abbreviations = { + "en": [ + (re.compile("\\b%s\\." 
% x[0], re.IGNORECASE), x[1]) + for x in [ + ("mrs", "misess"), + ("mr", "mister"), + ("dr", "doctor"), + ("st", "saint"), + ("co", "company"), + ("jr", "junior"), + ("maj", "major"), + ("gen", "general"), + ("drs", "doctors"), + ("rev", "reverend"), + ("lt", "lieutenant"), + ("hon", "honorable"), + ("sgt", "sergeant"), + ("capt", "captain"), + ("esq", "esquire"), + ("ltd", "limited"), + ("col", "colonel"), + ("ft", "fort"), + ] + ], +} + + +def expand_abbreviations_multilingual(text, lang="en"): + for regex, replacement in _abbreviations[lang]: + text = re.sub(regex, replacement, text) + return text + + +_symbols_multilingual = { + "en": [ + (re.compile(r"%s" % re.escape(x[0]), re.IGNORECASE), x[1]) + for x in [ + ("&", " and "), + ("@", " at "), + ("%", " percent "), + ("#", " hash "), + ("$", " dollar "), + ("£", " pound "), + ("°", " degree "), + ] + ], +} + + +def expand_symbols_multilingual(text, lang="en"): + for regex, replacement in _symbols_multilingual[lang]: + text = re.sub(regex, replacement, text) + text = text.replace(" ", " ") # Ensure there are no double spaces + return text.strip() + + +_ordinal_re = { + "en": re.compile(r"([0-9]+)(st|nd|rd|th)"), +} +_number_re = re.compile(r"[0-9]+") +_currency_re = { + "USD": re.compile(r"((\$[0-9\.\,]*[0-9]+)|([0-9\.\,]*[0-9]+\$))"), + "GBP": re.compile(r"((£[0-9\.\,]*[0-9]+)|([0-9\.\,]*[0-9]+£))"), + "EUR": re.compile(r"(([0-9\.\,]*[0-9]+€)|((€[0-9\.\,]*[0-9]+)))"), +} + +_comma_number_re = re.compile(r"\b\d{1,3}(,\d{3})*(\.\d+)?\b") +_dot_number_re = re.compile(r"\b\d{1,3}(.\d{3})*(\,\d+)?\b") +_decimal_number_re = re.compile(r"([0-9]+[.,][0-9]+)") + + +def _remove_commas(m): + text = m.group(0) + if "," in text: + text = text.replace(",", "") + return text + + +def _remove_dots(m): + text = m.group(0) + if "." 
in text: + text = text.replace(".", "") + return text + + +def _expand_decimal_point(m, lang="en"): + amount = m.group(1).replace(",", ".") + return number_to_text(float(amount)) + + +def _expand_currency(m, lang="en", currency="USD"): + amount = float((re.sub(r"[^\d.]", "", m.group(0).replace(",", ".")))) + full_amount = number_to_text(amount) + + and_equivalents = { + "en": ", ", + "es": " con ", + "fr": " et ", + "de": " und ", + "pt": " e ", + "it": " e ", + "pl": ", ", + "cs": ", ", + "ru": ", ", + "nl": ", ", + "ar": ", ", + "tr": ", ", + "hu": ", ", + "ko": ", ", + } + + if amount.is_integer(): + last_and = full_amount.rfind(and_equivalents[lang]) + if last_and != -1: + full_amount = full_amount[:last_and] + + return full_amount + + +def _expand_ordinal(m, lang="en"): + return number_to_text(int(m.group(1)), ordinal=True) + + +def _expand_number(m, lang="en"): + return number_to_text(int(m.group(0))) + + +def expand_numbers_multilingual(text, lang="en"): + if lang in ["en", "ru"]: + text = re.sub(_comma_number_re, _remove_commas, text) + else: + text = re.sub(_dot_number_re, _remove_dots, text) + try: + text = re.sub(_currency_re["GBP"], lambda m: _expand_currency(m, lang, "GBP"), text) + text = re.sub(_currency_re["USD"], lambda m: _expand_currency(m, lang, "USD"), text) + text = re.sub(_currency_re["EUR"], lambda m: _expand_currency(m, lang, "EUR"), text) + except: + pass + + text = re.sub(_decimal_number_re, lambda m: _expand_decimal_point(m, lang), text) + text = re.sub(_ordinal_re[lang], lambda m: _expand_ordinal(m, lang), text) + text = re.sub(_number_re, lambda m: _expand_number(m, lang), text) + return text + + +def lowercase(text): + return text.lower() + + +def collapse_whitespace(text): + return re.sub(_whitespace_re, " ", text) + + +def multilingual_cleaners(text, lang): + text = text.replace('"', "") + if lang == "tr": + text = text.replace("İ", "i") + text = text.replace("Ö", "ö") + text = text.replace("Ü", "ü") + text = lowercase(text) + try: + text = expand_numbers_multilingual(text, lang) + except: + pass + try: + text = expand_abbreviations_multilingual(text, lang) + except: + pass + try: + text = expand_symbols_multilingual(text, lang=lang) + except: + pass + text = collapse_whitespace(text) + return text + + +def basic_cleaners(text): + """Basic pipeline that lowercases and collapses whitespace without transliteration.""" + text = lowercase(text) + text = collapse_whitespace(text) + return text diff --git a/comfy/text_encoders/flux.py b/comfy/text_encoders/flux.py index 0666dde7..d61ef666 100644 --- a/comfy/text_encoders/flux.py +++ b/comfy/text_encoders/flux.py @@ -19,8 +19,8 @@ class FluxTokenizer: def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): out = {} - out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids) - out["t5xxl"] = self.t5xxl.tokenize_with_weights(text, return_word_ids) + out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids, **kwargs) + out["t5xxl"] = self.t5xxl.tokenize_with_weights(text, return_word_ids, **kwargs) return out def untokenize(self, token_weight_pair): diff --git a/comfy/text_encoders/hidream.py b/comfy/text_encoders/hidream.py index 8e1abcfc..dbcf5278 100644 --- a/comfy/text_encoders/hidream.py +++ b/comfy/text_encoders/hidream.py @@ -16,11 +16,11 @@ class HiDreamTokenizer: def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): out = {} - out["g"] = self.clip_g.tokenize_with_weights(text, return_word_ids) - out["l"] = self.clip_l.tokenize_with_weights(text, 
return_word_ids) - t5xxl = self.t5xxl.tokenize_with_weights(text, return_word_ids) + out["g"] = self.clip_g.tokenize_with_weights(text, return_word_ids, **kwargs) + out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids, **kwargs) + t5xxl = self.t5xxl.tokenize_with_weights(text, return_word_ids, **kwargs) out["t5xxl"] = [t5xxl[0]] # Use only first 128 tokens - out["llama"] = self.llama.tokenize_with_weights(text, return_word_ids) + out["llama"] = self.llama.tokenize_with_weights(text, return_word_ids, **kwargs) return out def untokenize(self, token_weight_pair): diff --git a/comfy/text_encoders/hunyuan_video.py b/comfy/text_encoders/hunyuan_video.py index 33ac2249..b02148b3 100644 --- a/comfy/text_encoders/hunyuan_video.py +++ b/comfy/text_encoders/hunyuan_video.py @@ -49,13 +49,13 @@ class HunyuanVideoTokenizer: def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, image_embeds=None, image_interleave=1, **kwargs): out = {} - out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids) + out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids, **kwargs) if llama_template is None: llama_text = self.llama_template.format(text) else: llama_text = llama_template.format(text) - llama_text_tokens = self.llama.tokenize_with_weights(llama_text, return_word_ids) + llama_text_tokens = self.llama.tokenize_with_weights(llama_text, return_word_ids, **kwargs) embed_count = 0 for r in llama_text_tokens: for i in range(len(r)): diff --git a/comfy/text_encoders/hydit.py b/comfy/text_encoders/hydit.py index e7273f42..ac699452 100644 --- a/comfy/text_encoders/hydit.py +++ b/comfy/text_encoders/hydit.py @@ -41,8 +41,8 @@ class HyditTokenizer: def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): out = {} - out["hydit_clip"] = self.hydit_clip.tokenize_with_weights(text, return_word_ids) - out["mt5xl"] = self.mt5xl.tokenize_with_weights(text, return_word_ids) + out["hydit_clip"] = self.hydit_clip.tokenize_with_weights(text, return_word_ids, **kwargs) + out["mt5xl"] = self.mt5xl.tokenize_with_weights(text, return_word_ids, **kwargs) return out def untokenize(self, token_weight_pair): diff --git a/comfy/text_encoders/sd3_clip.py b/comfy/text_encoders/sd3_clip.py index 6c2fbeca..ff5d412d 100644 --- a/comfy/text_encoders/sd3_clip.py +++ b/comfy/text_encoders/sd3_clip.py @@ -45,9 +45,9 @@ class SD3Tokenizer: def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): out = {} - out["g"] = self.clip_g.tokenize_with_weights(text, return_word_ids) - out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids) - out["t5xxl"] = self.t5xxl.tokenize_with_weights(text, return_word_ids) + out["g"] = self.clip_g.tokenize_with_weights(text, return_word_ids, **kwargs) + out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids, **kwargs) + out["t5xxl"] = self.t5xxl.tokenize_with_weights(text, return_word_ids, **kwargs) return out def untokenize(self, token_weight_pair): diff --git a/comfy/text_encoders/umt5_config_base.json b/comfy/text_encoders/umt5_config_base.json new file mode 100644 index 00000000..6b3618f0 --- /dev/null +++ b/comfy/text_encoders/umt5_config_base.json @@ -0,0 +1,22 @@ +{ + "d_ff": 2048, + "d_kv": 64, + "d_model": 768, + "decoder_start_token_id": 0, + "dropout_rate": 0.1, + "eos_token_id": 1, + "dense_act_fn": "gelu_pytorch_tanh", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "umt5", + "num_decoder_layers": 12, + 
"num_heads": 12, + "num_layers": 12, + "output_past": true, + "pad_token_id": 0, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "vocab_size": 256384 +} diff --git a/comfy/utils.py b/comfy/utils.py index a826e41b..561e1b85 100644 --- a/comfy/utils.py +++ b/comfy/utils.py @@ -28,6 +28,9 @@ import logging import itertools from torch.nn.functional import interpolate from einops import rearrange +from comfy.cli_args import args + +MMAP_TORCH_FILES = args.mmap_torch_files ALWAYS_SAFE_LOAD = False if hasattr(torch.serialization, "add_safe_globals"): # TODO: this was added in pytorch 2.4, the unsafe path should be removed once earlier versions are deprecated @@ -67,8 +70,12 @@ def load_torch_file(ckpt, safe_load=False, device=None, return_metadata=False): raise ValueError("{}\n\nFile path: {}\n\nThe safetensors file is corrupt/incomplete. Check the file size and make sure you have copied/downloaded it correctly.".format(message, ckpt)) raise e else: + torch_args = {} + if MMAP_TORCH_FILES: + torch_args["mmap"] = True + if safe_load or ALWAYS_SAFE_LOAD: - pl_sd = torch.load(ckpt, map_location=device, weights_only=True) + pl_sd = torch.load(ckpt, map_location=device, weights_only=True, **torch_args) else: pl_sd = torch.load(ckpt, map_location=device, pickle_module=comfy.checkpoint_pickle) if "global_step" in pl_sd: diff --git a/comfy/weight_adapter/boft.py b/comfy/weight_adapter/boft.py index c85adc7a..b2a2f1bd 100644 --- a/comfy/weight_adapter/boft.py +++ b/comfy/weight_adapter/boft.py @@ -24,7 +24,7 @@ class BOFTAdapter(WeightAdapterBase): ) -> Optional["BOFTAdapter"]: if loaded_keys is None: loaded_keys = set() - blocks_name = "{}.boft_blocks".format(x) + blocks_name = "{}.oft_blocks".format(x) rescale_name = "{}.rescale".format(x) blocks = None @@ -32,17 +32,18 @@ class BOFTAdapter(WeightAdapterBase): blocks = lora[blocks_name] if blocks.ndim == 4: loaded_keys.add(blocks_name) + else: + blocks = None + if blocks is None: + return None rescale = None if rescale_name in lora.keys(): rescale = lora[rescale_name] loaded_keys.add(rescale_name) - if blocks is not None: - weights = (blocks, rescale, alpha, dora_scale) - return cls(loaded_keys, weights) - else: - return None + weights = (blocks, rescale, alpha, dora_scale) + return cls(loaded_keys, weights) def calculate_weight( self, @@ -71,7 +72,7 @@ class BOFTAdapter(WeightAdapterBase): # Get r I = torch.eye(boft_b, device=blocks.device, dtype=blocks.dtype) # for Q = -Q^T - q = blocks - blocks.transpose(1, 2) + q = blocks - blocks.transpose(-1, -2) normed_q = q if alpha > 0: # alpha in boft/bboft is for constraint q_norm = torch.norm(q) + 1e-8 @@ -79,9 +80,8 @@ class BOFTAdapter(WeightAdapterBase): normed_q = q * alpha / q_norm # use float() to prevent unsupported type in .inverse() r = (I + normed_q) @ (I - normed_q).float().inverse() - r = r.to(original_weight) - - inp = org = original_weight + r = r.to(weight) + inp = org = weight r_b = boft_b//2 for i in range(boft_m): @@ -91,14 +91,14 @@ class BOFTAdapter(WeightAdapterBase): if strength != 1: bi = bi * strength + (1-strength) * I inp = ( - inp.unflatten(-1, (-1, g, k)) - .transpose(-2, -1) - .flatten(-3) - .unflatten(-1, (-1, boft_b)) + inp.unflatten(0, (-1, g, k)) + .transpose(1, 2) + .flatten(0, 2) + .unflatten(0, (-1, boft_b)) ) - inp = torch.einsum("b n m, b n ... 
-> b m ...", inp, bi) + inp = torch.einsum("b i j, b j ...-> b i ...", bi, inp) inp = ( - inp.flatten(-2).unflatten(-1, (-1, k, g)).transpose(-2, -1).flatten(-3) + inp.flatten(0, 1).unflatten(0, (-1, k, g)).transpose(1, 2).flatten(0, 2) ) if rescale is not None: @@ -109,7 +109,7 @@ class BOFTAdapter(WeightAdapterBase): if dora_scale is not None: weight = weight_decompose(dora_scale, weight, lora_diff, alpha, strength, intermediate_dtype, function) else: - weight += function(((strength * alpha) * lora_diff).type(weight.dtype)) + weight += function((strength * lora_diff).type(weight.dtype)) except Exception as e: logging.error("ERROR {} {} {}".format(self.name, key, e)) return weight diff --git a/comfy/weight_adapter/oft.py b/comfy/weight_adapter/oft.py index 0ea229b7..25009eca 100644 --- a/comfy/weight_adapter/oft.py +++ b/comfy/weight_adapter/oft.py @@ -32,17 +32,18 @@ class OFTAdapter(WeightAdapterBase): blocks = lora[blocks_name] if blocks.ndim == 3: loaded_keys.add(blocks_name) + else: + blocks = None + if blocks is None: + return None rescale = None if rescale_name in lora.keys(): rescale = lora[rescale_name] loaded_keys.add(rescale_name) - if blocks is not None: - weights = (blocks, rescale, alpha, dora_scale) - return cls(loaded_keys, weights) - else: - return None + weights = (blocks, rescale, alpha, dora_scale) + return cls(loaded_keys, weights) def calculate_weight( self, @@ -79,16 +80,17 @@ class OFTAdapter(WeightAdapterBase): normed_q = q * alpha / q_norm # use float() to prevent unsupported type in .inverse() r = (I + normed_q) @ (I - normed_q).float().inverse() - r = r.to(original_weight) + r = r.to(weight) + _, *shape = weight.shape lora_diff = torch.einsum( "k n m, k n ... -> k m ...", (r * strength) - strength * I, - original_weight, - ) + weight.view(block_num, block_size, *shape), + ).view(-1, *shape) if dora_scale is not None: weight = weight_decompose(dora_scale, weight, lora_diff, alpha, strength, intermediate_dtype, function) else: - weight += function(((strength * alpha) * lora_diff).type(weight.dtype)) + weight += function((strength * lora_diff).type(weight.dtype)) except Exception as e: logging.error("ERROR {} {} {}".format(self.name, key, e)) return weight diff --git a/comfy_api/input/__init__.py b/comfy_api/input/__init__.py new file mode 100644 index 00000000..66667946 --- /dev/null +++ b/comfy_api/input/__init__.py @@ -0,0 +1,8 @@ +from .basic_types import ImageInput, AudioInput +from .video_types import VideoInput + +__all__ = [ + "ImageInput", + "AudioInput", + "VideoInput", +] diff --git a/comfy_api/input/basic_types.py b/comfy_api/input/basic_types.py new file mode 100644 index 00000000..033fb7e2 --- /dev/null +++ b/comfy_api/input/basic_types.py @@ -0,0 +1,20 @@ +import torch +from typing import TypedDict + +ImageInput = torch.Tensor +""" +An image in format [B, H, W, C] where B is the batch size, C is the number of channels, +""" + +class AudioInput(TypedDict): + """ + TypedDict representing audio input. 
+ """ + + waveform: torch.Tensor + """ + Tensor in the format [B, C, T] where B is the batch size, C is the number of channels, + """ + + sample_rate: int + diff --git a/comfy_api/input/video_types.py b/comfy_api/input/video_types.py new file mode 100644 index 00000000..0676e0e6 --- /dev/null +++ b/comfy_api/input/video_types.py @@ -0,0 +1,45 @@ +from __future__ import annotations +from abc import ABC, abstractmethod +from typing import Optional +from comfy_api.util import VideoContainer, VideoCodec, VideoComponents + +class VideoInput(ABC): + """ + Abstract base class for video input types. + """ + + @abstractmethod + def get_components(self) -> VideoComponents: + """ + Abstract method to get the video components (images, audio, and frame rate). + + Returns: + VideoComponents containing images, audio, and frame rate + """ + pass + + @abstractmethod + def save_to( + self, + path: str, + format: VideoContainer = VideoContainer.AUTO, + codec: VideoCodec = VideoCodec.AUTO, + metadata: Optional[dict] = None + ): + """ + Abstract method to save the video input to a file. + """ + pass + + # Provide a default implementation, but subclasses can provide optimized versions + # if possible. + def get_dimensions(self) -> tuple[int, int]: + """ + Returns the dimensions of the video input. + + Returns: + Tuple of (width, height) + """ + components = self.get_components() + return components.images.shape[2], components.images.shape[1] + diff --git a/comfy_api/input_impl/__init__.py b/comfy_api/input_impl/__init__.py new file mode 100644 index 00000000..02901b8b --- /dev/null +++ b/comfy_api/input_impl/__init__.py @@ -0,0 +1,7 @@ +from .video_types import VideoFromFile, VideoFromComponents + +__all__ = [ + # Implementations + "VideoFromFile", + "VideoFromComponents", +] diff --git a/comfy_api/input_impl/video_types.py b/comfy_api/input_impl/video_types.py new file mode 100644 index 00000000..ae48dbaa --- /dev/null +++ b/comfy_api/input_impl/video_types.py @@ -0,0 +1,271 @@ +from __future__ import annotations +from av.container import InputContainer +from av.subtitles.stream import SubtitleStream +from fractions import Fraction +from typing import Optional +from comfy_api.input import AudioInput +import av +import io +import json +import numpy as np +import torch +from comfy_api.input import VideoInput +from comfy_api.util import VideoContainer, VideoCodec, VideoComponents + + +def container_to_output_format(container_format: str | None) -> str | None: + """ + A container's `format` may be a comma-separated list of formats. + E.g., iso container's `format` may be `mov,mp4,m4a,3gp,3g2,mj2`. + However, writing to a file/stream with `av.open` requires a single format, + or `None` to auto-detect. 
+ """ + if not container_format: + return None # Auto-detect + + if "," not in container_format: + return container_format + + formats = container_format.split(",") + return formats[0] + + +def get_open_write_kwargs( + dest: str | io.BytesIO, container_format: str, to_format: str | None +) -> dict: + """Get kwargs for writing a `VideoFromFile` to a file/stream with `av.open`""" + open_kwargs = { + "mode": "w", + # If isobmff, preserve custom metadata tags (workflow, prompt, extra_pnginfo) + "options": {"movflags": "use_metadata_tags"}, + } + + is_write_to_buffer = isinstance(dest, io.BytesIO) + if is_write_to_buffer: + # Set output format explicitly, since it cannot be inferred from file extension + if to_format == VideoContainer.AUTO: + to_format = container_format.lower() + elif isinstance(to_format, str): + to_format = to_format.lower() + open_kwargs["format"] = container_to_output_format(to_format) + + return open_kwargs + + +class VideoFromFile(VideoInput): + """ + Class representing video input from a file. + """ + + def __init__(self, file: str | io.BytesIO): + """ + Initialize the VideoFromFile object based off of either a path on disk or a BytesIO object + containing the file contents. + """ + self.__file = file + + def get_dimensions(self) -> tuple[int, int]: + """ + Returns the dimensions of the video input. + + Returns: + Tuple of (width, height) + """ + if isinstance(self.__file, io.BytesIO): + self.__file.seek(0) # Reset the BytesIO object to the beginning + with av.open(self.__file, mode='r') as container: + for stream in container.streams: + if stream.type == 'video': + assert isinstance(stream, av.VideoStream) + return stream.width, stream.height + raise ValueError(f"No video stream found in file '{self.__file}'") + + def get_components_internal(self, container: InputContainer) -> VideoComponents: + # Get video frames + frames = [] + for frame in container.decode(video=0): + img = frame.to_ndarray(format='rgb24') # shape: (H, W, 3) + img = torch.from_numpy(img) / 255.0 # shape: (H, W, 3) + frames.append(img) + + images = torch.stack(frames) if len(frames) > 0 else torch.zeros(0, 3, 0, 0) + + # Get frame rate + video_stream = next(s for s in container.streams if s.type == 'video') + frame_rate = Fraction(video_stream.average_rate) if video_stream and video_stream.average_rate else Fraction(1) + + # Get audio if available + audio = None + try: + container.seek(0) # Reset the container to the beginning + for stream in container.streams: + if stream.type != 'audio': + continue + assert isinstance(stream, av.AudioStream) + audio_frames = [] + for packet in container.demux(stream): + for frame in packet.decode(): + assert isinstance(frame, av.AudioFrame) + audio_frames.append(frame.to_ndarray()) # shape: (channels, samples) + if len(audio_frames) > 0: + audio_data = np.concatenate(audio_frames, axis=1) # shape: (channels, total_samples) + audio_tensor = torch.from_numpy(audio_data).unsqueeze(0) # shape: (1, channels, total_samples) + audio = AudioInput({ + "waveform": audio_tensor, + "sample_rate": int(stream.sample_rate) if stream.sample_rate else 1, + }) + except StopIteration: + pass # No audio stream + + metadata = container.metadata + return VideoComponents(images=images, audio=audio, frame_rate=frame_rate, metadata=metadata) + + def get_components(self) -> VideoComponents: + if isinstance(self.__file, io.BytesIO): + self.__file.seek(0) # Reset the BytesIO object to the beginning + with av.open(self.__file, mode='r') as container: + return 
self.get_components_internal(container) + raise ValueError(f"No video stream found in file '{self.__file}'") + + def save_to( + self, + path: str | io.BytesIO, + format: VideoContainer = VideoContainer.AUTO, + codec: VideoCodec = VideoCodec.AUTO, + metadata: Optional[dict] = None + ): + if isinstance(self.__file, io.BytesIO): + self.__file.seek(0) # Reset the BytesIO object to the beginning + with av.open(self.__file, mode='r') as container: + container_format = container.format.name + video_encoding = container.streams.video[0].codec.name if len(container.streams.video) > 0 else None + reuse_streams = True + if format != VideoContainer.AUTO and format not in container_format.split(","): + reuse_streams = False + if codec != VideoCodec.AUTO and codec != video_encoding and video_encoding is not None: + reuse_streams = False + + if not reuse_streams: + components = self.get_components_internal(container) + video = VideoFromComponents(components) + return video.save_to( + path, + format=format, + codec=codec, + metadata=metadata + ) + + streams = container.streams + + open_kwargs = get_open_write_kwargs(path, container_format, format) + with av.open(path, **open_kwargs) as output_container: + # Copy over the original metadata + for key, value in container.metadata.items(): + if metadata is None or key not in metadata: + output_container.metadata[key] = value + + # Add our new metadata + if metadata is not None: + for key, value in metadata.items(): + if isinstance(value, str): + output_container.metadata[key] = value + else: + output_container.metadata[key] = json.dumps(value) + + # Add streams to the new container + stream_map = {} + for stream in streams: + if isinstance(stream, (av.VideoStream, av.AudioStream, SubtitleStream)): + out_stream = output_container.add_stream_from_template(template=stream, opaque=True) + stream_map[stream] = out_stream + + # Write packets to the new container + for packet in container.demux(): + if packet.stream in stream_map and packet.dts is not None: + packet.stream = stream_map[packet.stream] + output_container.mux(packet) + +class VideoFromComponents(VideoInput): + """ + Class representing video input from tensors. 
+ """ + + def __init__(self, components: VideoComponents): + self.__components = components + + def get_components(self) -> VideoComponents: + return VideoComponents( + images=self.__components.images, + audio=self.__components.audio, + frame_rate=self.__components.frame_rate + ) + + def save_to( + self, + path: str, + format: VideoContainer = VideoContainer.AUTO, + codec: VideoCodec = VideoCodec.AUTO, + metadata: Optional[dict] = None + ): + if format != VideoContainer.AUTO and format != VideoContainer.MP4: + raise ValueError("Only MP4 format is supported for now") + if codec != VideoCodec.AUTO and codec != VideoCodec.H264: + raise ValueError("Only H264 codec is supported for now") + with av.open(path, mode='w', options={'movflags': 'use_metadata_tags'}) as output: + # Add metadata before writing any streams + if metadata is not None: + for key, value in metadata.items(): + output.metadata[key] = json.dumps(value) + + frame_rate = Fraction(round(self.__components.frame_rate * 1000), 1000) + # Create a video stream + video_stream = output.add_stream('h264', rate=frame_rate) + video_stream.width = self.__components.images.shape[2] + video_stream.height = self.__components.images.shape[1] + video_stream.pix_fmt = 'yuv420p' + + # Create an audio stream + audio_sample_rate = 1 + audio_stream: Optional[av.AudioStream] = None + if self.__components.audio: + audio_sample_rate = int(self.__components.audio['sample_rate']) + audio_stream = output.add_stream('aac', rate=audio_sample_rate) + audio_stream.sample_rate = audio_sample_rate + audio_stream.format = 'fltp' + + # Encode video + for i, frame in enumerate(self.__components.images): + img = (frame * 255).clamp(0, 255).byte().cpu().numpy() # shape: (H, W, 3) + frame = av.VideoFrame.from_ndarray(img, format='rgb24') + frame = frame.reformat(format='yuv420p') # Convert to YUV420P as required by h264 + packet = video_stream.encode(frame) + output.mux(packet) + + # Flush video + packet = video_stream.encode(None) + output.mux(packet) + + if audio_stream and self.__components.audio: + # Encode audio + samples_per_frame = int(audio_sample_rate / frame_rate) + num_frames = self.__components.audio['waveform'].shape[2] // samples_per_frame + for i in range(num_frames): + start = i * samples_per_frame + end = start + samples_per_frame + # TODO(Feature) - Add support for stereo audio + chunk = ( + self.__components.audio["waveform"][0, 0, start:end] + .unsqueeze(0) + .contiguous() + .numpy() + ) + audio_frame = av.AudioFrame.from_ndarray(chunk, format='fltp', layout='mono') + audio_frame.sample_rate = audio_sample_rate + audio_frame.pts = i * samples_per_frame + for packet in audio_stream.encode(audio_frame): + output.mux(packet) + + # Flush audio + for packet in audio_stream.encode(None): + output.mux(packet) + diff --git a/comfy_api/util/__init__.py b/comfy_api/util/__init__.py new file mode 100644 index 00000000..9019c46d --- /dev/null +++ b/comfy_api/util/__init__.py @@ -0,0 +1,8 @@ +from .video_types import VideoContainer, VideoCodec, VideoComponents + +__all__ = [ + # Utility Types + "VideoContainer", + "VideoCodec", + "VideoComponents", +] diff --git a/comfy_api/util/video_types.py b/comfy_api/util/video_types.py new file mode 100644 index 00000000..d09663db --- /dev/null +++ b/comfy_api/util/video_types.py @@ -0,0 +1,51 @@ +from __future__ import annotations +from dataclasses import dataclass +from enum import Enum +from fractions import Fraction +from typing import Optional +from comfy_api.input import ImageInput, AudioInput + +class 
VideoCodec(str, Enum): + AUTO = "auto" + H264 = "h264" + + @classmethod + def as_input(cls) -> list[str]: + """ + Returns a list of codec names that can be used as node input. + """ + return [member.value for member in cls] + +class VideoContainer(str, Enum): + AUTO = "auto" + MP4 = "mp4" + + @classmethod + def as_input(cls) -> list[str]: + """ + Returns a list of container names that can be used as node input. + """ + return [member.value for member in cls] + + @classmethod + def get_extension(cls, value) -> str: + """ + Returns the file extension for the container. + """ + if isinstance(value, str): + value = cls(value) + if value == VideoContainer.MP4 or value == VideoContainer.AUTO: + return "mp4" + return "" + +@dataclass +class VideoComponents: + """ + Dataclass representing the components of a video. + """ + + images: ImageInput + frame_rate: Fraction + audio: Optional[AudioInput] = None + metadata: Optional[dict] = None + diff --git a/comfy_api_nodes/README.md b/comfy_api_nodes/README.md new file mode 100644 index 00000000..e2633a76 --- /dev/null +++ b/comfy_api_nodes/README.md @@ -0,0 +1,41 @@ +# ComfyUI API Nodes + +## Introduction + +Below is a collection of nodes that work by calling external APIs. More information is available in our [docs](https://docs.comfy.org/tutorials/api-nodes/overview#api-nodes). + +## Development + +While developing, you should test against the Staging environment. To test against staging: + +**Install ComfyUI_frontend** + +Follow the instructions [here](https://github.com/Comfy-Org/ComfyUI_frontend) to start the frontend server. By default, it will connect to Staging authentication. + +> **Hint:** If you use the --front-end-version argument for ComfyUI, it will use production authentication. + +```bash +python main.py --comfy-api-base https://stagingapi.comfy.org +``` + +API stubs are generated from the OpenAPI definitions using automated codegen tools. Since the Comfy Org OpenAPI definition also contains many things from the Comfy Registry, we use redocly/cli to filter the spec down to only the paths relevant to API nodes. + +### Redocly Instructions + +**Tip** +When developing locally, use the `redocly-dev.yaml` file to generate pydantic models. This lets you use stubs for APIs that are not marked `Released` yet. + +Before your API node PR merges, make sure to add the `Released` tag to the `openapi.yaml` file and test in staging. + +```bash +# Download the OpenAPI file from the staging server. +curl -o openapi.yaml https://stagingapi.comfy.org/openapi + +# Filter out unneeded API definitions. +npm install -g @redocly/cli +redocly bundle openapi.yaml --output filtered-openapi.yaml --config comfy_api_nodes/redocly-dev.yaml --remove-unused-components + +# Generate the pydantic datamodels for validation. 
+datamodel-codegen --use-subclass-enum --field-constraints --strict-types bytes --input filtered-openapi.yaml --output comfy_api_nodes/apis/__init__.py --output-model-type pydantic_v2.BaseModel + +``` diff --git a/comfy_api_nodes/__init__.py b/comfy_api_nodes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/comfy_api_nodes/apinode_utils.py b/comfy_api_nodes/apinode_utils.py new file mode 100644 index 00000000..e28d7d60 --- /dev/null +++ b/comfy_api_nodes/apinode_utils.py @@ -0,0 +1,576 @@ +from __future__ import annotations +import io +import logging +from typing import Optional +from comfy.utils import common_upscale +from comfy_api.input_impl import VideoFromFile +from comfy_api.util import VideoContainer, VideoCodec +from comfy_api.input.video_types import VideoInput +from comfy_api.input.basic_types import AudioInput +from comfy_api_nodes.apis.client import ( + ApiClient, + ApiEndpoint, + HttpMethod, + SynchronousOperation, + UploadRequest, + UploadResponse, +) + + +import numpy as np +from PIL import Image +import requests +import torch +import math +import base64 +import uuid +from io import BytesIO +import av + + +def download_url_to_video_output(video_url: str, timeout: int = None) -> VideoFromFile: + """Downloads a video from a URL and returns a `VIDEO` output. + + Args: + video_url: The URL of the video to download. + + Returns: + A Comfy node `VIDEO` output. + """ + video_io = download_url_to_bytesio(video_url, timeout) + if video_io is None: + error_msg = f"Failed to download video from {video_url}" + logging.error(error_msg) + raise ValueError(error_msg) + return VideoFromFile(video_io) + + +def downscale_image_tensor(image, total_pixels=1536 * 1024) -> torch.Tensor: + """Downscale input image tensor to roughly the specified total pixels.""" + samples = image.movedim(-1, 1) + total = int(total_pixels) + scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2])) + if scale_by >= 1: + return image + width = round(samples.shape[3] * scale_by) + height = round(samples.shape[2] * scale_by) + + s = common_upscale(samples, width, height, "lanczos", "disabled") + s = s.movedim(1, -1) + return s + + +def validate_and_cast_response(response, timeout: int = None) -> torch.Tensor: + """Validates and casts a response to a torch.Tensor. + + Args: + response: The response to validate and cast. + timeout: Request timeout in seconds. Defaults to None (no timeout). + + Returns: + A torch.Tensor representing the image (1, H, W, C). + + Raises: + ValueError: If the response is not valid. 
+ """ + # validate raw JSON response + data = response.data + if not data or len(data) == 0: + raise ValueError("No images returned from API endpoint") + + # Initialize list to store image tensors + image_tensors: list[torch.Tensor] = [] + + # Process each image in the data array + for image_data in data: + image_url = image_data.url + b64_data = image_data.b64_json + + if not image_url and not b64_data: + raise ValueError("No image was generated in the response") + + if b64_data: + img_data = base64.b64decode(b64_data) + img = Image.open(io.BytesIO(img_data)) + + elif image_url: + img_response = requests.get(image_url, timeout=timeout) + if img_response.status_code != 200: + raise ValueError("Failed to download the image") + img = Image.open(io.BytesIO(img_response.content)) + + img = img.convert("RGBA") + + # Convert to numpy array, normalize to float32 between 0 and 1 + img_array = np.array(img).astype(np.float32) / 255.0 + img_tensor = torch.from_numpy(img_array) + + # Add to list of tensors + image_tensors.append(img_tensor) + + return torch.stack(image_tensors, dim=0) + + +def validate_aspect_ratio( + aspect_ratio: str, + minimum_ratio: float, + maximum_ratio: float, + minimum_ratio_str: str, + maximum_ratio_str: str, +) -> float: + """Validates and casts an aspect ratio string to a float. + + Args: + aspect_ratio: The aspect ratio string to validate. + minimum_ratio: The minimum aspect ratio. + maximum_ratio: The maximum aspect ratio. + minimum_ratio_str: The minimum aspect ratio string. + maximum_ratio_str: The maximum aspect ratio string. + + Returns: + The validated and cast aspect ratio. + + Raises: + Exception: If the aspect ratio is not valid. + """ + # get ratio values + numbers = aspect_ratio.split(":") + if len(numbers) != 2: + raise TypeError( + f"Aspect ratio must be in the format X:Y, such as 16:9, but was {aspect_ratio}." + ) + try: + numerator = int(numbers[0]) + denominator = int(numbers[1]) + except ValueError as exc: + raise TypeError( + f"Aspect ratio must contain numbers separated by ':', such as 16:9, but was {aspect_ratio}." + ) from exc + calculated_ratio = numerator / denominator + # if not close to minimum and maximum, check bounds + if not math.isclose(calculated_ratio, minimum_ratio) or not math.isclose( + calculated_ratio, maximum_ratio + ): + if calculated_ratio < minimum_ratio: + raise TypeError( + f"Aspect ratio cannot reduce to any less than {minimum_ratio_str} ({minimum_ratio}), but was {aspect_ratio} ({calculated_ratio})." + ) + elif calculated_ratio > maximum_ratio: + raise TypeError( + f"Aspect ratio cannot reduce to any greater than {maximum_ratio_str} ({maximum_ratio}), but was {aspect_ratio} ({calculated_ratio})." + ) + return aspect_ratio + + +def mimetype_to_extension(mime_type: str) -> str: + """Converts a MIME type to a file extension.""" + return mime_type.split("/")[-1].lower() + + +def download_url_to_bytesio(url: str, timeout: int = None) -> BytesIO: + """Downloads content from a URL using requests and returns it as BytesIO. + + Args: + url: The URL to download. + timeout: Request timeout in seconds. Defaults to None (no timeout). + + Returns: + BytesIO object containing the downloaded content. + """ + response = requests.get(url, stream=True, timeout=timeout) + response.raise_for_status() # Raises HTTPError for bad responses (4XX or 5XX) + return BytesIO(response.content) + + +def bytesio_to_image_tensor(image_bytesio: BytesIO, mode: str = "RGBA") -> torch.Tensor: + """Converts image data from BytesIO to a torch.Tensor. 
+ + Args: + image_bytesio: BytesIO object containing the image data. + mode: The PIL mode to convert the image to (e.g., "RGB", "RGBA"). + + Returns: + A torch.Tensor representing the image (1, H, W, C). + + Raises: + PIL.UnidentifiedImageError: If the image data cannot be identified. + ValueError: If the specified mode is invalid. + """ + image = Image.open(image_bytesio) + image = image.convert(mode) + image_array = np.array(image).astype(np.float32) / 255.0 + return torch.from_numpy(image_array).unsqueeze(0) + + +def download_url_to_image_tensor(url: str, timeout: int = None) -> torch.Tensor: + """Downloads an image from a URL and returns a [B, H, W, C] tensor.""" + image_bytesio = download_url_to_bytesio(url, timeout) + return bytesio_to_image_tensor(image_bytesio) + +def process_image_response(response: requests.Response) -> torch.Tensor: + """Uses content from a Response object and converts it to a torch.Tensor""" + return bytesio_to_image_tensor(BytesIO(response.content)) + + +def _tensor_to_pil(image: torch.Tensor, total_pixels: int = 2048 * 2048) -> Image.Image: + """Converts a single torch.Tensor image [H, W, C] to a PIL Image, optionally downscaling.""" + if len(image.shape) > 3: + image = image[0] + # TODO: remove alpha if not allowed and present + input_tensor = image.cpu() + input_tensor = downscale_image_tensor( + input_tensor.unsqueeze(0), total_pixels=total_pixels + ).squeeze() + image_np = (input_tensor.numpy() * 255).astype(np.uint8) + img = Image.fromarray(image_np) + return img + + +def _pil_to_bytesio(img: Image.Image, mime_type: str = "image/png") -> BytesIO: + """Converts a PIL Image to a BytesIO object.""" + if not mime_type: + mime_type = "image/png" + + img_byte_arr = io.BytesIO() + # Derive PIL format from MIME type (e.g., 'image/png' -> 'PNG') + pil_format = mime_type.split("/")[-1].upper() + if pil_format == "JPG": + pil_format = "JPEG" + img.save(img_byte_arr, format=pil_format) + img_byte_arr.seek(0) + return img_byte_arr + + +def tensor_to_bytesio( + image: torch.Tensor, + name: Optional[str] = None, + total_pixels: int = 2048 * 2048, + mime_type: str = "image/png", +) -> BytesIO: + """Converts a torch.Tensor image to a named BytesIO object. + + Args: + image: Input torch.Tensor image. + name: Optional filename for the BytesIO object. + total_pixels: Maximum total pixels for potential downscaling. + mime_type: Target image MIME type (e.g., 'image/png', 'image/jpeg', 'image/webp', 'video/mp4'). + + Returns: + Named BytesIO object containing the image data. + """ + if not mime_type: + mime_type = "image/png" + + pil_image = _tensor_to_pil(image, total_pixels=total_pixels) + img_binary = _pil_to_bytesio(pil_image, mime_type=mime_type) + img_binary.name = ( + f"{name if name else uuid.uuid4()}.{mimetype_to_extension(mime_type)}" + ) + return img_binary + + +def tensor_to_base64_string( + image_tensor: torch.Tensor, + total_pixels: int = 2048 * 2048, + mime_type: str = "image/png", +) -> str: + """Convert [B, H, W, C] or [H, W, C] tensor to a base64 string. + + Args: + image_tensor: Input torch.Tensor image. + total_pixels: Maximum total pixels for potential downscaling. + mime_type: Target image MIME type (e.g., 'image/png', 'image/jpeg', 'image/webp', 'video/mp4'). + + Returns: + Base64 encoded string of the image. 
+ """ + pil_image = _tensor_to_pil(image_tensor, total_pixels=total_pixels) + img_byte_arr = _pil_to_bytesio(pil_image, mime_type=mime_type) + img_bytes = img_byte_arr.getvalue() + # Encode bytes to base64 string + base64_encoded_string = base64.b64encode(img_bytes).decode("utf-8") + return base64_encoded_string + + +def tensor_to_data_uri( + image_tensor: torch.Tensor, + total_pixels: int = 2048 * 2048, + mime_type: str = "image/png", +) -> str: + """Converts a tensor image to a Data URI string. + + Args: + image_tensor: Input torch.Tensor image. + total_pixels: Maximum total pixels for potential downscaling. + mime_type: Target image MIME type (e.g., 'image/png', 'image/jpeg', 'image/webp'). + + Returns: + Data URI string (e.g., 'data:image/png;base64,...'). + """ + base64_string = tensor_to_base64_string(image_tensor, total_pixels, mime_type) + return f"data:{mime_type};base64,{base64_string}" + + +def upload_file_to_comfyapi( + file_bytes_io: BytesIO, + filename: str, + upload_mime_type: str, + auth_kwargs: Optional[dict[str,str]] = None, +) -> str: + """ + Uploads a single file to ComfyUI API and returns its download URL. + + Args: + file_bytes_io: BytesIO object containing the file data. + filename: The filename of the file. + upload_mime_type: MIME type of the file. + auth_kwargs: Optional authentication token(s). + + Returns: + The download URL for the uploaded file. + """ + request_object = UploadRequest(file_name=filename, content_type=upload_mime_type) + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/customers/storage", + method=HttpMethod.POST, + request_model=UploadRequest, + response_model=UploadResponse, + ), + request=request_object, + auth_kwargs=auth_kwargs, + ) + + response: UploadResponse = operation.execute() + upload_response = ApiClient.upload_file( + response.upload_url, file_bytes_io, content_type=upload_mime_type + ) + upload_response.raise_for_status() + + return response.download_url + + +def upload_video_to_comfyapi( + video: VideoInput, + auth_kwargs: Optional[dict[str,str]] = None, + container: VideoContainer = VideoContainer.MP4, + codec: VideoCodec = VideoCodec.H264, + max_duration: Optional[int] = None, +) -> str: + """ + Uploads a single video to ComfyUI API and returns its download URL. + Uses the specified container and codec for saving the video before upload. + + Args: + video: VideoInput object (Comfy VIDEO type). + auth_kwargs: Optional authentication token(s). + container: The video container format to use (default: MP4). + codec: The video codec to use (default: H264). + max_duration: Optional maximum duration of the video in seconds. If the video is longer than this, an error will be raised. + + Returns: + The download URL for the uploaded video file. + """ + if max_duration is not None: + try: + actual_duration = video.duration_seconds + if actual_duration is not None and actual_duration > max_duration: + raise ValueError( + f"Video duration ({actual_duration:.2f}s) exceeds the maximum allowed ({max_duration}s)." 
+ ) + except Exception as e: + logging.error(f"Error getting video duration: {e}") + raise ValueError(f"Could not verify video duration from source: {e}") from e + + upload_mime_type = f"video/{container.value.lower()}" + filename = f"uploaded_video.{container.value.lower()}" + + # Convert VideoInput to BytesIO using specified container/codec + video_bytes_io = io.BytesIO() + video.save_to(video_bytes_io, format=container, codec=codec) + video_bytes_io.seek(0) + + return upload_file_to_comfyapi( + video_bytes_io, filename, upload_mime_type, auth_kwargs + ) + + +def audio_tensor_to_contiguous_ndarray(waveform: torch.Tensor) -> np.ndarray: + """ + Prepares audio waveform for av library by converting to a contiguous numpy array. + + Args: + waveform: a tensor of shape (1, channels, samples) derived from a Comfy `AUDIO` type. + + Returns: + Contiguous numpy array of the audio waveform. If the audio was batched, + the first item is taken. + """ + if waveform.ndim != 3 or waveform.shape[0] != 1: + raise ValueError("Expected waveform tensor shape (1, channels, samples)") + + # If batch is > 1, take first item + if waveform.shape[0] > 1: + waveform = waveform[0] + + # Prepare for av: remove batch dim, move to CPU, make contiguous, convert to numpy array + audio_data_np = waveform.squeeze(0).cpu().contiguous().numpy() + if audio_data_np.dtype != np.float32: + audio_data_np = audio_data_np.astype(np.float32) + + return audio_data_np + + +def audio_ndarray_to_bytesio( + audio_data_np: np.ndarray, + sample_rate: int, + container_format: str = "mp4", + codec_name: str = "aac", +) -> BytesIO: + """ + Encodes a numpy array of audio data into a BytesIO object. + """ + audio_bytes_io = io.BytesIO() + with av.open(audio_bytes_io, mode="w", format=container_format) as output_container: + audio_stream = output_container.add_stream(codec_name, rate=sample_rate) + frame = av.AudioFrame.from_ndarray( + audio_data_np, + format="fltp", + layout="stereo" if audio_data_np.shape[0] > 1 else "mono", + ) + frame.sample_rate = sample_rate + frame.pts = 0 + + for packet in audio_stream.encode(frame): + output_container.mux(packet) + + # Flush stream + for packet in audio_stream.encode(None): + output_container.mux(packet) + + audio_bytes_io.seek(0) + return audio_bytes_io + + +def upload_audio_to_comfyapi( + audio: AudioInput, + auth_kwargs: Optional[dict[str,str]] = None, + container_format: str = "mp4", + codec_name: str = "aac", + mime_type: str = "audio/mp4", + filename: str = "uploaded_audio.mp4", +) -> str: + """ + Uploads a single audio input to ComfyUI API and returns its download URL. + Encodes the raw waveform into the specified format before uploading. + + Args: + audio: a Comfy `AUDIO` type (contains waveform tensor and sample_rate) + auth_kwargs: Optional authentication token(s). + + Returns: + The download URL for the uploaded audio file. + """ + sample_rate: int = audio["sample_rate"] + waveform: torch.Tensor = audio["waveform"] + audio_data_np = audio_tensor_to_contiguous_ndarray(waveform) + audio_bytes_io = audio_ndarray_to_bytesio( + audio_data_np, sample_rate, container_format, codec_name + ) + + return upload_file_to_comfyapi(audio_bytes_io, filename, mime_type, auth_kwargs) + + +def upload_images_to_comfyapi( + image: torch.Tensor, max_images=8, auth_kwargs: Optional[dict[str,str]] = None, mime_type: Optional[str] = None +) -> list[str]: + """ + Uploads images to ComfyUI API and returns download URLs. + To upload multiple images, stack them in the batch dimension first. 
+ + Args: + image: Input torch.Tensor image. + max_images: Maximum number of images to upload. + auth_kwargs: Optional authentication token(s). + mime_type: Optional MIME type for the image. + """ + # if batch, try to upload each file if max_images is greater than 0 + idx_image = 0 + download_urls: list[str] = [] + is_batch = len(image.shape) > 3 + batch_length = 1 + if is_batch: + batch_length = image.shape[0] + while True: + curr_image = image + if len(image.shape) > 3: + curr_image = image[idx_image] + # get BytesIO version of image + img_binary = tensor_to_bytesio(curr_image, mime_type=mime_type) + # first, request upload/download urls from comfy API + if not mime_type: + request_object = UploadRequest(file_name=img_binary.name) + else: + request_object = UploadRequest( + file_name=img_binary.name, content_type=mime_type + ) + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/customers/storage", + method=HttpMethod.POST, + request_model=UploadRequest, + response_model=UploadResponse, + ), + request=request_object, + auth_kwargs=auth_kwargs, + ) + response = operation.execute() + + upload_response = ApiClient.upload_file( + response.upload_url, img_binary, content_type=mime_type + ) + # verify success + try: + upload_response.raise_for_status() + except requests.exceptions.HTTPError as e: + raise ValueError(f"Could not upload one or more images: {e}") from e + # add download_url to list + download_urls.append(response.download_url) + + idx_image += 1 + # stop uploading additional files if done + if is_batch and max_images > 0: + if idx_image >= max_images: + break + if idx_image >= batch_length: + break + return download_urls + + +def resize_mask_to_image(mask: torch.Tensor, image: torch.Tensor, + upscale_method="nearest-exact", crop="disabled", + allow_gradient=True, add_channel_dim=False): + """ + Resize mask to be the same dimensions as an image, while maintaining proper format for API calls. + """ + _, H, W, _ = image.shape + mask = mask.unsqueeze(-1) + mask = mask.movedim(-1,1) + mask = common_upscale(mask, width=W, height=H, upscale_method=upscale_method, crop=crop) + mask = mask.movedim(1,-1) + if not add_channel_dim: + mask = mask.squeeze(-1) + if not allow_gradient: + mask = (mask > 0.5).float() + return mask + + +def validate_string(string: str, strip_whitespace=True, field_name="prompt", min_length=None, max_length=None): + if strip_whitespace: + string = string.strip() + if min_length and len(string) < min_length: + raise Exception(f"Field '{field_name}' cannot be shorter than {min_length} characters; was {len(string)} characters long.") + if max_length and len(string) > max_length: + raise Exception(f"Field '{field_name}' cannot be longer than {max_length} characters; was {len(string)} characters long.") + if not string: + raise Exception(f"Field '{field_name}' cannot be empty.") diff --git a/comfy_api_nodes/apis/PixverseController.py b/comfy_api_nodes/apis/PixverseController.py new file mode 100644 index 00000000..310c0f54 --- /dev/null +++ b/comfy_api_nodes/apis/PixverseController.py @@ -0,0 +1,17 @@ +# generated by datamodel-codegen: +# filename: filtered-openapi.yaml +# timestamp: 2025-04-29T23:44:54+00:00 + +from __future__ import annotations + +from typing import Optional + +from pydantic import BaseModel + +from . 
import PixverseDto + + +class ResponseData(BaseModel): + ErrCode: Optional[int] = None + ErrMsg: Optional[str] = None + Resp: Optional[PixverseDto.V2OpenAPII2VResp] = None diff --git a/comfy_api_nodes/apis/PixverseDto.py b/comfy_api_nodes/apis/PixverseDto.py new file mode 100644 index 00000000..323c38e9 --- /dev/null +++ b/comfy_api_nodes/apis/PixverseDto.py @@ -0,0 +1,57 @@ +# generated by datamodel-codegen: +# filename: filtered-openapi.yaml +# timestamp: 2025-04-29T23:44:54+00:00 + +from __future__ import annotations + +from typing import Optional + +from pydantic import BaseModel, Field + + +class V2OpenAPII2VResp(BaseModel): + video_id: Optional[int] = Field(None, description='Video_id') + + +class V2OpenAPIT2VReq(BaseModel): + aspect_ratio: str = Field( + ..., description='Aspect ratio (16:9, 4:3, 1:1, 3:4, 9:16)', examples=['16:9'] + ) + duration: int = Field( + ..., + description='Video duration (5, 8 seconds, --model=v3.5 only allows 5,8; --quality=1080p does not support 8s)', + examples=[5], + ) + model: str = Field( + ..., description='Model version (only supports v3.5)', examples=['v3.5'] + ) + motion_mode: Optional[str] = Field( + 'normal', + description='Motion mode (normal, fast, --fast only available when duration=5; --quality=1080p does not support fast)', + examples=['normal'], + ) + negative_prompt: Optional[str] = Field( + None, description='Negative prompt\n', max_length=2048 + ) + prompt: str = Field(..., description='Prompt', max_length=2048) + quality: str = Field( + ..., + description='Video quality ("360p"(Turbo model), "540p", "720p", "1080p")', + examples=['540p'], + ) + seed: Optional[int] = Field(None, description='Random seed, range: 0 - 2147483647') + style: Optional[str] = Field( + None, + description='Style (effective when model=v3.5, "anime", "3d_animation", "clay", "comic", "cyberpunk") Do not include style parameter unless needed', + examples=['anime'], + ) + template_id: Optional[int] = Field( + None, + description='Template ID (template_id must be activated before use)', + examples=[302325299692608], + ) + water_mark: Optional[bool] = Field( + False, + description='Watermark (true: add watermark, false: no watermark)', + examples=[False], + ) diff --git a/comfy_api_nodes/apis/__init__.py b/comfy_api_nodes/apis/__init__.py new file mode 100644 index 00000000..aa1c4ce0 --- /dev/null +++ b/comfy_api_nodes/apis/__init__.py @@ -0,0 +1,3829 @@ +# generated by datamodel-codegen: +# filename: filtered-openapi.yaml +# timestamp: 2025-05-04T04:12:39+00:00 + +from __future__ import annotations + +from datetime import datetime +from enum import Enum +from typing import Any, Dict, List, Literal, Optional, Union +from uuid import UUID + +from pydantic import AnyUrl, BaseModel, Field, RootModel, StrictBytes + + +class PersonalAccessToken(BaseModel): + id: Optional[UUID] = Field(None, description='Unique identifier for the GitCommit') + name: Optional[str] = Field( + None, + description='Required. The name of the token. Can be a simple description.', + ) + description: Optional[str] = Field( + None, + description="Optional. A more detailed description of the token's intended use.", + ) + createdAt: Optional[datetime] = Field( + None, description='[Output Only]The date and time the token was created.' + ) + token: Optional[str] = Field( + None, + description='[Output Only]. The personal access token. 
Only returned during creation.', + ) + + +class GitCommitSummary(BaseModel): + commit_hash: Optional[str] = Field(None, description='The hash of the commit') + commit_name: Optional[str] = Field(None, description='The name of the commit') + branch_name: Optional[str] = Field( + None, description='The branch where the commit was made' + ) + author: Optional[str] = Field(None, description='The author of the commit') + timestamp: Optional[datetime] = Field( + None, description='The timestamp when the commit was made' + ) + status_summary: Optional[Dict[str, str]] = Field( + None, description='A map of operating system to status pairs' + ) + + +class User(BaseModel): + id: Optional[str] = Field(None, description='The unique id for this user.') + email: Optional[str] = Field(None, description='The email address for this user.') + name: Optional[str] = Field(None, description='The name for this user.') + isApproved: Optional[bool] = Field( + None, description='Indicates if the user is approved.' + ) + isAdmin: Optional[bool] = Field( + None, description='Indicates if the user has admin privileges.' + ) + + +class PublisherUser(BaseModel): + id: Optional[str] = Field(None, description='The unique id for this user.') + email: Optional[str] = Field(None, description='The email address for this user.') + name: Optional[str] = Field(None, description='The name for this user.') + + +class ErrorResponse(BaseModel): + error: str + message: str + + +class StorageFile(BaseModel): + id: Optional[UUID] = Field( + None, description='Unique identifier for the storage file' + ) + file_path: Optional[str] = Field(None, description='Path to the file in storage') + public_url: Optional[str] = Field(None, description='Public URL') + + +class PublisherMember(BaseModel): + id: Optional[str] = Field( + None, description='The unique identifier for the publisher member.' + ) + user: Optional[PublisherUser] = Field( + None, description='The user associated with this publisher member.' + ) + role: Optional[str] = Field( + None, description='The role of the user in the publisher.' + ) + + +class ComfyNode(BaseModel): + comfy_node_name: Optional[str] = Field( + None, description='Unique identifier for the node' + ) + category: Optional[str] = Field( + None, + description='UI category where the node is listed, used for grouping nodes.', + ) + description: Optional[str] = Field( + None, description="Brief description of the node's functionality or purpose." + ) + input_types: Optional[str] = Field(None, description='Defines input parameters') + deprecated: Optional[bool] = Field( + None, + description='Indicates if the node is deprecated. Deprecated nodes are hidden in the UI.', + ) + experimental: Optional[bool] = Field( + None, + description='Indicates if the node is experimental, subject to changes or removal.', + ) + output_is_list: Optional[List[bool]] = Field( + None, description='Boolean values indicating if each output is a list.' + ) + return_names: Optional[str] = Field( + None, description='Names of the outputs for clarity in workflows.' + ) + return_types: Optional[str] = Field( + None, description='Specifies the types of outputs produced by the node.' + ) + function: Optional[str] = Field( + None, description='Name of the entry-point function to execute the node.' 
+ ) + + +class ComfyNodeCloudBuildInfo(BaseModel): + project_id: Optional[str] = None + project_number: Optional[str] = None + location: Optional[str] = None + build_id: Optional[str] = None + + +class Error(BaseModel): + message: Optional[str] = Field( + None, description='A clear and concise description of the error.' + ) + details: Optional[List[str]] = Field( + None, + description='Optional detailed information about the error or hints for resolving it.', + ) + + +class NodeVersionUpdateRequest(BaseModel): + changelog: Optional[str] = Field( + None, description='The changelog describing the version changes.' + ) + deprecated: Optional[bool] = Field( + None, description='Whether the version is deprecated.' + ) + + +class NodeStatus(str, Enum): + NodeStatusActive = 'NodeStatusActive' + NodeStatusDeleted = 'NodeStatusDeleted' + NodeStatusBanned = 'NodeStatusBanned' + + +class NodeVersionStatus(str, Enum): + NodeVersionStatusActive = 'NodeVersionStatusActive' + NodeVersionStatusDeleted = 'NodeVersionStatusDeleted' + NodeVersionStatusBanned = 'NodeVersionStatusBanned' + NodeVersionStatusPending = 'NodeVersionStatusPending' + NodeVersionStatusFlagged = 'NodeVersionStatusFlagged' + + +class PublisherStatus(str, Enum): + PublisherStatusActive = 'PublisherStatusActive' + PublisherStatusBanned = 'PublisherStatusBanned' + + +class WorkflowRunStatus(str, Enum): + WorkflowRunStatusStarted = 'WorkflowRunStatusStarted' + WorkflowRunStatusFailed = 'WorkflowRunStatusFailed' + WorkflowRunStatusCompleted = 'WorkflowRunStatusCompleted' + + +class MachineStats(BaseModel): + machine_name: Optional[str] = Field(None, description='Name of the machine.') + os_version: Optional[str] = Field( + None, description='The operating system version. eg. Ubuntu Linux 20.04' + ) + gpu_type: Optional[str] = Field( + None, description='The GPU type. eg. NVIDIA Tesla K80' + ) + cpu_capacity: Optional[str] = Field(None, description='Total CPU on the machine.') + initial_cpu: Optional[str] = Field( + None, description='Initial CPU available before the job starts.' + ) + memory_capacity: Optional[str] = Field( + None, description='Total memory on the machine.' + ) + initial_ram: Optional[str] = Field( + None, description='Initial RAM available before the job starts.' + ) + vram_time_series: Optional[Dict[str, Any]] = Field( + None, description='Time series of VRAM usage.' + ) + disk_capacity: Optional[str] = Field( + None, description='Total disk capacity on the machine.' + ) + initial_disk: Optional[str] = Field( + None, description='Initial disk available before the job starts.' 
+ ) + pip_freeze: Optional[str] = Field(None, description='The pip freeze output') + + +class Customer(BaseModel): + id: str = Field(..., description='The firebase UID of the user') + email: Optional[str] = Field(None, description='The email address for this user') + name: Optional[str] = Field(None, description='The name for this user') + createdAt: Optional[datetime] = Field( + None, description='The date and time the user was created' + ) + updatedAt: Optional[datetime] = Field( + None, description='The date and time the user was last updated' + ) + + +class MagicPrompt(str, Enum): + ON = 'ON' + OFF = 'OFF' + + +class ColorPalette(BaseModel): + name: str = Field(..., description='Name of the color palette', examples=['PASTEL']) + + +class StyleCode(RootModel[str]): + root: str = Field(..., pattern='^[0-9A-Fa-f]{8}$') + + +class StyleType(str, Enum): + GENERAL = 'GENERAL' + + +class IdeogramColorPalette1(BaseModel): + name: str = Field(..., description='Name of the preset color palette') + + +class Member(BaseModel): + color: Optional[str] = Field( + None, description='Hexadecimal color code', pattern='^#[0-9A-Fa-f]{6}$' + ) + weight: Optional[float] = Field( + None, description='Optional weight for the color (0-1)', ge=0.0, le=1.0 + ) + + +class IdeogramColorPalette2(BaseModel): + members: List[Member] = Field( + ..., description='Array of color definitions with optional weights' + ) + + +class IdeogramColorPalette( + RootModel[Union[IdeogramColorPalette1, IdeogramColorPalette2]] +): + root: Union[IdeogramColorPalette1, IdeogramColorPalette2] = Field( + ..., + description='A color palette specification that can either use a preset name or explicit color definitions with weights', + ) + + +class ImageRequest(BaseModel): + prompt: str = Field( + ..., description='Required. The prompt to use to generate the image.' + ) + aspect_ratio: Optional[str] = Field( + None, + description="Optional. The aspect ratio (e.g., 'ASPECT_16_9', 'ASPECT_1_1'). Cannot be used with resolution. Defaults to 'ASPECT_1_1' if unspecified.", + ) + model: str = Field(..., description="The model used (e.g., 'V_2', 'V_2A_TURBO')") + magic_prompt_option: Optional[str] = Field( + None, description="Optional. MagicPrompt usage ('AUTO', 'ON', 'OFF')." + ) + seed: Optional[int] = Field( + None, + description='Optional. A number between 0 and 2147483647.', + ge=0, + le=2147483647, + ) + style_type: Optional[str] = Field( + None, + description="Optional. Style type ('AUTO', 'GENERAL', 'REALISTIC', 'DESIGN', 'RENDER_3D', 'ANIME'). Only for models V_2 and above.", + ) + negative_prompt: Optional[str] = Field( + None, + description='Optional. Description of what to exclude. Only for V_1, V_1_TURBO, V_2, V_2_TURBO.', + ) + num_images: Optional[int] = Field( + 1, + description='Optional. Number of images to generate (1-8). Defaults to 1.', + ge=1, + le=8, + ) + resolution: Optional[str] = Field( + None, + description="Optional. Resolution (e.g., 'RESOLUTION_1024_1024'). Only for model V_2. Cannot be used with aspect_ratio.", + ) + color_palette: Optional[Dict[str, Any]] = Field( + None, description='Optional. Color palette object. Only for V_2, V_2_TURBO.' + ) + + +class IdeogramGenerateRequest(BaseModel): + image_request: ImageRequest = Field( + ..., description='The image generation request parameters.' + ) + + +class Datum(BaseModel): + prompt: Optional[str] = Field( + None, description='The prompt used to generate this image.' 
+ ) + resolution: Optional[str] = Field( + None, description="The resolution of the generated image (e.g., '1024x1024')." + ) + is_image_safe: Optional[bool] = Field( + None, description='Indicates whether the image is considered safe.' + ) + seed: Optional[int] = Field( + None, description='The seed value used for this generation.' + ) + url: Optional[str] = Field(None, description='URL to the generated image.') + style_type: Optional[str] = Field( + None, + description="The style type used for generation (e.g., 'REALISTIC', 'ANIME').", + ) + + +class IdeogramGenerateResponse(BaseModel): + created: Optional[datetime] = Field( + None, description='Timestamp when the generation was created.' + ) + data: Optional[List[Datum]] = Field( + None, description='Array of generated image information.' + ) + + +class RenderingSpeed1(str, Enum): + TURBO = 'TURBO' + DEFAULT = 'DEFAULT' + QUALITY = 'QUALITY' + + +class MagicPrompt1(str, Enum): + AUTO = 'AUTO' + ON = 'ON' + OFF = 'OFF' + + +class StyleType1(str, Enum): + AUTO = 'AUTO' + GENERAL = 'GENERAL' + REALISTIC = 'REALISTIC' + DESIGN = 'DESIGN' + + +class IdeogramV3RemixRequest(BaseModel): + image: Optional[StrictBytes] = None + prompt: str + image_weight: Optional[int] = Field(50, ge=1, le=100) + seed: Optional[int] = Field(None, ge=0, le=2147483647) + resolution: Optional[str] = None + aspect_ratio: Optional[str] = None + rendering_speed: Optional[RenderingSpeed1] = None + magic_prompt: Optional[MagicPrompt1] = None + negative_prompt: Optional[str] = None + num_images: Optional[int] = Field(None, ge=1, le=8) + color_palette: Optional[Dict[str, Any]] = None + style_codes: Optional[List[str]] = None + style_type: Optional[StyleType1] = None + style_reference_images: Optional[List[StrictBytes]] = None + + +class Datum1(BaseModel): + prompt: Optional[str] = None + resolution: Optional[str] = None + is_image_safe: Optional[bool] = None + seed: Optional[int] = None + url: Optional[str] = None + style_type: Optional[str] = None + + +class IdeogramV3IdeogramResponse(BaseModel): + created: Optional[datetime] = None + data: Optional[List[Datum1]] = None + + +class IdeogramV3ReframeRequest(BaseModel): + image: Optional[StrictBytes] = None + resolution: str + num_images: Optional[int] = Field(None, ge=1, le=8) + seed: Optional[int] = Field(None, ge=0, le=2147483647) + rendering_speed: Optional[RenderingSpeed1] = None + color_palette: Optional[Dict[str, Any]] = None + style_codes: Optional[List[str]] = None + style_reference_images: Optional[List[StrictBytes]] = None + + +class IdeogramV3ReplaceBackgroundRequest(BaseModel): + image: Optional[StrictBytes] = None + prompt: str + magic_prompt: Optional[MagicPrompt1] = None + num_images: Optional[int] = Field(None, ge=1, le=8) + seed: Optional[int] = Field(None, ge=0, le=2147483647) + rendering_speed: Optional[RenderingSpeed1] = None + color_palette: Optional[Dict[str, Any]] = None + style_codes: Optional[List[str]] = None + style_reference_images: Optional[List[StrictBytes]] = None + + +class KlingTaskStatus(str, Enum): + submitted = 'submitted' + processing = 'processing' + succeed = 'succeed' + failed = 'failed' + + +class KlingVideoGenModelName(str, Enum): + kling_v1 = 'kling-v1' + kling_v1_5 = 'kling-v1-5' + kling_v1_6 = 'kling-v1-6' + kling_v2_master = 'kling-v2-master' + + +class KlingVideoGenMode(str, Enum): + std = 'std' + pro = 'pro' + + +class KlingVideoGenAspectRatio(str, Enum): + field_16_9 = '16:9' + field_9_16 = '9:16' + field_1_1 = '1:1' + + +class KlingVideoGenDuration(str, Enum): + field_5 = 
'5' + field_10 = '10' + + +class KlingVideoGenCfgScale(RootModel[float]): + root: float = Field( + ..., + description="Flexibility in video generation. The higher the value, the lower the model's degree of flexibility, and the stronger the relevance to the user's prompt.", + ge=0.0, + le=1.0, + ) + + +class KlingCameraControlType(str, Enum): + simple = 'simple' + down_back = 'down_back' + forward_up = 'forward_up' + right_turn_forward = 'right_turn_forward' + left_turn_forward = 'left_turn_forward' + + +class KlingCameraConfig(BaseModel): + horizontal: Optional[float] = Field( + None, + description="Controls camera's movement along horizontal axis (x-axis). Negative indicates left, positive indicates right.", + ge=-10.0, + le=10.0, + ) + vertical: Optional[float] = Field( + None, + description="Controls camera's movement along vertical axis (y-axis). Negative indicates downward, positive indicates upward.", + ge=-10.0, + le=10.0, + ) + pan: Optional[float] = Field( + None, + description="Controls camera's rotation in vertical plane (x-axis). Negative indicates downward rotation, positive indicates upward rotation.", + ge=-10.0, + le=10.0, + ) + tilt: Optional[float] = Field( + None, + description="Controls camera's rotation in horizontal plane (y-axis). Negative indicates left rotation, positive indicates right rotation.", + ge=-10.0, + le=10.0, + ) + roll: Optional[float] = Field( + None, + description="Controls camera's rolling amount (z-axis). Negative indicates counterclockwise, positive indicates clockwise.", + ge=-10.0, + le=10.0, + ) + zoom: Optional[float] = Field( + None, + description="Controls change in camera's focal length. Negative indicates narrower field of view, positive indicates wider field of view.", + ge=-10.0, + le=10.0, + ) + + +class KlingVideoResult(BaseModel): + id: Optional[str] = Field(None, description='Generated video ID') + url: Optional[AnyUrl] = Field(None, description='URL for generated video') + duration: Optional[str] = Field(None, description='Total video duration') + + +class KlingAudioUploadType(str, Enum): + file = 'file' + url = 'url' + + +class KlingLipSyncMode(str, Enum): + text2video = 'text2video' + audio2video = 'audio2video' + + +class KlingLipSyncVoiceLanguage(str, Enum): + zh = 'zh' + en = 'en' + + +class KlingDualCharacterEffectsScene(str, Enum): + hug = 'hug' + kiss = 'kiss' + heart_gesture = 'heart_gesture' + + +class KlingSingleImageEffectsScene(str, Enum): + bloombloom = 'bloombloom' + dizzydizzy = 'dizzydizzy' + fuzzyfuzzy = 'fuzzyfuzzy' + squish = 'squish' + expansion = 'expansion' + + +class KlingCharacterEffectModelName(str, Enum): + kling_v1 = 'kling-v1' + kling_v1_5 = 'kling-v1-5' + kling_v1_6 = 'kling-v1-6' + + +class KlingSingleImageEffectModelName(str, Enum): + kling_v1_6 = 'kling-v1-6' + + +class KlingSingleImageEffectDuration(str, Enum): + field_5 = '5' + + +class KlingDualCharacterImages(RootModel[List[str]]): + root: List[str] = Field(..., max_length=2, min_length=2) + + +class KlingImageGenAspectRatio(str, Enum): + field_16_9 = '16:9' + field_9_16 = '9:16' + field_1_1 = '1:1' + field_4_3 = '4:3' + field_3_4 = '3:4' + field_3_2 = '3:2' + field_2_3 = '2:3' + field_21_9 = '21:9' + + +class KlingImageGenImageReferenceType(str, Enum): + subject = 'subject' + face = 'face' + + +class KlingImageGenModelName(str, Enum): + kling_v1 = 'kling-v1' + kling_v1_5 = 'kling-v1-5' + kling_v2 = 'kling-v2' + + +class KlingImageResult(BaseModel): + index: Optional[int] = Field(None, description='Image Number (0-9)') + url: Optional[AnyUrl] = 
Field(None, description='URL for generated image') + + +class KlingVirtualTryOnModelName(str, Enum): + kolors_virtual_try_on_v1 = 'kolors-virtual-try-on-v1' + kolors_virtual_try_on_v1_5 = 'kolors-virtual-try-on-v1-5' + + +class TaskInfo(BaseModel): + external_task_id: Optional[str] = None + + +class TaskResult(BaseModel): + videos: Optional[List[KlingVideoResult]] = None + + +class Data(BaseModel): + task_id: Optional[str] = Field(None, description='Task ID') + task_status: Optional[KlingTaskStatus] = None + task_info: Optional[TaskInfo] = None + created_at: Optional[int] = Field(None, description='Task creation time') + updated_at: Optional[int] = Field(None, description='Task update time') + task_result: Optional[TaskResult] = None + + +class KlingText2VideoResponse(BaseModel): + code: Optional[int] = Field(None, description='Error code') + message: Optional[str] = Field(None, description='Error message') + request_id: Optional[str] = Field(None, description='Request ID') + data: Optional[Data] = None + + +class Trajectory(BaseModel): + x: Optional[int] = Field( + None, + description='The horizontal coordinate of trajectory point. Based on bottom-left corner of image as origin (0,0).', + ) + y: Optional[int] = Field( + None, + description='The vertical coordinate of trajectory point. Based on bottom-left corner of image as origin (0,0).', + ) + + +class DynamicMask(BaseModel): + mask: Optional[AnyUrl] = Field( + None, + description='Dynamic Brush Application Area (Mask image created by users using the motion brush). The aspect ratio must match the input image.', + ) + trajectories: Optional[List[Trajectory]] = None + + +class Data1(BaseModel): + task_id: Optional[str] = Field(None, description='Task ID') + task_status: Optional[KlingTaskStatus] = None + task_info: Optional[TaskInfo] = None + created_at: Optional[int] = Field(None, description='Task creation time') + updated_at: Optional[int] = Field(None, description='Task update time') + task_result: Optional[TaskResult] = None + + +class KlingImage2VideoResponse(BaseModel): + code: Optional[int] = Field(None, description='Error code') + message: Optional[str] = Field(None, description='Error message') + request_id: Optional[str] = Field(None, description='Request ID') + data: Optional[Data1] = None + + +class KlingVideoExtendRequest(BaseModel): + video_id: Optional[str] = Field( + None, + description='The ID of the video to be extended. Supports videos generated by text-to-video, image-to-video, and previous video extension operations. Cannot exceed 3 minutes total duration after extension.', + ) + prompt: Optional[str] = Field( + None, + description='Positive text prompt for guiding the video extension', + max_length=2500, + ) + negative_prompt: Optional[str] = Field( + None, + description='Negative text prompt for elements to avoid in the extended video', + max_length=2500, + ) + cfg_scale: Optional[KlingVideoGenCfgScale] = Field( + default_factory=lambda: KlingVideoGenCfgScale.model_validate(0.5) + ) + callback_url: Optional[AnyUrl] = Field( + None, + description='The callback notification address. 
Server will notify when the task status changes.', + ) + + +class Data2(BaseModel): + task_id: Optional[str] = Field(None, description='Task ID') + task_status: Optional[KlingTaskStatus] = None + task_info: Optional[TaskInfo] = None + created_at: Optional[int] = Field(None, description='Task creation time') + updated_at: Optional[int] = Field(None, description='Task update time') + task_result: Optional[TaskResult] = None + + +class KlingVideoExtendResponse(BaseModel): + code: Optional[int] = Field(None, description='Error code') + message: Optional[str] = Field(None, description='Error message') + request_id: Optional[str] = Field(None, description='Request ID') + data: Optional[Data2] = None + + +class KlingLipSyncInputObject(BaseModel): + video_id: Optional[str] = Field( + None, + description='The ID of the video generated by Kling AI. Only supports 5-second and 10-second videos generated within the last 30 days.', + ) + video_url: Optional[str] = Field( + None, + description='Get link for uploaded video. Video files support .mp4/.mov, file size does not exceed 100MB, video length between 2-10s.', + ) + mode: KlingLipSyncMode + text: Optional[str] = Field( + None, + description='Text Content for Lip-Sync Video Generation. Required when mode is text2video. Maximum length is 120 characters.', + ) + voice_id: Optional[str] = Field( + None, + description='Voice ID. Required when mode is text2video. The system offers a variety of voice options to choose from.', + ) + voice_language: Optional[KlingLipSyncVoiceLanguage] = 'en' + voice_speed: Optional[float] = Field( + 1, + description='Speech Rate. Valid range: 0.8~2.0, accurate to one decimal place.', + ge=0.8, + le=2.0, + ) + audio_type: Optional[KlingAudioUploadType] = None + audio_file: Optional[str] = Field( + None, + description='Local Path of Audio File. Supported formats: .mp3/.wav/.m4a/.aac, maximum file size of 5MB. Base64 code.', + ) + audio_url: Optional[str] = Field( + None, + description='Audio File Download URL. Supported formats: .mp3/.wav/.m4a/.aac, maximum file size of 5MB.', + ) + + +class KlingLipSyncRequest(BaseModel): + input: KlingLipSyncInputObject + callback_url: Optional[AnyUrl] = Field( + None, + description='The callback notification address. Server will notify when the task status changes.', + ) + + +class Data3(BaseModel): + task_id: Optional[str] = Field(None, description='Task ID') + task_status: Optional[KlingTaskStatus] = None + task_info: Optional[TaskInfo] = None + created_at: Optional[int] = Field(None, description='Task creation time') + updated_at: Optional[int] = Field(None, description='Task update time') + task_result: Optional[TaskResult] = None + + +class KlingLipSyncResponse(BaseModel): + code: Optional[int] = Field(None, description='Error code') + message: Optional[str] = Field(None, description='Error message') + request_id: Optional[str] = Field(None, description='Request ID') + data: Optional[Data3] = None + + +class KlingSingleImageEffectInput(BaseModel): + model_name: KlingSingleImageEffectModelName + image: str = Field( + ..., + description='Reference Image. URL or Base64 encoded string (without data:image prefix). 
File size cannot exceed 10MB, resolution not less than 300*300px, aspect ratio between 1:2.5 ~ 2.5:1.', + ) + duration: KlingSingleImageEffectDuration + + +class KlingDualCharacterEffectInput(BaseModel): + model_name: Optional[KlingCharacterEffectModelName] = 'kling-v1' + mode: Optional[KlingVideoGenMode] = 'std' + images: KlingDualCharacterImages + duration: KlingVideoGenDuration + + +class Data4(BaseModel): + task_id: Optional[str] = Field(None, description='Task ID') + task_status: Optional[KlingTaskStatus] = None + task_info: Optional[TaskInfo] = None + created_at: Optional[int] = Field(None, description='Task creation time') + updated_at: Optional[int] = Field(None, description='Task update time') + task_result: Optional[TaskResult] = None + + +class KlingVideoEffectsResponse(BaseModel): + code: Optional[int] = Field(None, description='Error code') + message: Optional[str] = Field(None, description='Error message') + request_id: Optional[str] = Field(None, description='Request ID') + data: Optional[Data4] = None + + +class KlingImageGenerationsRequest(BaseModel): + model_name: Optional[KlingImageGenModelName] = 'kling-v1' + prompt: str = Field(..., description='Positive text prompt', max_length=500) + negative_prompt: Optional[str] = Field( + None, description='Negative text prompt', max_length=200 + ) + image: Optional[str] = Field( + None, description='Reference Image - Base64 encoded string or image URL' + ) + image_reference: Optional[KlingImageGenImageReferenceType] = None + image_fidelity: Optional[float] = Field( + 0.5, description='Reference intensity for user-uploaded images', ge=0.0, le=1.0 + ) + human_fidelity: Optional[float] = Field( + 0.45, description='Subject reference similarity', ge=0.0, le=1.0 + ) + n: Optional[int] = Field(1, description='Number of generated images', ge=1, le=9) + aspect_ratio: Optional[KlingImageGenAspectRatio] = '16:9' + callback_url: Optional[AnyUrl] = Field( + None, description='The callback notification address' + ) + + +class TaskResult5(BaseModel): + images: Optional[List[KlingImageResult]] = None + + +class Data5(BaseModel): + task_id: Optional[str] = Field(None, description='Task ID') + task_status: Optional[KlingTaskStatus] = None + task_status_msg: Optional[str] = Field(None, description='Task status information') + created_at: Optional[int] = Field(None, description='Task creation time') + updated_at: Optional[int] = Field(None, description='Task update time') + task_result: Optional[TaskResult5] = None + + +class KlingImageGenerationsResponse(BaseModel): + code: Optional[int] = Field(None, description='Error code') + message: Optional[str] = Field(None, description='Error message') + request_id: Optional[str] = Field(None, description='Request ID') + data: Optional[Data5] = None + + +class KlingVirtualTryOnRequest(BaseModel): + model_name: Optional[KlingVirtualTryOnModelName] = 'kolors-virtual-try-on-v1' + human_image: str = Field( + ..., description='Reference human image - Base64 encoded string or image URL' + ) + cloth_image: Optional[str] = Field( + None, + description='Reference clothing image - Base64 encoded string or image URL', + ) + callback_url: Optional[AnyUrl] = Field( + None, description='The callback notification address' + ) + + +class Data6(BaseModel): + task_id: Optional[str] = Field(None, description='Task ID') + task_status: Optional[KlingTaskStatus] = None + task_status_msg: Optional[str] = Field(None, description='Task status information') + created_at: Optional[int] = Field(None, description='Task creation time') 
+ updated_at: Optional[int] = Field(None, description='Task update time') + task_result: Optional[TaskResult5] = None + + +class KlingVirtualTryOnResponse(BaseModel): + code: Optional[int] = Field(None, description='Error code') + message: Optional[str] = Field(None, description='Error message') + request_id: Optional[str] = Field(None, description='Request ID') + data: Optional[Data6] = None + + +class ResourcePackType(str, Enum): + decreasing_total = 'decreasing_total' + constant_period = 'constant_period' + + +class Status(str, Enum): + toBeOnline = 'toBeOnline' + online = 'online' + expired = 'expired' + runOut = 'runOut' + + +class ResourcePackSubscribeInfo(BaseModel): + resource_pack_name: Optional[str] = Field(None, description='Resource package name') + resource_pack_id: Optional[str] = Field(None, description='Resource package ID') + resource_pack_type: Optional[ResourcePackType] = Field( + None, + description='Resource package type (decreasing_total=decreasing total, constant_period=constant periodicity)', + ) + total_quantity: Optional[float] = Field(None, description='Total quantity') + remaining_quantity: Optional[float] = Field( + None, description='Remaining quantity (updated with a 12-hour delay)' + ) + purchase_time: Optional[int] = Field( + None, description='Purchase time, Unix timestamp in ms' + ) + effective_time: Optional[int] = Field( + None, description='Effective time, Unix timestamp in ms' + ) + invalid_time: Optional[int] = Field( + None, description='Expiration time, Unix timestamp in ms' + ) + status: Optional[Status] = Field(None, description='Resource Package Status') + + +class Data7(BaseModel): + code: Optional[int] = Field(None, description='Error code; 0 indicates success') + msg: Optional[str] = Field(None, description='Error information') + resource_pack_subscribe_infos: Optional[List[ResourcePackSubscribeInfo]] = Field( + None, description='Resource package list' + ) + + +class KlingResourcePackageResponse(BaseModel): + code: Optional[int] = Field(None, description='Error code; 0 indicates success') + message: Optional[str] = Field(None, description='Error information') + request_id: Optional[str] = Field( + None, + description='Request ID, generated by the system, used to track requests and troubleshoot problems', + ) + data: Optional[Data7] = None + + +class Object(str, Enum): + event = 'event' + + +class Type(str, Enum): + payment_intent_succeeded = 'payment_intent.succeeded' + + +class StripeRequestInfo(BaseModel): + id: Optional[str] = None + idempotency_key: Optional[str] = None + + +class Object1(str, Enum): + payment_intent = 'payment_intent' + + +class StripeAmountDetails(BaseModel): + tip: Optional[Dict[str, Any]] = None + + +class Object2(str, Enum): + charge = 'charge' + + +class StripeAddress(BaseModel): + city: Optional[str] = None + country: Optional[str] = None + line1: Optional[str] = None + line2: Optional[str] = None + postal_code: Optional[str] = None + state: Optional[str] = None + + +class StripeOutcome(BaseModel): + advice_code: Optional[Any] = None + network_advice_code: Optional[Any] = None + network_decline_code: Optional[Any] = None + network_status: Optional[str] = None + reason: Optional[Any] = None + risk_level: Optional[str] = None + risk_score: Optional[int] = None + seller_message: Optional[str] = None + type: Optional[str] = None + + +class Checks(BaseModel): + address_line1_check: Optional[Any] = None + address_postal_code_check: Optional[Any] = None + cvc_check: Optional[str] = None + + +class 
ExtendedAuthorization(BaseModel): + status: Optional[str] = None + + +class IncrementalAuthorization(BaseModel): + status: Optional[str] = None + + +class Multicapture(BaseModel): + status: Optional[str] = None + + +class NetworkToken(BaseModel): + used: Optional[bool] = None + + +class Overcapture(BaseModel): + maximum_amount_capturable: Optional[int] = None + status: Optional[str] = None + + +class StripeCardDetails(BaseModel): + amount_authorized: Optional[int] = None + authorization_code: Optional[Any] = None + brand: Optional[str] = None + checks: Optional[Checks] = None + country: Optional[str] = None + exp_month: Optional[int] = None + exp_year: Optional[int] = None + extended_authorization: Optional[ExtendedAuthorization] = None + fingerprint: Optional[str] = None + funding: Optional[str] = None + incremental_authorization: Optional[IncrementalAuthorization] = None + installments: Optional[Any] = None + last4: Optional[str] = None + mandate: Optional[Any] = None + multicapture: Optional[Multicapture] = None + network: Optional[str] = None + network_token: Optional[NetworkToken] = None + network_transaction_id: Optional[str] = None + overcapture: Optional[Overcapture] = None + regulated_status: Optional[str] = None + three_d_secure: Optional[Any] = None + wallet: Optional[Any] = None + + +class StripeRefundList(BaseModel): + object: Optional[str] = None + data: Optional[List[Dict[str, Any]]] = None + has_more: Optional[bool] = None + total_count: Optional[int] = None + url: Optional[str] = None + + +class Card(BaseModel): + installments: Optional[Any] = None + mandate_options: Optional[Any] = None + network: Optional[Any] = None + request_three_d_secure: Optional[str] = None + + +class StripePaymentMethodOptions(BaseModel): + card: Optional[Card] = None + + +class StripeShipping(BaseModel): + address: Optional[StripeAddress] = None + carrier: Optional[str] = None + name: Optional[str] = None + phone: Optional[str] = None + tracking_number: Optional[str] = None + + +class Model(str, Enum): + T2V_01_Director = 'T2V-01-Director' + I2V_01_Director = 'I2V-01-Director' + S2V_01 = 'S2V-01' + I2V_01 = 'I2V-01' + I2V_01_live = 'I2V-01-live' + T2V_01 = 'T2V-01' + + +class SubjectReferenceItem(BaseModel): + image: Optional[str] = Field( + None, description='URL or base64 encoding of the subject reference image.' + ) + mask: Optional[str] = Field( + None, + description='URL or base64 encoding of the mask for the subject reference image.', + ) + + +class MinimaxVideoGenerationRequest(BaseModel): + model: Model = Field( + ..., + description='Required. ID of model. Options: T2V-01-Director, I2V-01-Director, S2V-01, I2V-01, I2V-01-live, T2V-01', + ) + prompt: Optional[str] = Field( + None, + description='Description of the video. Should be less than 2000 characters. Supports camera movement instructions in [brackets].', + max_length=2000, + ) + prompt_optimizer: Optional[bool] = Field( + True, + description='If true (default), the model will automatically optimize the prompt. Set to false for more precise control.', + ) + first_frame_image: Optional[str] = Field( + None, + description='URL or base64 encoding of the first frame image. Required when model is I2V-01, I2V-01-Director, or I2V-01-live.', + ) + subject_reference: Optional[List[SubjectReferenceItem]] = Field( + None, + description='Only available when model is S2V-01. The model will generate a video based on the subject uploaded through this parameter.', + ) + callback_url: Optional[str] = Field( + None, + description='Optional. 
URL to receive real-time status updates about the video generation task.', + ) + + +class MinimaxBaseResponse(BaseModel): + status_code: int = Field( + ..., + description='Status code. 0 indicates success, other values indicate errors.', + ) + status_msg: str = Field( + ..., description='Specific error details or success message.' + ) + + +class MinimaxVideoGenerationResponse(BaseModel): + task_id: str = Field( + ..., description='The task ID for the asynchronous video generation task.' + ) + base_resp: MinimaxBaseResponse + + +class File(BaseModel): + file_id: Optional[int] = Field(None, description='Unique identifier for the file') + bytes: Optional[int] = Field(None, description='File size in bytes') + created_at: Optional[int] = Field( + None, description='Unix timestamp when the file was created, in seconds' + ) + filename: Optional[str] = Field(None, description='The name of the file') + purpose: Optional[str] = Field(None, description='The purpose of using the file') + download_url: Optional[str] = Field( + None, description='The URL to download the video' + ) + + +class MinimaxFileRetrieveResponse(BaseModel): + file: File + base_resp: MinimaxBaseResponse + + +class Status1(str, Enum): + Queueing = 'Queueing' + Preparing = 'Preparing' + Processing = 'Processing' + Success = 'Success' + Fail = 'Fail' + + +class MinimaxTaskResultResponse(BaseModel): + task_id: str = Field(..., description='The task ID being queried.') + status: Status1 = Field( + ..., + description="Task status: 'Queueing' (in queue), 'Preparing' (task is preparing), 'Processing' (generating), 'Success' (task completed successfully), or 'Fail' (task failed).", + ) + file_id: Optional[str] = Field( + None, + description='After the task status changes to Success, this field returns the file ID corresponding to the generated video.', + ) + base_resp: MinimaxBaseResponse + + +class OutputFormat(str, Enum): + jpeg = 'jpeg' + png = 'png' + + +class BFLFluxPro11GenerateRequest(BaseModel): + prompt: str = Field(..., description='The main text prompt for image generation') + image_prompt: Optional[str] = Field(None, description='Optional image prompt') + width: int = Field(..., description='Width of the generated image') + height: int = Field(..., description='Height of the generated image') + prompt_upsampling: Optional[bool] = Field( + None, description='Whether to use prompt upsampling' + ) + seed: Optional[int] = Field(None, description='Random seed for reproducibility') + safety_tolerance: Optional[int] = Field(None, description='Safety tolerance level') + output_format: Optional[OutputFormat] = Field( + None, description='Output image format' + ) + webhook_url: Optional[str] = Field( + None, description='Optional webhook URL for async processing' + ) + webhook_secret: Optional[str] = Field( + None, description='Optional webhook secret for async processing' + ) + + +class BFLFluxPro11GenerateResponse(BaseModel): + id: str = Field(..., description='Job ID for tracking') + polling_url: str = Field(..., description='URL to poll for results') + + +class BFLFluxProGenerateRequest(BaseModel): + prompt: str = Field(..., description='The text prompt for image generation.') + negative_prompt: Optional[str] = Field( + None, description='The negative prompt for image generation.' 
+ ) + width: int = Field( + ..., description='The width of the image to generate.', ge=64, le=2048 + ) + height: int = Field( + ..., description='The height of the image to generate.', ge=64, le=2048 + ) + num_inference_steps: Optional[int] = Field( + None, description='The number of inference steps.', ge=1, le=100 + ) + guidance_scale: Optional[float] = Field( + None, description='The guidance scale for generation.', ge=1.0, le=20.0 + ) + seed: Optional[int] = Field(None, description='The seed value for reproducibility.') + num_images: Optional[int] = Field( + None, description='The number of images to generate.', ge=1, le=4 + ) + + +class BFLFluxProGenerateResponse(BaseModel): + id: str = Field(..., description='The unique identifier for the generation task.') + polling_url: str = Field(..., description='URL to poll for the generation result.') + + +class Steps(RootModel[int]): + root: int = Field( + ..., + description='Number of steps for the image generation process', + examples=[50], + ge=15, + le=50, + title='Steps', + ) + + +class Guidance(RootModel[float]): + root: float = Field( + ..., + description='Guidance strength for the image generation process', + ge=1.5, + le=100.0, + title='Guidance', + ) + + +class WebhookUrl(RootModel[AnyUrl]): + root: AnyUrl = Field( + ..., description='URL to receive webhook notifications', title='Webhook Url' + ) + + +class BFLAsyncResponse(BaseModel): + id: str = Field(..., title='Id') + polling_url: str = Field(..., title='Polling Url') + + +class BFLAsyncWebhookResponse(BaseModel): + id: str = Field(..., title='Id') + status: str = Field(..., title='Status') + webhook_url: str = Field(..., title='Webhook Url') + + +class Top(RootModel[int]): + root: int = Field( + ..., + description='Number of pixels to expand at the top of the image', + ge=0, + le=2048, + title='Top', + ) + + +class Bottom(RootModel[int]): + root: int = Field( + ..., + description='Number of pixels to expand at the bottom of the image', + ge=0, + le=2048, + title='Bottom', + ) + + +class Left(RootModel[int]): + root: int = Field( + ..., + description='Number of pixels to expand on the left side of the image', + ge=0, + le=2048, + title='Left', + ) + + +class Right(RootModel[int]): + root: int = Field( + ..., + description='Number of pixels to expand on the right side of the image', + ge=0, + le=2048, + title='Right', + ) + + +class CannyLowThreshold(RootModel[int]): + root: int = Field( + ..., + description='Low threshold for Canny edge detection', + ge=0, + le=500, + title='Canny Low Threshold', + ) + + +class CannyHighThreshold(RootModel[int]): + root: int = Field( + ..., + description='High threshold for Canny edge detection', + ge=0, + le=500, + title='Canny High Threshold', + ) + + +class Steps2(RootModel[int]): + root: int = Field( + ..., + description='Number of steps for the image generation process', + ge=15, + le=50, + title='Steps', + ) + + +class Guidance2(RootModel[float]): + root: float = Field( + ..., + description='Guidance strength for the image generation process', + ge=1.0, + le=100.0, + title='Guidance', + ) + + +class BFLOutputFormat(str, Enum): + jpeg = 'jpeg' + png = 'png' + + +class BFLValidationError(BaseModel): + loc: List[Union[str, int]] = Field(..., title='Location') + msg: str = Field(..., title='Message') + type: str = Field(..., title='Error Type') + + +class Datum2(BaseModel): + image_id: Optional[str] = Field( + None, description='Unique identifier for the generated image' + ) + url: Optional[str] = Field(None, description='URL to access the 
generated image') + + +class RecraftImageGenerationResponse(BaseModel): + created: int = Field( + ..., description='Unix timestamp when the generation was created' + ) + credits: int = Field(..., description='Number of credits used for the generation') + data: List[Datum2] = Field(..., description='Array of generated image information') + + +class RecraftImageFeatures(BaseModel): + nsfw_score: Optional[float] = None + + +class RecraftTextLayoutItem(BaseModel): + bbox: List[List[float]] + text: str + + +class RecraftImageColor(BaseModel): + rgb: Optional[List[int]] = None + std: Optional[List[float]] = None + weight: Optional[float] = None + + +class RecraftImageStyle(str, Enum): + digital_illustration = 'digital_illustration' + icon = 'icon' + realistic_image = 'realistic_image' + vector_illustration = 'vector_illustration' + + +class RecraftImageSubStyle(str, Enum): + field_2d_art_poster = '2d_art_poster' + field_3d = '3d' + field_80s = '80s' + glow = 'glow' + grain = 'grain' + hand_drawn = 'hand_drawn' + infantile_sketch = 'infantile_sketch' + kawaii = 'kawaii' + pixel_art = 'pixel_art' + psychedelic = 'psychedelic' + seamless = 'seamless' + voxel = 'voxel' + watercolor = 'watercolor' + broken_line = 'broken_line' + colored_outline = 'colored_outline' + colored_shapes = 'colored_shapes' + colored_shapes_gradient = 'colored_shapes_gradient' + doodle_fill = 'doodle_fill' + doodle_offset_fill = 'doodle_offset_fill' + offset_fill = 'offset_fill' + outline = 'outline' + outline_gradient = 'outline_gradient' + uneven_fill = 'uneven_fill' + field_70s = '70s' + cartoon = 'cartoon' + doodle_line_art = 'doodle_line_art' + engraving = 'engraving' + flat_2 = 'flat_2' + kawaii_1 = 'kawaii' + line_art = 'line_art' + linocut = 'linocut' + seamless_1 = 'seamless' + b_and_w = 'b_and_w' + enterprise = 'enterprise' + hard_flash = 'hard_flash' + hdr = 'hdr' + motion_blur = 'motion_blur' + natural_light = 'natural_light' + studio_portrait = 'studio_portrait' + line_circuit = 'line_circuit' + field_2d_art_poster_2 = '2d_art_poster_2' + engraving_color = 'engraving_color' + flat_air_art = 'flat_air_art' + hand_drawn_outline = 'hand_drawn_outline' + handmade_3d = 'handmade_3d' + stickers_drawings = 'stickers_drawings' + plastic = 'plastic' + pictogram = 'pictogram' + + +class RecraftTransformModel(str, Enum): + refm1 = 'refm1' + recraft20b = 'recraft20b' + recraftv2 = 'recraftv2' + recraftv3 = 'recraftv3' + flux1_1pro = 'flux1_1pro' + flux1dev = 'flux1dev' + imagen3 = 'imagen3' + hidream_i1_dev = 'hidream_i1_dev' + + +class RecraftImageFormat(str, Enum): + webp = 'webp' + png = 'png' + + +class RecraftResponseFormat(str, Enum): + url = 'url' + b64_json = 'b64_json' + + +class RecraftImage(BaseModel): + b64_json: Optional[str] = None + features: Optional[RecraftImageFeatures] = None + image_id: UUID + revised_prompt: Optional[str] = None + url: Optional[str] = None + + +class RecraftUserControls(BaseModel): + artistic_level: Optional[int] = None + background_color: Optional[RecraftImageColor] = None + colors: Optional[List[RecraftImageColor]] = None + no_text: Optional[bool] = None + + +class RecraftTextLayout(RootModel[List[RecraftTextLayoutItem]]): + root: List[RecraftTextLayoutItem] + + +class RecraftProcessImageRequest(BaseModel): + image: StrictBytes + image_format: Optional[RecraftImageFormat] = None + response_format: Optional[RecraftResponseFormat] = None + + +class RecraftProcessImageResponse(BaseModel): + created: int + credits: int + image: RecraftImage + + +class 
RecraftImageToImageRequest(BaseModel): + block_nsfw: Optional[bool] = None + calculate_features: Optional[bool] = None + controls: Optional[RecraftUserControls] = None + image: StrictBytes + image_format: Optional[RecraftImageFormat] = None + model: Optional[RecraftTransformModel] = None + n: Optional[int] = None + negative_prompt: Optional[str] = None + prompt: str + random_seed: Optional[int] = None + response_format: Optional[RecraftResponseFormat] = None + strength: float + style: Optional[RecraftImageStyle] = None + style_id: Optional[UUID] = None + substyle: Optional[RecraftImageSubStyle] = None + text_layout: Optional[RecraftTextLayout] = None + + +class RecraftGenerateImageResponse(BaseModel): + created: int + credits: int + data: List[RecraftImage] + + +class RecraftTransformImageWithMaskRequest(BaseModel): + block_nsfw: Optional[bool] = None + calculate_features: Optional[bool] = None + image: StrictBytes + image_format: Optional[RecraftImageFormat] = None + mask: StrictBytes + model: Optional[RecraftTransformModel] = None + n: Optional[int] = None + negative_prompt: Optional[str] = None + prompt: str + random_seed: Optional[int] = None + response_format: Optional[RecraftResponseFormat] = None + style: Optional[RecraftImageStyle] = None + style_id: Optional[UUID] = None + substyle: Optional[RecraftImageSubStyle] = None + text_layout: Optional[RecraftTextLayout] = None + + +class KlingErrorResponse(BaseModel): + code: int = Field( + ..., + description='- 1000: Authentication failed\n- 1001: Authorization is empty\n- 1002: Authorization is invalid\n- 1003: Authorization is not yet valid\n- 1004: Authorization has expired\n- 1100: Account exception\n- 1101: Account in arrears (postpaid scenario)\n- 1102: Resource pack depleted or expired (prepaid scenario)\n- 1103: Unauthorized access to requested resource\n- 1200: Invalid request parameters\n- 1201: Invalid parameters\n- 1202: Invalid request method\n- 1203: Requested resource does not exist\n- 1300: Trigger platform strategy\n- 1301: Trigger content security policy\n- 1302: API request too frequent\n- 1303: Concurrency/QPS exceeds limit\n- 1304: Trigger IP whitelist policy\n- 5000: Internal server error\n- 5001: Service temporarily unavailable\n- 5002: Server internal timeout\n', + ) + message: str = Field(..., description='Human-readable error message') + request_id: str = Field( + ..., description='Request ID for tracking and troubleshooting' + ) + + +class LumaAspectRatio(str, Enum): + field_1_1 = '1:1' + field_16_9 = '16:9' + field_9_16 = '9:16' + field_4_3 = '4:3' + field_3_4 = '3:4' + field_21_9 = '21:9' + field_9_21 = '9:21' + + +class LumaVideoModel(str, Enum): + ray_2 = 'ray-2' + ray_flash_2 = 'ray-flash-2' + ray_1_6 = 'ray-1-6' + + +class LumaVideoModelOutputResolution1(str, Enum): + field_540p = '540p' + field_720p = '720p' + field_1080p = '1080p' + field_4k = '4k' + + +class LumaVideoModelOutputResolution( + RootModel[Union[LumaVideoModelOutputResolution1, str]] +): + root: Union[LumaVideoModelOutputResolution1, str] + + +class LumaVideoModelOutputDuration1(str, Enum): + field_5s = '5s' + field_9s = '9s' + + +class LumaVideoModelOutputDuration( + RootModel[Union[LumaVideoModelOutputDuration1, str]] +): + root: Union[LumaVideoModelOutputDuration1, str] + + +class LumaImageModel(str, Enum): + photon_1 = 'photon-1' + photon_flash_1 = 'photon-flash-1' + + +class LumaImageRef(BaseModel): + url: Optional[AnyUrl] = Field(None, description='The URL of the image reference') + weight: Optional[float] = Field( + None, 
description='The weight of the image reference' + ) + + +class LumaImageIdentity(BaseModel): + images: Optional[List[AnyUrl]] = Field( + None, description='The URLs of the image identity' + ) + + +class LumaModifyImageRef(BaseModel): + url: Optional[AnyUrl] = Field(None, description='The URL of the image reference') + weight: Optional[float] = Field( + None, description='The weight of the modify image reference' + ) + + +class Type1(str, Enum): + generation = 'generation' + + +class LumaGenerationReference(BaseModel): + type: Literal['generation'] + id: UUID = Field(..., description='The ID of the generation') + + +class Type2(str, Enum): + image = 'image' + + +class LumaImageReference(BaseModel): + type: Literal['image'] + url: AnyUrl = Field(..., description='The URL of the image') + + +class LumaKeyframe(RootModel[Union[LumaGenerationReference, LumaImageReference]]): + root: Union[LumaGenerationReference, LumaImageReference] = Field( + ..., + description='A keyframe can be either a Generation reference, an Image, or a Video', + discriminator='type', + ) + + +class LumaGenerationType(str, Enum): + video = 'video' + image = 'image' + + +class LumaState(str, Enum): + queued = 'queued' + dreaming = 'dreaming' + completed = 'completed' + failed = 'failed' + + +class LumaAssets(BaseModel): + video: Optional[AnyUrl] = Field(None, description='The URL of the video') + image: Optional[AnyUrl] = Field(None, description='The URL of the image') + progress_video: Optional[AnyUrl] = Field( + None, description='The URL of the progress video' + ) + + +class GenerationType(str, Enum): + video = 'video' + + +class GenerationType1(str, Enum): + image = 'image' + + +class CharacterRef(BaseModel): + identity0: Optional[LumaImageIdentity] = None + + +class LumaImageGenerationRequest(BaseModel): + generation_type: Optional[GenerationType1] = 'image' + model: Optional[LumaImageModel] = 'photon-1' + prompt: Optional[str] = Field(None, description='The prompt of the generation') + aspect_ratio: Optional[LumaAspectRatio] = '16:9' + callback_url: Optional[AnyUrl] = Field( + None, description='The callback URL for the generation' + ) + image_ref: Optional[List[LumaImageRef]] = None + style_ref: Optional[List[LumaImageRef]] = None + character_ref: Optional[CharacterRef] = None + modify_image_ref: Optional[LumaModifyImageRef] = None + + +class GenerationType2(str, Enum): + upscale_video = 'upscale_video' + + +class LumaUpscaleVideoGenerationRequest(BaseModel): + generation_type: Optional[GenerationType2] = 'upscale_video' + resolution: Optional[LumaVideoModelOutputResolution] = None + callback_url: Optional[AnyUrl] = Field( + None, description='The callback URL for the upscale' + ) + + +class GenerationType3(str, Enum): + add_audio = 'add_audio' + + +class LumaAudioGenerationRequest(BaseModel): + generation_type: Optional[GenerationType3] = 'add_audio' + prompt: Optional[str] = Field(None, description='The prompt of the audio') + negative_prompt: Optional[str] = Field( + None, description='The negative prompt of the audio' + ) + callback_url: Optional[AnyUrl] = Field( + None, description='The callback URL for the audio' + ) + + +class LumaError(BaseModel): + detail: Optional[str] = Field(None, description='The error message') + + +class AspectRatio(str, Enum): + field_16_9 = '16:9' + field_4_3 = '4:3' + field_1_1 = '1:1' + field_3_4 = '3:4' + field_9_16 = '9:16' + + +class Duration(int, Enum): + integer_5 = 5 + integer_8 = 8 + + +class Model1(str, Enum): + v3_5 = 'v3.5' + + +class MotionMode(str, Enum): + normal = 
'normal' + fast = 'fast' + + +class Quality(str, Enum): + field_360p = '360p' + field_540p = '540p' + field_720p = '720p' + field_1080p = '1080p' + + +class Style(str, Enum): + anime = 'anime' + field_3d_animation = '3d_animation' + clay = 'clay' + comic = 'comic' + cyberpunk = 'cyberpunk' + + +class PixverseTextVideoRequest(BaseModel): + aspect_ratio: AspectRatio + duration: Duration + model: Model1 + motion_mode: Optional[MotionMode] = None + negative_prompt: Optional[str] = None + prompt: str + quality: Quality + seed: Optional[int] = None + style: Optional[Style] = None + template_id: Optional[int] = None + water_mark: Optional[bool] = None + + +class Resp(BaseModel): + video_id: Optional[int] = None + + +class PixverseVideoResponse(BaseModel): + ErrCode: Optional[int] = None + ErrMsg: Optional[str] = None + Resp_1: Optional[Resp] = Field(None, alias='Resp') + + +class Resp1(BaseModel): + img_id: Optional[int] = None + + +class PixverseImageUploadResponse(BaseModel): + ErrCode: Optional[int] = None + ErrMsg: Optional[str] = None + Resp: Optional[Resp1] = None + + +class PixverseImageVideoRequest(BaseModel): + img_id: int + model: Model1 + prompt: str + duration: Duration + quality: Quality + motion_mode: Optional[MotionMode] = None + seed: Optional[int] = None + style: Optional[Style] = None + template_id: Optional[int] = None + water_mark: Optional[bool] = None + + +class PixverseTransitionVideoRequest(BaseModel): + first_frame_img: int + last_frame_img: int + model: Model1 + duration: Duration + quality: Quality + motion_mode: MotionMode + seed: int + prompt: str + style: Optional[Style] = None + template_id: Optional[int] = None + water_mark: Optional[bool] = None + + +class Status2(int, Enum): + integer_1 = 1 + integer_5 = 5 + integer_6 = 6 + integer_7 = 7 + integer_8 = 8 + + +class Resp2(BaseModel): + create_time: Optional[str] = None + id: Optional[int] = None + modify_time: Optional[str] = None + negative_prompt: Optional[str] = None + outputHeight: Optional[int] = None + outputWidth: Optional[int] = None + prompt: Optional[str] = None + resolution_ratio: Optional[int] = None + seed: Optional[int] = None + size: Optional[int] = None + status: Optional[Status2] = Field( + None, + description='Video generation status codes:\n* 1 - Generation successful\n* 5 - Generating\n* 6 - Deleted\n* 7 - Contents moderation failed\n* 8 - Generation failed\n', + ) + style: Optional[str] = None + url: Optional[str] = None + + +class PixverseVideoResultResponse(BaseModel): + ErrCode: Optional[int] = None + ErrMsg: Optional[str] = None + Resp: Optional[Resp2] = None + + +class Image(BaseModel): + bytesBase64Encoded: str + gcsUri: Optional[str] = None + mimeType: Optional[str] = None + + +class Image1(BaseModel): + bytesBase64Encoded: Optional[str] = None + gcsUri: str + mimeType: Optional[str] = None + + +class Instance(BaseModel): + prompt: str = Field(..., description='Text description of the video') + image: Optional[Union[Image, Image1]] = Field( + None, description='Optional image to guide video generation' + ) + + +class PersonGeneration(str, Enum): + ALLOW = 'ALLOW' + BLOCK = 'BLOCK' + + +class Parameters(BaseModel): + aspectRatio: Optional[str] = Field(None, examples=['16:9']) + negativePrompt: Optional[str] = None + personGeneration: Optional[PersonGeneration] = None + sampleCount: Optional[int] = None + seed: Optional[int] = None + storageUri: Optional[str] = Field( + None, description='Optional Cloud Storage URI to upload the video' + ) + durationSeconds: Optional[int] = None + 
enhancePrompt: Optional[bool] = None + + +class Veo2GenVidRequest(BaseModel): + instances: Optional[List[Instance]] = None + parameters: Optional[Parameters] = None + + +class Veo2GenVidResponse(BaseModel): + name: str = Field( + ..., + description='Operation resource name', + examples=[ + 'projects/PROJECT_ID/locations/us-central1/publishers/google/models/MODEL_ID/operations/a1b07c8e-7b5a-4aba-bb34-3e1ccb8afcc8' + ], + ) + + +class Veo2GenVidPollRequest(BaseModel): + operationName: str = Field( + ..., + description='Full operation name (from predict response)', + examples=[ + 'projects/PROJECT_ID/locations/us-central1/publishers/google/models/MODEL_ID/operations/OPERATION_ID' + ], + ) + + +class Video(BaseModel): + gcsUri: Optional[str] = Field(None, description='Cloud Storage URI of the video') + bytesBase64Encoded: Optional[str] = Field( + None, description='Base64-encoded video content' + ) + mimeType: Optional[str] = Field(None, description='Video MIME type') + + +class Response(BaseModel): + field_type: Optional[str] = Field( + None, + alias='@type', + examples=[ + 'type.googleapis.com/cloud.ai.large_models.vision.GenerateVideoResponse' + ], + ) + raiMediaFilteredCount: Optional[int] = Field( + None, description='Count of media filtered by responsible AI policies' + ) + raiMediaFilteredReasons: Optional[List[str]] = Field( + None, description='Reasons why media was filtered by responsible AI policies' + ) + videos: Optional[List[Video]] = None + + +class Error1(BaseModel): + code: Optional[int] = Field(None, description='Error code') + message: Optional[str] = Field(None, description='Error message') + + +class Veo2GenVidPollResponse(BaseModel): + name: Optional[str] = None + done: Optional[bool] = None + response: Optional[Response] = Field( + None, description='The actual prediction response if done is true' + ) + error: Optional[Error1] = Field( + None, description='Error details if operation failed' + ) + + +class RunwayImageToVideoResponse(BaseModel): + id: Optional[str] = Field(None, description='Task ID') + + +class RunwayTaskStatusEnum(str, Enum): + SUCCEEDED = 'SUCCEEDED' + RUNNING = 'RUNNING' + FAILED = 'FAILED' + PENDING = 'PENDING' + CANCELLED = 'CANCELLED' + THROTTLED = 'THROTTLED' + + +class RunwayModelEnum(str, Enum): + gen4_turbo = 'gen4_turbo' + gen3a_turbo = 'gen3a_turbo' + + +class Position(str, Enum): + first = 'first' + last = 'last' + + +class RunwayPromptImageDetailedObject(BaseModel): + uri: str = Field( + ..., description='A HTTPS URL or data URI containing an encoded image.' + ) + position: Position = Field( + ..., + description="The position of the image in the output video. 'last' is currently supported for gen3a_turbo only.", + ) + + +class RunwayDurationEnum(int, Enum): + integer_5 = 5 + integer_10 = 10 + + +class RunwayAspectRatioEnum(str, Enum): + field_1280_720 = '1280:720' + field_720_1280 = '720:1280' + field_1104_832 = '1104:832' + field_832_1104 = '832:1104' + field_960_960 = '960:960' + field_1584_672 = '1584:672' + field_1280_768 = '1280:768' + field_768_1280 = '768:1280' + + +class RunwayPromptImageObject( + RootModel[Union[str, List[RunwayPromptImageDetailedObject]]] +): + root: Union[str, List[RunwayPromptImageDetailedObject]] = Field( + ..., + description='Image(s) to use for the video generation. 
Can be a single URI or an array of image objects with positions.', + ) + + +class Datum3(BaseModel): + b64_json: Optional[str] = Field(None, description='Base64 encoded image data') + url: Optional[str] = Field(None, description='URL of the image') + revised_prompt: Optional[str] = Field(None, description='Revised prompt') + + +class InputTokensDetails(BaseModel): + text_tokens: Optional[int] = None + image_tokens: Optional[int] = None + + +class Usage(BaseModel): + input_tokens: Optional[int] = None + input_tokens_details: Optional[InputTokensDetails] = None + output_tokens: Optional[int] = None + total_tokens: Optional[int] = None + + +class OpenAIImageGenerationResponse(BaseModel): + data: Optional[List[Datum3]] = None + usage: Optional[Usage] = None + + +class Quality3(str, Enum): + low = 'low' + medium = 'medium' + high = 'high' + standard = 'standard' + hd = 'hd' + + +class OutputFormat1(str, Enum): + png = 'png' + webp = 'webp' + jpeg = 'jpeg' + + +class Moderation(str, Enum): + low = 'low' + auto = 'auto' + + +class Background(str, Enum): + transparent = 'transparent' + opaque = 'opaque' + + +class ResponseFormat(str, Enum): + url = 'url' + b64_json = 'b64_json' + + +class Style3(str, Enum): + vivid = 'vivid' + natural = 'natural' + + +class OpenAIImageGenerationRequest(BaseModel): + model: Optional[str] = Field( + None, description='The model to use for image generation', examples=['dall-e-3'] + ) + prompt: str = Field( + ..., + description='A text description of the desired image', + examples=['Draw a rocket in front of a blackhole in deep space'], + ) + n: Optional[int] = Field( + None, + description='The number of images to generate (1-10). Only 1 supported for dall-e-3.', + examples=[1], + ) + quality: Optional[Quality3] = Field( + None, description='The quality of the generated image', examples=['high'] + ) + size: Optional[str] = Field( + None, + description='Size of the image (e.g., 1024x1024, 1536x1024, auto)', + examples=['1024x1536'], + ) + output_format: Optional[OutputFormat1] = Field( + None, description='Format of the output image', examples=['png'] + ) + output_compression: Optional[int] = Field( + None, description='Compression level for JPEG or WebP (0-100)', examples=[100] + ) + moderation: Optional[Moderation] = Field( + None, description='Content moderation setting', examples=['auto'] + ) + background: Optional[Background] = Field( + None, description='Background transparency', examples=['opaque'] + ) + response_format: Optional[ResponseFormat] = Field( + None, description='Response format of image data', examples=['b64_json'] + ) + style: Optional[Style3] = Field( + None, description='Style of the image (only for dall-e-3)', examples=['vivid'] + ) + user: Optional[str] = Field( + None, + description='A unique identifier for end-user monitoring', + examples=['user-1234'], + ) + + +class OpenAIImageEditRequest(BaseModel): + model: str = Field( + ..., description='The model to use for image editing', examples=['gpt-image-1'] + ) + prompt: str = Field( + ..., + description='A text description of the desired edit', + examples=['Give the rocketship rainbow coloring'], + ) + n: Optional[int] = Field( + None, description='The number of images to generate', examples=[1] + ) + quality: Optional[str] = Field( + None, description='The quality of the edited image', examples=['low'] + ) + size: Optional[str] = Field( + None, description='Size of the output image', examples=['1024x1024'] + ) + output_format: Optional[OutputFormat1] = Field( + None, description='Format of the 
output image', examples=['png'] + ) + output_compression: Optional[int] = Field( + None, description='Compression level for JPEG or WebP (0-100)', examples=[100] + ) + moderation: Optional[Moderation] = Field( + None, description='Content moderation setting', examples=['auto'] + ) + background: Optional[str] = Field( + None, description='Background transparency', examples=['opaque'] + ) + user: Optional[str] = Field( + None, + description='A unique identifier for end-user monitoring', + examples=['user-1234'], + ) + + +class CustomerStorageResourceResponse(BaseModel): + download_url: Optional[str] = Field( + None, + description='The signed URL to use for downloading the file from the specified path', + ) + upload_url: Optional[str] = Field( + None, + description='The signed URL to use for uploading the file to the specified path', + ) + expires_at: Optional[datetime] = Field( + None, description='When the signed URL will expire' + ) + existing_file: Optional[bool] = Field( + None, description='Whether an existing file with the same hash was found' + ) + + +class Pikaffect(str, Enum): + Cake_ify = 'Cake-ify' + Crumble = 'Crumble' + Crush = 'Crush' + Decapitate = 'Decapitate' + Deflate = 'Deflate' + Dissolve = 'Dissolve' + Explode = 'Explode' + Eye_pop = 'Eye-pop' + Inflate = 'Inflate' + Levitate = 'Levitate' + Melt = 'Melt' + Peel = 'Peel' + Poke = 'Poke' + Squish = 'Squish' + Ta_da = 'Ta-da' + Tear = 'Tear' + + +class PikaBodyGeneratePikaffectsGeneratePikaffectsPost(BaseModel): + image: Optional[StrictBytes] = Field(None, title='Image') + pikaffect: Optional[Pikaffect] = Field(None, title='Pikaffect') + promptText: Optional[str] = Field(None, title='Prompttext') + negativePrompt: Optional[str] = Field(None, title='Negativeprompt') + seed: Optional[int] = Field(None, title='Seed') + + +class PikaGenerateResponse(BaseModel): + video_id: str = Field(..., title='Video Id') + + +class PikaBodyGeneratePikadditionsGeneratePikadditionsPost(BaseModel): + video: Optional[StrictBytes] = Field(None, title='Video') + image: Optional[StrictBytes] = Field(None, title='Image') + promptText: Optional[str] = Field(None, title='Prompttext') + negativePrompt: Optional[str] = Field(None, title='Negativeprompt') + seed: Optional[int] = Field(None, title='Seed') + + +class PikaBodyGeneratePikaswapsGeneratePikaswapsPost(BaseModel): + video: Optional[StrictBytes] = Field(None, title='Video') + image: Optional[StrictBytes] = Field(None, title='Image') + promptText: Optional[str] = Field(None, title='Prompttext') + modifyRegionMask: Optional[StrictBytes] = Field( + None, + description='A mask image that specifies the region to modify, where the mask is white and the background is black', + title='Modifyregionmask', + ) + modifyRegionRoi: Optional[str] = Field( + None, + description='Plaintext description of the object / region to modify', + title='Modifyregionroi', + ) + negativePrompt: Optional[str] = Field(None, title='Negativeprompt') + seed: Optional[int] = Field(None, title='Seed') + + +class IngredientsMode(str, Enum): + creative = 'creative' + precise = 'precise' + + +class AspectRatio1(RootModel[float]): + root: float = Field( + ..., + description='Aspect ratio (width / height)', + ge=0.4, + le=2.5, + title='Aspectratio', + ) + + +class PikaBodyGenerate22C2vGenerate22PikascenesPost(BaseModel): + images: Optional[List[StrictBytes]] = Field(None, title='Images') + ingredientsMode: IngredientsMode = Field(..., title='Ingredientsmode') + promptText: Optional[str] = Field(None, title='Prompttext') + 
negativePrompt: Optional[str] = Field(None, title='Negativeprompt') + seed: Optional[int] = Field(None, title='Seed') + resolution: Optional[str] = Field('1080p', title='Resolution') + duration: Optional[int] = Field(5, title='Duration') + aspectRatio: Optional[AspectRatio1] = Field( + None, description='Aspect ratio (width / height)', title='Aspectratio' + ) + + +class PikaStatusEnum(str, Enum): + queued = 'queued' + started = 'started' + finished = 'finished' + + +class PikaValidationError(BaseModel): + loc: List[Union[str, int]] = Field(..., title='Location') + msg: str = Field(..., title='Message') + type: str = Field(..., title='Error Type') + + +class PikaResolutionEnum(str, Enum): + field_1080p = '1080p' + field_720p = '720p' + + +class PikaDurationEnum(int, Enum): + integer_5 = 5 + integer_10 = 10 + + +class RgbItem(RootModel[int]): + root: int = Field(..., ge=0, le=255) + + +class RGBColor(BaseModel): + rgb: List[RgbItem] = Field(..., max_length=3, min_length=3) + + +class StabilityStabilityClientID(RootModel[str]): + root: str = Field( + ..., + description='The name of your application, used to help us communicate app-specific debugging or moderation issues to you.', + examples=['my-awesome-app'], + max_length=256, + ) + + +class StabilityStabilityClientUserID(RootModel[str]): + root: str = Field( + ..., + description='A unique identifier for your end user. Used to help us communicate user-specific debugging or moderation issues to you. Feel free to obfuscate this value to protect user privacy.', + examples=['DiscordUser#9999'], + max_length=256, + ) + + +class StabilityStabilityClientVersion(RootModel[str]): + root: str = Field( + ..., + description='The version of your application, used to help us communicate version-specific debugging or moderation issues to you.', + examples=['1.2.1'], + max_length=256, + ) + + +class Name(str, Enum): + content_moderation = 'content_moderation' + + +class StabilityContentModerationResponse(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new) you file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: Name = Field( + ..., + description='Our content moderation system has flagged some part of your request and subsequently denied it. You were not charged for this request. While this may at times be frustrating, it is necessary to maintain the integrity of our platform and ensure a safe experience for all users. 
If you would like to provide feedback, please use the [Support Form](https://kb.stability.ai/knowledge-base/kb-tickets/new).', + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class RenderingSpeed(str, Enum): + BALANCED = 'BALANCED' + TURBO = 'TURBO' + QUALITY = 'QUALITY' + + +class StabilityCreativity(RootModel[float]): + root: float = Field( + ..., + description='Controls the likelihood of creating additional details not heavily conditioned by the init image.', + ge=0.2, + le=0.5, + ) + + +class StabilityGenerationID(RootModel[str]): + root: str = Field( + ..., + description='The `id` of a generation, typically used for async generations, that can be used to check the status of the generation or retrieve the result.', + examples=['a6dc6c6e20acda010fe14d71f180658f2896ed9b4ec25aa99a6ff06c796987c4'], + max_length=64, + min_length=64, + ) + + +class Mode(str, Enum): + text_to_image = 'text-to-image' + image_to_image = 'image-to-image' + + +class AspectRatio2(str, Enum): + field_21_9 = '21:9' + field_16_9 = '16:9' + field_3_2 = '3:2' + field_5_4 = '5:4' + field_1_1 = '1:1' + field_4_5 = '4:5' + field_2_3 = '2:3' + field_9_16 = '9:16' + field_9_21 = '9:21' + + +class Model4(str, Enum): + sd3_5_large = 'sd3.5-large' + sd3_5_large_turbo = 'sd3.5-large-turbo' + sd3_5_medium = 'sd3.5-medium' + + +class OutputFormat3(str, Enum): + png = 'png' + jpeg = 'jpeg' + + +class StylePreset(str, Enum): + enhance = 'enhance' + anime = 'anime' + photographic = 'photographic' + digital_art = 'digital-art' + comic_book = 'comic-book' + fantasy_art = 'fantasy-art' + line_art = 'line-art' + analog_film = 'analog-film' + neon_punk = 'neon-punk' + isometric = 'isometric' + low_poly = 'low-poly' + origami = 'origami' + modeling_compound = 'modeling-compound' + cinematic = 'cinematic' + field_3d_model = '3d-model' + pixel_art = 'pixel-art' + tile_texture = 'tile-texture' + + +class StabilityImageGenrationSD3Request(BaseModel): + prompt: str = Field( + ..., + description='What you wish to see in the output image. A strong, descriptive prompt that clearly defines\nelements, colors, and subjects will lead to better results.', + max_length=10000, + min_length=1, + ) + mode: Optional[Mode] = Field( + 'text-to-image', + description='Controls whether this is a text-to-image or image-to-image generation, which affects which parameters are required:\n- **text-to-image** requires only the `prompt` parameter\n- **image-to-image** requires the `prompt`, `image`, and `strength` parameters', + title='GenerationMode', + ) + image: Optional[StrictBytes] = Field( + None, + description='The image to use as the starting point for the generation.\n\nSupported formats:\n\n\n\n - jpeg\n - png\n - webp\n\nSupported dimensions:\n\n\n\n - Every side must be at least 64 pixels\n\n> **Important:** This parameter is only valid for **image-to-image** requests.', + ) + strength: Optional[float] = Field( + None, + description='Sometimes referred to as _denoising_, this parameter controls how much influence the\n`image` parameter has on the generated image. A value of 0 would yield an image that\nis identical to the input. A value of 1 would be as if you passed in no image at all.\n\n> **Important:** This parameter is only valid for **image-to-image** requests.', + ge=0.0, + le=1.0, + ) + aspect_ratio: Optional[AspectRatio2] = Field( + '1:1', + description='Controls the aspect ratio of the generated image. 
Defaults to 1:1.\n\n> **Important:** This parameter is only valid for **text-to-image** requests.', + ) + model: Optional[Model4] = Field( + 'sd3.5-large', + description='The model to use for generation.\n\n- `sd3.5-large` requires 6.5 credits per generation\n- `sd3.5-large-turbo` requires 4 credits per generation\n- `sd3.5-medium` requires 3.5 credits per generation\n- As of the April 17, 2025, `sd3-large`, `sd3-large-turbo` and `sd3-medium`\n\n\n\n are re-routed to their `sd3.5-[model version]` equivalent, at the same price.', + ) + seed: Optional[float] = Field( + 0, + description="A specific value that is used to guide the 'randomness' of the generation. (Omit this parameter or pass `0` to use a random seed.)", + ge=0.0, + le=4294967294.0, + ) + output_format: Optional[OutputFormat3] = Field( + 'png', description='Dictates the `content-type` of the generated image.' + ) + style_preset: Optional[StylePreset] = Field( + None, description='Guides the image model towards a particular style.' + ) + negative_prompt: Optional[str] = Field( + None, + description='Keywords of what you **do not** wish to see in the output image.\nThis is an advanced feature.', + max_length=10000, + ) + cfg_scale: Optional[float] = Field( + None, + description='How strictly the diffusion process adheres to the prompt text (higher values keep your image closer to your prompt). The _Large_ and _Medium_ models use a default of `4`. The _Turbo_ model uses a default of `1`.', + ge=1.0, + le=10.0, + ) + + +class FinishReason(str, Enum): + SUCCESS = 'SUCCESS' + CONTENT_FILTERED = 'CONTENT_FILTERED' + + +class StabilityImageGenrationSD3Response200(BaseModel): + image: str = Field( + ..., + description='The generated image, encoded to base64.', + examples=['AAAAIGZ0eXBpc29tAAACAGlzb21pc28yYXZjMW1...'], + ) + seed: Optional[float] = Field( + 0, + description='The seed used as random noise for this generation.', + examples=[343940597], + ge=0.0, + le=4294967294.0, + ) + finish_reason: FinishReason = Field( + ..., + description='The reason the generation finished.\n\n- `SUCCESS` = successful generation.\n- `CONTENT_FILTERED` = successful generation, however the output violated our content moderation\npolicy and has been blurred as a result.', + examples=['SUCCESS'], + ) + + +class StabilityImageGenrationSD3Response400(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: str = Field( + ..., + description='Short-hand name for an error, useful for discriminating between errors with the same status code.', + examples=['bad_request'], + min_length=1, + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class StabilityImageGenrationSD3Response413(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. 
Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: str = Field( + ..., + description='Short-hand name for an error, useful for discriminating between errors with the same status code.', + examples=['bad_request'], + min_length=1, + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class StabilityImageGenrationSD3Response422(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: str = Field( + ..., + description='Short-hand name for an error, useful for discriminating between errors with the same status code.', + examples=['bad_request'], + min_length=1, + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class StabilityImageGenrationSD3Response429(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: str = Field( + ..., + description='Short-hand name for an error, useful for discriminating between errors with the same status code.', + examples=['bad_request'], + min_length=1, + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class StabilityImageGenrationSD3Response500(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: str = Field( + ..., + description='Short-hand name for an error, useful for discriminating between errors with the same status code.', + examples=['bad_request'], + min_length=1, + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class OutputFormat4(str, Enum): + jpeg = 'jpeg' + png = 'png' + webp = 'webp' + + +class StabilityImageGenrationUpscaleConservativeRequest(BaseModel): + image: StrictBytes = Field( + ..., + description='The image you wish to upscale.\n\nSupported Formats:\n- jpeg\n- png\n- webp\n\nValidation Rules:\n- Every side must be at least 64 pixels\n- Total pixel count must be between 4,096 and 9,437,184 pixels\n- The aspect ratio must be between 1:2.5 and 2.5:1', + examples=['./some/image.png'], + ) + prompt: str = Field( + ..., + description="What you wish to see in the output image. 
A strong, descriptive prompt that clearly defines\nelements, colors, and subjects will lead to better results.\n\nTo control the weight of a given word use the format `(word:weight)`,\nwhere `word` is the word you'd like to control the weight of and `weight`\nis a value between 0 and 1. For example: `The sky was a crisp (blue:0.3) and (green:0.8)`\nwould convey a sky that was blue and green, but more green than blue.", + max_length=10000, + min_length=1, + ) + negative_prompt: Optional[str] = Field( + None, + description='A blurb of text describing what you **do not** wish to see in the output image.\nThis is an advanced feature.', + max_length=10000, + ) + seed: Optional[float] = Field( + 0, + description="A specific value that is used to guide the 'randomness' of the generation. (Omit this parameter or pass `0` to use a random seed.)", + ge=0.0, + le=4294967294.0, + ) + output_format: Optional[OutputFormat4] = Field( + 'png', description='Dictates the `content-type` of the generated image.' + ) + creativity: Optional[StabilityCreativity] = Field( + default_factory=lambda: StabilityCreativity.model_validate(0.35) + ) + + +class StabilityImageGenrationUpscaleConservativeResponse200(BaseModel): + image: str = Field( + ..., + description='The generated image, encoded to base64.', + examples=['AAAAIGZ0eXBpc29tAAACAGlzb21pc28yYXZjMW1...'], + ) + seed: Optional[float] = Field( + 0, + description='The seed used as random noise for this generation.', + examples=[343940597], + ge=0.0, + le=4294967294.0, + ) + finish_reason: FinishReason = Field( + ..., + description='The reason the generation finished.\n\n- `SUCCESS` = successful generation.\n- `CONTENT_FILTERED` = successful generation, however the output violated our content moderation\npolicy and has been blurred as a result.', + examples=['SUCCESS'], + ) + + +class StabilityImageGenrationUpscaleConservativeResponse400(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: str = Field( + ..., + description='Short-hand name for an error, useful for discriminating between errors with the same status code.', + examples=['bad_request'], + min_length=1, + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class StabilityImageGenrationUpscaleConservativeResponse413(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. 
Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: str = Field( + ..., + description='Short-hand name for an error, useful for discriminating between errors with the same status code.', + examples=['bad_request'], + min_length=1, + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class StabilityImageGenrationUpscaleConservativeResponse422(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: str = Field( + ..., + description='Short-hand name for an error, useful for discriminating between errors with the same status code.', + examples=['bad_request'], + min_length=1, + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class StabilityImageGenrationUpscaleConservativeResponse429(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: str = Field( + ..., + description='Short-hand name for an error, useful for discriminating between errors with the same status code.', + examples=['bad_request'], + min_length=1, + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class StabilityImageGenrationUpscaleConservativeResponse500(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: str = Field( + ..., + description='Short-hand name for an error, useful for discriminating between errors with the same status code.', + examples=['bad_request'], + min_length=1, + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class StabilityImageGenrationUpscaleCreativeRequest(BaseModel): + image: StrictBytes = Field( + ..., + description='The image you wish to upscale.\n\nSupported Formats:\n- jpeg\n- png\n- webp\n\nValidation Rules:\n- Every side must be at least 64 pixels\n- Total pixel count must be between 4,096 and 1,048,576 pixels', + examples=['./some/image.png'], + ) + prompt: str = Field( + ..., + description="What you wish to see in the output image. 
A strong, descriptive prompt that clearly defines\nelements, colors, and subjects will lead to better results.\n\nTo control the weight of a given word use the format `(word:weight)`,\nwhere `word` is the word you'd like to control the weight of and `weight`\nis a value between 0 and 1. For example: `The sky was a crisp (blue:0.3) and (green:0.8)`\nwould convey a sky that was blue and green, but more green than blue.", + max_length=10000, + min_length=1, + ) + negative_prompt: Optional[str] = Field( + None, + description='A blurb of text describing what you **do not** wish to see in the output image.\nThis is an advanced feature.', + max_length=10000, + ) + output_format: Optional[OutputFormat4] = Field( + 'png', description='Dictates the `content-type` of the generated image.' + ) + seed: Optional[float] = Field( + 0, + description="A specific value that is used to guide the 'randomness' of the generation. (Omit this parameter or pass `0` to use a random seed.)", + ge=0.0, + le=4294967294.0, + ) + creativity: Optional[float] = Field( + 0.3, + description='Indicates how creative the model should be when upscaling an image.\nHigher values will result in more details being added to the image during upscaling.', + ge=0.1, + le=0.5, + ) + style_preset: Optional[StylePreset] = Field( + None, description='Guides the image model towards a particular style.' + ) + + +class StabilityImageGenrationUpscaleCreativeResponse200(BaseModel): + id: StabilityGenerationID + + +class StabilityImageGenrationUpscaleCreativeResponse400(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: str = Field( + ..., + description='Short-hand name for an error, useful for discriminating between errors with the same status code.', + examples=['bad_request'], + min_length=1, + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class StabilityImageGenrationUpscaleCreativeResponse413(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: str = Field( + ..., + description='Short-hand name for an error, useful for discriminating between errors with the same status code.', + examples=['bad_request'], + min_length=1, + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class StabilityImageGenrationUpscaleCreativeResponse422(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. 
Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: str = Field( + ..., + description='Short-hand name for an error, useful for discriminating between errors with the same status code.', + examples=['bad_request'], + min_length=1, + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class StabilityImageGenrationUpscaleCreativeResponse429(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: str = Field( + ..., + description='Short-hand name for an error, useful for discriminating between errors with the same status code.', + examples=['bad_request'], + min_length=1, + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class StabilityImageGenrationUpscaleCreativeResponse500(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: str = Field( + ..., + description='Short-hand name for an error, useful for discriminating between errors with the same status code.', + examples=['bad_request'], + min_length=1, + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class StabilityImageGenrationUpscaleFastRequest(BaseModel): + image: StrictBytes = Field( + ..., + description='The image you wish to upscale.\n\nSupported Formats:\n- jpeg\n- png\n- webp\n\nValidation Rules:\n- Width must be between 32 and 1,536 pixels\n- Height must be between 32 and 1,536 pixels\n- Total pixel count must be between 1,024 and 1,048,576 pixels', + examples=['./some/image.png'], + ) + output_format: Optional[OutputFormat4] = Field( + 'png', description='Dictates the `content-type` of the generated image.' 
+ ) + + +class StabilityImageGenrationUpscaleFastResponse200(BaseModel): + image: str = Field( + ..., + description='The generated image, encoded to base64.', + examples=['AAAAIGZ0eXBpc29tAAACAGlzb21pc28yYXZjMW1...'], + ) + seed: Optional[float] = Field( + 0, + description='The seed used as random noise for this generation.', + examples=[343940597], + ge=0.0, + le=4294967294.0, + ) + finish_reason: FinishReason = Field( + ..., + description='The reason the generation finished.\n\n- `SUCCESS` = successful generation.\n- `CONTENT_FILTERED` = successful generation, however the output violated our content moderation\npolicy and has been blurred as a result.', + examples=['SUCCESS'], + ) + + +class StabilityImageGenrationUpscaleFastResponse400(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: str = Field( + ..., + description='Short-hand name for an error, useful for discriminating between errors with the same status code.', + examples=['bad_request'], + min_length=1, + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class StabilityImageGenrationUpscaleFastResponse413(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: str = Field( + ..., + description='Short-hand name for an error, useful for discriminating between errors with the same status code.', + examples=['bad_request'], + min_length=1, + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class StabilityImageGenrationUpscaleFastResponse422(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: str = Field( + ..., + description='Short-hand name for an error, useful for discriminating between errors with the same status code.', + examples=['bad_request'], + min_length=1, + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class StabilityImageGenrationUpscaleFastResponse429(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. 
Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: str = Field( + ..., + description='Short-hand name for an error, useful for discriminating between errors with the same status code.', + examples=['bad_request'], + min_length=1, + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class StabilityImageGenrationUpscaleFastResponse500(BaseModel): + id: str = Field( + ..., + description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], + min_length=1, + ) + name: str = Field( + ..., + description='Short-hand name for an error, useful for discriminating between errors with the same status code.', + examples=['bad_request'], + min_length=1, + ) + errors: List[str] = Field( + ..., + description='One or more error messages indicating what went wrong.', + examples=[['some-field: is required']], + min_length=1, + ) + + +class ActionJobResult(BaseModel): + id: Optional[UUID] = Field(None, description='Unique identifier for the job result') + workflow_name: Optional[str] = Field(None, description='Name of the workflow') + operating_system: Optional[str] = Field(None, description='Operating system used') + python_version: Optional[str] = Field(None, description='Python version used') + pytorch_version: Optional[str] = Field(None, description='PyTorch version used') + action_run_id: Optional[str] = Field( + None, description='Identifier of the run this result belongs to' + ) + action_job_id: Optional[str] = Field( + None, description='Identifier of the job this result belongs to' + ) + cuda_version: Optional[str] = Field(None, description='CUDA version used') + branch_name: Optional[str] = Field( + None, description='Name of the relevant git branch' + ) + commit_hash: Optional[str] = Field(None, description='The hash of the commit') + commit_id: Optional[str] = Field(None, description='The ID of the commit') + commit_time: Optional[int] = Field( + None, description='The Unix timestamp when the commit was made' + ) + commit_message: Optional[str] = Field(None, description='The message of the commit') + comfy_run_flags: Optional[str] = Field( + None, description='The comfy run flags. E.g. `--low-vram`' + ) + git_repo: Optional[str] = Field(None, description='The repository name') + pr_number: Optional[str] = Field(None, description='The pull request number') + start_time: Optional[int] = Field( + None, description='The start time of the job as a Unix timestamp.' + ) + end_time: Optional[int] = Field( + None, description='The end time of the job as a Unix timestamp.' + ) + avg_vram: Optional[int] = Field( + None, description='The average VRAM used by the job' + ) + peak_vram: Optional[int] = Field(None, description='The peak VRAM used by the job') + job_trigger_user: Optional[str] = Field( + None, description='The user who triggered the job.'
+ ) + author: Optional[str] = Field(None, description='The author of the commit') + machine_stats: Optional[MachineStats] = None + status: Optional[WorkflowRunStatus] = None + storage_file: Optional[StorageFile] = None + + +class Publisher(BaseModel): + name: Optional[str] = None + id: Optional[str] = Field( + None, + description="The unique identifier for the publisher. It's akin to a username. Should be lowercase.", + ) + description: Optional[str] = None + website: Optional[str] = None + support: Optional[str] = None + source_code_repo: Optional[str] = None + logo: Optional[str] = Field(None, description="URL to the publisher's logo.") + createdAt: Optional[datetime] = Field( + None, description='The date and time the publisher was created.' + ) + members: Optional[List[PublisherMember]] = Field( + None, description='A list of members in the publisher.' + ) + status: Optional[PublisherStatus] = Field( + None, description='The status of the publisher.' + ) + + +class NodeVersion(BaseModel): + id: Optional[str] = None + version: Optional[str] = Field( + None, + description='The version identifier, following semantic versioning. Must be unique for the node.', + ) + createdAt: Optional[datetime] = Field( + None, description='The date and time the version was created.' + ) + changelog: Optional[str] = Field( + None, description='Summary of changes made in this version' + ) + dependencies: Optional[List[str]] = Field( + None, description='A list of pip dependencies required by the node.' + ) + downloadUrl: Optional[str] = Field( + None, description='[Output Only] URL to download this version of the node' + ) + deprecated: Optional[bool] = Field( + None, description='Indicates if this version is deprecated.' + ) + status: Optional[NodeVersionStatus] = Field( + None, description='The status of the node version.' + ) + status_reason: Optional[str] = Field( + None, description='The reason for the status change.' + ) + node_id: Optional[str] = Field( + None, description='The unique identifier of the node.' + ) + comfy_node_extract_status: Optional[str] = Field( + None, description='The status of comfy node extraction process.' 
+ ) + + +class IdeogramV3Request(BaseModel): + prompt: str = Field(..., description='The text prompt for image generation') + seed: Optional[int] = Field( + None, description='Seed value for reproducible generation' + ) + resolution: Optional[str] = Field( + None, description='Image resolution in format WxH', examples=['1280x800'] + ) + aspect_ratio: Optional[str] = Field( + None, description='Aspect ratio in format WxH', examples=['1x3'] + ) + rendering_speed: RenderingSpeed + magic_prompt: Optional[MagicPrompt] = Field( + None, description='Whether to enable magic prompt enhancement' + ) + negative_prompt: Optional[str] = Field( + None, description='Text prompt specifying what to avoid in the generation' + ) + num_images: Optional[int] = Field( + None, description='Number of images to generate', ge=1 + ) + color_palette: Optional[ColorPalette] = None + style_codes: Optional[List[StyleCode]] = Field( + None, description='Array of style codes in hexadecimal format' + ) + style_type: Optional[StyleType] = Field( + None, description='The type of style to apply' + ) + style_reference_images: Optional[List[str]] = Field( + None, description='Array of reference image URLs or identifiers' + ) + + +class IdeogramV3EditRequest(BaseModel): + image: Optional[StrictBytes] = Field( + None, + description='The image being edited (max size 10MB); only JPEG, WebP and PNG formats are supported at this time.', + ) + mask: Optional[StrictBytes] = Field( + None, + description='A black and white image of the same size as the image being edited (max size 10MB). Black regions in the mask should match up with the regions of the image that you would like to edit; only JPEG, WebP and PNG formats are supported at this time.', + ) + prompt: str = Field( + ..., description='The prompt used to describe the edited result.' + ) + magic_prompt: Optional[str] = Field( + None, + description='Determine if MagicPrompt should be used in generating the request or not.', + ) + num_images: Optional[int] = Field( + None, description='The number of images to generate.' + ) + seed: Optional[int] = Field( + None, description='Random seed. Set for reproducible generation.' + ) + rendering_speed: RenderingSpeed + color_palette: Optional[IdeogramColorPalette] = Field( + None, + description='A color palette for generation, must EITHER be specified via one of the presets (name) or explicitly via hexadecimal representations of the color with optional weights (members). Not supported by V_1, V_1_TURBO, V_2A and V_2A_TURBO models.', + ) + style_codes: Optional[List[StyleCode]] = Field( + None, + description='A list of 8 character hexadecimal codes representing the style of the image. Cannot be used in conjunction with style_reference_images or style_type.', + ) + style_reference_images: Optional[List[StrictBytes]] = Field( + None, + description='A set of images to use as style references (maximum total size 10MB across all style references). 
The images should be in JPEG, PNG or WebP format.', + ) + + +class KlingCameraControl(BaseModel): + type: Optional[KlingCameraControlType] = None + config: Optional[KlingCameraConfig] = None + + +class KlingText2VideoRequest(BaseModel): + model_name: Optional[KlingVideoGenModelName] = 'kling-v2-master' + prompt: Optional[str] = Field( + None, description='Positive text prompt', max_length=2500 + ) + negative_prompt: Optional[str] = Field( + None, description='Negative text prompt', max_length=2500 + ) + cfg_scale: Optional[KlingVideoGenCfgScale] = Field( + default_factory=lambda: KlingVideoGenCfgScale.model_validate(0.5) + ) + mode: Optional[KlingVideoGenMode] = 'std' + camera_control: Optional[KlingCameraControl] = None + aspect_ratio: Optional[KlingVideoGenAspectRatio] = '16:9' + duration: Optional[KlingVideoGenDuration] = '5' + callback_url: Optional[AnyUrl] = Field( + None, description='The callback notification address' + ) + external_task_id: Optional[str] = Field(None, description='Customized Task ID') + + +class KlingImage2VideoRequest(BaseModel): + model_name: Optional[KlingVideoGenModelName] = 'kling-v2-master' + image: Optional[str] = Field( + None, + description='Reference Image - URL or Base64 encoded string, cannot exceed 10MB, resolution not less than 300*300px, aspect ratio between 1:2.5 ~ 2.5:1. Base64 should not include data:image prefix.', + ) + image_tail: Optional[str] = Field( + None, + description='Reference Image - End frame control. URL or Base64 encoded string, cannot exceed 10MB, resolution not less than 300*300px. Base64 should not include data:image prefix.', + ) + prompt: Optional[str] = Field( + None, description='Positive text prompt', max_length=2500 + ) + negative_prompt: Optional[str] = Field( + None, description='Negative text prompt', max_length=2500 + ) + cfg_scale: Optional[KlingVideoGenCfgScale] = Field( + default_factory=lambda: KlingVideoGenCfgScale.model_validate(0.5) + ) + mode: Optional[KlingVideoGenMode] = 'std' + static_mask: Optional[str] = Field( + None, + description='Static Brush Application Area (Mask image created by users using the motion brush). The aspect ratio must match the input image.', + ) + dynamic_masks: Optional[List[DynamicMask]] = Field( + None, + description='Dynamic Brush Configuration List (up to 6 groups). For 5-second videos, trajectory length must not exceed 77 coordinates.', + ) + camera_control: Optional[KlingCameraControl] = None + aspect_ratio: Optional[KlingVideoGenAspectRatio] = '16:9' + duration: Optional[KlingVideoGenDuration] = '5' + callback_url: Optional[AnyUrl] = Field( + None, + description='The callback notification address. Server will notify when the task status changes.', + ) + external_task_id: Optional[str] = Field( + None, + description='Customized Task ID. Must be unique within a single user account.', + ) + + +class KlingVideoEffectsInput( + RootModel[Union[KlingSingleImageEffectInput, KlingDualCharacterEffectInput]] +): + root: Union[KlingSingleImageEffectInput, KlingDualCharacterEffectInput] + + +class StripeBillingDetails(BaseModel): + address: Optional[StripeAddress] = None + email: Optional[str] = None + name: Optional[str] = None + phone: Optional[str] = None + tax_id: Optional[Any] = None + + +class StripePaymentMethodDetails(BaseModel): + card: Optional[StripeCardDetails] = None + type: Optional[str] = None + + +class BFLFluxProFillInputs(BaseModel): + image: str = Field( + ..., + description='A Base64-encoded string representing the image you wish to modify. 
Can contain alpha mask if desired.', + title='Image', + ) + mask: Optional[str] = Field( + None, + description='A Base64-encoded string representing a mask for the areas you want to modify in the image. The mask should be the same dimensions as the image and in black and white. Black areas (0%) indicate no modification, while white areas (100%) specify areas for inpainting. Optional if you provide an alpha mask in the original image. Validation: The endpoint verifies that the dimensions of the mask match the original image.', + title='Mask', + ) + prompt: Optional[str] = Field( + '', + description='The description of the changes you want to make. This text guides the inpainting process, allowing you to specify features, styles, or modifications for the masked area.', + examples=['ein fantastisches bild'], + title='Prompt', + ) + steps: Optional[Steps] = Field( + default_factory=lambda: Steps.model_validate(50), + description='Number of steps for the image generation process', + examples=[50], + title='Steps', + ) + prompt_upsampling: Optional[bool] = Field( + False, + description='Whether to perform upsampling on the prompt. If active, automatically modifies the prompt for more creative generation', + title='Prompt Upsampling', + ) + seed: Optional[int] = Field( + None, description='Optional seed for reproducibility', title='Seed' + ) + guidance: Optional[Guidance] = Field( + default_factory=lambda: Guidance.model_validate(60), + description='Guidance strength for the image generation process', + title='Guidance', + ) + output_format: Optional[BFLOutputFormat] = Field( + 'jpeg', + description="Output format for the generated image. Can be 'jpeg' or 'png'.", + ) + safety_tolerance: Optional[int] = Field( + 2, + description='Tolerance level for input and output moderation. Between 0 and 6, 0 being most strict, 6 being least strict.', + examples=[2], + ge=0, + le=6, + title='Safety Tolerance', + ) + webhook_url: Optional[WebhookUrl] = Field( + None, description='URL to receive webhook notifications', title='Webhook Url' + ) + webhook_secret: Optional[str] = Field( + None, + description='Optional secret for webhook signature verification', + title='Webhook Secret', + ) + + +class BFLHTTPValidationError(BaseModel): + detail: Optional[List[BFLValidationError]] = Field(None, title='Detail') + + +class BFLFluxProExpandInputs(BaseModel): + image: str = Field( + ..., + description='A Base64-encoded string representing the image you wish to expand.', + title='Image', + ) + top: Optional[Top] = Field( + 0, description='Number of pixels to expand at the top of the image', title='Top' + ) + bottom: Optional[Bottom] = Field( + 0, + description='Number of pixels to expand at the bottom of the image', + title='Bottom', + ) + left: Optional[Left] = Field( + 0, + description='Number of pixels to expand on the left side of the image', + title='Left', + ) + right: Optional[Right] = Field( + 0, + description='Number of pixels to expand on the right side of the image', + title='Right', + ) + prompt: Optional[str] = Field( + '', + description='The description of the changes you want to make. 
This text guides the expansion process, allowing you to specify features, styles, or modifications for the expanded areas.', + examples=['ein fantastisches bild'], + title='Prompt', + ) + steps: Optional[Steps] = Field( + default_factory=lambda: Steps.model_validate(50), + description='Number of steps for the image generation process', + examples=[50], + title='Steps', + ) + prompt_upsampling: Optional[bool] = Field( + False, + description='Whether to perform upsampling on the prompt. If active, automatically modifies the prompt for more creative generation', + title='Prompt Upsampling', + ) + seed: Optional[int] = Field( + None, description='Optional seed for reproducibility', title='Seed' + ) + guidance: Optional[Guidance] = Field( + default_factory=lambda: Guidance.model_validate(60), + description='Guidance strength for the image generation process', + title='Guidance', + ) + output_format: Optional[BFLOutputFormat] = Field( + 'jpeg', + description="Output format for the generated image. Can be 'jpeg' or 'png'.", + ) + safety_tolerance: Optional[int] = Field( + 2, + description='Tolerance level for input and output moderation. Between 0 and 6, 0 being most strict, 6 being least strict.', + examples=[2], + ge=0, + le=6, + title='Safety Tolerance', + ) + webhook_url: Optional[WebhookUrl] = Field( + None, description='URL to receive webhook notifications', title='Webhook Url' + ) + webhook_secret: Optional[str] = Field( + None, + description='Optional secret for webhook signature verification', + title='Webhook Secret', + ) + + +class BFLCannyInputs(BaseModel): + prompt: str = Field( + ..., + description='Text prompt for image generation', + examples=['ein fantastisches bild'], + title='Prompt', + ) + control_image: Optional[str] = Field( + None, + description='Base64 encoded image to use as control input if no preprocessed image is provided', + title='Control Image', + ) + preprocessed_image: Optional[str] = Field( + None, + description='Optional pre-processed image that will bypass the control preprocessing step', + title='Preprocessed Image', + ) + canny_low_threshold: Optional[CannyLowThreshold] = Field( + default_factory=lambda: CannyLowThreshold.model_validate(50), + description='Low threshold for Canny edge detection', + title='Canny Low Threshold', + ) + canny_high_threshold: Optional[CannyHighThreshold] = Field( + default_factory=lambda: CannyHighThreshold.model_validate(200), + description='High threshold for Canny edge detection', + title='Canny High Threshold', + ) + prompt_upsampling: Optional[bool] = Field( + False, + description='Whether to perform upsampling on the prompt', + title='Prompt Upsampling', + ) + seed: Optional[int] = Field( + None, + description='Optional seed for reproducibility', + examples=[42], + title='Seed', + ) + steps: Optional[Steps2] = Field( + default_factory=lambda: Steps2.model_validate(50), + description='Number of steps for the image generation process', + title='Steps', + ) + output_format: Optional[BFLOutputFormat] = Field( + 'jpeg', + description="Output format for the generated image. Can be 'jpeg' or 'png'.", + ) + guidance: Optional[Guidance2] = Field( + default_factory=lambda: Guidance2.model_validate(30), + description='Guidance strength for the image generation process', + title='Guidance', + ) + safety_tolerance: Optional[int] = Field( + 2, + description='Tolerance level for input and output moderation. 
Between 0 and 6, 0 being most strict, 6 being least strict.', + ge=0, + le=6, + title='Safety Tolerance', + ) + webhook_url: Optional[WebhookUrl] = Field( + None, description='URL to receive webhook notifications', title='Webhook Url' + ) + webhook_secret: Optional[str] = Field( + None, + description='Optional secret for webhook signature verification', + title='Webhook Secret', + ) + + +class BFLDepthInputs(BaseModel): + prompt: str = Field( + ..., + description='Text prompt for image generation', + examples=['ein fantastisches bild'], + title='Prompt', + ) + control_image: Optional[str] = Field( + None, + description='Base64 encoded image to use as control input', + title='Control Image', + ) + preprocessed_image: Optional[str] = Field( + None, + description='Optional pre-processed image that will bypass the control preprocessing step', + title='Preprocessed Image', + ) + prompt_upsampling: Optional[bool] = Field( + False, + description='Whether to perform upsampling on the prompt', + title='Prompt Upsampling', + ) + seed: Optional[int] = Field( + None, + description='Optional seed for reproducibility', + examples=[42], + title='Seed', + ) + steps: Optional[Steps2] = Field( + default_factory=lambda: Steps2.model_validate(50), + description='Number of steps for the image generation process', + title='Steps', + ) + output_format: Optional[BFLOutputFormat] = Field( + 'jpeg', + description="Output format for the generated image. Can be 'jpeg' or 'png'.", + ) + guidance: Optional[Guidance2] = Field( + default_factory=lambda: Guidance2.model_validate(15), + description='Guidance strength for the image generation process', + title='Guidance', + ) + safety_tolerance: Optional[int] = Field( + 2, + description='Tolerance level for input and output moderation. Between 0 and 6, 0 being most strict, 6 being least strict.', + ge=0, + le=6, + title='Safety Tolerance', + ) + webhook_url: Optional[WebhookUrl] = Field( + None, description='URL to receive webhook notifications', title='Webhook Url' + ) + webhook_secret: Optional[str] = Field( + None, + description='Optional secret for webhook signature verification', + title='Webhook Secret', + ) + + +class Controls(BaseModel): + artistic_level: Optional[int] = Field( + None, + description='Defines artistic tone of your image. At a simple level, the person looks straight at the camera in a static and clean style. Dynamic and eccentric levels introduce movement and creativity.', + ge=0, + le=5, + ) + colors: Optional[List[RGBColor]] = Field( + None, description='An array of preferable colors' + ) + background_color: Optional[RGBColor] = Field( + None, description='Use given color as a desired background color' + ) + no_text: Optional[bool] = Field(None, description='Do not embed text layouts') + + +class RecraftImageGenerationRequest(BaseModel): + prompt: str = Field( + ..., description='The text prompt describing the image to generate' + ) + model: str = Field( + ..., description='The model to use for generation (e.g., "recraftv3")' + ) + style: Optional[str] = Field( + None, + description='The style to apply to the generated image (e.g., "digital_illustration")', + ) + style_id: Optional[str] = Field( + None, + description='The style ID to apply to the generated image (e.g., "123e4567-e89b-12d3-a456-426614174000"). 
If style_id is provided, style should not be provided.', + ) + size: str = Field( + ..., description='The size of the generated image (e.g., "1024x1024")' + ) + controls: Optional[Controls] = Field( + None, description='The controls for the generated image' + ) + n: int = Field(..., description='The number of images to generate', ge=1, le=4) + + +class LumaKeyframes(BaseModel): + frame0: Optional[LumaKeyframe] = None + frame1: Optional[LumaKeyframe] = None + + +class LumaGenerationRequest(BaseModel): + generation_type: Optional[GenerationType] = 'video' + prompt: str = Field(..., description='The prompt of the generation') + aspect_ratio: LumaAspectRatio + loop: Optional[bool] = Field(None, description='Whether to loop the video') + keyframes: Optional[LumaKeyframes] = None + callback_url: Optional[AnyUrl] = Field( + None, + description='The callback URL of the generation, a POST request with Generation object will be sent to the callback URL when the generation is dreaming, completed, or failed', + ) + model: LumaVideoModel + resolution: LumaVideoModelOutputResolution + duration: LumaVideoModelOutputDuration + + +class LumaGeneration(BaseModel): + id: Optional[UUID] = Field(None, description='The ID of the generation') + generation_type: Optional[LumaGenerationType] = None + state: Optional[LumaState] = None + failure_reason: Optional[str] = Field( + None, description='The reason for the state of the generation' + ) + created_at: Optional[datetime] = Field( + None, description='The date and time when the generation was created' + ) + assets: Optional[LumaAssets] = None + model: Optional[str] = Field(None, description='The model used for the generation') + request: Optional[ + Union[ + LumaGenerationRequest, + LumaImageGenerationRequest, + LumaUpscaleVideoGenerationRequest, + LumaAudioGenerationRequest, + ] + ] = Field(None, description='The request of the generation') + + +class RunwayImageToVideoRequest(BaseModel): + promptImage: RunwayPromptImageObject + seed: int = Field( + ..., description='Random seed for generation', ge=0, le=4294967295 + ) + model: RunwayModelEnum = Field(..., description='Model to use for generation') + promptText: Optional[str] = Field( + None, description='Text prompt for the generation', max_length=1000 + ) + duration: RunwayDurationEnum = Field( + ..., description='The number of seconds of duration for the output video.' + ) + ratio: RunwayAspectRatioEnum = Field( + ..., + description='The resolution (aspect ratio) of the output video. Allowable values depend on the selected model. 
1280:768 and 768:1280 are only supported for gen3a_turbo.', + ) + + +class RunwayTaskStatusResponse(BaseModel): + id: Optional[str] = Field(None, description='Task ID') + status: Optional[RunwayTaskStatusEnum] = Field(None, description='Task status') + createdAt: Optional[datetime] = Field(None, description='Task creation timestamp') + output: Optional[List[str]] = Field(None, description='Array of output video URLs') + + +class PikaHTTPValidationError(BaseModel): + detail: Optional[List[PikaValidationError]] = Field(None, title='Detail') + + +class PikaBodyGenerate22T2vGenerate22T2vPost(BaseModel): + promptText: str = Field(..., title='Prompttext') + negativePrompt: Optional[str] = Field(None, title='Negativeprompt') + seed: Optional[int] = Field(None, title='Seed') + resolution: Optional[PikaResolutionEnum] = Field('1080p', title='Resolution') + duration: Optional[PikaDurationEnum] = Field(5, title='Duration') + aspectRatio: Optional[float] = Field( + 1.7777777777777777, + description='Aspect ratio (width / height)', + ge=0.4, + le=2.5, + title='Aspectratio', + ) + + +class PikaBodyGenerate22I2vGenerate22I2vPost(BaseModel): + image: Optional[StrictBytes] = Field(None, title='Image') + promptText: Optional[str] = Field(None, title='Prompttext') + negativePrompt: Optional[str] = Field(None, title='Negativeprompt') + seed: Optional[int] = Field(None, title='Seed') + resolution: Optional[PikaResolutionEnum] = Field('1080p', title='Resolution') + duration: Optional[PikaDurationEnum] = Field(5, title='Duration') + + +class PikaBodyGenerate22KeyframeGenerate22PikaframesPost(BaseModel): + keyFrames: Optional[List[StrictBytes]] = Field( + None, description='Array of keyframe images', title='Keyframes' + ) + promptText: str = Field(..., title='Prompttext') + negativePrompt: Optional[str] = Field(None, title='Negativeprompt') + seed: Optional[int] = Field(None, title='Seed') + resolution: Optional[PikaResolutionEnum] = Field('1080p', title='Resolution') + duration: Optional[int] = Field(None, ge=5, le=10, title='Duration') + + +class PikaVideoResponse(BaseModel): + id: str = Field(..., title='Id') + status: PikaStatusEnum = Field( + ..., description='The status of the video', title='Status' + ) + url: Optional[str] = Field(None, title='Url') + progress: Optional[int] = Field(None, title='Progress') + + +class Node(BaseModel): + id: Optional[str] = Field(None, description='The unique identifier of the node.') + name: Optional[str] = Field(None, description='The display name of the node.') + category: Optional[str] = Field(None, description='The category of the node.') + description: Optional[str] = None + author: Optional[str] = None + license: Optional[str] = Field( + None, description="The path to the LICENSE file in the node's repository." + ) + icon: Optional[str] = Field(None, description="URL to the node's icon.") + repository: Optional[str] = Field(None, description="URL to the node's repository.") + tags: Optional[List[str]] = None + latest_version: Optional[NodeVersion] = Field( + None, description='The latest version of the node.' + ) + rating: Optional[float] = Field(None, description='The average rating of the node.') + downloads: Optional[int] = Field( + None, description='The number of downloads of the node.' + ) + publisher: Optional[Publisher] = Field( + None, description='The publisher of the node.' + ) + status: Optional[NodeStatus] = Field(None, description='The status of the node.') + status_detail: Optional[str] = Field( + None, description='The status detail of the node.' 
+ ) + translations: Optional[Dict[str, Dict[str, Any]]] = None + + +class KlingVideoEffectsRequest(BaseModel): + effect_scene: Union[KlingDualCharacterEffectsScene, KlingSingleImageEffectsScene] + input: KlingVideoEffectsInput + callback_url: Optional[AnyUrl] = Field( + None, + description='The callback notification address for the result of this task.', + ) + external_task_id: Optional[str] = Field( + None, + description='Customized Task ID. Must be unique within a single user account.', + ) + + +class StripeCharge(BaseModel): + id: Optional[str] = None + object: Optional[Object2] = None + amount: Optional[int] = None + amount_captured: Optional[int] = None + amount_refunded: Optional[int] = None + application: Optional[str] = None + application_fee: Optional[str] = None + application_fee_amount: Optional[int] = None + balance_transaction: Optional[str] = None + billing_details: Optional[StripeBillingDetails] = None + calculated_statement_descriptor: Optional[str] = None + captured: Optional[bool] = None + created: Optional[int] = None + currency: Optional[str] = None + customer: Optional[str] = None + description: Optional[str] = None + destination: Optional[Any] = None + dispute: Optional[Any] = None + disputed: Optional[bool] = None + failure_balance_transaction: Optional[Any] = None + failure_code: Optional[Any] = None + failure_message: Optional[Any] = None + fraud_details: Optional[Dict[str, Any]] = None + invoice: Optional[Any] = None + livemode: Optional[bool] = None + metadata: Optional[Dict[str, Any]] = None + on_behalf_of: Optional[Any] = None + order: Optional[Any] = None + outcome: Optional[StripeOutcome] = None + paid: Optional[bool] = None + payment_intent: Optional[str] = None + payment_method: Optional[str] = None + payment_method_details: Optional[StripePaymentMethodDetails] = None + radar_options: Optional[Dict[str, Any]] = None + receipt_email: Optional[str] = None + receipt_number: Optional[str] = None + receipt_url: Optional[str] = None + refunded: Optional[bool] = None + refunds: Optional[StripeRefundList] = None + review: Optional[Any] = None + shipping: Optional[StripeShipping] = None + source: Optional[Any] = None + source_transfer: Optional[Any] = None + statement_descriptor: Optional[Any] = None + statement_descriptor_suffix: Optional[Any] = None + status: Optional[str] = None + transfer_data: Optional[Any] = None + transfer_group: Optional[Any] = None + + +class StripeChargeList(BaseModel): + object: Optional[str] = None + data: Optional[List[StripeCharge]] = None + has_more: Optional[bool] = None + total_count: Optional[int] = None + url: Optional[str] = None + + +class StripePaymentIntent(BaseModel): + id: Optional[str] = None + object: Optional[Object1] = None + amount: Optional[int] = None + amount_capturable: Optional[int] = None + amount_details: Optional[StripeAmountDetails] = None + amount_received: Optional[int] = None + application: Optional[str] = None + application_fee_amount: Optional[int] = None + automatic_payment_methods: Optional[Any] = None + canceled_at: Optional[int] = None + cancellation_reason: Optional[str] = None + capture_method: Optional[str] = None + charges: Optional[StripeChargeList] = None + client_secret: Optional[str] = None + confirmation_method: Optional[str] = None + created: Optional[int] = None + currency: Optional[str] = None + customer: Optional[str] = None + description: Optional[str] = None + invoice: Optional[str] = None + last_payment_error: Optional[Any] = None + latest_charge: Optional[str] = None + livemode: 
Optional[bool] = None + metadata: Optional[Dict[str, Any]] = None + next_action: Optional[Any] = None + on_behalf_of: Optional[Any] = None + payment_method: Optional[str] = None + payment_method_configuration_details: Optional[Any] = None + payment_method_options: Optional[StripePaymentMethodOptions] = None + payment_method_types: Optional[List[str]] = None + processing: Optional[Any] = None + receipt_email: Optional[str] = None + review: Optional[Any] = None + setup_future_usage: Optional[Any] = None + shipping: Optional[StripeShipping] = None + source: Optional[Any] = None + statement_descriptor: Optional[Any] = None + statement_descriptor_suffix: Optional[Any] = None + status: Optional[str] = None + transfer_data: Optional[Any] = None + transfer_group: Optional[Any] = None + + +class Data8(BaseModel): + object: Optional[StripePaymentIntent] = None + + +class StripeEvent(BaseModel): + id: str + object: Object + api_version: Optional[str] = None + created: Optional[int] = None + data: Data8 + livemode: Optional[bool] = None + pending_webhooks: Optional[int] = None + request: Optional[StripeRequestInfo] = None + type: Type diff --git a/comfy_api_nodes/apis/bfl_api.py b/comfy_api_nodes/apis/bfl_api.py new file mode 100644 index 00000000..c189038f --- /dev/null +++ b/comfy_api_nodes/apis/bfl_api.py @@ -0,0 +1,156 @@ +from __future__ import annotations + +from enum import Enum +from typing import Any, Dict, Optional + +from pydantic import BaseModel, Field, confloat, conint + + +class BFLOutputFormat(str, Enum): + png = 'png' + jpeg = 'jpeg' + + +class BFLFluxExpandImageRequest(BaseModel): + prompt: str = Field(..., description='The description of the changes you want to make. This text guides the expansion process, allowing you to specify features, styles, or modifications for the expanded areas.') + prompt_upsampling: Optional[bool] = Field( + None, description='Whether to perform upsampling on the prompt. If active, automatically modifies the prompt for more creative generation.' + ) + seed: Optional[int] = Field(None, description='The seed value for reproducibility.') + top: conint(ge=0, le=2048) = Field(..., description='Number of pixels to expand at the top of the image') + bottom: conint(ge=0, le=2048) = Field(..., description='Number of pixels to expand at the bottom of the image') + left: conint(ge=0, le=2048) = Field(..., description='Number of pixels to expand at the left side of the image') + right: conint(ge=0, le=2048) = Field(..., description='Number of pixels to expand at the right side of the image') + steps: conint(ge=15, le=50) = Field(..., description='Number of steps for the image generation process') + guidance: confloat(ge=1.5, le=100) = Field(..., description='Guidance strength for the image generation process') + safety_tolerance: Optional[conint(ge=0, le=6)] = Field( + 6, description='Tolerance level for input and output moderation. Between 0 and 6, 0 being most strict, 6 being least strict. Defaults to 2.' + ) + output_format: Optional[BFLOutputFormat] = Field( + BFLOutputFormat.png, description="Output format for the generated image. Can be 'jpeg' or 'png'.", examples=['png'] + ) + image: str = Field(None, description='A Base64-encoded string representing the image you wish to expand') + + +class BFLFluxFillImageRequest(BaseModel): + prompt: str = Field(..., description='The description of the changes you want to make. 
This text guides the expansion process, allowing you to specify features, styles, or modifications for the expanded areas.') + prompt_upsampling: Optional[bool] = Field( + None, description='Whether to perform upsampling on the prompt. If active, automatically modifies the prompt for more creative generation.' + ) + seed: Optional[int] = Field(None, description='The seed value for reproducibility.') + steps: conint(ge=15, le=50) = Field(..., description='Number of steps for the image generation process') + guidance: confloat(ge=1.5, le=100) = Field(..., description='Guidance strength for the image generation process') + safety_tolerance: Optional[conint(ge=0, le=6)] = Field( + 6, description='Tolerance level for input and output moderation. Between 0 and 6, 0 being most strict, 6 being least strict. Defaults to 2.' + ) + output_format: Optional[BFLOutputFormat] = Field( + BFLOutputFormat.png, description="Output format for the generated image. Can be 'jpeg' or 'png'.", examples=['png'] + ) + image: str = Field(None, description='A Base64-encoded string representing the image you wish to modify. Can contain alpha mask if desired.') + mask: str = Field(None, description='A Base64-encoded string representing the mask of the areas you wish to modify.') + + +class BFLFluxCannyImageRequest(BaseModel): + prompt: str = Field(..., description='Text prompt for image generation') + prompt_upsampling: Optional[bool] = Field( + None, description='Whether to perform upsampling on the prompt. If active, automatically modifies the prompt for more creative generation.' + ) + canny_low_threshold: Optional[int] = Field(None, description='Low threshold for Canny edge detection') + canny_high_threshold: Optional[int] = Field(None, description='High threshold for Canny edge detection') + seed: Optional[int] = Field(None, description='The seed value for reproducibility.') + steps: conint(ge=15, le=50) = Field(..., description='Number of steps for the image generation process') + guidance: confloat(ge=1, le=100) = Field(..., description='Guidance strength for the image generation process') + safety_tolerance: Optional[conint(ge=0, le=6)] = Field( + 6, description='Tolerance level for input and output moderation. Between 0 and 6, 0 being most strict, 6 being least strict. Defaults to 2.' + ) + output_format: Optional[BFLOutputFormat] = Field( + BFLOutputFormat.png, description="Output format for the generated image. Can be 'jpeg' or 'png'.", examples=['png'] + ) + control_image: Optional[str] = Field(None, description='Base64 encoded image to use as control input if no preprocessed image is provided') + preprocessed_image: Optional[str] = Field(None, description='Optional pre-processed image that will bypass the control preprocessing step') + + +class BFLFluxDepthImageRequest(BaseModel): + prompt: str = Field(..., description='Text prompt for image generation') + prompt_upsampling: Optional[bool] = Field( + None, description='Whether to perform upsampling on the prompt. If active, automatically modifies the prompt for more creative generation.' + ) + seed: Optional[int] = Field(None, description='The seed value for reproducibility.') + steps: conint(ge=15, le=50) = Field(..., description='Number of steps for the image generation process') + guidance: confloat(ge=1, le=100) = Field(..., description='Guidance strength for the image generation process') + safety_tolerance: Optional[conint(ge=0, le=6)] = Field( + 6, description='Tolerance level for input and output moderation.
Between 0 and 6, 0 being most strict, 6 being least strict. Defaults to 2.' + ) + output_format: Optional[BFLOutputFormat] = Field( + BFLOutputFormat.png, description="Output format for the generated image. Can be 'jpeg' or 'png'.", examples=['png'] + ) + control_image: Optional[str] = Field(None, description='Base64 encoded image to use as control input if no preprocessed image is provided') + preprocessed_image: Optional[str] = Field(None, description='Optional pre-processed image that will bypass the control preprocessing step') + + +class BFLFluxProGenerateRequest(BaseModel): + prompt: str = Field(..., description='The text prompt for image generation.') + prompt_upsampling: Optional[bool] = Field( + None, description='Whether to perform upsampling on the prompt. If active, automatically modifies the prompt for more creative generation.' + ) + seed: Optional[int] = Field(None, description='The seed value for reproducibility.') + width: conint(ge=256, le=1440) = Field(1024, description='Width of the generated image in pixels. Must be a multiple of 32.') + height: conint(ge=256, le=1440) = Field(768, description='Height of the generated image in pixels. Must be a multiple of 32.') + safety_tolerance: Optional[conint(ge=0, le=6)] = Field( + 6, description='Tolerance level for input and output moderation. Between 0 and 6, 0 being most strict, 6 being least strict. Defaults to 2.' + ) + output_format: Optional[BFLOutputFormat] = Field( + BFLOutputFormat.png, description="Output format for the generated image. Can be 'jpeg' or 'png'.", examples=['png'] + ) + image_prompt: Optional[str] = Field(None, description='Optional image to remix in base64 format') + # image_prompt_strength: Optional[confloat(ge=0.0, le=1.0)] = Field( + # None, description='Blend between the prompt and the image prompt.' + # ) + + +class BFLFluxProUltraGenerateRequest(BaseModel): + prompt: str = Field(..., description='The text prompt for image generation.') + prompt_upsampling: Optional[bool] = Field( + None, description='Whether to perform upsampling on the prompt. If active, automatically modifies the prompt for more creative generation.' + ) + seed: Optional[int] = Field(None, description='The seed value for reproducibility.') + aspect_ratio: Optional[str] = Field(None, description='Aspect ratio of the image between 21:9 and 9:21.') + safety_tolerance: Optional[conint(ge=0, le=6)] = Field( + 6, description='Tolerance level for input and output moderation. Between 0 and 6, 0 being most strict, 6 being least strict. Defaults to 2.' + ) + output_format: Optional[BFLOutputFormat] = Field( + BFLOutputFormat.png, description="Output format for the generated image. Can be 'jpeg' or 'png'.", examples=['png'] + ) + raw: Optional[bool] = Field(None, description='Generate less processed, more natural-looking images.') + image_prompt: Optional[str] = Field(None, description='Optional image to remix in base64 format') + image_prompt_strength: Optional[confloat(ge=0.0, le=1.0)] = Field( + None, description='Blend between the prompt and the image prompt.' 
+ ) + + +class BFLFluxProGenerateResponse(BaseModel): + id: str = Field(..., description='The unique identifier for the generation task.') + polling_url: str = Field(..., description='URL to poll for the generation result.') + + +class BFLStatus(str, Enum): + task_not_found = "Task not found" + pending = "Pending" + request_moderated = "Request Moderated" + content_moderated = "Content Moderated" + ready = "Ready" + error = "Error" + + +class BFLFluxProStatusResponse(BaseModel): + id: str = Field(..., description="The unique identifier for the generation task.") + status: BFLStatus = Field(..., description="The status of the task.") + result: Optional[Dict[str, Any]] = Field( + None, description="The result of the task (null if not completed)." + ) + progress: confloat(ge=0.0, le=1.0) = Field( + ..., description="The progress of the task (0.0 to 1.0)." + ) + details: Optional[Dict[str, Any]] = Field( + None, description="Additional details about the task (null if not available)." + ) diff --git a/comfy_api_nodes/apis/client.py b/comfy_api_nodes/apis/client.py new file mode 100644 index 00000000..cff52714 --- /dev/null +++ b/comfy_api_nodes/apis/client.py @@ -0,0 +1,635 @@ +""" +API Client Framework for api.comfy.org. + +This module provides a flexible framework for making API requests from ComfyUI nodes. +It supports both synchronous and asynchronous API operations with proper type validation. + +Key Components: +-------------- +1. ApiClient - Handles HTTP requests with authentication and error handling +2. ApiEndpoint - Defines a single HTTP endpoint with its request/response models +3. ApiOperation - Executes a single synchronous API operation + +Usage Examples: +-------------- + +# Example 1: Synchronous API Operation +# ------------------------------------ +# For a simple API call that returns the result immediately: + +# 1. Create the API client +api_client = ApiClient( + base_url="https://api.example.com", + auth_token="your_auth_token_here", + comfy_api_key="your_comfy_api_key_here", + timeout=30.0, + verify_ssl=True +) + +# 2. Define the endpoint +user_info_endpoint = ApiEndpoint( + path="/v1/users/me", + method=HttpMethod.GET, + request_model=EmptyRequest, # No request body needed + response_model=UserProfile, # Pydantic model for the response + query_params=None +) + +# 3. Create the request object +request = EmptyRequest() + +# 4. Create and execute the operation +operation = ApiOperation( + endpoint=user_info_endpoint, + request=request +) +user_profile = operation.execute(client=api_client) # Returns immediately with the result + + +# Example 2: Asynchronous API Operation with Polling +# ------------------------------------------------- +# For an API that starts a task and requires polling for completion: + +# 1. Define the endpoints (initial request and polling) +generate_image_endpoint = ApiEndpoint( + path="/v1/images/generate", + method=HttpMethod.POST, + request_model=ImageGenerationRequest, + response_model=TaskCreatedResponse, + query_params=None +) + +check_task_endpoint = ApiEndpoint( + path="/v1/tasks/{task_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=ImageGenerationResult, + query_params=None +) + +# 2. Create the request object +request = ImageGenerationRequest( + prompt="a beautiful sunset over mountains", + width=1024, + height=1024, + num_images=1 +) + +# 3. 
Create and execute the polling operation +operation = PollingOperation( + initial_endpoint=generate_image_endpoint, + initial_request=request, + poll_endpoint=check_task_endpoint, + task_id_field="task_id", + status_field="status", + completed_statuses=["completed"], + failed_statuses=["failed", "error"] +) + +# This will make the initial request and then poll until completion +result = operation.execute(client=api_client) # Returns the final ImageGenerationResult when done +""" + +from __future__ import annotations +import logging +import time +import io +from typing import Dict, Type, Optional, Any, TypeVar, Generic, Callable +from enum import Enum +import json +import requests +from urllib.parse import urljoin +from pydantic import BaseModel, Field + +from comfy.cli_args import args +from comfy import utils + +T = TypeVar("T", bound=BaseModel) +R = TypeVar("R", bound=BaseModel) +P = TypeVar("P", bound=BaseModel) # For poll response + +PROGRESS_BAR_MAX = 100 + + +class EmptyRequest(BaseModel): + """Base class for empty request bodies. + For GET requests, fields will be sent as query parameters.""" + + pass + + +class UploadRequest(BaseModel): + file_name: str = Field(..., description="Filename to upload") + content_type: str | None = Field( + None, + description="Mime type of the file. For example: image/png, image/jpeg, video/mp4, etc.", + ) + + +class UploadResponse(BaseModel): + download_url: str = Field(..., description="URL to GET uploaded file") + upload_url: str = Field(..., description="URL to PUT file to upload") + + +class HttpMethod(str, Enum): + GET = "GET" + POST = "POST" + PUT = "PUT" + DELETE = "DELETE" + PATCH = "PATCH" + + +class ApiClient: + """ + Client for making HTTP requests to an API with authentication and error handling. + """ + + def __init__( + self, + base_url: str, + auth_token: Optional[str] = None, + comfy_api_key: Optional[str] = None, + timeout: float = 3600.0, + verify_ssl: bool = True, + ): + self.base_url = base_url + self.auth_token = auth_token + self.comfy_api_key = comfy_api_key + self.timeout = timeout + self.verify_ssl = verify_ssl + + def _create_json_payload_args( + self, + data: Optional[Dict[str, Any]] = None, + headers: Optional[Dict[str, str]] = None, + ) -> Dict[str, Any]: + return { + "json": data, + "headers": headers, + } + + def _create_form_data_args( + self, + data: Dict[str, Any], + files: Dict[str, Any], + headers: Optional[Dict[str, str]] = None, + multipart_parser = None, + ) -> Dict[str, Any]: + if headers and "Content-Type" in headers: + del headers["Content-Type"] + + if multipart_parser: + data = multipart_parser(data) + + return { + "data": data, + "files": files, + "headers": headers, + } + + def _create_urlencoded_form_data_args( + self, + data: Dict[str, Any], + headers: Optional[Dict[str, str]] = None, + ) -> Dict[str, Any]: + headers = headers or {} + headers["Content-Type"] = "application/x-www-form-urlencoded" + + return { + "data": data, + "headers": headers, + } + + def get_headers(self) -> Dict[str, str]: + """Get headers for API requests, including authentication if available""" + headers = {"Content-Type": "application/json", "Accept": "application/json"} + + if self.auth_token: + headers["Authorization"] = f"Bearer {self.auth_token}" + elif self.comfy_api_key: + headers["X-API-KEY"] = self.comfy_api_key + + return headers + + def request( + self, + method: str, + path: str, + params: Optional[Dict[str, Any]] = None, + data: Optional[Dict[str, Any]] = None, + files: Optional[Dict[str, Any]] = None, + headers: 
Optional[Dict[str, str]] = None, + content_type: str = "application/json", + multipart_parser: Callable = None, + ) -> Dict[str, Any]: + """ + Make an HTTP request to the API + + Args: + method: HTTP method (GET, POST, etc.) + path: API endpoint path (will be joined with base_url) + params: Query parameters + data: body data + files: Files to upload + headers: Additional headers + content_type: Content type of the request. Defaults to application/json. + + Returns: + Parsed JSON response + + Raises: + requests.RequestException: If the request fails + """ + url = urljoin(self.base_url, path) + self.check_auth(self.auth_token, self.comfy_api_key) + # Combine default headers with any provided headers + request_headers = self.get_headers() + if headers: + request_headers.update(headers) + + # Let requests handle the content type when files are present. + if files: + del request_headers["Content-Type"] + + logging.debug(f"[DEBUG] Request Headers: {request_headers}") + logging.debug(f"[DEBUG] Files: {files}") + logging.debug(f"[DEBUG] Params: {params}") + logging.debug(f"[DEBUG] Data: {data}") + + if content_type == "application/x-www-form-urlencoded": + payload_args = self._create_urlencoded_form_data_args(data, request_headers) + elif content_type == "multipart/form-data": + payload_args = self._create_form_data_args( + data, files, request_headers, multipart_parser + ) + else: + payload_args = self._create_json_payload_args(data, request_headers) + + try: + response = requests.request( + method=method, + url=url, + params=params, + timeout=self.timeout, + verify=self.verify_ssl, + **payload_args, + ) + + # Raise exception for error status codes + response.raise_for_status() + except requests.ConnectionError: + raise Exception( + f"Unable to connect to the API server at {self.base_url}. Please check your internet connection or verify the service is available." + ) + + except requests.Timeout: + raise Exception( + f"Request timed out after {self.timeout} seconds. The server might be experiencing high load or the operation is taking longer than expected." + ) + + except requests.HTTPError as e: + status_code = e.response.status_code if hasattr(e, "response") else None + error_message = f"HTTP Error: {str(e)}" + + # Try to extract detailed error message from JSON response + try: + if hasattr(e, "response") and e.response.content: + error_json = e.response.json() + if "error" in error_json and "message" in error_json["error"]: + error_message = f"API Error: {error_json['error']['message']}" + if "type" in error_json["error"]: + error_message += f" (Type: {error_json['error']['type']})" + else: + error_message = f"API Error: {error_json}" + except Exception as json_error: + # If we can't parse the JSON, fall back to the original error message + logging.debug( + f"[DEBUG] Failed to parse error response: {str(json_error)}" + ) + + logging.debug(f"[DEBUG] API Error: {error_message} (Status: {status_code})") + if hasattr(e, "response") and e.response.content: + logging.debug(f"[DEBUG] Response content: {e.response.content}") + if status_code == 401: + error_message = "Unauthorized: Please login first to use this node." + if status_code == 402: + error_message = "Payment Required: Please add credits to your account to use this node." + if status_code == 409: + error_message = "There is a problem with your account. Please contact support@comfy.org. " + if status_code == 429: + error_message = "Rate Limit Exceeded: Please try again later." 
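# The status-specific branches above replace raw HTTP errors with user-facing text
# before the generic raise that follows. A minimal sketch of how a caller might
# react to these messages (hypothetical usage, not part of this module; only the
# exception text is available to match on):
#
#   try:
#       client.request("GET", "/v1/users/me")
#   except Exception as exc:
#       if str(exc).startswith("Unauthorized"):
#           pass  # e.g. prompt the user to log in before retrying
#       else:
#           raise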
+ raise Exception(error_message) + + # Parse and return JSON response + if response.content: + return response.json() + return {} + + def check_auth(self, auth_token, comfy_api_key): + """Verify that an auth token is present or comfy_api_key is present""" + if auth_token is None and comfy_api_key is None: + raise Exception("Unauthorized: Please login first to use this node.") + return auth_token or comfy_api_key + + @staticmethod + def upload_file( + upload_url: str, + file: io.BytesIO | str, + content_type: str | None = None, + ): + """Upload a file to the API. Make sure the file has a filename equal to what the url expects. + + Args: + upload_url: The URL to upload to + file: Either a file path string, BytesIO object, or tuple of (file_path, filename) + mime_type: Optional mime type to set for the upload + """ + headers = {} + if content_type: + headers["Content-Type"] = content_type + + if isinstance(file, io.BytesIO): + file.seek(0) # Ensure we're at the start of the file + data = file.read() + return requests.put(upload_url, data=data, headers=headers) + elif isinstance(file, str): + with open(file, "rb") as f: + data = f.read() + return requests.put(upload_url, data=data, headers=headers) + + +class ApiEndpoint(Generic[T, R]): + """Defines an API endpoint with its request and response types""" + + def __init__( + self, + path: str, + method: HttpMethod, + request_model: Type[T], + response_model: Type[R], + query_params: Optional[Dict[str, Any]] = None, + ): + """Initialize an API endpoint definition. + + Args: + path: The URL path for this endpoint, can include placeholders like {id} + method: The HTTP method to use (GET, POST, etc.) + request_model: Pydantic model class that defines the structure and validation rules for API requests to this endpoint + response_model: Pydantic model class that defines the structure and validation rules for API responses from this endpoint + query_params: Optional dictionary of query parameters to include in the request + """ + self.path = path + self.method = method + self.request_model = request_model + self.response_model = response_model + self.query_params = query_params or {} + + +class SynchronousOperation(Generic[T, R]): + """ + Represents a single synchronous API operation. 
+ """ + + def __init__( + self, + endpoint: ApiEndpoint[T, R], + request: T, + files: Optional[Dict[str, Any]] = None, + api_base: str | None = None, + auth_token: Optional[str] = None, + comfy_api_key: Optional[str] = None, + auth_kwargs: Optional[Dict[str,str]] = None, + timeout: float = 604800.0, + verify_ssl: bool = True, + content_type: str = "application/json", + multipart_parser: Callable = None, + ): + self.endpoint = endpoint + self.request = request + self.response = None + self.error = None + self.api_base: str = api_base or args.comfy_api_base + self.auth_token = auth_token + self.comfy_api_key = comfy_api_key + if auth_kwargs is not None: + self.auth_token = auth_kwargs.get("auth_token", self.auth_token) + self.comfy_api_key = auth_kwargs.get("comfy_api_key", self.comfy_api_key) + self.timeout = timeout + self.verify_ssl = verify_ssl + self.files = files + self.content_type = content_type + self.multipart_parser = multipart_parser + def execute(self, client: Optional[ApiClient] = None) -> R: + """Execute the API operation using the provided client or create one""" + try: + # Create client if not provided + if client is None: + client = ApiClient( + base_url=self.api_base, + auth_token=self.auth_token, + comfy_api_key=self.comfy_api_key, + timeout=self.timeout, + verify_ssl=self.verify_ssl, + ) + + # Convert request model to dict, but use None for EmptyRequest + request_dict = ( + None + if isinstance(self.request, EmptyRequest) + else self.request.model_dump(exclude_none=True) + ) + if request_dict: + for key, value in request_dict.items(): + if isinstance(value, Enum): + request_dict[key] = value.value + + if request_dict: + for key, value in request_dict.items(): + if isinstance(value, Enum): + request_dict[key] = value.value + + # Debug log for request + logging.debug( + f"[DEBUG] API Request: {self.endpoint.method.value} {self.endpoint.path}" + ) + logging.debug(f"[DEBUG] Request Data: {json.dumps(request_dict, indent=2)}") + logging.debug(f"[DEBUG] Query Params: {self.endpoint.query_params}") + + # Make the request + resp = client.request( + method=self.endpoint.method.value, + path=self.endpoint.path, + data=request_dict, + params=self.endpoint.query_params, + files=self.files, + content_type=self.content_type, + multipart_parser=self.multipart_parser + ) + + # Debug log for response + logging.debug("=" * 50) + logging.debug("[DEBUG] RESPONSE DETAILS:") + logging.debug("[DEBUG] Status Code: 200 (Success)") + logging.debug(f"[DEBUG] Response Body: {json.dumps(resp, indent=2)}") + logging.debug("=" * 50) + + # Parse and return the response + return self._parse_response(resp) + + except Exception as e: + logging.error(f"[DEBUG] API Exception: {str(e)}") + raise Exception(str(e)) + + def _parse_response(self, resp): + """Parse response data - can be overridden by subclasses""" + # The response is already the complete object, don't extract just the "data" field + # as that would lose the outer structure (created timestamp, etc.) + + # Parse response using the provided model + self.response = self.endpoint.response_model.model_validate(resp) + logging.debug(f"[DEBUG] Parsed Response: {self.response}") + return self.response + + +class TaskStatus(str, Enum): + """Enum for task status values""" + + COMPLETED = "completed" + FAILED = "failed" + PENDING = "pending" + + +class PollingOperation(Generic[T, R]): + """ + Represents an asynchronous API operation that requires polling for completion. 
+ """ + + def __init__( + self, + poll_endpoint: ApiEndpoint[EmptyRequest, R], + completed_statuses: list, + failed_statuses: list, + status_extractor: Callable[[R], str], + progress_extractor: Callable[[R], float] = None, + request: Optional[T] = None, + api_base: str | None = None, + auth_token: Optional[str] = None, + comfy_api_key: Optional[str] = None, + auth_kwargs: Optional[Dict[str,str]] = None, + poll_interval: float = 5.0, + ): + self.poll_endpoint = poll_endpoint + self.request = request + self.api_base: str = api_base or args.comfy_api_base + self.auth_token = auth_token + self.comfy_api_key = comfy_api_key + if auth_kwargs is not None: + self.auth_token = auth_kwargs.get("auth_token", self.auth_token) + self.comfy_api_key = auth_kwargs.get("comfy_api_key", self.comfy_api_key) + self.poll_interval = poll_interval + + # Polling configuration + self.status_extractor = status_extractor or ( + lambda x: getattr(x, "status", None) + ) + self.progress_extractor = progress_extractor + self.completed_statuses = completed_statuses + self.failed_statuses = failed_statuses + + # For storing response data + self.final_response = None + self.error = None + + def execute(self, client: Optional[ApiClient] = None) -> R: + """Execute the polling operation using the provided client. If failed, raise an exception.""" + try: + if client is None: + client = ApiClient( + base_url=self.api_base, + auth_token=self.auth_token, + comfy_api_key=self.comfy_api_key, + ) + return self._poll_until_complete(client) + except Exception as e: + raise Exception(f"Error during polling: {str(e)}") + + def _check_task_status(self, response: R) -> TaskStatus: + """Check task status using the status extractor function""" + try: + status = self.status_extractor(response) + if status in self.completed_statuses: + return TaskStatus.COMPLETED + elif status in self.failed_statuses: + return TaskStatus.FAILED + return TaskStatus.PENDING + except Exception as e: + logging.error(f"Error extracting status: {e}") + return TaskStatus.PENDING + + def _poll_until_complete(self, client: ApiClient) -> R: + """Poll until the task is complete""" + poll_count = 0 + if self.progress_extractor: + progress = utils.ProgressBar(PROGRESS_BAR_MAX) + + while True: + try: + poll_count += 1 + logging.debug(f"[DEBUG] Polling attempt #{poll_count}") + + request_dict = ( + self.request.model_dump(exclude_none=True) + if self.request is not None + else None + ) + + if poll_count == 1: + logging.debug( + f"[DEBUG] Poll Request: {self.poll_endpoint.method.value} {self.poll_endpoint.path}" + ) + logging.debug( + f"[DEBUG] Poll Request Data: {json.dumps(request_dict, indent=2) if request_dict else 'None'}" + ) + + # Query task status + resp = client.request( + method=self.poll_endpoint.method.value, + path=self.poll_endpoint.path, + params=self.poll_endpoint.query_params, + data=request_dict, + ) + + # Parse response + response_obj = self.poll_endpoint.response_model.model_validate(resp) + # Check if task is complete + status = self._check_task_status(response_obj) + logging.debug(f"[DEBUG] Task Status: {status}") + + # If progress extractor is provided, extract progress + if self.progress_extractor: + new_progress = self.progress_extractor(response_obj) + if new_progress is not None: + progress.update_absolute(new_progress, total=PROGRESS_BAR_MAX) + + if status == TaskStatus.COMPLETED: + logging.debug("[DEBUG] Task completed successfully") + self.final_response = response_obj + if self.progress_extractor: + progress.update(100) + return 
self.final_response + elif status == TaskStatus.FAILED: + message = f"Task failed: {json.dumps(resp)}" + logging.error(f"[DEBUG] {message}") + raise Exception(message) + else: + logging.debug("[DEBUG] Task still pending, continuing to poll...") + + # Wait before polling again + logging.debug( + f"[DEBUG] Waiting {self.poll_interval} seconds before next poll" + ) + time.sleep(self.poll_interval) + + except Exception as e: + logging.error(f"[DEBUG] Polling error: {str(e)}") + raise Exception(f"Error while polling: {str(e)}") diff --git a/comfy_api_nodes/apis/luma_api.py b/comfy_api_nodes/apis/luma_api.py new file mode 100644 index 00000000..632c4ab9 --- /dev/null +++ b/comfy_api_nodes/apis/luma_api.py @@ -0,0 +1,253 @@ +from __future__ import annotations + + +import torch + +from enum import Enum +from typing import Optional, Union + +from pydantic import BaseModel, Field, confloat + + + +class LumaIO: + LUMA_REF = "LUMA_REF" + LUMA_CONCEPTS = "LUMA_CONCEPTS" + + +class LumaReference: + def __init__(self, image: torch.Tensor, weight: float): + self.image = image + self.weight = weight + + def create_api_model(self, download_url: str): + return LumaImageRef(url=download_url, weight=self.weight) + +class LumaReferenceChain: + def __init__(self, first_ref: LumaReference=None): + self.refs: list[LumaReference] = [] + if first_ref: + self.refs.append(first_ref) + + def add(self, luma_ref: LumaReference=None): + self.refs.append(luma_ref) + + def create_api_model(self, download_urls: list[str], max_refs=4): + if len(self.refs) == 0: + return None + api_refs: list[LumaImageRef] = [] + for ref, url in zip(self.refs, download_urls): + api_ref = LumaImageRef(url=url, weight=ref.weight) + api_refs.append(api_ref) + return api_refs + + def clone(self): + c = LumaReferenceChain() + for ref in self.refs: + c.add(ref) + return c + + +class LumaConcept: + def __init__(self, key: str): + self.key = key + + +class LumaConceptChain: + def __init__(self, str_list: list[str] = None): + self.concepts: list[LumaConcept] = [] + if str_list is not None: + for c in str_list: + if c != "None": + self.add(LumaConcept(key=c)) + + def add(self, concept: LumaConcept): + self.concepts.append(concept) + + def create_api_model(self): + if len(self.concepts) == 0: + return None + api_concepts: list[LumaConceptObject] = [] + for concept in self.concepts: + if concept.key == "None": + continue + api_concepts.append(LumaConceptObject(key=concept.key)) + if len(api_concepts) == 0: + return None + return api_concepts + + def clone(self): + c = LumaConceptChain() + for concept in self.concepts: + c.add(concept) + return c + + def clone_and_merge(self, other: LumaConceptChain): + c = self.clone() + for concept in other.concepts: + c.add(concept) + return c + + +def get_luma_concepts(include_none=False): + concepts = [] + if include_none: + concepts.append("None") + return concepts + [ + "truck_left", + "pan_right", + "pedestal_down", + "low_angle", + "pedestal_up", + "selfie", + "pan_left", + "roll_right", + "zoom_in", + "over_the_shoulder", + "orbit_right", + "orbit_left", + "static", + "tiny_planet", + "high_angle", + "bolt_cam", + "dolly_zoom", + "overhead", + "zoom_out", + "handheld", + "roll_left", + "pov", + "aerial_drone", + "push_in", + "crane_down", + "truck_right", + "tilt_down", + "elevator_doors", + "tilt_up", + "ground_level", + "pull_out", + "aerial", + "crane_up", + "eye_level" + ] + + +class LumaImageModel(str, Enum): + photon_1 = "photon-1" + photon_flash_1 = "photon-flash-1" + + +class LumaVideoModel(str, Enum): + 
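# The chain helpers above (LumaReference / LumaReferenceChain / LumaConceptChain)
# are plain containers meant to be filled in by node code and converted to API
# models only when a request is built. A minimal sketch, assuming image_tensor
# and the download URL are produced elsewhere (both hypothetical here):
#
#   chain = LumaReferenceChain(LumaReference(image=image_tensor, weight=0.85))
#   refs = chain.create_api_model(download_urls=["https://example.com/ref0.png"])
#   # refs is a list[LumaImageRef] that can be passed as image_ref / style_ref
#   # on LumaImageGenerationRequest below.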
ray_2 = "ray-2" + ray_flash_2 = "ray-flash-2" + ray_1_6 = "ray-1-6" + + +class LumaAspectRatio(str, Enum): + ratio_1_1 = "1:1" + ratio_16_9 = "16:9" + ratio_9_16 = "9:16" + ratio_4_3 = "4:3" + ratio_3_4 = "3:4" + ratio_21_9 = "21:9" + ratio_9_21 = "9:21" + + +class LumaVideoOutputResolution(str, Enum): + res_540p = "540p" + res_720p = "720p" + res_1080p = "1080p" + res_4k = "4k" + + +class LumaVideoModelOutputDuration(str, Enum): + dur_5s = "5s" + dur_9s = "9s" + + +class LumaGenerationType(str, Enum): + video = 'video' + image = 'image' + + +class LumaState(str, Enum): + queued = "queued" + dreaming = "dreaming" + completed = "completed" + failed = "failed" + + +class LumaAssets(BaseModel): + video: Optional[str] = Field(None, description='The URL of the video') + image: Optional[str] = Field(None, description='The URL of the image') + progress_video: Optional[str] = Field(None, description='The URL of the progress video') + + +class LumaImageRef(BaseModel): + '''Used for image gen''' + url: str = Field(..., description='The URL of the image reference') + weight: confloat(ge=0.0, le=1.0) = Field(..., description='The weight of the image reference') + + +class LumaImageReference(BaseModel): + '''Used for video gen''' + type: Optional[str] = Field('image', description='Input type, defaults to image') + url: str = Field(..., description='The URL of the image') + + +class LumaModifyImageRef(BaseModel): + url: str = Field(..., description='The URL of the image reference') + weight: confloat(ge=0.0, le=1.0) = Field(..., description='The weight of the image reference') + + +class LumaCharacterRef(BaseModel): + identity0: LumaImageIdentity = Field(..., description='The image identity object') + + +class LumaImageIdentity(BaseModel): + images: list[str] = Field(..., description='The URLs of the image identity') + + +class LumaGenerationReference(BaseModel): + type: str = Field('generation', description='Input type, defaults to generation') + id: str = Field(..., description='The ID of the generation') + + +class LumaKeyframes(BaseModel): + frame0: Optional[Union[LumaImageReference, LumaGenerationReference]] = Field(None, description='') + frame1: Optional[Union[LumaImageReference, LumaGenerationReference]] = Field(None, description='') + + +class LumaConceptObject(BaseModel): + key: str = Field(..., description='Camera Concept name') + + +class LumaImageGenerationRequest(BaseModel): + prompt: str = Field(..., description='The prompt of the generation') + model: LumaImageModel = Field(LumaImageModel.photon_1, description='The image model used for the generation') + aspect_ratio: Optional[LumaAspectRatio] = Field(LumaAspectRatio.ratio_16_9, description='The aspect ratio of the generation') + image_ref: Optional[list[LumaImageRef]] = Field(None, description='List of image reference objects') + style_ref: Optional[list[LumaImageRef]] = Field(None, description='List of style reference objects') + character_ref: Optional[LumaCharacterRef] = Field(None, description='The image identity object') + modify_image_ref: Optional[LumaModifyImageRef] = Field(None, description='The modify image reference object') + + +class LumaGenerationRequest(BaseModel): + prompt: str = Field(..., description='The prompt of the generation') + model: LumaVideoModel = Field(LumaVideoModel.ray_2, description='The video model used for the generation') + duration: Optional[LumaVideoModelOutputDuration] = Field(None, description='The duration of the generation') + aspect_ratio: Optional[LumaAspectRatio] = Field(None, description='The 
aspect ratio of the generation') + resolution: Optional[LumaVideoOutputResolution] = Field(None, description='The resolution of the generation') + loop: Optional[bool] = Field(None, description='Whether to loop the video') + keyframes: Optional[LumaKeyframes] = Field(None, description='The keyframes of the generation') + concepts: Optional[list[LumaConceptObject]] = Field(None, description='Camera Concepts to apply to generation') + + +class LumaGeneration(BaseModel): + id: str = Field(..., description='The ID of the generation') + generation_type: LumaGenerationType = Field(..., description='Generation type, image or video') + state: LumaState = Field(..., description='The state of the generation') + failure_reason: Optional[str] = Field(None, description='The reason for the state of the generation') + created_at: str = Field(..., description='The date and time when the generation was created') + assets: Optional[LumaAssets] = Field(None, description='The assets of the generation') + model: str = Field(..., description='The model used for the generation') + request: Union[LumaGenerationRequest, LumaImageGenerationRequest] = Field(..., description="The request used for the generation") diff --git a/comfy_api_nodes/apis/pixverse_api.py b/comfy_api_nodes/apis/pixverse_api.py new file mode 100644 index 00000000..9bb29c38 --- /dev/null +++ b/comfy_api_nodes/apis/pixverse_api.py @@ -0,0 +1,146 @@ +from __future__ import annotations + +from enum import Enum +from typing import Optional + +from pydantic import BaseModel, Field + + +pixverse_templates = { + "Microwave": 324641385496960, + "Suit Swagger": 328545151283968, + "Anything, Robot": 313358700761536, + "Subject 3 Fever": 327828816843648, + "kiss kiss": 315446315336768, +} + + +class PixverseIO: + TEMPLATE = "PIXVERSE_TEMPLATE" + + +class PixverseStatus(int, Enum): + successful = 1 + generating = 5 + deleted = 6 + contents_moderation = 7 + failed = 8 + + +class PixverseAspectRatio(str, Enum): + ratio_16_9 = "16:9" + ratio_4_3 = "4:3" + ratio_1_1 = "1:1" + ratio_3_4 = "3:4" + ratio_9_16 = "9:16" + + +class PixverseQuality(str, Enum): + res_360p = "360p" + res_540p = "540p" + res_720p = "720p" + res_1080p = "1080p" + + +class PixverseDuration(int, Enum): + dur_5 = 5 + dur_8 = 8 + + +class PixverseMotionMode(str, Enum): + normal = "normal" + fast = "fast" + + +class PixverseStyle(str, Enum): + anime = "anime" + animation_3d = "3d_animation" + clay = "clay" + comic = "comic" + cyberpunk = "cyberpunk" + + +# NOTE: forgoing descriptions for now in return for dev speed +class PixverseTextVideoRequest(BaseModel): + aspect_ratio: PixverseAspectRatio = Field(...) + quality: PixverseQuality = Field(...) + duration: PixverseDuration = Field(...) + model: Optional[str] = Field("v3.5") + motion_mode: Optional[PixverseMotionMode] = Field(PixverseMotionMode.normal) + prompt: str = Field(...) + negative_prompt: Optional[str] = Field(None) + seed: Optional[int] = Field(None) + style: Optional[str] = Field(None) + template_id: Optional[int] = Field(None) + water_mark: Optional[bool] = Field(None) + + +class PixverseImageVideoRequest(BaseModel): + quality: PixverseQuality = Field(...) + duration: PixverseDuration = Field(...) + img_id: int = Field(...) + model: Optional[str] = Field("v3.5") + motion_mode: Optional[PixverseMotionMode] = Field(PixverseMotionMode.normal) + prompt: str = Field(...) 
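# A minimal sketch of how these request models are meant to be filled in, using
# one of the bundled template IDs from pixverse_templates above (all values below
# are illustrative only):
#
#   request = PixverseTextVideoRequest(
#       aspect_ratio=PixverseAspectRatio.ratio_16_9,
#       quality=PixverseQuality.res_540p,
#       duration=PixverseDuration.dur_5,
#       prompt="a red panda sipping tea",
#       template_id=pixverse_templates["Microwave"],
#   )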
+ negative_prompt: Optional[str] = Field(None) + seed: Optional[int] = Field(None) + style: Optional[str] = Field(None) + template_id: Optional[int] = Field(None) + water_mark: Optional[bool] = Field(None) + + +class PixverseTransitionVideoRequest(BaseModel): + quality: PixverseQuality = Field(...) + duration: PixverseDuration = Field(...) + first_frame_img: int = Field(...) + last_frame_img: int = Field(...) + model: Optional[str] = Field("v3.5") + motion_mode: Optional[PixverseMotionMode] = Field(PixverseMotionMode.normal) + prompt: str = Field(...) + # negative_prompt: Optional[str] = Field(None) + seed: Optional[int] = Field(None) + # style: Optional[str] = Field(None) + # template_id: Optional[int] = Field(None) + # water_mark: Optional[bool] = Field(None) + + +class PixverseImageUploadResponse(BaseModel): + ErrCode: Optional[int] = None + ErrMsg: Optional[str] = None + Resp: Optional[PixverseImgIdResponseObject] = Field(None, alias='Resp') + + +class PixverseImgIdResponseObject(BaseModel): + img_id: Optional[int] = None + + +class PixverseVideoResponse(BaseModel): + ErrCode: Optional[int] = Field(None) + ErrMsg: Optional[str] = Field(None) + Resp: Optional[PixverseVideoIdResponseObject] = Field(None) + + +class PixverseVideoIdResponseObject(BaseModel): + video_id: int = Field(..., description='Video_id') + + +class PixverseGenerationStatusResponse(BaseModel): + ErrCode: Optional[int] = Field(None) + ErrMsg: Optional[str] = Field(None) + Resp: Optional[PixverseGenerationStatusResponseObject] = Field(None) + + +class PixverseGenerationStatusResponseObject(BaseModel): + create_time: Optional[str] = Field(None) + id: Optional[int] = Field(None) + modify_time: Optional[str] = Field(None) + negative_prompt: Optional[str] = Field(None) + outputHeight: Optional[int] = Field(None) + outputWidth: Optional[int] = Field(None) + prompt: Optional[str] = Field(None) + resolution_ratio: Optional[int] = Field(None) + seed: Optional[int] = Field(None) + size: Optional[int] = Field(None) + status: Optional[int] = Field(None) + style: Optional[str] = Field(None) + url: Optional[str] = Field(None) diff --git a/comfy_api_nodes/apis/recraft_api.py b/comfy_api_nodes/apis/recraft_api.py new file mode 100644 index 00000000..c36d95f2 --- /dev/null +++ b/comfy_api_nodes/apis/recraft_api.py @@ -0,0 +1,262 @@ +from __future__ import annotations + + + +from enum import Enum +from typing import Optional + +from pydantic import BaseModel, Field, conint, confloat + + +class RecraftColor: + def __init__(self, r: int, g: int, b: int): + self.color = [r, g, b] + + def create_api_model(self): + return RecraftColorObject(rgb=self.color) + + +class RecraftColorChain: + def __init__(self): + self.colors: list[RecraftColor] = [] + + def get_first(self): + if len(self.colors) > 0: + return self.colors[0] + return None + + def add(self, color: RecraftColor): + self.colors.append(color) + + def create_api_model(self): + if not self.colors: + return None + colors_api = [x.create_api_model() for x in self.colors] + return colors_api + + def clone(self): + c = RecraftColorChain() + for color in self.colors: + c.add(color) + return c + + def clone_and_merge(self, other: RecraftColorChain): + c = self.clone() + for color in other.colors: + c.add(color) + return c + + +class RecraftControls: + def __init__(self, colors: RecraftColorChain=None, background_color: RecraftColorChain=None, + artistic_level: int=None, no_text: bool=None): + self.colors = colors + self.background_color = background_color + self.artistic_level = 
artistic_level + self.no_text = no_text + + def create_api_model(self): + if self.colors is None and self.background_color is None and self.artistic_level is None and self.no_text is None: + return None + colors_api = None + background_color_api = None + if self.colors: + colors_api = self.colors.create_api_model() + if self.background_color: + first_background = self.background_color.get_first() + background_color_api = first_background.create_api_model() if first_background else None + + return RecraftControlsObject(colors=colors_api, background_color=background_color_api, + artistic_level=self.artistic_level, no_text=self.no_text) + + +class RecraftStyle: + def __init__(self, style: str=None, substyle: str=None, style_id: str=None): + self.style = style + if substyle == "None": + substyle = None + self.substyle = substyle + self.style_id = style_id + + +class RecraftIO: + STYLEV3 = "RECRAFT_V3_STYLE" + COLOR = "RECRAFT_COLOR" + CONTROLS = "RECRAFT_CONTROLS" + + +class RecraftStyleV3(str, Enum): + #any = 'any' NOTE: this does not work for some reason... why? + realistic_image = 'realistic_image' + digital_illustration = 'digital_illustration' + vector_illustration = 'vector_illustration' + logo_raster = 'logo_raster' + + +def get_v3_substyles(style_v3: str, include_none=True) -> list[str]: + substyles: list[str] = [] + if include_none: + substyles.append("None") + return substyles + dict_recraft_substyles_v3.get(style_v3, []) + + +dict_recraft_substyles_v3 = { + RecraftStyleV3.realistic_image: [ + "b_and_w", + "enterprise", + "evening_light", + "faded_nostalgia", + "forest_life", + "hard_flash", + "hdr", + "motion_blur", + "mystic_naturalism", + "natural_light", + "natural_tones", + "organic_calm", + "real_life_glow", + "retro_realism", + "retro_snapshot", + "studio_portrait", + "urban_drama", + "village_realism", + "warm_folk" + ], + RecraftStyleV3.digital_illustration: [ + "2d_art_poster", + "2d_art_poster_2", + "antiquarian", + "bold_fantasy", + "child_book", + "child_books", + "cover", + "crosshatch", + "digital_engraving", + "engraving_color", + "expressionism", + "freehand_details", + "grain", + "grain_20", + "graphic_intensity", + "hand_drawn", + "hand_drawn_outline", + "handmade_3d", + "hard_comics", + "infantile_sketch", + "long_shadow", + "modern_folk", + "multicolor", + "neon_calm", + "noir", + "nostalgic_pastel", + "outline_details", + "pastel_gradient", + "pastel_sketch", + "pixel_art", + "plastic", + "pop_art", + "pop_renaissance", + "seamless", + "street_art", + "tablet_sketch", + "urban_glow", + "urban_sketching", + "vanilla_dreams", + "young_adult_book", + "young_adult_book_2" + ], + RecraftStyleV3.vector_illustration: [ + "bold_stroke", + "chemistry", + "colored_stencil", + "contour_pop_art", + "cosmics", + "cutout", + "depressive", + "editorial", + "emotional_flat", + "engraving", + "infographical", + "line_art", + "line_circuit", + "linocut", + "marker_outline", + "mosaic", + "naivector", + "roundish_flat", + "seamless", + "segmented_colors", + "sharp_contrast", + "thin", + "vector_photo", + "vivid_shapes" + ], + RecraftStyleV3.logo_raster: [ + "emblem_graffiti", + "emblem_pop_art", + "emblem_punk", + "emblem_stamp", + "emblem_vintage" + ], +} + + +class RecraftModel(str, Enum): + recraftv3 = 'recraftv3' + recraftv2 = 'recraftv2' + + +class RecraftImageSize(str, Enum): + res_1024x1024 = '1024x1024' + res_1365x1024 = '1365x1024' + res_1024x1365 = '1024x1365' + res_1536x1024 = '1536x1024' + res_1024x1536 = '1024x1536' + res_1820x1024 = '1820x1024' + res_1024x1820 = 
'1024x1820' + res_1024x2048 = '1024x2048' + res_2048x1024 = '2048x1024' + res_1434x1024 = '1434x1024' + res_1024x1434 = '1024x1434' + res_1024x1280 = '1024x1280' + res_1280x1024 = '1280x1024' + res_1024x1707 = '1024x1707' + res_1707x1024 = '1707x1024' + + +class RecraftColorObject(BaseModel): + rgb: list[int] = Field(..., description='An array of 3 integer values in range of 0...255 defining RGB Color Model') + + +class RecraftControlsObject(BaseModel): + colors: Optional[list[RecraftColorObject]] = Field(None, description='An array of preferable colors') + background_color: Optional[RecraftColorObject] = Field(None, description='Use given color as a desired background color') + no_text: Optional[bool] = Field(None, description='Do not embed text layouts') + artistic_level: Optional[conint(ge=0, le=5)] = Field(None, description='Defines artistic tone of your image. At a simple level, the person looks straight at the camera in a static and clean style. Dynamic and eccentric levels introduce movement and creativity. The value should be in range [0..5].') + + +class RecraftImageGenerationRequest(BaseModel): + prompt: str = Field(..., description='The text prompt describing the image to generate') + size: Optional[RecraftImageSize] = Field(None, description='The size of the generated image (e.g., "1024x1024")') + n: conint(ge=1, le=6) = Field(..., description='The number of images to generate') + negative_prompt: Optional[str] = Field(None, description='A text description of undesired elements on an image') + model: Optional[RecraftModel] = Field(RecraftModel.recraftv3, description='The model to use for generation (e.g., "recraftv3")') + style: Optional[str] = Field(None, description='The style to apply to the generated image (e.g., "digital_illustration")') + substyle: Optional[str] = Field(None, description='The substyle to apply to the generated image, depending on the style input') + controls: Optional[RecraftControlsObject] = Field(None, description='A set of custom parameters to tweak generation process') + style_id: Optional[str] = Field(None, description='Use a previously uploaded style as a reference; UUID') + strength: Optional[confloat(ge=0.0, le=1.0)] = Field(None, description='Defines the difference with the original image, should lie in [0, 1], where 0 means almost identical, and 1 means miserable similarity') + random_seed: Optional[int] = Field(None, description="Seed for video generation") + # text_layout + + +class RecraftReturnedObject(BaseModel): + image_id: str = Field(..., description='Unique identifier for the generated image') + url: str = Field(..., description='URL to access the generated image') + + +class RecraftImageGenerationResponse(BaseModel): + created: int = Field(..., description='Unix timestamp when the generation was created') + credits: int = Field(..., description='Number of credits used for the generation') + data: Optional[list[RecraftReturnedObject]] = Field(None, description='Array of generated image information') + image: Optional[RecraftReturnedObject] = Field(None, description='Single generated image') diff --git a/comfy_api_nodes/apis/stability_api.py b/comfy_api_nodes/apis/stability_api.py new file mode 100644 index 00000000..47c87dae --- /dev/null +++ b/comfy_api_nodes/apis/stability_api.py @@ -0,0 +1,127 @@ +from __future__ import annotations + +from enum import Enum +from typing import Optional + +from pydantic import BaseModel, Field, confloat + + +class StabilityFormat(str, Enum): + png = 'png' + jpeg = 'jpeg' + webp = 'webp' + + +class 
StabilityAspectRatio(str, Enum): + ratio_1_1 = "1:1" + ratio_16_9 = "16:9" + ratio_9_16 = "9:16" + ratio_3_2 = "3:2" + ratio_2_3 = "2:3" + ratio_5_4 = "5:4" + ratio_4_5 = "4:5" + ratio_21_9 = "21:9" + ratio_9_21 = "9:21" + + +def get_stability_style_presets(include_none=True): + presets = [] + if include_none: + presets.append("None") + return presets + [x.value for x in StabilityStylePreset] + + +class StabilityStylePreset(str, Enum): + _3d_model = "3d-model" + analog_film = "analog-film" + anime = "anime" + cinematic = "cinematic" + comic_book = "comic-book" + digital_art = "digital-art" + enhance = "enhance" + fantasy_art = "fantasy-art" + isometric = "isometric" + line_art = "line-art" + low_poly = "low-poly" + modeling_compound = "modeling-compound" + neon_punk = "neon-punk" + origami = "origami" + photographic = "photographic" + pixel_art = "pixel-art" + tile_texture = "tile-texture" + + +class Stability_SD3_5_Model(str, Enum): + sd3_5_large = "sd3.5-large" + # sd3_5_large_turbo = "sd3.5-large-turbo" + sd3_5_medium = "sd3.5-medium" + + +class Stability_SD3_5_GenerationMode(str, Enum): + text_to_image = "text-to-image" + image_to_image = "image-to-image" + + +class StabilityStable3_5Request(BaseModel): + model: str = Field(...) + mode: str = Field(...) + prompt: str = Field(...) + negative_prompt: Optional[str] = Field(None) + aspect_ratio: Optional[str] = Field(None) + seed: Optional[int] = Field(None) + output_format: Optional[str] = Field(StabilityFormat.png.value) + image: Optional[str] = Field(None) + style_preset: Optional[str] = Field(None) + cfg_scale: float = Field(...) + strength: Optional[confloat(ge=0.0, le=1.0)] = Field(None) + + +class StabilityUpscaleConservativeRequest(BaseModel): + prompt: str = Field(...) + negative_prompt: Optional[str] = Field(None) + seed: Optional[int] = Field(None) + output_format: Optional[str] = Field(StabilityFormat.png.value) + image: Optional[str] = Field(None) + creativity: Optional[confloat(ge=0.2, le=0.5)] = Field(None) + + +class StabilityUpscaleCreativeRequest(BaseModel): + prompt: str = Field(...) + negative_prompt: Optional[str] = Field(None) + seed: Optional[int] = Field(None) + output_format: Optional[str] = Field(StabilityFormat.png.value) + image: Optional[str] = Field(None) + creativity: Optional[confloat(ge=0.1, le=0.5)] = Field(None) + style_preset: Optional[str] = Field(None) + + +class StabilityStableUltraRequest(BaseModel): + prompt: str = Field(...) 
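# A sketch of a plausible payload for the Ultra request defined here (not taken
# from the node code; values are illustrative, and image, when set, carries a
# base64-encoded input image):
#
#   request = StabilityStableUltraRequest(
#       prompt="a lighthouse at dawn, oil painting",
#       aspect_ratio=StabilityAspectRatio.ratio_16_9.value,
#       seed=1234,
#       output_format=StabilityFormat.png.value,
#       style_preset="photographic",
#   )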
+ negative_prompt: Optional[str] = Field(None) + aspect_ratio: Optional[str] = Field(None) + seed: Optional[int] = Field(None) + output_format: Optional[str] = Field(StabilityFormat.png.value) + image: Optional[str] = Field(None) + style_preset: Optional[str] = Field(None) + strength: Optional[confloat(ge=0.0, le=1.0)] = Field(None) + + +class StabilityStableUltraResponse(BaseModel): + image: Optional[str] = Field(None) + finish_reason: Optional[str] = Field(None) + seed: Optional[int] = Field(None) + + +class StabilityResultsGetResponse(BaseModel): + image: Optional[str] = Field(None) + finish_reason: Optional[str] = Field(None) + seed: Optional[int] = Field(None) + id: Optional[str] = Field(None) + name: Optional[str] = Field(None) + errors: Optional[list[str]] = Field(None) + status: Optional[str] = Field(None) + result: Optional[str] = Field(None) + + +class StabilityAsyncResponse(BaseModel): + id: Optional[str] = Field(None) diff --git a/comfy_api_nodes/canary.py b/comfy_api_nodes/canary.py new file mode 100644 index 00000000..4df7590b --- /dev/null +++ b/comfy_api_nodes/canary.py @@ -0,0 +1,10 @@ +import av + +ver = av.__version__.split(".") +if int(ver[0]) < 14: + raise Exception("INSTALL NEW VERSION OF PYAV TO USE API NODES.") + +if int(ver[0]) == 14 and int(ver[1]) < 2: + raise Exception("INSTALL NEW VERSION OF PYAV TO USE API NODES.") + +NODE_CLASS_MAPPINGS = {} diff --git a/comfy_api_nodes/mapper_utils.py b/comfy_api_nodes/mapper_utils.py new file mode 100644 index 00000000..6fab8f4b --- /dev/null +++ b/comfy_api_nodes/mapper_utils.py @@ -0,0 +1,116 @@ +from enum import Enum + +from pydantic.fields import FieldInfo +from pydantic import BaseModel +from pydantic_core import PydanticUndefined + +from comfy.comfy_types.node_typing import IO, InputTypeOptions + +NodeInput = tuple[IO, InputTypeOptions] + + +def _create_base_config(field_info: FieldInfo) -> InputTypeOptions: + config = {} + if hasattr(field_info, "default") and field_info.default is not PydanticUndefined: + config["default"] = field_info.default + if hasattr(field_info, "description") and field_info.description is not None: + config["tooltip"] = field_info.description + return config + + +def _get_number_constraints_config(field_info: FieldInfo) -> dict: + config = {} + if hasattr(field_info, "metadata"): + metadata = field_info.metadata + for constraint in metadata: + if hasattr(constraint, "ge"): + config["min"] = constraint.ge + if hasattr(constraint, "le"): + config["max"] = constraint.le + if hasattr(constraint, "multiple_of"): + config["step"] = constraint.multiple_of + return config + + +def _model_field_to_image_input(field_info: FieldInfo, **kwargs) -> NodeInput: + return IO.IMAGE, { + **_create_base_config(field_info), + **kwargs, + } + + +def _model_field_to_string_input(field_info: FieldInfo, **kwargs) -> NodeInput: + return IO.STRING, { + **_create_base_config(field_info), + **kwargs, + } + + +def _model_field_to_float_input(field_info: FieldInfo, **kwargs) -> NodeInput: + return IO.FLOAT, { + **_create_base_config(field_info), + **_get_number_constraints_config(field_info), + **kwargs, + } + + +def _model_field_to_int_input(field_info: FieldInfo, **kwargs) -> NodeInput: + return IO.INT, { + **_create_base_config(field_info), + **_get_number_constraints_config(field_info), + **kwargs, + } + + +def _model_field_to_combo_input( + field_info: FieldInfo, enum_type: type[Enum] = None, **kwargs +) -> NodeInput: + combo_config = {} + if enum_type is not None: + combo_config["options"] = [option.value for option 
in enum_type] + combo_config = { + **combo_config, + **_create_base_config(field_info), + **kwargs, + } + return IO.COMBO, combo_config + + +def model_field_to_node_input( + input_type: IO, base_model: type[BaseModel], field_name: str, **kwargs +) -> NodeInput: + """ + Maps a field from a Pydantic model to a Comfy node input. + + Args: + input_type: The type of the input. + base_model: The Pydantic model to map the field from. + field_name: The name of the field to map. + **kwargs: Additional key/values to include in the input options. + + Note: + For combo inputs, pass an `Enum` to the `enum_type` keyword argument to populate the options automatically. + + Example: + >>> model_field_to_node_input(IO.STRING, MyModel, "my_field", multiline=True) + >>> model_field_to_node_input(IO.COMBO, MyModel, "my_field", enum_type=MyEnum) + >>> model_field_to_node_input(IO.FLOAT, MyModel, "my_field", slider=True) + """ + field_info: FieldInfo = base_model.model_fields[field_name] + result: NodeInput + + if input_type == IO.IMAGE: + result = _model_field_to_image_input(field_info, **kwargs) + elif input_type == IO.STRING: + result = _model_field_to_string_input(field_info, **kwargs) + elif input_type == IO.FLOAT: + result = _model_field_to_float_input(field_info, **kwargs) + elif input_type == IO.INT: + result = _model_field_to_int_input(field_info, **kwargs) + elif input_type == IO.COMBO: + result = _model_field_to_combo_input(field_info, **kwargs) + else: + message = f"Invalid input type: {input_type}" + raise ValueError(message) + + return result diff --git a/comfy_api_nodes/nodes_bfl.py b/comfy_api_nodes/nodes_bfl.py new file mode 100644 index 00000000..66ef1b39 --- /dev/null +++ b/comfy_api_nodes/nodes_bfl.py @@ -0,0 +1,906 @@ +import io +from inspect import cleandoc +from comfy.comfy_types.node_typing import IO, ComfyNodeABC +from comfy_api_nodes.apis.bfl_api import ( + BFLStatus, + BFLFluxExpandImageRequest, + BFLFluxFillImageRequest, + BFLFluxCannyImageRequest, + BFLFluxDepthImageRequest, + BFLFluxProGenerateRequest, + BFLFluxProUltraGenerateRequest, + BFLFluxProGenerateResponse, +) +from comfy_api_nodes.apis.client import ( + ApiEndpoint, + HttpMethod, + SynchronousOperation, +) +from comfy_api_nodes.apinode_utils import ( + downscale_image_tensor, + validate_aspect_ratio, + process_image_response, + resize_mask_to_image, + validate_string, +) + +import numpy as np +from PIL import Image +import requests +import torch +import base64 +import time + + +def convert_mask_to_image(mask: torch.Tensor): + """ + Make mask have the expected amount of dims (4) and channels (3) to be recognized as an image. + """ + mask = mask.unsqueeze(-1) + mask = torch.cat([mask]*3, dim=-1) + return mask + + +def handle_bfl_synchronous_operation( + operation: SynchronousOperation, timeout_bfl_calls=360 +): + response_api: BFLFluxProGenerateResponse = operation.execute() + return _poll_until_generated( + response_api.polling_url, timeout=timeout_bfl_calls + ) + +def _poll_until_generated(polling_url: str, timeout=360): + # used bfl-comfy-nodes to verify code implementation: + # https://github.com/black-forest-labs/bfl-comfy-nodes/tree/main + start_time = time.time() + retries_404 = 0 + max_retries_404 = 5 + retry_404_seconds = 2 + retry_202_seconds = 2 + retry_pending_seconds = 1 + request = requests.Request(method=HttpMethod.GET, url=polling_url) + # NOTE: should True loop be replaced with checking if workflow has been interrupted? 
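The NOTE above asks whether the open-ended `while True` loop should also watch for workflow interruption. A minimal sketch of such a guard, assuming a caller-supplied `is_interrupted` callable and an explicit list of terminal statuses; the helper name, its parameters, and the exceptions raised are illustrative, not part of this PR:

```python
import time
from typing import Callable


def poll_with_guards(
    poll_once: Callable[[], dict],
    is_interrupted: Callable[[], bool],
    terminal_statuses: tuple[str, ...],
    timeout: float = 360.0,
    interval: float = 1.0,
) -> dict:
    """Poll until a terminal status, a user interruption, or a timeout (illustrative helper)."""
    start_time = time.time()
    while True:
        # Bail out if the surrounding workflow was cancelled.
        if is_interrupted():
            raise RuntimeError("Polling cancelled: workflow was interrupted.")
        # Enforce the timeout on every iteration, not only on unexpected status codes.
        if time.time() - start_time > timeout:
            raise TimeoutError(f"No terminal status after {timeout} seconds.")
        result = poll_once()
        if result.get("status") in terminal_statuses:
            return result
        time.sleep(interval)
```

Checking the interrupt flag at the top of each iteration keeps the existing polling cadence while giving a cancelled workflow a way to exit early.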
+ while True: + response = requests.Session().send(request.prepare()) + if response.status_code == 200: + result = response.json() + if result["status"] == BFLStatus.ready: + img_url = result["result"]["sample"] + img_response = requests.get(img_url) + return process_image_response(img_response) + elif result["status"] in [ + BFLStatus.request_moderated, + BFLStatus.content_moderated, + ]: + status = result["status"] + raise Exception( + f"BFL API did not return an image due to: {status}." + ) + elif result["status"] == BFLStatus.error: + raise Exception(f"BFL API encountered an error: {result}.") + elif result["status"] == BFLStatus.pending: + time.sleep(retry_pending_seconds) + continue + elif response.status_code == 404: + if retries_404 < max_retries_404: + retries_404 += 1 + time.sleep(retry_404_seconds) + continue + raise Exception( + f"BFL API could not find task after {max_retries_404} tries." + ) + elif response.status_code == 202: + time.sleep(retry_202_seconds) + elif time.time() - start_time > timeout: + raise Exception( + f"BFL API experienced a timeout; could not return request under {timeout} seconds." + ) + else: + raise Exception(f"BFL API encountered an error: {response.json()}") + +def convert_image_to_base64(image: torch.Tensor): + scaled_image = downscale_image_tensor(image, total_pixels=2048 * 2048) + # remove batch dimension if present + if len(scaled_image.shape) > 3: + scaled_image = scaled_image[0] + image_np = (scaled_image.numpy() * 255).astype(np.uint8) + img = Image.fromarray(image_np) + img_byte_arr = io.BytesIO() + img.save(img_byte_arr, format="PNG") + return base64.b64encode(img_byte_arr.getvalue()).decode() + + +class FluxProUltraImageNode(ComfyNodeABC): + """ + Generates images using Flux Pro 1.1 Ultra via api based on prompt and resolution. + """ + + MINIMUM_RATIO = 1 / 4 + MAXIMUM_RATIO = 4 / 1 + MINIMUM_RATIO_STR = "1:4" + MAXIMUM_RATIO_STR = "4:1" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the image generation", + }, + ), + "prompt_upsampling": ( + IO.BOOLEAN, + { + "default": False, + "tooltip": "Whether to perform upsampling on the prompt. 
If active, automatically modifies the prompt for more creative generation, but results are nondeterministic (same seed will not produce exactly the same result).", + }, + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 0xFFFFFFFFFFFFFFFF, + "control_after_generate": True, + "tooltip": "The random seed used for creating the noise.", + }, + ), + "aspect_ratio": ( + IO.STRING, + { + "default": "16:9", + "tooltip": "Aspect ratio of image; must be between 1:4 and 4:1.", + }, + ), + "raw": ( + IO.BOOLEAN, + { + "default": False, + "tooltip": "When True, generate less processed, more natural-looking images.", + }, + ), + }, + "optional": { + "image_prompt": (IO.IMAGE,), + "image_prompt_strength": ( + IO.FLOAT, + { + "default": 0.1, + "min": 0.0, + "max": 1.0, + "step": 0.01, + "tooltip": "Blend between the prompt and the image prompt.", + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + @classmethod + def VALIDATE_INPUTS(cls, aspect_ratio: str): + try: + validate_aspect_ratio( + aspect_ratio, + minimum_ratio=cls.MINIMUM_RATIO, + maximum_ratio=cls.MAXIMUM_RATIO, + minimum_ratio_str=cls.MINIMUM_RATIO_STR, + maximum_ratio_str=cls.MAXIMUM_RATIO_STR, + ) + except Exception as e: + return str(e) + return True + + RETURN_TYPES = (IO.IMAGE,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/BFL" + + def api_call( + self, + prompt: str, + aspect_ratio: str, + prompt_upsampling=False, + raw=False, + seed=0, + image_prompt=None, + image_prompt_strength=0.1, + **kwargs, + ): + if image_prompt is None: + validate_string(prompt, strip_whitespace=False) + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/bfl/flux-pro-1.1-ultra/generate", + method=HttpMethod.POST, + request_model=BFLFluxProUltraGenerateRequest, + response_model=BFLFluxProGenerateResponse, + ), + request=BFLFluxProUltraGenerateRequest( + prompt=prompt, + prompt_upsampling=prompt_upsampling, + seed=seed, + aspect_ratio=validate_aspect_ratio( + aspect_ratio, + minimum_ratio=self.MINIMUM_RATIO, + maximum_ratio=self.MAXIMUM_RATIO, + minimum_ratio_str=self.MINIMUM_RATIO_STR, + maximum_ratio_str=self.MAXIMUM_RATIO_STR, + ), + raw=raw, + image_prompt=( + image_prompt + if image_prompt is None + else convert_image_to_base64(image_prompt) + ), + image_prompt_strength=( + None if image_prompt is None else round(image_prompt_strength, 2) + ), + ), + auth_kwargs=kwargs, + ) + output_image = handle_bfl_synchronous_operation(operation) + return (output_image,) + + + +class FluxProImageNode(ComfyNodeABC): + """ + Generates images synchronously based on prompt and resolution. + """ + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the image generation", + }, + ), + "prompt_upsampling": ( + IO.BOOLEAN, + { + "default": False, + "tooltip": "Whether to perform upsampling on the prompt. 
If active, automatically modifies the prompt for more creative generation, but results are nondeterministic (same seed will not produce exactly the same result).", + }, + ), + "width": ( + IO.INT, + { + "default": 1024, + "min": 256, + "max": 1440, + "step": 32, + }, + ), + "height": ( + IO.INT, + { + "default": 768, + "min": 256, + "max": 1440, + "step": 32, + }, + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 0xFFFFFFFFFFFFFFFF, + "control_after_generate": True, + "tooltip": "The random seed used for creating the noise.", + }, + ), + }, + "optional": { + "image_prompt": (IO.IMAGE,), + # "image_prompt_strength": ( + # IO.FLOAT, + # { + # "default": 0.1, + # "min": 0.0, + # "max": 1.0, + # "step": 0.01, + # "tooltip": "Blend between the prompt and the image prompt.", + # }, + # ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + RETURN_TYPES = (IO.IMAGE,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/BFL" + + def api_call( + self, + prompt: str, + prompt_upsampling, + width: int, + height: int, + seed=0, + image_prompt=None, + # image_prompt_strength=0.1, + **kwargs, + ): + image_prompt = ( + image_prompt + if image_prompt is None + else convert_image_to_base64(image_prompt) + ) + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/bfl/flux-pro-1.1/generate", + method=HttpMethod.POST, + request_model=BFLFluxProGenerateRequest, + response_model=BFLFluxProGenerateResponse, + ), + request=BFLFluxProGenerateRequest( + prompt=prompt, + prompt_upsampling=prompt_upsampling, + width=width, + height=height, + seed=seed, + image_prompt=image_prompt, + ), + auth_kwargs=kwargs, + ) + output_image = handle_bfl_synchronous_operation(operation) + return (output_image,) + + +class FluxProExpandNode(ComfyNodeABC): + """ + Outpaints image based on prompt. + """ + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": (IO.IMAGE,), + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the image generation", + }, + ), + "prompt_upsampling": ( + IO.BOOLEAN, + { + "default": False, + "tooltip": "Whether to perform upsampling on the prompt. 
If active, automatically modifies the prompt for more creative generation, but results are nondeterministic (same seed will not produce exactly the same result).", + }, + ), + "top": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 2048, + "tooltip": "Number of pixels to expand at the top of the image" + }, + ), + "bottom": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 2048, + "tooltip": "Number of pixels to expand at the bottom of the image" + }, + ), + "left": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 2048, + "tooltip": "Number of pixels to expand at the left side of the image" + }, + ), + "right": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 2048, + "tooltip": "Number of pixels to expand at the right side of the image" + }, + ), + "guidance": ( + IO.FLOAT, + { + "default": 60, + "min": 1.5, + "max": 100, + "tooltip": "Guidance strength for the image generation process" + }, + ), + "steps": ( + IO.INT, + { + "default": 50, + "min": 15, + "max": 50, + "tooltip": "Number of steps for the image generation process" + }, + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 0xFFFFFFFFFFFFFFFF, + "control_after_generate": True, + "tooltip": "The random seed used for creating the noise.", + }, + ), + }, + "optional": { + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + RETURN_TYPES = (IO.IMAGE,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/BFL" + + def api_call( + self, + image: torch.Tensor, + prompt: str, + prompt_upsampling: bool, + top: int, + bottom: int, + left: int, + right: int, + steps: int, + guidance: float, + seed=0, + **kwargs, + ): + image = convert_image_to_base64(image) + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/bfl/flux-pro-1.0-expand/generate", + method=HttpMethod.POST, + request_model=BFLFluxExpandImageRequest, + response_model=BFLFluxProGenerateResponse, + ), + request=BFLFluxExpandImageRequest( + prompt=prompt, + prompt_upsampling=prompt_upsampling, + top=top, + bottom=bottom, + left=left, + right=right, + steps=steps, + guidance=guidance, + seed=seed, + image=image, + ), + auth_kwargs=kwargs, + ) + output_image = handle_bfl_synchronous_operation(operation) + return (output_image,) + + + +class FluxProFillNode(ComfyNodeABC): + """ + Inpaints image based on mask and prompt. + """ + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": (IO.IMAGE,), + "mask": (IO.MASK,), + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the image generation", + }, + ), + "prompt_upsampling": ( + IO.BOOLEAN, + { + "default": False, + "tooltip": "Whether to perform upsampling on the prompt. 
If active, automatically modifies the prompt for more creative generation, but results are nondeterministic (same seed will not produce exactly the same result).", + }, + ), + "guidance": ( + IO.FLOAT, + { + "default": 60, + "min": 1.5, + "max": 100, + "tooltip": "Guidance strength for the image generation process" + }, + ), + "steps": ( + IO.INT, + { + "default": 50, + "min": 15, + "max": 50, + "tooltip": "Number of steps for the image generation process" + }, + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 0xFFFFFFFFFFFFFFFF, + "control_after_generate": True, + "tooltip": "The random seed used for creating the noise.", + }, + ), + }, + "optional": { + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + RETURN_TYPES = (IO.IMAGE,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/BFL" + + def api_call( + self, + image: torch.Tensor, + mask: torch.Tensor, + prompt: str, + prompt_upsampling: bool, + steps: int, + guidance: float, + seed=0, + **kwargs, + ): + # prepare mask + mask = resize_mask_to_image(mask, image) + mask = convert_image_to_base64(convert_mask_to_image(mask)) + # make sure image will have alpha channel removed + image = convert_image_to_base64(image[:,:,:,:3]) + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/bfl/flux-pro-1.0-fill/generate", + method=HttpMethod.POST, + request_model=BFLFluxFillImageRequest, + response_model=BFLFluxProGenerateResponse, + ), + request=BFLFluxFillImageRequest( + prompt=prompt, + prompt_upsampling=prompt_upsampling, + steps=steps, + guidance=guidance, + seed=seed, + image=image, + mask=mask, + ), + auth_kwargs=kwargs, + ) + output_image = handle_bfl_synchronous_operation(operation) + return (output_image,) + + +class FluxProCannyNode(ComfyNodeABC): + """ + Generate image using a control image (canny). + """ + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "control_image": (IO.IMAGE,), + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the image generation", + }, + ), + "prompt_upsampling": ( + IO.BOOLEAN, + { + "default": False, + "tooltip": "Whether to perform upsampling on the prompt. 
If active, automatically modifies the prompt for more creative generation, but results are nondeterministic (same seed will not produce exactly the same result).", + }, + ), + "canny_low_threshold": ( + IO.FLOAT, + { + "default": 0.1, + "min": 0.01, + "max": 0.99, + "step": 0.01, + "tooltip": "Low threshold for Canny edge detection; ignored if skip_processing is True" + }, + ), + "canny_high_threshold": ( + IO.FLOAT, + { + "default": 0.4, + "min": 0.01, + "max": 0.99, + "step": 0.01, + "tooltip": "High threshold for Canny edge detection; ignored if skip_processing is True" + }, + ), + "skip_preprocessing": ( + IO.BOOLEAN, + { + "default": False, + "tooltip": "Whether to skip preprocessing; set to True if control_image already is canny-fied, False if it is a raw image.", + }, + ), + "guidance": ( + IO.FLOAT, + { + "default": 30, + "min": 1, + "max": 100, + "tooltip": "Guidance strength for the image generation process" + }, + ), + "steps": ( + IO.INT, + { + "default": 50, + "min": 15, + "max": 50, + "tooltip": "Number of steps for the image generation process" + }, + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 0xFFFFFFFFFFFFFFFF, + "control_after_generate": True, + "tooltip": "The random seed used for creating the noise.", + }, + ), + }, + "optional": { + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + RETURN_TYPES = (IO.IMAGE,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/BFL" + + def api_call( + self, + control_image: torch.Tensor, + prompt: str, + prompt_upsampling: bool, + canny_low_threshold: float, + canny_high_threshold: float, + skip_preprocessing: bool, + steps: int, + guidance: float, + seed=0, + **kwargs, + ): + control_image = convert_image_to_base64(control_image[:,:,:,:3]) + preprocessed_image = None + + # scale canny threshold between 0-500, to match BFL's API + def scale_value(value: float, min_val=0, max_val=500): + return min_val + value * (max_val - min_val) + canny_low_threshold = int(round(scale_value(canny_low_threshold))) + canny_high_threshold = int(round(scale_value(canny_high_threshold))) + + + if skip_preprocessing: + preprocessed_image = control_image + control_image = None + canny_low_threshold = None + canny_high_threshold = None + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/bfl/flux-pro-1.0-canny/generate", + method=HttpMethod.POST, + request_model=BFLFluxCannyImageRequest, + response_model=BFLFluxProGenerateResponse, + ), + request=BFLFluxCannyImageRequest( + prompt=prompt, + prompt_upsampling=prompt_upsampling, + steps=steps, + guidance=guidance, + seed=seed, + control_image=control_image, + canny_low_threshold=canny_low_threshold, + canny_high_threshold=canny_high_threshold, + preprocessed_image=preprocessed_image, + ), + auth_kwargs=kwargs, + ) + output_image = handle_bfl_synchronous_operation(operation) + return (output_image,) + + +class FluxProDepthNode(ComfyNodeABC): + """ + Generate image using a control image (depth). + """ + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "control_image": (IO.IMAGE,), + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the image generation", + }, + ), + "prompt_upsampling": ( + IO.BOOLEAN, + { + "default": False, + "tooltip": "Whether to perform upsampling on the prompt. 
If active, automatically modifies the prompt for more creative generation, but results are nondeterministic (same seed will not produce exactly the same result).", + }, + ), + "skip_preprocessing": ( + IO.BOOLEAN, + { + "default": False, + "tooltip": "Whether to skip preprocessing; set to True if control_image already is depth-ified, False if it is a raw image.", + }, + ), + "guidance": ( + IO.FLOAT, + { + "default": 15, + "min": 1, + "max": 100, + "tooltip": "Guidance strength for the image generation process" + }, + ), + "steps": ( + IO.INT, + { + "default": 50, + "min": 15, + "max": 50, + "tooltip": "Number of steps for the image generation process" + }, + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 0xFFFFFFFFFFFFFFFF, + "control_after_generate": True, + "tooltip": "The random seed used for creating the noise.", + }, + ), + }, + "optional": { + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + RETURN_TYPES = (IO.IMAGE,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/BFL" + + def api_call( + self, + control_image: torch.Tensor, + prompt: str, + prompt_upsampling: bool, + skip_preprocessing: bool, + steps: int, + guidance: float, + seed=0, + **kwargs, + ): + control_image = convert_image_to_base64(control_image[:,:,:,:3]) + preprocessed_image = None + + if skip_preprocessing: + preprocessed_image = control_image + control_image = None + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/bfl/flux-pro-1.0-depth/generate", + method=HttpMethod.POST, + request_model=BFLFluxDepthImageRequest, + response_model=BFLFluxProGenerateResponse, + ), + request=BFLFluxDepthImageRequest( + prompt=prompt, + prompt_upsampling=prompt_upsampling, + steps=steps, + guidance=guidance, + seed=seed, + control_image=control_image, + preprocessed_image=preprocessed_image, + ), + auth_kwargs=kwargs, + ) + output_image = handle_bfl_synchronous_operation(operation) + return (output_image,) + + +# A dictionary that contains all nodes you want to export with their names +# NOTE: names should be globally unique +NODE_CLASS_MAPPINGS = { + "FluxProUltraImageNode": FluxProUltraImageNode, + # "FluxProImageNode": FluxProImageNode, + "FluxProExpandNode": FluxProExpandNode, + "FluxProFillNode": FluxProFillNode, + "FluxProCannyNode": FluxProCannyNode, + "FluxProDepthNode": FluxProDepthNode, +} + +# A dictionary that contains the friendly/humanly readable titles for the nodes +NODE_DISPLAY_NAME_MAPPINGS = { + "FluxProUltraImageNode": "Flux 1.1 [pro] Ultra Image", + # "FluxProImageNode": "Flux 1.1 [pro] Image", + "FluxProExpandNode": "Flux.1 Expand Image", + "FluxProFillNode": "Flux.1 Fill Image", + "FluxProCannyNode": "Flux.1 Canny Control Image", + "FluxProDepthNode": "Flux.1 Depth Control Image", +} diff --git a/comfy_api_nodes/nodes_ideogram.py b/comfy_api_nodes/nodes_ideogram.py new file mode 100644 index 00000000..d25468b1 --- /dev/null +++ b/comfy_api_nodes/nodes_ideogram.py @@ -0,0 +1,779 @@ +from comfy.comfy_types.node_typing import IO, ComfyNodeABC, InputTypeDict +from inspect import cleandoc +from PIL import Image +import numpy as np +import io +import torch +from comfy_api_nodes.apis import ( + IdeogramGenerateRequest, + IdeogramGenerateResponse, + ImageRequest, + IdeogramV3Request, + IdeogramV3EditRequest, +) + +from comfy_api_nodes.apis.client import ( + ApiEndpoint, + HttpMethod, + SynchronousOperation, +) + +from 
comfy_api_nodes.apinode_utils import ( + download_url_to_bytesio, + bytesio_to_image_tensor, + resize_mask_to_image, +) + +V1_V1_RES_MAP = { + "Auto":"AUTO", + "512 x 1536":"RESOLUTION_512_1536", + "576 x 1408":"RESOLUTION_576_1408", + "576 x 1472":"RESOLUTION_576_1472", + "576 x 1536":"RESOLUTION_576_1536", + "640 x 1024":"RESOLUTION_640_1024", + "640 x 1344":"RESOLUTION_640_1344", + "640 x 1408":"RESOLUTION_640_1408", + "640 x 1472":"RESOLUTION_640_1472", + "640 x 1536":"RESOLUTION_640_1536", + "704 x 1152":"RESOLUTION_704_1152", + "704 x 1216":"RESOLUTION_704_1216", + "704 x 1280":"RESOLUTION_704_1280", + "704 x 1344":"RESOLUTION_704_1344", + "704 x 1408":"RESOLUTION_704_1408", + "704 x 1472":"RESOLUTION_704_1472", + "720 x 1280":"RESOLUTION_720_1280", + "736 x 1312":"RESOLUTION_736_1312", + "768 x 1024":"RESOLUTION_768_1024", + "768 x 1088":"RESOLUTION_768_1088", + "768 x 1152":"RESOLUTION_768_1152", + "768 x 1216":"RESOLUTION_768_1216", + "768 x 1232":"RESOLUTION_768_1232", + "768 x 1280":"RESOLUTION_768_1280", + "768 x 1344":"RESOLUTION_768_1344", + "832 x 960":"RESOLUTION_832_960", + "832 x 1024":"RESOLUTION_832_1024", + "832 x 1088":"RESOLUTION_832_1088", + "832 x 1152":"RESOLUTION_832_1152", + "832 x 1216":"RESOLUTION_832_1216", + "832 x 1248":"RESOLUTION_832_1248", + "864 x 1152":"RESOLUTION_864_1152", + "896 x 960":"RESOLUTION_896_960", + "896 x 1024":"RESOLUTION_896_1024", + "896 x 1088":"RESOLUTION_896_1088", + "896 x 1120":"RESOLUTION_896_1120", + "896 x 1152":"RESOLUTION_896_1152", + "960 x 832":"RESOLUTION_960_832", + "960 x 896":"RESOLUTION_960_896", + "960 x 1024":"RESOLUTION_960_1024", + "960 x 1088":"RESOLUTION_960_1088", + "1024 x 640":"RESOLUTION_1024_640", + "1024 x 768":"RESOLUTION_1024_768", + "1024 x 832":"RESOLUTION_1024_832", + "1024 x 896":"RESOLUTION_1024_896", + "1024 x 960":"RESOLUTION_1024_960", + "1024 x 1024":"RESOLUTION_1024_1024", + "1088 x 768":"RESOLUTION_1088_768", + "1088 x 832":"RESOLUTION_1088_832", + "1088 x 896":"RESOLUTION_1088_896", + "1088 x 960":"RESOLUTION_1088_960", + "1120 x 896":"RESOLUTION_1120_896", + "1152 x 704":"RESOLUTION_1152_704", + "1152 x 768":"RESOLUTION_1152_768", + "1152 x 832":"RESOLUTION_1152_832", + "1152 x 864":"RESOLUTION_1152_864", + "1152 x 896":"RESOLUTION_1152_896", + "1216 x 704":"RESOLUTION_1216_704", + "1216 x 768":"RESOLUTION_1216_768", + "1216 x 832":"RESOLUTION_1216_832", + "1232 x 768":"RESOLUTION_1232_768", + "1248 x 832":"RESOLUTION_1248_832", + "1280 x 704":"RESOLUTION_1280_704", + "1280 x 720":"RESOLUTION_1280_720", + "1280 x 768":"RESOLUTION_1280_768", + "1280 x 800":"RESOLUTION_1280_800", + "1312 x 736":"RESOLUTION_1312_736", + "1344 x 640":"RESOLUTION_1344_640", + "1344 x 704":"RESOLUTION_1344_704", + "1344 x 768":"RESOLUTION_1344_768", + "1408 x 576":"RESOLUTION_1408_576", + "1408 x 640":"RESOLUTION_1408_640", + "1408 x 704":"RESOLUTION_1408_704", + "1472 x 576":"RESOLUTION_1472_576", + "1472 x 640":"RESOLUTION_1472_640", + "1472 x 704":"RESOLUTION_1472_704", + "1536 x 512":"RESOLUTION_1536_512", + "1536 x 576":"RESOLUTION_1536_576", + "1536 x 640":"RESOLUTION_1536_640", +} + +V1_V2_RATIO_MAP = { + "1:1":"ASPECT_1_1", + "4:3":"ASPECT_4_3", + "3:4":"ASPECT_3_4", + "16:9":"ASPECT_16_9", + "9:16":"ASPECT_9_16", + "2:1":"ASPECT_2_1", + "1:2":"ASPECT_1_2", + "3:2":"ASPECT_3_2", + "2:3":"ASPECT_2_3", + "4:5":"ASPECT_4_5", + "5:4":"ASPECT_5_4", +} + +V3_RATIO_MAP = { + "1:3":"1x3", + "3:1":"3x1", + "1:2":"1x2", + "2:1":"2x1", + "9:16":"9x16", + "16:9":"16x9", + "10:16":"10x16", + "16:10":"16x10", + 
"2:3":"2x3", + "3:2":"3x2", + "3:4":"3x4", + "4:3":"4x3", + "4:5":"4x5", + "5:4":"5x4", + "1:1":"1x1", +} + +V3_RESOLUTIONS= [ + "Auto", + "512x1536", + "576x1408", + "576x1472", + "576x1536", + "640x1344", + "640x1408", + "640x1472", + "640x1536", + "704x1152", + "704x1216", + "704x1280", + "704x1344", + "704x1408", + "704x1472", + "736x1312", + "768x1088", + "768x1216", + "768x1280", + "768x1344", + "800x1280", + "832x960", + "832x1024", + "832x1088", + "832x1152", + "832x1216", + "832x1248", + "864x1152", + "896x960", + "896x1024", + "896x1088", + "896x1120", + "896x1152", + "960x832", + "960x896", + "960x1024", + "960x1088", + "1024x832", + "1024x896", + "1024x960", + "1024x1024", + "1088x768", + "1088x832", + "1088x896", + "1088x960", + "1120x896", + "1152x704", + "1152x832", + "1152x864", + "1152x896", + "1216x704", + "1216x768", + "1216x832", + "1248x832", + "1280x704", + "1280x768", + "1280x800", + "1312x736", + "1344x640", + "1344x704", + "1344x768", + "1408x576", + "1408x640", + "1408x704", + "1472x576", + "1472x640", + "1472x704", + "1536x512", + "1536x576", + "1536x640" +] + +def download_and_process_images(image_urls): + """Helper function to download and process multiple images from URLs""" + + # Initialize list to store image tensors + image_tensors = [] + + for image_url in image_urls: + # Using functions from apinode_utils.py to handle downloading and processing + image_bytesio = download_url_to_bytesio(image_url) # Download image content to BytesIO + img_tensor = bytesio_to_image_tensor(image_bytesio, mode="RGB") # Convert to torch.Tensor with RGB mode + image_tensors.append(img_tensor) + + # Stack tensors to match (N, width, height, channels) + if image_tensors: + stacked_tensors = torch.cat(image_tensors, dim=0) + else: + raise Exception("No valid images were processed") + + return stacked_tensors + + +class IdeogramV1(ComfyNodeABC): + """ + Generates images using the Ideogram V1 model. 
+ """ + + def __init__(self): + pass + + @classmethod + def INPUT_TYPES(cls) -> InputTypeDict: + return { + "required": { + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the image generation", + }, + ), + "turbo": ( + IO.BOOLEAN, + { + "default": False, + "tooltip": "Whether to use turbo mode (faster generation, potentially lower quality)", + } + ), + }, + "optional": { + "aspect_ratio": ( + IO.COMBO, + { + "options": list(V1_V2_RATIO_MAP.keys()), + "default": "1:1", + "tooltip": "The aspect ratio for image generation.", + }, + ), + "magic_prompt_option": ( + IO.COMBO, + { + "options": ["AUTO", "ON", "OFF"], + "default": "AUTO", + "tooltip": "Determine if MagicPrompt should be used in generation", + }, + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 2147483647, + "step": 1, + "control_after_generate": True, + "display": "number", + }, + ), + "negative_prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Description of what to exclude from the image", + }, + ), + "num_images": ( + IO.INT, + {"default": 1, "min": 1, "max": 8, "step": 1, "display": "number"}, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + RETURN_TYPES = (IO.IMAGE,) + FUNCTION = "api_call" + CATEGORY = "api node/image/Ideogram/v1" + DESCRIPTION = cleandoc(__doc__ or "") + API_NODE = True + + def api_call( + self, + prompt, + turbo=False, + aspect_ratio="1:1", + magic_prompt_option="AUTO", + seed=0, + negative_prompt="", + num_images=1, + **kwargs, + ): + # Determine the model based on turbo setting + aspect_ratio = V1_V2_RATIO_MAP.get(aspect_ratio, None) + model = "V_1_TURBO" if turbo else "V_1" + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/ideogram/generate", + method=HttpMethod.POST, + request_model=IdeogramGenerateRequest, + response_model=IdeogramGenerateResponse, + ), + request=IdeogramGenerateRequest( + image_request=ImageRequest( + prompt=prompt, + model=model, + num_images=num_images, + seed=seed, + aspect_ratio=aspect_ratio if aspect_ratio != "ASPECT_1_1" else None, + magic_prompt_option=( + magic_prompt_option if magic_prompt_option != "AUTO" else None + ), + negative_prompt=negative_prompt if negative_prompt else None, + ) + ), + auth_kwargs=kwargs, + ) + + response = operation.execute() + + if not response.data or len(response.data) == 0: + raise Exception("No images were generated in the response") + + image_urls = [image_data.url for image_data in response.data if image_data.url] + + if not image_urls: + raise Exception("No image URLs were generated in the response") + + return (download_and_process_images(image_urls),) + + +class IdeogramV2(ComfyNodeABC): + """ + Generates images using the Ideogram V2 model. + """ + + def __init__(self): + pass + + @classmethod + def INPUT_TYPES(cls) -> InputTypeDict: + return { + "required": { + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the image generation", + }, + ), + "turbo": ( + IO.BOOLEAN, + { + "default": False, + "tooltip": "Whether to use turbo mode (faster generation, potentially lower quality)", + } + ), + }, + "optional": { + "aspect_ratio": ( + IO.COMBO, + { + "options": list(V1_V2_RATIO_MAP.keys()), + "default": "1:1", + "tooltip": "The aspect ratio for image generation. 
Ignored if resolution is not set to AUTO.", + }, + ), + "resolution": ( + IO.COMBO, + { + "options": list(V1_V1_RES_MAP.keys()), + "default": "Auto", + "tooltip": "The resolution for image generation. If not set to AUTO, this overrides the aspect_ratio setting.", + }, + ), + "magic_prompt_option": ( + IO.COMBO, + { + "options": ["AUTO", "ON", "OFF"], + "default": "AUTO", + "tooltip": "Determine if MagicPrompt should be used in generation", + }, + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 2147483647, + "step": 1, + "control_after_generate": True, + "display": "number", + }, + ), + "style_type": ( + IO.COMBO, + { + "options": ["AUTO", "GENERAL", "REALISTIC", "DESIGN", "RENDER_3D", "ANIME"], + "default": "NONE", + "tooltip": "Style type for generation (V2 only)", + }, + ), + "negative_prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Description of what to exclude from the image", + }, + ), + "num_images": ( + IO.INT, + {"default": 1, "min": 1, "max": 8, "step": 1, "display": "number"}, + ), + #"color_palette": ( + # IO.STRING, + # { + # "multiline": False, + # "default": "", + # "tooltip": "Color palette preset name or hex colors with weights", + # }, + #), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + RETURN_TYPES = (IO.IMAGE,) + FUNCTION = "api_call" + CATEGORY = "api node/image/Ideogram/v2" + DESCRIPTION = cleandoc(__doc__ or "") + API_NODE = True + + def api_call( + self, + prompt, + turbo=False, + aspect_ratio="1:1", + resolution="Auto", + magic_prompt_option="AUTO", + seed=0, + style_type="NONE", + negative_prompt="", + num_images=1, + color_palette="", + **kwargs, + ): + aspect_ratio = V1_V2_RATIO_MAP.get(aspect_ratio, None) + resolution = V1_V1_RES_MAP.get(resolution, None) + # Determine the model based on turbo setting + model = "V_2_TURBO" if turbo else "V_2" + + # Handle resolution vs aspect_ratio logic + # If resolution is not AUTO, it overrides aspect_ratio + final_resolution = None + final_aspect_ratio = None + + if resolution != "AUTO": + final_resolution = resolution + else: + final_aspect_ratio = aspect_ratio if aspect_ratio != "ASPECT_1_1" else None + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/ideogram/generate", + method=HttpMethod.POST, + request_model=IdeogramGenerateRequest, + response_model=IdeogramGenerateResponse, + ), + request=IdeogramGenerateRequest( + image_request=ImageRequest( + prompt=prompt, + model=model, + num_images=num_images, + seed=seed, + aspect_ratio=final_aspect_ratio, + resolution=final_resolution, + magic_prompt_option=( + magic_prompt_option if magic_prompt_option != "AUTO" else None + ), + style_type=style_type if style_type != "NONE" else None, + negative_prompt=negative_prompt if negative_prompt else None, + color_palette=color_palette if color_palette else None, + ) + ), + auth_kwargs=kwargs, + ) + + response = operation.execute() + + if not response.data or len(response.data) == 0: + raise Exception("No images were generated in the response") + + image_urls = [image_data.url for image_data in response.data if image_data.url] + + if not image_urls: + raise Exception("No image URLs were generated in the response") + + return (download_and_process_images(image_urls),) + +class IdeogramV3(ComfyNodeABC): + """ + Generates images using the Ideogram V3 model. Supports both regular image generation from text prompts and image editing with mask. 
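The Ideogram V3 edit path below uploads the reference image and mask as multipart form data by encoding each tensor to an in-memory PNG and tagging the buffer with a filename. A small sketch of that preparation step, assuming the underlying HTTP client (for example `requests`) reads a file object's `.name` attribute to fill in the multipart filename; the helper name is illustrative:

```python
import io

import numpy as np
import torch
from PIL import Image


def tensor_to_named_png(image: torch.Tensor, filename: str) -> io.BytesIO:
    """Encode a [H, W, C] (or [1, H, W, C]) float tensor in 0..1 as an in-memory PNG
    and tag the buffer with a filename for multipart encoders (illustrative helper)."""
    arr = (image.squeeze().cpu().numpy() * 255).astype(np.uint8)
    buf = io.BytesIO()
    Image.fromarray(arr).save(buf, format="PNG")
    buf.seek(0)
    buf.name = filename  # e.g. "image.png" or "mask.png"
    return buf
```

The edit branch below follows this pattern for both the image and the inverted mask before handing them to the multipart request.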
+ """ + + def __init__(self): + pass + + @classmethod + def INPUT_TYPES(cls) -> InputTypeDict: + return { + "required": { + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the image generation or editing", + }, + ), + }, + "optional": { + "image": ( + IO.IMAGE, + { + "default": None, + "tooltip": "Optional reference image for image editing.", + }, + ), + "mask": ( + IO.MASK, + { + "default": None, + "tooltip": "Optional mask for inpainting (white areas will be replaced)", + }, + ), + "aspect_ratio": ( + IO.COMBO, + { + "options": list(V3_RATIO_MAP.keys()), + "default": "1:1", + "tooltip": "The aspect ratio for image generation. Ignored if resolution is not set to Auto.", + }, + ), + "resolution": ( + IO.COMBO, + { + "options": V3_RESOLUTIONS, + "default": "Auto", + "tooltip": "The resolution for image generation. If not set to Auto, this overrides the aspect_ratio setting.", + }, + ), + "magic_prompt_option": ( + IO.COMBO, + { + "options": ["AUTO", "ON", "OFF"], + "default": "AUTO", + "tooltip": "Determine if MagicPrompt should be used in generation", + }, + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 2147483647, + "step": 1, + "control_after_generate": True, + "display": "number", + }, + ), + "num_images": ( + IO.INT, + {"default": 1, "min": 1, "max": 8, "step": 1, "display": "number"}, + ), + "rendering_speed": ( + IO.COMBO, + { + "options": ["BALANCED", "TURBO", "QUALITY"], + "default": "BALANCED", + "tooltip": "Controls the trade-off between generation speed and quality", + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + RETURN_TYPES = (IO.IMAGE,) + FUNCTION = "api_call" + CATEGORY = "api node/image/Ideogram/v3" + DESCRIPTION = cleandoc(__doc__ or "") + API_NODE = True + + def api_call( + self, + prompt, + image=None, + mask=None, + resolution="Auto", + aspect_ratio="1:1", + magic_prompt_option="AUTO", + seed=0, + num_images=1, + rendering_speed="BALANCED", + **kwargs, + ): + # Check if both image and mask are provided for editing mode + if image is not None and mask is not None: + # Edit mode + path = "/proxy/ideogram/ideogram-v3/edit" + + # Process image and mask + input_tensor = image.squeeze().cpu() + # Resize mask to match image dimension + mask = resize_mask_to_image(mask, image, allow_gradient=False) + # Invert mask, as Ideogram API will edit black areas instead of white areas (opposite of convention). 
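The inversion on the next line flips the ComfyUI convention (white = pixels to replace) into what the comment above describes for Ideogram (black = pixels to edit). A worked micro-example to make the effect concrete:

```python
import torch

# ComfyUI-style mask: 1.0 (white) marks the pixels the user wants replaced.
mask = torch.tensor([[1.0, 0.0],
                     [0.0, 1.0]])

ideogram_mask = 1.0 - mask
# tensor([[0., 1.],
#         [1., 0.]])
# The formerly white (1.0) pixels are now 0.0 (black), which is what the API edits.
```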
+ mask = 1.0 - mask + + # Validate mask dimensions match image + if mask.shape[1:] != image.shape[1:-1]: + raise Exception("Mask and Image must be the same size") + + # Process image + img_np = (input_tensor.numpy() * 255).astype(np.uint8) + img = Image.fromarray(img_np) + img_byte_arr = io.BytesIO() + img.save(img_byte_arr, format="PNG") + img_byte_arr.seek(0) + img_binary = img_byte_arr + img_binary.name = "image.png" + + # Process mask - white areas will be replaced + mask_np = (mask.squeeze().cpu().numpy() * 255).astype(np.uint8) + mask_img = Image.fromarray(mask_np) + mask_byte_arr = io.BytesIO() + mask_img.save(mask_byte_arr, format="PNG") + mask_byte_arr.seek(0) + mask_binary = mask_byte_arr + mask_binary.name = "mask.png" + + # Create edit request + edit_request = IdeogramV3EditRequest( + prompt=prompt, + rendering_speed=rendering_speed, + ) + + # Add optional parameters + if magic_prompt_option != "AUTO": + edit_request.magic_prompt = magic_prompt_option + if seed != 0: + edit_request.seed = seed + if num_images > 1: + edit_request.num_images = num_images + + # Execute the operation for edit mode + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=path, + method=HttpMethod.POST, + request_model=IdeogramV3EditRequest, + response_model=IdeogramGenerateResponse, + ), + request=edit_request, + files={ + "image": img_binary, + "mask": mask_binary, + }, + content_type="multipart/form-data", + auth_kwargs=kwargs, + ) + + elif image is not None or mask is not None: + # If only one of image or mask is provided, raise an error + raise Exception("Ideogram V3 image editing requires both an image AND a mask") + else: + # Generation mode + path = "/proxy/ideogram/ideogram-v3/generate" + + # Create generation request + gen_request = IdeogramV3Request( + prompt=prompt, + rendering_speed=rendering_speed, + ) + + # Handle resolution vs aspect ratio + if resolution != "Auto": + gen_request.resolution = resolution + elif aspect_ratio != "1:1": + v3_aspect = V3_RATIO_MAP.get(aspect_ratio) + if v3_aspect: + gen_request.aspect_ratio = v3_aspect + + # Add optional parameters + if magic_prompt_option != "AUTO": + gen_request.magic_prompt = magic_prompt_option + if seed != 0: + gen_request.seed = seed + if num_images > 1: + gen_request.num_images = num_images + + # Execute the operation for generation mode + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=path, + method=HttpMethod.POST, + request_model=IdeogramV3Request, + response_model=IdeogramGenerateResponse, + ), + request=gen_request, + auth_kwargs=kwargs, + ) + + # Execute the operation and process response + response = operation.execute() + + if not response.data or len(response.data) == 0: + raise Exception("No images were generated in the response") + + image_urls = [image_data.url for image_data in response.data if image_data.url] + + if not image_urls: + raise Exception("No image URLs were generated in the response") + + return (download_and_process_images(image_urls),) + + +NODE_CLASS_MAPPINGS = { + "IdeogramV1": IdeogramV1, + "IdeogramV2": IdeogramV2, + "IdeogramV3": IdeogramV3, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "IdeogramV1": "Ideogram V1", + "IdeogramV2": "Ideogram V2", + "IdeogramV3": "Ideogram V3", +} + diff --git a/comfy_api_nodes/nodes_kling.py b/comfy_api_nodes/nodes_kling.py new file mode 100644 index 00000000..2d0fd888 --- /dev/null +++ b/comfy_api_nodes/nodes_kling.py @@ -0,0 +1,1629 @@ +"""Kling API Nodes + +For source of truth on the allowed permutations of request fields, please reference: +- 
[Compatibility Table](https://app.klingai.com/global/dev/document-api/apiReference/model/skillsMap) +""" + +from __future__ import annotations +from typing import Optional, TypeVar, Any +import math +import logging + +import torch + +from comfy_api_nodes.apis import ( + KlingTaskStatus, + KlingCameraControl, + KlingCameraConfig, + KlingCameraControlType, + KlingVideoGenDuration, + KlingVideoGenMode, + KlingVideoGenAspectRatio, + KlingVideoGenModelName, + KlingText2VideoRequest, + KlingText2VideoResponse, + KlingImage2VideoRequest, + KlingImage2VideoResponse, + KlingVideoExtendRequest, + KlingVideoExtendResponse, + KlingLipSyncVoiceLanguage, + KlingLipSyncInputObject, + KlingLipSyncRequest, + KlingLipSyncResponse, + KlingVirtualTryOnModelName, + KlingVirtualTryOnRequest, + KlingVirtualTryOnResponse, + KlingVideoResult, + KlingImageResult, + KlingImageGenerationsRequest, + KlingImageGenerationsResponse, + KlingImageGenImageReferenceType, + KlingImageGenModelName, + KlingImageGenAspectRatio, + KlingVideoEffectsRequest, + KlingVideoEffectsResponse, + KlingDualCharacterEffectsScene, + KlingSingleImageEffectsScene, + KlingDualCharacterEffectInput, + KlingSingleImageEffectInput, + KlingCharacterEffectModelName, + KlingSingleImageEffectModelName, +) +from comfy_api_nodes.apis.client import ( + ApiEndpoint, + HttpMethod, + SynchronousOperation, + PollingOperation, + EmptyRequest, +) +from comfy_api_nodes.apinode_utils import ( + tensor_to_base64_string, + download_url_to_video_output, + upload_video_to_comfyapi, + upload_audio_to_comfyapi, + download_url_to_image_tensor, +) +from comfy_api_nodes.mapper_utils import model_field_to_node_input +from comfy_api.input.basic_types import AudioInput +from comfy_api.input.video_types import VideoInput +from comfy_api.input_impl import VideoFromFile +from comfy.comfy_types.node_typing import IO, InputTypeOptions, ComfyNodeABC + +KLING_API_VERSION = "v1" +PATH_TEXT_TO_VIDEO = f"/proxy/kling/{KLING_API_VERSION}/videos/text2video" +PATH_IMAGE_TO_VIDEO = f"/proxy/kling/{KLING_API_VERSION}/videos/image2video" +PATH_VIDEO_EXTEND = f"/proxy/kling/{KLING_API_VERSION}/videos/video-extend" +PATH_LIP_SYNC = f"/proxy/kling/{KLING_API_VERSION}/videos/lip-sync" +PATH_VIDEO_EFFECTS = f"/proxy/kling/{KLING_API_VERSION}/videos/effects" +PATH_CHARACTER_IMAGE = f"/proxy/kling/{KLING_API_VERSION}/images/generations" +PATH_VIRTUAL_TRY_ON = f"/proxy/kling/{KLING_API_VERSION}/images/kolors-virtual-try-on" +PATH_IMAGE_GENERATIONS = f"/proxy/kling/{KLING_API_VERSION}/images/generations" + + +MAX_PROMPT_LENGTH_T2V = 2500 +MAX_PROMPT_LENGTH_I2V = 500 +MAX_PROMPT_LENGTH_IMAGE_GEN = 500 +MAX_NEGATIVE_PROMPT_LENGTH_IMAGE_GEN = 200 +MAX_PROMPT_LENGTH_LIP_SYNC = 120 + +R = TypeVar("R") + + +class KlingApiError(Exception): + """Base exception for Kling API errors.""" + + pass + + +def poll_until_finished(auth_kwargs: dict[str,str], api_endpoint: ApiEndpoint[Any, R]) -> R: + """Polls the Kling API endpoint until the task reaches a terminal state, then returns the response.""" + return PollingOperation( + poll_endpoint=api_endpoint, + completed_statuses=[ + KlingTaskStatus.succeed.value, + ], + failed_statuses=[KlingTaskStatus.failed.value], + status_extractor=lambda response: ( + response.data.task_status.value + if response.data and response.data.task_status + else None + ), + auth_kwargs=auth_kwargs, + ).execute() + + +def is_valid_camera_control_configs(configs: list[float]) -> bool: + """Verifies that at least one camera control configuration is non-zero.""" + return any(not 
math.isclose(value, 0.0) for value in configs) + + +def is_valid_prompt(prompt: str) -> bool: + """Verifies that the prompt is not empty.""" + return bool(prompt) + + +def is_valid_task_creation_response(response: KlingText2VideoResponse) -> bool: + """Verifies that the initial response contains a task ID.""" + return bool(response.data.task_id) + + +def is_valid_video_response(response: KlingText2VideoResponse) -> bool: + """Verifies that the response contains a task result with at least one video.""" + return ( + response.data is not None + and response.data.task_result is not None + and response.data.task_result.videos is not None + and len(response.data.task_result.videos) > 0 + ) + + +def is_valid_image_response(response: KlingVirtualTryOnResponse) -> bool: + """Verifies that the response contains a task result with at least one image.""" + return ( + response.data is not None + and response.data.task_result is not None + and response.data.task_result.images is not None + and len(response.data.task_result.images) > 0 + ) + + +def validate_prompts(prompt: str, negative_prompt: str, max_length: int) -> bool: + """Verifies that the positive prompt is not empty and that neither promt is too long.""" + if not prompt: + raise ValueError("Positive prompt is empty") + if len(prompt) > max_length: + raise ValueError(f"Positive prompt is too long: {len(prompt)} characters") + if negative_prompt and len(negative_prompt) > max_length: + raise ValueError( + f"Negative prompt is too long: {len(negative_prompt)} characters" + ) + return True + + +def validate_task_creation_response(response) -> None: + """Validates that the Kling task creation request was successful.""" + if not is_valid_task_creation_response(response): + error_msg = f"Kling initial request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}" + logging.error(error_msg) + raise KlingApiError(error_msg) + + +def validate_video_result_response(response) -> None: + """Validates that the Kling task result contains a video.""" + if not is_valid_video_response(response): + error_msg = f"Kling task {response.data.task_id} succeeded but no video data found in response." + logging.error(f"Error: {error_msg}.\nResponse: {response}") + raise KlingApiError(error_msg) + + +def validate_image_result_response(response) -> None: + """Validates that the Kling task result contains an image.""" + if not is_valid_image_response(response): + error_msg = f"Kling task {response.data.task_id} succeeded but no image data found in response." 
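Taken together, these validators and `poll_until_finished` give every Kling node the same task lifecycle: submit, check the creation response, poll to a terminal state, check the payload, convert to outputs. A condensed sketch of that flow, relying on helpers defined in this module (some of them appear further down in the file); `run_kling_video_task`, `task_endpoint_for`, and the argument names are placeholders, not new API:

```python
# Condensed lifecycle shared by the Kling video nodes in this file (sketch only).
# `task_endpoint_for` stands in for however a concrete node builds its polling
# ApiEndpoint from a task id.
def run_kling_video_task(initial_operation, task_endpoint_for, auth_kwargs):
    # 1. Submit the task and make sure a task id came back.
    task_creation_response = initial_operation.execute()
    validate_task_creation_response(task_creation_response)

    # 2. Poll until the task reaches a terminal state.
    task_id = task_creation_response.data.task_id
    final_response = poll_until_finished(auth_kwargs, task_endpoint_for(task_id))

    # 3. Make sure the terminal payload actually contains a video, then convert it.
    validate_video_result_response(final_response)
    video = get_video_from_response(final_response)
    return video_result_to_node_output(video)
```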
+ logging.error(f"Error: {error_msg}.\nResponse: {response}") + raise KlingApiError(error_msg) + + +def validate_input_image(image: torch.Tensor) -> None: + """ + Validates the input image adheres to the expectations of the Kling API: + - The image resolution should not be less than 300*300px + - The aspect ratio of the image should be between 1:2.5 ~ 2.5:1 + + See: https://app.klingai.com/global/dev/document-api/apiReference/model/imageToVideo + """ + if len(image.shape) == 4: + height, width = image.shape[1], image.shape[2] + elif len(image.shape) == 3: + height, width = image.shape[0], image.shape[1] + else: + raise ValueError("Invalid image tensor shape.") + + # Ensure minimum resolution is met + if height < 300: + raise ValueError("Image height must be at least 300px") + if width < 300: + raise ValueError("Image width must be at least 300px") + + # Ensure aspect ratio is within acceptable range + aspect_ratio = width / height + if aspect_ratio < 1 / 2.5 or aspect_ratio > 2.5: + raise ValueError("Image aspect ratio must be between 1:2.5 and 2.5:1") + + +def get_camera_control_input_config( + tooltip: str, default: float = 0.0 +) -> tuple[IO, InputTypeOptions]: + """Returns common InputTypeOptions for Kling camera control configurations.""" + input_config = { + "default": default, + "min": -10.0, + "max": 10.0, + "step": 0.25, + "display": "slider", + "tooltip": tooltip, + } + return IO.FLOAT, input_config + + +def get_video_from_response(response) -> KlingVideoResult: + """Returns the first video object from the Kling video generation task result.""" + video = response.data.task_result.videos[0] + logging.info( + "Kling task %s succeeded. Video URL: %s", response.data.task_id, video.url + ) + return video + + +def get_images_from_response(response) -> list[KlingImageResult]: + images = response.data.task_result.images + logging.info("Kling task %s succeeded. Images: %s", response.data.task_id, images) + return images + + +def video_result_to_node_output( + video: KlingVideoResult, +) -> tuple[VideoFromFile, str, str]: + """Converts a KlingVideoResult to a tuple of (VideoFromFile, str, str) to be used as a ComfyUI node output.""" + return ( + download_url_to_video_output(video.url), + str(video.id), + str(video.duration), + ) + + +def image_result_to_node_output( + images: list[KlingImageResult], +) -> torch.Tensor: + """ + Converts a KlingImageResult to a tuple containing a [B, H, W, C] tensor. + If multiple images are returned, they will be stacked along the batch dimension. + """ + if len(images) == 1: + return download_url_to_image_tensor(images[0].url) + else: + return torch.cat([download_url_to_image_tensor(image.url) for image in images]) + + +class KlingNodeBase(ComfyNodeABC): + """Base class for Kling nodes.""" + + FUNCTION = "api_call" + CATEGORY = "api node/video/Kling" + API_NODE = True + + +class KlingCameraControls(KlingNodeBase): + """Kling Camera Controls Node""" + + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "camera_control_type": model_field_to_node_input( + IO.COMBO, + KlingCameraControl, + "type", + enum_type=KlingCameraControlType, + ), + "horizontal_movement": get_camera_control_input_config( + "Controls camera's movement along horizontal axis (x-axis). Negative indicates left, positive indicates right" + ), + "vertical_movement": get_camera_control_input_config( + "Controls camera's movement along vertical axis (y-axis). Negative indicates downward, positive indicates upward." 
+ ), + "pan": get_camera_control_input_config( + "Controls camera's rotation in vertical plane (x-axis). Negative indicates downward rotation, positive indicates upward rotation.", + default=0.5, + ), + "tilt": get_camera_control_input_config( + "Controls camera's rotation in horizontal plane (y-axis). Negative indicates left rotation, positive indicates right rotation.", + ), + "roll": get_camera_control_input_config( + "Controls camera's rolling amount (z-axis). Negative indicates counterclockwise, positive indicates clockwise.", + ), + "zoom": get_camera_control_input_config( + "Controls change in camera's focal length. Negative indicates narrower field of view, positive indicates wider field of view.", + ), + } + } + + DESCRIPTION = "Allows specifying configuration options for Kling Camera Controls and motion control effects." + RETURN_TYPES = ("CAMERA_CONTROL",) + RETURN_NAMES = ("camera_control",) + FUNCTION = "main" + + @classmethod + def VALIDATE_INPUTS( + cls, + horizontal_movement: float, + vertical_movement: float, + pan: float, + tilt: float, + roll: float, + zoom: float, + ) -> bool | str: + if not is_valid_camera_control_configs( + [ + horizontal_movement, + vertical_movement, + pan, + tilt, + roll, + zoom, + ] + ): + return "Invalid camera control configs: at least one of the values must be non-zero" + return True + + def main( + self, + camera_control_type: str, + horizontal_movement: float, + vertical_movement: float, + pan: float, + tilt: float, + roll: float, + zoom: float, + ) -> tuple[KlingCameraControl]: + return ( + KlingCameraControl( + type=KlingCameraControlType(camera_control_type), + config=KlingCameraConfig( + horizontal=horizontal_movement, + vertical=vertical_movement, + pan=pan, + roll=roll, + tilt=tilt, + zoom=zoom, + ), + ), + ) + + +class KlingTextToVideoNode(KlingNodeBase): + """Kling Text to Video Node""" + + @staticmethod + def get_mode_string_mapping() -> dict[str, tuple[str, str, str]]: + """ + Returns a mapping of mode strings to their corresponding (mode, duration, model_name) tuples. + Only includes config combos that support the `image_tail` request field. 
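This node and the ones that follow build most of their `INPUT_TYPES` entries through `model_field_to_node_input` from `mapper_utils.py`, so the Pydantic request models remain the single source of truth for defaults, tooltips and numeric bounds. A small illustration of what such a call expands to, using a stand-in model rather than the real Kling request classes:

```python
from enum import Enum

from pydantic import BaseModel, Field

from comfy.comfy_types.node_typing import IO
from comfy_api_nodes.mapper_utils import model_field_to_node_input


class Mode(str, Enum):
    std = "std"
    pro = "pro"


class DemoRequest(BaseModel):
    """Stand-in request model (not part of the Kling API)."""
    cfg_scale: float = Field(0.8, ge=0.0, le=1.0, description="Flexibility of the generation.")
    mode: Mode = Field(Mode.std, description="Generation mode.")


# FLOAT input: default/tooltip come from the field, min/max from the ge/le constraints.
model_field_to_node_input(IO.FLOAT, DemoRequest, "cfg_scale")
# -> (IO.FLOAT, {"default": 0.8, "tooltip": "Flexibility of the generation.", "min": 0.0, "max": 1.0})

# COMBO input: pass the Enum so the options list is filled in automatically.
model_field_to_node_input(IO.COMBO, DemoRequest, "mode", enum_type=Mode)
# -> (IO.COMBO, {"options": ["std", "pro"], "default": Mode.std, "tooltip": "Generation mode."})
```

Keeping the node inputs derived from the request models this way means a constraint change in the API schema only has to be made once.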
+ + See: [Kling API Docs Capability Map](https://app.klingai.com/global/dev/document-api/apiReference/model/skillsMap) + """ + return { + "standard mode / 5s duration / kling-v1": ("std", "5", "kling-v1"), + "standard mode / 10s duration / kling-v1": ("std", "10", "kling-v1"), + "pro mode / 5s duration / kling-v1": ("pro", "5", "kling-v1"), + "pro mode / 10s duration / kling-v1": ("pro", "10", "kling-v1"), + "standard mode / 5s duration / kling-v1-6": ("std", "5", "kling-v1-6"), + "standard mode / 10s duration / kling-v1-6": ("std", "10", "kling-v1-6"), + "pro mode / 5s duration / kling-v2-master": ("pro", "5", "kling-v2-master"), + "pro mode / 10s duration / kling-v2-master": ("pro", "10", "kling-v2-master"), + "standard mode / 5s duration / kling-v2-master": ("std", "5", "kling-v2-master"), + "standard mode / 10s duration / kling-v2-master": ("std", "10", "kling-v2-master"), + } + + @classmethod + def INPUT_TYPES(s): + modes = list(KlingTextToVideoNode.get_mode_string_mapping().keys()) + return { + "required": { + "prompt": model_field_to_node_input( + IO.STRING, KlingText2VideoRequest, "prompt", multiline=True + ), + "negative_prompt": model_field_to_node_input( + IO.STRING, KlingText2VideoRequest, "negative_prompt", multiline=True + ), + "cfg_scale": model_field_to_node_input( + IO.FLOAT, + KlingText2VideoRequest, + "cfg_scale", + default=1.0, + min=0.0, + max=1.0, + ), + "aspect_ratio": model_field_to_node_input( + IO.COMBO, + KlingText2VideoRequest, + "aspect_ratio", + enum_type=KlingVideoGenAspectRatio, + ), + "mode": ( + modes, + { + "default": modes[4], + "tooltip": "The configuration to use for the video generation following the format: mode / duration / model_name.", + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + RETURN_TYPES = ("VIDEO", "STRING", "STRING") + RETURN_NAMES = ("VIDEO", "video_id", "duration") + DESCRIPTION = "Kling Text to Video Node" + + def get_response(self, task_id: str, auth_kwargs: dict[str,str]) -> KlingText2VideoResponse: + return poll_until_finished( + auth_kwargs, + ApiEndpoint( + path=f"{PATH_TEXT_TO_VIDEO}/{task_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=KlingText2VideoResponse, + ), + ) + + def api_call( + self, + prompt: str, + negative_prompt: str, + cfg_scale: float, + mode: str, + aspect_ratio: str, + camera_control: Optional[KlingCameraControl] = None, + model_name: Optional[str] = None, + duration: Optional[str] = None, + **kwargs, + ) -> tuple[VideoFromFile, str, str]: + validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_T2V) + if model_name is None: + mode, duration, model_name = self.get_mode_string_mapping()[mode] + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=PATH_TEXT_TO_VIDEO, + method=HttpMethod.POST, + request_model=KlingText2VideoRequest, + response_model=KlingText2VideoResponse, + ), + request=KlingText2VideoRequest( + prompt=prompt if prompt else None, + negative_prompt=negative_prompt if negative_prompt else None, + duration=KlingVideoGenDuration(duration), + mode=KlingVideoGenMode(mode), + model_name=KlingVideoGenModelName(model_name), + cfg_scale=cfg_scale, + aspect_ratio=KlingVideoGenAspectRatio(aspect_ratio), + camera_control=camera_control, + ), + auth_kwargs=kwargs, + ) + + task_creation_response = initial_operation.execute() + validate_task_creation_response(task_creation_response) + + task_id = task_creation_response.data.task_id + final_response = self.get_response(task_id, 
auth_kwargs=kwargs) + validate_video_result_response(final_response) + + video = get_video_from_response(final_response) + return video_result_to_node_output(video) + + +class KlingCameraControlT2VNode(KlingTextToVideoNode): + """ + Kling Text to Video Camera Control Node. This node is a text to video node, but it supports controlling the camera. + Duration, mode, and model_name request fields are hard-coded because camera control is only supported in pro mode with the kling-v1-5 model at 5s duration as of 2025-05-02. + """ + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "prompt": model_field_to_node_input( + IO.STRING, KlingText2VideoRequest, "prompt", multiline=True + ), + "negative_prompt": model_field_to_node_input( + IO.STRING, + KlingText2VideoRequest, + "negative_prompt", + multiline=True, + ), + "cfg_scale": model_field_to_node_input( + IO.FLOAT, + KlingText2VideoRequest, + "cfg_scale", + default=0.75, + min=0.0, + max=1.0, + ), + "aspect_ratio": model_field_to_node_input( + IO.COMBO, + KlingText2VideoRequest, + "aspect_ratio", + enum_type=KlingVideoGenAspectRatio, + ), + "camera_control": ( + "CAMERA_CONTROL", + { + "tooltip": "Can be created using the Kling Camera Controls node. Controls the camera movement and motion during the video generation.", + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + DESCRIPTION = "Transform text into cinematic videos with professional camera movements that simulate real-world cinematography. Control virtual camera actions including zoom, rotation, pan, tilt, and first-person view, while maintaining focus on your original text." + + def api_call( + self, + prompt: str, + negative_prompt: str, + cfg_scale: float, + aspect_ratio: str, + camera_control: Optional[KlingCameraControl] = None, + **kwargs, + ): + return super().api_call( + model_name=KlingVideoGenModelName.kling_v1, + cfg_scale=cfg_scale, + mode=KlingVideoGenMode.std, + aspect_ratio=KlingVideoGenAspectRatio(aspect_ratio), + duration=KlingVideoGenDuration.field_5, + prompt=prompt, + negative_prompt=negative_prompt, + camera_control=camera_control, + **kwargs, + ) + + +class KlingImage2VideoNode(KlingNodeBase): + """Kling Image to Video Node""" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "start_frame": model_field_to_node_input( + IO.IMAGE, + KlingImage2VideoRequest, + "image", + tooltip="The reference image used to generate the video.", + ), + "prompt": model_field_to_node_input( + IO.STRING, KlingImage2VideoRequest, "prompt", multiline=True + ), + "negative_prompt": model_field_to_node_input( + IO.STRING, + KlingImage2VideoRequest, + "negative_prompt", + multiline=True, + ), + "model_name": model_field_to_node_input( + IO.COMBO, + KlingImage2VideoRequest, + "model_name", + enum_type=KlingVideoGenModelName, + ), + "cfg_scale": model_field_to_node_input( + IO.FLOAT, + KlingImage2VideoRequest, + "cfg_scale", + default=0.8, + min=0.0, + max=1.0, + ), + "mode": model_field_to_node_input( + IO.COMBO, + KlingImage2VideoRequest, + "mode", + enum_type=KlingVideoGenMode, + ), + "aspect_ratio": model_field_to_node_input( + IO.COMBO, + KlingImage2VideoRequest, + "aspect_ratio", + enum_type=KlingVideoGenAspectRatio, + ), + "duration": model_field_to_node_input( + IO.COMBO, + KlingImage2VideoRequest, + "duration", + enum_type=KlingVideoGenDuration, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + RETURN_TYPES = ("VIDEO", 
"STRING", "STRING") + RETURN_NAMES = ("VIDEO", "video_id", "duration") + DESCRIPTION = "Kling Image to Video Node" + + def get_response(self, task_id: str, auth_kwargs: dict[str,str]) -> KlingImage2VideoResponse: + return poll_until_finished( + auth_kwargs, + ApiEndpoint( + path=f"{PATH_IMAGE_TO_VIDEO}/{task_id}", + method=HttpMethod.GET, + request_model=KlingImage2VideoRequest, + response_model=KlingImage2VideoResponse, + ), + ) + + def api_call( + self, + start_frame: torch.Tensor, + prompt: str, + negative_prompt: str, + model_name: str, + cfg_scale: float, + mode: str, + aspect_ratio: str, + duration: str, + camera_control: Optional[KlingCameraControl] = None, + end_frame: Optional[torch.Tensor] = None, + **kwargs, + ) -> tuple[VideoFromFile]: + validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_I2V) + validate_input_image(start_frame) + + if camera_control is not None: + # Camera control type for image 2 video is always `simple` + camera_control.type = KlingCameraControlType.simple + + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=PATH_IMAGE_TO_VIDEO, + method=HttpMethod.POST, + request_model=KlingImage2VideoRequest, + response_model=KlingImage2VideoResponse, + ), + request=KlingImage2VideoRequest( + model_name=KlingVideoGenModelName(model_name), + image=tensor_to_base64_string(start_frame), + image_tail=( + tensor_to_base64_string(end_frame) + if end_frame is not None + else None + ), + prompt=prompt, + negative_prompt=negative_prompt if negative_prompt else None, + cfg_scale=cfg_scale, + mode=KlingVideoGenMode(mode), + duration=KlingVideoGenDuration(duration), + camera_control=camera_control, + ), + auth_kwargs=kwargs, + ) + + task_creation_response = initial_operation.execute() + validate_task_creation_response(task_creation_response) + task_id = task_creation_response.data.task_id + + final_response = self.get_response(task_id, auth_kwargs=kwargs) + validate_video_result_response(final_response) + + video = get_video_from_response(final_response) + return video_result_to_node_output(video) + + +class KlingCameraControlI2VNode(KlingImage2VideoNode): + """ + Kling Image to Video Camera Control Node. This node is a image to video node, but it supports controlling the camera. + Duration, mode, and model_name request fields are hard-coded because camera control is only supported in pro mode with the kling-v1-5 model at 5s duration as of 2025-05-02. + """ + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "start_frame": model_field_to_node_input( + IO.IMAGE, KlingImage2VideoRequest, "image" + ), + "prompt": model_field_to_node_input( + IO.STRING, KlingImage2VideoRequest, "prompt", multiline=True + ), + "negative_prompt": model_field_to_node_input( + IO.STRING, + KlingImage2VideoRequest, + "negative_prompt", + multiline=True, + ), + "cfg_scale": model_field_to_node_input( + IO.FLOAT, + KlingImage2VideoRequest, + "cfg_scale", + default=0.75, + min=0.0, + max=1.0, + ), + "aspect_ratio": model_field_to_node_input( + IO.COMBO, + KlingImage2VideoRequest, + "aspect_ratio", + enum_type=KlingVideoGenAspectRatio, + ), + "camera_control": ( + "CAMERA_CONTROL", + { + "tooltip": "Can be created using the Kling Camera Controls node. 
Controls the camera movement and motion during the video generation.", + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + DESCRIPTION = "Transform still images into cinematic videos with professional camera movements that simulate real-world cinematography. Control virtual camera actions including zoom, rotation, pan, tilt, and first-person view, while maintaining focus on your original image." + + def api_call( + self, + start_frame: torch.Tensor, + prompt: str, + negative_prompt: str, + cfg_scale: float, + aspect_ratio: str, + camera_control: KlingCameraControl, + **kwargs, + ): + return super().api_call( + model_name=KlingVideoGenModelName.kling_v1_5, + start_frame=start_frame, + cfg_scale=cfg_scale, + mode=KlingVideoGenMode.pro, + aspect_ratio=KlingVideoGenAspectRatio(aspect_ratio), + duration=KlingVideoGenDuration.field_5, + prompt=prompt, + negative_prompt=negative_prompt, + camera_control=camera_control, + **kwargs, + ) + + +class KlingStartEndFrameNode(KlingImage2VideoNode): + """ + Kling First Last Frame Node. This node allows creation of a video from a first and last frame. It calls the normal image to video endpoint, but only allows the subset of input options that support the `image_tail` request field. + """ + + @staticmethod + def get_mode_string_mapping() -> dict[str, tuple[str, str, str]]: + """ + Returns a mapping of mode strings to their corresponding (mode, duration, model_name) tuples. + Only includes config combos that support the `image_tail` request field. + + See: [Kling API Docs Capability Map](https://app.klingai.com/global/dev/document-api/apiReference/model/skillsMap) + """ + return { + "standard mode / 5s duration / kling-v1": ("std", "5", "kling-v1"), + "pro mode / 5s duration / kling-v1": ("pro", "5", "kling-v1"), + "pro mode / 5s duration / kling-v1-5": ("pro", "5", "kling-v1-5"), + "pro mode / 10s duration / kling-v1-5": ("pro", "10", "kling-v1-5"), + "pro mode / 5s duration / kling-v1-6": ("pro", "5", "kling-v1-6"), + "pro mode / 10s duration / kling-v1-6": ("pro", "10", "kling-v1-6"), + } + + @classmethod + def INPUT_TYPES(s): + modes = list(KlingStartEndFrameNode.get_mode_string_mapping().keys()) + return { + "required": { + "start_frame": model_field_to_node_input( + IO.IMAGE, KlingImage2VideoRequest, "image" + ), + "end_frame": model_field_to_node_input( + IO.IMAGE, KlingImage2VideoRequest, "image_tail" + ), + "prompt": model_field_to_node_input( + IO.STRING, KlingImage2VideoRequest, "prompt", multiline=True + ), + "negative_prompt": model_field_to_node_input( + IO.STRING, + KlingImage2VideoRequest, + "negative_prompt", + multiline=True, + ), + "cfg_scale": model_field_to_node_input( + IO.FLOAT, + KlingImage2VideoRequest, + "cfg_scale", + default=0.5, + min=0.0, + max=1.0, + ), + "aspect_ratio": model_field_to_node_input( + IO.COMBO, + KlingImage2VideoRequest, + "aspect_ratio", + enum_type=KlingVideoGenAspectRatio, + ), + "mode": ( + modes, + { + "default": modes[2], + "tooltip": "The configuration to use for the video generation following the format: mode / duration / model_name.", + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + DESCRIPTION = "Generate a video sequence that transitions between your provided start and end images. The node creates all frames in between, producing a smooth transformation from the first frame to the last." 
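For context, the "mode / duration / model_name" combo strings defined in get_mode_string_mapping() above are unpacked back into three separate request fields when api_call runs. Below is a minimal standalone sketch of that lookup; MODE_MAP and resolve_mode are hypothetical names introduced only for illustration, while the two sample entries mirror values from the mapping above.

# Sketch of the combo-string lookup used by the Kling start/end-frame node.
# MODE_MAP and resolve_mode are illustrative names, not part of the patch;
# the entries mirror get_mode_string_mapping() above.
MODE_MAP: dict[str, tuple[str, str, str]] = {
    "pro mode / 5s duration / kling-v1-6": ("pro", "5", "kling-v1-6"),
    "pro mode / 10s duration / kling-v1-6": ("pro", "10", "kling-v1-6"),
}

def resolve_mode(mode_string: str) -> tuple[str, str, str]:
    """Return (mode, duration, model_name) for a selected combo string."""
    return MODE_MAP[mode_string]

mode, duration, model_name = resolve_mode("pro mode / 5s duration / kling-v1-6")
assert (mode, duration, model_name) == ("pro", "5", "kling-v1-6")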
+ + def api_call( + self, + start_frame: torch.Tensor, + end_frame: torch.Tensor, + prompt: str, + negative_prompt: str, + cfg_scale: float, + aspect_ratio: str, + mode: str, + **kwargs, + ): + mode, duration, model_name = KlingStartEndFrameNode.get_mode_string_mapping()[ + mode + ] + return super().api_call( + prompt=prompt, + negative_prompt=negative_prompt, + model_name=model_name, + start_frame=start_frame, + cfg_scale=cfg_scale, + mode=mode, + aspect_ratio=aspect_ratio, + duration=duration, + end_frame=end_frame, + **kwargs, + ) + + +class KlingVideoExtendNode(KlingNodeBase): + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "prompt": model_field_to_node_input( + IO.STRING, KlingVideoExtendRequest, "prompt", multiline=True + ), + "negative_prompt": model_field_to_node_input( + IO.STRING, + KlingVideoExtendRequest, + "negative_prompt", + multiline=True, + ), + "cfg_scale": model_field_to_node_input( + IO.FLOAT, + KlingVideoExtendRequest, + "cfg_scale", + default=0.5, + min=0.0, + max=1.0, + ), + "video_id": model_field_to_node_input( + IO.STRING, KlingVideoExtendRequest, "video_id", forceInput=True + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + RETURN_TYPES = ("VIDEO", "STRING", "STRING") + RETURN_NAMES = ("VIDEO", "video_id", "duration") + DESCRIPTION = "Kling Video Extend Node. Extend videos made by other Kling nodes. The video_id is created by using other Kling Nodes." + + def get_response(self, task_id: str, auth_kwargs: dict[str,str]) -> KlingVideoExtendResponse: + return poll_until_finished( + auth_kwargs, + ApiEndpoint( + path=f"{PATH_VIDEO_EXTEND}/{task_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=KlingVideoExtendResponse, + ), + ) + + def api_call( + self, + prompt: str, + negative_prompt: str, + cfg_scale: float, + video_id: str, + **kwargs, + ) -> tuple[VideoFromFile, str, str]: + validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_T2V) + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=PATH_VIDEO_EXTEND, + method=HttpMethod.POST, + request_model=KlingVideoExtendRequest, + response_model=KlingVideoExtendResponse, + ), + request=KlingVideoExtendRequest( + prompt=prompt if prompt else None, + negative_prompt=negative_prompt if negative_prompt else None, + cfg_scale=cfg_scale, + video_id=video_id, + ), + auth_kwargs=kwargs, + ) + + task_creation_response = initial_operation.execute() + validate_task_creation_response(task_creation_response) + task_id = task_creation_response.data.task_id + + final_response = self.get_response(task_id, auth_kwargs=kwargs) + validate_video_result_response(final_response) + + video = get_video_from_response(final_response) + return video_result_to_node_output(video) + + +class KlingVideoEffectsBase(KlingNodeBase): + """Kling Video Effects Base""" + + RETURN_TYPES = ("VIDEO", "STRING", "STRING") + RETURN_NAMES = ("VIDEO", "video_id", "duration") + + def get_response(self, task_id: str, auth_kwargs: dict[str,str]) -> KlingVideoEffectsResponse: + return poll_until_finished( + auth_kwargs, + ApiEndpoint( + path=f"{PATH_VIDEO_EFFECTS}/{task_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=KlingVideoEffectsResponse, + ), + ) + + def api_call( + self, + dual_character: bool, + effect_scene: KlingDualCharacterEffectsScene | KlingSingleImageEffectsScene, + model_name: str, + duration: KlingVideoGenDuration, + image_1: torch.Tensor, + image_2: Optional[torch.Tensor] = None, + 
mode: Optional[KlingVideoGenMode] = None, + **kwargs, + ): + if dual_character: + request_input_field = KlingDualCharacterEffectInput( + model_name=model_name, + mode=mode, + images=[ + tensor_to_base64_string(image_1), + tensor_to_base64_string(image_2), + ], + duration=duration, + ) + else: + request_input_field = KlingSingleImageEffectInput( + model_name=model_name, + image=tensor_to_base64_string(image_1), + duration=duration, + ) + + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=PATH_VIDEO_EFFECTS, + method=HttpMethod.POST, + request_model=KlingVideoEffectsRequest, + response_model=KlingVideoEffectsResponse, + ), + request=KlingVideoEffectsRequest( + effect_scene=effect_scene, + input=request_input_field, + ), + auth_kwargs=kwargs, + ) + + task_creation_response = initial_operation.execute() + validate_task_creation_response(task_creation_response) + task_id = task_creation_response.data.task_id + + final_response = self.get_response(task_id, auth_kwargs=kwargs) + validate_video_result_response(final_response) + + video = get_video_from_response(final_response) + return video_result_to_node_output(video) + + +class KlingDualCharacterVideoEffectNode(KlingVideoEffectsBase): + """Kling Dual Character Video Effect Node""" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image_left": (IO.IMAGE, {"tooltip": "Left side image"}), + "image_right": (IO.IMAGE, {"tooltip": "Right side image"}), + "effect_scene": model_field_to_node_input( + IO.COMBO, + KlingVideoEffectsRequest, + "effect_scene", + enum_type=KlingDualCharacterEffectsScene, + ), + "model_name": model_field_to_node_input( + IO.COMBO, + KlingDualCharacterEffectInput, + "model_name", + enum_type=KlingCharacterEffectModelName, + ), + "mode": model_field_to_node_input( + IO.COMBO, + KlingDualCharacterEffectInput, + "mode", + enum_type=KlingVideoGenMode, + ), + "duration": model_field_to_node_input( + IO.COMBO, + KlingDualCharacterEffectInput, + "duration", + enum_type=KlingVideoGenDuration, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + DESCRIPTION = "Achieve different special effects when generating a video based on the effect_scene. First image will be positioned on left side, second on right side of the composite." + RETURN_TYPES = ("VIDEO", "STRING") + RETURN_NAMES = ("VIDEO", "duration") + + def api_call( + self, + image_left: torch.Tensor, + image_right: torch.Tensor, + effect_scene: KlingDualCharacterEffectsScene, + model_name: KlingCharacterEffectModelName, + mode: KlingVideoGenMode, + duration: KlingVideoGenDuration, + **kwargs, + ): + video, _, duration = super().api_call( + dual_character=True, + effect_scene=effect_scene, + model_name=model_name, + mode=mode, + duration=duration, + image_1=image_left, + image_2=image_right, + **kwargs, + ) + return video, duration + +class KlingSingleImageVideoEffectNode(KlingVideoEffectsBase): + """Kling Single Image Video Effect Node""" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": ( + IO.IMAGE, + { + "tooltip": " Reference Image. URL or Base64 encoded string (without data:image prefix). 
File size cannot exceed 10MB, resolution not less than 300*300px, aspect ratio between 1:2.5 ~ 2.5:1" + }, + ), + "effect_scene": model_field_to_node_input( + IO.COMBO, + KlingVideoEffectsRequest, + "effect_scene", + enum_type=KlingSingleImageEffectsScene, + ), + "model_name": model_field_to_node_input( + IO.COMBO, + KlingSingleImageEffectInput, + "model_name", + enum_type=KlingSingleImageEffectModelName, + ), + "duration": model_field_to_node_input( + IO.COMBO, + KlingSingleImageEffectInput, + "duration", + enum_type=KlingVideoGenDuration, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + DESCRIPTION = "Achieve different special effects when generating a video based on the effect_scene." + + def api_call( + self, + image: torch.Tensor, + effect_scene: KlingSingleImageEffectsScene, + model_name: KlingSingleImageEffectModelName, + duration: KlingVideoGenDuration, + **kwargs, + ): + return super().api_call( + dual_character=False, + effect_scene=effect_scene, + model_name=model_name, + duration=duration, + image_1=image, + **kwargs, + ) + + +class KlingLipSyncBase(KlingNodeBase): + """Kling Lip Sync Base""" + + RETURN_TYPES = ("VIDEO", "STRING", "STRING") + RETURN_NAMES = ("VIDEO", "video_id", "duration") + + def validate_text(self, text: str): + if not text: + raise ValueError("Text is required") + if len(text) > MAX_PROMPT_LENGTH_LIP_SYNC: + raise ValueError( + f"Text is too long. Maximum length is {MAX_PROMPT_LENGTH_LIP_SYNC} characters." + ) + + def get_response(self, task_id: str, auth_kwargs: dict[str,str]) -> KlingLipSyncResponse: + """Polls the Kling API endpoint until the task reaches a terminal state.""" + return poll_until_finished( + auth_kwargs, + ApiEndpoint( + path=f"{PATH_LIP_SYNC}/{task_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=KlingLipSyncResponse, + ), + ) + + def api_call( + self, + video: VideoInput, + audio: Optional[AudioInput] = None, + voice_language: Optional[str] = None, + mode: Optional[str] = None, + text: Optional[str] = None, + voice_speed: Optional[float] = None, + voice_id: Optional[str] = None, + **kwargs + ) -> tuple[VideoFromFile, str, str]: + if text: + self.validate_text(text) + + # Upload video to Comfy API and get download URL + video_url = upload_video_to_comfyapi(video, auth_kwargs=kwargs) + logging.info("Uploaded video to Comfy API. URL: %s", video_url) + + # Upload the audio file to Comfy API and get download URL + if audio: + audio_url = upload_audio_to_comfyapi(audio, auth_kwargs=kwargs) + logging.info("Uploaded audio to Comfy API. 
URL: %s", audio_url) + else: + audio_url = None + + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=PATH_LIP_SYNC, + method=HttpMethod.POST, + request_model=KlingLipSyncRequest, + response_model=KlingLipSyncResponse, + ), + request=KlingLipSyncRequest( + input=KlingLipSyncInputObject( + video_url=video_url, + mode=mode, + text=text, + voice_language=voice_language, + voice_speed=voice_speed, + audio_type="url", + audio_url=audio_url, + voice_id=voice_id, + ), + ), + auth_kwargs=kwargs, + ) + + task_creation_response = initial_operation.execute() + validate_task_creation_response(task_creation_response) + task_id = task_creation_response.data.task_id + + final_response = self.get_response(task_id, auth_kwargs=kwargs) + validate_video_result_response(final_response) + + video = get_video_from_response(final_response) + return video_result_to_node_output(video) + + +class KlingLipSyncAudioToVideoNode(KlingLipSyncBase): + """Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file.""" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "video": (IO.VIDEO, {}), + "audio": (IO.AUDIO, {}), + "voice_language": model_field_to_node_input( + IO.COMBO, + KlingLipSyncInputObject, + "voice_language", + enum_type=KlingLipSyncVoiceLanguage, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + DESCRIPTION = "Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file." + + def api_call( + self, + video: VideoInput, + audio: AudioInput, + voice_language: str, + **kwargs, + ): + return super().api_call( + video=video, + audio=audio, + voice_language=voice_language, + mode="audio2video", + **kwargs, + ) + + +class KlingLipSyncTextToVideoNode(KlingLipSyncBase): + """Kling Lip Sync Text to Video Node. 
Syncs mouth movements in a video file to a text prompt.""" + + @staticmethod + def get_voice_config() -> dict[str, tuple[str, str]]: + return { + # English voices + "Melody": ("girlfriend_4_speech02", "en"), + "Sunny": ("genshin_vindi2", "en"), + "Sage": ("zhinen_xuesheng", "en"), + "Ace": ("AOT", "en"), + "Blossom": ("ai_shatang", "en"), + "Peppy": ("genshin_klee2", "en"), + "Dove": ("genshin_kirara", "en"), + "Shine": ("ai_kaiya", "en"), + "Anchor": ("oversea_male1", "en"), + "Lyric": ("ai_chenjiahao_712", "en"), + "Tender": ("chat1_female_new-3", "en"), + "Siren": ("chat_0407_5-1", "en"), + "Zippy": ("cartoon-boy-07", "en"), + "Bud": ("uk_boy1", "en"), + "Sprite": ("cartoon-girl-01", "en"), + "Candy": ("PeppaPig_platform", "en"), + "Beacon": ("ai_huangzhong_712", "en"), + "Rock": ("ai_huangyaoshi_712", "en"), + "Titan": ("ai_laoguowang_712", "en"), + "Grace": ("chengshu_jiejie", "en"), + "Helen": ("you_pingjing", "en"), + "Lore": ("calm_story1", "en"), + "Crag": ("uk_man2", "en"), + "Prattle": ("laopopo_speech02", "en"), + "Hearth": ("heainainai_speech02", "en"), + "The Reader": ("reader_en_m-v1", "en"), + "Commercial Lady": ("commercial_lady_en_f-v1", "en"), + # Chinese voices + "阳光少年": ("genshin_vindi2", "zh"), + "懂事小弟": ("zhinen_xuesheng", "zh"), + "运动少年": ("tiyuxi_xuedi", "zh"), + "青春少女": ("ai_shatang", "zh"), + "温柔小妹": ("genshin_klee2", "zh"), + "元气少女": ("genshin_kirara", "zh"), + "阳光男生": ("ai_kaiya", "zh"), + "幽默小哥": ("tiexin_nanyou", "zh"), + "文艺小哥": ("ai_chenjiahao_712", "zh"), + "甜美邻家": ("girlfriend_1_speech02", "zh"), + "温柔姐姐": ("chat1_female_new-3", "zh"), + "职场女青": ("girlfriend_2_speech02", "zh"), + "活泼男童": ("cartoon-boy-07", "zh"), + "俏皮女童": ("cartoon-girl-01", "zh"), + "稳重老爸": ("ai_huangyaoshi_712", "zh"), + "温柔妈妈": ("you_pingjing", "zh"), + "严肃上司": ("ai_laoguowang_712", "zh"), + "优雅贵妇": ("chengshu_jiejie", "zh"), + "慈祥爷爷": ("zhuxi_speech02", "zh"), + "唠叨爷爷": ("uk_oldman3", "zh"), + "唠叨奶奶": ("laopopo_speech02", "zh"), + "和蔼奶奶": ("heainainai_speech02", "zh"), + "东北老铁": ("dongbeilaotie_speech02", "zh"), + "重庆小伙": ("chongqingxiaohuo_speech02", "zh"), + "四川妹子": ("chuanmeizi_speech02", "zh"), + "潮汕大叔": ("chaoshandashu_speech02", "zh"), + "台湾男生": ("ai_taiwan_man2_speech02", "zh"), + "西安掌柜": ("xianzhanggui_speech02", "zh"), + "天津姐姐": ("tianjinjiejie_speech02", "zh"), + "新闻播报男": ("diyinnansang_DB_CN_M_04-v2", "zh"), + "译制片男": ("yizhipiannan-v1", "zh"), + "撒娇女友": ("tianmeixuemei-v1", "zh"), + "刀片烟嗓": ("daopianyansang-v1", "zh"), + "乖巧正太": ("mengwa-v1", "zh"), + } + + @classmethod + def INPUT_TYPES(s): + voice_options = list(s.get_voice_config().keys()) + return { + "required": { + "video": (IO.VIDEO, {}), + "text": model_field_to_node_input( + IO.STRING, KlingLipSyncInputObject, "text", multiline=True + ), + "voice": (voice_options, {"default": voice_options[0]}), + "voice_speed": model_field_to_node_input( + IO.FLOAT, KlingLipSyncInputObject, "voice_speed", slider=True + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + DESCRIPTION = "Kling Lip Sync Text to Video Node. Syncs mouth movements in a video file to a text prompt." 
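Before the lip-sync request is built in the api_call that follows, the friendly voice name selected in the combo is resolved to a (voice_id, voice_language) pair and the mode is fixed to "text2video". A minimal sketch of that resolution is shown below; VOICE_CONFIG and resolve_voice are hypothetical names used only for illustration, while the sample entry mirrors get_voice_config() above.

# Sketch of the voice lookup performed by the text-to-video lip-sync node.
# VOICE_CONFIG and resolve_voice are illustrative names, not part of the patch;
# the entry mirrors get_voice_config() above.
VOICE_CONFIG: dict[str, tuple[str, str]] = {
    "Melody": ("girlfriend_4_speech02", "en"),
}

def resolve_voice(voice: str) -> tuple[str, str]:
    """Return (voice_id, voice_language) for a display name."""
    return VOICE_CONFIG[voice]

voice_id, voice_language = resolve_voice("Melody")
assert (voice_id, voice_language) == ("girlfriend_4_speech02", "en")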
+ + def api_call( + self, + video: VideoInput, + text: str, + voice: str, + voice_speed: float, + **kwargs, + ): + voice_id, voice_language = KlingLipSyncTextToVideoNode.get_voice_config()[voice] + return super().api_call( + video=video, + text=text, + voice_language=voice_language, + voice_id=voice_id, + voice_speed=voice_speed, + mode="text2video", + **kwargs, + ) + + +class KlingImageGenerationBase(KlingNodeBase): + """Kling Image Generation Base Node.""" + + RETURN_TYPES = ("IMAGE",) + CATEGORY = "api node/image/Kling" + + def validate_prompt(self, prompt: str, negative_prompt: Optional[str] = None): + if not prompt or len(prompt) > MAX_PROMPT_LENGTH_IMAGE_GEN: + raise ValueError( + f"Prompt must be less than {MAX_PROMPT_LENGTH_IMAGE_GEN} characters" + ) + if negative_prompt and len(negative_prompt) > MAX_PROMPT_LENGTH_IMAGE_GEN: + raise ValueError( + f"Negative prompt must be less than {MAX_PROMPT_LENGTH_IMAGE_GEN} characters" + ) + + +class KlingVirtualTryOnNode(KlingImageGenerationBase): + """Kling Virtual Try On Node.""" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "human_image": (IO.IMAGE, {}), + "cloth_image": (IO.IMAGE, {}), + "model_name": model_field_to_node_input( + IO.COMBO, + KlingVirtualTryOnRequest, + "model_name", + enum_type=KlingVirtualTryOnModelName, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + DESCRIPTION = "Kling Virtual Try On Node. Input a human image and a cloth image to try on the cloth on the human." + + def get_response( + self, task_id: str, auth_kwargs: dict[str,str] = None + ) -> KlingVirtualTryOnResponse: + return poll_until_finished( + auth_kwargs, + ApiEndpoint( + path=f"{PATH_VIRTUAL_TRY_ON}/{task_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=KlingVirtualTryOnResponse, + ), + ) + + def api_call( + self, + human_image: torch.Tensor, + cloth_image: torch.Tensor, + model_name: KlingVirtualTryOnModelName, + **kwargs, + ): + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=PATH_VIRTUAL_TRY_ON, + method=HttpMethod.POST, + request_model=KlingVirtualTryOnRequest, + response_model=KlingVirtualTryOnResponse, + ), + request=KlingVirtualTryOnRequest( + human_image=tensor_to_base64_string(human_image), + cloth_image=tensor_to_base64_string(cloth_image), + model_name=model_name, + ), + auth_kwargs=kwargs, + ) + + task_creation_response = initial_operation.execute() + validate_task_creation_response(task_creation_response) + task_id = task_creation_response.data.task_id + + final_response = self.get_response(task_id, auth_kwargs=kwargs) + validate_image_result_response(final_response) + + images = get_images_from_response(final_response) + return (image_result_to_node_output(images),) + + +class KlingImageGenerationNode(KlingImageGenerationBase): + """Kling Image Generation Node. 
Generate an image from a text prompt with an optional reference image.""" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "prompt": model_field_to_node_input( + IO.STRING, + KlingImageGenerationsRequest, + "prompt", + multiline=True, + max_length=MAX_PROMPT_LENGTH_IMAGE_GEN, + ), + "negative_prompt": model_field_to_node_input( + IO.STRING, + KlingImageGenerationsRequest, + "negative_prompt", + multiline=True, + ), + "image_type": model_field_to_node_input( + IO.COMBO, + KlingImageGenerationsRequest, + "image_reference", + enum_type=KlingImageGenImageReferenceType, + ), + "image_fidelity": model_field_to_node_input( + IO.FLOAT, + KlingImageGenerationsRequest, + "image_fidelity", + slider=True, + step=0.01, + ), + "human_fidelity": model_field_to_node_input( + IO.FLOAT, + KlingImageGenerationsRequest, + "human_fidelity", + slider=True, + step=0.01, + ), + "model_name": model_field_to_node_input( + IO.COMBO, + KlingImageGenerationsRequest, + "model_name", + enum_type=KlingImageGenModelName, + ), + "aspect_ratio": model_field_to_node_input( + IO.COMBO, + KlingImageGenerationsRequest, + "aspect_ratio", + enum_type=KlingImageGenAspectRatio, + ), + "n": model_field_to_node_input( + IO.INT, + KlingImageGenerationsRequest, + "n", + ), + }, + "optional": { + "image": (IO.IMAGE, {}), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + DESCRIPTION = "Kling Image Generation Node. Generate an image from a text prompt with an optional reference image." + + def get_response( + self, task_id: str, auth_kwargs: Optional[dict[str,str]] = None + ) -> KlingImageGenerationsResponse: + return poll_until_finished( + auth_kwargs, + ApiEndpoint( + path=f"{PATH_IMAGE_GENERATIONS}/{task_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=KlingImageGenerationsResponse, + ), + ) + + def api_call( + self, + model_name: KlingImageGenModelName, + prompt: str, + negative_prompt: str, + image_type: KlingImageGenImageReferenceType, + image_fidelity: float, + human_fidelity: float, + n: int, + aspect_ratio: KlingImageGenAspectRatio, + image: Optional[torch.Tensor] = None, + **kwargs, + ): + self.validate_prompt(prompt, negative_prompt) + + if image is not None: + image = tensor_to_base64_string(image) + + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=PATH_IMAGE_GENERATIONS, + method=HttpMethod.POST, + request_model=KlingImageGenerationsRequest, + response_model=KlingImageGenerationsResponse, + ), + request=KlingImageGenerationsRequest( + model_name=model_name, + prompt=prompt, + negative_prompt=negative_prompt, + image=image, + image_reference=image_type, + image_fidelity=image_fidelity, + human_fidelity=human_fidelity, + n=n, + aspect_ratio=aspect_ratio, + ), + auth_kwargs=kwargs, + ) + + task_creation_response = initial_operation.execute() + validate_task_creation_response(task_creation_response) + task_id = task_creation_response.data.task_id + + final_response = self.get_response(task_id, auth_kwargs=kwargs) + validate_image_result_response(final_response) + + images = get_images_from_response(final_response) + return (image_result_to_node_output(images),) + + +NODE_CLASS_MAPPINGS = { + "KlingCameraControls": KlingCameraControls, + "KlingTextToVideoNode": KlingTextToVideoNode, + "KlingImage2VideoNode": KlingImage2VideoNode, + "KlingCameraControlI2VNode": KlingCameraControlI2VNode, + "KlingCameraControlT2VNode": KlingCameraControlT2VNode, + "KlingStartEndFrameNode": KlingStartEndFrameNode, + 
"KlingVideoExtendNode": KlingVideoExtendNode, + "KlingLipSyncAudioToVideoNode": KlingLipSyncAudioToVideoNode, + "KlingLipSyncTextToVideoNode": KlingLipSyncTextToVideoNode, + "KlingVirtualTryOnNode": KlingVirtualTryOnNode, + "KlingImageGenerationNode": KlingImageGenerationNode, + "KlingSingleImageVideoEffectNode": KlingSingleImageVideoEffectNode, + "KlingDualCharacterVideoEffectNode": KlingDualCharacterVideoEffectNode, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "KlingCameraControls": "Kling Camera Controls", + "KlingTextToVideoNode": "Kling Text to Video", + "KlingImage2VideoNode": "Kling Image to Video", + "KlingCameraControlI2VNode": "Kling Image to Video (Camera Control)", + "KlingCameraControlT2VNode": "Kling Text to Video (Camera Control)", + "KlingStartEndFrameNode": "Kling Start-End Frame to Video", + "KlingVideoExtendNode": "Kling Video Extend", + "KlingLipSyncAudioToVideoNode": "Kling Lip Sync Video with Audio", + "KlingLipSyncTextToVideoNode": "Kling Lip Sync Video with Text", + "KlingVirtualTryOnNode": "Kling Virtual Try On", + "KlingImageGenerationNode": "Kling Image Generation", + "KlingSingleImageVideoEffectNode": "Kling Video Effects", + "KlingDualCharacterVideoEffectNode": "Kling Dual Character Video Effects", +} diff --git a/comfy_api_nodes/nodes_luma.py b/comfy_api_nodes/nodes_luma.py new file mode 100644 index 00000000..bd33a53e --- /dev/null +++ b/comfy_api_nodes/nodes_luma.py @@ -0,0 +1,704 @@ +from __future__ import annotations +from inspect import cleandoc +from typing import Optional +from comfy.comfy_types.node_typing import IO, ComfyNodeABC +from comfy_api.input_impl.video_types import VideoFromFile +from comfy_api_nodes.apis.luma_api import ( + LumaImageModel, + LumaVideoModel, + LumaVideoOutputResolution, + LumaVideoModelOutputDuration, + LumaAspectRatio, + LumaState, + LumaImageGenerationRequest, + LumaGenerationRequest, + LumaGeneration, + LumaCharacterRef, + LumaModifyImageRef, + LumaImageIdentity, + LumaReference, + LumaReferenceChain, + LumaImageReference, + LumaKeyframes, + LumaConceptChain, + LumaIO, + get_luma_concepts, +) +from comfy_api_nodes.apis.client import ( + ApiEndpoint, + HttpMethod, + SynchronousOperation, + PollingOperation, + EmptyRequest, +) +from comfy_api_nodes.apinode_utils import ( + upload_images_to_comfyapi, + process_image_response, + validate_string, +) + +import requests +import torch +from io import BytesIO + + +class LumaReferenceNode(ComfyNodeABC): + """ + Holds an image and weight for use with Luma Generate Image node. + """ + + RETURN_TYPES = (LumaIO.LUMA_REF,) + RETURN_NAMES = ("luma_ref",) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "create_luma_reference" + CATEGORY = "api node/image/Luma" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": ( + IO.IMAGE, + { + "tooltip": "Image to use as reference.", + }, + ), + "weight": ( + IO.FLOAT, + { + "default": 1.0, + "min": 0.0, + "max": 1.0, + "step": 0.01, + "tooltip": "Weight of image reference.", + }, + ), + }, + "optional": {"luma_ref": (LumaIO.LUMA_REF,)}, + } + + def create_luma_reference( + self, image: torch.Tensor, weight: float, luma_ref: LumaReferenceChain = None + ): + if luma_ref is not None: + luma_ref = luma_ref.clone() + else: + luma_ref = LumaReferenceChain() + luma_ref.add(LumaReference(image=image, weight=round(weight, 2))) + return (luma_ref,) + + +class LumaConceptsNode(ComfyNodeABC): + """ + Holds one or more Camera Concepts for use with Luma Text to Video and Luma Image to Video nodes. 
+ """ + + RETURN_TYPES = (LumaIO.LUMA_CONCEPTS,) + RETURN_NAMES = ("luma_concepts",) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "create_concepts" + CATEGORY = "api node/video/Luma" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "concept1": (get_luma_concepts(include_none=True),), + "concept2": (get_luma_concepts(include_none=True),), + "concept3": (get_luma_concepts(include_none=True),), + "concept4": (get_luma_concepts(include_none=True),), + }, + "optional": { + "luma_concepts": ( + LumaIO.LUMA_CONCEPTS, + { + "tooltip": "Optional Camera Concepts to add to the ones chosen here." + }, + ), + }, + } + + def create_concepts( + self, + concept1: str, + concept2: str, + concept3: str, + concept4: str, + luma_concepts: LumaConceptChain = None, + ): + chain = LumaConceptChain(str_list=[concept1, concept2, concept3, concept4]) + if luma_concepts is not None: + chain = luma_concepts.clone_and_merge(chain) + return (chain,) + + +class LumaImageGenerationNode(ComfyNodeABC): + """ + Generates images synchronously based on prompt and aspect ratio. + """ + + RETURN_TYPES = (IO.IMAGE,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/Luma" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the image generation", + }, + ), + "model": ([model.value for model in LumaImageModel],), + "aspect_ratio": ( + [ratio.value for ratio in LumaAspectRatio], + { + "default": LumaAspectRatio.ratio_16_9, + }, + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 0xFFFFFFFFFFFFFFFF, + "control_after_generate": True, + "tooltip": "Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.", + }, + ), + "style_image_weight": ( + IO.FLOAT, + { + "default": 1.0, + "min": 0.0, + "max": 1.0, + "step": 0.01, + "tooltip": "Weight of style image. Ignored if no style_image provided.", + }, + ), + }, + "optional": { + "image_luma_ref": ( + LumaIO.LUMA_REF, + { + "tooltip": "Luma Reference node connection to influence generation with input images; up to 4 images can be considered." + }, + ), + "style_image": ( + IO.IMAGE, + {"tooltip": "Style reference image; only 1 image will be used."}, + ), + "character_image": ( + IO.IMAGE, + { + "tooltip": "Character reference images; can be a batch of multiple, up to 4 images can be considered." 
+ }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call( + self, + prompt: str, + model: str, + aspect_ratio: str, + seed, + style_image_weight: float, + image_luma_ref: LumaReferenceChain = None, + style_image: torch.Tensor = None, + character_image: torch.Tensor = None, + **kwargs, + ): + validate_string(prompt, strip_whitespace=True, min_length=3) + # handle image_luma_ref + api_image_ref = None + if image_luma_ref is not None: + api_image_ref = self._convert_luma_refs( + image_luma_ref, max_refs=4, auth_kwargs=kwargs, + ) + # handle style_luma_ref + api_style_ref = None + if style_image is not None: + api_style_ref = self._convert_style_image( + style_image, weight=style_image_weight, auth_kwargs=kwargs, + ) + # handle character_ref images + character_ref = None + if character_image is not None: + download_urls = upload_images_to_comfyapi( + character_image, max_images=4, auth_kwargs=kwargs, + ) + character_ref = LumaCharacterRef( + identity0=LumaImageIdentity(images=download_urls) + ) + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/luma/generations/image", + method=HttpMethod.POST, + request_model=LumaImageGenerationRequest, + response_model=LumaGeneration, + ), + request=LumaImageGenerationRequest( + prompt=prompt, + model=model, + aspect_ratio=aspect_ratio, + image_ref=api_image_ref, + style_ref=api_style_ref, + character_ref=character_ref, + ), + auth_kwargs=kwargs, + ) + response_api: LumaGeneration = operation.execute() + + operation = PollingOperation( + poll_endpoint=ApiEndpoint( + path=f"/proxy/luma/generations/{response_api.id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=LumaGeneration, + ), + completed_statuses=[LumaState.completed], + failed_statuses=[LumaState.failed], + status_extractor=lambda x: x.state, + auth_kwargs=kwargs, + ) + response_poll = operation.execute() + + img_response = requests.get(response_poll.assets.image) + img = process_image_response(img_response) + return (img,) + + def _convert_luma_refs( + self, luma_ref: LumaReferenceChain, max_refs: int, auth_kwargs: Optional[dict[str,str]] = None + ): + luma_urls = [] + ref_count = 0 + for ref in luma_ref.refs: + download_urls = upload_images_to_comfyapi( + ref.image, max_images=1, auth_kwargs=auth_kwargs + ) + luma_urls.append(download_urls[0]) + ref_count += 1 + if ref_count >= max_refs: + break + return luma_ref.create_api_model(download_urls=luma_urls, max_refs=max_refs) + + def _convert_style_image( + self, style_image: torch.Tensor, weight: float, auth_kwargs: Optional[dict[str,str]] = None + ): + chain = LumaReferenceChain( + first_ref=LumaReference(image=style_image, weight=weight) + ) + return self._convert_luma_refs(chain, max_refs=1, auth_kwargs=auth_kwargs) + + +class LumaImageModifyNode(ComfyNodeABC): + """ + Modifies images synchronously based on prompt and aspect ratio. 
+ """ + + RETURN_TYPES = (IO.IMAGE,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/Luma" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": (IO.IMAGE,), + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the image generation", + }, + ), + "image_weight": ( + IO.FLOAT, + { + "default": 0.1, + "min": 0.0, + "max": 0.98, + "step": 0.01, + "tooltip": "Weight of the image; the closer to 1.0, the less the image will be modified.", + }, + ), + "model": ([model.value for model in LumaImageModel],), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 0xFFFFFFFFFFFFFFFF, + "control_after_generate": True, + "tooltip": "Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.", + }, + ), + }, + "optional": {}, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call( + self, + prompt: str, + model: str, + image: torch.Tensor, + image_weight: float, + seed, + **kwargs, + ): + # first, upload image + download_urls = upload_images_to_comfyapi( + image, max_images=1, auth_kwargs=kwargs, + ) + image_url = download_urls[0] + # next, make Luma call with download url provided + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/luma/generations/image", + method=HttpMethod.POST, + request_model=LumaImageGenerationRequest, + response_model=LumaGeneration, + ), + request=LumaImageGenerationRequest( + prompt=prompt, + model=model, + modify_image_ref=LumaModifyImageRef( + url=image_url, weight=round(max(min(1.0-image_weight, 0.98), 0.0), 2) + ), + ), + auth_kwargs=kwargs, + ) + response_api: LumaGeneration = operation.execute() + + operation = PollingOperation( + poll_endpoint=ApiEndpoint( + path=f"/proxy/luma/generations/{response_api.id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=LumaGeneration, + ), + completed_statuses=[LumaState.completed], + failed_statuses=[LumaState.failed], + status_extractor=lambda x: x.state, + auth_kwargs=kwargs, + ) + response_poll = operation.execute() + + img_response = requests.get(response_poll.assets.image) + img = process_image_response(img_response) + return (img,) + + +class LumaTextToVideoGenerationNode(ComfyNodeABC): + """ + Generates videos synchronously based on prompt and output_size. 
+ """ + + RETURN_TYPES = (IO.VIDEO,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/video/Luma" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the video generation", + }, + ), + "model": ([model.value for model in LumaVideoModel],), + "aspect_ratio": ( + [ratio.value for ratio in LumaAspectRatio], + { + "default": LumaAspectRatio.ratio_16_9, + }, + ), + "resolution": ( + [resolution.value for resolution in LumaVideoOutputResolution], + { + "default": LumaVideoOutputResolution.res_540p, + }, + ), + "duration": ([dur.value for dur in LumaVideoModelOutputDuration],), + "loop": ( + IO.BOOLEAN, + { + "default": False, + }, + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 0xFFFFFFFFFFFFFFFF, + "control_after_generate": True, + "tooltip": "Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.", + }, + ), + }, + "optional": { + "luma_concepts": ( + LumaIO.LUMA_CONCEPTS, + { + "tooltip": "Optional Camera Concepts to dictate camera motion via the Luma Concepts node." + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call( + self, + prompt: str, + model: str, + aspect_ratio: str, + resolution: str, + duration: str, + loop: bool, + seed, + luma_concepts: LumaConceptChain = None, + **kwargs, + ): + validate_string(prompt, strip_whitespace=False, min_length=3) + duration = duration if model != LumaVideoModel.ray_1_6 else None + resolution = resolution if model != LumaVideoModel.ray_1_6 else None + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/luma/generations", + method=HttpMethod.POST, + request_model=LumaGenerationRequest, + response_model=LumaGeneration, + ), + request=LumaGenerationRequest( + prompt=prompt, + model=model, + resolution=resolution, + aspect_ratio=aspect_ratio, + duration=duration, + loop=loop, + concepts=luma_concepts.create_api_model() if luma_concepts else None, + ), + auth_kwargs=kwargs, + ) + response_api: LumaGeneration = operation.execute() + + operation = PollingOperation( + poll_endpoint=ApiEndpoint( + path=f"/proxy/luma/generations/{response_api.id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=LumaGeneration, + ), + completed_statuses=[LumaState.completed], + failed_statuses=[LumaState.failed], + status_extractor=lambda x: x.state, + auth_kwargs=kwargs, + ) + response_poll = operation.execute() + + vid_response = requests.get(response_poll.assets.video) + return (VideoFromFile(BytesIO(vid_response.content)),) + + +class LumaImageToVideoGenerationNode(ComfyNodeABC): + """ + Generates videos synchronously based on prompt, input images, and output_size. 
+ """ + + RETURN_TYPES = (IO.VIDEO,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/video/Luma" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the video generation", + }, + ), + "model": ([model.value for model in LumaVideoModel],), + # "aspect_ratio": ([ratio.value for ratio in LumaAspectRatio], { + # "default": LumaAspectRatio.ratio_16_9, + # }), + "resolution": ( + [resolution.value for resolution in LumaVideoOutputResolution], + { + "default": LumaVideoOutputResolution.res_540p, + }, + ), + "duration": ([dur.value for dur in LumaVideoModelOutputDuration],), + "loop": ( + IO.BOOLEAN, + { + "default": False, + }, + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 0xFFFFFFFFFFFFFFFF, + "control_after_generate": True, + "tooltip": "Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.", + }, + ), + }, + "optional": { + "first_image": ( + IO.IMAGE, + {"tooltip": "First frame of generated video."}, + ), + "last_image": (IO.IMAGE, {"tooltip": "Last frame of generated video."}), + "luma_concepts": ( + LumaIO.LUMA_CONCEPTS, + { + "tooltip": "Optional Camera Concepts to dictate camera motion via the Luma Concepts node." + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call( + self, + prompt: str, + model: str, + resolution: str, + duration: str, + loop: bool, + seed, + first_image: torch.Tensor = None, + last_image: torch.Tensor = None, + luma_concepts: LumaConceptChain = None, + **kwargs, + ): + if first_image is None and last_image is None: + raise Exception( + "At least one of first_image and last_image requires an input." 
+ ) + keyframes = self._convert_to_keyframes(first_image, last_image, auth_kwargs=kwargs) + duration = duration if model != LumaVideoModel.ray_1_6 else None + resolution = resolution if model != LumaVideoModel.ray_1_6 else None + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/luma/generations", + method=HttpMethod.POST, + request_model=LumaGenerationRequest, + response_model=LumaGeneration, + ), + request=LumaGenerationRequest( + prompt=prompt, + model=model, + aspect_ratio=LumaAspectRatio.ratio_16_9, # ignored, but still needed by the API for some reason + resolution=resolution, + duration=duration, + loop=loop, + keyframes=keyframes, + concepts=luma_concepts.create_api_model() if luma_concepts else None, + ), + auth_kwargs=kwargs, + ) + response_api: LumaGeneration = operation.execute() + + operation = PollingOperation( + poll_endpoint=ApiEndpoint( + path=f"/proxy/luma/generations/{response_api.id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=LumaGeneration, + ), + completed_statuses=[LumaState.completed], + failed_statuses=[LumaState.failed], + status_extractor=lambda x: x.state, + auth_kwargs=kwargs, + ) + response_poll = operation.execute() + + vid_response = requests.get(response_poll.assets.video) + return (VideoFromFile(BytesIO(vid_response.content)),) + + def _convert_to_keyframes( + self, + first_image: torch.Tensor = None, + last_image: torch.Tensor = None, + auth_kwargs: Optional[dict[str,str]] = None, + ): + if first_image is None and last_image is None: + return None + frame0 = None + frame1 = None + if first_image is not None: + download_urls = upload_images_to_comfyapi( + first_image, max_images=1, auth_kwargs=auth_kwargs, + ) + frame0 = LumaImageReference(type="image", url=download_urls[0]) + if last_image is not None: + download_urls = upload_images_to_comfyapi( + last_image, max_images=1, auth_kwargs=auth_kwargs, + ) + frame1 = LumaImageReference(type="image", url=download_urls[0]) + return LumaKeyframes(frame0=frame0, frame1=frame1) + + +# A dictionary that contains all nodes you want to export with their names +# NOTE: names should be globally unique +NODE_CLASS_MAPPINGS = { + "LumaImageNode": LumaImageGenerationNode, + "LumaImageModifyNode": LumaImageModifyNode, + "LumaVideoNode": LumaTextToVideoGenerationNode, + "LumaImageToVideoNode": LumaImageToVideoGenerationNode, + "LumaReferenceNode": LumaReferenceNode, + "LumaConceptsNode": LumaConceptsNode, +} + +# A dictionary that contains the friendly/humanly readable titles for the nodes +NODE_DISPLAY_NAME_MAPPINGS = { + "LumaImageNode": "Luma Text to Image", + "LumaImageModifyNode": "Luma Image to Image", + "LumaVideoNode": "Luma Text to Video", + "LumaImageToVideoNode": "Luma Image to Video", + "LumaReferenceNode": "Luma Reference", + "LumaConceptsNode": "Luma Concepts", +} diff --git a/comfy_api_nodes/nodes_minimax.py b/comfy_api_nodes/nodes_minimax.py new file mode 100644 index 00000000..fd64aeb0 --- /dev/null +++ b/comfy_api_nodes/nodes_minimax.py @@ -0,0 +1,309 @@ +from comfy.comfy_types.node_typing import IO +from comfy_api.input_impl.video_types import VideoFromFile +from comfy_api_nodes.apis import ( + MinimaxVideoGenerationRequest, + MinimaxVideoGenerationResponse, + MinimaxFileRetrieveResponse, + MinimaxTaskResultResponse, + SubjectReferenceItem, + Model +) +from comfy_api_nodes.apis.client import ( + ApiEndpoint, + HttpMethod, + SynchronousOperation, + PollingOperation, + EmptyRequest, +) +from comfy_api_nodes.apinode_utils import ( + 
download_url_to_bytesio, + upload_images_to_comfyapi, + validate_string, +) + +import torch +import logging + + +class MinimaxTextToVideoNode: + """ + Generates videos synchronously based on a prompt, and optional parameters using MiniMax's API. + """ + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "prompt_text": ( + "STRING", + { + "multiline": True, + "default": "", + "tooltip": "Text prompt to guide the video generation", + }, + ), + "model": ( + [ + "T2V-01", + "T2V-01-Director", + ], + { + "default": "T2V-01", + "tooltip": "Model to use for video generation", + }, + ), + }, + "optional": { + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 0xFFFFFFFFFFFFFFFF, + "control_after_generate": True, + "tooltip": "The random seed used for creating the noise.", + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + RETURN_TYPES = ("VIDEO",) + DESCRIPTION = "Generates videos from prompts using MiniMax's API" + FUNCTION = "generate_video" + CATEGORY = "api node/video/MiniMax" + API_NODE = True + OUTPUT_NODE = True + + def generate_video( + self, + prompt_text, + seed=0, + model="T2V-01", + image: torch.Tensor=None, # used for ImageToVideo + subject: torch.Tensor=None, # used for SubjectToVideo + **kwargs, + ): + ''' + Function used between MiniMax nodes - supports T2V, I2V, and S2V, based on provided arguments. + ''' + if image is None: + validate_string(prompt_text, field_name="prompt_text") + # upload image, if passed in + image_url = None + if image is not None: + image_url = upload_images_to_comfyapi(image, max_images=1, auth_kwargs=kwargs)[0] + + # TODO: figure out how to deal with subject properly, API returns invalid params when using S2V-01 model + subject_reference = None + if subject is not None: + subject_url = upload_images_to_comfyapi(subject, max_images=1, auth_kwargs=kwargs)[0] + subject_reference = [SubjectReferenceItem(image=subject_url)] + + + video_generate_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/minimax/video_generation", + method=HttpMethod.POST, + request_model=MinimaxVideoGenerationRequest, + response_model=MinimaxVideoGenerationResponse, + ), + request=MinimaxVideoGenerationRequest( + model=Model(model), + prompt=prompt_text, + callback_url=None, + first_frame_image=image_url, + subject_reference=subject_reference, + prompt_optimizer=None, + ), + auth_kwargs=kwargs, + ) + response = video_generate_operation.execute() + + task_id = response.task_id + if not task_id: + raise Exception(f"MiniMax generation failed: {response.base_resp}") + + video_generate_operation = PollingOperation( + poll_endpoint=ApiEndpoint( + path="/proxy/minimax/query/video_generation", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=MinimaxTaskResultResponse, + query_params={"task_id": task_id}, + ), + completed_statuses=["Success"], + failed_statuses=["Fail"], + status_extractor=lambda x: x.status.value, + auth_kwargs=kwargs, + ) + task_result = video_generate_operation.execute() + + file_id = task_result.file_id + if file_id is None: + raise Exception("Request was not successful. 
Missing file ID.") + file_retrieve_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/minimax/files/retrieve", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=MinimaxFileRetrieveResponse, + query_params={"file_id": int(file_id)}, + ), + request=EmptyRequest(), + auth_kwargs=kwargs, + ) + file_result = file_retrieve_operation.execute() + + file_url = file_result.file.download_url + if file_url is None: + raise Exception( + f"No video was found in the response. Full response: {file_result.model_dump()}" + ) + logging.info(f"Generated video URL: {file_url}") + + video_io = download_url_to_bytesio(file_url) + if video_io is None: + error_msg = f"Failed to download video from {file_url}" + logging.error(error_msg) + raise Exception(error_msg) + return (VideoFromFile(video_io),) + + +class MinimaxImageToVideoNode(MinimaxTextToVideoNode): + """ + Generates videos synchronously based on an image and prompt, and optional parameters using MiniMax's API. + """ + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": ( + IO.IMAGE, + { + "tooltip": "Image to use as first frame of video generation" + }, + ), + "prompt_text": ( + "STRING", + { + "multiline": True, + "default": "", + "tooltip": "Text prompt to guide the video generation", + }, + ), + "model": ( + [ + "I2V-01-Director", + "I2V-01", + "I2V-01-live", + ], + { + "default": "I2V-01", + "tooltip": "Model to use for video generation", + }, + ), + }, + "optional": { + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 0xFFFFFFFFFFFFFFFF, + "control_after_generate": True, + "tooltip": "The random seed used for creating the noise.", + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + RETURN_TYPES = ("VIDEO",) + DESCRIPTION = "Generates videos from an image and prompts using MiniMax's API" + FUNCTION = "generate_video" + CATEGORY = "api node/video/MiniMax" + API_NODE = True + OUTPUT_NODE = True + + +class MinimaxSubjectToVideoNode(MinimaxTextToVideoNode): + """ + Generates videos synchronously based on an image and prompt, and optional parameters using MiniMax's API. 
+ """ + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "subject": ( + IO.IMAGE, + { + "tooltip": "Image of subject to reference video generation" + }, + ), + "prompt_text": ( + "STRING", + { + "multiline": True, + "default": "", + "tooltip": "Text prompt to guide the video generation", + }, + ), + "model": ( + [ + "S2V-01", + ], + { + "default": "S2V-01", + "tooltip": "Model to use for video generation", + }, + ), + }, + "optional": { + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 0xFFFFFFFFFFFFFFFF, + "control_after_generate": True, + "tooltip": "The random seed used for creating the noise.", + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + RETURN_TYPES = ("VIDEO",) + DESCRIPTION = "Generates videos from an image and prompts using MiniMax's API" + FUNCTION = "generate_video" + CATEGORY = "api node/video/MiniMax" + API_NODE = True + OUTPUT_NODE = True + + +# A dictionary that contains all nodes you want to export with their names +# NOTE: names should be globally unique +NODE_CLASS_MAPPINGS = { + "MinimaxTextToVideoNode": MinimaxTextToVideoNode, + "MinimaxImageToVideoNode": MinimaxImageToVideoNode, + # "MinimaxSubjectToVideoNode": MinimaxSubjectToVideoNode, +} + +# A dictionary that contains the friendly/humanly readable titles for the nodes +NODE_DISPLAY_NAME_MAPPINGS = { + "MinimaxTextToVideoNode": "MiniMax Text to Video", + "MinimaxImageToVideoNode": "MiniMax Image to Video", + "MinimaxSubjectToVideoNode": "MiniMax Subject to Video", +} diff --git a/comfy_api_nodes/nodes_openai.py b/comfy_api_nodes/nodes_openai.py new file mode 100644 index 00000000..c63908be --- /dev/null +++ b/comfy_api_nodes/nodes_openai.py @@ -0,0 +1,496 @@ +import io +from inspect import cleandoc +import numpy as np +import torch +from PIL import Image + +from comfy.comfy_types.node_typing import IO, ComfyNodeABC, InputTypeDict + + +from comfy_api_nodes.apis import ( + OpenAIImageGenerationRequest, + OpenAIImageEditRequest, + OpenAIImageGenerationResponse, +) + +from comfy_api_nodes.apis.client import ( + ApiEndpoint, + HttpMethod, + SynchronousOperation, +) + +from comfy_api_nodes.apinode_utils import ( + downscale_image_tensor, + validate_and_cast_response, + validate_string, +) + +class OpenAIDalle2(ComfyNodeABC): + """ + Generates images synchronously via OpenAI's DALL·E 2 endpoint. 
+ """ + + def __init__(self): + pass + + @classmethod + def INPUT_TYPES(cls) -> InputTypeDict: + return { + "required": { + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Text prompt for DALL·E", + }, + ), + }, + "optional": { + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 2**31 - 1, + "step": 1, + "display": "number", + "control_after_generate": True, + "tooltip": "not implemented yet in backend", + }, + ), + "size": ( + IO.COMBO, + { + "options": ["256x256", "512x512", "1024x1024"], + "default": "1024x1024", + "tooltip": "Image size", + }, + ), + "n": ( + IO.INT, + { + "default": 1, + "min": 1, + "max": 8, + "step": 1, + "display": "number", + "tooltip": "How many images to generate", + }, + ), + "image": ( + IO.IMAGE, + { + "default": None, + "tooltip": "Optional reference image for image editing.", + }, + ), + "mask": ( + IO.MASK, + { + "default": None, + "tooltip": "Optional mask for inpainting (white areas will be replaced)", + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + RETURN_TYPES = (IO.IMAGE,) + FUNCTION = "api_call" + CATEGORY = "api node/image/OpenAI" + DESCRIPTION = cleandoc(__doc__ or "") + API_NODE = True + + def api_call( + self, + prompt, + seed=0, + image=None, + mask=None, + n=1, + size="1024x1024", + **kwargs + ): + validate_string(prompt, strip_whitespace=False) + model = "dall-e-2" + path = "/proxy/openai/images/generations" + content_type = "application/json" + request_class = OpenAIImageGenerationRequest + img_binary = None + + if image is not None and mask is not None: + path = "/proxy/openai/images/edits" + content_type = "multipart/form-data" + request_class = OpenAIImageEditRequest + + input_tensor = image.squeeze().cpu() + height, width, channels = input_tensor.shape + rgba_tensor = torch.ones(height, width, 4, device="cpu") + rgba_tensor[:, :, :channels] = input_tensor + + if mask.shape[1:] != image.shape[1:-1]: + raise Exception("Mask and Image must be the same size") + rgba_tensor[:, :, 3] = 1 - mask.squeeze().cpu() + + rgba_tensor = downscale_image_tensor(rgba_tensor.unsqueeze(0)).squeeze() + + image_np = (rgba_tensor.numpy() * 255).astype(np.uint8) + img = Image.fromarray(image_np) + img_byte_arr = io.BytesIO() + img.save(img_byte_arr, format="PNG") + img_byte_arr.seek(0) + img_binary = img_byte_arr # .getvalue() + img_binary.name = "image.png" + elif image is not None or mask is not None: + raise Exception("Dall-E 2 image editing requires an image AND a mask") + + # Build the operation + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=path, + method=HttpMethod.POST, + request_model=request_class, + response_model=OpenAIImageGenerationResponse, + ), + request=request_class( + model=model, + prompt=prompt, + n=n, + size=size, + seed=seed, + ), + files=( + { + "image": img_binary, + } + if img_binary + else None + ), + content_type=content_type, + auth_kwargs=kwargs, + ) + + response = operation.execute() + + img_tensor = validate_and_cast_response(response) + return (img_tensor,) + + +class OpenAIDalle3(ComfyNodeABC): + """ + Generates images synchronously via OpenAI's DALL·E 3 endpoint. 
+ """ + + def __init__(self): + pass + + @classmethod + def INPUT_TYPES(cls) -> InputTypeDict: + return { + "required": { + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Text prompt for DALL·E", + }, + ), + }, + "optional": { + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 2**31 - 1, + "step": 1, + "display": "number", + "control_after_generate": True, + "tooltip": "not implemented yet in backend", + }, + ), + "quality": ( + IO.COMBO, + { + "options": ["standard", "hd"], + "default": "standard", + "tooltip": "Image quality", + }, + ), + "style": ( + IO.COMBO, + { + "options": ["natural", "vivid"], + "default": "natural", + "tooltip": "Vivid causes the model to lean towards generating hyper-real and dramatic images. Natural causes the model to produce more natural, less hyper-real looking images.", + }, + ), + "size": ( + IO.COMBO, + { + "options": ["1024x1024", "1024x1792", "1792x1024"], + "default": "1024x1024", + "tooltip": "Image size", + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + RETURN_TYPES = (IO.IMAGE,) + FUNCTION = "api_call" + CATEGORY = "api node/image/OpenAI" + DESCRIPTION = cleandoc(__doc__ or "") + API_NODE = True + + def api_call( + self, + prompt, + seed=0, + style="natural", + quality="standard", + size="1024x1024", + **kwargs + ): + validate_string(prompt, strip_whitespace=False) + model = "dall-e-3" + + # build the operation + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/openai/images/generations", + method=HttpMethod.POST, + request_model=OpenAIImageGenerationRequest, + response_model=OpenAIImageGenerationResponse, + ), + request=OpenAIImageGenerationRequest( + model=model, + prompt=prompt, + quality=quality, + size=size, + style=style, + seed=seed, + ), + auth_kwargs=kwargs, + ) + + response = operation.execute() + + img_tensor = validate_and_cast_response(response) + return (img_tensor,) + + +class OpenAIGPTImage1(ComfyNodeABC): + """ + Generates images synchronously via OpenAI's GPT Image 1 endpoint. 
+ """ + + def __init__(self): + pass + + @classmethod + def INPUT_TYPES(cls) -> InputTypeDict: + return { + "required": { + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Text prompt for GPT Image 1", + }, + ), + }, + "optional": { + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 2**31 - 1, + "step": 1, + "display": "number", + "control_after_generate": True, + "tooltip": "not implemented yet in backend", + }, + ), + "quality": ( + IO.COMBO, + { + "options": ["low", "medium", "high"], + "default": "low", + "tooltip": "Image quality, affects cost and generation time.", + }, + ), + "background": ( + IO.COMBO, + { + "options": ["opaque", "transparent"], + "default": "opaque", + "tooltip": "Return image with or without background", + }, + ), + "size": ( + IO.COMBO, + { + "options": ["auto", "1024x1024", "1024x1536", "1536x1024"], + "default": "auto", + "tooltip": "Image size", + }, + ), + "n": ( + IO.INT, + { + "default": 1, + "min": 1, + "max": 8, + "step": 1, + "display": "number", + "tooltip": "How many images to generate", + }, + ), + "image": ( + IO.IMAGE, + { + "default": None, + "tooltip": "Optional reference image for image editing.", + }, + ), + "mask": ( + IO.MASK, + { + "default": None, + "tooltip": "Optional mask for inpainting (white areas will be replaced)", + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + RETURN_TYPES = (IO.IMAGE,) + FUNCTION = "api_call" + CATEGORY = "api node/image/OpenAI" + DESCRIPTION = cleandoc(__doc__ or "") + API_NODE = True + + def api_call( + self, + prompt, + seed=0, + quality="low", + background="opaque", + image=None, + mask=None, + n=1, + size="1024x1024", + **kwargs + ): + validate_string(prompt, strip_whitespace=False) + model = "gpt-image-1" + path = "/proxy/openai/images/generations" + content_type="application/json" + request_class = OpenAIImageGenerationRequest + img_binaries = [] + mask_binary = None + files = [] + + if image is not None: + path = "/proxy/openai/images/edits" + request_class = OpenAIImageEditRequest + content_type ="multipart/form-data" + + batch_size = image.shape[0] + + for i in range(batch_size): + single_image = image[i : i + 1] + scaled_image = downscale_image_tensor(single_image).squeeze() + + image_np = (scaled_image.numpy() * 255).astype(np.uint8) + img = Image.fromarray(image_np) + img_byte_arr = io.BytesIO() + img.save(img_byte_arr, format="PNG") + img_byte_arr.seek(0) + img_binary = img_byte_arr + img_binary.name = f"image_{i}.png" + + img_binaries.append(img_binary) + if batch_size == 1: + files.append(("image", img_binary)) + else: + files.append(("image[]", img_binary)) + + if mask is not None: + if image is None: + raise Exception("Cannot use a mask without an input image") + if image.shape[0] != 1: + raise Exception("Cannot use a mask with multiple image") + if mask.shape[1:] != image.shape[1:-1]: + raise Exception("Mask and Image must be the same size") + batch, height, width = mask.shape + rgba_mask = torch.zeros(height, width, 4, device="cpu") + rgba_mask[:, :, 3] = 1 - mask.squeeze().cpu() + + scaled_mask = downscale_image_tensor(rgba_mask.unsqueeze(0)).squeeze() + + mask_np = (scaled_mask.numpy() * 255).astype(np.uint8) + mask_img = Image.fromarray(mask_np) + mask_img_byte_arr = io.BytesIO() + mask_img.save(mask_img_byte_arr, format="PNG") + mask_img_byte_arr.seek(0) + mask_binary = mask_img_byte_arr + mask_binary.name = "mask.png" + files.append(("mask", mask_binary)) + + # Build the 
operation + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=path, + method=HttpMethod.POST, + request_model=request_class, + response_model=OpenAIImageGenerationResponse, + ), + request=request_class( + model=model, + prompt=prompt, + quality=quality, + background=background, + n=n, + seed=seed, + size=size, + ), + files=files if files else None, + content_type=content_type, + auth_kwargs=kwargs, + ) + + response = operation.execute() + + img_tensor = validate_and_cast_response(response) + return (img_tensor,) + + +# A dictionary that contains all nodes you want to export with their names +# NOTE: names should be globally unique +NODE_CLASS_MAPPINGS = { + "OpenAIDalle2": OpenAIDalle2, + "OpenAIDalle3": OpenAIDalle3, + "OpenAIGPTImage1": OpenAIGPTImage1, +} + +# A dictionary that contains the friendly/humanly readable titles for the nodes +NODE_DISPLAY_NAME_MAPPINGS = { + "OpenAIDalle2": "OpenAI DALL·E 2", + "OpenAIDalle3": "OpenAI DALL·E 3", + "OpenAIGPTImage1": "OpenAI GPT Image 1", +} diff --git a/comfy_api_nodes/nodes_pika.py b/comfy_api_nodes/nodes_pika.py new file mode 100644 index 00000000..08ec9cf0 --- /dev/null +++ b/comfy_api_nodes/nodes_pika.py @@ -0,0 +1,757 @@ +""" +Pika x ComfyUI API Nodes + +Pika API docs: https://pika-827374fb.mintlify.app/api-reference +""" +from __future__ import annotations + +import io +from typing import Optional, TypeVar +import logging +import torch +import numpy as np +from comfy_api_nodes.apis import ( + PikaBodyGenerate22T2vGenerate22T2vPost, + PikaGenerateResponse, + PikaBodyGenerate22I2vGenerate22I2vPost, + PikaVideoResponse, + PikaBodyGenerate22C2vGenerate22PikascenesPost, + IngredientsMode, + PikaDurationEnum, + PikaResolutionEnum, + PikaBodyGeneratePikaffectsGeneratePikaffectsPost, + PikaBodyGeneratePikadditionsGeneratePikadditionsPost, + PikaBodyGeneratePikaswapsGeneratePikaswapsPost, + PikaBodyGenerate22KeyframeGenerate22PikaframesPost, + Pikaffect, +) +from comfy_api_nodes.apis.client import ( + ApiEndpoint, + HttpMethod, + SynchronousOperation, + PollingOperation, + EmptyRequest, +) +from comfy_api_nodes.apinode_utils import ( + tensor_to_bytesio, + download_url_to_video_output, +) +from comfy_api_nodes.mapper_utils import model_field_to_node_input +from comfy_api.input_impl.video_types import VideoInput, VideoContainer, VideoCodec +from comfy_api.input_impl import VideoFromFile +from comfy.comfy_types.node_typing import IO, ComfyNodeABC, InputTypeOptions + +R = TypeVar("R") + +PATH_PIKADDITIONS = "/proxy/pika/generate/pikadditions" +PATH_PIKASWAPS = "/proxy/pika/generate/pikaswaps" +PATH_PIKAFFECTS = "/proxy/pika/generate/pikaffects" + +PIKA_API_VERSION = "2.2" +PATH_TEXT_TO_VIDEO = f"/proxy/pika/generate/{PIKA_API_VERSION}/t2v" +PATH_IMAGE_TO_VIDEO = f"/proxy/pika/generate/{PIKA_API_VERSION}/i2v" +PATH_PIKAFRAMES = f"/proxy/pika/generate/{PIKA_API_VERSION}/pikaframes" +PATH_PIKASCENES = f"/proxy/pika/generate/{PIKA_API_VERSION}/pikascenes" + +PATH_VIDEO_GET = "/proxy/pika/videos" + + +class PikaApiError(Exception): + """Exception for Pika API errors.""" + + pass + + +def is_valid_video_response(response: PikaVideoResponse) -> bool: + """Check if the video response is valid.""" + return hasattr(response, "url") and response.url is not None + + +def is_valid_initial_response(response: PikaGenerateResponse) -> bool: + """Check if the initial response is valid.""" + return hasattr(response, "video_id") and response.video_id is not None + + +class PikaNodeBase(ComfyNodeABC): + """Base class for Pika nodes.""" + + @classmethod 
+ def get_base_inputs_types( + cls, request_model + ) -> dict[str, tuple[IO, InputTypeOptions]]: + """Get the base required inputs types common to all Pika nodes.""" + return { + "prompt_text": model_field_to_node_input( + IO.STRING, + request_model, + "promptText", + multiline=True, + ), + "negative_prompt": model_field_to_node_input( + IO.STRING, + request_model, + "negativePrompt", + multiline=True, + ), + "seed": model_field_to_node_input( + IO.INT, + request_model, + "seed", + min=0, + max=0xFFFFFFFF, + control_after_generate=True, + ), + "resolution": model_field_to_node_input( + IO.COMBO, + request_model, + "resolution", + enum_type=PikaResolutionEnum, + ), + "duration": model_field_to_node_input( + IO.COMBO, + request_model, + "duration", + enum_type=PikaDurationEnum, + ), + } + + CATEGORY = "api node/video/Pika" + API_NODE = True + FUNCTION = "api_call" + RETURN_TYPES = ("VIDEO",) + + def poll_for_task_status( + self, task_id: str, auth_kwargs: Optional[dict[str,str]] = None + ) -> PikaGenerateResponse: + polling_operation = PollingOperation( + poll_endpoint=ApiEndpoint( + path=f"{PATH_VIDEO_GET}/{task_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=PikaVideoResponse, + ), + completed_statuses=[ + "finished", + ], + failed_statuses=["failed", "cancelled"], + status_extractor=lambda response: ( + response.status.value if response.status else None + ), + progress_extractor=lambda response: ( + response.progress if hasattr(response, "progress") else None + ), + auth_kwargs=auth_kwargs, + ) + return polling_operation.execute() + + def execute_task( + self, + initial_operation: SynchronousOperation[R, PikaGenerateResponse], + auth_kwargs: Optional[dict[str,str]] = None, + ) -> tuple[VideoFromFile]: + """Executes the initial operation then polls for the task status until it is completed. + + Args: + initial_operation: The initial operation to execute. + auth_kwargs: The authentication token(s) to use for the API call. + + Returns: + A tuple containing the video file as a VIDEO output. + """ + initial_response = initial_operation.execute() + if not is_valid_initial_response(initial_response): + error_msg = f"Pika initial request failed. Code: {initial_response.code}, Message: {initial_response.message}, Data: {initial_response.data}" + logging.error(error_msg) + raise PikaApiError(error_msg) + + task_id = initial_response.video_id + final_response = self.poll_for_task_status(task_id, auth_kwargs) + if not is_valid_video_response(final_response): + error_msg = ( + f"Pika task {task_id} succeeded but no video data found in response." + ) + logging.error(error_msg) + raise PikaApiError(error_msg) + + video_url = str(final_response.url) + logging.info("Pika task %s succeeded. Video URL: %s", task_id, video_url) + + return (download_url_to_video_output(video_url),) + + +class PikaImageToVideoV2_2(PikaNodeBase): + """Pika 2.2 Image to Video Node.""" + + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "image": ( + IO.IMAGE, + {"tooltip": "The image to convert to video"}, + ), + **cls.get_base_inputs_types(PikaBodyGenerate22I2vGenerate22I2vPost), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + DESCRIPTION = "Sends an image and prompt to the Pika API v2.2 to generate a video." 
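+    # Request flow (shared via PikaNodeBase.execute_task): the image is encoded
+    # to PNG and posted as multipart/form-data together with the serialized
+    # request model; the initial response returns a video_id, which is then
+    # polled at /proxy/pika/videos/{video_id} until the status is "finished".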
+ + def api_call( + self, + image: torch.Tensor, + prompt_text: str, + negative_prompt: str, + seed: int, + resolution: str, + duration: int, + **kwargs + ) -> tuple[VideoFromFile]: + # Convert image to BytesIO + image_bytes_io = tensor_to_bytesio(image) + image_bytes_io.seek(0) + + pika_files = {"image": ("image.png", image_bytes_io, "image/png")} + + # Prepare non-file data + pika_request_data = PikaBodyGenerate22I2vGenerate22I2vPost( + promptText=prompt_text, + negativePrompt=negative_prompt, + seed=seed, + resolution=resolution, + duration=duration, + ) + + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=PATH_IMAGE_TO_VIDEO, + method=HttpMethod.POST, + request_model=PikaBodyGenerate22I2vGenerate22I2vPost, + response_model=PikaGenerateResponse, + ), + request=pika_request_data, + files=pika_files, + content_type="multipart/form-data", + auth_kwargs=kwargs, + ) + + return self.execute_task(initial_operation, auth_kwargs=kwargs) + + +class PikaTextToVideoNodeV2_2(PikaNodeBase): + """Pika Text2Video v2.2 Node.""" + + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + **cls.get_base_inputs_types(PikaBodyGenerate22T2vGenerate22T2vPost), + "aspect_ratio": model_field_to_node_input( + IO.FLOAT, + PikaBodyGenerate22T2vGenerate22T2vPost, + "aspectRatio", + step=0.001, + min=0.4, + max=2.5, + default=1.7777777777777777, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + DESCRIPTION = "Sends a text prompt to the Pika API v2.2 to generate a video." + + def api_call( + self, + prompt_text: str, + negative_prompt: str, + seed: int, + resolution: str, + duration: int, + aspect_ratio: float, + **kwargs, + ) -> tuple[VideoFromFile]: + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=PATH_TEXT_TO_VIDEO, + method=HttpMethod.POST, + request_model=PikaBodyGenerate22T2vGenerate22T2vPost, + response_model=PikaGenerateResponse, + ), + request=PikaBodyGenerate22T2vGenerate22T2vPost( + promptText=prompt_text, + negativePrompt=negative_prompt, + seed=seed, + resolution=resolution, + duration=duration, + aspectRatio=aspect_ratio, + ), + auth_kwargs=kwargs, + content_type="application/x-www-form-urlencoded", + ) + + return self.execute_task(initial_operation, auth_kwargs=kwargs) + + +class PikaScenesV2_2(PikaNodeBase): + """PikaScenes v2.2 Node.""" + + @classmethod + def INPUT_TYPES(cls): + image_ingredient_input = ( + IO.IMAGE, + {"tooltip": "Image that will be used as ingredient to create a video."}, + ) + return { + "required": { + **cls.get_base_inputs_types( + PikaBodyGenerate22C2vGenerate22PikascenesPost, + ), + "ingredients_mode": model_field_to_node_input( + IO.COMBO, + PikaBodyGenerate22C2vGenerate22PikascenesPost, + "ingredientsMode", + enum_type=IngredientsMode, + default="creative", + ), + "aspect_ratio": model_field_to_node_input( + IO.FLOAT, + PikaBodyGenerate22C2vGenerate22PikascenesPost, + "aspectRatio", + step=0.001, + min=0.4, + max=2.5, + default=1.7777777777777777, + ), + }, + "optional": { + "image_ingredient_1": image_ingredient_input, + "image_ingredient_2": image_ingredient_input, + "image_ingredient_3": image_ingredient_input, + "image_ingredient_4": image_ingredient_input, + "image_ingredient_5": image_ingredient_input, + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + DESCRIPTION = "Combine your images to create a video with the objects in them. 
Upload multiple images as ingredients and generate a high-quality video that incorporates all of them." + + def api_call( + self, + prompt_text: str, + negative_prompt: str, + seed: int, + resolution: str, + duration: int, + ingredients_mode: str, + aspect_ratio: float, + image_ingredient_1: Optional[torch.Tensor] = None, + image_ingredient_2: Optional[torch.Tensor] = None, + image_ingredient_3: Optional[torch.Tensor] = None, + image_ingredient_4: Optional[torch.Tensor] = None, + image_ingredient_5: Optional[torch.Tensor] = None, + **kwargs, + ) -> tuple[VideoFromFile]: + # Convert all passed images to BytesIO + all_image_bytes_io = [] + for image in [ + image_ingredient_1, + image_ingredient_2, + image_ingredient_3, + image_ingredient_4, + image_ingredient_5, + ]: + if image is not None: + image_bytes_io = tensor_to_bytesio(image) + image_bytes_io.seek(0) + all_image_bytes_io.append(image_bytes_io) + + pika_files = [ + ("images", (f"image_{i}.png", image_bytes_io, "image/png")) + for i, image_bytes_io in enumerate(all_image_bytes_io) + ] + + pika_request_data = PikaBodyGenerate22C2vGenerate22PikascenesPost( + ingredientsMode=ingredients_mode, + promptText=prompt_text, + negativePrompt=negative_prompt, + seed=seed, + resolution=resolution, + duration=duration, + aspectRatio=aspect_ratio, + ) + + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=PATH_PIKASCENES, + method=HttpMethod.POST, + request_model=PikaBodyGenerate22C2vGenerate22PikascenesPost, + response_model=PikaGenerateResponse, + ), + request=pika_request_data, + files=pika_files, + content_type="multipart/form-data", + auth_kwargs=kwargs, + ) + + return self.execute_task(initial_operation, auth_kwargs=kwargs) + + +class PikAdditionsNode(PikaNodeBase): + """Pika Pikadditions Node. Add an image into a video.""" + + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "video": (IO.VIDEO, {"tooltip": "The video to add an image to."}), + "image": (IO.IMAGE, {"tooltip": "The image to add to the video."}), + "prompt_text": model_field_to_node_input( + IO.STRING, + PikaBodyGeneratePikadditionsGeneratePikadditionsPost, + "promptText", + multiline=True, + ), + "negative_prompt": model_field_to_node_input( + IO.STRING, + PikaBodyGeneratePikadditionsGeneratePikadditionsPost, + "negativePrompt", + multiline=True, + ), + "seed": model_field_to_node_input( + IO.INT, + PikaBodyGeneratePikadditionsGeneratePikadditionsPost, + "seed", + min=0, + max=0xFFFFFFFF, + control_after_generate=True, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + DESCRIPTION = "Add any object or image into your video. Upload a video and specify what you’d like to add to create a seamlessly integrated result." 
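+    # The input video is re-encoded in memory to MP4 (H.264) and uploaded
+    # together with the PNG image as multipart files to
+    # /proxy/pika/generate/pikadditions before the usual polling step.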
+ + def api_call( + self, + video: VideoInput, + image: torch.Tensor, + prompt_text: str, + negative_prompt: str, + seed: int, + **kwargs, + ) -> tuple[VideoFromFile]: + # Convert video to BytesIO + video_bytes_io = io.BytesIO() + video.save_to(video_bytes_io, format=VideoContainer.MP4, codec=VideoCodec.H264) + video_bytes_io.seek(0) + + # Convert image to BytesIO + image_bytes_io = tensor_to_bytesio(image) + image_bytes_io.seek(0) + + pika_files = [ + ("video", ("video.mp4", video_bytes_io, "video/mp4")), + ("image", ("image.png", image_bytes_io, "image/png")), + ] + + # Prepare non-file data + pika_request_data = PikaBodyGeneratePikadditionsGeneratePikadditionsPost( + promptText=prompt_text, + negativePrompt=negative_prompt, + seed=seed, + ) + + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=PATH_PIKADDITIONS, + method=HttpMethod.POST, + request_model=PikaBodyGeneratePikadditionsGeneratePikadditionsPost, + response_model=PikaGenerateResponse, + ), + request=pika_request_data, + files=pika_files, + content_type="multipart/form-data", + auth_kwargs=kwargs, + ) + + return self.execute_task(initial_operation, auth_kwargs=kwargs) + + +class PikaSwapsNode(PikaNodeBase): + """Pika Pikaswaps Node.""" + + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "video": (IO.VIDEO, {"tooltip": "The video to swap an object in."}), + "image": ( + IO.IMAGE, + { + "tooltip": "The image used to replace the masked object in the video." + }, + ), + "mask": ( + IO.MASK, + {"tooltip": "Use the mask to define areas in the video to replace"}, + ), + "prompt_text": model_field_to_node_input( + IO.STRING, + PikaBodyGeneratePikaswapsGeneratePikaswapsPost, + "promptText", + multiline=True, + ), + "negative_prompt": model_field_to_node_input( + IO.STRING, + PikaBodyGeneratePikaswapsGeneratePikaswapsPost, + "negativePrompt", + multiline=True, + ), + "seed": model_field_to_node_input( + IO.INT, + PikaBodyGeneratePikaswapsGeneratePikaswapsPost, + "seed", + min=0, + max=0xFFFFFFFF, + control_after_generate=True, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + DESCRIPTION = "Swap out any object or region of your video with a new image or object. Define areas to replace either with a mask or coordinates." 
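+    # The mask is rounded to a binary mask and uploaded as the
+    # "modifyRegionMask" file alongside the video and image; note that the mask
+    # bytes are currently written raw (not PNG-encoded) under the name mask.png.
+    # FIXME: api_call below posts to PATH_PIKADDITIONS with the Pikadditions
+    # request model; PATH_PIKASWAPS and
+    # PikaBodyGeneratePikaswapsGeneratePikaswapsPost appear to be the intended
+    # endpoint and request model for this node.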
+ RETURN_TYPES = ("VIDEO",) + + def api_call( + self, + video: VideoInput, + image: torch.Tensor, + mask: torch.Tensor, + prompt_text: str, + negative_prompt: str, + seed: int, + **kwargs, + ) -> tuple[VideoFromFile]: + # Convert video to BytesIO + video_bytes_io = io.BytesIO() + video.save_to(video_bytes_io, format=VideoContainer.MP4, codec=VideoCodec.H264) + video_bytes_io.seek(0) + + # Convert mask to binary mask with three channels + mask = torch.round(mask) + mask = mask.repeat(1, 3, 1, 1) + + # Convert 3-channel binary mask to BytesIO + mask_bytes_io = io.BytesIO() + mask_bytes_io.write(mask.numpy().astype(np.uint8)) + mask_bytes_io.seek(0) + + # Convert image to BytesIO + image_bytes_io = tensor_to_bytesio(image) + image_bytes_io.seek(0) + + pika_files = [ + ("video", ("video.mp4", video_bytes_io, "video/mp4")), + ("image", ("image.png", image_bytes_io, "image/png")), + ("modifyRegionMask", ("mask.png", mask_bytes_io, "image/png")), + ] + + # Prepare non-file data + pika_request_data = PikaBodyGeneratePikaswapsGeneratePikaswapsPost( + promptText=prompt_text, + negativePrompt=negative_prompt, + seed=seed, + ) + + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=PATH_PIKADDITIONS, + method=HttpMethod.POST, + request_model=PikaBodyGeneratePikadditionsGeneratePikadditionsPost, + response_model=PikaGenerateResponse, + ), + request=pika_request_data, + files=pika_files, + content_type="multipart/form-data", + auth_kwargs=kwargs, + ) + + return self.execute_task(initial_operation, auth_kwargs=kwargs) + + +class PikaffectsNode(PikaNodeBase): + """Pika Pikaffects Node.""" + + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "image": ( + IO.IMAGE, + {"tooltip": "The reference image to apply the Pikaffect to."}, + ), + "pikaffect": model_field_to_node_input( + IO.COMBO, + PikaBodyGeneratePikaffectsGeneratePikaffectsPost, + "pikaffect", + enum_type=Pikaffect, + default="Cake-ify", + ), + "prompt_text": model_field_to_node_input( + IO.STRING, + PikaBodyGeneratePikaffectsGeneratePikaffectsPost, + "promptText", + multiline=True, + ), + "negative_prompt": model_field_to_node_input( + IO.STRING, + PikaBodyGeneratePikaffectsGeneratePikaffectsPost, + "negativePrompt", + multiline=True, + ), + "seed": model_field_to_node_input( + IO.INT, + PikaBodyGeneratePikaffectsGeneratePikaffectsPost, + "seed", + min=0, + max=0xFFFFFFFF, + control_after_generate=True, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + DESCRIPTION = "Generate a video with a specific Pikaffect. 
Supported Pikaffects: Cake-ify, Crumble, Crush, Decapitate, Deflate, Dissolve, Explode, Eye-pop, Inflate, Levitate, Melt, Peel, Poke, Squish, Ta-da, Tear" + + def api_call( + self, + image: torch.Tensor, + pikaffect: str, + prompt_text: str, + negative_prompt: str, + seed: int, + **kwargs, + ) -> tuple[VideoFromFile]: + + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=PATH_PIKAFFECTS, + method=HttpMethod.POST, + request_model=PikaBodyGeneratePikaffectsGeneratePikaffectsPost, + response_model=PikaGenerateResponse, + ), + request=PikaBodyGeneratePikaffectsGeneratePikaffectsPost( + pikaffect=pikaffect, + promptText=prompt_text, + negativePrompt=negative_prompt, + seed=seed, + ), + files={"image": ("image.png", tensor_to_bytesio(image), "image/png")}, + content_type="multipart/form-data", + auth_kwargs=kwargs, + ) + + return self.execute_task(initial_operation, auth_kwargs=kwargs) + + +class PikaStartEndFrameNode2_2(PikaNodeBase): + """PikaFrames v2.2 Node.""" + + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "image_start": (IO.IMAGE, {"tooltip": "The first image to combine."}), + "image_end": (IO.IMAGE, {"tooltip": "The last image to combine."}), + **cls.get_base_inputs_types( + PikaBodyGenerate22KeyframeGenerate22PikaframesPost + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + DESCRIPTION = "Generate a video by combining your first and last frame. Upload two images to define the start and end points, and let the AI create a smooth transition between them." + + def api_call( + self, + image_start: torch.Tensor, + image_end: torch.Tensor, + prompt_text: str, + negative_prompt: str, + seed: int, + resolution: str, + duration: int, + **kwargs, + ) -> tuple[VideoFromFile]: + + pika_files = [ + ( + "keyFrames", + ("image_start.png", tensor_to_bytesio(image_start), "image/png"), + ), + ("keyFrames", ("image_end.png", tensor_to_bytesio(image_end), "image/png")), + ] + + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=PATH_PIKAFRAMES, + method=HttpMethod.POST, + request_model=PikaBodyGenerate22KeyframeGenerate22PikaframesPost, + response_model=PikaGenerateResponse, + ), + request=PikaBodyGenerate22KeyframeGenerate22PikaframesPost( + promptText=prompt_text, + negativePrompt=negative_prompt, + seed=seed, + resolution=resolution, + duration=duration, + ), + files=pika_files, + content_type="multipart/form-data", + auth_kwargs=kwargs, + ) + + return self.execute_task(initial_operation, auth_kwargs=kwargs) + + +NODE_CLASS_MAPPINGS = { + "PikaImageToVideoNode2_2": PikaImageToVideoV2_2, + "PikaTextToVideoNode2_2": PikaTextToVideoNodeV2_2, + "PikaScenesV2_2": PikaScenesV2_2, + "Pikadditions": PikAdditionsNode, + "Pikaswaps": PikaSwapsNode, + "Pikaffects": PikaffectsNode, + "PikaStartEndFrameNode2_2": PikaStartEndFrameNode2_2, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "PikaImageToVideoNode2_2": "Pika Image to Video", + "PikaTextToVideoNode2_2": "Pika Text to Video", + "PikaScenesV2_2": "Pika Scenes (Video Image Composition)", + "Pikadditions": "Pikadditions (Video Object Insertion)", + "Pikaswaps": "Pika Swaps (Video Object Replacement)", + "Pikaffects": "Pikaffects (Video Effects)", + "PikaStartEndFrameNode2_2": "Pika Start and End Frame to Video", +} diff --git a/comfy_api_nodes/nodes_pixverse.py b/comfy_api_nodes/nodes_pixverse.py new file mode 100644 index 00000000..0c29e77c --- /dev/null +++ b/comfy_api_nodes/nodes_pixverse.py @@ -0,0 +1,492 @@ +from inspect import 
cleandoc + +from comfy_api_nodes.apis.pixverse_api import ( + PixverseTextVideoRequest, + PixverseImageVideoRequest, + PixverseTransitionVideoRequest, + PixverseImageUploadResponse, + PixverseVideoResponse, + PixverseGenerationStatusResponse, + PixverseAspectRatio, + PixverseQuality, + PixverseDuration, + PixverseMotionMode, + PixverseStatus, + PixverseIO, + pixverse_templates, +) +from comfy_api_nodes.apis.client import ( + ApiEndpoint, + HttpMethod, + SynchronousOperation, + PollingOperation, + EmptyRequest, +) +from comfy_api_nodes.apinode_utils import ( + tensor_to_bytesio, + validate_string, +) +from comfy.comfy_types.node_typing import IO, ComfyNodeABC +from comfy_api.input_impl import VideoFromFile + +import torch +import requests +from io import BytesIO + + +def upload_image_to_pixverse(image: torch.Tensor, auth_kwargs=None): + # first, upload image to Pixverse and get image id to use in actual generation call + files = { + "image": tensor_to_bytesio(image) + } + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/pixverse/image/upload", + method=HttpMethod.POST, + request_model=EmptyRequest, + response_model=PixverseImageUploadResponse, + ), + request=EmptyRequest(), + files=files, + content_type="multipart/form-data", + auth_kwargs=auth_kwargs, + ) + response_upload: PixverseImageUploadResponse = operation.execute() + + if response_upload.Resp is None: + raise Exception(f"PixVerse image upload request failed: '{response_upload.ErrMsg}'") + + return response_upload.Resp.img_id + + +class PixverseTemplateNode: + """ + Select template for PixVerse Video generation. + """ + + RETURN_TYPES = (PixverseIO.TEMPLATE,) + RETURN_NAMES = ("pixverse_template",) + FUNCTION = "create_template" + CATEGORY = "api node/video/PixVerse" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "template": (list(pixverse_templates.keys()), ), + } + } + + def create_template(self, template: str): + template_id = pixverse_templates.get(template, None) + if template_id is None: + raise Exception(f"Template '{template}' is not recognized.") + # just return the integer + return (template_id,) + + +class PixverseTextToVideoNode(ComfyNodeABC): + """ + Generates videos synchronously based on prompt and output_size. + """ + + RETURN_TYPES = (IO.VIDEO,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/video/PixVerse" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the video generation", + }, + ), + "aspect_ratio": ( + [ratio.value for ratio in PixverseAspectRatio], + ), + "quality": ( + [resolution.value for resolution in PixverseQuality], + { + "default": PixverseQuality.res_540p, + }, + ), + "duration_seconds": ([dur.value for dur in PixverseDuration],), + "motion_mode": ([mode.value for mode in PixverseMotionMode],), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 2147483647, + "control_after_generate": True, + "tooltip": "Seed for video generation.", + }, + ), + }, + "optional": { + "negative_prompt": ( + IO.STRING, + { + "default": "", + "forceInput": True, + "tooltip": "An optional text description of undesired elements on an image.", + }, + ), + "pixverse_template": ( + PixverseIO.TEMPLATE, + { + "tooltip": "An optional template to influence style of generation, created by the PixVerse Template node." 
+ } + ) + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call( + self, + prompt: str, + aspect_ratio: str, + quality: str, + duration_seconds: int, + motion_mode: str, + seed, + negative_prompt: str=None, + pixverse_template: int=None, + **kwargs, + ): + validate_string(prompt, strip_whitespace=False) + # 1080p is limited to 5 seconds duration + # only normal motion_mode supported for 1080p or for non-5 second duration + if quality == PixverseQuality.res_1080p: + motion_mode = PixverseMotionMode.normal + duration_seconds = PixverseDuration.dur_5 + elif duration_seconds != PixverseDuration.dur_5: + motion_mode = PixverseMotionMode.normal + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/pixverse/video/text/generate", + method=HttpMethod.POST, + request_model=PixverseTextVideoRequest, + response_model=PixverseVideoResponse, + ), + request=PixverseTextVideoRequest( + prompt=prompt, + aspect_ratio=aspect_ratio, + quality=quality, + duration=duration_seconds, + motion_mode=motion_mode, + negative_prompt=negative_prompt if negative_prompt else None, + template_id=pixverse_template, + seed=seed, + ), + auth_kwargs=kwargs, + ) + response_api = operation.execute() + + if response_api.Resp is None: + raise Exception(f"PixVerse request failed: '{response_api.ErrMsg}'") + + operation = PollingOperation( + poll_endpoint=ApiEndpoint( + path=f"/proxy/pixverse/video/result/{response_api.Resp.video_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=PixverseGenerationStatusResponse, + ), + completed_statuses=[PixverseStatus.successful], + failed_statuses=[PixverseStatus.contents_moderation, PixverseStatus.failed, PixverseStatus.deleted], + status_extractor=lambda x: x.Resp.status, + auth_kwargs=kwargs, + ) + response_poll = operation.execute() + + vid_response = requests.get(response_poll.Resp.url) + return (VideoFromFile(BytesIO(vid_response.content)),) + + +class PixverseImageToVideoNode(ComfyNodeABC): + """ + Generates videos synchronously based on prompt and output_size. + """ + + RETURN_TYPES = (IO.VIDEO,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/video/PixVerse" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": ( + IO.IMAGE, + ), + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the video generation", + }, + ), + "quality": ( + [resolution.value for resolution in PixverseQuality], + { + "default": PixverseQuality.res_540p, + }, + ), + "duration_seconds": ([dur.value for dur in PixverseDuration],), + "motion_mode": ([mode.value for mode in PixverseMotionMode],), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 2147483647, + "control_after_generate": True, + "tooltip": "Seed for video generation.", + }, + ), + }, + "optional": { + "negative_prompt": ( + IO.STRING, + { + "default": "", + "forceInput": True, + "tooltip": "An optional text description of undesired elements on an image.", + }, + ), + "pixverse_template": ( + PixverseIO.TEMPLATE, + { + "tooltip": "An optional template to influence style of generation, created by the PixVerse Template node." 
+ } + ) + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call( + self, + image: torch.Tensor, + prompt: str, + quality: str, + duration_seconds: int, + motion_mode: str, + seed, + negative_prompt: str=None, + pixverse_template: int=None, + **kwargs, + ): + validate_string(prompt, strip_whitespace=False) + img_id = upload_image_to_pixverse(image, auth_kwargs=kwargs) + + # 1080p is limited to 5 seconds duration + # only normal motion_mode supported for 1080p or for non-5 second duration + if quality == PixverseQuality.res_1080p: + motion_mode = PixverseMotionMode.normal + duration_seconds = PixverseDuration.dur_5 + elif duration_seconds != PixverseDuration.dur_5: + motion_mode = PixverseMotionMode.normal + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/pixverse/video/img/generate", + method=HttpMethod.POST, + request_model=PixverseImageVideoRequest, + response_model=PixverseVideoResponse, + ), + request=PixverseImageVideoRequest( + img_id=img_id, + prompt=prompt, + quality=quality, + duration=duration_seconds, + motion_mode=motion_mode, + negative_prompt=negative_prompt if negative_prompt else None, + template_id=pixverse_template, + seed=seed, + ), + auth_kwargs=kwargs, + ) + response_api = operation.execute() + + if response_api.Resp is None: + raise Exception(f"PixVerse request failed: '{response_api.ErrMsg}'") + + operation = PollingOperation( + poll_endpoint=ApiEndpoint( + path=f"/proxy/pixverse/video/result/{response_api.Resp.video_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=PixverseGenerationStatusResponse, + ), + completed_statuses=[PixverseStatus.successful], + failed_statuses=[PixverseStatus.contents_moderation, PixverseStatus.failed, PixverseStatus.deleted], + status_extractor=lambda x: x.Resp.status, + auth_kwargs=kwargs, + ) + response_poll = operation.execute() + + vid_response = requests.get(response_poll.Resp.url) + return (VideoFromFile(BytesIO(vid_response.content)),) + + +class PixverseTransitionVideoNode(ComfyNodeABC): + """ + Generates videos synchronously based on prompt and output_size. 
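+    Uploads the first and last frame through the PixVerse image upload
+    endpoint, then requests a transition video between them via
+    /proxy/pixverse/video/transition/generate and polls for the result.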
+ """ + + RETURN_TYPES = (IO.VIDEO,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/video/PixVerse" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "first_frame": ( + IO.IMAGE, + ), + "last_frame": ( + IO.IMAGE, + ), + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the video generation", + }, + ), + "quality": ( + [resolution.value for resolution in PixverseQuality], + { + "default": PixverseQuality.res_540p, + }, + ), + "duration_seconds": ([dur.value for dur in PixverseDuration],), + "motion_mode": ([mode.value for mode in PixverseMotionMode],), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 2147483647, + "control_after_generate": True, + "tooltip": "Seed for video generation.", + }, + ), + }, + "optional": { + "negative_prompt": ( + IO.STRING, + { + "default": "", + "forceInput": True, + "tooltip": "An optional text description of undesired elements on an image.", + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call( + self, + first_frame: torch.Tensor, + last_frame: torch.Tensor, + prompt: str, + quality: str, + duration_seconds: int, + motion_mode: str, + seed, + negative_prompt: str=None, + **kwargs, + ): + validate_string(prompt, strip_whitespace=False) + first_frame_id = upload_image_to_pixverse(first_frame, auth_kwargs=kwargs) + last_frame_id = upload_image_to_pixverse(last_frame, auth_kwargs=kwargs) + + # 1080p is limited to 5 seconds duration + # only normal motion_mode supported for 1080p or for non-5 second duration + if quality == PixverseQuality.res_1080p: + motion_mode = PixverseMotionMode.normal + duration_seconds = PixverseDuration.dur_5 + elif duration_seconds != PixverseDuration.dur_5: + motion_mode = PixverseMotionMode.normal + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/pixverse/video/transition/generate", + method=HttpMethod.POST, + request_model=PixverseTransitionVideoRequest, + response_model=PixverseVideoResponse, + ), + request=PixverseTransitionVideoRequest( + first_frame_img=first_frame_id, + last_frame_img=last_frame_id, + prompt=prompt, + quality=quality, + duration=duration_seconds, + motion_mode=motion_mode, + negative_prompt=negative_prompt if negative_prompt else None, + seed=seed, + ), + auth_kwargs=kwargs, + ) + response_api = operation.execute() + + if response_api.Resp is None: + raise Exception(f"PixVerse request failed: '{response_api.ErrMsg}'") + + operation = PollingOperation( + poll_endpoint=ApiEndpoint( + path=f"/proxy/pixverse/video/result/{response_api.Resp.video_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=PixverseGenerationStatusResponse, + ), + completed_statuses=[PixverseStatus.successful], + failed_statuses=[PixverseStatus.contents_moderation, PixverseStatus.failed, PixverseStatus.deleted], + status_extractor=lambda x: x.Resp.status, + auth_kwargs=kwargs, + ) + response_poll = operation.execute() + + vid_response = requests.get(response_poll.Resp.url) + return (VideoFromFile(BytesIO(vid_response.content)),) + + +NODE_CLASS_MAPPINGS = { + "PixverseTextToVideoNode": PixverseTextToVideoNode, + "PixverseImageToVideoNode": PixverseImageToVideoNode, + "PixverseTransitionVideoNode": PixverseTransitionVideoNode, + "PixverseTemplateNode": PixverseTemplateNode, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "PixverseTextToVideoNode": "PixVerse 
Text to Video", + "PixverseImageToVideoNode": "PixVerse Image to Video", + "PixverseTransitionVideoNode": "PixVerse Transition Video", + "PixverseTemplateNode": "PixVerse Template", +} diff --git a/comfy_api_nodes/nodes_recraft.py b/comfy_api_nodes/nodes_recraft.py new file mode 100644 index 00000000..767d93e3 --- /dev/null +++ b/comfy_api_nodes/nodes_recraft.py @@ -0,0 +1,1117 @@ +from __future__ import annotations +from inspect import cleandoc +from comfy.utils import ProgressBar +from comfy_extras.nodes_images import SVG # Added +from comfy.comfy_types.node_typing import IO +from comfy_api_nodes.apis.recraft_api import ( + RecraftImageGenerationRequest, + RecraftImageGenerationResponse, + RecraftImageSize, + RecraftModel, + RecraftStyle, + RecraftStyleV3, + RecraftColor, + RecraftColorChain, + RecraftControls, + RecraftIO, + get_v3_substyles, +) +from comfy_api_nodes.apis.client import ( + ApiEndpoint, + HttpMethod, + SynchronousOperation, + EmptyRequest, +) +from comfy_api_nodes.apinode_utils import ( + bytesio_to_image_tensor, + download_url_to_bytesio, + tensor_to_bytesio, + resize_mask_to_image, + validate_string, +) +import torch +from io import BytesIO +from PIL import UnidentifiedImageError + + +def handle_recraft_file_request( + image: torch.Tensor, + path: str, + mask: torch.Tensor=None, + total_pixels=4096*4096, + timeout=1024, + request=None, + auth_kwargs: dict[str,str] = None, + ) -> list[BytesIO]: + """ + Handle sending common Recraft file-only request to get back file bytes. + """ + if request is None: + request = EmptyRequest() + + files = { + 'image': tensor_to_bytesio(image, total_pixels=total_pixels).read() + } + if mask is not None: + files['mask'] = tensor_to_bytesio(mask, total_pixels=total_pixels).read() + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=path, + method=HttpMethod.POST, + request_model=type(request), + response_model=RecraftImageGenerationResponse, + ), + request=request, + files=files, + content_type="multipart/form-data", + auth_kwargs=auth_kwargs, + multipart_parser=recraft_multipart_parser, + ) + response: RecraftImageGenerationResponse = operation.execute() + all_bytesio = [] + if response.image is not None: + all_bytesio.append(download_url_to_bytesio(response.image.url, timeout=timeout)) + else: + for data in response.data: + all_bytesio.append(download_url_to_bytesio(data.url, timeout=timeout)) + + return all_bytesio + + +def recraft_multipart_parser(data, parent_key=None, formatter: callable=None, converted_to_check: list[list]=None, is_list=False) -> dict: + """ + Formats data such that multipart/form-data will work with requests library + when both files and data are present. + + The OpenAI client that Recraft uses has a bizarre way of serializing lists: + + It does NOT keep track of indeces of each list, so for background_color, that must be serialized as: + 'background_color[rgb][]' = [0, 0, 255] + where the array is assigned to a key that has '[]' at the end, to signal it's an array. + + This has the consequence of nested lists having the exact same key, forcing arrays to merge; all colors inputs fall under the same key: + if 1 color -> 'controls[colors][][rgb][]' = [0, 0, 255] + if 2 colors -> 'controls[colors][][rgb][]' = [0, 0, 255, 255, 0, 0] + if 3 colors -> 'controls[colors][][rgb][]' = [0, 0, 255, 255, 0, 0, 0, 255, 0] + etc. + Whoever made this serialization up at OpenAI added the constraint that lists must be of uniform length on objects of same 'type'. 
+ """ + # Modification of a function that handled a different type of multipart parsing, big ups: + # https://gist.github.com/kazqvaizer/4cebebe5db654a414132809f9f88067b + + def handle_converted_lists(data, parent_key, lists_to_check=tuple[list]): + # if list already exists exists, just extend list with data + for check_list in lists_to_check: + for conv_tuple in check_list: + if conv_tuple[0] == parent_key and type(conv_tuple[1]) is list: + conv_tuple[1].append(formatter(data)) + return True + return False + + if converted_to_check is None: + converted_to_check = [] + + + if formatter is None: + formatter = lambda v: v # Multipart representation of value + + if type(data) is not dict: + # if list already exists exists, just extend list with data + added = handle_converted_lists(data, parent_key, converted_to_check) + if added: + return {} + # otherwise if is_list, create new list with data + if is_list: + return {parent_key: [formatter(data)]} + # return new key with data + return {parent_key: formatter(data)} + + converted = [] + next_check = [converted] + next_check.extend(converted_to_check) + + for key, value in data.items(): + current_key = key if parent_key is None else f"{parent_key}[{key}]" + if type(value) is dict: + converted.extend(recraft_multipart_parser(value, current_key, formatter, next_check).items()) + elif type(value) is list: + for ind, list_value in enumerate(value): + iter_key = f"{current_key}[]" + converted.extend(recraft_multipart_parser(list_value, iter_key, formatter, next_check, is_list=True).items()) + else: + converted.append((current_key, formatter(value))) + + return dict(converted) + + +class handle_recraft_image_output: + """ + Catch an exception related to receiving SVG data instead of image, when Infinite Style Library style_id is in use. + """ + def __init__(self): + pass + + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_val, exc_tb): + if exc_type is not None and exc_type is UnidentifiedImageError: + raise Exception("Received output data was not an image; likely an SVG. If you used style_id, make sure it is not a Vector art style.") + + +class RecraftColorRGBNode: + """ + Create Recraft Color by choosing specific RGB values. + """ + + RETURN_TYPES = (RecraftIO.COLOR,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + RETURN_NAMES = ("recraft_color",) + FUNCTION = "create_color" + CATEGORY = "api node/image/Recraft" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "r": (IO.INT, { + "default": 0, + "min": 0, + "max": 255, + "tooltip": "Red value of color." + }), + "g": (IO.INT, { + "default": 0, + "min": 0, + "max": 255, + "tooltip": "Green value of color." + }), + "b": (IO.INT, { + "default": 0, + "min": 0, + "max": 255, + "tooltip": "Blue value of color." + }), + }, + "optional": { + "recraft_color": (RecraftIO.COLOR,), + } + } + + def create_color(self, r: int, g: int, b: int, recraft_color: RecraftColorChain=None): + recraft_color = recraft_color.clone() if recraft_color else RecraftColorChain() + recraft_color.add(RecraftColor(r, g, b)) + return (recraft_color, ) + + +class RecraftControlsNode: + """ + Create Recraft Controls for customizing Recraft generation. 
+ """ + + RETURN_TYPES = (RecraftIO.CONTROLS,) + RETURN_NAMES = ("recraft_controls",) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "create_controls" + CATEGORY = "api node/image/Recraft" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + }, + "optional": { + "colors": (RecraftIO.COLOR,), + "background_color": (RecraftIO.COLOR,), + } + } + + def create_controls(self, colors: RecraftColorChain=None, background_color: RecraftColorChain=None): + return (RecraftControls(colors=colors, background_color=background_color), ) + + +class RecraftStyleV3RealisticImageNode: + """ + Select realistic_image style and optional substyle. + """ + + RETURN_TYPES = (RecraftIO.STYLEV3,) + RETURN_NAMES = ("recraft_style",) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "create_style" + CATEGORY = "api node/image/Recraft" + + RECRAFT_STYLE = RecraftStyleV3.realistic_image + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "substyle": (get_v3_substyles(s.RECRAFT_STYLE),), + } + } + + def create_style(self, substyle: str): + if substyle == "None": + substyle = None + return (RecraftStyle(self.RECRAFT_STYLE, substyle),) + + +class RecraftStyleV3DigitalIllustrationNode(RecraftStyleV3RealisticImageNode): + """ + Select digital_illustration style and optional substyle. + """ + + RECRAFT_STYLE = RecraftStyleV3.digital_illustration + + +class RecraftStyleV3VectorIllustrationNode(RecraftStyleV3RealisticImageNode): + """ + Select vector_illustration style and optional substyle. + """ + + RECRAFT_STYLE = RecraftStyleV3.vector_illustration + + +class RecraftStyleV3LogoRasterNode(RecraftStyleV3RealisticImageNode): + """ + Select vector_illustration style and optional substyle. + """ + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "substyle": (get_v3_substyles(s.RECRAFT_STYLE, include_none=False),), + } + } + + RECRAFT_STYLE = RecraftStyleV3.logo_raster + + +class RecraftStyleInfiniteStyleLibrary: + """ + Select style based on preexisting UUID from Recraft's Infinite Style Library. + """ + + RETURN_TYPES = (RecraftIO.STYLEV3,) + RETURN_NAMES = ("recraft_style",) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "create_style" + CATEGORY = "api node/image/Recraft" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "style_id": (IO.STRING, { + "default": "", + "tooltip": "UUID of style from Infinite Style Library.", + }) + } + } + + def create_style(self, style_id: str): + if not style_id: + raise Exception("The style_id input cannot be empty.") + return (RecraftStyle(style_id=style_id),) + + +class RecraftTextToImageNode: + """ + Generates images synchronously based on prompt and resolution. 
+ """ + + RETURN_TYPES = (IO.IMAGE,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/Recraft" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the image generation.", + }, + ), + "size": ( + [res.value for res in RecraftImageSize], + { + "default": RecraftImageSize.res_1024x1024, + "tooltip": "The size of the generated image.", + }, + ), + "n": ( + IO.INT, + { + "default": 1, + "min": 1, + "max": 6, + "tooltip": "The number of images to generate.", + }, + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 0xFFFFFFFFFFFFFFFF, + "control_after_generate": True, + "tooltip": "Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.", + }, + ), + }, + "optional": { + "recraft_style": (RecraftIO.STYLEV3,), + "negative_prompt": ( + IO.STRING, + { + "default": "", + "forceInput": True, + "tooltip": "An optional text description of undesired elements on an image.", + }, + ), + "recraft_controls": ( + RecraftIO.CONTROLS, + { + "tooltip": "Optional additional controls over the generation via the Recraft Controls node." + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call( + self, + prompt: str, + size: str, + n: int, + seed, + recraft_style: RecraftStyle = None, + negative_prompt: str = None, + recraft_controls: RecraftControls = None, + **kwargs, + ): + validate_string(prompt, strip_whitespace=False, max_length=1000) + default_style = RecraftStyle(RecraftStyleV3.realistic_image) + if recraft_style is None: + recraft_style = default_style + + controls_api = None + if recraft_controls: + controls_api = recraft_controls.create_api_model() + + if not negative_prompt: + negative_prompt = None + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/recraft/image_generation", + method=HttpMethod.POST, + request_model=RecraftImageGenerationRequest, + response_model=RecraftImageGenerationResponse, + ), + request=RecraftImageGenerationRequest( + prompt=prompt, + negative_prompt=negative_prompt, + model=RecraftModel.recraftv3, + size=size, + n=n, + style=recraft_style.style, + substyle=recraft_style.substyle, + style_id=recraft_style.style_id, + controls=controls_api, + ), + auth_kwargs=kwargs, + ) + response: RecraftImageGenerationResponse = operation.execute() + images = [] + for data in response.data: + with handle_recraft_image_output(): + image = bytesio_to_image_tensor( + download_url_to_bytesio(data.url, timeout=1024) + ) + if len(image.shape) < 4: + image = image.unsqueeze(0) + images.append(image) + output_image = torch.cat(images, dim=0) + + return (output_image,) + + +class RecraftImageToImageNode: + """ + Modify image based on prompt and strength. 
+ """ + + RETURN_TYPES = (IO.IMAGE,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/Recraft" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": (IO.IMAGE, ), + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the image generation.", + }, + ), + "n": ( + IO.INT, + { + "default": 1, + "min": 1, + "max": 6, + "tooltip": "The number of images to generate.", + }, + ), + "strength": ( + IO.FLOAT, + { + "default": 0.5, + "min": 0.0, + "max": 1.0, + "step": 0.01, + "tooltip": "Defines the difference with the original image, should lie in [0, 1], where 0 means almost identical, and 1 means miserable similarity." + } + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 0xFFFFFFFFFFFFFFFF, + "control_after_generate": True, + "tooltip": "Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.", + }, + ), + }, + "optional": { + "recraft_style": (RecraftIO.STYLEV3,), + "negative_prompt": ( + IO.STRING, + { + "default": "", + "forceInput": True, + "tooltip": "An optional text description of undesired elements on an image.", + }, + ), + "recraft_controls": ( + RecraftIO.CONTROLS, + { + "tooltip": "Optional additional controls over the generation via the Recraft Controls node." + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call( + self, + image: torch.Tensor, + prompt: str, + n: int, + strength: float, + seed, + recraft_style: RecraftStyle = None, + negative_prompt: str = None, + recraft_controls: RecraftControls = None, + **kwargs, + ): + validate_string(prompt, strip_whitespace=False, max_length=1000) + default_style = RecraftStyle(RecraftStyleV3.realistic_image) + if recraft_style is None: + recraft_style = default_style + + controls_api = None + if recraft_controls: + controls_api = recraft_controls.create_api_model() + + if not negative_prompt: + negative_prompt = None + + request = RecraftImageGenerationRequest( + prompt=prompt, + negative_prompt=negative_prompt, + model=RecraftModel.recraftv3, + n=n, + strength=round(strength, 2), + style=recraft_style.style, + substyle=recraft_style.substyle, + style_id=recraft_style.style_id, + controls=controls_api, + ) + + images = [] + total = image.shape[0] + pbar = ProgressBar(total) + for i in range(total): + sub_bytes = handle_recraft_file_request( + image=image[i], + path="/proxy/recraft/images/imageToImage", + request=request, + auth_kwargs=kwargs, + ) + with handle_recraft_image_output(): + images.append(torch.cat([bytesio_to_image_tensor(x) for x in sub_bytes], dim=0)) + pbar.update(1) + + images_tensor = torch.cat(images, dim=0) + return (images_tensor, ) + + +class RecraftImageInpaintingNode: + """ + Modify image based on prompt and mask. 
+ """ + + RETURN_TYPES = (IO.IMAGE,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/Recraft" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": (IO.IMAGE, ), + "mask": (IO.MASK, ), + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the image generation.", + }, + ), + "n": ( + IO.INT, + { + "default": 1, + "min": 1, + "max": 6, + "tooltip": "The number of images to generate.", + }, + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 0xFFFFFFFFFFFFFFFF, + "control_after_generate": True, + "tooltip": "Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.", + }, + ), + }, + "optional": { + "recraft_style": (RecraftIO.STYLEV3,), + "negative_prompt": ( + IO.STRING, + { + "default": "", + "forceInput": True, + "tooltip": "An optional text description of undesired elements on an image.", + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call( + self, + image: torch.Tensor, + mask: torch.Tensor, + prompt: str, + n: int, + seed, + recraft_style: RecraftStyle = None, + negative_prompt: str = None, + **kwargs, + ): + validate_string(prompt, strip_whitespace=False, max_length=1000) + default_style = RecraftStyle(RecraftStyleV3.realistic_image) + if recraft_style is None: + recraft_style = default_style + + if not negative_prompt: + negative_prompt = None + + request = RecraftImageGenerationRequest( + prompt=prompt, + negative_prompt=negative_prompt, + model=RecraftModel.recraftv3, + n=n, + style=recraft_style.style, + substyle=recraft_style.substyle, + style_id=recraft_style.style_id, + ) + + # prepare mask tensor + mask = resize_mask_to_image(mask, image, allow_gradient=False, add_channel_dim=True) + + images = [] + total = image.shape[0] + pbar = ProgressBar(total) + for i in range(total): + sub_bytes = handle_recraft_file_request( + image=image[i], + mask=mask[i:i+1], + path="/proxy/recraft/images/inpaint", + request=request, + auth_kwargs=kwargs, + ) + with handle_recraft_image_output(): + images.append(torch.cat([bytesio_to_image_tensor(x) for x in sub_bytes], dim=0)) + pbar.update(1) + + images_tensor = torch.cat(images, dim=0) + return (images_tensor, ) + + +class RecraftTextToVectorNode: + """ + Generates SVG synchronously based on prompt and resolution. 
+ """ + + RETURN_TYPES = ("SVG",) # Changed + DESCRIPTION = cleandoc(__doc__ or "") if 'cleandoc' in globals() else __doc__ # Keep cleandoc if other nodes use it + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/Recraft" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the image generation.", + }, + ), + "substyle": (get_v3_substyles(RecraftStyleV3.vector_illustration),), + "size": ( + [res.value for res in RecraftImageSize], + { + "default": RecraftImageSize.res_1024x1024, + "tooltip": "The size of the generated image.", + }, + ), + "n": ( + IO.INT, + { + "default": 1, + "min": 1, + "max": 6, + "tooltip": "The number of images to generate.", + }, + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 0xFFFFFFFFFFFFFFFF, + "control_after_generate": True, + "tooltip": "Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.", + }, + ), + }, + "optional": { + "negative_prompt": ( + IO.STRING, + { + "default": "", + "forceInput": True, + "tooltip": "An optional text description of undesired elements on an image.", + }, + ), + "recraft_controls": ( + RecraftIO.CONTROLS, + { + "tooltip": "Optional additional controls over the generation via the Recraft Controls node." + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call( + self, + prompt: str, + substyle: str, + size: str, + n: int, + seed, + negative_prompt: str = None, + recraft_controls: RecraftControls = None, + **kwargs, + ): + validate_string(prompt, strip_whitespace=False, max_length=1000) + # create RecraftStyle so strings will be formatted properly (i.e. "None" will become None) + recraft_style = RecraftStyle(RecraftStyleV3.vector_illustration, substyle=substyle) + + controls_api = None + if recraft_controls: + controls_api = recraft_controls.create_api_model() + + if not negative_prompt: + negative_prompt = None + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/recraft/image_generation", + method=HttpMethod.POST, + request_model=RecraftImageGenerationRequest, + response_model=RecraftImageGenerationResponse, + ), + request=RecraftImageGenerationRequest( + prompt=prompt, + negative_prompt=negative_prompt, + model=RecraftModel.recraftv3, + size=size, + n=n, + style=recraft_style.style, + substyle=recraft_style.substyle, + controls=controls_api, + ), + auth_kwargs=kwargs, + ) + response: RecraftImageGenerationResponse = operation.execute() + svg_data = [] + for data in response.data: + svg_data.append(download_url_to_bytesio(data.url, timeout=1024)) + + return (SVG(svg_data),) + + +class RecraftVectorizeImageNode: + """ + Generates SVG synchronously from an input image. 
+ """ + + RETURN_TYPES = ("SVG",) # Changed + DESCRIPTION = cleandoc(__doc__ or "") if 'cleandoc' in globals() else __doc__ # Keep cleandoc if other nodes use it + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/Recraft" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": (IO.IMAGE, ), + }, + "optional": { + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call( + self, + image: torch.Tensor, + **kwargs, + ): + svgs = [] + total = image.shape[0] + pbar = ProgressBar(total) + for i in range(total): + sub_bytes = handle_recraft_file_request( + image=image[i], + path="/proxy/recraft/images/vectorize", + auth_kwargs=kwargs, + ) + svgs.append(SVG(sub_bytes)) + pbar.update(1) + + return (SVG.combine_all(svgs), ) + + +class RecraftReplaceBackgroundNode: + """ + Replace background on image, based on provided prompt. + """ + + RETURN_TYPES = (IO.IMAGE,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/Recraft" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": (IO.IMAGE, ), + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Prompt for the image generation.", + }, + ), + "n": ( + IO.INT, + { + "default": 1, + "min": 1, + "max": 6, + "tooltip": "The number of images to generate.", + }, + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 0xFFFFFFFFFFFFFFFF, + "control_after_generate": True, + "tooltip": "Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.", + }, + ), + }, + "optional": { + "recraft_style": (RecraftIO.STYLEV3,), + "negative_prompt": ( + IO.STRING, + { + "default": "", + "forceInput": True, + "tooltip": "An optional text description of undesired elements on an image.", + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call( + self, + image: torch.Tensor, + prompt: str, + n: int, + seed, + recraft_style: RecraftStyle = None, + negative_prompt: str = None, + **kwargs, + ): + default_style = RecraftStyle(RecraftStyleV3.realistic_image) + if recraft_style is None: + recraft_style = default_style + + if not negative_prompt: + negative_prompt = None + + request = RecraftImageGenerationRequest( + prompt=prompt, + negative_prompt=negative_prompt, + model=RecraftModel.recraftv3, + n=n, + style=recraft_style.style, + substyle=recraft_style.substyle, + style_id=recraft_style.style_id, + ) + + images = [] + total = image.shape[0] + pbar = ProgressBar(total) + for i in range(total): + sub_bytes = handle_recraft_file_request( + image=image[i], + path="/proxy/recraft/images/replaceBackground", + request=request, + auth_kwargs=kwargs, + ) + images.append(torch.cat([bytesio_to_image_tensor(x) for x in sub_bytes], dim=0)) + pbar.update(1) + + images_tensor = torch.cat(images, dim=0) + return (images_tensor, ) + + +class RecraftRemoveBackgroundNode: + """ + Remove background from image, and return processed image and mask. 
+ """ + + RETURN_TYPES = (IO.IMAGE, IO.MASK) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/Recraft" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": (IO.IMAGE, ), + }, + "optional": { + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call( + self, + image: torch.Tensor, + **kwargs, + ): + images = [] + total = image.shape[0] + pbar = ProgressBar(total) + for i in range(total): + sub_bytes = handle_recraft_file_request( + image=image[i], + path="/proxy/recraft/images/removeBackground", + auth_kwargs=kwargs, + ) + images.append(torch.cat([bytesio_to_image_tensor(x) for x in sub_bytes], dim=0)) + pbar.update(1) + + images_tensor = torch.cat(images, dim=0) + # use alpha channel as masks, in B,H,W format + masks_tensor = images_tensor[:,:,:,-1:].squeeze(-1) + return (images_tensor, masks_tensor) + + +class RecraftCrispUpscaleNode: + """ + Upscale image synchronously. + Enhances a given raster image using ‘crisp upscale’ tool, increasing image resolution, making the image sharper and cleaner. + """ + + RETURN_TYPES = (IO.IMAGE,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/Recraft" + + RECRAFT_PATH = "/proxy/recraft/images/crispUpscale" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": (IO.IMAGE, ), + }, + "optional": { + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call( + self, + image: torch.Tensor, + **kwargs, + ): + images = [] + total = image.shape[0] + pbar = ProgressBar(total) + for i in range(total): + sub_bytes = handle_recraft_file_request( + image=image[i], + path=self.RECRAFT_PATH, + auth_kwargs=kwargs, + ) + images.append(torch.cat([bytesio_to_image_tensor(x) for x in sub_bytes], dim=0)) + pbar.update(1) + + images_tensor = torch.cat(images, dim=0) + return (images_tensor,) + + +class RecraftCreativeUpscaleNode(RecraftCrispUpscaleNode): + """ + Upscale image synchronously. + Enhances a given raster image using ‘creative upscale’ tool, boosting resolution with a focus on refining small details and faces. 
+ """ + + RETURN_TYPES = (IO.IMAGE,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/Recraft" + + RECRAFT_PATH = "/proxy/recraft/images/creativeUpscale" + + +# A dictionary that contains all nodes you want to export with their names +# NOTE: names should be globally unique +NODE_CLASS_MAPPINGS = { + "RecraftTextToImageNode": RecraftTextToImageNode, + "RecraftImageToImageNode": RecraftImageToImageNode, + "RecraftImageInpaintingNode": RecraftImageInpaintingNode, + "RecraftTextToVectorNode": RecraftTextToVectorNode, + "RecraftVectorizeImageNode": RecraftVectorizeImageNode, + "RecraftRemoveBackgroundNode": RecraftRemoveBackgroundNode, + "RecraftReplaceBackgroundNode": RecraftReplaceBackgroundNode, + "RecraftCrispUpscaleNode": RecraftCrispUpscaleNode, + "RecraftCreativeUpscaleNode": RecraftCreativeUpscaleNode, + "RecraftStyleV3RealisticImage": RecraftStyleV3RealisticImageNode, + "RecraftStyleV3DigitalIllustration": RecraftStyleV3DigitalIllustrationNode, + "RecraftStyleV3LogoRaster": RecraftStyleV3LogoRasterNode, + "RecraftStyleV3InfiniteStyleLibrary": RecraftStyleInfiniteStyleLibrary, + "RecraftColorRGB": RecraftColorRGBNode, + "RecraftControls": RecraftControlsNode, +} + +# A dictionary that contains the friendly/humanly readable titles for the nodes +NODE_DISPLAY_NAME_MAPPINGS = { + "RecraftTextToImageNode": "Recraft Text to Image", + "RecraftImageToImageNode": "Recraft Image to Image", + "RecraftImageInpaintingNode": "Recraft Image Inpainting", + "RecraftTextToVectorNode": "Recraft Text to Vector", + "RecraftVectorizeImageNode": "Recraft Vectorize Image", + "RecraftRemoveBackgroundNode": "Recraft Remove Background", + "RecraftReplaceBackgroundNode": "Recraft Replace Background", + "RecraftCrispUpscaleNode": "Recraft Crisp Upscale Image", + "RecraftCreativeUpscaleNode": "Recraft Creative Upscale Image", + "RecraftStyleV3RealisticImage": "Recraft Style - Realistic Image", + "RecraftStyleV3DigitalIllustration": "Recraft Style - Digital Illustration", + "RecraftStyleV3LogoRaster": "Recraft Style - Logo Raster", + "RecraftStyleV3InfiniteStyleLibrary": "Recraft Style - Infinite Style Library", + "RecraftColorRGB": "Recraft Color RGB", + "RecraftControls": "Recraft Controls", +} diff --git a/comfy_api_nodes/nodes_stability.py b/comfy_api_nodes/nodes_stability.py new file mode 100644 index 00000000..02e42167 --- /dev/null +++ b/comfy_api_nodes/nodes_stability.py @@ -0,0 +1,614 @@ +from inspect import cleandoc +from comfy.comfy_types.node_typing import IO +from comfy_api_nodes.apis.stability_api import ( + StabilityUpscaleConservativeRequest, + StabilityUpscaleCreativeRequest, + StabilityAsyncResponse, + StabilityResultsGetResponse, + StabilityStable3_5Request, + StabilityStableUltraRequest, + StabilityStableUltraResponse, + StabilityAspectRatio, + Stability_SD3_5_Model, + Stability_SD3_5_GenerationMode, + get_stability_style_presets, +) +from comfy_api_nodes.apis.client import ( + ApiEndpoint, + HttpMethod, + SynchronousOperation, + PollingOperation, + EmptyRequest, +) +from comfy_api_nodes.apinode_utils import ( + bytesio_to_image_tensor, + tensor_to_bytesio, + validate_string, +) + +import torch +import base64 +from io import BytesIO +from enum import Enum + + +class StabilityPollStatus(str, Enum): + finished = "finished" + in_progress = "in_progress" + failed = "failed" + + +def get_async_dummy_status(x: StabilityResultsGetResponse): + if x.name is not None or x.errors is not None: + return 
StabilityPollStatus.failed
+    elif x.finish_reason is not None:
+        return StabilityPollStatus.finished
+    return StabilityPollStatus.in_progress
+
+
+class StabilityStableImageUltraNode:
+    """
+    Generates images synchronously based on prompt and resolution.
+    """
+
+    RETURN_TYPES = (IO.IMAGE,)
+    DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value
+    FUNCTION = "api_call"
+    API_NODE = True
+    CATEGORY = "api node/image/Stability AI"
+
+    @classmethod
+    def INPUT_TYPES(s):
+        return {
+            "required": {
+                "prompt": (
+                    IO.STRING,
+                    {
+                        "multiline": True,
+                        "default": "",
+                        "tooltip": "What you wish to see in the output image. A strong, descriptive prompt that clearly defines " +
+                            "elements, colors, and subjects will lead to better results. " +
+                            "To control the weight of a given word use the format `(word:weight)`, " +
+                            "where `word` is the word you'd like to control the weight of and `weight` " +
+                            "is a value between 0 and 1. For example: `The sky was a crisp (blue:0.3) and (green:0.8)` " +
+                            "would convey a sky that was blue and green, but more green than blue."
+                    },
+                ),
+                "aspect_ratio": ([x.value for x in StabilityAspectRatio],
+                    {
+                        "default": StabilityAspectRatio.ratio_1_1,
+                        "tooltip": "Aspect ratio of generated image.",
+                    },
+                ),
+                "style_preset": (get_stability_style_presets(),
+                    {
+                        "tooltip": "Optional desired style of generated image.",
+                    },
+                ),
+                "seed": (
+                    IO.INT,
+                    {
+                        "default": 0,
+                        "min": 0,
+                        "max": 4294967294,
+                        "control_after_generate": True,
+                        "tooltip": "The random seed used for creating the noise.",
+                    },
+                ),
+            },
+            "optional": {
+                "image": (IO.IMAGE,),
+                "negative_prompt": (
+                    IO.STRING,
+                    {
+                        "default": "",
+                        "forceInput": True,
+                        "tooltip": "A blurb of text describing what you do not wish to see in the output image. This is an advanced feature."
+ }, + ), + "image_denoise": ( + IO.FLOAT, + { + "default": 0.5, + "min": 0.0, + "max": 1.0, + "step": 0.01, + "tooltip": "Denoise of input image; 0.0 yields image identical to input, 1.0 is as if no image was provided at all.", + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call(self, prompt: str, aspect_ratio: str, style_preset: str, seed: int, + negative_prompt: str=None, image: torch.Tensor = None, image_denoise: float=None, + **kwargs): + validate_string(prompt, strip_whitespace=False) + # prepare image binary if image present + image_binary = None + if image is not None: + image_binary = tensor_to_bytesio(image, total_pixels=1504*1504).read() + else: + image_denoise = None + + if not negative_prompt: + negative_prompt = None + if style_preset == "None": + style_preset = None + + files = { + "image": image_binary + } + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/stability/v2beta/stable-image/generate/ultra", + method=HttpMethod.POST, + request_model=StabilityStableUltraRequest, + response_model=StabilityStableUltraResponse, + ), + request=StabilityStableUltraRequest( + prompt=prompt, + negative_prompt=negative_prompt, + aspect_ratio=aspect_ratio, + seed=seed, + strength=image_denoise, + style_preset=style_preset, + ), + files=files, + content_type="multipart/form-data", + auth_kwargs=kwargs, + ) + response_api = operation.execute() + + if response_api.finish_reason != "SUCCESS": + raise Exception(f"Stable Image Ultra generation failed: {response_api.finish_reason}.") + + image_data = base64.b64decode(response_api.image) + returned_image = bytesio_to_image_tensor(BytesIO(image_data)) + + return (returned_image,) + + +class StabilityStableImageSD_3_5Node: + """ + Generates images synchronously based on prompt and resolution. + """ + + RETURN_TYPES = (IO.IMAGE,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/Stability AI" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "What you wish to see in the output image. A strong, descriptive prompt that clearly defines elements, colors, and subjects will lead to better results." + }, + ), + "model": ([x.value for x in Stability_SD3_5_Model],), + "aspect_ratio": ([x.value for x in StabilityAspectRatio], + { + "default": StabilityAspectRatio.ratio_1_1, + "tooltip": "Aspect ratio of generated image.", + }, + ), + "style_preset": (get_stability_style_presets(), + { + "tooltip": "Optional desired style of generated image.", + }, + ), + "cfg_scale": ( + IO.FLOAT, + { + "default": 4.0, + "min": 1.0, + "max": 10.0, + "step": 0.1, + "tooltip": "How strictly the diffusion process adheres to the prompt text (higher values keep your image closer to your prompt)", + }, + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 4294967294, + "control_after_generate": True, + "tooltip": "The random seed used for creating the noise.", + }, + ), + }, + "optional": { + "image": (IO.IMAGE,), + "negative_prompt": ( + IO.STRING, + { + "default": "", + "forceInput": True, + "tooltip": "Keywords of what you do not wish to see in the output image. This is an advanced feature." 
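
Stability responses carry the generated image as a base64 string; the decode step at the end of `api_call` above amounts to roughly the following. This approximates `bytesio_to_image_tensor`, whose exact conversion is not shown in this diff.

```python
import base64
from io import BytesIO

import numpy as np
import torch
from PIL import Image

def b64_png_to_image_tensor(b64_image: str) -> torch.Tensor:
    """Rough equivalent of base64-decode + bytesio_to_image_tensor:
    returns a (1, H, W, C) float tensor in [0, 1]."""
    pil_image = Image.open(BytesIO(base64.b64decode(b64_image))).convert("RGB")
    array = np.asarray(pil_image).astype(np.float32) / 255.0
    return torch.from_numpy(array).unsqueeze(0)

# Tiny example input: a 1x1 red PNG, base64-encoded on the fly.
buf = BytesIO()
Image.new("RGB", (1, 1), (255, 0, 0)).save(buf, format="PNG")
print(b64_png_to_image_tensor(base64.b64encode(buf.getvalue()).decode()).shape)
```
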
+ }, + ), + "image_denoise": ( + IO.FLOAT, + { + "default": 0.5, + "min": 0.0, + "max": 1.0, + "step": 0.01, + "tooltip": "Denoise of input image; 0.0 yields image identical to input, 1.0 is as if no image was provided at all.", + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call(self, model: str, prompt: str, aspect_ratio: str, style_preset: str, seed: int, cfg_scale: float, + negative_prompt: str=None, image: torch.Tensor = None, image_denoise: float=None, + **kwargs): + validate_string(prompt, strip_whitespace=False) + # prepare image binary if image present + image_binary = None + mode = Stability_SD3_5_GenerationMode.text_to_image + if image is not None: + image_binary = tensor_to_bytesio(image, total_pixels=1504*1504).read() + mode = Stability_SD3_5_GenerationMode.image_to_image + aspect_ratio = None + else: + image_denoise = None + + if not negative_prompt: + negative_prompt = None + if style_preset == "None": + style_preset = None + + files = { + "image": image_binary + } + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/stability/v2beta/stable-image/generate/sd3", + method=HttpMethod.POST, + request_model=StabilityStable3_5Request, + response_model=StabilityStableUltraResponse, + ), + request=StabilityStable3_5Request( + prompt=prompt, + negative_prompt=negative_prompt, + aspect_ratio=aspect_ratio, + seed=seed, + strength=image_denoise, + style_preset=style_preset, + cfg_scale=cfg_scale, + model=model, + mode=mode, + ), + files=files, + content_type="multipart/form-data", + auth_kwargs=kwargs, + ) + response_api = operation.execute() + + if response_api.finish_reason != "SUCCESS": + raise Exception(f"Stable Diffusion 3.5 Image generation failed: {response_api.finish_reason}.") + + image_data = base64.b64decode(response_api.image) + returned_image = bytesio_to_image_tensor(BytesIO(image_data)) + + return (returned_image,) + + +class StabilityUpscaleConservativeNode: + """ + Upscale image with minimal alterations to 4K resolution. + """ + + RETURN_TYPES = (IO.IMAGE,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/Stability AI" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": (IO.IMAGE,), + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "What you wish to see in the output image. A strong, descriptive prompt that clearly defines elements, colors, and subjects will lead to better results." + }, + ), + "creativity": ( + IO.FLOAT, + { + "default": 0.35, + "min": 0.2, + "max": 0.5, + "step": 0.01, + "tooltip": "Controls the likelihood of creating additional details not heavily conditioned by the init image.", + }, + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 4294967294, + "control_after_generate": True, + "tooltip": "The random seed used for creating the noise.", + }, + ), + }, + "optional": { + "negative_prompt": ( + IO.STRING, + { + "default": "", + "forceInput": True, + "tooltip": "Keywords of what you do not wish to see in the output image. This is an advanced feature." 
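
The SD 3.5 node above switches generation mode based on whether an input image is wired in: an image selects image-to-image (the aspect ratio is then ignored by the API), otherwise text-to-image is used and the denoise strength is dropped. A small sketch of that branching; the enum values here are placeholders, since the real `Stability_SD3_5_GenerationMode` definition is not part of this diff:

```python
from enum import Enum
from typing import Optional

class GenerationMode(str, Enum):          # stands in for Stability_SD3_5_GenerationMode
    text_to_image = "text-to-image"
    image_to_image = "image-to-image"

def pick_mode(image, aspect_ratio: str, image_denoise: Optional[float]):
    """Mirror of the branching in StabilityStableImageSD_3_5Node.api_call:
    returns (mode, aspect_ratio, strength) with the unused field set to None."""
    if image is not None:
        return GenerationMode.image_to_image, None, image_denoise
    return GenerationMode.text_to_image, aspect_ratio, None

print(pick_mode(None, "1:1", 0.5))      # text-to-image: keeps aspect ratio, drops denoise
print(pick_mode(object(), "1:1", 0.5))  # image-to-image: drops aspect ratio, keeps denoise
```
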
+ }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call(self, image: torch.Tensor, prompt: str, creativity: float, seed: int, negative_prompt: str=None, + **kwargs): + validate_string(prompt, strip_whitespace=False) + image_binary = tensor_to_bytesio(image, total_pixels=1024*1024).read() + + if not negative_prompt: + negative_prompt = None + + files = { + "image": image_binary + } + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/stability/v2beta/stable-image/upscale/conservative", + method=HttpMethod.POST, + request_model=StabilityUpscaleConservativeRequest, + response_model=StabilityStableUltraResponse, + ), + request=StabilityUpscaleConservativeRequest( + prompt=prompt, + negative_prompt=negative_prompt, + creativity=round(creativity,2), + seed=seed, + ), + files=files, + content_type="multipart/form-data", + auth_kwargs=kwargs, + ) + response_api = operation.execute() + + if response_api.finish_reason != "SUCCESS": + raise Exception(f"Stability Upscale Conservative generation failed: {response_api.finish_reason}.") + + image_data = base64.b64decode(response_api.image) + returned_image = bytesio_to_image_tensor(BytesIO(image_data)) + + return (returned_image,) + + +class StabilityUpscaleCreativeNode: + """ + Upscale image with minimal alterations to 4K resolution. + """ + + RETURN_TYPES = (IO.IMAGE,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/Stability AI" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": (IO.IMAGE,), + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "What you wish to see in the output image. A strong, descriptive prompt that clearly defines elements, colors, and subjects will lead to better results." + }, + ), + "creativity": ( + IO.FLOAT, + { + "default": 0.3, + "min": 0.1, + "max": 0.5, + "step": 0.01, + "tooltip": "Controls the likelihood of creating additional details not heavily conditioned by the init image.", + }, + ), + "style_preset": (get_stability_style_presets(), + { + "tooltip": "Optional desired style of generated image.", + }, + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 4294967294, + "control_after_generate": True, + "tooltip": "The random seed used for creating the noise.", + }, + ), + }, + "optional": { + "negative_prompt": ( + IO.STRING, + { + "default": "", + "forceInput": True, + "tooltip": "Keywords of what you do not wish to see in the output image. This is an advanced feature." 
+ }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call(self, image: torch.Tensor, prompt: str, creativity: float, style_preset: str, seed: int, negative_prompt: str=None, + **kwargs): + validate_string(prompt, strip_whitespace=False) + image_binary = tensor_to_bytesio(image, total_pixels=1024*1024).read() + + if not negative_prompt: + negative_prompt = None + if style_preset == "None": + style_preset = None + + files = { + "image": image_binary + } + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/stability/v2beta/stable-image/upscale/creative", + method=HttpMethod.POST, + request_model=StabilityUpscaleCreativeRequest, + response_model=StabilityAsyncResponse, + ), + request=StabilityUpscaleCreativeRequest( + prompt=prompt, + negative_prompt=negative_prompt, + creativity=round(creativity,2), + style_preset=style_preset, + seed=seed, + ), + files=files, + content_type="multipart/form-data", + auth_kwargs=kwargs, + ) + response_api = operation.execute() + + operation = PollingOperation( + poll_endpoint=ApiEndpoint( + path=f"/proxy/stability/v2beta/results/{response_api.id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=StabilityResultsGetResponse, + ), + poll_interval=3, + completed_statuses=[StabilityPollStatus.finished], + failed_statuses=[StabilityPollStatus.failed], + status_extractor=lambda x: get_async_dummy_status(x), + auth_kwargs=kwargs, + ) + response_poll: StabilityResultsGetResponse = operation.execute() + + if response_poll.finish_reason != "SUCCESS": + raise Exception(f"Stability Upscale Creative generation failed: {response_poll.finish_reason}.") + + image_data = base64.b64decode(response_poll.result) + returned_image = bytesio_to_image_tensor(BytesIO(image_data)) + + return (returned_image,) + + +class StabilityUpscaleFastNode: + """ + Quickly upscales an image via Stability API call to 4x its original size; intended for upscaling low-quality/compressed images. 
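
The creative upscale is asynchronous: the first request returns an id, and `PollingOperation` repeatedly queries `/proxy/stability/v2beta/results/{id}` until `get_async_dummy_status` reports a terminal state. Conceptually the polling loop looks like the sketch below; this is an illustration, not the actual client code.

```python
import time

def poll_until_done(fetch_status, is_finished, is_failed, interval: float = 3.0, max_tries: int = 100):
    """Conceptual sketch of a PollingOperation: hit the results endpoint
    until the extracted status is terminal."""
    for _ in range(max_tries):
        response = fetch_status()
        if is_failed(response):
            raise RuntimeError(f"generation failed: {response!r}")
        if is_finished(response):
            return response
        time.sleep(interval)
    raise TimeoutError("gave up waiting for the async result")

# Toy usage: "finishes" on the third poll.
calls = iter(["in_progress", "in_progress", "finished"])
print(poll_until_done(lambda: next(calls),
                      lambda r: r == "finished",
                      lambda r: r == "failed",
                      interval=0.0))
```
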
+ """ + + RETURN_TYPES = (IO.IMAGE,) + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "api_call" + API_NODE = True + CATEGORY = "api node/image/Stability AI" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": (IO.IMAGE,), + }, + "optional": { + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call(self, image: torch.Tensor, + **kwargs): + image_binary = tensor_to_bytesio(image, total_pixels=4096*4096).read() + + files = { + "image": image_binary + } + + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/stability/v2beta/stable-image/upscale/fast", + method=HttpMethod.POST, + request_model=EmptyRequest, + response_model=StabilityStableUltraResponse, + ), + request=EmptyRequest(), + files=files, + content_type="multipart/form-data", + auth_kwargs=kwargs, + ) + response_api = operation.execute() + + if response_api.finish_reason != "SUCCESS": + raise Exception(f"Stability Upscale Fast failed: {response_api.finish_reason}.") + + image_data = base64.b64decode(response_api.image) + returned_image = bytesio_to_image_tensor(BytesIO(image_data)) + + return (returned_image,) + + +# A dictionary that contains all nodes you want to export with their names +# NOTE: names should be globally unique +NODE_CLASS_MAPPINGS = { + "StabilityStableImageUltraNode": StabilityStableImageUltraNode, + "StabilityStableImageSD_3_5Node": StabilityStableImageSD_3_5Node, + "StabilityUpscaleConservativeNode": StabilityUpscaleConservativeNode, + "StabilityUpscaleCreativeNode": StabilityUpscaleCreativeNode, + "StabilityUpscaleFastNode": StabilityUpscaleFastNode, +} + +# A dictionary that contains the friendly/humanly readable titles for the nodes +NODE_DISPLAY_NAME_MAPPINGS = { + "StabilityStableImageUltraNode": "Stability AI Stable Image Ultra", + "StabilityStableImageSD_3_5Node": "Stability AI Stable Diffusion 3.5 Image", + "StabilityUpscaleConservativeNode": "Stability AI Upscale Conservative", + "StabilityUpscaleCreativeNode": "Stability AI Upscale Creative", + "StabilityUpscaleFastNode": "Stability AI Upscale Fast", +} diff --git a/comfy_api_nodes/nodes_veo2.py b/comfy_api_nodes/nodes_veo2.py new file mode 100644 index 00000000..2740179c --- /dev/null +++ b/comfy_api_nodes/nodes_veo2.py @@ -0,0 +1,284 @@ +import io +import logging +import base64 +import requests +import torch + +from comfy.comfy_types.node_typing import IO, ComfyNodeABC +from comfy_api.input_impl.video_types import VideoFromFile +from comfy_api_nodes.apis import ( + Veo2GenVidRequest, + Veo2GenVidResponse, + Veo2GenVidPollRequest, + Veo2GenVidPollResponse +) +from comfy_api_nodes.apis.client import ( + ApiEndpoint, + HttpMethod, + SynchronousOperation, + PollingOperation, +) + +from comfy_api_nodes.apinode_utils import ( + downscale_image_tensor, + tensor_to_base64_string +) + +def convert_image_to_base64(image: torch.Tensor): + if image is None: + return None + + scaled_image = downscale_image_tensor(image, total_pixels=2048*2048) + return tensor_to_base64_string(scaled_image) + +class VeoVideoGenerationNode(ComfyNodeABC): + """ + Generates videos from text prompts using Google's Veo API. + + This node can create videos from text descriptions and optional image inputs, + with control over parameters like aspect ratio, duration, and more. 
+ """ + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Text description of the video", + }, + ), + "aspect_ratio": ( + IO.COMBO, + { + "options": ["16:9", "9:16"], + "default": "16:9", + "tooltip": "Aspect ratio of the output video", + }, + ), + }, + "optional": { + "negative_prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Negative text prompt to guide what to avoid in the video", + }, + ), + "duration_seconds": ( + IO.INT, + { + "default": 5, + "min": 5, + "max": 8, + "step": 1, + "display": "number", + "tooltip": "Duration of the output video in seconds", + }, + ), + "enhance_prompt": ( + IO.BOOLEAN, + { + "default": True, + "tooltip": "Whether to enhance the prompt with AI assistance", + } + ), + "person_generation": ( + IO.COMBO, + { + "options": ["ALLOW", "BLOCK"], + "default": "ALLOW", + "tooltip": "Whether to allow generating people in the video", + }, + ), + "seed": ( + IO.INT, + { + "default": 0, + "min": 0, + "max": 0xFFFFFFFF, + "step": 1, + "display": "number", + "control_after_generate": True, + "tooltip": "Seed for video generation (0 for random)", + }, + ), + "image": (IO.IMAGE, { + "default": None, + "tooltip": "Optional reference image to guide video generation", + }), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + RETURN_TYPES = (IO.VIDEO,) + FUNCTION = "generate_video" + CATEGORY = "api node/video/Veo" + DESCRIPTION = "Generates videos from text prompts using Google's Veo API" + API_NODE = True + + def generate_video( + self, + prompt, + aspect_ratio="16:9", + negative_prompt="", + duration_seconds=5, + enhance_prompt=True, + person_generation="ALLOW", + seed=0, + image=None, + **kwargs, + ): + # Prepare the instances for the request + instances = [] + + instance = { + "prompt": prompt + } + + # Add image if provided + if image is not None: + image_base64 = convert_image_to_base64(image) + if image_base64: + instance["image"] = { + "bytesBase64Encoded": image_base64, + "mimeType": "image/png" + } + + instances.append(instance) + + # Create parameters dictionary + parameters = { + "aspectRatio": aspect_ratio, + "personGeneration": person_generation, + "durationSeconds": duration_seconds, + "enhancePrompt": enhance_prompt, + } + + # Add optional parameters if provided + if negative_prompt: + parameters["negativePrompt"] = negative_prompt + if seed > 0: + parameters["seed"] = seed + + # Initial request to start video generation + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/veo/generate", + method=HttpMethod.POST, + request_model=Veo2GenVidRequest, + response_model=Veo2GenVidResponse + ), + request=Veo2GenVidRequest( + instances=instances, + parameters=parameters + ), + auth_kwargs=kwargs, + ) + + initial_response = initial_operation.execute() + operation_name = initial_response.name + + logging.info(f"Veo generation started with operation name: {operation_name}") + + # Define status extractor function + def status_extractor(response): + # Only return "completed" if the operation is done, regardless of success or failure + # We'll check for errors after polling completes + return "completed" if response.done else "pending" + + # Define progress extractor function + def progress_extractor(response): + # Could be enhanced if the API provides progress information + return None + + # Define the polling operation + poll_operation = PollingOperation( 
+ poll_endpoint=ApiEndpoint( + path="/proxy/veo/poll", + method=HttpMethod.POST, + request_model=Veo2GenVidPollRequest, + response_model=Veo2GenVidPollResponse + ), + completed_statuses=["completed"], + failed_statuses=[], # No failed statuses, we'll handle errors after polling + status_extractor=status_extractor, + progress_extractor=progress_extractor, + request=Veo2GenVidPollRequest( + operationName=operation_name + ), + auth_kwargs=kwargs, + poll_interval=5.0 + ) + + # Execute the polling operation + poll_response = poll_operation.execute() + + # Now check for errors in the final response + # Check for error in poll response + if hasattr(poll_response, 'error') and poll_response.error: + error_message = f"Veo API error: {poll_response.error.message} (code: {poll_response.error.code})" + logging.error(error_message) + raise Exception(error_message) + + # Check for RAI filtered content + if (hasattr(poll_response.response, 'raiMediaFilteredCount') and + poll_response.response.raiMediaFilteredCount > 0): + + # Extract reason message if available + if (hasattr(poll_response.response, 'raiMediaFilteredReasons') and + poll_response.response.raiMediaFilteredReasons): + reason = poll_response.response.raiMediaFilteredReasons[0] + error_message = f"Content filtered by Google's Responsible AI practices: {reason} ({poll_response.response.raiMediaFilteredCount} videos filtered.)" + else: + error_message = f"Content filtered by Google's Responsible AI practices ({poll_response.response.raiMediaFilteredCount} videos filtered.)" + + logging.error(error_message) + raise Exception(error_message) + + # Extract video data + video_data = None + if poll_response.response and hasattr(poll_response.response, 'videos') and poll_response.response.videos and len(poll_response.response.videos) > 0: + video = poll_response.response.videos[0] + + # Check if video is provided as base64 or URL + if hasattr(video, 'bytesBase64Encoded') and video.bytesBase64Encoded: + # Decode base64 string to bytes + video_data = base64.b64decode(video.bytesBase64Encoded) + elif hasattr(video, 'gcsUri') and video.gcsUri: + # Download from URL + video_url = video.gcsUri + video_response = requests.get(video_url) + video_data = video_response.content + else: + raise Exception("Video returned but no data or URL was provided") + else: + raise Exception("Video generation completed but no video was returned") + + if not video_data: + raise Exception("No video data was returned") + + logging.info("Video generation completed successfully") + + # Convert video data to BytesIO object + video_io = io.BytesIO(video_data) + + # Return VideoFromFile object + return (VideoFromFile(video_io),) + + +# Register the node +NODE_CLASS_MAPPINGS = { + "VeoVideoGenerationNode": VeoVideoGenerationNode, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "VeoVideoGenerationNode": "Google Veo2 Video Generation", +} diff --git a/comfy_api_nodes/redocly-dev.yaml b/comfy_api_nodes/redocly-dev.yaml new file mode 100644 index 00000000..d9e3cab7 --- /dev/null +++ b/comfy_api_nodes/redocly-dev.yaml @@ -0,0 +1,10 @@ +# This file is used to filter the Comfy Org OpenAPI spec for schemas related to API Nodes. +# This is used for development purposes to generate stubs for unreleased API endpoints. 
+apis: + filter: + root: openapi.yaml + decorators: + filter-in: + property: tags + value: ['API Nodes'] + matchStrategy: all diff --git a/comfy_api_nodes/redocly.yaml b/comfy_api_nodes/redocly.yaml new file mode 100644 index 00000000..d102345b --- /dev/null +++ b/comfy_api_nodes/redocly.yaml @@ -0,0 +1,10 @@ +# This file is used to filter the Comfy Org OpenAPI spec for schemas related to API Nodes. + +apis: + filter: + root: openapi.yaml + decorators: + filter-in: + property: tags + value: ['API Nodes', 'Released'] + matchStrategy: all diff --git a/comfy_extras/nodes_ace.py b/comfy_extras/nodes_ace.py new file mode 100644 index 00000000..cbfec15a --- /dev/null +++ b/comfy_extras/nodes_ace.py @@ -0,0 +1,49 @@ +import torch +import comfy.model_management +import node_helpers + +class TextEncodeAceStepAudio: + @classmethod + def INPUT_TYPES(s): + return {"required": { + "clip": ("CLIP", ), + "tags": ("STRING", {"multiline": True, "dynamicPrompts": True}), + "lyrics": ("STRING", {"multiline": True, "dynamicPrompts": True}), + "lyrics_strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + }} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "encode" + + CATEGORY = "conditioning" + + def encode(self, clip, tags, lyrics, lyrics_strength): + tokens = clip.tokenize(tags, lyrics=lyrics) + conditioning = clip.encode_from_tokens_scheduled(tokens) + conditioning = node_helpers.conditioning_set_values(conditioning, {"lyrics_strength": lyrics_strength}) + return (conditioning, ) + + +class EmptyAceStepLatentAudio: + def __init__(self): + self.device = comfy.model_management.intermediate_device() + + @classmethod + def INPUT_TYPES(s): + return {"required": {"seconds": ("FLOAT", {"default": 120.0, "min": 1.0, "max": 1000.0, "step": 0.1}), + "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096, "tooltip": "The number of latent images in the batch."}), + }} + RETURN_TYPES = ("LATENT",) + FUNCTION = "generate" + + CATEGORY = "latent/audio" + + def generate(self, seconds, batch_size): + length = int(seconds * 44100 / 512 / 8) + latent = torch.zeros([batch_size, 8, 16, length], device=self.device) + return ({"samples": latent, "type": "audio"}, ) + + +NODE_CLASS_MAPPINGS = { + "TextEncodeAceStepAudio": TextEncodeAceStepAudio, + "EmptyAceStepLatentAudio": EmptyAceStepLatentAudio, +} diff --git a/comfy_extras/nodes_audio.py b/comfy_extras/nodes_audio.py index 136ad615..49af1eae 100644 --- a/comfy_extras/nodes_audio.py +++ b/comfy_extras/nodes_audio.py @@ -1,5 +1,6 @@ from __future__ import annotations +import av import torchaudio import torch import comfy.model_management @@ -7,7 +8,6 @@ import folder_paths import os import io import json -import struct import random import hashlib import node_helpers @@ -90,60 +90,118 @@ class VAEDecodeAudio: return ({"waveform": audio, "sample_rate": 44100}, ) -def create_vorbis_comment_block(comment_dict, last_block): - vendor_string = b'ComfyUI' - vendor_length = len(vendor_string) +def save_audio(self, audio, filename_prefix="ComfyUI", format="flac", prompt=None, extra_pnginfo=None, quality="128k"): - comments = [] - for key, value in comment_dict.items(): - comment = f"{key}={value}".encode('utf-8') - comments.append(struct.pack('I', len(comment_data))[1:] + comment_data + # Opus supported sample rates + OPUS_RATES = [8000, 12000, 16000, 24000, 48000] - return comment_block + for (batch_number, waveform) in enumerate(audio["waveform"].cpu()): + filename_with_batch_num = filename.replace("%batch_num%", str(batch_number)) + file = 
f"{filename_with_batch_num}_{counter:05}_.{format}" + output_path = os.path.join(full_output_folder, file) -def insert_or_replace_vorbis_comment(flac_io, comment_dict): - if len(comment_dict) == 0: - return flac_io + # Use original sample rate initially + sample_rate = audio["sample_rate"] - flac_io.seek(4) + # Handle Opus sample rate requirements + if format == "opus": + if sample_rate > 48000: + sample_rate = 48000 + elif sample_rate not in OPUS_RATES: + # Find the next highest supported rate + for rate in sorted(OPUS_RATES): + if rate > sample_rate: + sample_rate = rate + break + if sample_rate not in OPUS_RATES: # Fallback if still not supported + sample_rate = 48000 - blocks = [] - last_block = False + # Resample if necessary + if sample_rate != audio["sample_rate"]: + waveform = torchaudio.functional.resample(waveform, audio["sample_rate"], sample_rate) - while not last_block: - header = flac_io.read(4) - last_block = (header[0] & 0x80) != 0 - block_type = header[0] & 0x7F - block_length = struct.unpack('>I', b'\x00' + header[1:])[0] - block_data = flac_io.read(block_length) + # Create in-memory WAV buffer + wav_buffer = io.BytesIO() + torchaudio.save(wav_buffer, waveform, sample_rate, format="WAV") + wav_buffer.seek(0) # Rewind for reading - if block_type == 4 or block_type == 1: - pass - else: - header = bytes([(header[0] & (~0x80))]) + header[1:] - blocks.append(header + block_data) + # Use PyAV to convert and add metadata + input_container = av.open(wav_buffer) - blocks.append(create_vorbis_comment_block(comment_dict, last_block=True)) + # Create output with specified format + output_buffer = io.BytesIO() + output_container = av.open(output_buffer, mode='w', format=format) - new_flac_io = io.BytesIO() - new_flac_io.write(b'fLaC') - for block in blocks: - new_flac_io.write(block) + # Set metadata on the container + for key, value in metadata.items(): + output_container.metadata[key] = value - new_flac_io.write(flac_io.read()) - return new_flac_io + # Set up the output stream with appropriate properties + input_container.streams.audio[0] + if format == "opus": + out_stream = output_container.add_stream("libopus", rate=sample_rate) + if quality == "64k": + out_stream.bit_rate = 64000 + elif quality == "96k": + out_stream.bit_rate = 96000 + elif quality == "128k": + out_stream.bit_rate = 128000 + elif quality == "192k": + out_stream.bit_rate = 192000 + elif quality == "320k": + out_stream.bit_rate = 320000 + elif format == "mp3": + out_stream = output_container.add_stream("libmp3lame", rate=sample_rate) + if quality == "V0": + #TODO i would really love to support V3 and V5 but there doesn't seem to be a way to set the qscale level, the property below is a bool + out_stream.codec_context.qscale = 1 + elif quality == "128k": + out_stream.bit_rate = 128000 + elif quality == "320k": + out_stream.bit_rate = 320000 + else: #format == "flac": + out_stream = output_container.add_stream("flac", rate=sample_rate) + # Copy frames from input to output + for frame in input_container.decode(audio=0): + frame.pts = None # Let PyAV handle timestamps + output_container.mux(out_stream.encode(frame)) + + # Flush encoder + output_container.mux(out_stream.encode(None)) + + # Close containers + output_container.close() + input_container.close() + + # Write the output to file + output_buffer.seek(0) + with open(output_path, 'wb') as f: + f.write(output_buffer.getbuffer()) + + results.append({ + "filename": file, + "subfolder": subfolder, + "type": self.type + }) + counter += 1 + + return { "ui": { "audio": 
results } } + class SaveAudio: def __init__(self): self.output_dir = folder_paths.get_output_directory() @@ -153,50 +211,70 @@ class SaveAudio: @classmethod def INPUT_TYPES(s): return {"required": { "audio": ("AUDIO", ), - "filename_prefix": ("STRING", {"default": "audio/ComfyUI"})}, + "filename_prefix": ("STRING", {"default": "audio/ComfyUI"}), + }, "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"}, } RETURN_TYPES = () - FUNCTION = "save_audio" + FUNCTION = "save_flac" OUTPUT_NODE = True CATEGORY = "audio" - def save_audio(self, audio, filename_prefix="ComfyUI", prompt=None, extra_pnginfo=None): - filename_prefix += self.prefix_append - full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir) - results: list[FileLocator] = [] + def save_flac(self, audio, filename_prefix="ComfyUI", format="flac", prompt=None, extra_pnginfo=None): + return save_audio(self, audio, filename_prefix, format, prompt, extra_pnginfo) - metadata = {} - if not args.disable_metadata: - if prompt is not None: - metadata["prompt"] = json.dumps(prompt) - if extra_pnginfo is not None: - for x in extra_pnginfo: - metadata[x] = json.dumps(extra_pnginfo[x]) +class SaveAudioMP3: + def __init__(self): + self.output_dir = folder_paths.get_output_directory() + self.type = "output" + self.prefix_append = "" - for (batch_number, waveform) in enumerate(audio["waveform"].cpu()): - filename_with_batch_num = filename.replace("%batch_num%", str(batch_number)) - file = f"{filename_with_batch_num}_{counter:05}_.flac" + @classmethod + def INPUT_TYPES(s): + return {"required": { "audio": ("AUDIO", ), + "filename_prefix": ("STRING", {"default": "audio/ComfyUI"}), + "quality": (["V0", "128k", "320k"], {"default": "V0"}), + }, + "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"}, + } - buff = io.BytesIO() - torchaudio.save(buff, waveform, audio["sample_rate"], format="FLAC") + RETURN_TYPES = () + FUNCTION = "save_mp3" - buff = insert_or_replace_vorbis_comment(buff, metadata) + OUTPUT_NODE = True - with open(os.path.join(full_output_folder, file), 'wb') as f: - f.write(buff.getbuffer()) + CATEGORY = "audio" - results.append({ - "filename": file, - "subfolder": subfolder, - "type": self.type - }) - counter += 1 + def save_mp3(self, audio, filename_prefix="ComfyUI", format="mp3", prompt=None, extra_pnginfo=None, quality="128k"): + return save_audio(self, audio, filename_prefix, format, prompt, extra_pnginfo, quality) - return { "ui": { "audio": results } } +class SaveAudioOpus: + def __init__(self): + self.output_dir = folder_paths.get_output_directory() + self.type = "output" + self.prefix_append = "" + + @classmethod + def INPUT_TYPES(s): + return {"required": { "audio": ("AUDIO", ), + "filename_prefix": ("STRING", {"default": "audio/ComfyUI"}), + "quality": (["64k", "96k", "128k", "192k", "320k"], {"default": "128k"}), + }, + "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"}, + } + + RETURN_TYPES = () + FUNCTION = "save_opus" + + OUTPUT_NODE = True + + CATEGORY = "audio" + + def save_opus(self, audio, filename_prefix="ComfyUI", format="opus", prompt=None, extra_pnginfo=None, quality="V3"): + return save_audio(self, audio, filename_prefix, format, prompt, extra_pnginfo, quality) class PreviewAudio(SaveAudio): def __init__(self): @@ -248,7 +326,20 @@ NODE_CLASS_MAPPINGS = { "VAEEncodeAudio": VAEEncodeAudio, "VAEDecodeAudio": VAEDecodeAudio, "SaveAudio": SaveAudio, + "SaveAudioMP3": SaveAudioMP3, + "SaveAudioOpus": 
SaveAudioOpus, "LoadAudio": LoadAudio, "PreviewAudio": PreviewAudio, "ConditioningStableAudio": ConditioningStableAudio, } + +NODE_DISPLAY_NAME_MAPPINGS = { + "EmptyLatentAudio": "Empty Latent Audio", + "VAEEncodeAudio": "VAE Encode Audio", + "VAEDecodeAudio": "VAE Decode Audio", + "PreviewAudio": "Preview Audio", + "LoadAudio": "Load Audio", + "SaveAudio": "Save Audio (FLAC)", + "SaveAudioMP3": "Save Audio (MP3)", + "SaveAudioOpus": "Save Audio (Opus)", +} diff --git a/comfy_extras/nodes_cond.py b/comfy_extras/nodes_cond.py index 4c3a1d5b..57426217 100644 --- a/comfy_extras/nodes_cond.py +++ b/comfy_extras/nodes_cond.py @@ -20,6 +20,29 @@ class CLIPTextEncodeControlnet: c.append(n) return (c, ) +class T5TokenizerOptions: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "clip": ("CLIP", ), + "min_padding": ("INT", {"default": 0, "min": 0, "max": 10000, "step": 1}), + "min_length": ("INT", {"default": 0, "min": 0, "max": 10000, "step": 1}), + } + } + + RETURN_TYPES = ("CLIP",) + FUNCTION = "set_options" + + def set_options(self, clip, min_padding, min_length): + clip = clip.clone() + for t5_type in ["t5xxl", "pile_t5xl", "t5base", "mt5xl", "umt5xxl"]: + clip.set_tokenizer_option("{}_min_padding".format(t5_type), min_padding) + clip.set_tokenizer_option("{}_min_length".format(t5_type), min_length) + + return (clip, ) + NODE_CLASS_MAPPINGS = { - "CLIPTextEncodeControlnet": CLIPTextEncodeControlnet + "CLIPTextEncodeControlnet": CLIPTextEncodeControlnet, + "T5TokenizerOptions": T5TokenizerOptions, } diff --git a/comfy_extras/nodes_custom_sampler.py b/comfy_extras/nodes_custom_sampler.py index c9689b74..3e5be3d3 100644 --- a/comfy_extras/nodes_custom_sampler.py +++ b/comfy_extras/nodes_custom_sampler.py @@ -1,3 +1,4 @@ +import math import comfy.samplers import comfy.sample from comfy.k_diffusion import sampling as k_diffusion_sampling @@ -249,6 +250,55 @@ class SetFirstSigma: sigmas[0] = sigma return (sigmas, ) +class ExtendIntermediateSigmas: + @classmethod + def INPUT_TYPES(s): + return {"required": + {"sigmas": ("SIGMAS", ), + "steps": ("INT", {"default": 2, "min": 1, "max": 100}), + "start_at_sigma": ("FLOAT", {"default": -1.0, "min": -1.0, "max": 20000.0, "step": 0.01, "round": False}), + "end_at_sigma": ("FLOAT", {"default": 12.0, "min": 0.0, "max": 20000.0, "step": 0.01, "round": False}), + "spacing": (['linear', 'cosine', 'sine'],), + } + } + RETURN_TYPES = ("SIGMAS",) + CATEGORY = "sampling/custom_sampling/sigmas" + + FUNCTION = "extend" + + def extend(self, sigmas: torch.Tensor, steps: int, start_at_sigma: float, end_at_sigma: float, spacing: str): + if start_at_sigma < 0: + start_at_sigma = float("inf") + + interpolator = { + 'linear': lambda x: x, + 'cosine': lambda x: torch.sin(x*math.pi/2), + 'sine': lambda x: 1 - torch.cos(x*math.pi/2) + }[spacing] + + # linear space for our interpolation function + x = torch.linspace(0, 1, steps + 1, device=sigmas.device)[1:-1] + computed_spacing = interpolator(x) + + extended_sigmas = [] + for i in range(len(sigmas) - 1): + sigma_current = sigmas[i] + sigma_next = sigmas[i+1] + + extended_sigmas.append(sigma_current) + + if end_at_sigma <= sigma_current <= start_at_sigma: + interpolated_steps = computed_spacing * (sigma_next - sigma_current) + sigma_current + extended_sigmas.extend(interpolated_steps.tolist()) + + # Add the last sigma value + if len(sigmas) > 0: + extended_sigmas.append(sigmas[-1]) + + extended_sigmas = torch.FloatTensor(extended_sigmas) + + return (extended_sigmas,) + class KSamplerSelect: @classmethod def 
INPUT_TYPES(s): @@ -735,6 +785,7 @@ NODE_CLASS_MAPPINGS = { "SplitSigmasDenoise": SplitSigmasDenoise, "FlipSigmas": FlipSigmas, "SetFirstSigma": SetFirstSigma, + "ExtendIntermediateSigmas": ExtendIntermediateSigmas, "CFGGuider": CFGGuider, "DualCFGGuider": DualCFGGuider, diff --git a/comfy_extras/nodes_images.py b/comfy_extras/nodes_images.py index e11a4583..77c30561 100644 --- a/comfy_extras/nodes_images.py +++ b/comfy_extras/nodes_images.py @@ -10,6 +10,9 @@ from PIL.PngImagePlugin import PngInfo import numpy as np import json import os +import re +from io import BytesIO +from inspect import cleandoc from comfy.comfy_types import FileLocator @@ -190,10 +193,109 @@ class SaveAnimatedPNG: return { "ui": { "images": results, "animated": (True,)} } +class SVG: + """ + Stores SVG representations via a list of BytesIO objects. + """ + def __init__(self, data: list[BytesIO]): + self.data = data + + def combine(self, other: 'SVG') -> 'SVG': + return SVG(self.data + other.data) + + @staticmethod + def combine_all(svgs: list['SVG']) -> 'SVG': + all_svgs_list: list[BytesIO] = [] + for svg_item in svgs: + all_svgs_list.extend(svg_item.data) + return SVG(all_svgs_list) + +class SaveSVGNode: + """ + Save SVG files on disk. + """ + + def __init__(self): + self.output_dir = folder_paths.get_output_directory() + self.type = "output" + self.prefix_append = "" + + RETURN_TYPES = () + DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value + FUNCTION = "save_svg" + CATEGORY = "image/save" # Changed + OUTPUT_NODE = True + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "svg": ("SVG",), # Changed + "filename_prefix": ("STRING", {"default": "svg/ComfyUI", "tooltip": "The prefix for the file to save. This may include formatting information such as %date:yyyy-MM-dd% or %Empty Latent Image.width% to include values from nodes."}) + }, + "hidden": { + "prompt": "PROMPT", + "extra_pnginfo": "EXTRA_PNGINFO" + } + } + + def save_svg(self, svg: SVG, filename_prefix="svg/ComfyUI", prompt=None, extra_pnginfo=None): + filename_prefix += self.prefix_append + full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir) + results = list() + + # Prepare metadata JSON + metadata_dict = {} + if prompt is not None: + metadata_dict["prompt"] = prompt + if extra_pnginfo is not None: + metadata_dict.update(extra_pnginfo) + + # Convert metadata to JSON string + metadata_json = json.dumps(metadata_dict, indent=2) if metadata_dict else None + + for batch_number, svg_bytes in enumerate(svg.data): + filename_with_batch_num = filename.replace("%batch_num%", str(batch_number)) + file = f"{filename_with_batch_num}_{counter:05}_.svg" + + # Read SVG content + svg_bytes.seek(0) + svg_content = svg_bytes.read().decode('utf-8') + + # Inject metadata if available + if metadata_json: + # Create metadata element with CDATA section + metadata_element = f""" + + + """ + # Insert metadata after opening svg tag using regex with a replacement function + def replacement(match): + # match.group(1) contains the captured tag + return match.group(1) + '\n' + metadata_element + + # Apply the substitution + svg_content = re.sub(r'(]*>)', replacement, svg_content, flags=re.UNICODE) + + # Write the modified SVG to file + with open(os.path.join(full_output_folder, file), 'wb') as svg_file: + svg_file.write(svg_content.encode('utf-8')) + + results.append({ + "filename": file, + "subfolder": subfolder, + "type": self.type + }) + counter += 1 + return { "ui": { 
"images": results } } + NODE_CLASS_MAPPINGS = { "ImageCrop": ImageCrop, "RepeatImageBatch": RepeatImageBatch, "ImageFromBatch": ImageFromBatch, "SaveAnimatedWEBP": SaveAnimatedWEBP, "SaveAnimatedPNG": SaveAnimatedPNG, + "SaveSVGNode": SaveSVGNode, } diff --git a/comfy_extras/nodes_load_3d.py b/comfy_extras/nodes_load_3d.py index 53d892bc..d5b4d911 100644 --- a/comfy_extras/nodes_load_3d.py +++ b/comfy_extras/nodes_load_3d.py @@ -2,6 +2,10 @@ import nodes import folder_paths import os +from comfy.comfy_types import IO +from comfy_api.input_impl import VideoFromFile + + def normalize_path(path): return path.replace('\\', '/') @@ -21,8 +25,8 @@ class Load3D(): "height": ("INT", {"default": 1024, "min": 1, "max": 4096, "step": 1}), }} - RETURN_TYPES = ("IMAGE", "MASK", "STRING", "IMAGE", "IMAGE", "LOAD3D_CAMERA") - RETURN_NAMES = ("image", "mask", "mesh_path", "normal", "lineart", "camera_info") + RETURN_TYPES = ("IMAGE", "MASK", "STRING", "IMAGE", "IMAGE", "LOAD3D_CAMERA", IO.VIDEO) + RETURN_NAMES = ("image", "mask", "mesh_path", "normal", "lineart", "camera_info", "recording_video") FUNCTION = "process" EXPERIMENTAL = True @@ -41,7 +45,14 @@ class Load3D(): normal_image, ignore_mask2 = load_image_node.load_image(image=normal_path) lineart_image, ignore_mask3 = load_image_node.load_image(image=lineart_path) - return output_image, output_mask, model_file, normal_image, lineart_image, image['camera_info'] + video = None + + if image['recording'] != "": + recording_video_path = folder_paths.get_annotated_filepath(image['recording']) + + video = VideoFromFile(recording_video_path) + + return output_image, output_mask, model_file, normal_image, lineart_image, image['camera_info'], video class Load3DAnimation(): @classmethod @@ -59,8 +70,8 @@ class Load3DAnimation(): "height": ("INT", {"default": 1024, "min": 1, "max": 4096, "step": 1}), }} - RETURN_TYPES = ("IMAGE", "MASK", "STRING", "IMAGE", "LOAD3D_CAMERA") - RETURN_NAMES = ("image", "mask", "mesh_path", "normal", "camera_info") + RETURN_TYPES = ("IMAGE", "MASK", "STRING", "IMAGE", "LOAD3D_CAMERA", IO.VIDEO) + RETURN_NAMES = ("image", "mask", "mesh_path", "normal", "camera_info", "recording_video") FUNCTION = "process" EXPERIMENTAL = True @@ -77,7 +88,14 @@ class Load3DAnimation(): ignore_image, output_mask = load_image_node.load_image(image=mask_path) normal_image, ignore_mask2 = load_image_node.load_image(image=normal_path) - return output_image, output_mask, model_file, normal_image, image['camera_info'] + video = None + + if image['recording'] != "": + recording_video_path = folder_paths.get_annotated_filepath(image['recording']) + + video = VideoFromFile(recording_video_path) + + return output_image, output_mask, model_file, normal_image, image['camera_info'], video class Preview3D(): @classmethod diff --git a/comfy_extras/nodes_lt.py b/comfy_extras/nodes_lt.py index 52588920..e6dc122c 100644 --- a/comfy_extras/nodes_lt.py +++ b/comfy_extras/nodes_lt.py @@ -38,6 +38,7 @@ class LTXVImgToVideo: "height": ("INT", {"default": 512, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}), "length": ("INT", {"default": 97, "min": 9, "max": nodes.MAX_RESOLUTION, "step": 8}), "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}), + "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0}), }} RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT") @@ -46,7 +47,7 @@ class LTXVImgToVideo: CATEGORY = "conditioning/video_models" FUNCTION = "generate" - def generate(self, positive, negative, image, vae, width, height, length, batch_size): + 
def generate(self, positive, negative, image, vae, width, height, length, batch_size, strength): pixels = comfy.utils.common_upscale(image.movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1) encode_pixels = pixels[:, :, :, :3] t = vae.encode(encode_pixels) @@ -59,7 +60,7 @@ class LTXVImgToVideo: dtype=torch.float32, device=latent.device, ) - conditioning_latent_frames_mask[:, :, :t.shape[2]] = 0 + conditioning_latent_frames_mask[:, :, :t.shape[2]] = 1.0 - strength return (positive, negative, {"samples": latent, "noise_mask": conditioning_latent_frames_mask}, ) @@ -152,6 +153,15 @@ class LTXVAddGuide: return node_helpers.conditioning_set_values(cond, {"keyframe_idxs": keyframe_idxs}) def append_keyframe(self, positive, negative, frame_idx, latent_image, noise_mask, guiding_latent, strength, scale_factors): + _, latent_idx = self.get_latent_index( + cond=positive, + latent_length=latent_image.shape[2], + guide_length=guiding_latent.shape[2], + frame_idx=frame_idx, + scale_factors=scale_factors, + ) + noise_mask[:, :, latent_idx:latent_idx + guiding_latent.shape[2]] = 1.0 + positive = self.add_keyframe_index(positive, frame_idx, guiding_latent, scale_factors) negative = self.add_keyframe_index(negative, frame_idx, guiding_latent, scale_factors) @@ -385,7 +395,7 @@ def encode_single_frame(output_file, image_array: np.ndarray, crf): container = av.open(output_file, "w", format="mp4") try: stream = container.add_stream( - "h264", rate=1, options={"crf": str(crf), "preset": "veryfast"} + "libx264", rate=1, options={"crf": str(crf), "preset": "veryfast"} ) stream.height = image_array.shape[0] stream.width = image_array.shape[1] diff --git a/comfy_extras/nodes_model_merging.py b/comfy_extras/nodes_model_merging.py index ccf60115..f20beab7 100644 --- a/comfy_extras/nodes_model_merging.py +++ b/comfy_extras/nodes_model_merging.py @@ -209,6 +209,9 @@ def save_checkpoint(model, clip=None, vae=None, clip_vision=None, filename_prefi metadata["modelspec.predict_key"] = "epsilon" elif model.model.model_type == comfy.model_base.ModelType.V_PREDICTION: metadata["modelspec.predict_key"] = "v" + extra_keys["v_pred"] = torch.tensor([]) + if getattr(model_sampling, "zsnr", False): + extra_keys["ztsnr"] = torch.tensor([]) if not args.disable_metadata: metadata["prompt"] = prompt_info @@ -273,7 +276,7 @@ class CLIPSave: comfy.model_management.load_models_gpu([clip.load_model()], force_patch_weights=True) clip_sd = clip.get_sd() - for prefix in ["clip_l.", "clip_g.", ""]: + for prefix in ["clip_l.", "clip_g.", "clip_h.", "t5xxl.", "pile_t5xl.", "mt5xl.", "umt5xxl.", "t5base.", "gemma2_2b.", "llama.", "hydit_clip.", ""]: k = list(filter(lambda a: a.startswith(prefix), clip_sd.keys())) current_clip_sd = {} for x in k: diff --git a/comfy_extras/nodes_optimalsteps.py b/comfy_extras/nodes_optimalsteps.py index f6928199..e7c851ca 100644 --- a/comfy_extras/nodes_optimalsteps.py +++ b/comfy_extras/nodes_optimalsteps.py @@ -20,13 +20,14 @@ def loglinear_interp(t_steps, num_steps): NOISE_LEVELS = {"FLUX": [0.9968, 0.9886, 0.9819, 0.975, 0.966, 0.9471, 0.9158, 0.8287, 0.5512, 0.2808, 0.001], "Wan":[1.0, 0.997, 0.995, 0.993, 0.991, 0.989, 0.987, 0.985, 0.98, 0.975, 0.973, 0.968, 0.96, 0.946, 0.927, 0.902, 0.864, 0.776, 0.539, 0.208, 0.001], +"Chroma": [0.992, 0.99, 0.988, 0.985, 0.982, 0.978, 0.973, 0.968, 0.961, 0.953, 0.943, 0.931, 0.917, 0.9, 0.881, 0.858, 0.832, 0.802, 0.769, 0.731, 0.69, 0.646, 0.599, 0.55, 0.501, 0.451, 0.402, 0.355, 0.311, 0.27, 0.232, 0.199, 0.169, 0.143, 0.12, 0.101, 0.084, 0.07, 
0.058, 0.048, 0.001], } class OptimalStepsScheduler: @classmethod def INPUT_TYPES(s): return {"required": - {"model_type": (["FLUX", "Wan"], ), + {"model_type": (["FLUX", "Wan", "Chroma"], ), "steps": ("INT", {"default": 20, "min": 3, "max": 1000}), "denoise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), } diff --git a/comfy_extras/nodes_post_processing.py b/comfy_extras/nodes_post_processing.py index 5b954201..cb1a0d88 100644 --- a/comfy_extras/nodes_post_processing.py +++ b/comfy_extras/nodes_post_processing.py @@ -141,6 +141,7 @@ class Quantize: CATEGORY = "image/postprocessing" + @staticmethod def bayer(im, pal_im, order): def normalized_bayer_matrix(n): if n == 0: diff --git a/comfy_extras/nodes_preview_any.py b/comfy_extras/nodes_preview_any.py new file mode 100644 index 00000000..e6805696 --- /dev/null +++ b/comfy_extras/nodes_preview_any.py @@ -0,0 +1,43 @@ +import json +from comfy.comfy_types.node_typing import IO + +# Preview Any - original implement from +# https://github.com/rgthree/rgthree-comfy/blob/main/py/display_any.py +# upstream requested in https://github.com/Kosinkadink/rfcs/blob/main/rfcs/0000-corenodes.md#preview-nodes +class PreviewAny(): + @classmethod + def INPUT_TYPES(cls): + return { + "required": {"source": (IO.ANY, {})}, + } + + RETURN_TYPES = () + FUNCTION = "main" + OUTPUT_NODE = True + + CATEGORY = "utils" + + def main(self, source=None): + value = 'None' + if isinstance(source, str): + value = source + elif isinstance(source, (int, float, bool)): + value = str(source) + elif source is not None: + try: + value = json.dumps(source) + except Exception: + try: + value = str(source) + except Exception: + value = 'source exists, but could not be serialized.' + + return {"ui": {"text": (value,)}} + +NODE_CLASS_MAPPINGS = { + "PreviewAny": PreviewAny, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "PreviewAny": "Preview Any", +} diff --git a/comfy_extras/nodes_primitive.py b/comfy_extras/nodes_primitive.py index 184b990c..1f93f87a 100644 --- a/comfy_extras/nodes_primitive.py +++ b/comfy_extras/nodes_primitive.py @@ -21,6 +21,21 @@ class String(ComfyNodeABC): return (value,) +class StringMultiline(ComfyNodeABC): + @classmethod + def INPUT_TYPES(cls) -> InputTypeDict: + return { + "required": {"value": (IO.STRING, {"multiline": True,},)}, + } + + RETURN_TYPES = (IO.STRING,) + FUNCTION = "execute" + CATEGORY = "utils/primitive" + + def execute(self, value: str) -> tuple[str]: + return (value,) + + class Int(ComfyNodeABC): @classmethod def INPUT_TYPES(cls) -> InputTypeDict: @@ -68,6 +83,7 @@ class Boolean(ComfyNodeABC): NODE_CLASS_MAPPINGS = { "PrimitiveString": String, + "PrimitiveStringMultiline": StringMultiline, "PrimitiveInt": Int, "PrimitiveFloat": Float, "PrimitiveBoolean": Boolean, @@ -75,6 +91,7 @@ NODE_CLASS_MAPPINGS = { NODE_DISPLAY_NAME_MAPPINGS = { "PrimitiveString": "String", + "PrimitiveStringMultiline": "String (Multiline)", "PrimitiveInt": "Int", "PrimitiveFloat": "Float", "PrimitiveBoolean": "Boolean", diff --git a/comfy_extras/nodes_string.py b/comfy_extras/nodes_string.py new file mode 100644 index 00000000..a852326e --- /dev/null +++ b/comfy_extras/nodes_string.py @@ -0,0 +1,322 @@ +import re + +from comfy.comfy_types.node_typing import IO + +class StringConcatenate(): + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "string_a": (IO.STRING, {"multiline": True}), + "string_b": (IO.STRING, {"multiline": True}) + } + } + + RETURN_TYPES = (IO.STRING,) + FUNCTION = "execute" + CATEGORY = "utils/string" + + def 
execute(self, string_a, string_b, **kwargs): + return string_a + string_b, + +class StringSubstring(): + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "string": (IO.STRING, {"multiline": True}), + "start": (IO.INT, {}), + "end": (IO.INT, {}), + } + } + + RETURN_TYPES = (IO.STRING,) + FUNCTION = "execute" + CATEGORY = "utils/string" + + def execute(self, string, start, end, **kwargs): + return string[start:end], + +class StringLength(): + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "string": (IO.STRING, {"multiline": True}) + } + } + + RETURN_TYPES = (IO.INT,) + RETURN_NAMES = ("length",) + FUNCTION = "execute" + CATEGORY = "utils/string" + + def execute(self, string, **kwargs): + length = len(string) + + return length, + +class CaseConverter(): + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "string": (IO.STRING, {"multiline": True}), + "mode": (IO.COMBO, {"options": ["UPPERCASE", "lowercase", "Capitalize", "Title Case"]}) + } + } + + RETURN_TYPES = (IO.STRING,) + FUNCTION = "execute" + CATEGORY = "utils/string" + + def execute(self, string, mode, **kwargs): + if mode == "UPPERCASE": + result = string.upper() + elif mode == "lowercase": + result = string.lower() + elif mode == "Capitalize": + result = string.capitalize() + elif mode == "Title Case": + result = string.title() + else: + result = string + + return result, + + +class StringTrim(): + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "string": (IO.STRING, {"multiline": True}), + "mode": (IO.COMBO, {"options": ["Both", "Left", "Right"]}) + } + } + + RETURN_TYPES = (IO.STRING,) + FUNCTION = "execute" + CATEGORY = "utils/string" + + def execute(self, string, mode, **kwargs): + if mode == "Both": + result = string.strip() + elif mode == "Left": + result = string.lstrip() + elif mode == "Right": + result = string.rstrip() + else: + result = string + + return result, + +class StringReplace(): + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "string": (IO.STRING, {"multiline": True}), + "find": (IO.STRING, {"multiline": True}), + "replace": (IO.STRING, {"multiline": True}) + } + } + + RETURN_TYPES = (IO.STRING,) + FUNCTION = "execute" + CATEGORY = "utils/string" + + def execute(self, string, find, replace, **kwargs): + result = string.replace(find, replace) + return result, + + +class StringContains(): + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "string": (IO.STRING, {"multiline": True}), + "substring": (IO.STRING, {"multiline": True}), + "case_sensitive": (IO.BOOLEAN, {"default": True}) + } + } + + RETURN_TYPES = (IO.BOOLEAN,) + RETURN_NAMES = ("contains",) + FUNCTION = "execute" + CATEGORY = "utils/string" + + def execute(self, string, substring, case_sensitive, **kwargs): + if case_sensitive: + contains = substring in string + else: + contains = substring.lower() in string.lower() + + return contains, + + +class StringCompare(): + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "string_a": (IO.STRING, {"multiline": True}), + "string_b": (IO.STRING, {"multiline": True}), + "mode": (IO.COMBO, {"options": ["Starts With", "Ends With", "Equal"]}), + "case_sensitive": (IO.BOOLEAN, {"default": True}) + } + } + + RETURN_TYPES = (IO.BOOLEAN,) + FUNCTION = "execute" + CATEGORY = "utils/string" + + def execute(self, string_a, string_b, mode, case_sensitive, **kwargs): + if case_sensitive: + a = string_a + b = string_b + else: + a = string_a.lower() + b = string_b.lower() + + if mode == "Equal": + return a == b, + elif 
mode == "Starts With": + return a.startswith(b), + elif mode == "Ends With": + return a.endswith(b), + +class RegexMatch(): + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "string": (IO.STRING, {"multiline": True}), + "regex_pattern": (IO.STRING, {"multiline": True}), + "case_insensitive": (IO.BOOLEAN, {"default": True}), + "multiline": (IO.BOOLEAN, {"default": False}), + "dotall": (IO.BOOLEAN, {"default": False}) + } + } + + RETURN_TYPES = (IO.BOOLEAN,) + RETURN_NAMES = ("matches",) + FUNCTION = "execute" + CATEGORY = "utils/string" + + def execute(self, string, regex_pattern, case_insensitive, multiline, dotall, **kwargs): + flags = 0 + + if case_insensitive: + flags |= re.IGNORECASE + if multiline: + flags |= re.MULTILINE + if dotall: + flags |= re.DOTALL + + try: + match = re.search(regex_pattern, string, flags) + result = match is not None + + except re.error: + result = False + + return result, + + +class RegexExtract(): + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "string": (IO.STRING, {"multiline": True}), + "regex_pattern": (IO.STRING, {"multiline": True}), + "mode": (IO.COMBO, {"options": ["First Match", "All Matches", "First Group", "All Groups"]}), + "case_insensitive": (IO.BOOLEAN, {"default": True}), + "multiline": (IO.BOOLEAN, {"default": False}), + "dotall": (IO.BOOLEAN, {"default": False}), + "group_index": (IO.INT, {"default": 1, "min": 0, "max": 100}) + } + } + + RETURN_TYPES = (IO.STRING,) + FUNCTION = "execute" + CATEGORY = "utils/string" + + def execute(self, string, regex_pattern, mode, case_insensitive, multiline, dotall, group_index, **kwargs): + join_delimiter = "\n" + + flags = 0 + if case_insensitive: + flags |= re.IGNORECASE + if multiline: + flags |= re.MULTILINE + if dotall: + flags |= re.DOTALL + + try: + if mode == "First Match": + match = re.search(regex_pattern, string, flags) + if match: + result = match.group(0) + else: + result = "" + + elif mode == "All Matches": + matches = re.findall(regex_pattern, string, flags) + if matches: + if isinstance(matches[0], tuple): + result = join_delimiter.join([m[0] for m in matches]) + else: + result = join_delimiter.join(matches) + else: + result = "" + + elif mode == "First Group": + match = re.search(regex_pattern, string, flags) + if match and len(match.groups()) >= group_index: + result = match.group(group_index) + else: + result = "" + + elif mode == "All Groups": + matches = re.finditer(regex_pattern, string, flags) + results = [] + for match in matches: + if match.groups() and len(match.groups()) >= group_index: + results.append(match.group(group_index)) + result = join_delimiter.join(results) + else: + result = "" + + except re.error: + result = "" + + return result, + +NODE_CLASS_MAPPINGS = { + "StringConcatenate": StringConcatenate, + "StringSubstring": StringSubstring, + "StringLength": StringLength, + "CaseConverter": CaseConverter, + "StringTrim": StringTrim, + "StringReplace": StringReplace, + "StringContains": StringContains, + "StringCompare": StringCompare, + "RegexMatch": RegexMatch, + "RegexExtract": RegexExtract +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "StringConcatenate": "Concatenate", + "StringSubstring": "Substring", + "StringLength": "Length", + "CaseConverter": "Case Converter", + "StringTrim": "Trim", + "StringReplace": "Replace", + "StringContains": "Contains", + "StringCompare": "Compare", + "RegexMatch": "Regex Match", + "RegexExtract": "Regex Extract" +} diff --git a/comfy_extras/nodes_video.py b/comfy_extras/nodes_video.py index a9e244eb..61f7171b 
100644 --- a/comfy_extras/nodes_video.py +++ b/comfy_extras/nodes_video.py @@ -5,9 +5,13 @@ import av import torch import folder_paths import json +from typing import Optional, Literal from fractions import Fraction -from comfy.comfy_types import FileLocator - +from comfy.comfy_types import IO, FileLocator, ComfyNodeABC +from comfy_api.input import ImageInput, AudioInput, VideoInput +from comfy_api.util import VideoContainer, VideoCodec, VideoComponents +from comfy_api.input_impl import VideoFromFile, VideoFromComponents +from comfy.cli_args import args class SaveWEBM: def __init__(self): @@ -75,7 +79,163 @@ class SaveWEBM: return {"ui": {"images": results, "animated": (True,)}} # TODO: frontend side +class SaveVideo(ComfyNodeABC): + def __init__(self): + self.output_dir = folder_paths.get_output_directory() + self.type: Literal["output"] = "output" + self.prefix_append = "" + + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "video": (IO.VIDEO, {"tooltip": "The video to save."}), + "filename_prefix": ("STRING", {"default": "video/ComfyUI", "tooltip": "The prefix for the file to save. This may include formatting information such as %date:yyyy-MM-dd% or %Empty Latent Image.width% to include values from nodes."}), + "format": (VideoContainer.as_input(), {"default": "auto", "tooltip": "The format to save the video as."}), + "codec": (VideoCodec.as_input(), {"default": "auto", "tooltip": "The codec to use for the video."}), + }, + "hidden": { + "prompt": "PROMPT", + "extra_pnginfo": "EXTRA_PNGINFO" + }, + } + + RETURN_TYPES = () + FUNCTION = "save_video" + + OUTPUT_NODE = True + + CATEGORY = "image/video" + DESCRIPTION = "Saves the input images to your ComfyUI output directory." + + def save_video(self, video: VideoInput, filename_prefix, format, codec, prompt=None, extra_pnginfo=None): + filename_prefix += self.prefix_append + width, height = video.get_dimensions() + full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path( + filename_prefix, + self.output_dir, + width, + height + ) + results: list[FileLocator] = list() + saved_metadata = None + if not args.disable_metadata: + metadata = {} + if extra_pnginfo is not None: + metadata.update(extra_pnginfo) + if prompt is not None: + metadata["prompt"] = prompt + if len(metadata) > 0: + saved_metadata = metadata + file = f"{filename}_{counter:05}_.{VideoContainer.get_extension(format)}" + video.save_to( + os.path.join(full_output_folder, file), + format=format, + codec=codec, + metadata=saved_metadata + ) + + results.append({ + "filename": file, + "subfolder": subfolder, + "type": self.type + }) + counter += 1 + + return { "ui": { "images": results, "animated": (True,) } } + +class CreateVideo(ComfyNodeABC): + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "images": (IO.IMAGE, {"tooltip": "The images to create a video from."}), + "fps": ("FLOAT", {"default": 30.0, "min": 1.0, "max": 120.0, "step": 1.0}), + }, + "optional": { + "audio": (IO.AUDIO, {"tooltip": "The audio to add to the video."}), + } + } + + RETURN_TYPES = (IO.VIDEO,) + FUNCTION = "create_video" + + CATEGORY = "image/video" + DESCRIPTION = "Create a video from images." 
+ + def create_video(self, images: ImageInput, fps: float, audio: Optional[AudioInput] = None): + return (VideoFromComponents( + VideoComponents( + images=images, + audio=audio, + frame_rate=Fraction(fps), + ) + ),) + +class GetVideoComponents(ComfyNodeABC): + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "video": (IO.VIDEO, {"tooltip": "The video to extract components from."}), + } + } + RETURN_TYPES = (IO.IMAGE, IO.AUDIO, IO.FLOAT) + RETURN_NAMES = ("images", "audio", "fps") + FUNCTION = "get_components" + + CATEGORY = "image/video" + DESCRIPTION = "Extracts all components from a video: frames, audio, and framerate." + + def get_components(self, video: VideoInput): + components = video.get_components() + + return (components.images, components.audio, float(components.frame_rate)) + +class LoadVideo(ComfyNodeABC): + @classmethod + def INPUT_TYPES(cls): + input_dir = folder_paths.get_input_directory() + files = [f for f in os.listdir(input_dir) if os.path.isfile(os.path.join(input_dir, f))] + files = folder_paths.filter_files_content_types(files, ["video"]) + return {"required": + {"file": (sorted(files), {"video_upload": True})}, + } + + CATEGORY = "image/video" + + RETURN_TYPES = (IO.VIDEO,) + FUNCTION = "load_video" + def load_video(self, file): + video_path = folder_paths.get_annotated_filepath(file) + return (VideoFromFile(video_path),) + + @classmethod + def IS_CHANGED(cls, file): + video_path = folder_paths.get_annotated_filepath(file) + mod_time = os.path.getmtime(video_path) + # Instead of hashing the file, we can just use the modification time to avoid + # rehashing large files. + return mod_time + + @classmethod + def VALIDATE_INPUTS(cls, file): + if not folder_paths.exists_annotated_filepath(file): + return "Invalid video file: {}".format(file) + + return True NODE_CLASS_MAPPINGS = { "SaveWEBM": SaveWEBM, + "SaveVideo": SaveVideo, + "CreateVideo": CreateVideo, + "GetVideoComponents": GetVideoComponents, + "LoadVideo": LoadVideo, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "SaveVideo": "Save Video", + "CreateVideo": "Create Video", + "GetVideoComponents": "Get Video Components", + "LoadVideo": "Load Video", } diff --git a/comfy_extras/nodes_webcam.py b/comfy_extras/nodes_webcam.py index 31eddb2d..062b15cf 100644 --- a/comfy_extras/nodes_webcam.py +++ b/comfy_extras/nodes_webcam.py @@ -20,7 +20,7 @@ class WebcamCapture(nodes.LoadImage): CATEGORY = "image" - def load_capture(s, image, **kwargs): + def load_capture(self, image, **kwargs): return super().load_image(folder_paths.get_annotated_filepath(image)) diff --git a/comfyui_version.py b/comfyui_version.py index f9161b37..b740b378 100644 --- a/comfyui_version.py +++ b/comfyui_version.py @@ -1,3 +1,3 @@ # This file is automatically generated by the build process when version is # updated in pyproject.toml. 
-__version__ = "0.3.29" +__version__ = "0.3.34" diff --git a/execution.py b/execution.py index feb61ae8..e5d1c69d 100644 --- a/execution.py +++ b/execution.py @@ -146,6 +146,8 @@ def get_input_data(inputs, class_def, unique_id, outputs=None, dynprompt=None, e input_data_all[x] = [unique_id] if h[x] == "AUTH_TOKEN_COMFY_ORG": input_data_all[x] = [extra_data.get("auth_token_comfy_org", None)] + if h[x] == "API_KEY_COMFY_ORG": + input_data_all[x] = [extra_data.get("api_key_comfy_org", None)] return input_data_all, missing_keys map_node_over_list = None #Don't hook this please diff --git a/folder_paths.py b/folder_paths.py index 9a525e5a..f0b3fd10 100644 --- a/folder_paths.py +++ b/folder_paths.py @@ -4,7 +4,7 @@ import os import time import mimetypes import logging -from typing import Literal +from typing import Literal, List from collections.abc import Collection from comfy.cli_args import args @@ -141,7 +141,7 @@ def get_directory_by_type(type_name: str) -> str | None: return get_input_directory() return None -def filter_files_content_types(files: list[str], content_types: Literal["image", "video", "audio", "model"]) -> list[str]: +def filter_files_content_types(files: list[str], content_types: List[Literal["image", "video", "audio", "model"]]) -> list[str]: """ Example: files = os.listdir(folder_paths.get_input_directory()) diff --git a/hook_breaker_ac10a0.py b/hook_breaker_ac10a0.py new file mode 100644 index 00000000..c3e1c063 --- /dev/null +++ b/hook_breaker_ac10a0.py @@ -0,0 +1,17 @@ +# Prevent custom nodes from hooking anything important +import comfy.model_management + +HOOK_BREAK = [(comfy.model_management, "cast_to")] + + +SAVED_FUNCTIONS = [] + + +def save_functions(): + for f in HOOK_BREAK: + SAVED_FUNCTIONS.append((f[0], f[1], getattr(f[0], f[1]))) + + +def restore_functions(): + for f in SAVED_FUNCTIONS: + setattr(f[0], f[1], f[2]) diff --git a/main.py b/main.py index ac9d24b7..221e48e4 100644 --- a/main.py +++ b/main.py @@ -13,7 +13,7 @@ import logging import sys if __name__ == "__main__": - #NOTE: These do not do anything on core ComfyUI which should already have no communication with the internet, they are for custom nodes. + #NOTE: These do not do anything on core ComfyUI, they are for custom nodes. 
os.environ['HF_HUB_DISABLE_TELEMETRY'] = '1' os.environ['DO_NOT_TRACK'] = '1' @@ -141,7 +141,7 @@ import nodes import comfy.model_management import comfyui_version import app.logger - +import hook_breaker_ac10a0 def cuda_malloc_warning(): device = comfy.model_management.get_torch_device() @@ -215,6 +215,7 @@ def prompt_worker(q, server_instance): comfy.model_management.soft_empty_cache() last_gc_collect = current_time need_gc = False + hook_breaker_ac10a0.restore_functions() async def run(server_instance, address='', port=8188, verbose=True, call_on_start=None): @@ -268,7 +269,9 @@ def start_comfyui(asyncio_loop=None): prompt_server = server.PromptServer(asyncio_loop) q = execution.PromptQueue(prompt_server) - nodes.init_extra_nodes(init_custom_nodes=not args.disable_all_custom_nodes) + hook_breaker_ac10a0.save_functions() + nodes.init_extra_nodes(init_custom_nodes=not args.disable_all_custom_nodes, init_api_nodes=not args.disable_api_nodes) + hook_breaker_ac10a0.restore_functions() cuda_malloc_warning() diff --git a/nodes.py b/nodes.py index f3ca64e0..13d176a0 100644 --- a/nodes.py +++ b/nodes.py @@ -246,6 +246,9 @@ class ConditioningZeroOut: pooled_output = d.get("pooled_output", None) if pooled_output is not None: d["pooled_output"] = torch.zeros_like(pooled_output) + conditioning_lyrics = d.get("conditioning_lyrics", None) + if conditioning_lyrics is not None: + d["conditioning_lyrics"] = torch.zeros_like(conditioning_lyrics) n = [torch.zeros_like(t[0]), d] c.append(n) return (c, ) @@ -917,7 +920,7 @@ class CLIPLoader: @classmethod def INPUT_TYPES(s): return {"required": { "clip_name": (folder_paths.get_filename_list("text_encoders"), ), - "type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream"], ), + "type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace"], ), }, "optional": { "device": (["default", "cpu"], {"advanced": True}), @@ -2259,6 +2262,9 @@ def init_builtin_extra_nodes(): "nodes_optimalsteps.py", "nodes_hidream.py", "nodes_fresca.py", + "nodes_preview_any.py", + "nodes_ace.py", + "nodes_string.py", ] import_failed = [] @@ -2269,14 +2275,56 @@ def init_builtin_extra_nodes(): return import_failed -def init_extra_nodes(init_custom_nodes=True): +def init_builtin_api_nodes(): + api_nodes_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_api_nodes") + api_nodes_files = [ + "nodes_ideogram.py", + "nodes_openai.py", + "nodes_minimax.py", + "nodes_veo2.py", + "nodes_kling.py", + "nodes_bfl.py", + "nodes_luma.py", + "nodes_recraft.py", + "nodes_pixverse.py", + "nodes_stability.py", + "nodes_pika.py", + ] + + if not load_custom_node(os.path.join(api_nodes_dir, "canary.py"), module_parent="comfy_api_nodes"): + return api_nodes_files + + import_failed = [] + for node_file in api_nodes_files: + if not load_custom_node(os.path.join(api_nodes_dir, node_file), module_parent="comfy_api_nodes"): + import_failed.append(node_file) + + return import_failed + + +def init_extra_nodes(init_custom_nodes=True, init_api_nodes=True): import_failed = init_builtin_extra_nodes() + import_failed_api = [] + if init_api_nodes: + import_failed_api = init_builtin_api_nodes() + if init_custom_nodes: init_external_custom_nodes() else: logging.info("Skipping loading of custom nodes") + if len(import_failed_api) > 0: + logging.warning("WARNING: some comfy_api_nodes/ nodes did not import correctly. 
This may be because they are missing some dependencies.\n") + for node in import_failed_api: + logging.warning("IMPORT FAILED: {}".format(node)) + logging.warning("\nThis issue might be caused by new missing dependencies added the last time you updated ComfyUI.") + if args.windows_standalone_build: + logging.warning("Please run the update script: update/update_comfyui.bat") + else: + logging.warning("Please do a: pip install -r requirements.txt") + logging.warning("") + if len(import_failed) > 0: logging.warning("WARNING: some comfy_extras/ nodes did not import correctly. This may be because they are missing some dependencies.\n") for node in import_failed: diff --git a/pyproject.toml b/pyproject.toml index e8fc9555..80061b39 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.3.29" +version = "0.3.34" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.9" @@ -12,6 +12,7 @@ documentation = "https://docs.comfy.org/" [tool.ruff] lint.select = [ + "N805", # invalid-first-argument-name-for-method "S307", # suspicious-eval-usage "S102", # exec "T", # print-usage diff --git a/requirements.txt b/requirements.txt index 90eb0461..8f7a7898 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -comfyui-frontend-package==1.16.9 -comfyui-workflow-templates==0.1.3 +comfyui-frontend-package==1.19.9 +comfyui-workflow-templates==0.1.14 torch torchsde torchvision @@ -22,4 +22,5 @@ psutil kornia>=0.7.1 spandrel soundfile -av>=14.1.0 +av>=14.2.0 +pydantic~=2.0 diff --git a/server.py b/server.py index 0cc97b24..cb1c6a8f 100644 --- a/server.py +++ b/server.py @@ -32,12 +32,13 @@ from app.frontend_management import FrontendManager from app.user_manager import UserManager from app.model_manager import ModelFileManager from app.custom_node_manager import CustomNodeManager -from typing import Optional +from typing import Optional, Union from api_server.routes.internal.internal_routes import InternalRoutes class BinaryEventTypes: PREVIEW_IMAGE = 1 UNENCODED_PREVIEW_IMAGE = 2 + TEXT = 3 async def send_socket_catch_exception(function, message): try: @@ -580,6 +581,9 @@ class PromptServer(): info['deprecated'] = True if getattr(obj_class, "EXPERIMENTAL", False): info['experimental'] = True + + if hasattr(obj_class, 'API_NODE'): + info['api_node'] = obj_class.API_NODE return info @routes.get("/object_info") @@ -875,3 +879,15 @@ class PromptServer(): logging.warning(traceback.format_exc()) return json_data + + def send_progress_text( + self, text: Union[bytes, bytearray, str], node_id: str, sid=None + ): + if isinstance(text, str): + text = text.encode("utf-8") + node_id_bytes = str(node_id).encode("utf-8") + + # Pack the node_id length as a 4-byte unsigned integer, followed by the node_id bytes + message = struct.pack(">I", len(node_id_bytes)) + node_id_bytes + text + + self.send_sync(BinaryEventTypes.TEXT, message, sid) diff --git a/tests-unit/comfy_api_nodes_test/mapper_utils_test.py b/tests-unit/comfy_api_nodes_test/mapper_utils_test.py new file mode 100644 index 00000000..69488f69 --- /dev/null +++ b/tests-unit/comfy_api_nodes_test/mapper_utils_test.py @@ -0,0 +1,297 @@ +from typing import Optional +from enum import Enum + +from pydantic import BaseModel, Field + +from comfy.comfy_types.node_typing import IO +from comfy_api_nodes.mapper_utils import model_field_to_node_input + + +def test_model_field_to_float_input(): + """Tests mapping a float field with constraints.""" + + class ModelWithFloatField(BaseModel): + cfg_scale: 
Optional[float] = Field( + default=0.5, + description="Flexibility in video generation", + ge=0.0, + le=1.0, + multiple_of=0.001, + ) + + expected_output = ( + IO.FLOAT, + { + "default": 0.5, + "tooltip": "Flexibility in video generation", + "min": 0.0, + "max": 1.0, + "step": 0.001, + }, + ) + + actual_output = model_field_to_node_input( + IO.FLOAT, ModelWithFloatField, "cfg_scale" + ) + + assert actual_output[0] == expected_output[0] + assert actual_output[1] == expected_output[1] + + +def test_model_field_to_float_input_no_constraints(): + """Tests mapping a float field with no constraints.""" + + class ModelWithFloatField(BaseModel): + cfg_scale: Optional[float] = Field(default=0.5) + + expected_output = ( + IO.FLOAT, + { + "default": 0.5, + }, + ) + + actual_output = model_field_to_node_input( + IO.FLOAT, ModelWithFloatField, "cfg_scale" + ) + + assert actual_output[0] == expected_output[0] + assert actual_output[1] == expected_output[1] + + +def test_model_field_to_int_input(): + """Tests mapping an int field with constraints.""" + + class ModelWithIntField(BaseModel): + num_frames: Optional[int] = Field( + default=10, + description="Number of frames to generate", + ge=1, + le=100, + multiple_of=1, + ) + + expected_output = ( + IO.INT, + { + "default": 10, + "tooltip": "Number of frames to generate", + "min": 1, + "max": 100, + "step": 1, + }, + ) + + actual_output = model_field_to_node_input(IO.INT, ModelWithIntField, "num_frames") + + assert actual_output[0] == expected_output[0] + assert actual_output[1] == expected_output[1] + + +def test_model_field_to_string_input(): + """Tests mapping a string field.""" + + class ModelWithStringField(BaseModel): + prompt: Optional[str] = Field( + default="A beautiful sunset over a calm ocean", + description="A prompt for the video generation", + ) + + expected_output = ( + IO.STRING, + { + "default": "A beautiful sunset over a calm ocean", + "tooltip": "A prompt for the video generation", + }, + ) + + actual_output = model_field_to_node_input(IO.STRING, ModelWithStringField, "prompt") + + assert actual_output[0] == expected_output[0] + assert actual_output[1] == expected_output[1] + + +def test_model_field_to_string_input_multiline(): + """Tests mapping a string field.""" + + class ModelWithStringField(BaseModel): + prompt: Optional[str] = Field( + default="A beautiful sunset over a calm ocean", + description="A prompt for the video generation", + ) + + expected_output = ( + IO.STRING, + { + "default": "A beautiful sunset over a calm ocean", + "tooltip": "A prompt for the video generation", + "multiline": True, + }, + ) + + actual_output = model_field_to_node_input( + IO.STRING, ModelWithStringField, "prompt", multiline=True + ) + + assert actual_output[0] == expected_output[0] + assert actual_output[1] == expected_output[1] + + +def test_model_field_to_combo_input(): + """Tests mapping a combo field.""" + + class MockEnum(str, Enum): + option_1 = "option 1" + option_2 = "option 2" + option_3 = "option 3" + + class ModelWithComboField(BaseModel): + model_name: Optional[MockEnum] = Field("option 1", description="Model Name") + + expected_output = ( + IO.COMBO, + { + "options": ["option 1", "option 2", "option 3"], + "default": "option 1", + "tooltip": "Model Name", + }, + ) + + actual_output = model_field_to_node_input( + IO.COMBO, ModelWithComboField, "model_name", enum_type=MockEnum + ) + + assert actual_output[0] == expected_output[0] + assert actual_output[1] == expected_output[1] + + +def test_model_field_to_combo_input_no_options(): + 
"""Tests mapping a combo field with no options.""" + + class ModelWithComboField(BaseModel): + model_name: Optional[str] = Field(description="Model Name") + + expected_output = ( + IO.COMBO, + { + "tooltip": "Model Name", + }, + ) + + actual_output = model_field_to_node_input( + IO.COMBO, ModelWithComboField, "model_name" + ) + + assert actual_output[0] == expected_output[0] + assert actual_output[1] == expected_output[1] + + +def test_model_field_to_image_input(): + """Tests mapping an image field.""" + + class ModelWithImageField(BaseModel): + image: Optional[str] = Field( + default=None, + description="An image for the video generation", + ) + + expected_output = ( + IO.IMAGE, + { + "default": None, + "tooltip": "An image for the video generation", + }, + ) + + actual_output = model_field_to_node_input(IO.IMAGE, ModelWithImageField, "image") + + assert actual_output[0] == expected_output[0] + assert actual_output[1] == expected_output[1] + + +def test_model_field_to_node_input_no_description(): + """Tests mapping a field with no description.""" + + class ModelWithNoDescriptionField(BaseModel): + field: Optional[str] = Field(default="default value") + + expected_output = ( + IO.STRING, + { + "default": "default value", + }, + ) + + actual_output = model_field_to_node_input( + IO.STRING, ModelWithNoDescriptionField, "field" + ) + + assert actual_output[0] == expected_output[0] + assert actual_output[1] == expected_output[1] + + +def test_model_field_to_node_input_no_default(): + """Tests mapping a field with no default.""" + + class ModelWithNoDefaultField(BaseModel): + field: Optional[str] = Field(description="A field with no default") + + expected_output = ( + IO.STRING, + { + "tooltip": "A field with no default", + }, + ) + + actual_output = model_field_to_node_input( + IO.STRING, ModelWithNoDefaultField, "field" + ) + + assert actual_output[0] == expected_output[0] + assert actual_output[1] == expected_output[1] + + +def test_model_field_to_node_input_no_metadata(): + """Tests mapping a field with no metadata or properties defined on the schema.""" + + class ModelWithNoMetadataField(BaseModel): + field: Optional[str] = Field() + + expected_output = ( + IO.STRING, + {}, + ) + + actual_output = model_field_to_node_input( + IO.STRING, ModelWithNoMetadataField, "field" + ) + + assert actual_output[0] == expected_output[0] + assert actual_output[1] == expected_output[1] + + +def test_model_field_to_node_input_default_is_none(): + """ + Tests mapping a field with a default of `None`. + I.e., the default field should be included as the schema explicitly sets it to `None`. 
+ """ + + class ModelWithNoneDefaultField(BaseModel): + field: Optional[str] = Field( + default=None, description="A field with a default of None" + ) + + expected_output = ( + IO.STRING, + { + "default": None, + "tooltip": "A field with a default of None", + }, + ) + + actual_output = model_field_to_node_input( + IO.STRING, ModelWithNoneDefaultField, "field" + ) + + assert actual_output[0] == expected_output[0] + assert actual_output[1] == expected_output[1] diff --git a/tests-unit/comfy_api_test/input_impl_test.py b/tests-unit/comfy_api_test/input_impl_test.py new file mode 100644 index 00000000..5fc21a9a --- /dev/null +++ b/tests-unit/comfy_api_test/input_impl_test.py @@ -0,0 +1,91 @@ +import io +from comfy_api.input_impl.video_types import ( + container_to_output_format, + get_open_write_kwargs, +) +from comfy_api.util import VideoContainer + + +def test_container_to_output_format_empty_string(): + """Test that an empty string input returns None. `None` arg allows default auto-detection.""" + assert container_to_output_format("") is None + + +def test_container_to_output_format_none(): + """Test that None input returns None.""" + assert container_to_output_format(None) is None + + +def test_container_to_output_format_comma_separated(): + """Test that a comma-separated list returns a valid singular format from the list.""" + comma_separated_format = "mp4,mov,m4a" + output_format = container_to_output_format(comma_separated_format) + assert output_format in comma_separated_format + + +def test_container_to_output_format_single(): + """Test that a single format string (not comma-separated list) is returned as is.""" + assert container_to_output_format("mp4") == "mp4" + + +def test_get_open_write_kwargs_filepath_no_format(): + """Test that 'format' kwarg is NOT set when dest is a file path.""" + kwargs_auto = get_open_write_kwargs("output.mp4", "mp4", VideoContainer.AUTO) + assert "format" not in kwargs_auto, "Format should not be set for file paths (AUTO)" + + kwargs_specific = get_open_write_kwargs("output.avi", "mp4", "avi") + fail_msg = "Format should not be set for file paths (Specific)" + assert "format" not in kwargs_specific, fail_msg + + +def test_get_open_write_kwargs_base_options_mode(): + """Test basic kwargs for file path: mode and movflags.""" + kwargs = get_open_write_kwargs("output.mp4", "mp4", VideoContainer.AUTO) + assert kwargs["mode"] == "w", "mode should be set to write" + + fail_msg = "movflags should be set to preserve custom metadata tags" + assert "movflags" in kwargs["options"], fail_msg + assert kwargs["options"]["movflags"] == "use_metadata_tags", fail_msg + + +def test_get_open_write_kwargs_bytesio_auto_format(): + """Test kwargs for BytesIO dest with AUTO format.""" + dest = io.BytesIO() + container_fmt = "mov,mp4,m4a" + kwargs = get_open_write_kwargs(dest, container_fmt, VideoContainer.AUTO) + + assert kwargs["mode"] == "w" + assert kwargs["options"]["movflags"] == "use_metadata_tags" + + fail_msg = ( + "Format should be a valid format from the container's format list when AUTO" + ) + assert kwargs["format"] in container_fmt, fail_msg + + +def test_get_open_write_kwargs_bytesio_specific_format(): + """Test kwargs for BytesIO dest with a specific single format.""" + dest = io.BytesIO() + container_fmt = "avi" + to_fmt = VideoContainer.MP4 + kwargs = get_open_write_kwargs(dest, container_fmt, to_fmt) + + assert kwargs["mode"] == "w" + assert kwargs["options"]["movflags"] == "use_metadata_tags" + + fail_msg = "Format should be the specified format (lowercased) 
when output format is not AUTO" + assert kwargs["format"] == "mp4", fail_msg + + +def test_get_open_write_kwargs_bytesio_specific_format_list(): + """Test kwargs for BytesIO dest with a specific comma-separated format.""" + dest = io.BytesIO() + container_fmt = "avi" + to_fmt = "mov,mp4,m4a" # A format string that is a list + kwargs = get_open_write_kwargs(dest, container_fmt, to_fmt) + + assert kwargs["mode"] == "w" + assert kwargs["options"]["movflags"] == "use_metadata_tags" + + fail_msg = "Format should be a valid format from the specified format list when output format is not AUTO" + assert kwargs["format"] in to_fmt, fail_msg diff --git a/tests-unit/prompt_server_test/user_manager_test.py b/tests-unit/prompt_server_test/user_manager_test.py index 7e523cbf..b939d8e6 100644 --- a/tests-unit/prompt_server_test/user_manager_test.py +++ b/tests-unit/prompt_server_test/user_manager_test.py @@ -229,3 +229,61 @@ async def test_move_userdata_full_info(aiohttp_client, app, tmp_path): assert not os.path.exists(tmp_path / "source.txt") with open(tmp_path / "dest.txt", "r") as f: assert f.read() == "test content" + + +async def test_listuserdata_v2_empty_root(aiohttp_client, app): + client = await aiohttp_client(app) + resp = await client.get("/v2/userdata") + assert resp.status == 200 + assert await resp.json() == [] + + +async def test_listuserdata_v2_nonexistent_subdirectory(aiohttp_client, app): + client = await aiohttp_client(app) + resp = await client.get("/v2/userdata?path=does_not_exist") + assert resp.status == 404 + + +async def test_listuserdata_v2_default(aiohttp_client, app, tmp_path): + os.makedirs(tmp_path / "test_dir" / "subdir") + (tmp_path / "test_dir" / "file1.txt").write_text("content") + (tmp_path / "test_dir" / "subdir" / "file2.txt").write_text("content") + + client = await aiohttp_client(app) + resp = await client.get("/v2/userdata?path=test_dir") + assert resp.status == 200 + data = await resp.json() + file_paths = {item["path"] for item in data if item["type"] == "file"} + assert file_paths == {"test_dir/file1.txt", "test_dir/subdir/file2.txt"} + + +async def test_listuserdata_v2_normalized_separators(aiohttp_client, app, tmp_path, monkeypatch): + # Force backslash as os separator + monkeypatch.setattr(os, 'sep', '\\') + monkeypatch.setattr(os.path, 'sep', '\\') + os.makedirs(tmp_path / "test_dir" / "subdir") + (tmp_path / "test_dir" / "subdir" / "file1.txt").write_text("x") + + client = await aiohttp_client(app) + resp = await client.get("/v2/userdata?path=test_dir") + assert resp.status == 200 + data = await resp.json() + for item in data: + assert "/" in item["path"] + assert "\\" not in item["path"]\ + +async def test_listuserdata_v2_url_encoded_path(aiohttp_client, app, tmp_path): + # Create a directory with a space in its name and a file inside + os.makedirs(tmp_path / "my dir") + (tmp_path / "my dir" / "file.txt").write_text("content") + + client = await aiohttp_client(app) + # Use URL-encoded space in path parameter + resp = await client.get("/v2/userdata?path=my%20dir&recurse=false") + assert resp.status == 200 + data = await resp.json() + assert len(data) == 1 + entry = data[0] + assert entry["name"] == "file.txt" + # Ensure the path is correctly decoded and uses forward slash + assert entry["path"] == "my dir/file.txt"
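
The SaveSVGNode added in comfy_extras/nodes_images.py embeds workflow metadata by rewriting the SVG text so that a <metadata> element, wrapping the workflow JSON in a CDATA section, sits directly after the opening <svg> tag. Below is a minimal, standalone sketch of that injection step; the helper name inject_svg_metadata and the sample document are illustrative only, not part of the node.

import json
import re

def inject_svg_metadata(svg_text: str, metadata: dict) -> str:
    # Build a <metadata> element wrapping the JSON payload in a CDATA section.
    payload = json.dumps(metadata, indent=2)
    metadata_element = f"  <metadata>\n    <![CDATA[\n{payload}\n    ]]>\n  </metadata>\n"

    def replacement(match):
        # match.group(1) is the captured opening <svg ...> tag.
        return match.group(1) + "\n" + metadata_element

    # Insert the element right after the opening <svg> tag.
    return re.sub(r"(<svg[^>]*>)", replacement, svg_text, flags=re.UNICODE)

if __name__ == "__main__":
    doc = '<svg xmlns="http://www.w3.org/2000/svg" width="8" height="8"><rect width="8" height="8"/></svg>'
    print(inject_svg_metadata(doc, {"prompt": {"1": {"class_type": "KSampler"}}}))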
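
The strength input added to LTXVImgToVideo in comfy_extras/nodes_lt.py is applied by writing 1.0 - strength into the noise mask over the conditioning latent frames: strength=1.0 reproduces the previous hard-pinned behaviour (mask 0.0), while lower values let the sampler re-noise those frames. A simplified sketch of that mapping, assuming a flattened (batch, 1, frames) mask rather than the node's full latent-shaped mask:

import torch

def conditioning_frames_mask(latent_frames: int, guide_frames: int, strength: float) -> torch.Tensor:
    # 1.0 = frame is free to be re-noised; 0.0 = frame is fully pinned to the encoded image.
    mask = torch.ones((1, 1, latent_frames), dtype=torch.float32)
    mask[:, :, :guide_frames] = 1.0 - strength
    return mask

print(conditioning_frames_mask(8, 2, strength=1.0))   # guided frames -> 0.00 (previous behaviour)
print(conditioning_frames_mask(8, 2, strength=0.25))  # guided frames -> 0.75 (weak conditioning)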
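
hook_breaker_ac10a0.py works by snapshotting (module, attribute, function) triples before custom nodes are imported and re-assigning the originals afterwards, so a custom node that monkey-patches a protected function cannot leave the hook in place. A self-contained sketch of the same save/restore pattern; math.sqrt is only a stand-in for the protected function, not what the module actually guards:

import math

# (module, attribute) pairs to protect from monkey-patching; math.sqrt is a stand-in here.
HOOK_BREAK = [(math, "sqrt")]

SAVED_FUNCTIONS = []

def save_functions():
    for module, name in HOOK_BREAK:
        SAVED_FUNCTIONS.append((module, name, getattr(module, name)))

def restore_functions():
    for module, name, original in SAVED_FUNCTIONS:
        setattr(module, name, original)

save_functions()
math.sqrt = lambda x: 0.0   # simulate a custom node hooking the function at import time
restore_functions()         # the original implementation is put back before execution
assert math.sqrt(9) == 3.0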
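
PromptServer.send_progress_text in server.py frames its binary payload as a 4-byte big-endian node-id length, the node-id bytes, then the UTF-8 text; the TEXT event type is passed separately to send_sync. A sketch of that framing with a matching decoder; decode_progress_text is a hypothetical helper for illustration, not part of the server or frontend:

import struct

def encode_progress_text(node_id: str, text: str) -> bytes:
    # 4-byte big-endian node-id length, then the node id, then the text.
    node_id_bytes = str(node_id).encode("utf-8")
    return struct.pack(">I", len(node_id_bytes)) + node_id_bytes + text.encode("utf-8")

def decode_progress_text(message: bytes) -> tuple:
    # Mirrors the framing above.
    (node_id_len,) = struct.unpack_from(">I", message, 0)
    node_id = message[4:4 + node_id_len].decode("utf-8")
    text = message[4 + node_id_len:].decode("utf-8")
    return node_id, text

assert decode_progress_text(encode_progress_text("12", "sampling 50%")) == ("12", "sampling 50%")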