Make Japanese hiragana and katakana characters work with ACE. (#7997)

2025-05-08 00:32:36 -07:00
parent c7c025b8d1
commit 5d3cc85e13
3 changed files with 135 additions and 2 deletions
--- a/comfy/text_encoders/ace.py
+++ b/comfy/text_encoders/ace.py
@@ -7,7 +7,7 @@ import torch
 import logging

 from tokenizers import Tokenizer
-from .ace_text_cleaners import multilingual_cleaners
+from .ace_text_cleaners import multilingual_cleaners, japanese_to_romaji

 SUPPORT_LANGUAGES = {
    "en": 259, "de": 260, "fr": 262, "es": 284, "it": 285,
@@ -65,6 +65,14 @@ class VoiceBpeTokenizer:
            if "spa" in lang:
                lang = "es"

+            try:
+                line_out = japanese_to_romaji(line)
+                if line_out != line:
+                    lang = "ja"
+                line = line_out
+            except:
+                pass
+
            try:
                if structure_pattern.match(line):
                    token_idx = self.encode(line, "en")