Make Japanese hiragana and katakana characters work with ACE. (#7997)

This commit is contained in:
comfyanonymous
2025-05-08 00:32:36 -07:00
committed by GitHub
parent c7c025b8d1
commit 5d3cc85e13
3 changed files with 135 additions and 2 deletions

View File

@@ -7,7 +7,7 @@ import torch
import logging
from tokenizers import Tokenizer
from .ace_text_cleaners import multilingual_cleaners
from .ace_text_cleaners import multilingual_cleaners, japanese_to_romaji
SUPPORT_LANGUAGES = {
"en": 259, "de": 260, "fr": 262, "es": 284, "it": 285,
@@ -65,6 +65,14 @@ class VoiceBpeTokenizer:
if "spa" in lang:
lang = "es"
try:
line_out = japanese_to_romaji(line)
if line_out != line:
lang = "ja"
line = line_out
except:
pass
try:
if structure_pattern.match(line):
token_idx = self.encode(line, "en")