Make japanese hiragana and katakana characters work with ACE. (#7997)
This commit is contained in:
@@ -7,7 +7,7 @@ import torch
|
||||
import logging
|
||||
|
||||
from tokenizers import Tokenizer
|
||||
from .ace_text_cleaners import multilingual_cleaners
|
||||
from .ace_text_cleaners import multilingual_cleaners, japanese_to_romaji
|
||||
|
||||
SUPPORT_LANGUAGES = {
|
||||
"en": 259, "de": 260, "fr": 262, "es": 284, "it": 285,
|
||||
@@ -65,6 +65,14 @@ class VoiceBpeTokenizer:
|
||||
if "spa" in lang:
|
||||
lang = "es"
|
||||
|
||||
try:
|
||||
line_out = japanese_to_romaji(line)
|
||||
if line_out != line:
|
||||
lang = "ja"
|
||||
line = line_out
|
||||
except:
|
||||
pass
|
||||
|
||||
try:
|
||||
if structure_pattern.match(line):
|
||||
token_idx = self.encode(line, "en")
|
||||
|
||||
Reference in New Issue
Block a user