from tensorflow_tts.processor.ch_pinyin import PINYIN_DICT, mark, zh_pattern

# Special symbols: padding, end-of-sequence, and pause markers.
_pad = ["pad"]
_eos = ["eos"]
_pause = ["sil", "#0", "#1", "#2", "#3"]

# Initials (shengmu)
_initials = [
    "^", "b", "c", "ch", "d", "f", "g", "h", "j", "k", "l",
    "m", "n", "p", "q", "r", "s", "sh", "t", "x", "z", "zh",
]

_tones = ["1", "2", "3", "4", "5"]

# Finals (yunmu)
_finals = [
    "a", "ai", "an", "ang", "ao",
    "e", "ei", "en", "eng", "er",
    "i", "ia", "ian", "iang", "iao", "ie", "ii", "iii",
    "in", "ing", "iong", "iou",
    "o", "ong", "ou",
    "u", "ua", "uai", "uan", "uang", "uei", "uen", "ueng", "uo",
    "v", "van", "ve", "vn",
]

# English phoneme inventory, taken from the G2p instance.
# NOTE(review): `grapheme_to_phonem` is not imported in the visible part of
# this file -- confirm it is brought into scope elsewhere.
g2p = grapheme_to_phonem.G2p()
valid_symbols = g2p.phonemes

_punctuation = "!'(),.:;? "

# Every English phoneme is prefixed with "@"; punctuation characters are
# appended one by one, unprefixed.
_arpabet = ["@" + phoneme for phoneme in valid_symbols] + list(_punctuation)

# Full symbol table: pad, pauses, initials, finals, "@"-prefixed tones,
# English phonemes plus punctuation, and finally eos.
BAKER_SYMBOLS = (
    _pad
    + _pause
    + _initials
    + _finals
    + ["@" + tone for tone in _tones]
    + _arpabet
    + _eos
)
# Disabled alternative that fuses every final with every tone:
# BAKER_SYMBOLS = _pad + _pause + _initials + [i + j for i in _finals for j in _tones] + _arpabet + _eos


def is_zh(word):
    """Return True if ``zh_pattern`` matches anywhere in ``word``.

    ``zh_pattern`` comes from ``tensorflow_tts.processor.ch_pinyin``;
    presumably it matches Chinese characters -- confirm against that module.
    """
    return zh_pattern.search(word) is not None
# Scratch checks: each section prints a result for manual inspection.
print("aaa")

# --- NumPy: two ways of indexing with a leading None --------------------
labels = np.array([[1, 3, 5], [2, 4, 6], [0, 0, 0]])
print(labels[None, ...])
print(labels[None, :])

# --- Regex: collapse whitespace runs to a single space -------------------
_whitespace_re = re.compile(r"\s+")
print(_whitespace_re.sub(" ", "nihao ma wo hen hao"))

# --- Regex: locate a {...} span inside a text -----------------------------
text = "nihaoma wo {B IY}henhao a "
print(len(text))
_curly_re = re.compile(r"(.*?)\{(.+?)\}(.*)")
print(_curly_re.match(text))

# --- g2p_en: inspect the output for letters and short words --------------
from g2p_en import g2p

# The module name is rebound to a G2p instance from here on.
g2p = g2p.G2p()
print(g2p('B'))
# print("==========="*2)

# Print the conversion of every ASCII letter, upper case first.
test = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
for i in test:
    print(g2p(i))

print(g2p('tv'))
print(g2p('ad'))

# Disabled probes:
# print(g2p('fetch'))
# print(g2p('cake'))
# print(g2p('age'))
# print(g2p('banana'))
# print(g2p('ABCDEFG'))
#
# texts = ""