def pyopenjtalk_g2p_accent(text) -> List[str]:
    """Convert text into a flat sequence of phoneme and accent tokens.

    Every full-context label returned by ``pyopenjtalk.run_frontend`` is
    scanned with one regular expression. A label that matches exactly once
    contributes three string tokens, in this order:

    1. the phoneme captured between ``-`` and the following ``+``,
    2. the digits captured after the first ``_`` that follows ``/F:``,
    3. the signed digits captured right after ``/A:``.

    Labels that do not match exactly once are skipped.

    NOTE(review): in the HTS label layout, tokens 2 and 3 are presumably the
    accent type and the distance to the accent nucleus -- confirm against the
    label specification.

    Args:
        text: Input text handed to ``pyopenjtalk.run_frontend``.

    Returns:
        List[str]: Concatenated ``[phoneme, F-value, A-value]`` triples.
    """
    import pyopenjtalk
    import re

    # ``[0-9]+`` (instead of a single ``[0-9]``) keeps multi-digit /F: values
    # whole; a one-character class would cut e.g. "12" down to "1".
    # Compiled once because the pattern is the same for every label.
    pattern = re.compile(r"\-(.*?)\+.*?\/A:([0-9\-]+).*?\/F:.*?_([0-9]+)")

    phones = []
    for label in pyopenjtalk.run_frontend(text)[1]:
        matches = pattern.findall(label)
        if len(matches) == 1:
            phoneme, a_value, f_value = matches[0]
            phones += [phoneme, f_value, a_value]
    return phones
def test_jtalk():
    """Check that the NJD surface forms concatenate back to the input text."""
    sentences = [
        "今日も良い天気ですね",
        "こんにちは。",
        "どんまい!",
        "パソコンのとりあえず知っておきたい使い方",
    ]
    for text in sentences:
        njd_results, labels = pyopenjtalk.run_frontend(text)
        _print_results(njd_results, labels)
        # The first comma-separated field of each NJD entry is used as the
        # surface form; joined together they must reproduce the input.
        surface = "".join(entry.split(",")[0] for entry in njd_results)
        assert surface == text
def pyopenjtalk_g2p_accent_with_pause(text) -> List[str]:
    """Convert text into phoneme/accent tokens, keeping pauses as ``"pau"``.

    Every full-context label returned by ``pyopenjtalk.run_frontend`` is
    handled as follows:

    * if the phoneme field (text between the first ``-`` and the next ``+``
      of that segment) is ``"pau"``, a single ``"pau"`` token is emitted;
    * otherwise, if the label matches the accent pattern exactly once, three
      tokens are emitted: the captured phoneme, the digits captured after the
      first ``_`` that follows ``/F:``, and the signed digits captured right
      after ``/A:``;
    * any other label is skipped.

    NOTE(review): in the HTS label layout, the ``/F:`` and ``/A:`` captures
    are presumably the accent type and the distance to the accent nucleus --
    confirm against the label specification.

    Args:
        text: Input text handed to ``pyopenjtalk.run_frontend``.

    Returns:
        List[str]: Sequence of ``"pau"`` markers and
        ``[phoneme, F-value, A-value]`` triples.
    """
    import pyopenjtalk
    import re

    # ``[0-9]+`` (instead of a single ``[0-9]``) keeps multi-digit /F: values
    # whole; a one-character class would cut e.g. "12" down to "1".
    # Compiled once because the pattern is the same for every label.
    pattern = re.compile(r"\-(.*?)\+.*?\/A:([0-9\-]+).*?\/F:.*?_([0-9]+)")

    phones = []
    for label in pyopenjtalk.run_frontend(text)[1]:
        # Pause labels are reported as-is and never run through the pattern.
        if label.split("-")[1].split("+")[0] == "pau":
            phones += ["pau"]
            continue
        matches = pattern.findall(label)
        if len(matches) == 1:
            phoneme, a_value, f_value = matches[0]
            phones += [phoneme, f_value, a_value]
    return phones
def pyopenjtalk_g2p_prosody(text: str, drop_unvoiced_vowels: bool = True) -> List[str]:
    """Extract a phoneme + prosody symbol sequence from full-context labels.

    The algorithm is based on `Prosodic features control by symbols as input
    of sequence-to-sequence acoustic modeling for neural TTS`_ with some
    r9y9's tweaks.

    Besides phonemes, the following symbols are emitted:

    * ``^`` -- ``sil`` at the first label,
    * ``$`` / ``?`` -- ``sil`` at the last label (``?`` for the question form),
    * ``_`` -- ``pau``,
    * ``#`` -- accent phrase border,
    * ``]`` -- pitch falling,
    * ``[`` -- pitch rising.

    Args:
        text (str): Input text.
        drop_unvoiced_vowels (bool): whether to drop unvoiced vowels, i.e.
            lower-case the phonemes ``A``, ``E``, ``I``, ``O`` and ``U``.

    Returns:
        List[str]: List of phoneme + prosody symbols.

    Examples:
        >>> from espnet2.text.phoneme_tokenizer import pyopenjtalk_g2p_prosody
        >>> pyopenjtalk_g2p_prosody("こんにちは。")
        ['^', 'k', 'o', '[', 'N', 'n', 'i', 'ch', 'i', 'w', 'a', '$']

    .. _`Prosodic features control by symbols as input of sequence-to-sequence acoustic
        modeling for neural TTS`: https://doi.org/10.1587/transinf.2020EDP7104

    """
    import pyopenjtalk

    labels = pyopenjtalk.run_frontend(text)[1]
    last_index = len(labels) - 1
    symbols = []
    for index, label in enumerate(labels):
        # Current phoneme: the field between "-" and "+".
        phoneme = re.search(r"\-(.*?)\+", label).group(1)

        # Treat unvoiced vowels (upper case) as normal vowels.
        if drop_unvoiced_vowels and phoneme in "AEIOU":
            phoneme = phoneme.lower()

        # Silence is only expected at the very beginning and the very end.
        if phoneme == "sil":
            assert index == 0 or index == last_index
            if index == 0:
                symbols.append("^")
            elif index == last_index:
                # Distinguish a question from a plain sentence end.
                e3 = _numeric_feature_by_regex(r"!(\d+)_", label)
                if e3 == 0:
                    symbols.append("$")
                elif e3 == 1:
                    symbols.append("?")
            continue

        if phoneme == "pau":
            symbols.append("_")
            continue

        symbols.append(phoneme)

        # Accent type and position info (forward or backward).
        a1 = _numeric_feature_by_regex(r"/A:([0-9\-]+)\+", label)
        a2 = _numeric_feature_by_regex(r"\+(\d+)\+", label)
        a3 = _numeric_feature_by_regex(r"\+(\d+)/", label)

        # Number of mora in the accent phrase.
        f1 = _numeric_feature_by_regex(r"/F:(\d+)_", label)

        # Forward position read from the following label.
        a2_next = _numeric_feature_by_regex(r"\+(\d+)\+", labels[index + 1])

        if a3 == 1 and a2_next == 1:
            # accent phrase border
            symbols.append("#")
        elif a1 == 0 and a2_next == a2 + 1 and a2 != f1:
            # pitch falling
            symbols.append("]")
        elif a2 == 1 and a2_next == 2:
            # pitch rising
            symbols.append("[")

    return symbols
def test_fullcontext():
    """Check that ``extract_fullcontext`` agrees with ``run_frontend`` labels."""
    _, labels = pyopenjtalk.run_frontend("こんにちは")
    labels2 = pyopenjtalk.extract_fullcontext("こんにちは")
    # zip() stops at the shorter input, so without this length check a
    # truncated label list from either API would go unnoticed.
    assert len(labels) == len(labels2)
    for a, b in zip(labels, labels2):
        assert a == b
def test_hello():
    """Smoke test: run the frontend on a greeting and print its output."""
    frontend_output = pyopenjtalk.run_frontend("こんにちは")
    njd_results, labels = frontend_output
    _print_results(njd_results, labels)
def text_to_context(self, text: str) -> List[str]:
    """Return the label list produced by the OpenJTalk frontend for ``text``.

    Args:
        text: Input text handed to ``pyopenjtalk.run_frontend``.

    Returns:
        List[str]: The second item of the ``run_frontend`` result.
    """
    import pyopenjtalk

    return pyopenjtalk.run_frontend(text)[1]