def text_to_sequence(text, speaker_name='baker', inference=False):
    """Convert input text to a sequence of symbol ids.

    Args:
        text: For 'baker' at inference time, raw Chinese text that is first
            converted to pinyin and then to phonemes. Otherwise, text that is
            expected to already be a space-separated phoneme string.
        speaker_name: Dataset/speaker key; 'baker' selects the pinyin pipeline.
        inference: True when called at inference time (grapheme-to-phoneme
            conversion is still needed).

    Returns:
        List of symbol ids with an end-of-sequence marker appended.
    """
    sequence = []
    tmp = ""
    if "baker" == speaker_name:
        if inference:
            my_pinyin = Pinyin(MyConverter())
            pinyin = my_pinyin.pinyin(
                text,
                style=Style.TONE3,
                # errors="ignore"
                errors=alpha_handler,
            )
            print("text_to_sequence pinyin=", pinyin)
            # Drop any token containing '#' (markers inserted upstream that
            # have no phoneme mapping).
            new_pinyin = []
            for x in pinyin:
                x = "".join(x)
                if "#" not in x:
                    new_pinyin.append(x)
            print("text_to_sequence new_pinyin=", new_pinyin)
            phonemes = get_phoneme_from_char_and_pinyin(text, new_pinyin)
            text = " ".join(phonemes)
            print(f"phoneme seq: {text}")
        try:
            for symbol in text.split():
                tmp = symbol  # remembered for the error report below
                idx = symbol_to_id[symbol]
                sequence.append(idx)
        except KeyError:
            # Narrowed from a bare `except Exception`: only the symbol_to_id
            # lookup can raise here, and only KeyError. Report the offending
            # symbol instead of silently swallowing arbitrary errors.
            print("text_to_sequence error", tmp)
    else:
        if not inference:
            # in train mode text should be already transformed to phonemes
            sequence = symbols_to_ids(clean_g2p(text.strip().split(" ")))
        else:
            sequence = inference_text_to_seq(text)

    # add eos tokens
    # NOTE(review): this appends the literal string 'eos_id' to a list of
    # integer ids; it looks like it should be the eos symbol's numeric id —
    # TODO confirm against callers before changing.
    sequence += ['eos_id']
    return sequence
def get_audio_dict(self) -> (dict, dict, dict):
    """Load the raw dataset index.

    Reads one '<use_type>.txt' file per configured use type. Each line is
    expected to be '<id>\\t<sentence>'. Three parallel dicts keyed by the
    utterance id are built from those lines.

    Returns:
        (id_path_dict, id_hanzi_dict, id_pinyin_dict):
            id -> audio path under <dataset_path>/<use_type>/<id>,
            id -> space-separated hanzi characters,
            id -> space-separated pinyin (TONE3), with '_' for syllables
                  missing from the pinyin dictionary.
    """
    # Renamed from `use_type` so the loop below no longer shadows the
    # sequence it iterates over.
    use_types = self._use_type
    dataset_path = self._dataset_path
    participle = self._participle
    id_path_dict = {}
    id_hanzi_dict = {}
    id_pinyin_dict = {}
    # Hoisted out of the per-line loop: both are invariant across the
    # whole dataset and were previously rebuilt for every line.
    pinyin_dict = DataUtils.get_pinyin_dict()
    my_pinyin = Pinyin(MyConverter())
    for use_type in use_types:
        with open(file=dataset_path.joinpath(use_type + '.txt'),
                  mode='r', encoding='utf-8') as txt_file:
            # Iterate the file lazily instead of materializing readlines().
            for line in txt_file:
                parts = line.split('\t')
                # Utterance id (renamed from `id` to avoid shadowing the builtin).
                utt_id = parts[0]
                # Audio path for this utterance.
                path = dataset_path.joinpath(use_type, utt_id)
                # Sentence text; optionally word-segmented with jieba.
                hanzi = parts[1].strip('\n')
                if participle:
                    hanzi = list(jieba.cut(hanzi, cut_all=False))
                else:
                    hanzi = hanzi.split(' ')
                # Pinyin (TONE3); out-of-vocabulary syllables become '_'.
                pinyin = ''
                for token in hanzi:
                    for char in my_pinyin.pinyin(token, style=Style.TONE3,
                                                 heteronym=False):
                        if char[0] not in pinyin_dict:
                            pinyin += ('_' + ' ')
                        else:
                            pinyin += (char[0] + ' ')
                id_path_dict[utt_id] = path
                id_hanzi_dict[utt_id] = ' '.join(list(''.join(hanzi)))
                id_pinyin_dict[utt_id] = pinyin
    return id_path_dict, id_hanzi_dict, id_pinyin_dict