def txt2label(txt, sfsfile=None, style='default'):
    '''Return a generator of HTS format labels for txt.

    Args:
        txt: raw text such as "向香港特别行政区同胞澳门台湾同胞", or text
            with prosody marks such as
            "向#1香港#2特别行政区#1同胞#3澳门台湾#1同胞". Punctuation is
            allowed in txt; every non-word character other than '#' is
            removed before processing.
        sfsfile: path of an sfs (alignment) file, or None. Each non-blank
            line holds a time and a phone type separated by whitespace;
            times are in units of 1e-7 second (12345678 means 1.2345678
            seconds). Example:
            --------
            239100 s
            313000 a
            323000 d
            400000 b
            480000 s
            ---------
            a stands for consonant
            b stands for vowel
            d stands for silence that is shorter than 100ms
            s stands for silence that is longer than 100ms
        style: label style; only 'default' (HTS format) is supported.

    Returns:
        The value of LabGenerator(phone, rhythms, times): a generator of
        phone labels for txt, convenient to save as a label file.

    Raises:
        AssertionError: if style is not 'default', or a non-blank line of
            the sfs file does not contain exactly two fields.
    '''
    assert style == 'default', 'Currently only default style is support in txt2label'

    # Remove every non-word character except '#', which carries the
    # prosody marks (#1, #2, ...).
    txt = re.sub(r'(?!#)\W', '', txt)

    # If txt carries prosody marks, use them; otherwise fall back to
    # part-of-speech segmentation (posseg.cut).
    if '#' in txt:
        words, poses, rhythms = _adjust(txt)
    else:
        # No punctuation is left at this point (it was stripped above), so
        # there is nothing to map to a prosody mark here.
        words = []
        poses = []
        for word, pos in posseg.cut(txt):
            words.append(word)
            poses.append(pos[0])  # keep only the first character of the tag
        # Every word boundary is '#0'; the end of the text is '#4'.
        rhythms = ['#0'] * (len(words) - 1)
        rhythms.append('#4')

    # Each syllable is like ('b', 'a3').
    syllables = txt2pinyin(''.join(words))

    if sfsfile:
        phs_type = []
        # NOTE(review): the first entry is the string '0' while the entries
        # read from the file are ints; kept as-is because what LabGenerator
        # expects is not visible from here.
        times = ['0']
        with open(sfsfile) as fid:
            for line in fid:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                assert len(fields) == 2, 'check format of sfs file'
                time, ph = fields
                # Timestamps may be written as floats; truncate to int.
                times.append(int(float(time)))
                # extend() adds one entry per character of the type field.
                phs_type.extend(ph)
    else:
        # Without alignment: a leading silence, one 'a' per phone of each
        # word, a silence after every non-final '#4' boundary, and a
        # trailing silence. All times are zero.
        phs_type = ['s']
        last_index = len(rhythms) - 1
        for i, rhythm in enumerate(rhythms):
            single_word_pinyin = txt2pinyin(words[i])
            single_word_phone_num = sum(
                len(syllable) for syllable in single_word_pinyin)
            phs_type.extend(['a'] * single_word_phone_num)
            if i != last_index and rhythm == '#4':
                phs_type.append('s')
        phs_type.append('s')
        # One more boundary time than there are phone entries.
        times = [0] * (len(phs_type) + 1)

    phone = tree(words, rhythms, syllables, poses, phs_type)
    return LabGenerator(phone, rhythms, times)
def txt2label(txt, wavfile=None, sfsfile=None, style='default'):
    '''Return a generator of HTS format labels for txt.

    If txt has no prosody marks and no alignment file is given, labels are
    still returned, but without prosody and timing information.

    Args:
        txt: raw text such as "向香港特别行政区同胞澳门台湾同胞", or text
            with prosody marks such as
            "向#1香港#2特别行政区#1同胞#3澳门台湾#1同胞". Punctuation is
            also allowed in txt.
        wavfile: path of the wav file for txt. Not supported yet; must be
            None.
        sfsfile: path of an sfs (alignment) file, or None. Each non-blank
            line holds a time and a phone type separated by whitespace;
            times are in units of 1e-7 second (12345678 means 1.2345678
            seconds). Example:
            --------
            239100 s
            313000 a
            323000 d
            400000 b
            480000 s
            ---------
            a stands for consonant
            b stands for vowel
            d stands for silence that is shorter than 100ms
            s stands for silence that is longer than 100ms
        style: label style; only 'default' (HTS format) is supported.

    Returns:
        The value of LabGenerator(phone, rhythms, times): a generator of
        phone labels for txt, convenient to save as a label file.

    Raises:
        AssertionError: if wavfile is not None, if style is not 'default',
            or if a non-blank line of the sfs file does not contain exactly
            two fields.
    '''
    assert wavfile is None, 'wavfile currently is not supported'
    assert style == 'default', 'Currently only default style is support in txt2label'

    # If txt carries prosody marks, use them; otherwise fall back to
    # part-of-speech segmentation (posseg.cut).
    if '#' in txt:
        words, poses, rhythms = _adjust(txt)
    else:
        words = []
        poses = []
        for word, pos in posseg.cut(txt):
            # Keep text as str: encoding to bytes here would make the
            # ''.join(words) below fail on Python 3.
            words.append(word)
            poses.append(pos[0])  # keep only the first character of the tag
        # Every word boundary is '#0'; the end of the text is '#4'.
        rhythms = ['#0'] * (len(words) - 1)
        rhythms.append('#4')

    syllables = txt2pinyin(''.join(words))

    if sfsfile:
        phs_type = []
        # NOTE(review): the first entry is the string '0' while the entries
        # read from the file are ints; kept as-is because what LabGenerator
        # expects is not visible from here.
        times = ['0']
        with open(sfsfile) as fid:
            for line in fid:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                assert len(fields) == 2, 'check format of sfs file'
                time, ph = fields
                # Accept float-formatted timestamps as well as plain ints.
                times.append(int(float(time)))
                # extend() adds one entry per character of the type field.
                phs_type.extend(ph)
    else:
        # Without alignment: every phone is typed 'a', wrapped in a leading
        # and a trailing silence, and all times are zero.
        length = sum(len(syllable) for syllable in syllables)
        phs_type = ['s'] + ['a'] * length + ['s']
        # len(phs_type) is length + 2; one more boundary time than entries.
        times = [0] * (length + 3)

    phone = tree(words, rhythms, syllables, poses, phs_type)
    return LabGenerator(phone, rhythms, times)
def txt2label(txt, sfsfile=None, style='default'):
    '''Return a generator of HTS format labels for txt.

    Args:
        txt: raw text such as "向香港特别行政区同胞澳门台湾同胞", or text
            with prosody marks such as
            "向#1香港#2特别行政区#1同胞#3澳门台湾#1同胞". Punctuation is
            allowed in txt; every non-word character other than '#' is
            removed before processing.
        sfsfile: path of an sfs (alignment) file, or None. Each non-blank
            line holds a time and a phone type separated by whitespace;
            times are in units of 1e-7 second (12345678 means 1.2345678
            seconds). Example:
            --------
            239100 s
            313000 a
            323000 d
            400000 b
            480000 s
            ---------
            a stands for consonant
            b stands for vowel
            d stands for silence that is shorter than 100ms
            s stands for silence that is longer than 100ms
        style: label style; only 'default' (HTS format) is supported.

    Returns:
        The value of LabGenerator(phone, rhythms, times): a generator of
        phone labels for txt, convenient to save as a label file.

    Raises:
        AssertionError: if style is not 'default', or a non-blank line of
            the sfs file does not contain exactly two fields.
    '''
    assert style == 'default', 'Currently only default style is support in txt2label'

    # Remove every character that is not a digit, letter or Chinese
    # character, but keep '#': a plain \W would also delete the prosody
    # marks and make the prosody branch below unreachable.
    txt = re.sub(r'(?!#)\W', '', txt)

    # If txt carries prosody marks, use them; otherwise fall back to
    # part-of-speech segmentation (posseg.cut).
    if '#' in txt:
        words, poses, rhythms = _adjust(txt)
    else:
        words = []
        poses = []
        for word, pos in posseg.cut(txt):
            words.append(word)
            poses.append(pos[0])  # keep only the first character of the tag
        # Every word boundary is '#0'; the end of the text is '#4'.
        rhythms = ['#0'] * (len(words) - 1)
        rhythms.append('#4')

    syllables = txt2pinyin(''.join(words))

    if sfsfile:
        phs_type = []
        # NOTE(review): the first entry is the string '0' while the entries
        # read from the file are ints; kept as-is because what LabGenerator
        # expects is not visible from here.
        times = ['0']
        with open(sfsfile) as fid:
            for line in fid:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                assert len(fields) == 2, 'check format of sfs file'
                time, ph = fields
                # Timestamps may be written as floats; truncate to int.
                times.append(int(float(time)))
                # extend() adds one entry per character of the type field.
                phs_type.extend(ph)
    else:
        # Without alignment: every phone is typed 'a', wrapped in a leading
        # and a trailing silence, and all times are zero.
        phone_num = sum(len(syllable) for syllable in syllables)
        phs_type = ['s'] + ['a'] * phone_num + ['s']
        # len(phs_type) is phone_num + 2; one more boundary time than
        # entries.
        times = [0] * (phone_num + 3)

    phone = tree(words, rhythms, syllables, poses, phs_type)
    return LabGenerator(phone, rhythms, times)