def dataset_modification(config_path, output_path):
    """Register cumulative training subsets in data.yaml and save a copy.

    Loads ``<config_path>/data.yaml``, adds prefixes of the module-level
    ``training_set`` to the mapping stored under its ``'subset'`` key, and
    writes the whole document to ``<output_path>/data.yaml``.

    Each prefix is stored under the key ``'<first>-<last>'`` built from its
    first and last elements. Prefix lengths 5, 9, 13 and 17 are tried; a
    prefix whose last element is ``'i'`` is skipped.

    Args:
        config_path: Directory containing the input ``data.yaml``.
        output_path: Directory that receives the updated ``data.yaml``.
    """
    document = Utility.yaml_load('{}/data.yaml'.format(config_path))
    subsets = document['subset']
    print(subsets)

    for end in xrange(5, 20, 4):
        # Only register prefixes that do not end on the 'i' entry.
        if training_set[end - 1] != 'i':
            prefix = training_set[:end]
            subsets['{}-{}'.format(prefix[0], prefix[-1])] = prefix

    Utility.yaml_save('{}/data.yaml'.format(output_path), document)
def change_exp(config_path, output_path, subset):
    """Point experiment.yaml at the data set named after ``subset``.

    Loads ``<config_path>/experiment.yaml``, sets both
    ``data_set.optim_dur`` and ``data_set.train`` to ``'a-<last>'`` where
    ``<last>`` is the final element of ``subset``, prints the resulting
    document and saves it as ``<output_path>/experiment.yaml``.

    Args:
        config_path: Directory containing the input ``experiment.yaml``.
        output_path: Directory that receives the updated ``experiment.yaml``.
        subset: Non-empty sequence; only its last element is used.
    """
    experiment = Utility.yaml_load('{}/experiment.yaml'.format(config_path))

    set_name = 'a-{}'.format(subset[-1])
    for key in ('optim_dur', 'train'):
        experiment['data_set'][key] = set_name

    print(experiment)
    Utility.yaml_save('{}/experiment.yaml'.format(output_path), experiment)
# NOTE(review): this statement run is a fragment. The enclosing definition /
# loop header that binds `ch`, `set_out_path`, `set_stress_path`,
# `set_utt_base_path`, `set_syllable_full_path` and `coun_no` is not visible
# here, and the original line breaks and indentation have been lost, so the
# exact nesting must be restored from the upstream file before this can run.
#
# What the visible statements do, in order:
#   * create the output directory `set_out_path`;
#   * if both `set_stress_path` and `set_utt_base_path` exist, print `ch` and
#     loop `i` over 1..50, building names of the form
#     'tscsd<ch><Utility.fill_zero(i, 2)>';
#   * skip a name when '<set_utt_base_path>/<name>.utt.yaml' does not exist;
#   * load that YAML file and call add_stress(yaml, count, name) with
#     count = [0] (presumably a one-element list so add_stress can update the
#     counter in place - TODO confirm against add_stress);
#   * print 'Not equal' plus the name and both numbers when the line count of
#     '<set_syllable_full_path>/<name>.lab' differs from count[0];
#   * save the YAML to '<set_out_path>/<name>.utt.yaml';
#   * print a summary line using `coun_no` (never assigned in this fragment).
Utility.make_directory(set_out_path) if Utility.is_dir_exists(set_stress_path) & Utility.is_dir_exists(set_utt_base_path): print ch for i in xrange(1, 51): name = 'tscsd{}{}'.format(ch, Utility.fill_zero(i, 2)) yaml_filename = '{}/{}.utt.yaml'.format(set_utt_base_path, name ) if not Utility.is_file_exist(yaml_filename): continue full_file = '{}/{}.lab'.format(set_syllable_full_path, name) count = [0] yaml = Utility.yaml_load(yaml_filename) add_stress(yaml, count, name) if not (len(Utility.read_file_line_by_line(full_file)) == count[0] ): print 'Not equal' print name, len(Utility.read_file_line_by_line(full_file)), count[0] out_file = '{}/{}.utt.yaml'.format(set_out_path, name) Utility.yaml_save(out_file, yaml) print 'all no {} names '.format(coun_no) pass
def gen_utt(file_path, out_file):
    """Convert a full-context label file into a nested utterance YAML file.

    Every line read from ``file_path`` is matched against the context-label
    pattern below and turned into nested ``OrderedDict`` entries
    (utterance -> word -> syllable -> phone). The resulting list is written
    with ``Utility.yaml_save``.

    * A ``sil`` phone is appended at the top level of the output list.
    * A ``pau`` phone is appended to the ``inners`` of the last top-level
      entry, so that entry must be an utterance; a ``pau`` directly after a
      top-level ``sil`` raises ``KeyError``.
    * Any other phone opens a new utterance / word / syllable whenever the
      corresponding position fields equal ``'1'`` and is then appended to
      the most recent syllable of the most recent word.

    Args:
        file_path: Path of the label file, read with
            ``Utility.read_file_line_by_line``.
        out_file: Destination path handed to ``Utility.yaml_save``.

    Raises:
        UtteranceException: If a line does not match the label pattern.
    """
    # Compiled once instead of once per input line. re.VERBOSE ignores the
    # whitespace between the pieces; adjacent literals keep the pattern text
    # identical to the original single string.
    pat = re.compile(
        r" ^(?P<prev_phoneme>.+)-(?P<cur_phoneme>.+)\+(?P<next_phoneme>.+)"
        r" /A:(?P<prev_phone_position>.+)_.+-(?P<cur_phone_position>.+)_.+\+(?P<next_phone_position>.+)_.+"
        r" /B:(?P<prev_tone>.+)-(?P<cur_tone>.+)\+(?P<next_tone>.+)"
        r" /C:(?P<prev_syllable_position>.+)_.+-(?P<cur_syllable_position>.+)_.+\+(?P<next_syllable_position>.+)_.+"
        r" /D:(?P<prev_number_of_phone>.+)-(?P<cur_number_of_phone>.+)\+(?P<next_number_of_phone>.+)"
        r" /E:(?P<prev_word_position>.+)-(?P<cur_word_position>.+)\+(?P<next_word_position>.+)"
        r" /F:.+_(?P<prev_number_of_syllable>.+)-.+_(?P<cur_number_of_syllable>.+)\+.+_(?P<next_number_of_syllable>.+)"
        r" /G:.+_(?P<number_of_syllable_in_sentence>.+)_(?P<number_of_word_in_sentence>.+)"
        r" /H:(?P<prev_word_partofspeech>.+)_.+-(?P<cur_word_partofspeech>.+)_.+\+(?P<next_word_partofspeech>.+)_.+"
        r" /I:(?P<cur_stress>.+)"
        " ? \n",
        re.VERBOSE)

    # NOTE(review): word_index and syllable_index are overwritten with
    # container lengths after every regular phone, so the value stored on a
    # new word/syllable is "previous container length + 1" rather than a
    # running count. Kept exactly as before - confirm this is intended.
    word_index, syllable_index, phone_index = 0, 0, 0
    output_array = []

    for line in Utility.read_file_line_by_line(file_path):
        match = pat.match(line)
        if match is None:
            raise UtteranceException('Unmatched context: {:}'.format(line))

        cur_phoneme = match.group('cur_phoneme')

        # Silence and pause are bare phone entries outside the
        # word/syllable tree.
        if cur_phoneme in ('sil', 'pau'):
            od = collections.OrderedDict([
                ('unit', 'phone'),
                ('entity', cur_phoneme),
            ])
            if cur_phoneme == 'sil':
                output_array.append(od)
            else:
                output_array[-1]['inners'].append(od)
            continue

        at_syllable_start = match.group('cur_phone_position') == '1'
        at_word_start = (at_syllable_start and
                         match.group('cur_syllable_position') == '1')
        at_utterance_start = (at_word_start and
                              match.group('cur_word_position') == '1')

        # Start of utterance: open a new top-level utterance entry.
        if at_utterance_start:
            output_array.append(collections.OrderedDict([
                ('unit', 'utterance'),
                ('inners', []),
            ]))

        # Entries of the current (last) top-level item; raises if the list
        # is empty or the last item is a bare 'sil' phone, as before.
        words = output_array[-1]['inners']

        # Start of word: open a new word inside the current utterance.
        if at_word_start:
            word_index += 1
            words.append(collections.OrderedDict([
                ('unit', 'word'),
                ('word_index', word_index),
                ('part_of_speech', match.group('cur_word_partofspeech')),
                ('inners', []),
            ]))

        # Start of syllable: open a new syllable inside the current word.
        if at_syllable_start:
            syllable_index += 1
            od = collections.OrderedDict([
                ('unit', 'syllable'),
                ('syllable_index', syllable_index),
                ('tone_type', int(match.group('cur_tone'))),
                ('stress', match.group('cur_stress')),
                ('inners', []),
            ])
            word_index = len(words)
            words[-1]['inners'].append(od)

        # Every regular line contributes one phone to the latest syllable.
        phone_index += 1
        od = collections.OrderedDict([
            ('unit', 'phone'),
            ('phone_index', phone_index),
            ('entity', cur_phoneme),
        ])
        word_index = len(words)
        syllables = words[-1]['inners']
        syllable_index = len(syllables)
        syllables[-1]['inners'].append(od)

    Utility.yaml_save(out_file, output_array)