def test(self, gaussians=1, iter=8, mmi=False, diag=False, xword_id='', output_dir=None):
    """Run the enabled stages of the test pipeline.

    The active configuration is first written to ``<self.exp>/config``.
    Then, depending on the flags in ``self.test_pipeline``:

    * ``'coding'``: create ``<self.exp>/Coding`` and convert audio to
      feature files via ``coding.wav_to_mfc``.
    * ``'test'``: build the word MLF from the transcripts (with
      ``skip_oov=True``), call ``self.decode`` and log the elapsed
      wall-clock seconds.

    ``gaussians``, ``iter``, ``mmi``, ``diag``, ``xword_id`` and
    ``output_dir`` are not interpreted here; they are forwarded unchanged
    to ``self.decode``.
    """
    ## Copy config file to the experiment dir
    config_output = '%s/config' % self.exp
    # Use a context manager so the handle is flushed and closed right away
    # instead of being left for the garbage collector.
    with open(config_output, 'w') as config_fh:
        self.config.write(config_fh)
    log(self.logfh, 'TESTING with config [%s]' % config_output)

    if self.test_pipeline['coding']:
        import coding
        coding_dir = '%s/Coding' % self.exp
        util.create_new_dir(coding_dir)
        count = coding.wav_to_mfc(self, coding_dir, self.mfc_list)
        log(self.logfh, 'CODING finished [%d files]' % count)

    if self.test_pipeline['test']:
        import dict_and_lm
        start_time = time.time()
        # NOTE(review): `model` is not defined inside this method; it only
        # resolves if a module-level `model` exists. train() passes `self`
        # to this same helper -- confirm whether `self` is intended here.
        num_utts, _ = dict_and_lm.make_mlf_from_transcripts(
            model, self.dict, self.setup, self.data, self.word_mlf,
            self.mfc_list, skip_oov=True)
        log(self.logfh, 'wrote word mlf [%d utts] [%s]' % (num_utts, self.word_mlf))

        self.decode(model, self.mfc_list, self.word_mlf, self.lm, gaussians,
                    iter, mmi, diag, xword_id, output_dir)
        total_time = time.time() - start_time
        log(self.logfh, 'TESTING finished; secs elapsed [%1.2f]' % total_time)
def test(self, gaussians=1, iter=8, mmi=False, diag=False, xword_id='', output_dir=None):
    """Run the enabled stages of the test pipeline.

    The active configuration is first written to ``<self.exp>/config``.
    Then, depending on the flags in ``self.test_pipeline``:

    * ``'coding'``: create ``<self.exp>/Coding`` and convert audio to
      feature files via ``coding.wav_to_mfc``.
    * ``'test'``: build the word MLF from the transcripts (with
      ``skip_oov=True``), call ``self.decode`` and log the elapsed
      wall-clock seconds.

    ``gaussians``, ``iter``, ``mmi``, ``diag``, ``xword_id`` and
    ``output_dir`` are not interpreted here; they are forwarded unchanged
    to ``self.decode``.
    """
    ## Copy config file to the experiment dir
    config_output = '%s/config' % self.exp
    # Use a context manager so the handle is flushed and closed right away
    # instead of being left for the garbage collector.
    with open(config_output, 'w') as config_fh:
        self.config.write(config_fh)
    log(self.logfh, 'TESTING with config [%s]' % config_output)

    if self.test_pipeline['coding']:
        import coding
        coding_dir = '%s/Coding' % self.exp
        util.create_new_dir(coding_dir)
        count = coding.wav_to_mfc(self, coding_dir, self.mfc_list)
        log(self.logfh, 'CODING finished [%d files]' % count)

    if self.test_pipeline['test']:
        import dict_and_lm
        start_time = time.time()
        # NOTE(review): `model` is not defined inside this method; it only
        # resolves if a module-level `model` exists. train() passes `self`
        # to this same helper -- confirm whether `self` is intended here.
        num_utts, _ = dict_and_lm.make_mlf_from_transcripts(
            model, self.dict, self.setup, self.data, self.word_mlf,
            self.mfc_list, skip_oov=True)
        log(self.logfh, 'wrote word mlf [%d utts] [%s]' % (num_utts, self.word_mlf))

        self.decode(model, self.mfc_list, self.word_mlf, self.lm, gaussians,
                    iter, mmi, diag, xword_id, output_dir)
        total_time = time.time() - start_time
        log(self.logfh, 'TESTING finished; secs elapsed [%1.2f]' % total_time)
def train(self):
    """Run every enabled stage of the training pipeline, in order.

    The active configuration is first written to ``<self.exp>/config``.
    Each stage is then gated by a boolean in ``self.train_pipeline``:
    ``coding``, ``lm``, ``flat_start``, ``mixup_mono``, ``mixdown_mono``,
    ``mono_to_tri``, ``mixup_tri``, ``align_with_xword``,
    ``mono_to_tri_from_xword``, ``mixup_tri_2``, ``diag`` and ``mmi``.

    Stages do not hand model directories to each other through local
    variables; each one rebuilds the path of the model it starts from out
    of the configured roots and iteration counts, so a stage can be run
    on its own provided the earlier stages' outputs are already on disk.
    Progress is reported through ``log(self.logfh, ...)``.
    """

    def run_bw(root, hmm_dir, mlf, hmm_list, mix_size, iters, extra=''):
        """Run one re-estimation pass per entry of `iters`; return the last model dir."""
        import train_hmm
        for it in iters:
            hmm_dir, _, lik = train_hmm.run_iter(
                self, root, hmm_dir, mlf, hmm_list, mix_size, it, extra)
            log(self.logfh,
                'ran an iteration of BW in [%s] lik/fr [%1.4f]' % (hmm_dir, lik))
        return hmm_dir

    def mixup_tri(root, hmm_dir):
        """Walk self.tri_mixup_schedule under `root`, re-estimating after each split."""
        import train_hmm
        for mix_size in self.tri_mixup_schedule:
            if mix_size == 2:
                # Only the split to 2 components asks for estimateVarFloor.
                hmm_dir = train_hmm.mixup(self, root, hmm_dir, self.tied_list,
                                          mix_size, estimateVarFloor=1)
            else:
                hmm_dir = train_hmm.mixup(self, root, hmm_dir, self.tied_list,
                                          mix_size)
            log(self.logfh, 'mixed up to [%d] in [%s]' % (mix_size, hmm_dir))
            hmm_dir = run_bw(root, hmm_dir, self.tri_mlf, self.tied_list,
                             mix_size, range(1, self.tri_iters_per_split + 1))
        return hmm_dir

    ## Copy config file to the experiment dir
    config_output = '%s/config' % self.exp
    # Context manager: the handle is closed as soon as the config is written.
    with open(config_output, 'w') as config_fh:
        self.config.write(config_fh)
    log(self.logfh, 'TRAINING with config [%s]' % config_output)

    if self.train_pipeline['coding']:
        log(self.logfh, 'CODING started')
        import coding
        util.create_new_dir(self.coding_root)
        coding.create_config(self)
        count = coding.wav_to_mfc(self, self.coding_root, self.mfc_list)
        os.system('cp %s %s/mfc.list.original' % (self.mfc_list, self.misc))
        log(self.logfh, 'wrote mfc files [%d]' % count)
        log(self.logfh, 'CODING finished')

    if self.train_pipeline['lm']:
        log(self.logfh, 'MLF/LM/DICT started')
        import dict_and_lm
        # Return value is not used by this stage.
        dict_and_lm.fix_cmu_dict(self.orig_dict, self.htk_dict)
        num_utts, words = dict_and_lm.make_mlf_from_transcripts(
            self, self.htk_dict, self.setup, self.data, self.word_mlf,
            self.mfc_list)
        log(self.logfh, 'wrote word mlf [%d utts] [%s]' % (num_utts, self.word_mlf))
        os.system('cp %s %s/mfc.list.filtered.by.dict' % (self.mfc_list, self.misc))

        num_entries = dict_and_lm.make_train_dict(self.htk_dict, self.train_dict, words)
        dict_and_lm.make_decode_dict(self.htk_dict, self.decode_dict, words)
        log(self.logfh, 'wrote training dictionary [%d entries] [%s]'
            % (num_entries, self.train_dict))

        util.create_new_dir(self.lm_dir)
        train_vocab = '%s/vocab' % self.lm_dir
        ppl = dict_and_lm.build_lm_from_mlf(
            self, self.word_mlf, self.train_dict, train_vocab, self.lm_dir,
            self.lm, self.lm_order)
        log(self.logfh, 'wrote lm [%s] training ppl [%1.2f]' % (self.lm, ppl))
        log(self.logfh, 'MLF/LM/DICT finished')

    if self.train_pipeline['flat_start']:
        log(self.logfh, 'FLAT START started')
        import init_hmm
        init_hmm.word_to_phone_mlf(self, self.train_dict, self.word_mlf,
                                   self.phone_mlf, self.phone_list)
        log(self.logfh, 'wrote phone mlf [%s]' % self.phone_mlf)
        os.system('cp %s %s/phone.mlf.from.dict' % (self.phone_mlf, self.misc))
        os.system('bzip2 -f %s/phone.mlf.from.dict' % self.misc)

        init_hmm.make_proto_hmm(self, self.mfc_list, self.proto_hmm)
        hmm_dir, num_mfcs = init_hmm.initialize_hmms(
            self, self.mono_root, self.mfc_list, self.phone_list, self.proto_hmm)
        log(self.logfh, 'initialized an HMM for each phone in [%s]' % hmm_dir)
        log(self.logfh, 'used [%d] mfc files to compute variance floor' % num_mfcs)

        import train_hmm
        hmm_dir = run_bw(self.mono_root, hmm_dir, self.phone_mlf, self.phone_list,
                         1, range(1, self.initial_mono_iters + 1))

        align_config = '%s/config.align' % self.mono_root
        with open(align_config, 'w') as fh:
            fh.write('HPARM: TARGETKIND = MFCC_0_D_A_Z\n')
        # Realign; the phone MLF at self.phone_mlf is the alignment output.
        train_hmm.align(self, self.mono_root, self.mfc_list, hmm_dir,
                        self.word_mlf, self.phone_mlf, self.phone_list,
                        self.train_dict, align_config)
        log(self.logfh, 'aligned with model in [%s], wrote phone mlf [%s]'
            % (hmm_dir, self.phone_mlf))
        os.system('cp %s %s/mfc.list.filtered.by.mono.align' % (self.mfc_list, self.misc))
        os.system('cp %s %s/phone.mlf.from.mono.align' % (self.phone_mlf, self.misc))
        os.system('bzip2 -f %s/phone.mlf.from.mono.align' % self.misc)

        # Iteration numbering continues where the pre-alignment passes stopped.
        hmm_dir = run_bw(self.mono_root, hmm_dir, self.phone_mlf, self.phone_list, 1,
                         range(self.initial_mono_iters + 1,
                               self.initial_mono_iters + 1 + self.mono_iters))
        log(self.logfh, 'FLAT START finished')

    if self.train_pipeline['mixup_mono']:
        log(self.logfh, 'MIXUP MONO started')
        import train_hmm
        hmm_dir = '%s/HMM-%d-%d' % (self.mono_root, 1,
                                    self.initial_mono_iters + self.mono_iters)
        ## mixup everything
        for mix_size in self.mono_mixup_schedule:
            hmm_dir = train_hmm.mixup(self, self.mixup_mono_root, hmm_dir,
                                      self.phone_list, mix_size)
            log(self.logfh, 'mixed up to [%d] in [%s]' % (mix_size, hmm_dir))
            hmm_dir = run_bw(self.mixup_mono_root, hmm_dir, self.phone_mlf,
                             self.phone_list, mix_size,
                             range(1, self.mono_iters + 1))
        log(self.logfh, 'MIXUP MONO finished')

    if self.train_pipeline['mixdown_mono']:
        log(self.logfh, 'MIXDOWN MONO started')
        import train_hmm
        num_gaussians = self.mono_mixup_schedule[-1]
        hmm_dir = '%s/HMM-%d-%d' % (self.mixup_mono_root, num_gaussians,
                                    self.mono_iters)
        train_hmm.mixdown_mono(self, self.mixdown_mono_root, hmm_dir, self.phone_list)
        log(self.logfh, 'MIXDOWN MONO finished')

    if self.train_pipeline['mono_to_tri']:
        log(self.logfh, 'MONO TO TRI started')
        import train_hmm
        if self.train_pipeline['mixdown_mono']:
            mono_final_dir = '%s/HMM-1-0' % self.mixdown_mono_root
        else:
            mono_final_dir = '%s/HMM-%d-%d' % (
                self.mono_root, 1, self.initial_mono_iters + self.mono_iters)

        hmm_dir = train_hmm.mono_to_tri(
            self, self.xword_root, mono_final_dir, self.phone_mlf, self.tri_mlf,
            self.phone_list, self.tri_list)
        log(self.logfh, 'initialized triphone models in [%s]' % hmm_dir)
        log(self.logfh, 'created triphone mlf [%s]' % self.tri_mlf)
        os.system('cp %s %s/tri.mlf.from.mono.align' % (self.tri_mlf, self.misc))
        os.system('bzip2 -f %s/tri.mlf.from.mono.align' % self.misc)
        os.system('cp %s %s/tri.list.from.mono.align' % (self.tri_list, self.misc))

        hmm_dir = run_bw(self.xword_root, hmm_dir, self.tri_mlf, self.tri_list,
                         1, range(1, self.initial_tri_iters + 1))

        xword_tie_dir = '%s/HMM-%d-%d' % (self.xword_root, 1,
                                          self.initial_tri_iters + 1)
        hmm_dir = train_hmm.tie_states_search(
            self, xword_tie_dir, hmm_dir, self.phone_list, self.tri_list,
            self.tied_list)
        log(self.logfh, 'tied states in [%s]' % hmm_dir)
        os.system('cp %s %s/tied.list.initial' % (self.tied_list, self.misc))

        # Continue from the tie directory regardless of what the search returned.
        hmm_dir = '%s/HMM-%d-%d' % (self.xword_root, 1, self.initial_tri_iters + 1)
        hmm_dir = run_bw(self.xword_root, hmm_dir, self.tri_mlf, self.tied_list, 1,
                         range(self.initial_tri_iters + 2,
                               self.initial_tri_iters + 1 + self.tri_iters + 1))
        log(self.logfh, 'MONO TO TRI finished')

    if self.train_pipeline['mixup_tri']:
        log(self.logfh, 'MIXUP TRI started')
        ## mixup everything
        start_gaussians = 1
        start_iter = self.initial_tri_iters + self.tri_iters + 1
        hmm_dir = '%s/HMM-%d-%d' % (self.xword_root, start_gaussians, start_iter)
        mixup_tri(self.xword_root, hmm_dir)
        log(self.logfh, 'MIXUP TRI finished')

    if self.train_pipeline['align_with_xword']:
        log(self.logfh, 'XWORD ALIGN started')
        import train_hmm
        align_config = '%s/config.align' % self.xword_root
        train_hmm.make_hvite_xword_config(self, align_config, 'MFCC_0_D_A_Z')
        num_gaussians = self.tri_mixup_schedule[-1]
        iter_num = self.tri_iters_per_split
        hmm_dir = '%s/HMM-%d-%d' % (self.xword_root, num_gaussians, iter_num)
        realigned_mlf = '%s/raw_tri_xword_realigned.mlf' % self.misc

        # Use the original, mfc list that has prons for every word
        os.system('cp %s/mfc.list.filtered.by.dict %s' % (self.misc, self.mfc_list))
        align_dir = train_hmm.align(
            self, self.xword_root, self.mfc_list, hmm_dir, self.word_mlf,
            realigned_mlf, self.tied_list, self.train_dict, align_config)
        log(self.logfh, 'aligned with model in [%s], tri mlf [%s]'
            % (hmm_dir, realigned_mlf))

        # Because of state tying, the triphones in the mlf will only be
        # valid for this state tying. Strip down to monophones, the
        # correct triphones will be created later in mono_to_tri
        train_hmm.map_tri_to_mono(self, align_dir, realigned_mlf, self.phone_mlf)
        os.system('cp %s %s/phone.mlf.from.xword.align' % (self.phone_mlf, self.misc))
        os.system('bzip2 -f %s/phone.mlf.from.xword.align' % self.misc)
        os.system('bzip2 -f %s' % realigned_mlf)
        log(self.logfh, 'XWORD ALIGN finished')

    if self.train_pipeline['mono_to_tri_from_xword']:
        log(self.logfh, 'MONO TO TRI FROM XWORD started')
        import train_hmm
        # Unconditionally starts from the mixdown_mono output (HMM-1-0), i.e.
        # this stage assumes mixdown_mono has been run at some point.
        mono_final_dir = '%s/HMM-1-0' % self.mixdown_mono_root
        hmm_dir = train_hmm.mono_to_tri(
            self, self.xword_1_root, mono_final_dir, self.phone_mlf,
            self.tri_mlf, self.phone_list, self.tri_list)
        log(self.logfh, 'initialized triphone models in [%s]' % hmm_dir)
        os.system('cp %s %s/tri.mlf.from.xword.align' % (self.tri_mlf, self.misc))
        os.system('bzip2 -f %s/tri.mlf.from.xword.align' % self.misc)
        os.system('cp %s %s/tri.list.from.xword.align' % (self.tri_list, self.misc))

        two_model_config = '%s/config.two_model' % self.xword_1_root
        with open(two_model_config, 'w') as fh:
            fh.write('ALIGNMODELMMF = %s/HMM-%d-%d/MMF\n'
                     % (self.xword_root, self.tri_mixup_schedule[-1],
                        self.tri_iters_per_split))
            fh.write('ALIGNHMMLIST = %s\n' % self.tied_list)

        # Do one pass of two-model re-estimation
        extra = ' -C %s' % two_model_config
        hmm_dir = run_bw(self.xword_1_root, hmm_dir, self.tri_mlf, self.tri_list,
                         1, [1], extra)

        xword_tie_dir = '%s/HMM-1-2' % self.xword_1_root
        hmm_dir = train_hmm.tie_states_search(
            self, xword_tie_dir, hmm_dir, self.phone_list, self.tri_list,
            self.tied_list)
        log(self.logfh, 'tied states in [%s]' % hmm_dir)
        os.system('cp %s %s/tied.list.second' % (self.tied_list, self.misc))

        hmm_dir = '%s/HMM-1-2' % self.xword_1_root
        hmm_dir = run_bw(self.xword_1_root, hmm_dir, self.tri_mlf, self.tied_list,
                         1, range(3, self.tri_iters + 3))
        log(self.logfh, 'MONO TO TRI FROM XWORD finished')

    if self.train_pipeline['mixup_tri_2']:
        log(self.logfh, 'MIXUP TRI 2 started')
        ## mixup everything
        start_gaussians = 1
        start_iter = self.tri_iters + 2
        hmm_dir = '%s/HMM-%d-%d' % (self.xword_1_root, start_gaussians, start_iter)
        mixup_tri(self.xword_1_root, hmm_dir)
        log(self.logfh, 'MIXUP TRI 2 finished')

    if self.train_pipeline['diag']:
        log(self.logfh, 'DIAG started')
        import train_hmm
        num_gaussians = self.tri_mixup_schedule[-1]
        iter_num = self.tri_iters_per_split
        # Seed from the second-pass models when that stage is enabled.
        if self.train_pipeline['mixup_tri_2']:
            seed_dir = '%s/HMM-%d-%d' % (self.xword_1_root, num_gaussians, iter_num)
        else:
            seed_dir = '%s/HMM-%d-%d' % (self.xword_root, num_gaussians, iter_num)
        hmm_dir, lik = train_hmm.diagonalize(
            self, self.diag_root, seed_dir, self.tied_list, self.tri_mlf,
            num_gaussians)
        log(self.logfh, 'ran diag in [%s] lik/fr [%1.4f]' % (hmm_dir, lik))
        hmm_dir = run_bw(self.diag_root, hmm_dir, self.tri_mlf, self.tied_list,
                         num_gaussians, range(1, self.tri_iters_per_split + 1))
        log(self.logfh, 'DIAG finished')

    if self.train_pipeline['mmi']:
        log(self.logfh, 'DISCRIM started')
        # NOTE(review): every mmi.* call below passes `model`, which is not
        # defined inside this method; it only resolves if a module-level
        # `model` exists. All other stages pass `self` to their helper
        # modules -- confirm whether `self` is intended here.

        ## Common items
        import mmi
        mmi_dir = '%s/MMI' % self.exp
        util.create_new_dir(mmi_dir)
        mfc_list_mmi = '%s/mfc.list' % mmi_dir
        os.system('cp %s %s' % (self.mfc_list, mfc_list_mmi))

        ## Create weak LM
        import dict_and_lm
        train_vocab = '%s/vocab' % self.lm_dir
        lm_order = 2
        target_ppl_ratio = 8
        ppl = dict_and_lm.build_lm_from_mlf(
            self, self.word_mlf, self.train_dict, train_vocab, self.lm_dir,
            self.mmi_lm, lm_order, target_ppl_ratio)
        log(self.logfh, 'wrote lm for mmi [%s] training ppl [%1.2f]'
            % (self.mmi_lm, ppl))

        ## Create decoding lattices for every utterance
        lattice_dir = '%s/Denom/Lat_word' % mmi_dir
        util.create_new_dir(lattice_dir)
        num_gaussians = self.tri_mixup_schedule[-1]
        iter_num = self.tri_iters_per_split
        if self.train_pipeline['diag']:
            model_dir = '%s/HMM-%d-%d' % (self.diag_root, num_gaussians, iter_num)
        elif self.train_pipeline['mixup_tri_2']:
            model_dir = '%s/HMM-%d-%d' % (self.xword_1_root, num_gaussians, iter_num)
        else:
            model_dir = '%s/HMM-%d-%d' % (self.xword_root, num_gaussians, iter_num)
        mmi.decode_to_lattices(model, lattice_dir, model_dir, mfc_list_mmi,
                               self.mmi_lm, self.decode_dict, self.tied_list,
                               self.word_mlf)
        log(self.logfh, 'generated training lattices in [%s]' % lattice_dir)

        ## Prune and determinize lattices
        pruned_lattice_dir = '%s/Denom/Lat_prune' % mmi_dir
        util.create_new_dir(pruned_lattice_dir)
        mmi.prune_lattices(model, lattice_dir, pruned_lattice_dir, self.decode_dict)
        log(self.logfh, 'pruned lattices in [%s]' % pruned_lattice_dir)

        ## Phone-mark lattices
        phone_lattice_dir = '%s/Denom/Lat_phone' % mmi_dir
        util.create_new_dir(phone_lattice_dir)
        mmi.phonemark_lattices(model, pruned_lattice_dir, phone_lattice_dir,
                               model_dir, mfc_list_mmi, self.mmi_lm,
                               self.decode_dict, self.tied_list)
        log(self.logfh, 'phone-marked lattices in [%s]' % phone_lattice_dir)

        ## Create numerator word lattices
        num_lattice_dir = '%s/Num/Lat_word' % mmi_dir
        util.create_new_dir(num_lattice_dir)
        mmi.create_num_lattices(model, num_lattice_dir, self.mmi_lm,
                                self.decode_dict, self.word_mlf)
        log(self.logfh, 'generated numerator lattices in [%s]' % num_lattice_dir)

        ## Phone-mark numerator lattices
        num_phone_lattice_dir = '%s/Num/Lat_phone' % mmi_dir
        util.create_new_dir(num_phone_lattice_dir)
        mmi.phonemark_lattices(model, num_lattice_dir, num_phone_lattice_dir,
                               model_dir, mfc_list_mmi, self.mmi_lm,
                               self.decode_dict, self.tied_list)
        log(self.logfh, 'phone-marked numerator lattices in [%s]'
            % num_phone_lattice_dir)

        ## Add LM scores to numerator phone lattices
        num_phone_lm_lattice_dir = '%s/Num/Lat_phone_lm' % mmi_dir
        util.create_new_dir(num_phone_lm_lattice_dir)
        mmi.add_lm_lattices(model, num_phone_lattice_dir,
                            num_phone_lm_lattice_dir, self.decode_dict,
                            self.mmi_lm)
        log(self.logfh, 'added LM scores to numerator lattices in [%s]'
            % num_phone_lm_lattice_dir)

        ## Modified Baum-Welch estimation
        root_dir = '%s/Models' % mmi_dir
        util.create_new_dir(root_dir)
        mmi_iters = 12
        mix_size = num_gaussians
        for it in range(1, mmi_iters + 1):
            model_dir = mmi.run_iter(model, model_dir, num_phone_lm_lattice_dir,
                                     phone_lattice_dir, root_dir, self.tied_list,
                                     mfc_list_mmi, mix_size, it)
            log(self.logfh, 'ran an iteration of Modified BW in [%s]' % model_dir)
        log(self.logfh, 'DISCRIM finished')
def train(self):
    """Run every enabled stage of the training pipeline, in order.

    The active configuration is first written to ``<self.exp>/config``.
    Each stage is then gated by a boolean in ``self.train_pipeline``:
    ``coding``, ``lm``, ``flat_start``, ``mixup_mono``, ``mixdown_mono``,
    ``mono_to_tri``, ``mixup_tri``, ``align_with_xword``,
    ``mono_to_tri_from_xword``, ``mixup_tri_2``, ``diag`` and ``mmi``.

    Stages do not hand model directories to each other through local
    variables; each one rebuilds the path of the model it starts from out
    of the configured roots and iteration counts, so a stage can be run
    on its own provided the earlier stages' outputs are already on disk.
    Progress is reported through ``log(self.logfh, ...)``.
    """

    def run_bw(root, hmm_dir, mlf, hmm_list, mix_size, iters, extra=''):
        """Run one re-estimation pass per entry of `iters`; return the last model dir."""
        import train_hmm
        for it in iters:
            hmm_dir, _, lik = train_hmm.run_iter(
                self, root, hmm_dir, mlf, hmm_list, mix_size, it, extra)
            log(self.logfh,
                'ran an iteration of BW in [%s] lik/fr [%1.4f]' % (hmm_dir, lik))
        return hmm_dir

    def mixup_tri(root, hmm_dir):
        """Walk self.tri_mixup_schedule under `root`, re-estimating after each split."""
        import train_hmm
        for mix_size in self.tri_mixup_schedule:
            if mix_size == 2:
                # Only the split to 2 components asks for estimateVarFloor.
                hmm_dir = train_hmm.mixup(self, root, hmm_dir, self.tied_list,
                                          mix_size, estimateVarFloor=1)
            else:
                hmm_dir = train_hmm.mixup(self, root, hmm_dir, self.tied_list,
                                          mix_size)
            log(self.logfh, 'mixed up to [%d] in [%s]' % (mix_size, hmm_dir))
            hmm_dir = run_bw(root, hmm_dir, self.tri_mlf, self.tied_list,
                             mix_size, range(1, self.tri_iters_per_split + 1))
        return hmm_dir

    ## Copy config file to the experiment dir
    config_output = '%s/config' % self.exp
    # Context manager: the handle is closed as soon as the config is written.
    with open(config_output, 'w') as config_fh:
        self.config.write(config_fh)
    log(self.logfh, 'TRAINING with config [%s]' % config_output)

    if self.train_pipeline['coding']:
        log(self.logfh, 'CODING started')
        import coding
        util.create_new_dir(self.coding_root)
        coding.create_config(self)
        count = coding.wav_to_mfc(self, self.coding_root, self.mfc_list)
        os.system('cp %s %s/mfc.list.original' % (self.mfc_list, self.misc))
        log(self.logfh, 'wrote mfc files [%d]' % count)
        log(self.logfh, 'CODING finished')

    if self.train_pipeline['lm']:
        log(self.logfh, 'MLF/LM/DICT started')
        import dict_and_lm
        # Return value is not used by this stage.
        dict_and_lm.fix_cmu_dict(self.orig_dict, self.htk_dict)
        num_utts, words = dict_and_lm.make_mlf_from_transcripts(
            self, self.htk_dict, self.setup, self.data, self.word_mlf,
            self.mfc_list)
        log(self.logfh, 'wrote word mlf [%d utts] [%s]' % (num_utts, self.word_mlf))
        os.system('cp %s %s/mfc.list.filtered.by.dict' % (self.mfc_list, self.misc))

        num_entries = dict_and_lm.make_train_dict(self.htk_dict, self.train_dict, words)
        dict_and_lm.make_decode_dict(self.htk_dict, self.decode_dict, words)
        log(self.logfh, 'wrote training dictionary [%d entries] [%s]'
            % (num_entries, self.train_dict))

        util.create_new_dir(self.lm_dir)
        train_vocab = '%s/vocab' % self.lm_dir
        ppl = dict_and_lm.build_lm_from_mlf(
            self, self.word_mlf, self.train_dict, train_vocab, self.lm_dir,
            self.lm, self.lm_order)
        log(self.logfh, 'wrote lm [%s] training ppl [%1.2f]' % (self.lm, ppl))
        log(self.logfh, 'MLF/LM/DICT finished')

    if self.train_pipeline['flat_start']:
        log(self.logfh, 'FLAT START started')
        import init_hmm
        init_hmm.word_to_phone_mlf(self, self.train_dict, self.word_mlf,
                                   self.phone_mlf, self.phone_list)
        log(self.logfh, 'wrote phone mlf [%s]' % self.phone_mlf)
        os.system('cp %s %s/phone.mlf.from.dict' % (self.phone_mlf, self.misc))
        os.system('bzip2 -f %s/phone.mlf.from.dict' % self.misc)

        init_hmm.make_proto_hmm(self, self.mfc_list, self.proto_hmm)
        hmm_dir, num_mfcs = init_hmm.initialize_hmms(
            self, self.mono_root, self.mfc_list, self.phone_list, self.proto_hmm)
        log(self.logfh, 'initialized an HMM for each phone in [%s]' % hmm_dir)
        log(self.logfh, 'used [%d] mfc files to compute variance floor' % num_mfcs)

        import train_hmm
        hmm_dir = run_bw(self.mono_root, hmm_dir, self.phone_mlf, self.phone_list,
                         1, range(1, self.initial_mono_iters + 1))

        align_config = '%s/config.align' % self.mono_root
        with open(align_config, 'w') as fh:
            fh.write('HPARM: TARGETKIND = MFCC_0_D_A_Z\n')
        # Realign; the phone MLF at self.phone_mlf is the alignment output.
        train_hmm.align(self, self.mono_root, self.mfc_list, hmm_dir,
                        self.word_mlf, self.phone_mlf, self.phone_list,
                        self.train_dict, align_config)
        log(self.logfh, 'aligned with model in [%s], wrote phone mlf [%s]'
            % (hmm_dir, self.phone_mlf))
        os.system('cp %s %s/mfc.list.filtered.by.mono.align' % (self.mfc_list, self.misc))
        os.system('cp %s %s/phone.mlf.from.mono.align' % (self.phone_mlf, self.misc))
        os.system('bzip2 -f %s/phone.mlf.from.mono.align' % self.misc)

        # Iteration numbering continues where the pre-alignment passes stopped.
        hmm_dir = run_bw(self.mono_root, hmm_dir, self.phone_mlf, self.phone_list, 1,
                         range(self.initial_mono_iters + 1,
                               self.initial_mono_iters + 1 + self.mono_iters))
        log(self.logfh, 'FLAT START finished')

    if self.train_pipeline['mixup_mono']:
        log(self.logfh, 'MIXUP MONO started')
        import train_hmm
        hmm_dir = '%s/HMM-%d-%d' % (self.mono_root, 1,
                                    self.initial_mono_iters + self.mono_iters)
        ## mixup everything
        for mix_size in self.mono_mixup_schedule:
            hmm_dir = train_hmm.mixup(self, self.mixup_mono_root, hmm_dir,
                                      self.phone_list, mix_size)
            log(self.logfh, 'mixed up to [%d] in [%s]' % (mix_size, hmm_dir))
            hmm_dir = run_bw(self.mixup_mono_root, hmm_dir, self.phone_mlf,
                             self.phone_list, mix_size,
                             range(1, self.mono_iters + 1))
        log(self.logfh, 'MIXUP MONO finished')

    if self.train_pipeline['mixdown_mono']:
        log(self.logfh, 'MIXDOWN MONO started')
        import train_hmm
        num_gaussians = self.mono_mixup_schedule[-1]
        hmm_dir = '%s/HMM-%d-%d' % (self.mixup_mono_root, num_gaussians,
                                    self.mono_iters)
        train_hmm.mixdown_mono(self, self.mixdown_mono_root, hmm_dir, self.phone_list)
        log(self.logfh, 'MIXDOWN MONO finished')

    if self.train_pipeline['mono_to_tri']:
        log(self.logfh, 'MONO TO TRI started')
        import train_hmm
        if self.train_pipeline['mixdown_mono']:
            mono_final_dir = '%s/HMM-1-0' % self.mixdown_mono_root
        else:
            mono_final_dir = '%s/HMM-%d-%d' % (
                self.mono_root, 1, self.initial_mono_iters + self.mono_iters)

        hmm_dir = train_hmm.mono_to_tri(
            self, self.xword_root, mono_final_dir, self.phone_mlf, self.tri_mlf,
            self.phone_list, self.tri_list)
        log(self.logfh, 'initialized triphone models in [%s]' % hmm_dir)
        log(self.logfh, 'created triphone mlf [%s]' % self.tri_mlf)
        os.system('cp %s %s/tri.mlf.from.mono.align' % (self.tri_mlf, self.misc))
        os.system('bzip2 -f %s/tri.mlf.from.mono.align' % self.misc)
        os.system('cp %s %s/tri.list.from.mono.align' % (self.tri_list, self.misc))

        hmm_dir = run_bw(self.xword_root, hmm_dir, self.tri_mlf, self.tri_list,
                         1, range(1, self.initial_tri_iters + 1))

        xword_tie_dir = '%s/HMM-%d-%d' % (self.xword_root, 1,
                                          self.initial_tri_iters + 1)
        hmm_dir = train_hmm.tie_states_search(
            self, xword_tie_dir, hmm_dir, self.phone_list, self.tri_list,
            self.tied_list)
        log(self.logfh, 'tied states in [%s]' % hmm_dir)
        os.system('cp %s %s/tied.list.initial' % (self.tied_list, self.misc))

        # Continue from the tie directory regardless of what the search returned.
        hmm_dir = '%s/HMM-%d-%d' % (self.xword_root, 1, self.initial_tri_iters + 1)
        hmm_dir = run_bw(self.xword_root, hmm_dir, self.tri_mlf, self.tied_list, 1,
                         range(self.initial_tri_iters + 2,
                               self.initial_tri_iters + 1 + self.tri_iters + 1))
        log(self.logfh, 'MONO TO TRI finished')

    if self.train_pipeline['mixup_tri']:
        log(self.logfh, 'MIXUP TRI started')
        ## mixup everything
        start_gaussians = 1
        start_iter = self.initial_tri_iters + self.tri_iters + 1
        hmm_dir = '%s/HMM-%d-%d' % (self.xword_root, start_gaussians, start_iter)
        mixup_tri(self.xword_root, hmm_dir)
        log(self.logfh, 'MIXUP TRI finished')

    if self.train_pipeline['align_with_xword']:
        log(self.logfh, 'XWORD ALIGN started')
        import train_hmm
        align_config = '%s/config.align' % self.xword_root
        train_hmm.make_hvite_xword_config(self, align_config, 'MFCC_0_D_A_Z')
        num_gaussians = self.tri_mixup_schedule[-1]
        iter_num = self.tri_iters_per_split
        hmm_dir = '%s/HMM-%d-%d' % (self.xword_root, num_gaussians, iter_num)
        realigned_mlf = '%s/raw_tri_xword_realigned.mlf' % self.misc

        # Use the original, mfc list that has prons for every word
        os.system('cp %s/mfc.list.filtered.by.dict %s' % (self.misc, self.mfc_list))
        align_dir = train_hmm.align(
            self, self.xword_root, self.mfc_list, hmm_dir, self.word_mlf,
            realigned_mlf, self.tied_list, self.train_dict, align_config)
        log(self.logfh, 'aligned with model in [%s], tri mlf [%s]'
            % (hmm_dir, realigned_mlf))

        # Because of state tying, the triphones in the mlf will only be
        # valid for this state tying. Strip down to monophones, the
        # correct triphones will be created later in mono_to_tri
        train_hmm.map_tri_to_mono(self, align_dir, realigned_mlf, self.phone_mlf)
        os.system('cp %s %s/phone.mlf.from.xword.align' % (self.phone_mlf, self.misc))
        os.system('bzip2 -f %s/phone.mlf.from.xword.align' % self.misc)
        os.system('bzip2 -f %s' % realigned_mlf)
        log(self.logfh, 'XWORD ALIGN finished')

    if self.train_pipeline['mono_to_tri_from_xword']:
        log(self.logfh, 'MONO TO TRI FROM XWORD started')
        import train_hmm
        # Unconditionally starts from the mixdown_mono output (HMM-1-0), i.e.
        # this stage assumes mixdown_mono has been run at some point.
        mono_final_dir = '%s/HMM-1-0' % self.mixdown_mono_root
        hmm_dir = train_hmm.mono_to_tri(
            self, self.xword_1_root, mono_final_dir, self.phone_mlf,
            self.tri_mlf, self.phone_list, self.tri_list)
        log(self.logfh, 'initialized triphone models in [%s]' % hmm_dir)
        os.system('cp %s %s/tri.mlf.from.xword.align' % (self.tri_mlf, self.misc))
        os.system('bzip2 -f %s/tri.mlf.from.xword.align' % self.misc)
        os.system('cp %s %s/tri.list.from.xword.align' % (self.tri_list, self.misc))

        two_model_config = '%s/config.two_model' % self.xword_1_root
        with open(two_model_config, 'w') as fh:
            fh.write('ALIGNMODELMMF = %s/HMM-%d-%d/MMF\n'
                     % (self.xword_root, self.tri_mixup_schedule[-1],
                        self.tri_iters_per_split))
            fh.write('ALIGNHMMLIST = %s\n' % self.tied_list)

        # Do one pass of two-model re-estimation
        extra = ' -C %s' % two_model_config
        hmm_dir = run_bw(self.xword_1_root, hmm_dir, self.tri_mlf, self.tri_list,
                         1, [1], extra)

        xword_tie_dir = '%s/HMM-1-2' % self.xword_1_root
        hmm_dir = train_hmm.tie_states_search(
            self, xword_tie_dir, hmm_dir, self.phone_list, self.tri_list,
            self.tied_list)
        log(self.logfh, 'tied states in [%s]' % hmm_dir)
        os.system('cp %s %s/tied.list.second' % (self.tied_list, self.misc))

        hmm_dir = '%s/HMM-1-2' % self.xword_1_root
        hmm_dir = run_bw(self.xword_1_root, hmm_dir, self.tri_mlf, self.tied_list,
                         1, range(3, self.tri_iters + 3))
        log(self.logfh, 'MONO TO TRI FROM XWORD finished')

    if self.train_pipeline['mixup_tri_2']:
        log(self.logfh, 'MIXUP TRI 2 started')
        ## mixup everything
        start_gaussians = 1
        start_iter = self.tri_iters + 2
        hmm_dir = '%s/HMM-%d-%d' % (self.xword_1_root, start_gaussians, start_iter)
        mixup_tri(self.xword_1_root, hmm_dir)
        log(self.logfh, 'MIXUP TRI 2 finished')

    if self.train_pipeline['diag']:
        log(self.logfh, 'DIAG started')
        import train_hmm
        num_gaussians = self.tri_mixup_schedule[-1]
        iter_num = self.tri_iters_per_split
        # Seed from the second-pass models when that stage is enabled.
        if self.train_pipeline['mixup_tri_2']:
            seed_dir = '%s/HMM-%d-%d' % (self.xword_1_root, num_gaussians, iter_num)
        else:
            seed_dir = '%s/HMM-%d-%d' % (self.xword_root, num_gaussians, iter_num)
        hmm_dir, lik = train_hmm.diagonalize(
            self, self.diag_root, seed_dir, self.tied_list, self.tri_mlf,
            num_gaussians)
        log(self.logfh, 'ran diag in [%s] lik/fr [%1.4f]' % (hmm_dir, lik))
        hmm_dir = run_bw(self.diag_root, hmm_dir, self.tri_mlf, self.tied_list,
                         num_gaussians, range(1, self.tri_iters_per_split + 1))
        log(self.logfh, 'DIAG finished')

    if self.train_pipeline['mmi']:
        log(self.logfh, 'DISCRIM started')
        # NOTE(review): every mmi.* call below passes `model`, which is not
        # defined inside this method; it only resolves if a module-level
        # `model` exists. All other stages pass `self` to their helper
        # modules -- confirm whether `self` is intended here.

        ## Common items
        import mmi
        mmi_dir = '%s/MMI' % self.exp
        util.create_new_dir(mmi_dir)
        mfc_list_mmi = '%s/mfc.list' % mmi_dir
        os.system('cp %s %s' % (self.mfc_list, mfc_list_mmi))

        ## Create weak LM
        import dict_and_lm
        train_vocab = '%s/vocab' % self.lm_dir
        lm_order = 2
        target_ppl_ratio = 8
        ppl = dict_and_lm.build_lm_from_mlf(
            self, self.word_mlf, self.train_dict, train_vocab, self.lm_dir,
            self.mmi_lm, lm_order, target_ppl_ratio)
        log(self.logfh, 'wrote lm for mmi [%s] training ppl [%1.2f]'
            % (self.mmi_lm, ppl))

        ## Create decoding lattices for every utterance
        lattice_dir = '%s/Denom/Lat_word' % mmi_dir
        util.create_new_dir(lattice_dir)
        num_gaussians = self.tri_mixup_schedule[-1]
        iter_num = self.tri_iters_per_split
        if self.train_pipeline['diag']:
            model_dir = '%s/HMM-%d-%d' % (self.diag_root, num_gaussians, iter_num)
        elif self.train_pipeline['mixup_tri_2']:
            model_dir = '%s/HMM-%d-%d' % (self.xword_1_root, num_gaussians, iter_num)
        else:
            model_dir = '%s/HMM-%d-%d' % (self.xword_root, num_gaussians, iter_num)
        mmi.decode_to_lattices(model, lattice_dir, model_dir, mfc_list_mmi,
                               self.mmi_lm, self.decode_dict, self.tied_list,
                               self.word_mlf)
        log(self.logfh, 'generated training lattices in [%s]' % lattice_dir)

        ## Prune and determinize lattices
        pruned_lattice_dir = '%s/Denom/Lat_prune' % mmi_dir
        util.create_new_dir(pruned_lattice_dir)
        mmi.prune_lattices(model, lattice_dir, pruned_lattice_dir, self.decode_dict)
        log(self.logfh, 'pruned lattices in [%s]' % pruned_lattice_dir)

        ## Phone-mark lattices
        phone_lattice_dir = '%s/Denom/Lat_phone' % mmi_dir
        util.create_new_dir(phone_lattice_dir)
        mmi.phonemark_lattices(model, pruned_lattice_dir, phone_lattice_dir,
                               model_dir, mfc_list_mmi, self.mmi_lm,
                               self.decode_dict, self.tied_list)
        log(self.logfh, 'phone-marked lattices in [%s]' % phone_lattice_dir)

        ## Create numerator word lattices
        num_lattice_dir = '%s/Num/Lat_word' % mmi_dir
        util.create_new_dir(num_lattice_dir)
        mmi.create_num_lattices(model, num_lattice_dir, self.mmi_lm,
                                self.decode_dict, self.word_mlf)
        log(self.logfh, 'generated numerator lattices in [%s]' % num_lattice_dir)

        ## Phone-mark numerator lattices
        num_phone_lattice_dir = '%s/Num/Lat_phone' % mmi_dir
        util.create_new_dir(num_phone_lattice_dir)
        mmi.phonemark_lattices(model, num_lattice_dir, num_phone_lattice_dir,
                               model_dir, mfc_list_mmi, self.mmi_lm,
                               self.decode_dict, self.tied_list)
        log(self.logfh, 'phone-marked numerator lattices in [%s]'
            % num_phone_lattice_dir)

        ## Add LM scores to numerator phone lattices
        num_phone_lm_lattice_dir = '%s/Num/Lat_phone_lm' % mmi_dir
        util.create_new_dir(num_phone_lm_lattice_dir)
        mmi.add_lm_lattices(model, num_phone_lattice_dir,
                            num_phone_lm_lattice_dir, self.decode_dict,
                            self.mmi_lm)
        log(self.logfh, 'added LM scores to numerator lattices in [%s]'
            % num_phone_lm_lattice_dir)

        ## Modified Baum-Welch estimation
        root_dir = '%s/Models' % mmi_dir
        util.create_new_dir(root_dir)
        mmi_iters = 12
        mix_size = num_gaussians
        for it in range(1, mmi_iters + 1):
            model_dir = mmi.run_iter(model, model_dir, num_phone_lm_lattice_dir,
                                     phone_lattice_dir, root_dir, self.tied_list,
                                     mfc_list_mmi, mix_size, it)
            log(self.logfh, 'ran an iteration of Modified BW in [%s]' % model_dir)
        log(self.logfh, 'DISCRIM finished')