num_window_step_samples=80, fft_length=512, kernel_length=7, freq_cutoff=3000, use_mel=True)
# NOTE(review): the line above is the tail of a call that opens before this
# excerpt; presumably its result is bound to `sp`, which is passed as
# S_config below -- confirm against the preceding lines.

# Scratch run: gather the features for one syllable, the phone pair
# ('aa', 'r'), and dump them under data/ with an "aar" file-name prefix.
ep = gtrd.EdgemapParameters(block_length=40, spread_length=1, threshold=.7)
# hard-coded, machine-specific location of the training utterances
utterances_path = '/home/mark/Template-Speech-Recognition/Data/Train/'
file_indices = gtrd.get_data_files_indices(utterances_path)
syllable=('aa','r')
# return_avg_bgd=True makes the call return a second value, bound to
# avg_bgd; only its `.E` attribute is used below
syllable_features,avg_bgd=gtrd.get_syllable_features_directory(utterances_path,file_indices,syllable, S_config=sp,E_config=ep,offset=0, E_verbose=False,return_avg_bgd=True)
np.save('data/aar_bgd_mel.npy',avg_bgd.E)
# example_mat is handed to every recover_* call below and saved next to
# each output
example_mat = gtrd.recover_example_map(syllable_features)
lengths,waveforms = gtrd.recover_waveforms(syllable_features,example_mat)
# the arrays are passed positionally here, so np.savez stores them under
# the default keys arr_0, arr_1, arr_2 (in argument order)
np.savez('data/aar_waveforms_lengths.npz',waveforms,lengths,example_mat)
Slengths,Ss = gtrd.recover_specs(syllable_features,example_mat)
np.savez('data/aar_Ss_mel_lengths.npz',Ss,Slengths,example_mat)
# `lengths` is rebound here, replacing the waveform lengths saved above
lengths,Es = gtrd.recover_edgemaps(syllable_features,example_mat)
num_mix_params, phn_mapping=leehon_mapping, waveform_offset=15)
# NOTE(review): the line above is the tail of a call that opens before this
# excerpt (its arguments match the signature of
# perform_phn_template_estimation) -- confirm against the preceding lines.

# set() has no defined order, so which phones end up in use_phns[12:] can
# differ from run to run
use_phns = list(set(leehon_mapping.values()))
# NOTE(review): the original indentation is lost, so the extent of this loop
# body cannot be recovered from the text.  It is kept as the tokens appear
# (only the print follows the colon); if that reading is right, the
# statements below run once, with `phn` left at the last phone of the loop.
# TODO confirm against the original layout.
for phn_idx, phn in enumerate(use_phns[12:]): print phn_idx, phn
# From here on the statements mirror the opening of
# perform_phn_template_estimation, with its keyword defaults spelled out as
# plain variables.
phn_tuple = (phn,)
print phn
# assigned but not read in the visible lines: the call below passes the
# literal 15 instead
waveform_offset = 15
phn_mapping=leehon_mapping
# assigned but not read in the visible lines
chunk_length=1000
phn_features,avg_bgd=gtrd.get_syllable_features_directory(utterances_path,file_indices,phn_tuple, S_config=sp,E_config=ep,offset=0, E_verbose=False,return_avg_bgd=True, waveform_offset=15, phn_mapping=phn_mapping)
# clamp the average background into [.01, .99] before it is saved and reused
bgd = np.clip(avg_bgd.E,.01,.99)
np.save('data/bgd.npy',bgd)
example_mat = gtrd.recover_example_map(phn_features)
lengths,waveforms = gtrd.recover_waveforms(phn_features,example_mat)
# keyword arguments: the arrays are stored in the .npz under these names
np.savez('data/waveforms_lengths.npz',waveforms=waveforms, lengths=lengths, example_mat=example_mat)
Slengths,Ss = gtrd.recover_specs(phn_features,example_mat)
# stored as float32
Ss = Ss.astype(np.float32)
np.savez('data/Ss_lengths.npz' ,Ss=Ss,Slengths=Slengths,example_mat=example_mat)
# the clipped background is forwarded to the edge-map recovery
Elengths,Es = gtrd.recover_edgemaps(phn_features,example_mat,bgd=bgd)
# stored as uint8
Es = Es.astype(np.uint8)
np.savez('data/Es_lengths.npz' ,Es=Es,Elengths=Elengths,example_mat=example_mat)
def perform_phn_template_estimation(phn,utterances_path,
                                    file_indices,sp,ep,
                                    num_mix_params,
                                    phn_mapping=None,
                                    waveform_offset=15,
                                    chunk_length=1000):
    """
    Collect every example of one phone and estimate templates for it.

    The examples are gathered with gtrd.get_syllable_features_directory,
    the recovered waveforms, spectrograms and edge maps are written under
    data/, and then one set of templates is estimated and saved for every
    mixture size in num_mix_params.

    Parameters
    ----------
    phn :
        Phone label.  It is wrapped in a 1-tuple for the feature
        extraction and interpolated into the per-phone output file names.
    utterances_path, file_indices :
        Forwarded unchanged to gtrd.get_syllable_features_directory.
    sp, ep :
        Forwarded as S_config and E_config respectively.
    num_mix_params :
        Iterable of mixture sizes.  A size of 1 uses the plain average of
        the examples; any other size fits a bm.BernoulliMixture.
    phn_mapping :
        Forwarded unchanged to gtrd.get_syllable_features_directory.
    waveform_offset :
        Forwarded to gtrd.get_syllable_features_directory.  (It used to be
        ignored in favour of a hard-coded 15, which is still the default.)
    chunk_length :
        Largest number of examples given to a single EM run.

    Returns
    -------
    None.  All results are written to files under data/ (the directory
    must already exist); several file names do not contain the phone, so
    they are overwritten on every call.
    """
    phn_tuple = (phn,)
    # single-argument print(): same output under the Python 2 print statement
    print(phn)
    phn_features,avg_bgd=gtrd.get_syllable_features_directory(
        utterances_path,file_indices,phn_tuple,
        S_config=sp,E_config=ep,offset=0,
        E_verbose=False,return_avg_bgd=True,
        waveform_offset=waveform_offset,
        phn_mapping=phn_mapping)
    # clamp the average background into [.01, .99] before it is reused
    bgd = np.clip(avg_bgd.E,.01,.99)
    np.save('data/bgd.npy',bgd)
    example_mat = gtrd.recover_example_map(phn_features)
    lengths,waveforms = gtrd.recover_waveforms(phn_features,example_mat)
    np.savez('data/waveforms_lengths.npz',waveforms=waveforms,
             lengths=lengths,
             example_mat=example_mat)
    Slengths,Ss = gtrd.recover_specs(phn_features,example_mat)
    Ss = Ss.astype(np.float32)
    np.savez('data/Ss_lengths.npz' ,Ss=Ss,Slengths=Slengths,example_mat=example_mat)
    Elengths,Es = gtrd.recover_edgemaps(phn_features,example_mat,bgd=bgd)
    Es = Es.astype(np.uint8)
    np.savez('data/Es_lengths.npz' ,Es=Es,Elengths=Elengths,example_mat=example_mat)

    # the Es are padded from recover_edgemaps
    #
    # The stats file is written for checking purposes, one line per mixture
    # size.  Format is:
    #   num_components total c0 c1 c2 ... ck
    # The context manager closes it even if an estimation step raises.
    with open('data/mixture_estimation_stats_%s.data' % phn,'w') as f:
        for num_mix in num_mix_params:
            print(num_mix)
            if num_mix == 1:
                # one component: every example belongs to it with weight 1
                # and the template is the average example, cut to the
                # rounded mean example length
                affinities = np.ones((Es.shape[0],1),dtype=np.float64)
                mean_length = int(np.mean(Elengths) + .5)
                templates = (np.mean(Es,0)[:mean_length],)
                spec_templates = (np.mean(Ss,0)[:mean_length],)
                np.save('data/%d_affinities.npy' % (num_mix),
                        affinities)
                np.save('data/%d_templates.npy' % (num_mix),
                        templates)
                np.save('data/%d_spec_templates.npy' % (num_mix),
                        spec_templates)
                np.save('data/%d_templates_%s.npy' % (num_mix,phn),
                        templates)
                np.save('data/%d_spec_templates_%s.npy' % (num_mix,phn),
                        spec_templates)
                f.write('%d %d %g\n' % (num_mix,
                                        len(affinities),
                                        np.sum(affinities[:,0])))
            else:
                bem, start_idx, end_idx = _fit_mixture_in_chunks(
                    num_mix, Es, chunk_length)
                # the templates are recovered from the same slice of
                # examples that the last EM run was fit to, so that the
                # slice lines up with the rows of bem.affinities
                templates = et.recover_different_length_templates(
                    bem.affinities,
                    Es[start_idx:end_idx],
                    Elengths[start_idx:end_idx])
                spec_templates = et.recover_different_length_templates(
                    bem.affinities,
                    Ss[start_idx:end_idx],
                    Slengths[start_idx:end_idx])
                np.save('data/%d_affinities.npy' % (num_mix),
                        bem.affinities)
                np.savez('data/%d_templates.npz' % (num_mix),
                         *templates)
                np.savez('data/%d_spec_templates.npz' % (num_mix),
                         *spec_templates)
                np.savez('data/%d_templates_%s.npz' % (num_mix,phn),
                         *templates)
                np.savez('data/%d_spec_templates_%s.npz' % (num_mix,phn),
                         *spec_templates)
                # report the affinities of the mixture that was just fit
                # (not the array left over from the num_mix == 1 branch)
                f.write('%d %d ' % (num_mix,
                                    len(bem.affinities))
                        + ' '.join(str(np.sum(bem.affinities[:,i]))
                                   for i in range(bem.affinities.shape[1]))
                        + '\n')


def _fit_mixture_in_chunks(num_mix, Es, chunk_length, tol=.000001):
    """
    Fit a bm.BernoulliMixture to Es, at most chunk_length examples at a time.

    Returns (bem, start_idx, end_idx), where Es[start_idx:end_idx] is the
    slice of examples used by the last EM run.
    """
    num_examples = len(Es)
    if num_examples <= chunk_length:
        # everything fits into a single EM run
        bem = bm.BernoulliMixture(num_mix,Es)
        bem.run_EM(tol)
        return bem, 0, num_examples

    bem = bm.BernoulliMixture(num_mix,Es[:chunk_length])
    bem.run_EM(tol)
    # defined up front so they are valid even if the loop body never runs
    start_idx, end_idx = 0, chunk_length
    # ceiling division: a trailing partial chunk also gets an EM run
    num_chunks = -(-num_examples // chunk_length)
    for i in range(1, num_chunks):
        start_idx = i*chunk_length
        end_idx = start_idx + chunk_length
        if end_idx > num_examples:
            # partial tail: slide the window back so that it still holds
            # exactly chunk_length examples
            end_idx = num_examples
            start_idx = num_examples - chunk_length
        bem.data_mat = Es[start_idx:end_idx].reshape(
            chunk_length,bem.data_length)
        bem.run_EM(tol)
    return bem, start_idx, end_idx
def save_syllable_features_to_data_dir( args, phn_tuple, utterances_path, file_indices, sp, ep, phn_mapping, tag_data_with_syllable_string=False, save_tag="train", waveform_offset=10, block_features=False, ): """ Wrapper function to get all the examples processed """ print "Collecting the data for phn_tuple " + " ".join("%s" % k for k in phn_tuple) syllable_string = "_".join(p for p in phn_tuple) phn_features, avg_bgd = gtrd.get_syllable_features_directory( utterances_path, file_indices, phn_tuple, S_config=sp, E_config=ep, offset=0, E_verbose=False, return_avg_bgd=True, waveform_offset=15, phn_mapping=phn_mapping, ) bgd = np.clip(avg_bgd.E, 0.01, 0.4) np.save("data/bgd.npy", bgd) example_mat = gtrd.recover_example_map(phn_features) lengths, waveforms = gtrd.recover_waveforms(phn_features, example_mat) if tag_data_with_syllable_string: np.savez( "data/%s_waveforms_lengths_%s.npz" % (syllable_string, save_tag), waveforms=waveforms, lengths=lengths, example_mat=example_mat, ) else: np.savez( "data/waveforms_lengths_%s.npz" % save_tag, waveforms=waveforms, lengths=lengths, example_mat=example_mat ) Slengths, Ss = gtrd.recover_specs(phn_features, example_mat) Ss = Ss.astype(np.float32) if tag_data_with_syllable_string: np.savez( "data/%s_Ss_lengths_%s.npz" % (syllable_string, save_tag), Ss=Ss, Slengths=Slengths, example_mat=example_mat ) else: np.savez("data/Ss_lengths_%s.npz" % (save_tag), Ss=Ss, Slengths=Slengths, example_mat=example_mat) Elengths, Es = gtrd.recover_edgemaps(phn_features, example_mat, bgd=bgd) Es = Es.astype(np.uint8) if tag_data_with_syllable_string: np.savez( "data/%s_Es_lengths_%s.npz" % (syllable_string, save_tag), Es=Es, Elengths=Elengths, example_mat=example_mat ) else: np.savez("data/Es_lengths_%s.npz" % (save_tag), Es=Es, Elengths=Elengths, example_mat=example_mat) if args.doBlockFeatures: out = code_parts.code_parts(E.astype(np.uint8), logParts, logInvParts, args.bernsteinEdgeThreshold) max_responses = np.argmax(out, -1) Bs = 
code_parts.spread_patches(max_responses, 2, 2, out.shape[-1] - 1)