def normalize_data(db_file, name_out_path):
    db = Utility.load_obj(db_file)

    new_data = []
    for syl in db:
        d = syl['TF']['missing151']['data']
        # Replace missing (NaN) frames with the unvoiced value before scaling.
        dd = np.array(d)
        dd[np.argwhere(np.isnan(d))] = un_voice
        new_data.append(dd)

    new_data = np.array(new_data)
    print new_data

    X_scaled = preprocessing.scale(new_data)
    print X_scaled
    print X_scaled.shape

    new_db = []
    for idx, syl in enumerate(db):
        syl['TF']['missing151_standardization'] = dict()
        syl['TF']['missing151_standardization']['data'] = X_scaled[idx]
        syl['TF']['missing151_standardization']['description'] = 'Standardized version of missing151'
        new_db.append(syl)

    Utility.save_obj(new_db, name_out_path)
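# A minimal, self-contained sketch (not part of the pipeline above) of what
# preprocessing.scale does: each column is shifted to zero mean and rescaled
# to unit variance. The 3x2 matrix is made-up demo data.
def _example_standardization():
    import numpy as np
    from sklearn import preprocessing

    demo = np.array([[1.0, 10.0],
                     [2.0, 20.0],
                     [3.0, 30.0]])
    scaled = preprocessing.scale(demo)
    print scaled.mean(axis=0)  # approximately [0. 0.]
    print scaled.std(axis=0)   # approximately [1. 1.]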
def remove_duration_data(db_file, name_out_path):
    db = Utility.load_obj(db_file)

    new_data = []
    for syl in db:
        d = syl['TF']['intepolate151normailize']['data']
        new_data.append(d)

    new_data = np.array(new_data)
    print new_data

    # Remove the duration-related columns (indices 150 and 151).
    new_data = np.delete(new_data, [150, 151], axis=1)
    print new_data
    print new_data.shape

    new_db = []
    for idx, syl in enumerate(db):
        syl['TF']['intepolate150_normailize_no_duration'] = dict()
        syl['TF']['intepolate150_normailize_no_duration']['data'] = new_data[idx]
        syl['TF']['intepolate150_normailize_no_duration']['description'] = 'Normalized version of intepolate151, but remove duration'
        new_db.append(syl)

    Utility.save_obj(new_db, name_out_path)
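# A small sketch of np.delete with axis=1 (column removal), as used above;
# the 2x4 demo array and the indices are illustrative only.
def _example_delete_columns():
    import numpy as np

    demo = np.arange(8).reshape(2, 4)
    trimmed = np.delete(demo, [2, 3], axis=1)
    print trimmed.shape  # (2, 2): the last two columns are gone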
def add_data_object():
    obj = Utility.load_obj(
        '/home/h1/decha/Dropbox/Inter_speech_2016/Syllable_object/mix_object/current_version/all_vowel_type/syllable_object_01234.pickle'
    )

    name_index = Utility.load_obj(
        '/work/w13/decha/Inter_speech_2016_workplace/Tonal_projection/11_missing_data/all_vowel_type/input_dims_10/delta-True_delta-delta-True/BayesianGPLVMMiniBatch_Missing_Tone_01234/name_index.npy'
    )
    name_index = np.array(name_index)

    model = Utility.load_obj(
        '/work/w13/decha/Inter_speech_2016_workplace/Tonal_projection/11_missing_data/all_vowel_type/input_dims_10/delta-True_delta-delta-True/BayesianGPLVMMiniBatch_Missing_Tone_01234/GP_model.npy'
    )

    data = np.array(model.X.mean)
    print data.shape

    for syl in obj.syllables_list:
        name = syl.name_index
        if 'gpr' not in name:
            continue
        # Attach the latent-space mean of this syllable to the object.
        name_position = np.where(name_index == name)
        latent_data = data[name_position][0]
        syl.set_latent_for_single_space(latent_data)

    Utility.save_obj(
        obj,
        '/home/h1/decha/Dropbox/Inter_speech_2016/Syllable_object/mix_object/current_version/all_vowel_type/syllable_object_01234.pickle'
    )
def run_separate(db_obj, outpath):
    for syl in db_obj:
        vowel = syl['vowel']
        finalconsonant = syl['finalconsonant']
        tone = syl['tone']

        if vowel in short_vowel:
            v = 'short'
        else:
            v = 'long'

        if finalconsonant in nasal_list:
            f = 'nasal'
        elif finalconsonant == 'z^':
            f = 'no'
        else:
            f = 'non-nasal'

        name = '{}_{}_{}'.format(tone, v, f)
        if name in sep_list:
            sep_list[name].append(syl)

    for key in sep_list:
        print key, ' : ', len(sep_list[key])
        if len(sep_list[key]) != 0:
            Utility.save_obj(sep_list[key], '{}/{}.npy'.format(outpath, key))
def gen_dct_data(syllable_management_path):
    syl_object = Utility.load_obj(syllable_management_path)

    for syl in syl_object.syllables_list:
        data = syl.get_Y_features(
            Syllable.Training_feature_tonal_part_raw_remove_head_tail_interpolated,
            50, False, False,
            exp=True, subtract_means=False, output=None, missing_data=False)

        # Type-II DCT for the coefficients; the type-III DCT (its inverse
        # with norm='ortho') is computed but unused, apparently as a
        # round-trip sanity check.
        data_dct = dct(data, 2, norm='ortho')
        idct = dct(data_dct, 3, norm='ortho')

        print syl.name_index

        syl.training_feature[Syllable.Training_feature_tonal_part_dct_coeff] = data_dct

    Utility.save_obj(syl_object, syllable_management_path)
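# A short sketch (illustrative data) of the DCT round trip used above: with
# norm='ortho', dct(..., 2) and dct(..., 3) are exact inverses.
def _example_dct_roundtrip():
    import numpy as np
    from scipy.fftpack import dct

    signal = np.array([1.0, 2.0, 3.0, 4.0])
    coeff = dct(signal, 2, norm='ortho')
    restored = dct(coeff, 3, norm='ortho')
    print np.allclose(signal, restored)  # True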
def normalize_data(db_file, name_out_path):
    db = Utility.load_obj(db_file)

    new_data = []
    for syl in db:
        d = syl['TF']['intepolate151_with_consonant_unvoice_ratio']['data']
        new_data.append(d)

    new_data = np.array(new_data)
    print new_data

    X_scaled = preprocessing.scale(new_data)
    print X_scaled

    new_db = []
    for idx, syl in enumerate(db):
        key = 'intepolate151_with_consonant_unvoice_ratio_standardization'
        syl['TF'][key] = dict()
        syl['TF'][key]['data'] = X_scaled[idx]
        syl['TF'][key]['description'] = 'Standardized version of intepolate151_with_consonant_unvoice_ratio'
        new_db.append(syl)

    Utility.save_obj(new_db, name_out_path)
def gen_data(db_file, name_out_path):
    out = []
    for syl in Utility.load_obj(db_file):
        y = Syllable.get_normailze_with_missing_data(syl['raw_lf0'], 50, syl['dur'])

        syl['TF'] = dict()

        missing_data = dict()
        missing_data['data'] = y
        missing_data['description'] = 'Raw lf0 (first 50 + delta + delta-delta) + duration in frame unit (the last one). Unvoiced frames are defined as missing data'
        syl['TF']['missing151'] = missing_data

        out.append(syl)

    Utility.save_obj(out, name_out_path)
def normalize_data(db_file, name_out_path):
    db = Utility.load_obj(db_file)

    new_data = []
    for syl in db:
        d = syl['TF']['intepolate151']['data']
        new_data.append(d)

    new_data = np.array(new_data)

    # Min-max scaling maps each column onto the [0, 1] range.
    min_max_scaler = preprocessing.MinMaxScaler()
    X_train_minmax = min_max_scaler.fit_transform(new_data)

    new_db = []
    for idx, syl in enumerate(db):
        syl['TF']['intepolate151normailize'] = dict()
        syl['TF']['intepolate151normailize']['data'] = X_train_minmax[idx]
        syl['TF']['intepolate151normailize']['description'] = 'Normalized version of intepolate151'
        new_db.append(syl)

    Utility.save_obj(new_db, name_out_path)
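# A minimal sketch of MinMaxScaler: each column is mapped linearly onto
# [0, 1] using that column's own min and max. The 3x2 matrix is demo data.
def _example_min_max_scaling():
    import numpy as np
    from sklearn import preprocessing

    demo = np.array([[1.0, 10.0],
                     [2.0, 20.0],
                     [3.0, 30.0]])
    scaled = preprocessing.MinMaxScaler().fit_transform(demo)
    print scaled  # each column becomes [0, 0.5, 1]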
def set_pre_suc():
    tones = ['01234']
    name_list_path = '/home/h1/decha/Dropbox/python_workspace/Inter_speech_2016/playground/list_file_for_preceeding_suceeding/list_gpr_file/'

    for t in tones:
        path = '/home/h1/decha/Dropbox/Inter_speech_2016/Syllable_object/mix_object/current_version/all_vowel_type/syllable_object_{}.pickle'.format(t)
        print path

        syl_management = Utility.load_obj(path)
        for syl in syl_management.syllables_list:
            if 'manual' in syl.name_index:
                continue

            name = syl.name_index.split('_')
            file_tar = '{}/{}/{}.lab'.format(name_list_path, name[2][0], name[2])
            list_file = Utility.read_file_line_by_line(file_tar)

            # Find this syllable in the list file and record its neighbors.
            for idx, l in enumerate(list_file):
                f = Utility.trim(l)
                if f == syl.name_index:
                    preceeding = Utility.trim(list_file[idx - 1])
                    succeeding = Utility.trim(list_file[idx + 1])
                    syl.set_preceeding_succeeding_name_index(preceeding, succeeding)

        Utility.save_obj(syl_management, path)
def fix():
    base_path = '/home/h1/decha/Dropbox/Inter_speech_2016/Syllable_object/Tonal_object/remove_all_silence_file/'
    fixed_list_path = '/work/w13/decha/Inter_speech_2016_workplace/Fix_stress_label/fix_list/'

    fixed_list = load_fix_list(fixed_list_path)
    fixed_list = np.array(fixed_list)

    for v in Utility.list_file(base_path):
        if v.startswith('.'):
            continue
        vowel_path = '{}/{}/'.format(base_path, v)

        for tone in Utility.list_file(vowel_path):
            if tone.startswith('.'):
                continue
            tone_file_path = '{}/{}'.format(vowel_path, tone)
            print tone_file_path

            syl_obj = Utility.load_obj(tone_file_path)
            for syl in syl_obj.syllables_list:
                if syl.name_index in fixed_list:
                    print syl.name_index, syl.stress_manual
                    # Flip the manual stress label (0 <-> 1).
                    if syl.stress_manual == 0:
                        syl.stress_manual = 1
                    else:
                        syl.stress_manual = 0

            Utility.save_obj(syl_obj, tone_file_path)
def normalize_data(db_file, name_out_path):
    db = Utility.load_obj(db_file)

    new_data = []
    for syl in db:
        d = syl['TF']['intepolate151']['data']
        new_data.append(d)

    new_data = np.array(new_data)
    print new_data

    X_normalized = preprocessing.normalize(new_data, norm='l2')
    print X_normalized
    print X_normalized.shape

    new_db = []
    for idx, syl in enumerate(db):
        key = 'intepolate151_normalize_by_preprocessing.normalize'
        syl['TF'][key] = dict()
        syl['TF'][key]['data'] = X_normalized[idx]
        syl['TF'][key]['description'] = 'preprocessing.normalize version of intepolate151'
        new_db.append(syl)

    Utility.save_obj(new_db, name_out_path)
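# A compact sketch of preprocessing.normalize with norm='l2': each ROW is
# rescaled to unit Euclidean length (unlike scale/MinMaxScaler above, which
# operate per column). Demo values are arbitrary.
def _example_l2_normalize():
    import numpy as np
    from sklearn import preprocessing

    demo = np.array([[3.0, 4.0], [1.0, 0.0]])
    normed = preprocessing.normalize(demo, norm='l2')
    print normed                           # [[0.6 0.8] [1. 0.]]
    print np.linalg.norm(normed, axis=1)   # [1. 1.]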
def link_cluster_caller(name, base_path, db_file, name_out_path):
    global log

    x = Utility.load_obj('{}/x.pkl'.format(base_path))
    inverselengthscale = Utility.load_obj('{}/input_sensitivity.pkl'.format(base_path))

    for n_clusters in xrange(2, 6):
        for mul in [0.025, 0.05, 0.075, 0.1]:
            # The neighborhood size scales with the data set size.
            n_neighbors = int(len(x) * mul)

            title = 'param_n_cluster_{}_n_neighbors_{}x'.format(n_clusters, mul)
            name_out_file = '{}/{}.eps'.format(name_out_path, title)

            log.append(title)
            log.append('n_cluster : {}'.format(n_clusters))
            log.append('n_neighbors for kernel : {}'.format(n_neighbors))

            labels = link_clustering(x, inverselengthscale, n_clusters, n_neighbors)
            plot(x, inverselengthscale, labels, name_out_file, title)

            Utility.save_obj(labels, '{}/{}.pkl'.format(name_out_path, title))

    Utility.write_to_file_line_by_line('{}/{}_log.txt'.format(name_out_path, name), log)
def get_j_set(db_file, out_file, sort_list_out_file):
    j_set_db = []
    j_set_sort_list = []

    db = Utility.load_obj(db_file)
    for syl in db:
        if 'j' in syl['id']:
            j_set_db.append(syl)

            # Sum every duration except the first (the initial consonant).
            dur = 0
            for idx, d in enumerate(syl['dur']):
                if idx == 0:
                    continue
                dur = dur + d

            # Note: the original appended the loop variable `d` (the last
            # duration) rather than the accumulated `dur`; `dur` appears to
            # be the intended value and is used here.
            j_set_sort_list.append((syl['id'], dur, syl['stress']))

    Utility.sort_by_index(j_set_sort_list, 1)
    print j_set_sort_list

    Utility.save_obj(j_set_sort_list, sort_list_out_file)
    Utility.save_obj(j_set_db, out_file)
def fix_database(db_file, change_list_file, out_file):
    global db
    db = Utility.load_obj(db_file)

    change_list = []
    less_than = None
    for line in Utility.read_file_line_by_line(change_list_file):
        if 'tsc' in line:
            n = Utility.trim(line).replace(' ', '_')
            change_list.append(n)
        elif '<' in line:
            less_than = line.split(' ')[1]

    if (len(change_list) == 0) or (less_than is None):
        # Raising a plain string is a TypeError in modern Python; raise a
        # proper exception instead.
        raise ValueError('Change list file is invalid')

    new_list = change_stress(change_list, less_than)
    Utility.save_obj(new_list, out_file)
def run_nn(trndata, tstdata, outpath, name, fold):
    fnn = buildNetwork(trndata.indim, 20, trndata.outdim,
                       hiddenclass=TanhLayer, bias=True)
    trainer = BackpropTrainer(fnn, dataset=trndata,
                              momentum=0.1, weightdecay=0.01)

    acc = 0.0
    real_obj = []
    predicted_obj = []

    for i in range(50):
        trainer.trainEpochs(1)

        trnresult = percentError(trainer.testOnClassData(), trndata['class'])
        tstresult = percentError(trainer.testOnClassData(dataset=tstdata),
                                 tstdata['class'])
        # print "epoch: %4d" % trainer.totalepochs, \
        #       " train error: %5.2f%%" % trnresult, \
        #       " test error: %5.2f%%" % tstresult

        predicted = np.array(trainer.testOnClassData(dataset=tstdata))
        real = np.array(tstdata['class'][:, 0])

        # Keep the best-scoring epoch, but only if the network predicts the
        # positive class at least once.
        if (accuracy_score(real, predicted) > acc) and (len(np.where(predicted == 1)[0]) != 0):
            real_obj = real
            predicted_obj = predicted
            acc = accuracy_score(real, predicted)

    Utility.save_obj(real_obj, '{}/{}_fold_{}_real.npy'.format(outpath, name, fold))
    Utility.save_obj(predicted_obj, '{}/{}_fold_{}_predicted.npy'.format(outpath, name, fold))

    print 'Accuracy : {}'.format(acc)
    return acc
def normalize_data(db_file, name_out_path):
    db = Utility.load_obj(db_file)

    new_data = []
    for syl in db:
        d = syl['TF']['missing151']['data']
        # Replace missing (NaN) frames with the unvoiced value before scaling.
        dd = np.array(d)
        dd[np.argwhere(np.isnan(d))] = un_voice
        new_data.append(dd)

    new_data = np.array(new_data)
    print new_data

    robust_scaler = RobustScaler()
    Xtr_r = robust_scaler.fit_transform(new_data)
    print Xtr_r
    print Xtr_r.shape

    new_db = []
    for idx, syl in enumerate(db):
        syl['TF']['missing151_robust_scale'] = dict()
        syl['TF']['missing151_robust_scale']['data'] = Xtr_r[idx]
        syl['TF']['missing151_robust_scale']['description'] = 'robust_scale version of missing151'
        new_db.append(syl)

    Utility.save_obj(new_db, name_out_path)
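# A minimal sketch of RobustScaler: it centers each column on the median and
# scales by the interquartile range, so a single large outlier distorts the
# result far less than mean/std standardization would. Demo data only.
def _example_robust_scaler():
    import numpy as np
    from sklearn.preprocessing import RobustScaler

    demo = np.array([[1.0], [2.0], [3.0], [100.0]])
    print RobustScaler().fit_transform(demo).ravel()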
def gen_database(v, t, data_obj, outpath):
    obj = Utility.load_obj(data_obj)
    out_obj = '{}/syllable_object_{}.pickle'.format(outpath, t)

    out_list = []
    for syl in obj.syllables_list:
        if v[0] == 'all_vowel_type':
            if t == '01234':
                out_list.append(syl)
            elif syl.tone == int(t):
                out_list.append(syl)
        elif syl.get_vowel_length_type() in v:
            if t == '01234':
                out_list.append(syl)
            elif syl.tone == int(t):
                out_list.append(syl)

    print out_obj
    Utility.save_obj(SyllableDatabaseManagement(syllable_list=out_list), out_obj)
def gen(syl_object_path):
    syllable_management = Utility.load_obj(syl_object_path)
    for syl in syllable_management.syllables_list:
        syl.gen_tonal_part_training_feature()
    Utility.save_obj(syllable_management, syl_object_path)
def run_processor(db_file, name_out_path, name):
    Y, real = run_data_processor(db_file)
    real = map(int, real)

    acc_score = 0.0
    f1_score = None
    best_neighbors = 0.025
    best_pred = None

    for n_neighbors in [0.025, 0.05, 0.075, 0.1, 0.2]:
        labels = link_clustering(Y, xrange(len(Y[0])), 2, n_neighbors)
        Utility.save_obj(labels, '{}/n_neighbors_{}x.pkl'.format(name_out_path, n_neighbors))

        log.append('Name {}, Neighbor : {}'.format(name, n_neighbors))
        log.append('result set : {}'.format(set(labels)))
        print 'Neighbor : {}'.format(n_neighbors)
        print 'result set : {}'.format(set(labels))

        for s in set(labels):
            print s, len(labels[labels == s])
            log.append('{} : {}'.format(s, len(labels[labels == s])))

        acc, f1, pred = cal_accuracy_and_f1(real, labels)
        log.append('acc : {}'.format(acc))
        log.append('f1 : {}'.format(f1))

        # Track the neighborhood size that gives the best accuracy.
        if acc > acc_score:
            acc_score = acc
            best_neighbors = n_neighbors
            f1_score = f1
            best_pred = pred

        log.append('------------------------------')

    print 'Name : {}'.format(name)
    print 'Best ng : {}'.format(best_neighbors)
    print 'Best acc : {}'.format(acc_score)
    print 'Best f1 : {}'.format(f1_score)

    log.append('Name : {}'.format(name))
    log.append('Best ng : {}'.format(best_neighbors))
    log.append('Best acc : {}'.format(acc_score))
    log.append('Best f1 : {}'.format(f1_score))

    Utility.save_obj(best_pred, '{}/prediction_labels.pkl'.format(name_out_path))
    log.append('------------------------------')
def normalize_data(db_file, name_out_path, target_type, missing_db_file, missing_type):
    db = Utility.load_obj(db_file)
    missing_db = Utility.load_obj(missing_db_file)

    new_data = []
    for syl in db:
        d = syl['TF'][target_type]['data']

        # Ratio of the initial-consonant duration to the whole syllable.
        dur = 0
        for du in syl['dur']:
            dur = dur + du
        # float() guards against Python 2 integer division when the
        # durations are stored as integers.
        consonant_ratio = float(syl['dur'][0]) / float(dur)

        # Find the matching syllable in the missing-data database.
        missing = None
        for m in missing_db:
            if syl['id'] == m['id']:
                missing = m
                break

        unvoice_frames = np.argwhere(np.isnan(missing['TF'][missing_type]['data']))
        unvoice_frames_ratio = float(len(unvoice_frames)) / float(len(d) - 1)

        d = np.append(d, consonant_ratio)
        d = np.append(d, unvoice_frames_ratio)
        new_data.append(d)

    new_data = np.array(new_data)
    print new_data.shape

    new_db = []
    for idx, syl in enumerate(db):
        key = 'intepolate151_with_consonant_unvoice_ratio'
        syl['TF'][key] = dict()
        syl['TF'][key]['data'] = new_data[idx]
        syl['TF'][key]['description'] = 'intepolate151 adding ratio of consonant and unvoiced frames in syllable'
        new_db.append(syl)

    Utility.save_obj(new_db, name_out_path)
def run_cal_optimal_and_rmse(opt, outname, main_out_data, org_for_distortion,
                             stress_list, mono_label, phone_type,
                             is_save_object=False):
    print 'Alpha: const_ph_1toN: const_ph_0: const_syl_1toN: const_syl_0:'
    print opt[0], opt[1], opt[2], opt[3], opt[4]

    opt_used = [opt[0], opt[1], opt[2], opt[3], opt[4]]

    syn_result = dict()
    for n in main_out_data:
        A_list, B_list, vuv = main_out_data[n]

        # Weighted sum of the per-constraint systems A x = b.
        a_sum = np.zeros(A_list[0].shape, dtype=np.float)
        b_sum = np.zeros(B_list[0].shape, dtype=np.float)
        for a, b, const in zip(A_list, B_list, opt_used):
            a_sum = a_sum + (const * a)
            b_sum = b_sum + (const * b)

        # Solve the combined system via a Cholesky factorization.
        L = linalg.cholesky(a_sum, lower=True)
        lf0 = linalg.cho_solve((L, True), b_sum)
        lf0 = lf0_gen_with_vuv(lf0, vuv)

        syn_result[n] = lf0

    rmse = lf0_distortion_syn_is_gpr_format(org_for_distortion, syn_result,
                                            stress_list, mono_label, phone_type)

    if is_save_object:
        now = datetime.datetime.now()
        Utility.save_obj(
            syn_result,
            '{}/lf0_generated_by_{}_{}_{}_{}_{}_time_{}.pkl'.format(
                outname, opt[0], opt[1], opt[2], opt[3], opt[4], now))

    return rmse
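# A tiny sketch of the Cholesky solve used above: for a symmetric
# positive-definite A, cho_solve((L, True), b) with L = cholesky(A,
# lower=True) solves A x = b. The 2x2 system is made-up demo data.
def _example_cholesky_solve():
    import numpy as np
    from scipy import linalg

    A = np.array([[4.0, 1.0], [1.0, 3.0]])
    b = np.array([1.0, 2.0])

    L = linalg.cholesky(A, lower=True)
    x = linalg.cho_solve((L, True), b)
    print np.allclose(A.dot(x), b)  # True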
def call_accuracy(db_file, x_base_file, setting, name):
    db = Utility.load_obj(db_file)
    real = load_real_label(db)

    n_cluster, n_neighbor = find_config(name)

    unstress_list = setting[0]
    stress_list = setting[1]

    x_file = '{}/param_n_cluster_{}_n_neighbors_{}x.pkl'.format(
        x_base_file, n_cluster, n_neighbor)
    pred = Utility.load_obj(x_file)
    print pred.shape
    print set(pred), setting

    # Remap cluster ids to binary stress labels through sentinel values, so
    # that an already-remapped label is not remapped a second time.
    for un in unstress_list:
        pred[pred == un] = 555  # unstressed
    for st in stress_list:
        pred[pred == st] = 999  # stressed
    pred[pred == 999] = 1
    pred[pred == 555] = 0

    if name == '1_non-nasal':
        print set(pred)

    acc = accuracy_score(real, pred)
    f1 = f1_score(real, pred, average=None)
    print 'acc : ', acc
    print 'f1 : ', f1

    global acc_scores
    global f1_scores
    acc_scores[name] = acc
    f1_scores[name] = f1

    result_file = dict()
    result_file['pred'] = pred
    result_file['real'] = real
    result_file['acc'] = acc
    result_file['f1'] = f1
    result_file['name'] = name
    result_file['n_cluster'] = n_cluster
    result_file['n_neighbors'] = n_neighbor

    Utility.save_obj(result_file, '{}/result_file.pkl'.format(x_base_file))
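# A short sketch of why the 555/999 sentinels above matter: mapping cluster
# id 1 -> 0 directly and then 0 -> 1 would overwrite the first mapping.
# The label array and the cluster-to-class assignment are made up.
def _example_sentinel_remap():
    import numpy as np

    labels = np.array([0, 1, 1, 2])
    # Map clusters {1} -> 0 and {0, 2} -> 1 via sentinels.
    for un in [1]:
        labels[labels == un] = 555
    for st in [0, 2]:
        labels[labels == st] = 999
    labels[labels == 999] = 1
    labels[labels == 555] = 0
    print labels  # [1 0 0 1]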
def run_training(db_file, name_out_path, data_type, input_dim):
    db = Utility.load_obj(db_file)

    Y = []
    names = []
    for syl in db:
        feat = syl['TF'][data_type]['data']
        Y.append(feat)
        names.append(syl['id'])

    Y = np.array(Y)
    print Y.shape

    # Clamp the number of inducing points to [10, 100] (1% of the data).
    num_inducing = int(len(Y) * 0.01)
    if num_inducing > 100:
        num_inducing = 100
    elif num_inducing < 10:
        num_inducing = 10

    config = {
        'input_dim': input_dim,
        'data': Y,
        'num_inducing': num_inducing,
        'max_iters': 500,
        'missing_data': True,
        'optimize_algo': 'scg'
    }
    print config

    m = GPy_Interface.Bayesian_GPLVM_Training(config)
    print m
    print '---------------------------'
    print m.X
    print '---------------------------'
    print m.input_sensitivity()
    print '---------------------------'

    Utility.save_obj(m, '{}/model.pkl'.format(name_out_path))
    Utility.save_obj(np.array(m.X.mean), '{}/x.pkl'.format(name_out_path))
    Utility.save_obj(m.input_sensitivity(), '{}/input_sensitivity.pkl'.format(name_out_path))
    Utility.save_obj(names, '{}/names.pkl'.format(name_out_path))
    Utility.save_obj(Y, '{}/training_data.pkl'.format(name_out_path))
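# GPy_Interface.Bayesian_GPLVM_Training is a project-specific wrapper whose
# internals are not shown here; this hedged sketch shows roughly what such a
# config might drive with plain GPy (the class choice and call pattern are
# assumptions, informed by the BayesianGPLVMMiniBatch paths used elsewhere).
def _example_bayesian_gplvm(Y, input_dim, num_inducing):
    import GPy

    # missing_data=True lets the model treat NaN entries as unobserved.
    m = GPy.models.BayesianGPLVMMiniBatch(Y, input_dim,
                                          num_inducing=num_inducing,
                                          missing_data=True)
    m.optimize('scg', max_iters=500)
    # m.X.mean holds the latent means; m.input_sensitivity() ranks the
    # latent dimensions by their ARD relevance.
    return m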
def run_gen_easy_access(dict_outpath):
    global db_dict

    # Index the database by syllable id for direct lookup.
    for d in db:
        db_dict[d['id']] = d
        print d['id']

    Utility.save_obj(db_dict, dict_outpath)
def gen_tonal_part_duration(phone_level_label, pattern, start_set, end_set, outpath):
    for sett in Utility.char_range(start_set, end_set):
        set_path = '{}/{}/'.format(phone_level_label, sett)
        for f in Utility.list_file(set_path):
            if f.startswith('.'):
                continue

            file_path = '{}/{}'.format(set_path, f)
            phone_frame_list = []
            syllable_count = 0

            for line in Utility.read_file_line_by_line(file_path):
                match = re.match(pattern, line)
                if match:
                    start_time = match.group('start_time')
                    end_time = match.group('end_time')

                    if match.group('phone_position_in_syllable') == 'x':
                        continue

                    phone_position_in_syllable = int(match.group('phone_position_in_syllable'))
                    phone_number_in_syllable = int(match.group('phone_number_in_syllable'))

                    # Label times are in 100 ns units (HTK convention), so
                    # dividing by 50000 gives the length in 5 ms frames.
                    frame = (float(end_time) - float(start_time)) / 50000

                    if phone_position_in_syllable == 1:
                        # First phone: start a new per-syllable list.
                        phone_frame_list = []
                        phone_frame_list.append(frame)
                    elif phone_position_in_syllable == phone_number_in_syllable:
                        # Last phone: pad two-phone syllables with a zero
                        # final-consonant slot, then save the syllable.
                        phone_frame_list.append(frame)
                        if phone_number_in_syllable == 2:
                            phone_frame_list.append(0)
                        syllable_count += 1
                        print phone_frame_list

                        outfile = '{}/{}/{}/{}_dur.npy'.format(
                            outpath, sett, f.split('.')[0], syllable_count)
                        print outfile
                        Utility.make_directory('{}/{}/{}/'.format(
                            outpath, sett, f.split('.')[0]))
                        Utility.save_obj(phone_frame_list, outfile)
                    elif phone_position_in_syllable == 2:
                        phone_frame_list.append(frame)
                else:
                    print 'Not match', f
def gen(syl_object_path):
    syllable_management = Utility.load_obj(syl_object_path)
    for syl in syllable_management.syllables_list:
        syl.gen_tonal_part_training_feature(
            Syllable.Training_feature_tonal_part_raw_remove_head_tail_having_missing,
            Syllable.Training_feature_tonal_part_raw_remove_head_tail_interpolated)
    Utility.save_obj(syllable_management, syl_object_path)
def add_phone_dur(dur_path, object_list_path):
    for obj_path in object_list_path:
        syl_object = Utility.load_obj(obj_path)
        for syl in syl_object.syllables_list:
            syl.set_phone_duration(dur_path)
        Utility.save_obj(syl_object, obj_path)
def gen_phone_duration_in_syllable(filepath, outpath_file):
    pattern = re.compile(
        r"""(?P<start>.+)\s(?P<end>.+)\s.+\-(?P<curphone>.+)\+.+/A:.+\-(?P<cur_phone_index>.+)_.+\+.+/B:.+/D:.+\-(?P<phone_number>.+)\+.+/E:.+""",
        re.VERBOSE)
    # Example label lines this pattern is meant to match:
    # 87545000 108545472 n^-sil+X/A:3_10-x_x+x_x/B:0-x+x/C:4_29-x_x+x_x/D:3-x+x/E:18-x+x/F:10_4-x_x+x_x/G:x_29_18/H:2-x+x
    # 85143750 87545000 aa-n^+sil/A:2_9-3_10+x_x/B:0-0+x/C:3_28-4_29+x_x/D:3-3+x/E:17-18+x/F:2_1-10_4+x_x/G:x_29_18/H:45-2+x

    main_duration = []
    temp_dur = []
    for line in Utility.read_file_line_by_line(filepath):
        match = re.match(pattern, line)
        if match:
            start = float(match.group('start'))
            end = float(match.group('end'))
            phone_index = match.group('cur_phone_index')
            phone_number = match.group('phone_number')

            if phone_index == 'x':
                # Silence/pause: treated as a one-phone syllable of its own.
                temp_dur = []
                temp_dur.append(end - start)
                main_duration.append(temp_dur)
            elif phone_index == '1':
                # First phone of a syllable.
                temp_dur = []
                temp_dur.append(end - start)
            elif phone_index == phone_number:
                # Last phone: close the syllable.
                temp_dur.append(end - start)
                main_duration.append(temp_dur)
            else:
                temp_dur.append(end - start)

    # Sanity check: every label line must be accounted for.
    c = 0
    for m in main_duration:
        c = c + len(m)
    if c != len(Utility.read_file_line_by_line(filepath)):
        print 'No equal', filepath

    Utility.save_obj(main_duration, outpath_file)
def get_stress_unstress(name_index, label_clustered, outpath):
    name_index = np.array(name_index)
    label_clustered = np.array(label_clustered)

    print len(set(label_clustered))

    # Split the syllable names into one group per cluster label.
    group_dict = dict()
    for g in set(label_clustered):
        group_dict[g] = name_index[label_clustered == g]
        print g, len(group_dict[g])
        Utility.save_obj(group_dict[g], '{}/{}.npy'.format(outpath, g))
def run_training(db_file, name_out_path, n_components, data_type):
    db = Utility.load_obj(db_file)

    Y = []
    names = []
    for syl in db:
        feat = syl['TF'][data_type]['data']
        Y.append(feat)
        names.append(syl['id'])

    Y = np.array(Y)
    print Y.shape

    config = {'n_components': n_components, 'data': Y}
    print config

    m, Y_r = GPy_Interface.pca(config)

    Utility.save_obj(m, '{}/model.pkl'.format(name_out_path))
    Utility.save_obj(Y_r, '{}/pca_reduction_output.pkl'.format(name_out_path))
    Utility.save_obj(names, '{}/names.pkl'.format(name_out_path))
    Utility.save_obj(Y, '{}/training_data.pkl'.format(name_out_path))
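# GPy_Interface.pca is a project wrapper; this hedged sketch shows an
# equivalent reduction with sklearn's PCA as an assumed stand-in, not the
# wrapper's actual implementation.
def _example_pca(Y, n_components):
    from sklearn.decomposition import PCA

    m = PCA(n_components=n_components)
    Y_r = m.fit_transform(Y)  # rows projected onto the top components
    return m, Y_r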