def find_data_point_from_coordinate(filepath, input_sen_path, labels, syllable_data_tag, area): # Read data file data_point = Utility.load_obj(filepath) # print data_point # Get input sensitivity input_sen_obj = Utility.load_obj(input_sen_path) input_sensitivety = Utility.get_input_sensitivity(input_sen_obj, 3) # print input_sensitivety x_coordinate = data_point[:, [input_sensitivety[0], input_sensitivety[1]]] # print x_coordinate x_cor = np.array(x_coordinate) index = DataReader.filter_data(x_cor, area) print index lab = Utility.load_obj(labels) print len(lab) print lab[index] syllable_tag = Utility.load_obj(syllable_data_tag) print len(syllable_tag) print syllable_tag # Return pass
def analysis(main_path): # main_path = '/work/w13/decha/Inter_speech_2016_workplace/Data/07c-5dims_missing_data_delta_deltadelta/BayesianGPLVMMiniBatch_Missing/Tone_4/' gpmodel = Utility.load_obj('{}/GP2dRegression.npy'.format(main_path)) model_path = '{}/GP_model.npy'.format(main_path) model = Utility.load_obj(model_path) data = model.X.mean x = [] input_sensitivity = model.input_sensitivity() print input_sensitivity index = Utility.get_input_sensitivity(input_sensitivity, 2) print index for i in range(len(data)): x.append([data[i, index[0]], data[i, index[1]]]) x = np.array(x) y = np.array(gpmodel.predict(x)[0]) print y.shape plt.clf() plt.scatter(x[:, 0], x[:, 1], c=y, cmap='gray') plt.savefig('{}/gpregression.pdf'.format(main_path)) pass
def run_training(base_path, db_file, name_out_path): names_file = '{}/names.pkl'.format(base_path) out_data = '{}/x.pkl'.format(base_path) input_sensitivity = '{}/input_sensitivity.pkl'.format(base_path) names = Utility.load_obj(names_file) db = Utility.load_obj(db_file) name_list = [] for d in db: name_list.append(d['id']) label = [] for nn in names: idx = name_list.index(nn) if nn in potential_list: label.append('3') elif db[idx]['stress'] == '1': label.append(db[idx]['stress']) else: label.append(db[idx]['stress']) out = Utility.load_obj(out_data) input_sent = Utility.load_obj(input_sensitivity) print 'Input sensitivity', input_sent most_dominants = Utility.get_input_sensitivity(input_sent, 2) label = map(int, label) label = np.array(label) train = np.append(out[label == 2], out[label == 3], axis=0) train = np.c_[train[:, most_dominants[0]], train[:, most_dominants[1]]] print train.shape global kern lengthscale = 1 / np.array(input_sent, dtype=float) kern = GPy.kern.RBF(len(train[0]), ARD=True, lengthscale=[ lengthscale[most_dominants[0]], lengthscale[most_dominants[1]] ]) print most_dominants xx, yy = np.meshgrid(np.linspace(-5, 5, 500), np.linspace(-5, 5, 500)) plane = np.c_[xx.ravel(), yy.ravel()] svm_classifier(train, '', '', '', '', plane, xx, yy) pass
def plot_result(model, data_object, out_file_path): data = model.X.mean y, name_index, tone, stress, syllable_short_long_type, syllable_positions, phonemes, syllable_type = data_object.get_GP_LVM_training_data( Syllable.TRAINING_FEATURE_POLYNOMIAL_2_DEGREE_VOICE, dur_position=[1,2] , num_sampling=25) # print syllable_type # print model.X.mean x = [] y = [] input_sensitivity = model.input_sensitivity() print input_sensitivity index = Utility.get_input_sensitivity(input_sensitivity, 2) print index data = np.array(data) stress = np.array(stress) labels_true = np.arange(len(stress), dtype=int) labels_true[stress == 'Stress'] = 1 labels_true[stress == 'Unstress'] = 0 new_label = [] for idx, t in enumerate(tone): if (labels_true[idx] == 1): if (t in [0,1]) : new_label.append(1) elif (t in [2]) : new_label.append(2) else : new_label.append(3) else: new_label.append(0) try: DBSCAN_executioner.run( data, new_label, os.path.dirname(outpath), [index[0], index[1]], input_sensitivity, stress_only=False, stress_list=labels_true) # Kmeans_executioner.run(data, labels_true, os.path.dirname(outpath), [index[0], index[1]], input_sensitivity) except: print 'Error at path : {}'.format(outpath) traceback.print_exc()
def plot(data, inverselengthscale, labels): most_dominants = Utility.get_input_sensitivity(inverselengthscale, 2) x = data[:, most_dominants[0]] y = data[:, most_dominants[1]] label = map(int, labels) label = np.array(labels) print set(labels) colors = ['red', 'green', 'blue', 'purple'] plt.clf() plt.scatter(x, y, c=labels, cmap=matplotlib.colors.ListedColormap(colors)) plt.savefig('./link_clustering_test.eps')
def plot_latent_space(base_path, db_file, name_out_path): names_file = '{}/names.pkl'.format(base_path) out_data = '{}/x.pkl'.format(base_path) input_sensitivity = '{}/input_sensitivity.pkl'.format(base_path) if not Utility.is_file_exist(out_data): print out_data print 'Not exist' return names = Utility.load_obj(names_file) db = Utility.load_obj(db_file) name_list = [] for d in db: name_list.append(d['id']) label = [] for nn in names: idx = name_list.index(nn) label.append(db[idx]['stress']) out = Utility.load_obj(out_data) print out.shape input_sent = Utility.load_obj(input_sensitivity) print input_sent most_dominants = Utility.get_input_sensitivity(input_sent, 2) x = out[:, most_dominants[0]] y = out[:, most_dominants[1]] label = map(int, label) label = np.array(label) print set(label) colors = ['red', 'green', 'blue', 'purple'] plt.clf() plt.scatter(x, y, c=label, cmap=matplotlib.colors.ListedColormap(colors)) plt.savefig(name_out_path)
def plot(data, inverselengthscale, labels): most_dominants = Utility.get_input_sensitivity(inverselengthscale, 2) x = data[ :, most_dominants[0] ] y = data[ :, most_dominants[1] ] label = map(int, labels) label = np.array(labels) print set(labels) colors = Utility.get_color_map(len(set(labels))) plt.clf() for idx, s in enumerate(set(labels)): print s plt.scatter(x[labels==s], y[labels==s], c=colors[idx] , label=s) plt.legend() plt.savefig( './dbscan_test.eps' )
def find_data_point_from_coordinate(filepath, input_sen_path, labels, syllable_data_path, area, tone): # Read data file data_point = Utility.load_obj(filepath) # print data_point # Get input sensitivity input_sen_obj = Utility.load_obj(input_sen_path) input_sensitivety = Utility.get_input_sensitivity(input_sen_obj, 3) # print input_sensitivety x_coordinate = data_point[:, [input_sensitivety[0], input_sensitivety[1]]] # print x_coordinate x_cor = np.array(x_coordinate) index = DataReader.filter_data(x_cor, area) print index lab = Utility.load_obj(labels) print len(lab) print lab[index] syllable_tag = DataReader.gen_syllable_tag(syllable_data_path, tone, 'a', 'h', 'tscsd_manual') print len(syllable_tag) syllable_tag = np.array(syllable_tag) print syllable_tag[index] lab_indexed = lab[index] syllable_tag_indexed = syllable_tag[index] print syllable_tag_indexed[lab_indexed == 'Tone 2'] # Return pass
def run(main_path, syllable_management_path): # main_path = '/work/w13/decha/Inter_speech_2016_workplace/Data/07c-5dims_missing_data_delta_deltadelta/BayesianGPLVMMiniBatch_Missing/Tone_4/' # syllable_management_path = '/home/h1/decha/Dropbox/Inter_speech_2016/Syllable_object/01_manual_labeling_object/syllable_4.pickle' model_path = '{}/GP_model.npy'.format(main_path) outpath = '{}/GP2dRegression.npy'.format(main_path) model = Utility.load_obj(model_path) data = model.X.mean x = [] input_sensitivity = model.input_sensitivity() print input_sensitivity index = Utility.get_input_sensitivity(input_sensitivity, 2) print index for i in range(len(data)): x.append([data[i, index[0]], data[i, index[1]]]) x = np.array(x) syllable_management = Utility.load_obj(syllable_management_path) y, name_index, tone, stress, syllable_short_long_type, syllable_positions, phonemes, syllable_type = syllable_management.get_GP_LVM_training_data( Syllable.TRAINING_FEATURE_POLYNOMIAL_2_DEGREE_VOICE, subtract_typical_contour=False) y = np.array(y) # print y[:,50] y = y[:, 50] y = y[np.newaxis].T print y.shape GPModelByGPy.execute_training(x, y, outpath) pass
def plot(data, inverselengthscale, labels, name_out_file, title):
    """Scatter the two most dominant latent dimensions, one colour per
    label (sorted label order), with *title*, saved to *name_out_file*.

    BUG FIX: removed the dead `label = map(int, labels)` assignment — its
    result was immediately overwritten by `np.array(labels)` and the
    variable was never used anywhere in the function.
    """
    most_dominants = Utility.get_input_sensitivity(inverselengthscale, 2)
    x = data[:, most_dominants[0]]
    y = data[:, most_dominants[1]]
    colors = Utility.get_color_map(len(set(labels)))
    plt.clf()
    for idx, s in enumerate(sorted(set(labels))):
        plt.scatter(x[labels == s], y[labels == s], c=colors[idx], label=s)
    plt.legend()
    plt.title(title)
    plt.savefig(name_out_file)
model_path = '/work/w13/decha/Inter_speech_2016_workplace/mix-projection-addtional/01_mix_a-5dims_BayesianGPLVMMiniBatch_data_no_delta_missing_data_subtract_typical_contour/BayesianGPLVMMiniBatch_Missing/Tone_01234/GP_model.npy' syllable_management = Utility.load_obj(data_path) y, name_index, tone, stress, syllable_short_long_type, syllable_positions, phonemes = syllable_management.get_GP_LVM_training_data(Syllable.TRAINING_FEATURE_POLYNOMIAL_2_DEGREE_VOICE, subtract_typical_contour=False) model = Utility.load_obj(model_path) data = model.X.mean x = [] y = [] input_sensitivity = model.input_sensitivity() print input_sensitivity index = Utility.get_input_sensitivity(input_sensitivity, 2) print index for i in range(len(data)): x.append(data[i,index[0]]) y.append(data[i,index[1]]) x = np.asarray(x) y = np.asarray(y) stress = np.array(stress) stress_index = np.where(stress=='Stress') x_stress = x[stress_index] y_stress = y[stress_index]
def run_training(base_path, db_file, name_out_path, name):
    # Train an SVM on the full latent space to propagate the weak-stress
    # label: the training subset is the 'j'-vowel syllables plus manually
    # labelled stress-2 syllables; predictions over the whole data set are
    # written into the global syl_dict.
    # NOTE(review): `name` is unused here; `name_out_path` is forwarded to
    # svm_classifier, whose semantics are defined elsewhere.
    names_file = '{}/names.pkl'.format(base_path)
    out_data = '{}/x.pkl'.format(base_path)
    input_sensitivity = '{}/input_sensitivity.pkl'.format(base_path)
    names = Utility.load_obj(names_file)
    db = Utility.load_obj(db_file)
    name_list = []
    for d in db:
        name_list.append( d['id'] )
    label = []
    train_name_list = []
    train_idx = []
    true_label = []
    for i, nn in enumerate(names) :
        idx = name_list.index(nn)
        # Training subset: syllables containing 'j' or manually labelled '2'.
        if 'j' in nn:
            train_name_list.append(nn)
            train_idx.append(i)
        elif db[idx]['stress'] == '2':
            train_name_list.append(nn)
            train_idx.append(i)
        # Two-class SVM target: +1 for weak-stress/potential, -1 otherwise.
        # Stress '1' takes precedence over potential_list membership.
        if db[idx]['stress'] == '1':
            # label.append(db[idx]['stress'])
            label.append(-1)
        elif nn in potential_list:
            # label.append('3')
            label.append(1)
        elif db[idx]['stress'] == '2':
            label.append(1)
        else :
            # label.append(db[idx]['stress'])
            label.append(-1)
        # Original label kept for points the SVM does not re-label ('2' -> 1).
        if db[idx]['stress'] == '2':
            true_label.append(1)
        else:
            true_label.append(int(db[idx]['stress']))
    out = Utility.load_obj(out_data)
    input_sent = Utility.load_obj(input_sensitivity)
    most_dominants = Utility.get_input_sensitivity(input_sent, 2)
    label = map(int, label)
    label = np.array(label)
    train = out[train_idx]
    train_lab = label[train_idx]
    # print len(train), len(train_lab), set(train_lab)
    global kern
    # ARD lengthscales: inverse input sensitivity, normalised so min == 1.
    lengthscale=1/np.array(input_sent, dtype=float)
    lengthscale = lengthscale/lengthscale.min()
    # print 'lengthscale : ', lengthscale
    # kern = GPy.kern.RBF(len(train[0]), ARD=True, lengthscale=lengthscale)
    kern = GPy.kern.RBF(len(train[0]), ARD=True, lengthscale=lengthscale)
    # Per-dimension evaluation grid: 100 steps along each of the two dominant
    # dimensions, a single 0 everywhere else. The meshgrid unpack below
    # hard-codes exactly 10 latent dimensions.
    min_max_dims = []
    ten_or_not = []
    for d in xrange(len(out[0])):
        out_d = out[:, d]
        m = []
        m.append(min(out_d))
        m.append(max(out_d))
        min_max_dims.append(m)
        if d in most_dominants:
            ten_or_not.append(100),
        else :
            ten_or_not.append(1)
    m = min_max_dims
    # for mm in m:
    #     print mm
    print most_dominants
    m_grid = []
    for d in xrange(len(out[0])):
        if ten_or_not[d] != 1:
            m_grid.append( np.linspace(m[d][0], m[d][1], ten_or_not[d]) )
        else:
            m_grid.append([0])
    d0, d1, d2, d3, d4, d5, d6, d7, d8, d9 = np.meshgrid( m_grid[0], m_grid[1], m_grid[2], m_grid[3], m_grid[4], m_grid[5], m_grid[6], m_grid[7], m_grid[8], m_grid[9] )
    plane = np.c_[ d0.ravel(), d1.ravel(), d2.ravel(), d3.ravel(), d4.ravel(), d5.ravel(), d6.ravel(), d7.ravel(), d8.ravel(), d9.ravel() ]
    # print plane
    y_pred_test = svm_classifier(train, train_lab, out, '', name_out_path, plane, np.linspace(m[most_dominants[0]][0], m[most_dominants[0]][1], ten_or_not[most_dominants[0]]), np.linspace(m[most_dominants[1]][0], m[most_dominants[1]][1], ten_or_not[most_dominants[1]]), most_dominants, input_sent)
    global syl_dict
    # Points the SVM predicts positive become stress class 2; the rest keep
    # their original label.
    for n, y_pred, true_lab in zip(names, y_pred_test, true_label) :
        syllable = dict()
        if y_pred == 1:
            syllable['stress'] = 2
        else:
            syllable['stress'] = true_lab
        syl_dict[n] = syllable
    pass
def run_training(base_path, db_file, name_out_path, name):
    # Train an SVM using the manually labelled weak-stress (2) and potential
    # (3) syllables as the positive set, then evaluate the decision plane on
    # a grid that varies only along the two most dominant latent dimensions.
    # NOTE(review): `name` is unused in this body.
    names_file = '{}/names.pkl'.format(base_path)
    out_data = '{}/x.pkl'.format(base_path)
    input_sensitivity = '{}/input_sensitivity.pkl'.format(base_path)
    names = Utility.load_obj(names_file)
    db = Utility.load_obj(db_file)
    name_list = []
    for d in db:
        name_list.append(d['id'])
    label = []
    for nn in names:
        idx = name_list.index(nn)
        # Stress '1' takes precedence over potential_list membership.
        if db[idx]['stress'] == '1':
            label.append(db[idx]['stress'])
        elif nn in potential_list:
            label.append('3')
        else:
            label.append(db[idx]['stress'])
    out = Utility.load_obj(out_data)
    input_sent = Utility.load_obj(input_sensitivity)
    print 'Input sensitivity', input_sent
    most_dominants = Utility.get_input_sensitivity(input_sent, 2)
    label = map(int, label)
    label = np.array(label)
    # Positive training set: classes 2 and 3, all latent dimensions.
    train = np.append(out[label == 2], out[label == 3], axis=0)
    global kern
    # ARD lengthscales: inverse input sensitivity, normalised so min == 1.
    lengthscale = 1 / np.array(input_sent, dtype=float)
    lengthscale = lengthscale / lengthscale.min()
    kern = GPy.kern.RBF(len(train[0]), ARD=True, lengthscale=lengthscale)
    # Per-dimension evaluation grid: 100 steps along the two dominant
    # dimensions, a single 0 elsewhere. The meshgrid unpack below hard-codes
    # exactly 10 latent dimensions.
    min_max_dims = []
    ten_or_not = []
    for d in xrange(len(out[0])):
        out_d = out[:, d]
        m = []
        m.append(min(out_d))
        m.append(max(out_d))
        min_max_dims.append(m)
        if d in most_dominants:
            ten_or_not.append(100),
        else:
            ten_or_not.append(1)
    m = min_max_dims
    # print ten_or_not, m
    print most_dominants
    m_grid = []
    for d in xrange(len(out[0])):
        if ten_or_not[d] != 1:
            m_grid.append(np.linspace(m[d][0], m[d][1], ten_or_not[d]))
        else:
            m_grid.append([0])
    d0, d1, d2, d3, d4, d5, d6, d7, d8, d9 = np.meshgrid(
        m_grid[0], m_grid[1], m_grid[2], m_grid[3], m_grid[4],
        m_grid[5], m_grid[6], m_grid[7], m_grid[8], m_grid[9])
    plane = np.c_[d0.ravel(), d1.ravel(), d2.ravel(), d3.ravel(), d4.ravel(),
                  d5.ravel(), d6.ravel(), d7.ravel(), d8.ravel(), d9.ravel()]
    # print plane
    svm_classifier(
        train, '', out, '', name_out_path, plane,
        np.linspace(m[most_dominants[0]][0], m[most_dominants[0]][1], ten_or_not[most_dominants[0]]),
        np.linspace(m[most_dominants[1]][0], m[most_dominants[1]][1], ten_or_not[most_dominants[1]]),
        most_dominants)
    pass
def plot_latent_space(base_path, db_file, name_out_path):
    # Re-label syllables near the weak-stress cluster centre (within 1 or 2
    # standard deviations along the most dominant latent dimension) as class
    # 2, record every syllable's final label in the global syl_dict, and
    # plot the latent space with per-class markers.
    names_file = '{}/names.pkl'.format(base_path)
    out_data = '{}/x.pkl'.format(base_path)
    input_sensitivity = '{}/input_sensitivity.pkl'.format(base_path)
    # Nothing to plot if the latent data has not been produced yet.
    if not Utility.is_file_exist(out_data) :
        print out_data
        print 'Not exist'
        return
    names = Utility.load_obj(names_file)
    db = Utility.load_obj(db_file)
    name_list = []
    for d in db:
        name_list.append( d['id'] )
    label = []
    for nn in names:
        idx = name_list.index(nn)
        # Stress '1' takes precedence over potential_list membership.
        if db[idx]['stress'] == '1':
            label.append(db[idx]['stress'])
        elif nn in potential_list:
            label.append('3')
        else :
            label.append(db[idx]['stress'])
    out = Utility.load_obj(out_data)
    # print out.shape
    input_sent = Utility.load_obj(input_sensitivity)
    # print 'input_sensitivity : ', sorted(input_sent)
    most_dominants = Utility.get_input_sensitivity(input_sent, 2)
    x = out[ :, most_dominants[0] ]
    y = out[ :, most_dominants[1] ]
    label = map(int, label)
    label = np.array(label)
    # print set(label)
    # Weak-stress training cluster: classes 2 (manual) and 3 (potential).
    train = np.append( out[label==2] , out[label==3], axis=0 )
    # train = out[label==2]
    # print train.shape
    test = out[label==0]
    lengthscale=1/np.array(input_sent, dtype=float)
    # NOTE(review): kernel `k` is built but never used in this function.
    k = GPy.kern.RBF(len(train[0]), ARD=True, lengthscale=lengthscale)
    plt.clf()
    colors = ['red','green','blue','purple']
    md = most_dominants[0]
    mean = np.mean(train, axis=0)
    var = np.var(train, axis=0)
    # rv = multivariate_normal(mean=np.mean(train, axis=0), cov=np.var(train, axis=0))
    # Spread of the training cluster along the most dominant dimension.
    sd = np.std(train[:,most_dominants[0]], axis=0)
    for idx, lab in enumerate(label):
        # if lab == 2: continue
        # if lab == 3: continue
        # print out[idx][md], mean
        # 1-D distance to the cluster mean along the dominant dimension.
        d = distance.euclidean(out[idx][md], mean[md])
        # print d
        # NOTE(review): both the 1-SD and 2-SD branches assign label 2; the
        # commented-out assignments (4 and 6) suggest they once differed —
        # the 4/5/6 plotting branches below are therefore dead.
        if d < sd:
            # label[idx] = 4
            label[idx] = 2
        elif d < 2*sd:
            # label[idx] = 6
            label[idx] = 2
            pass
    # Fold the potential class into weak stress.
    label[label==3] = 2
    print len(out), len(names)
    print set(label), len(label)
    # Sanity check: only classes {0, 1, 2} should remain.
    if len(set(label)) > 3:
        print 'error : ', name_out_path
        raise
    global syl_dict
    for n, lab in zip(names, label) :
        syllable = dict()
        syllable['stress'] = lab
        syl_dict[n] = syllable
    # print names
    # print label
    # return
    # Draw each class with its own marker/size; the order controls z-order.
    for idx, s in enumerate( [0,1,4,5,6,2,3] ):
        # if (s == 3) | (s == 2):
        if (s == 2):
            # plt.scatter(x[label==s], y[label==s], c=colors[s], label=s, s=100)
            plt.scatter(x[label==s], y[label==s], c=colors[s], label='Manual weak stress labeling', s=100)
            pass
        elif (s==-1):
            plt.scatter(x[label==s], y[label==s], c='red', label=s, s=20)
        elif (s==1):
            plt.scatter(x[label==s], y[label==s], c='red', label='Stress', s=7)
            pass
        elif (s==5):
            # plt.scatter(x[label==s], y[label==s], c='yellow', label=s, s=20, marker='^', linewidth='0')
            pass
        elif (s==4):
            plt.scatter(x[label==s], y[label==s], c='green', label='Weak stress in 1 SD', s=20, marker='*', linewidth='0')
        elif (s==6):
            plt.scatter(x[label==s], y[label==s], c='orange', label='Weak stress in 2 SD', s=20, marker='h', linewidth='0')
        # else:
        elif (s==0):
            plt.scatter(x[label==s], y[label==s], c='black', label='Unstress', s=7, marker='.', linewidth='0')
            pass
    # Mark the training-cluster mean with a large cross.
    plt.scatter(mean[most_dominants[0]], mean[most_dominants[1]], c='red', label=s, s=200, marker='x')
    x_lim = plt.xlim()
    y_lim = plt.ylim()
    xx, yy = np.mgrid[x_lim[0]:x_lim[1]:.01, y_lim[0]:y_lim[1]:.01]
    pos = np.empty(xx.shape + (2,))
    pos[:, :, 0] = xx; pos[:, :, 1] = yy
    x_train = train[ :, most_dominants[0] ]
    y_train = train[ :, most_dominants[1] ]
    # rv = multivariate_normal(
    #     [mean[most_dominants[0]], mean[most_dominants[1]] ],
    #     [var[most_dominants[0]], var[most_dominants[1]] ])
    # print rv
    # print 'means : ', rv.pdf([np.mean(x_train), np.mean(y_train)])
    # plt.contourf(xx, yy, rv.pdf(pos), alpha=0.5)
    # plt.legend(prop={'size':12})
    plt.savefig( name_out_path )
def plot_latent_space(base_path, db_file, name_out_path):
    # Scatter the latent space coloured by stress label and annotate three
    # example syllables (unstress / strong stress / weak stress) with arrows.
    names_file = '{}/names.pkl'.format(base_path)
    out_data = '{}/x.pkl'.format(base_path)
    input_sensitivity = '{}/input_sensitivity.pkl'.format(base_path)
    # Nothing to plot if the latent data has not been produced yet.
    if not Utility.is_file_exist(out_data):
        print out_data
        print 'Not exist'
        return
    names = Utility.load_obj(names_file)
    db = Utility.load_obj(db_file)
    name_list = []
    for d in db:
        name_list.append(d['id'])
    label = []
    iden = []
    target_id = []
    for nn in names:
        idx = name_list.index(nn)
        label.append(db[idx]['stress'])
        iden.append(nn)
        if nn in ['tscsdm38_55', 'tscsdu01_32', 'tscsdg02_21']:
            # NOTE(review): this stores the index into `db`/`name_list`, but
            # it is later used to index x/y, which follow the order of
            # `names` — correct only if both orderings coincide. The sibling
            # run_training appends the enumerate position instead; confirm.
            target_id.append(idx)
    target_id = np.array(target_id)
    out = Utility.load_obj(out_data)
    iden = np.array(iden)
    print out.shape
    input_sent = Utility.load_obj(input_sensitivity)
    print input_sent
    most_dominants = Utility.get_input_sensitivity(input_sent, 2)
    x = out[:, most_dominants[0]]
    y = out[:, most_dominants[1]]
    label = map(int, label)
    label = np.array(label)
    print set(label)
    # ind = np.random.choice(len(label), 20)
    # x = x[ind]
    # y = y[ind]
    # label = label[ind]
    # iden = iden[ind]
    colors = ['red', 'green', 'blue', 'purple']
    plt.clf()
    plt.scatter(x, y, c=label, cmap=matplotlib.colors.ListedColormap(colors), alpha=0.5)
    # Patch used as the arrow target for the annotations below.
    el = Ellipse((2, -1), 0.5, 0.5)
    # Annotate the three example syllables; yyy is the vertical text offset.
    for lab, xx, yy, yyy in zip(['Unstress', 'Strong stress', 'Weak stress'], x[target_id], y[target_id], [50, 100, 50]):
        # for lab, xx, yy in zip(iden, x, y):
        #     yyy = 20
        plt.annotate(lab, xy=(xx, yy), xytext=(0, yyy),
            textcoords='offset points', ha='left', va='bottom',
            bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
            arrowprops=dict(arrowstyle="simple", fc="0.6", ec="none", patchB=el, connectionstyle="arc3,rad=0.3", color='g'))
    plt.savefig(name_out_path)
def plot_scatter(model, data_object, outpath, label_type=None, target_tone=None, name_index_list=None, phoneme_list=None, plotted_tone=None, bivariate=False, followed_list_file=None, perform_unsupervised=False, get_only_stress=False, non_unlabelled_stress=False, get_only_gpr_data=False, return_after_dbscan=False, get_only_manual_data=False, no_short_duration=False):
    # Master plotting routine: project the GP-LVM latent space and scatter
    # it under one of many labelling schemes selected by `label_type`
    # (stress, tone, syllable type, phrase position, phoneme, ...),
    # optionally running DBSCAN clustering first.
    data = model.X.mean
    y, name_index, tone, stress, syllable_short_long_type, syllable_positions, phonemes, syllable_type = data_object.get_GP_LVM_training_data(
        Syllable.Training_feature_tonal_part_raw_remove_head_tail_interpolated , dur_position=[1,2] , no_short_duration=no_short_duration, num_sampling=50, get_only_stress=get_only_stress, non_unlabelled_stress=non_unlabelled_stress, get_only_gpr_data=get_only_gpr_data, get_only_manual_data=get_only_manual_data)
    # print 'Plot scatter'
    # print stress
    # sys.exit()
    # print syllable_type
    # print model.X.mean
    x = []
    y = []
    input_sensitivity = model.input_sensitivity()
    print input_sensitivity
    index = Utility.get_input_sensitivity(input_sensitivity, 3)
    print index
    data = np.array(data)
    name_index_list = np.array(name_index_list)
    # Keep only the latent rows whose name appears in name_index, in that order.
    index_filter = []
    for n in name_index:
        # print n
        idx = np.where( name_index_list==n ) [0]
        # print idx
        index_filter.append(idx[0])
    data = data[index_filter]
    stress = np.array(stress)
    # Numeric ground truth: 1 = Stress, 0 = Unstress.
    labels_true = np.arange(len(stress), dtype=int)
    labels_true[stress == 'Stress'] = 1
    labels_true[stress == 'Unstress'] = 0
    # print len(data), len(stress)
    # print len(labels_true), set(labels_true)
    # sys.exit()
    if len(data) != len(stress):
        print 'Error data is not equal'
        return
    plt.clf()
    if perform_unsupervised:
        try:
            DBSCAN_executioner.run(data, labels_true, os.path.dirname(outpath), [index[0], index[1]], input_sensitivity)
            # Kmeans_executioner.run(data, labels_true, os.path.dirname(outpath), [index[0], index[1]], input_sensitivity)
        except:
            print 'Error at path : {}'.format(outpath)
            traceback.print_exc()
    if return_after_dbscan:
        return
    plt.clf()
    print 'Data : {}'.format(len(data))
    print 'Stress : {}'.format(len(stress))
    # print stress
    x = data[:,index[0]]
    # NOTE(review): the next line overrides the sensitivity-chosen dimension
    # with the hard-coded column 1 (and likewise column 0 for y below) —
    # looks like a leftover debugging override; confirm intent.
    x = data[:,1]
    y = data[:,index[1]]
    y = data[:,0]
    z = data[:,index[2]]
    print 'syllable_positions', len(syllable_positions)
    if label_type is GP_LVM_Scatter.LABEL_TYPE_STRESS:
        # Scatter.plot(x, y, outpath, label_list=stress, color=['r','b','g'])
        stress_index = np.where(stress == 'Stress')
        unstress_index = np.where(stress == 'Unstress')
        mask = np.ones(len(stress), dtype=bool)
        mask[unstress_index] = False
        # print stress
        # sys.exit()
        # Scatter.plot(x[mask], y[mask], outpath, label_list=stress[mask], color=['r','b','g'], bivariate=bivariate, X_bi=x[stress_index], Y_bi=y[stress_index])
        Scatter.plot(x, y, outpath, label_list=stress, color=['r','b','g'], bivariate=bivariate, X_bi=x[stress_index], Y_bi=y[stress_index])
    elif label_type is GP_LVM_Scatter.LABEL_TYPE_STRESS_3D_COLORING:
        # Scatter.plot(x, y, outpath, label_list=stress, color=['r','b','g'])
        stress_index = np.where(stress == 'Stress')
        unstress_index = np.where(stress == 'Unstress')
        # Grey-scale colouring by the third dominant dimension, scaled 0-100.
        normalized = (z-min(z))/(max(z)-min(z)) * 100
        Scatter.plot(x, y, outpath, label_list=None, color=normalized.astype(int).tolist(), cmap='gray')
    elif label_type is GP_LVM_Scatter.LABEL_TYPE_STRESS_SEP_GPR:
        # Syllables whose name contains 'gpr' become their own class.
        gpr_file_list = []
        for idx, n in enumerate(name_index):
            if 'gpr' in n:
                gpr_file_list.append(idx)
        gpr_file_list = np.array(gpr_file_list)
        stress[gpr_file_list] = 'GPR_Stress'
        stress_index = np.where(stress == 'Stress')
        unstress_index = np.where(stress == 'Unstress')
        mask = np.ones(len(stress), dtype=bool)
        mask[unstress_index] = False
        Scatter.plot(x, y, outpath, label_list=stress, color=['r','b','g'], bivariate=bivariate, X_bi=x[stress_index], Y_bi=y[stress_index])
    elif label_type is GP_LVM_Scatter.LABEL_TYPE_STRESS_AND_SPLIT_TONE:
        # One plot per tone, by stress label and (if available) by the
        # previously saved clustering labels.
        stress_index = np.where(stress == 'Stress')
        unstress_index = np.where(stress == 'Unstress')
        tone = np.array(tone)
        mask = np.ones(len(stress), dtype=bool)
        mask[unstress_index] = False
        outpath = Utility.get_base_path(outpath)
        canplot = True
        try:
            labels_object = Utility.load_obj('{}/clustered_label.npy'.format(outpath))
            if len(labels_object)!=len(stress):
                canplot = False
        except:
            canplot = False
        for t in set(tone):
            Utility.make_directory('{}/tone_stress_label/'.format(outpath))
            Utility.make_directory('{}/clustering_label/'.format(outpath))
            print len(x), len(y), len(tone), len(stress)
            Scatter.plot(x[tone==t], y[tone==t], '{}/tone_stress_label/tone_{}.eps'.format(outpath, t), label_list=stress[tone==t], bivariate=bivariate, X_bi=x[stress_index], Y_bi=y[stress_index])
            if canplot:
                # NOTE(review): the next string expression is discarded —
                # almost certainly a missing `print`.
                'Plot label tone {}'.format(t)
                Scatter.plot(x[tone==t], y[tone==t], '{}/clustering_label//tone_{}.eps'.format(outpath, t), label_list=labels_object[tone==t], bivariate=bivariate, X_bi=x[stress_index], Y_bi=y[stress_index])
    elif label_type is GP_LVM_Scatter.LABEL_TYPE_SYLLABLE_SHORT_LONG:
        Scatter.plot(x, y, outpath, label_list=syllable_short_long_type)
    elif label_type is GP_LVM_Scatter.LABEL_TYPE_SYLLABLE_POSITIONS:
        # Plot only long-vowel syllables, labelled by syllable position.
        long_list = []
        short_list = []
        for idx, p in enumerate(phonemes):
            v = p.split('-')[1]
            if v not in Syllable.short_vowel:
                long_list.append(idx)
            else:
                short_list.append(idx)
        print len(long_list) , len(x)
        x = np.array(x)
        y = np.array(y)
        syllable_positions = np.array(syllable_positions)
        Scatter.plot(x[long_list], y[long_list], outpath, label_list=syllable_positions[long_list])
    elif label_type is GP_LVM_Scatter.LABEL_TYPE_TONES:
        Scatter.plot(x, y, outpath, label_list=tone, color=['r','g','b','black','yellow'])
    elif label_type is GP_LVM_Scatter.LABEL_TYPE_ONE_TONE_STRESS_UNSTRESS:
        # Combine stress and tone into labels like 'Stress_1', keeping only
        # the requested target tones.
        tone = np.array(map(str, tone))
        stress = np.core.defchararray.add(stress, '_' )
        stress_tone = np.core.defchararray.add(stress, tone)
        target_list = np.array([])
        print target_tone
        for t in target_tone:
            print t, target_list, np.where(tone == t)
            target_list = np.union1d(target_list, np.where(tone == t)[0])
        stress_tone = stress_tone[target_list.astype(int)]#np.delete(stress_tone, delete_list)
        x = x[target_list.astype(int)]#np.delete(x, delete_list)
        y = y[target_list.astype(int)]#np.delete(y, delete_list)
        Scatter.plot(x, y, outpath, label_list=stress_tone)
    elif label_type is None :
        Scatter.plot(x, y, outpath, label_list=None)
    elif label_type is GP_LVM_Scatter.LABEL_TYPE_SYLLABLE_IN_MANUAL_PHRASE:
        # Label each point as Single / Followed / Poly / Other according to
        # the pickled phrase-position name lists.
        name_index = np.array(name_index)
        # print name_index
        single_list = np.array(Utility.load_obj(name_index_list['single']))
        followed_by_sil_list = np.array(Utility.load_obj(name_index_list['followed_by_sil']))
        poly_list = np.array(Utility.load_obj(name_index_list['poly']))
        all_union = []
        single_indices = []
        for syl in single_list:
            single_indices = np.union1d(single_indices, np.where( name_index == syl)[0])
        followed_by_sil_indices = []
        for syl in followed_by_sil_list:
            followed_by_sil_indices = np.union1d(followed_by_sil_indices, np.where( name_index == syl)[0])
        poly_indices = []
        for syl in poly_list:
            poly_indices = np.union1d(poly_indices, np.where( name_index == syl)[0])
        name_index[single_indices.astype(int)] = 'Single '
        name_index[followed_by_sil_indices.astype(int)] = 'Followed'
        name_index[poly_indices.astype(int)] = 'Poly'
        all_union = np.union1d(all_union, single_indices)
        all_union = np.union1d(all_union, followed_by_sil_indices)
        all_union = np.union1d(all_union, poly_indices)
        mask = np.ones(len(name_index), dtype=bool)
        mask[all_union.astype(int)] = False
        name_index[mask] = 'Other'
        Scatter.plot(x, y, outpath, label_list=name_index, color=['r','g','b','y'])
    elif label_type is GP_LVM_Scatter.LABEL_TYPE_SYLLABLE_IN_MANUAL_PHRASE_PLUS_SHORT_LONG_SYLLABLE:
        # Same phrase-position labelling as above, but split into separate
        # short-vowel and long-vowel plots.
        name_index = np.array(name_index)
        # print name_index
        single_list = np.array(Utility.load_obj(name_index_list['single']))
        followed_by_sil_list = np.array(Utility.load_obj(name_index_list['followed_by_sil']))
        poly_list = np.array(Utility.load_obj(name_index_list['poly']))
        all_union = []
        single_indices = []
        for syl in single_list:
            single_indices = np.union1d(single_indices, np.where( name_index == syl)[0])
        followed_by_sil_indices = []
        for syl in followed_by_sil_list:
            followed_by_sil_indices = np.union1d(followed_by_sil_indices, np.where( name_index == syl)[0])
        poly_indices = []
        for syl in poly_list:
            poly_indices = np.union1d(poly_indices, np.where( name_index == syl)[0])
        name_index[single_indices.astype(int)] = 'Single '
        name_index[followed_by_sil_indices.astype(int)] = 'Followed'
        name_index[poly_indices.astype(int)] = 'Poly'
        all_union = np.union1d(all_union, single_indices)
        all_union = np.union1d(all_union, followed_by_sil_indices)
        all_union = np.union1d(all_union, poly_indices)
        mask = np.ones(len(name_index), dtype=bool)
        mask[all_union.astype(int)] = False
        name_index[mask] = 'Other'
        outpath = outpath.split('.')[0]
        syllable_short_long_type = np.array(syllable_short_long_type)
        short_list = np.where(syllable_short_long_type=='short')[0]
        long_list = np.where(syllable_short_long_type=='long')[0]
        # print short_list, long_list
        Scatter.plot(x[short_list], y[short_list], '{}_short.pdf'.format(outpath), label_list=name_index[short_list], color=['r','g','b','y'])
        Scatter.plot(x[long_list], y[long_list], '{}_long.pdf'.format(outpath), label_list=name_index[long_list], color=['r','g','b','y'])
    elif label_type is GP_LVM_Scatter.LABEL_TYPE_PHONEME:
        # One plot per requested phoneme, coloured by stress.
        phonemes = np.array(phonemes)
        stress = np.array(stress)
        for phoneme in phoneme_list:
            if plotted_tone != '01234':
                if plotted_tone not in phoneme:
                    continue
            target_index = np.where(phonemes == phoneme)
            stress_index = np.where(stress == 'Stress')
            # print stress_index
            outpath = outpath.split('.')[0]
            Scatter.plot(x[target_index], y[target_index], '{}_{}.pdf'.format(outpath, phoneme), label_list=stress[target_index], bivariate=True, X_bi=x[stress_index], Y_bi=y[stress_index], title=phoneme, xlim=(-4.4657748693986417, 8.1238328278216105), ylim=(-7.2366812187855185, 6.1187134324317736))
    elif label_type is GP_LVM_Scatter.LABEL_TYPE_SYLLABLE_TYPE:
        # One plot per syllable type, coloured by stress.
        syllable_type = np.array(syllable_type)
        stress = np.array(stress)
        types = set(syllable_type)
        for typ in types:
            print typ
            typ_index = np.where(syllable_type==typ)
            sub_stress = stress[typ_index]
            sub_x = x[typ_index]
            sub_y = y[typ_index]
            stress_index = np.where(sub_stress == 'Stress')
            unstress_index = np.where(sub_stress == 'Unstress')
            mask = np.ones(len(sub_stress), dtype=bool)
            mask[unstress_index] = False
            outpath = outpath.split('.')[0]
            Scatter.plot(sub_x, sub_y, '{}_{}.pdf'.format(outpath, typ), label_list=sub_stress, color=['r','b','g'], bivariate=False, X_bi=sub_x[stress_index], Y_bi=sub_y[stress_index], title=typ, xlim=(-4.4657748693986417, 8.1238328278216105), ylim=(-7.2366812187855185, 6.1187134324317736))
    elif label_type is GP_LVM_Scatter.LABEL_TYPE_FOLLOWED_BY_SIL:
        # Re-label: only syllables followed by silence count as 'Stress';
        # everything else (including originally stressed) becomes 'Unstress'.
        followed_list = Utility.load_obj(followed_list_file)
        fow_index = []
        name_index = np.array(name_index)
        for f in followed_list:
            k = np.where(name_index == f)[0]
            for kk in k:
                fow_index.append(kk.astype(int))
        # print fow_index
        stress = np.array(stress)
        stress_index = np.where(stress == 'Stress')
        unstress_index = np.where(stress == 'Unstress')
        stress[stress_index] = 'Unstress'
        stress[fow_index] = 'Stress'
        Scatter.plot(x, y, outpath, label_list=stress, color=['r','b','g'], bivariate=True, X_bi=x[fow_index], Y_bi=y[fow_index])
        tone = np.array(tone)
        # Also emit one plot per tone.
        for t in [0,1,2,3,4]:
            x_tone = x[np.where(tone == t)]
            y_tone = y[np.where(tone == t)]
            stress_tone = stress[np.where(tone == t)]
            tone_path = '{}_{}.pdf'.format(outpath.split('.')[0], t)
            Scatter.plot(x_tone, y_tone, tone_path, label_list=stress_tone, color=['r','b','g'], bivariate=True, X_bi=x[fow_index], Y_bi=y[fow_index], title='Tone {}'.format(t), xlim=(-3.7420549236630576, 3.7939531202951904), ylim=(-4.2426927228030289, 6.3913714950885101))
        # Persist the plotted arrays for later reuse.
        base_path = outpath.split('.')[0]
        Utility.save_obj(x, '{}_{}.pickle'.format(base_path,'x'))
        Utility.save_obj(y, '{}_{}.pickle'.format(base_path,'y'))
        Utility.save_obj(stress, '{}_{}.pickle'.format(base_path,'stress_followed'))
        Utility.save_obj(tone, '{}_{}.pickle'.format(base_path,'tone'))
    # elif label_type is GP_LVM_Scatter.LABEL_TYPE_SEPARATED_UNSUPERVISED_GROUP:
    pass