Exemplo n.º 1
0
# Report the collected record names and how many examples they amount to.
print("all rec_name array is: " + str(all_rec_name_array))
print("size of all rec name array i.e num of egs is : "
      + str(len(all_rec_name_array)))

# Class labels: one cl.generate_labels_bool() call per record group, then the
# three results are joined with `+` and wrapped in a numpy array.
# NOTE(review): `+` is list concatenation only if the helper returns lists,
# and the join order presumably has to match the row order of all_features --
# confirm both against the rest of the script.
y_aftdb = cl.generate_labels_bool("aftdb", rec_name_array_aftdb)
y_afpdb_patient = cl.generate_labels_bool("afdb",
                                          rec_name_array_afpdb_patient)
y_afpdb_normal = cl.generate_labels_bool("nsrdb",
                                         rec_name_array_afpdb_normal)
y_all = np.array(
    y_aftdb
    + y_afpdb_patient
    + y_afpdb_normal
)

# Turn the per-record feature lists into a single matrix and show its shape.
all_feature_matrix_old = cl.covert_array_to_matrix(all_features)
print("shape of all feature matrix  is: "
      + str(all_feature_matrix_old.shape))

# Remove the column at index 3 (axis 1) and show the shape again; the second
# print reports the matrix *after* the column has been dropped.
all_feature_matrix = np.delete(all_feature_matrix_old, 3, axis=1)
print("shape of all feature matrix  is: "
      + str(all_feature_matrix.shape))

###############################################################################
# Classification
#
# No separate evaluation split is made here; all rows go into the
# cross-validation iterator built below.
# NOTE(review): shuffle=True is used without a random_state, so fold
# membership differs between runs.
folds = 10
cv = StratifiedKFold(
    y_all,
    n_folds=folds,
    shuffle=True,
)
Exemplo n.º 2
0
# Load the record names that were stored alongside the features.
rec_name_array = rw.read_features_frm_file(output_folder,
                                           "rec_name_array_pickle.txt")

##################### change key value pairs of global vocab ####################
# Swap the keys and values of global_vocab.
inv_global_vocab = {value: key for key, value in global_vocab.items()}

# Materialise the values as a list before handing them to np.savetxt: on
# Python 3 dict.values() is a view object, which numpy converts to a 0-d
# object array and savetxt then rejects. On Python 2 this is a plain copy.
all_features_list = list(inv_global_vocab.values())
np.savetxt(output_folder + "all_features_list.txt",
           all_features_list,
           fmt="%s",
           delimiter=',',
           newline='\n')

# Generate class labels, one per record name.
y = np.array(cl.generate_labels(rec_name_array))
print("label array is: " + str(y))

# Convert the list of per-record feature lists to a matrix; the helper is
# given the number of records and (largest vocabulary value + 1).
all_feature_matrix = cl.covert_array_to_matrix(
    all_features, len(all_features), max(global_vocab.values()) + 1)

#################### SEPARATING EVALUATION DATA #########################
# Hold out 20% of the rows for evaluation; random_state=0 fixes the split.
X_cv, X_eval, y_cv, y_eval = cross_validation.train_test_split(
    all_feature_matrix, y, test_size=0.2, random_state=0)

# NOTE(review): the two subsets are passed through cl.normalise_mean_var
# separately; if that helper derives its statistics from its argument, the
# evaluation set is scaled with different parameters than the CV set --
# confirm this is intended.
X_cv_normalized_matrix = cl.normalise_mean_var(X_cv)
X_eval_normalized_matrix = cl.normalise_mean_var(X_eval)

############## with normalisation ######################
# Classification
normalised = "  "
Exemplo n.º 3
0
##################### change key value pairs of global vocab ####################
# Swap the keys and values of global_vocab.
inv_global_vocab = {value: key for key, value in global_vocab.items()}

# Materialise the values as a list before handing them to np.savetxt: on
# Python 3 dict.values() is a view object, which numpy converts to a 0-d
# object array and savetxt then rejects. On Python 2 this is a plain copy.
all_features_list = list(inv_global_vocab.values())
np.savetxt(output_folder + "all_features_list.txt",
           all_features_list,
           fmt="%s",
           delimiter=',',
           newline='\n')

# Generate class labels, one per record name.
y = np.array(cl.generate_labels(rec_name_array))
print("label array is: " + str(y))

# Convert the list of per-record feature lists to a matrix; the helper is
# given the number of records and (largest vocabulary value + 1).
all_feature_matrix = cl.covert_array_to_matrix(all_features, len(all_features),
                                               max(global_vocab.values()) + 1)

#################### SEPARATING EVALUATION DATA #########################
# Hold out 20% of the rows for evaluation; random_state=0 fixes the split.
X_cv, X_eval, y_cv, y_eval = cross_validation.train_test_split(
    all_feature_matrix, y, test_size=0.2, random_state=0)

# NOTE(review): the two subsets are passed through cl.normalise_mean_var
# separately; if that helper derives its statistics from its argument, the
# evaluation set is scaled with different parameters than the CV set --
# confirm this is intended.
X_cv_normalized_matrix = cl.normalise_mean_var(X_cv)

X_eval_normalized_matrix = cl.normalise_mean_var(X_eval)

############## with normalisation ######################
# Classification
normalised = "  "
# Alternative labelling (kept for reference): build labels per record group
# and join them instead of labelling everything as "afpdb".
#y_aftdb=cl.generate_labels("aftdb",rec_name_array_aftdb)
#y_afpdb_patient=cl.generate_labels("afdb",rec_name_array_afpdb_patient)
#y_afpdb_normal=cl.generate_labels("nsrdb",rec_name_array_afpdb_normal)
#y_all=np.array(y_afpdb_patient+y_afpdb_normal)
y_all = cl.generate_labels("afpdb", rec_name_array)
print("all label array is: " + str(y_all))

##################### change key value pairs of global vocab ####################
# Swap the keys and values of global_vocab.
inv_global_vocab = {value: key for key, value in global_vocab.items()}

# Materialise the values as a list before handing them to np.savetxt: on
# Python 3 dict.values() is a view object, which numpy converts to a 0-d
# object array and savetxt then rejects. On Python 2 this is a plain copy.
all_features_list = list(inv_global_vocab.values())
np.savetxt(output_folder + "all_features_list.txt",
           all_features_list,
           fmt="%s",
           delimiter=',',
           newline='\n')

# Combine lists and convert the list of lists to one big matrix.
all_feature_matrix = cl.covert_array_to_matrix(all_features)
#all_feature_matrix=cl.covert_array_to_matrix(all_features_afpdb_patient+all_features_afpdb_normal);

#normalized_matrix=cl.normalise_mean_var(all_feature_matrix)
print("shape of all feature matrix  is: " + str(all_feature_matrix.shape))

#################### GENERATE TRAIN TEST INDICES FOR SHUFFLE SPLIT #########################
#X_cv, X_eval, y_cv, y_eval = cross_validation.train_test_split(all_feature_matrix, y, test_size=0.2, random_state=0)

# A single shuffled split over len(y_all) samples with 20% held out for
# testing; random_state=0 fixes the split.
cv_shufflesplit = cross_validation.ShuffleSplit(len(y_all), 1, test_size=0.2,
                                                train_size=None,
                                                random_state=0)

#################### Save features and y_all to csv file #########################
# Keys of the inverted vocabulary in ascending order.
csv_indexes = sorted(inv_global_vocab.keys())
csv_header = []
Exemplo n.º 5
0
##################### change key value pairs of global vocab ####################
# Swap the keys and values of global_vocab.
inv_global_vocab = {value: key for key, value in global_vocab.items()}

# Materialise the values as a list before handing them to np.savetxt: on
# Python 3 dict.values() is a view object, which numpy converts to a 0-d
# object array and savetxt then rejects. On Python 2 this is a plain copy.
all_features_list = list(inv_global_vocab.values())
np.savetxt(output_folder + "all_features_list.txt",
           all_features_list,
           fmt="%s",
           delimiter=',',
           newline='\n')

# Generate class labels, one per record name.
y = np.array(cl.generate_labels(rec_name_array))
print("label array is: " + str(y))

# Convert the list of per-record feature lists to a matrix; the helper is
# given the number of records.
all_feature_matrix = cl.covert_array_to_matrix(all_features, len(all_features))

print("shape of all feature matrix  is: " + str(all_feature_matrix.shape))
#################### SEPARATING EVALUATION DATA #########################
#X_cv, X_eval, y_cv, y_eval = cross_validation.train_test_split(all_feature_matrix, y, test_size=0.2, random_state=0)

# Stop the script here; nothing below this line runs. SystemExit is raised
# directly because the bare exit() helper is injected by the `site` module
# and is not available in every interpreter configuration.
raise SystemExit

############## with normalisation ######################
# Classification
# normalised="  "
# normalized_matrix=cl.normalise_mean_var(all_feature_matrix)
# rw.write_df_to_csv(normalized_matrix, csv_header, output_folder, "features_normalised_test.csv")
# exit()