def main_EXP_G_VULN_GEO_LOCATION_evaluate_ANN_remapping():
    """Evaluate the stored ANN classifiers with the remapping g-vulnerability estimate.

    For every position in ``TRAINING_SET_SIZE`` and every training iteration
    whose result folder exists on disk, the function:

    1. fits a ``MinMaxScaler`` on column 0 of the matching training set;
    2. loads the Keras model saved in ``<result folder>/classifier_net_model``;
    3. for each test set in ``[TEST_ITERATIONS_BEG, TEST_ITERATIONS_END)``,
       scales column 0 of the test set, takes the arg-max of the network
       output as the predicted class and passes the matrix made of
       (test column 0, test column 1, prediction) to
       ``g_vuln_computation.compute_g_vuln_with_remapping``.

    Each estimate is printed, written to a text report inside the model
    folder, and finally all estimates are pickled as a ``DataFrame`` with the
    columns ``ANN_Rf_values``, ``number_of_test_samples``,
    ``number_of_training_samples``, ``train_iteration``, ``test_iteration``.

    All configuration comes from module-level names (``TRAINING_SET_SIZE``,
    ``RESULT_FOLDER``, ...), presumably populated by
    ``read_command_line_options()``.

    Returns:
        None.

    Raises:
        SystemExit: if the size lists have different lengths, or if scaling
            maps two distinct test observables onto the same value.
    """
    read_command_line_options()
    gpu_setup.gpu_setup(id_gpu=ID_GPU, memory_percentage=PERC_GPU)

    # NOTE(review): TEST_SET_SIZE is indexed position-by-position below but
    # its length is not validated here -- confirm whether it should be.
    if len(VALIDATION_SET_SIZE) != len(TRAINING_SET_SIZE):
        sys.exit(
            "The set size lists must all contain the same amount of items.")

    loaded_g_matrix = pn.read_pickle(path=G_MATRIX_PATH)
    loaded_g_matrix_rows = pn.read_pickle(path=G_MATRIX_ROWS_PATH)
    loaded_g_matrix_cols = pn.read_pickle(path=G_MATRIX_COLS_PATH)

    if 'ANN' not in APPROACHES:
        return

    # Every print below is a single-argument call so that the console output
    # is the same under Python 2's print statement and Python 3's function.
    print(
        "\n####################################################################################"
    )
    print(
        "################################### ANN approach ##################################"
    )
    print(
        "####################################################################################\n"
    )

    # One entry per evaluated (training iteration, test set) pair.
    ANN_Rf_values = []
    number_of_test_samples = []
    number_of_training_samples = []
    training_iteration = []
    test_iteration = []

    for size_idx, training_set_size in enumerate(TRAINING_SET_SIZE):
        validation_set_size = VALIDATION_SET_SIZE[size_idx]
        test_set_size = TEST_SET_SIZE[size_idx]

        for train_iteration in range(TRAIN_ITERATIONS):
            filepath = (RESULT_FOLDER + MODEL_NAME + "/" +
                        str(training_set_size) + "_training_size_and_" +
                        str(validation_set_size) +
                        "_validation_size_iteration_" + str(train_iteration))

            # Nothing was trained for this configuration: skip it before
            # spending time on loading data or fitting the scaler.
            if not os.path.exists(filepath):
                continue

            tr_set = pn.read_pickle(
                path=DATA_FOLDER + str(training_set_size) + "_training_and_" +
                str(validation_set_size) +
                "_validation_store_folder_train_iteration_" +
                str(train_iteration) + "/training_set.pkl")

            # The scaler is fitted on the training observables only and is
            # then reused, unchanged, for every test set.
            min_max_scaler = preprocessing.MinMaxScaler()
            min_max_scaler.fit(tr_set[:, 0].reshape(-1, 1))

            ANN_model = load_model(filepath=filepath + "/classifier_net_model")

            print("\n\n\n\n\n\n\n\n################################# test_size: " +
                  str(test_set_size) + " ################################")

            report_path = (filepath + "/ANN_" + str(training_set_size) +
                           "_training_and_" + str(validation_set_size) +
                           "_validation_file_R_estimate_iteration_" +
                           str(train_iteration) + "_" + str(test_set_size) +
                           "_test_set_size_test_iter_up_to_" +
                           str(TEST_ITERATIONS_END) + ".txt")

            # "w": the report is rewritten on every run.  The ``with`` block
            # closes it even when sys.exit() or an exception interrupts the
            # evaluation.
            with open(report_path, "w") as ANN_file_Rf_ANN_g_leak:
                for test_iterator in range(TEST_ITERATIONS_BEG,
                                           TEST_ITERATIONS_END):
                    test_banner = (
                        "\n\n\n################################# test_set_" +
                        str(test_iterator) +
                        " ################################")
                    print(test_banner)
                    ANN_file_Rf_ANN_g_leak.write(test_banner)

                    test_set = pn.read_pickle(
                        path=DATA_FOLDER + str(test_set_size) +
                        "_size_test_sets/test_set_" + str(test_iterator) +
                        ".pkl")

                    observables = test_set[:, 0]
                    X_test = observables.reshape(-1, 1)
                    X_test_unique = np.unique(observables).reshape(-1, 1)
                    y_test = test_set[:, 1]

                    X_test_preprocessed = min_max_scaler.transform(X_test)
                    X_test_preprocessed_unique = min_max_scaler.transform(
                        X_test_unique)

                    # Distinct observables must stay distinct after scaling.
                    if len(X_test_preprocessed_unique) != len(
                            np.unique(X_test_preprocessed_unique)):
                        sys.exit(
                            "The preprocessing created some collision which might affect the computation"
                        )

                    # ------------------------------------------ Prediction
                    print("X_test_preprocessed: " + " " +
                          str(X_test_preprocessed.shape))
                    print("y_test.shape: " + " " + str(y_test.shape))

                    pred = ANN_model.predict(x=X_test_preprocessed)
                    # Predicted class = index of the largest network output.
                    ANN_prediction_test = np.argmax(pred,
                                                    axis=1).reshape(-1, 1)

                    final_matrix = np.column_stack(
                        (X_test, y_test, ANN_prediction_test))

                    Rf_ANN_g_leak = g_vuln_computation.compute_g_vuln_with_remapping(
                        final_mat=final_matrix,
                        g_mat=loaded_g_matrix,
                        g_mat_rows=loaded_g_matrix_rows,
                        g_mat_cols=loaded_g_matrix_cols)

                    print("\nRf_ANN_g_leak = " + str(Rf_ANN_g_leak))
                    ANN_file_Rf_ANN_g_leak.write(
                        "\nANN_file_Rf_ANN_g_leak = " + str(Rf_ANN_g_leak))

                    ANN_Rf_values.append(Rf_ANN_g_leak)
                    number_of_test_samples.append(test_set_size)
                    number_of_training_samples.append(training_set_size)
                    training_iteration.append(train_iteration)
                    test_iteration.append(test_iterator)

    # Collect every estimate in a single table and store it next to the models.
    result_matrix = np.column_stack((
        np.array(ANN_Rf_values, dtype=np.float64),
        np.array(number_of_test_samples, dtype=np.int32),
        np.array(number_of_training_samples, dtype=np.int32),
        np.array(training_iteration, dtype=np.int32),
        np.array(test_iteration, dtype=np.int32),
    ))

    result_df = pn.DataFrame(data=result_matrix,
                             columns=[
                                 "ANN_Rf_values", "number_of_test_samples",
                                 "number_of_training_samples",
                                 "train_iteration", "test_iteration"
                             ])

    result_df.to_pickle(
        path=RESULT_FOLDER + MODEL_NAME +
        "/ANN_training_and_validation_result_df_train_size_" +
        str(TRAINING_SET_SIZE[0]) + "_up_to_test_iter_" +
        str(TEST_ITERATIONS_END) + ".pkl")
def main_EXP_G_VULN_GEO_LOCATION_get_stats_from_classifiers():
    """Report loss/accuracy statistics of the stored ANN classifiers.

    For every position in the size lists and every training iteration the
    saved Keras model is loaded, a ``MinMaxScaler`` is fitted on column 0 of
    the training set, and ``eval_on_dataset`` is run on the training set and
    on each of the ``TEST_ITERATIONS`` test sets.  ``eval_on_dataset`` is
    expected to return three values, unpacked here as loss, accuracy and
    "my accuracy".

    The training figures and, for each of the three test metrics, the mean,
    sample variance, sample standard deviation and standard error (all
    rounded to 3 decimals) are printed and written to
    ``ANN_file_get_stats_from_classifiers.txt`` in the model's result folder.

    Returns:
        None.

    Raises:
        SystemExit: if the size lists have different lengths or if a model
            folder is missing.
    """
    read_command_line_options()
    # NOTE(review): GPU id and memory share are hard-coded here, whereas the
    # other entry points in this file use ID_GPU / PERC_GPU -- confirm.
    gpu_setup.gpu_setup(id_gpu="3", memory_percentage=0.5)

    if len(TEST_SET_SIZE) != len(TRAINING_SET_SIZE) or len(VALIDATION_SET_SIZE) != len(TRAINING_SET_SIZE):
        sys.exit("The set size lists must all contain the same amount of items.")

    # Local folder for this experiment (kept local so the module-level
    # RESULT_FOLDER is neither read nor shadowed).
    result_folder = EXP_G_VULN_GEO_LOCATION_FOLDER + "RESULT_FOLDER_REMAPPING/" + MODEL_NAME + "/"

    if 'ANN' not in APPROACHES:
        return

    # Every print below is a single-argument call so that the console output
    # is the same under Python 2's print statement and Python 3's function.
    print("\n####################################################################################")
    print("################################### ANN approach ##################################")
    print("####################################################################################\n")

    # "w": the report is rewritten on every run.  The ``with`` block closes
    # it even when sys.exit() or an exception interrupts the evaluation.
    with open(result_folder + "ANN_file_get_stats_from_classifiers.txt", "w") as report:
        for size_idx, training_set_size in enumerate(TRAINING_SET_SIZE):
            validation_set_size = VALIDATION_SET_SIZE[size_idx]
            test_set_size = TEST_SET_SIZE[size_idx]

            for train_iteration in range(TRAIN_ITERATIONS):
                filepath = result_folder + str(training_set_size) + "_training_size_and_" + str(
                    validation_set_size) + "_validation_size_iteration_" + str(train_iteration)

                if not os.path.exists(filepath):
                    sys.exit("ERROR")

                ANN_model = load_model(filepath=filepath + "/classifier_net_model")

                # Folder holding the training set and the test sets of this
                # (sizes, training iteration) configuration.
                store_folder = DATA_FOLDER + str(training_set_size) + "_training_and_" + str(
                    validation_set_size) + "_validation_and_" + str(
                    test_set_size) + "_test_store_folder_train_iteration_" + str(train_iteration)

                training_set = pn.read_pickle(path=store_folder + "/training_set.pkl").values

                # The scaler is fitted on the training observables only and
                # handed to eval_on_dataset for both training and test data.
                min_max_scaler = preprocessing.MinMaxScaler()
                min_max_scaler.fit(training_set[:, 0].reshape(-1, 1))

                report.write("\n\n\n#################################################################")
                report.write("\n\n\n################################# training_set_" + str(
                    train_iteration) + " ################################")
                report.write("\n\n\n#################################################################")

                tr_loss, tr_acc, tr_myacc = eval_on_dataset(data_set=training_set, model=ANN_model,
                                                            min_max_scaler=min_max_scaler)

                print("model ---> " + " " + str(MODEL_NAME))

                print(" ".join(["\nTraining set, ", str(training_set_size), "size, iteration ",
                                str(train_iteration)]))
                report.write(
                    "\nTraining set, " + str(training_set_size) + " size, iteration " + str(train_iteration))

                print("\ntraining_loss: " + " " + str(round(tr_loss, 3)))
                report.write("\ntraining_loss: " + str(round(tr_loss, 3)))

                print("\ntraining_accuracy: " + " " + str(round(tr_acc, 3)))
                report.write("\ntraining_accuracy: " + str(round(tr_acc, 3)) + "\n")

                print("\ntraining_my_accuracy" + " " + str(round(tr_myacc, 3)))
                report.write("\ntraining_my_accuracy: " + str(round(tr_myacc, 3)) + "\n")

                ts_loss_list_for_avg = []
                ts_accuracy_list_for_avg = []
                ts_my_accuracy_list_for_avg = []

                for test_iterator in range(0, TEST_ITERATIONS):
                    print("\n\n\n################################# test_set_" + str(
                        test_iterator) + " ################################")

                    test_set = pn.read_pickle(
                        path=store_folder + "/" + str(
                            test_set_size) + "_size_test_sets/test_set_" + str(test_iterator)).values

                    ts_loss, ts_acc, ts_myacc = eval_on_dataset(data_set=test_set, model=ANN_model,
                                                                min_max_scaler=min_max_scaler)

                    ts_loss_list_for_avg.append(ts_loss)
                    ts_accuracy_list_for_avg.append(ts_acc)
                    ts_my_accuracy_list_for_avg.append(ts_myacc)

                # Aggregate the per-test-set values of each metric.
                _report_summary_statistics(report, "test_loss_avg", ts_loss_list_for_avg)
                _report_summary_statistics(report, "ts_accuracy_avg", ts_accuracy_list_for_avg)
                _report_summary_statistics(report, "ts_my_accuracy_avg", ts_my_accuracy_list_for_avg)


def _report_summary_statistics(report_file, label, samples):
    """Print and log mean, variance, standard deviation and standard error of ``samples``.

    Args:
        report_file: writable text file receiving one ``name: value`` line
            per statistic.
        label: name used for the mean; the other statistics are named
            ``label + "_var"``, ``label + "_standard_deviation"`` and
            ``label + "_standard_error"``.
        samples: sequence of numbers, one per evaluated test set.

    All values are rounded to 3 decimals.  Variance, standard deviation and
    standard error use ``ddof=1``.
    """
    values = np.array(samples)
    summary = [
        (label, round(np.mean(values, axis=0), 3)),
        (label + "_var", round(np.var(a=values, ddof=1), 3)),
        (label + "_standard_deviation", round(np.std(a=values, ddof=1), 3)),
        (label + "_standard_error", round(stats.sem(a=values, ddof=1), 3)),
    ]

    # Console first, then the report file.
    for name, value in summary:
        print(name + ": " + str(value))
    for name, value in summary:
        report_file.write(name + ": " + str(value) + "\n")
def main_BIS_EXP_G_VULN_DP_evaluate_ANN_remapping():
    """Evaluate the stored ANN classifiers with the multi-dimensional remapping g-vulnerability estimate.

    For every position in ``TRAINING_SET_SIZE`` and every training iteration
    whose result folder exists on disk, the function:

    1. loads the Keras model saved in ``<result folder>/classifier_net_model``;
    2. takes the global minimum and maximum of the training-set columns
       ``0 .. n_cols - 3`` as scaling bounds;
    3. for each test set in ``[TEST_ITERATIONS_BEG, TEST_ITERATIONS_END)``,
       scales all test columns but the last with
       ``preprocess.scaler_zero_one_all_cols_ts``, takes the arg-max of the
       network output as the predicted class and passes the matrix made of
       (test input columns, last test column, prediction) to
       ``g_vuln_computation.compute_g_vuln_with_remapping_multidimesional_inputs``.

    Each estimate is printed, written to a text report inside the model
    folder, and finally all estimates are pickled as a ``DataFrame`` with the
    columns ``ANN_Rf_values``, ``number_of_test_samples``,
    ``number_of_training_samples``, ``train_iteration``, ``test_iteration``.

    The test-set size is always ``TEST_SET_SIZE[0]``.

    Returns:
        None.

    Raises:
        SystemExit: if the size lists have different lengths.
    """
    read_command_line_options()
    gpu_setup.gpu_setup(id_gpu=ID_GPU, memory_percentage=PERC_GPU)

    if len(TEST_SET_SIZE) != len(TRAINING_SET_SIZE) or len(
            VALIDATION_SET_SIZE) != len(TRAINING_SET_SIZE):
        sys.exit(
            "The set size lists must all contain the same amount of items.")

    loaded_g_matrix = pn.read_pickle(path=G_MATRIX_PATH)
    loaded_g_matrix_rows = pn.read_pickle(path=G_MATRIX_ROWS_PATH)
    loaded_g_matrix_cols = pn.read_pickle(path=G_MATRIX_COLS_PATH)

    if 'ANN' not in APPROACHES:
        return

    # Every print below is a single-argument call so that the console output
    # is the same under Python 2's print statement and Python 3's function.
    print(
        "\n####################################################################################"
    )
    print(
        "################################### ANN approach ##################################"
    )
    print(
        "####################################################################################\n"
    )

    # One entry per evaluated (training iteration, test set) pair.
    ANN_Rf_values = []
    number_of_test_samples = []
    number_of_training_samples = []
    training_iteration = []
    test_iteration = []

    # The same test-set size is used for every training size.
    test_set_size = TEST_SET_SIZE[0]

    for size_idx, training_set_size in enumerate(TRAINING_SET_SIZE):
        validation_set_size = VALIDATION_SET_SIZE[size_idx]

        for train_iteration in range(TRAIN_ITERATIONS):
            filepath = (RESULT_FOLDER + MODEL_NAME + "/" +
                        str(training_set_size) + "_training_size_and_" +
                        str(validation_set_size) +
                        "_validation_size_iteration_" + str(train_iteration))

            # Nothing was trained for this configuration: skip it.
            if not os.path.exists(filepath):
                continue

            ANN_model = load_model(filepath=filepath + "/classifier_net_model")

            training_set = pn.read_pickle(
                DATA_FOLDER + str(training_set_size) + "_training_and_" +
                str(validation_set_size) +
                "_validation_store_folder_train_iteration_" +
                str(train_iteration) + "/training_set.pkl")

            # NOTE(review): the training set drops its last TWO columns while
            # the test set below drops only ONE -- presumably the training
            # set carries an extra label column; confirm.
            training_inputs = training_set[:, 0:training_set.shape[1] - 2]
            min_tr = np.min(training_inputs)
            max_tr = np.max(training_inputs)

            print("\n\n\n\n\n\n\n\n################################# test_size: " +
                  str(test_set_size) + " ################################")

            report_path = (filepath + "/ANN_" + str(training_set_size) +
                           "_training_and_" + str(validation_set_size) +
                           "_validation_file_R_estimate_iteration_" +
                           str(train_iteration) + "_" + str(test_set_size) +
                           "_test_set_size_test_iter_up_to_" +
                           str(TEST_ITERATIONS_END) + ".txt")

            # "w": the report is rewritten on every run.  The ``with`` block
            # closes it even when an exception interrupts the evaluation.
            with open(report_path, "w") as g_vuln_ANN_file:
                for test_iterator in range(TEST_ITERATIONS_BEG,
                                           TEST_ITERATIONS_END):
                    test_banner = (
                        "\n\n\n################################# test_set_" +
                        str(test_iterator) +
                        " ################################")
                    print(test_banner)
                    g_vuln_ANN_file.write(test_banner)

                    test_set = pn.read_pickle(path=DATA_FOLDER +
                                              str(test_set_size) +
                                              "_size_test_set/test_set_" +
                                              str(test_iterator) + ".pkl")

                    X_test = test_set[:, 0:test_set.shape[1] - 1]
                    y_test = test_set[:, -1]

                    print(X_test)
                    print(str(max_tr) + " " + str(min_tr))

                    X_test_preprocessed = preprocess.scaler_zero_one_all_cols_ts(
                        data_tr=training_inputs,
                        data=X_test,
                        max_=max_tr,
                        min_=min_tr)

                    print(X_test_preprocessed)

                    # ------------------------------------------ Prediction
                    print("X_test_preprocessed: " + " " +
                          str(X_test_preprocessed.shape))
                    print("y_test.shape: " + " " + str(y_test.shape))

                    pred = ANN_model.predict(x=X_test_preprocessed)
                    print(pred)

                    # Predicted class = index of the largest network output.
                    ANN_prediction_test = np.argmax(pred,
                                                    axis=1).reshape(-1, 1)

                    final_matrix = np.column_stack(
                        (X_test, y_test, ANN_prediction_test))

                    g_vuln_ANN = g_vuln_computation.compute_g_vuln_with_remapping_multidimesional_inputs(
                        final_mat=final_matrix,
                        g_mat=loaded_g_matrix,
                        g_mat_rows=loaded_g_matrix_rows,
                        g_mat_cols=loaded_g_matrix_cols)

                    print("\ng_vuln_ANN = " + str(g_vuln_ANN))
                    g_vuln_ANN_file.write("\ng_vuln_ANN_file = " +
                                          str(g_vuln_ANN))

                    ANN_Rf_values.append(g_vuln_ANN)
                    number_of_test_samples.append(test_set_size)
                    number_of_training_samples.append(training_set_size)
                    training_iteration.append(train_iteration)
                    test_iteration.append(test_iterator)

    # Collect every estimate in a single table and store it next to the models.
    result_matrix = np.column_stack((
        np.array(ANN_Rf_values, dtype=np.float64),
        np.array(number_of_test_samples, dtype=np.int32),
        np.array(number_of_training_samples, dtype=np.int32),
        np.array(training_iteration, dtype=np.int32),
        np.array(test_iteration, dtype=np.int32),
    ))

    result_df = pn.DataFrame(data=result_matrix,
                             columns=[
                                 "ANN_Rf_values", "number_of_test_samples",
                                 "number_of_training_samples",
                                 "train_iteration", "test_iteration"
                             ])

    result_df.to_pickle(
        path=RESULT_FOLDER + MODEL_NAME +
        "/ANN_training_and_validation_result_df_train_size_" +
        str(TRAINING_SET_SIZE[0]) + "_up_to_test_iter_" +
        str(TEST_ITERATIONS_END) + ".pkl")
def train_classifier_net(self,
                         results_folder,
                         training_set,
                         training_supervision,
                         validation_set,
                         validation_supervision,
                         test_set=None,
                         test_supervision=None):
    """Train the classifier network one epoch at a time, logging and checkpointing every epoch.

    The network returned by ``self.build_classifier_network()`` is fitted for
    ``int(self.epochs)`` rounds of exactly one epoch each.  After every round
    the method:

    * appends loss / categorical accuracy (training and validation) from the
      Keras history to the ``self.classifier_network_*_vec`` lists and to
      ``<results_folder>/log_file.txt``;
    * computes precision, recall and F1 on the arg-max classes of the
      training and validation sets through ``utilities`` and logs them,
      storing the F1 values in ``self.f1_value_training`` /
      ``self.f1_value_validation``;
    * pickles all tracked vectors and saves the model and its weights into
      ``results_folder``.

    Args:
        results_folder: existing directory receiving the log, the pickled
            vectors and the saved model; also stored in
            ``self.results_folder``.
        training_set: network inputs used for fitting.
        training_supervision: training targets; arg-max over axis 1 is taken
            as the class index.
        validation_set: network inputs used for validation.
        validation_supervision: validation targets, same layout as
            ``training_supervision``.
        test_set: currently unused (test-set evaluation is disabled).
        test_supervision: currently unused.

    Returns:
        None.
    """
    # The log is opened before anything else; the ``with`` block guarantees
    # it is flushed and closed even if training or metric code raises.
    with open(results_folder + "/log_file.txt", "a") as log_file:
        epochs = int(self.epochs)
        batch_size = int(self.batch_size)
        perc_gpu = float(self.perc_gpu)

        gpu_setup.gpu_setup(id_gpu=self.id_gpu, memory_percentage=perc_gpu)

        classifier_net_model = self.build_classifier_network()

        self.results_folder = results_folder

        for epoch in range(epochs):
            # Single-argument print: same output under Python 2 and Python 3.
            epoch_banner = "\n\n\nEpoch " + str(epoch)
            print(epoch_banner)
            log_file.write(epoch_banner)

            # Fitting one epoch per call lets the metrics below be computed
            # and checkpointed after each epoch.
            history_classifier_net = classifier_net_model.fit(
                x=training_set,
                y=training_supervision,
                batch_size=batch_size,
                epochs=1,
                shuffle=True,
                validation_data=(validation_set, validation_supervision))
            history = history_classifier_net.history

            epochs_run = len(history.get('loss'))
            self.classifier_network_epochs.append(epochs_run)
            if epochs_run != 1:
                # inspect.stack()[0][3] is this function's name.
                err_hndl(str_="error_epochs_repartition",
                         add=inspect.stack()[0][3])

            # (history key, destination list, log label)
            tracked_metrics = (
                ('loss', self.classifier_network_loss_vec,
                 "\nClassifier loss ---> "),
                ('categorical_accuracy',
                 self.classifier_network_categ_acc_vec,
                 "\nClassifier categorical accuracy ---> "),
                ('val_loss', self.classifier_network_val_loss_vec,
                 "\nClassifier validation loss ---> "),
                ('val_categorical_accuracy',
                 self.classifier_network_val_categ_acc_vec,
                 "\nClassifier validation categorical accuracy ---> "),
            )
            for history_key, destination, log_label in tracked_metrics:
                epoch_value = history.get(history_key)[0]
                destination.append(epoch_value)
                log_file.write(log_label + str(epoch_value))

            # ---- metrics that need class predictions (arg-max of outputs)
            training_set_classes_supervision = np.argmax(training_supervision,
                                                         axis=1)
            training_set_classes_prediction = np.argmax(
                classifier_net_model.predict(x=training_set,
                                             batch_size=batch_size),
                axis=1)

            validation_set_classes_supervision = np.argmax(
                validation_supervision, axis=1)
            validation_set_classes_prediction = np.argmax(
                classifier_net_model.predict(x=validation_set,
                                             batch_size=batch_size),
                axis=1)

            # (split name used in the log, true classes, predicted classes,
            #  list collecting the per-epoch F1)
            evaluated_splits = (
                ("training", training_set_classes_supervision,
                 training_set_classes_prediction, self.f1_value_training),
                ("validation", validation_set_classes_supervision,
                 validation_set_classes_prediction, self.f1_value_validation),
            )
            for split_name, true_classes, predicted_classes, f1_history in evaluated_splits:
                precision = utilities.compute_precision(
                    y_classes=true_classes, y_pred_classes=predicted_classes)
                log_file.write("\nClassifier " + split_name +
                               "_precision ---> " + str(precision))

                recall = utilities.compute_recall(
                    y_classes=true_classes, y_pred_classes=predicted_classes)
                log_file.write("\nClassifier " + split_name +
                               "_recall ---> " + str(recall))

                f1_score = utilities.compute_f1_score(
                    y_classes=true_classes, y_pred_classes=predicted_classes)
                log_file.write("\nClassifier " + split_name + "_f1 ---> " +
                               str(f1_score))

                f1_history.append(f1_score)

            # ---- checkpoint: tracked vectors, then model and weights
            # NOTE(review): checkpointing runs at the end of every epoch --
            # confirm this cadence is the intended one.
            vectors_to_save = (
                ('/classifier_network_epochs.pkl',
                 self.classifier_network_epochs),
                ('/classifier_network_loss_vec.pkl',
                 self.classifier_network_loss_vec),
                ('/classifier_network_categ_acc_vec.pkl',
                 self.classifier_network_categ_acc_vec),
                ('/classifier_network_val_loss_vec.pkl',
                 self.classifier_network_val_loss_vec),
                ('/classifier_network_val_categ_acc_vec.pkl',
                 self.classifier_network_val_categ_acc_vec),
                ('/f1_value_training_vec.pkl', self.f1_value_training),
                ('/f1_value_validation_vec.pkl', self.f1_value_validation),
            )
            for file_name, vector in vectors_to_save:
                with open(results_folder + file_name, 'wb') as f:
                    pickle.dump(vector, f)

            classifier_net_model.save(filepath=results_folder +
                                      "/classifier_net_model")
            classifier_net_model.save_weights(filepath=results_folder +
                                              "/classifier_net_model_weights")

    return None
def main_EXP_PSW_evaluate_classifiers_remapping():
    """Evaluate the saved ANN classifiers on the stored test sets.

    The function reads the command line options, configures the GPU and loads
    the g matrix from ``G_MATRIX_PATH``. When ``'ANN'`` is listed in
    ``APPROACHES`` it then, for every (training size, validation size) pair
    and every train iteration:

    * loads the saved model from the corresponding result folder;
    * refits a ``MinMaxScaler`` on the first column of the stored training
      set, so that test observations are scaled the same way;
    * for every test iteration in
      ``[TEST_ITERATIONS_BEG, TEST_ITERATIONS_END)`` predicts a class for each
      test sample, passes ``(observation, label, prediction)`` rows to
      ``g_vuln_computation.compute_g_vuln_with_remapping_positional`` and
      writes the returned value to a text report inside the model folder.

    All collected values are finally stored as a pickled ``DataFrame`` under
    ``RESULT_FOLDER + MODEL_NAME``.

    Raises:
        SystemExit: if the training and validation size lists differ in
            length, if a model folder is missing, or if the min-max scaling
            maps two distinct test observations to the same value.
    """
    read_command_line_options()
    gpu_setup.gpu_setup(id_gpu=ID_GPU, memory_percentage=PERC_GPU)

    if len(VALIDATION_SET_SIZE) != len(TRAINING_SET_SIZE):
        sys.exit(
            "The set size lists must all contain the same amount of items.")

    loaded_g_matrix = pn.read_pickle(path=G_MATRIX_PATH)

    # ------------------------------ ANN approach ------------------------------
    if 'ANN' in APPROACHES:
        print(
            "\n####################################################################################"
        )
        print(
            "################################### ANN approach ##################################"
        )
        print(
            "####################################################################################\n"
        )

        # One entry per (training size, train iteration, test iteration).
        ANN_Rf_values = []
        number_of_test_samples = []
        number_of_training_samples = []
        training_iteration = []
        test_iteration = []

        # The two lists were checked above to have the same length.
        for training_set_size, validation_set_size in zip(
                TRAINING_SET_SIZE, VALIDATION_SET_SIZE):
            # Only the first configured test-set size is used here.
            # NOTE(review): confirm this is intended rather than one test size
            # per training size.
            test_set_size = TEST_SET_SIZE[0]

            for train_iteration in range(TRAIN_ITERATIONS):
                filepath = (RESULT_FOLDER + MODEL_NAME + "/" +
                            str(training_set_size) + "_training_size_and_" +
                            str(validation_set_size) +
                            "_validation_size_iteration_" +
                            str(train_iteration))

                if not os.path.exists(filepath):
                    print(filepath)
                    sys.exit("NO DATA")

                ANN_model = load_model(filepath=filepath +
                                       "/classifier_net_model")

                training_set = pn.read_pickle(
                    DATA_FOLDER + str(training_set_size) + "_training_and_" +
                    str(validation_set_size) +
                    "_validation_store_folder_train_iteration_" +
                    str(train_iteration) + "/training_set.pkl")

                # Fit the scaler on the training observations only; the test
                # observations are transformed with these statistics below.
                min_max_scaler = preprocessing.MinMaxScaler()
                min_max_scaler.fit(training_set[:, 0].reshape(-1, 1))

                print(
                    "\n\n\n\n\n\n\n\n################################# test_size: "
                    + str(test_set_size) +
                    " ################################")

                report_path = (filepath + "/ANN_" + str(training_set_size) +
                               "_training_and_" + str(validation_set_size) +
                               "_validation_file_R_estimate_iteration_" +
                               str(train_iteration) + "_" +
                               str(test_set_size) +
                               "_test_set_size_test_iter_up_to_" +
                               str(TEST_ITERATIONS_END) + ".txt")

                # "w" replaces the invalid mode string "wa"; the context
                # manager closes the report even when sys.exit() is raised
                # inside the loop.
                with open(report_path, "w") as g_vuln_ANN_file:
                    for test_iterator in range(TEST_ITERATIONS_BEG,
                                               TEST_ITERATIONS_END):
                        test_header = (
                            "\n\n\n################################# test_set_"
                            + str(test_iterator) +
                            " ################################")
                        print(test_header)
                        g_vuln_ANN_file.write(test_header)

                        test_set = pn.read_pickle(
                            path=DATA_FOLDER_TEST + str(test_set_size) +
                            "_size_test_sets/test_set_" + str(test_iterator) +
                            ".pkl")

                        X_test = test_set[:, 0]
                        y_test = test_set[:, 1]

                        X_test_unique = np.unique(X_test).reshape(-1, 1)
                        X_test = X_test.reshape(-1, 1)
                        X_test_preprocessed = min_max_scaler.transform(X_test)
                        X_test_preprocessed_unique = min_max_scaler.transform(
                            X_test_unique)

                        # Distinct observations must stay distinct after
                        # scaling, otherwise the estimate would be affected.
                        if len(X_test_preprocessed_unique) != len(
                                np.unique(X_test_preprocessed_unique)):
                            sys.exit(
                                "The preprocessing created some collision which might affect the computation"
                            )

                        # ---------------------- Prediction ----------------------
                        print("X_test_preprocessed:  " +
                              str(X_test_preprocessed.shape))
                        print("y_test.shape:  " + str(y_test.shape))

                        pred = ANN_model.predict(x=X_test_preprocessed)
                        # Predicted class = index of the largest output per
                        # row (assumes predict returns a 2-D
                        # (samples, classes) array).
                        ANN_prediction_test = np.argmax(
                            pred, axis=1).reshape(-1, 1)

                        # Columns: observation, true label, predicted label.
                        final_matrix = np.column_stack(
                            (X_test, y_test, ANN_prediction_test))

                        g_vuln_ANN = g_vuln_computation.compute_g_vuln_with_remapping_positional(
                            final_mat=final_matrix, g_mat=loaded_g_matrix)

                        print("\ng_vuln_ANN = " + str(g_vuln_ANN))
                        g_vuln_ANN_file.write("\ng_vuln_ANN_file = " +
                                              str(g_vuln_ANN))

                        ANN_Rf_values.append(g_vuln_ANN)
                        number_of_test_samples.append(test_set_size)
                        number_of_training_samples.append(training_set_size)
                        training_iteration.append(train_iteration)
                        test_iteration.append(test_iterator)

        # Assemble one row per evaluated test set.
        result_matrix = np.column_stack((
            np.array(ANN_Rf_values, dtype=np.float64),
            np.array(number_of_test_samples, dtype=np.int32),
            np.array(number_of_training_samples, dtype=np.int32),
            np.array(training_iteration, dtype=np.int32),
            np.array(test_iteration, dtype=np.int32),
        ))

        result_df = pn.DataFrame(data=result_matrix,
                                 columns=[
                                     "ANN_Rf_values",
                                     "number_of_test_samples",
                                     "number_of_training_samples",
                                     "train_iteration", "test_iteration"
                                 ])

        # The output name carries only the first training size.
        result_df.to_pickle(
            path=RESULT_FOLDER + MODEL_NAME +
            "/ANN_training_and_validation_result_df_train_size_" +
            str(TRAINING_SET_SIZE[0]) + "_up_to_test_iter_" +
            str(TEST_ITERATIONS_END) + ".pkl")