def reportBestResult():
    """Print the grid value with the highest mean AUC and its fold statistics.

    Steps, in order:

    1. For each candidate value in ``C``, load ``results/ada_auc_<value>.bin``
       and keep the value whose mean AUC is highest (ties keep the earliest
       value, because the comparison is strict).
    2. Print the selected value, then load and print
       ``results/rfc_folds_confusion_<selected>.bin`` together with its mean
       and standard deviation along axis 2.
    3. Pass the mean matrix to ``compute_performance_metrics``.
    4. Collect the mean accuracy and mean recall of every candidate value.

    Returns:
        None. Results are reported through ``print``.
    """
    # Single-argument, parenthesised print is used throughout: it behaves the
    # same as the Python 2 print statement and is valid Python 3 as well.
    C = [16, 32, 64, 128, 256, 512]
    bestAUC = 0
    bestParam = 0

    for c in C:
        auc_history = load_np_array("results/ada_auc_" + str(c) + ".bin")
        mean_auc = auc_history.mean()
        if mean_auc > bestAUC:
            bestAUC = mean_auc
            bestParam = c

    print(bestParam)

    # Load the confusion matrices of the *selected* value. The previous code
    # used the loop variable ``c`` here, which after the loop always holds the
    # last grid entry regardless of which value scored best.
    confusion_matrix_history = load_np_array(
        "results/rfc_folds_confusion_" + str(bestParam) + ".bin")
    print(confusion_matrix_history)

    # assumes the stored array is (row, column, fold), so axis 2 runs over
    # folds -- TODO confirm against the code that writes this file.
    mean_cm = np.mean(confusion_matrix_history, axis=2)
    std_cm = np.std(confusion_matrix_history, axis=2)
    print(mean_cm)
    print(std_cm)

    # The return value is intentionally left unused, as before.
    compute_performance_metrics(mean_cm)

    acc_mean = []
    recall_mean = []
    for c in C:
        # Accuracy is loaded before recall for each value, as before.
        accuracy_history = load_np_array(
            "results/ada_accuracy_" + str(c) + ".bin")
        recall_history = load_np_array(
            "results/ada_recall_" + str(c) + ".bin")
        acc_mean.append(accuracy_history.mean())
        recall_mean.append(recall_history.mean())
    # NOTE(review): acc_mean / recall_mean are filled but not consumed in the
    # visible body -- confirm whether a plotting/reporting step belongs here.
# cost_history = clf.fit(homesite_data, batch_size = 128, \
#                        max_iterations = 100, save_interval = 10, \
#                        path = "classifiers_data/ann_weights.bin",
#                        return_cost = True)
#
# # Save cost and accuracy history
# save_np_array(cost_path, cost_history)

# # Test neural network.
# oversampled_path = "resources/oversampled_normalized_data_ratio_2.bin"
# homesite_data = Data()
# homesite_data.load_sliptted_data(oversampled_path, one_hot = True)
# clf = NeuralNetwork(path = "classifiers_data/ann_weights.bin", \
#                     lr = 0.005, lamb = 0.)
# prob_predicted_labels = clf.predict(homesite_data.validation_x)
# predicted_labels = np.argmax(prob_predicted_labels, axis = 1)
# validation_labels = np.argmax(homesite_data.validation_y, axis = 1)
#
# # Show final results.
# results = confusion_matrix(validation_labels, predicted_labels)
# accuracy, precision, recall = compute_performance_metrics(results)
# auc = compute_auc(validation_labels, prob_predicted_labels[:, 1])

# Plot the two stored cost histories (with and without normalization,
# judging by the file names and legend labels) on one figure.
non_normalized_cost = load_np_array(
    "results/ann_non_normalized_cost_history.bin")
normalized_cost = load_np_array(
    "results/ann_normalized_cost_history.bin")

# The normalized series is listed first so it lines up with the first label.
plot(
    "results/normalization_vs_non_normalization.png",
    [normalized_cost, non_normalized_cost],
    ["com normalizacao", "sem normalizacao"],
    "iteracoes",
    "custo",
    'center right',
)
# recall_history.append(recall) # auc_history.append(auc) # balancing_rate = np.count_nonzero(homesite.train_y) * 1.0 / len(homesite.train_y) # balancing_rate_history.append(balancing_rate) # # print 'Saving result.', i * 0.1 # save_np_array("../homesite_data/results/random_forest_balancing_accuracy.bin", np.array(accuracy_history)) # save_np_array("../homesite_data/results/random_forest_balancing_precision.bin", np.array(precision_history)) # save_np_array("../homesite_data/results/random_forest_balancing_recall.bin", np.array(recall_history)) # save_np_array("../homesite_data/results/random_forest_balancing_auc.bin", np.array(auc_history)) # save_np_array("../homesite_data/results/random_forest_balancing_rate.bin", np.array(balancing_rate_history)) # # del homesite # del clf accuracy_history = load_np_array( "../homesite_data/results/random_forest_balancing_accuracy.bin") precision_history = load_np_array( "../homesite_data/results/random_forest_balancing_precision.bin") recall_history = load_np_array( "../homesite_data/results/random_forest_balancing_recall.bin") auc_history = load_np_array( "../homesite_data/results/random_forest_balancing_auc.bin") balancing_rate_history = load_np_array( "../homesite_data/results/random_forest_balancing_rate.bin") # for accuracy, precision, recall, auc, balancing_rate in zip(accuracy_history, precision_history, recall_history, auc_history, balancing_rate_history): # print accuracy, precision, recall, auc, balancing_rate plot("../homesite_data/results/random_forest_balacing.png", [recall_history, auc_history], \ ["sensitividade ", "AUC"], "taxa de balanceamento", "metricas", 'center right', \
#
# # Plot results.
# accuracy_history = load_np_array("../homesite_data/results/random_forest_grid_search_accuracy.bin")
# precision_history = load_np_array("../homesite_data/results/random_forest_grid_search_precision.bin")
# recall_history = load_np_array("../homesite_data/results/random_forest_grid_search_recall.bin")
# auc_history = load_np_array("../homesite_data/results/random_forest_grid_search_auc.bin")
#
# for accuracy, precision, recall, auc in zip(accuracy_history, precision_history, recall_history, auc_history):
#     print accuracy, precision, recall, auc
#
# plot("../homesite_data/results/random_forest_grid_search.png", [recall_history, auc_history], \
#      ["sensitividade ", "AUC"], "numero de arvores", "metricas", 'center right', \
#      x = np.linspace(1, len(recall_history) * 10, num = len(recall_history), endpoint = True))
#

# Load every stored random-forest result series for one parameter value.
# All five files share the "<value>.bin" ending, so it is built once.
c = 300
_rf_suffix = str(c) + ".bin"

accuracy_history = load_np_array(
    "results/random_forests/rf_accuracy_" + _rf_suffix)
precision_history = load_np_array(
    "results/random_forests/rf_precision_" + _rf_suffix)
recall_history = load_np_array(
    "results/random_forests/rf_recall_" + _rf_suffix)
auc_history = load_np_array(
    "results/random_forests/rf_auc_" + _rf_suffix)
confusion_matrix_history = load_np_array(
    "results/random_forests/rf_confusion_matrix_" + _rf_suffix)

# Show confusion matrix for best c.
# Flattened (row-major) statistics of the confusion-matrix cells, each cell
# expressed as a percentage of its own row total before aggregating over the
# last axis.
confusion_matrix_mean = np.zeros(4)
confusion_matrix_std = np.zeros(4)

# Row totals along the last axis: cell (r, 0) + cell (r, 1).
_row_totals = (
    confusion_matrix_history[0, 0, :] + confusion_matrix_history[0, 1, :],
    confusion_matrix_history[1, 0, :] + confusion_matrix_history[1, 1, :],
)

# Mean percentage for all four cells: slot = 2 * row + col.
for _slot, (_row, _col) in enumerate(((0, 0), (0, 1), (1, 0), (1, 1))):
    confusion_matrix_mean[_slot] = np.mean(
        confusion_matrix_history[_row, _col, :] * 100.0 / _row_totals[_row])

# Standard deviation for the first three cells only; slot 3 is not assigned
# within this span and therefore keeps its initial 0 here.
for _slot, (_row, _col) in enumerate(((0, 0), (0, 1), (1, 0))):
    confusion_matrix_std[_slot] = np.std(
        confusion_matrix_history[_row, _col, :] * 100.0 / _row_totals[_row])
# # Plot results. # accuracy_history = load_np_array("../homesite_data/results/random_forest_grid_search_accuracy.bin") # precision_history = load_np_array("../homesite_data/results/random_forest_grid_search_precision.bin") # recall_history = load_np_array("../homesite_data/results/random_forest_grid_search_recall.bin") # auc_history = load_np_array("../homesite_data/results/random_forest_grid_search_auc.bin") # # for accuracy, precision, recall, auc in zip(accuracy_history, precision_history, recall_history, auc_history): # print accuracy, precision, recall, auc # # plot("../homesite_data/results/random_forest_grid_search.png", [recall_history, auc_history], \ # ["sensitividade ", "AUC"], "numero de arvores", "metricas", 'center right', \ # x = np.linspace(1, len(recall_history) * 10, num = len(recall_history), endpoint = True)) c = 50 accuracy_history = load_np_array("results/ann/ann_accuracy_" + str(c) + ".bin") precision_history = load_np_array("results/ann/ann_precision_" + str(c) + ".bin") recall_history = load_np_array("results/ann/ann_recall_" + str(c) + ".bin") auc_history = load_np_array("results/ann/ann_auc_" + str(c) + ".bin") confusion_matrix_history = load_np_array("results/ann/ann_confusion_matrix_" + str(c) + ".bin") # Show confusion matrix for best c. confusion_matrix_mean = np.zeros(4) confusion_matrix_std = np.zeros(4) confusion_matrix_mean[0] = np.mean( confusion_matrix_history[0, 0, :] * 100.0 / (confusion_matrix_history[0, 0, :] + confusion_matrix_history[0, 1, :])) confusion_matrix_mean[1] = np.mean( confusion_matrix_history[0, 1, :] * 100.0 /
''' Created on 30/11/2015 @author: Alexandre Yukio Yamashita ''' from data.numpy_file import load_np_array import numpy as np from data.plot import plot # Plot grid search. C = [16, 32, 64, 128, 256, 512] auc = [] recall = [] for c in C: recall_history = load_np_array("results/adaboost/ada_recall_" + str(c) + ".bin") recall_history = recall_history[len(recall_history) - 5:] accuracy_history = load_np_array("results/adaboost/ada_accuracy_" + str(c) + ".bin") accuracy_history = recall_history[len(accuracy_history) - 5:] precision_history = load_np_array("results/adaboost/ada_precision_" + str(c) + ".bin") precision_history = precision_history[len(precision_history) - 5:] auc_history = load_np_array("results/adaboost/ada_auc_" + str(c) + ".bin") auc_history = auc_history[len(auc_history) - 5:] auc.append(np.mean(auc_history)) recall.append(np.mean(recall_history)) recall = np.array(recall) auc = np.array(auc) plot("results/adaboost/adaboost_grid_search.png", [recall, auc], \ ["sensitividade ", "AUC"], "numero de estimadores", "metricas", 'center right', \