def test_svm(X, y, path):
    """Plot precision-recall curves for the pickled SVM models ('cardio').

    Loads ``svm_model_1`` and ``svm_model_2`` from
    ``path + 'model/cardio/'``, takes column 1 of each model's
    ``predict_proba(X)`` output (presumably the positive class -- confirm
    against how the models were trained), reports each model through
    ``calculate_f1_score`` and saves the combined plot as
    ``svm_pr_curve.png`` under ``path + 'plot/cardio'``.

    Args:
        X: Samples handed to ``predict_proba`` and ``calculate_f1_score``.
        y: Labels handed to ``calculate_f1_score`` and
            ``multiple_precision_recall_curves``.
        path: Base directory prefix. It is concatenated directly with
            ``'model/'`` and ``'plot/'``, so it must end with a separator.
    """
    data_set = 'cardio'
    model_dir = path + 'model/' + data_set + '/'
    model_names = ['svm_model_1', 'svm_model_2']

    curve_probs = []
    for number, model_name in enumerate(model_names, start=1):
        print("Predicting {}".format(number))
        # NOTE: pickle can execute arbitrary code while loading; only point
        # this at model files that come from a trusted source.
        # The context manager closes the handle even if unpickling fails.
        with open(model_dir + model_name, 'rb') as model_file:
            model = pickle.load(model_file)
        curve_probs.append(model.predict_proba(X)[:, 1])
        calculate_f1_score(model, X, y)

    color_list = ['r', 'b']
    label_list = ['kernel = linear', 'kernel = polynomial']
    figure = multiple_precision_recall_curves(y, curve_probs, color_list,
                                              label_list)
    figure.xlabel('Recall')
    figure.ylabel('Precision')
    figure.ylim([0.5, 1.05])
    figure.xlim([0.0, 1.0])
    figure.title('Support Vector Machine Precision-Recall Curve')
    figure.legend(loc="best")
    save_figure(figure, path + "plot/" + data_set, 'svm_pr_curve.png')
def test_boosted_decision_tree_loan(X, y, path):
    """Plot precision-recall curves for the boosted decision trees ('loan').

    Loads the five pickled boosted decision tree models from
    ``path + 'model/loan/'``, takes column 1 of each model's
    ``predict_proba(X)`` output (presumably the positive class -- confirm
    against how the models were trained), reports each model through
    ``calculate_f1_score`` and saves the combined plot as
    ``boosted_dtc_max_depth_plots.png`` under ``path + 'plot/loan'``.

    Args:
        X: Samples handed to ``predict_proba`` and ``calculate_f1_score``.
        y: Labels handed to ``calculate_f1_score`` and
            ``multiple_precision_recall_curves``.
        path: Base directory prefix. It is concatenated directly with
            ``'model/'`` and ``'plot/'``, so it must end with a separator.
    """
    data_set = 'loan'
    model_dir = path + 'model/' + data_set + '/'
    # Order matters: it must line up with color_list / label_list below.
    model_names = [
        'boosted_dtc_model_nodes_1',
        'boosted_dtc_model_nodes_2',
        'boosted_dtc_model_nodes_3',
        'boosted_dtc_model_nodes_4',
        'boosted_dtc_none',
    ]

    curve_probs = []
    for model_name in model_names:
        # NOTE: pickle can execute arbitrary code while loading; only point
        # this at model files that come from a trusted source.
        # The context manager closes the handle even if unpickling fails.
        with open(model_dir + model_name, 'rb') as model_file:
            model = pickle.load(model_file)
        curve_probs.append(model.predict_proba(X)[:, 1])
        calculate_f1_score(model, X, y)

    color_list = ['r', 'y', 'g', 'm', 'b']
    label_list = ['MD = 4', 'MD = 8', 'MD = 15', 'MD = 30', 'MD = None']
    figure = multiple_precision_recall_curves(y, curve_probs, color_list,
                                              label_list)
    figure.xlabel('Recall')
    figure.ylabel('Precision')
    figure.ylim([0.0, 1.05])
    figure.xlim([0.0, 1.0])
    figure.title(
        'Boosted Decision Tree Classifier with Max Depth Pruning (MD) \n Precision-Recall Curve '
    )
    figure.legend(loc="best")
    save_figure(figure, path + "plot/" + data_set,
                'boosted_dtc_max_depth_plots.png')
def test_decision_tree_max_leaf(X, y, path):
    """Plot precision-recall curves for the max-leaf-node trees ('cardio').

    Loads the five pickled decision tree models from
    ``path + 'model/cardio/'``, takes column 1 of each model's
    ``predict_proba(X)`` output (presumably the positive class -- confirm
    against how the models were trained) and saves the combined plot as
    ``dtc_max_leaf_nodes_plots.png`` under ``path + 'plot/cardio'``.
    ``calculate_f1_score`` is called for the four ``dtc_model_leaf_nodes_*``
    models only.

    Args:
        X: Samples handed to ``predict_proba`` and ``calculate_f1_score``.
        y: Labels handed to ``calculate_f1_score`` and
            ``multiple_precision_recall_curves``.
        path: Base directory prefix. It is concatenated directly with
            ``'model/'`` and ``'plot/'``, so it must end with a separator.
    """
    data_set = 'cardio'
    model_dir = path + 'model/' + data_set + '/'
    # (file name, report F1?) -- order must line up with color_list /
    # label_list below.
    # NOTE(review): 'dtc_none' has never been passed to calculate_f1_score
    # in this function, unlike its siblings; that is kept as-is here.
    # Confirm whether the omission is intentional.
    model_specs = [
        ('dtc_model_leaf_nodes_1', True),
        ('dtc_model_leaf_nodes_2', True),
        ('dtc_model_leaf_nodes_3', True),
        ('dtc_model_leaf_nodes_4', True),
        ('dtc_none', False),
    ]

    curve_probs = []
    for model_name, report_f1 in model_specs:
        # NOTE: pickle can execute arbitrary code while loading; only point
        # this at model files that come from a trusted source.
        # The context manager closes the handle even if unpickling fails.
        with open(model_dir + model_name, 'rb') as model_file:
            model = pickle.load(model_file)
        curve_probs.append(model.predict_proba(X)[:, 1])
        if report_f1:
            calculate_f1_score(model, X, y)

    color_list = ['r', 'y', 'g', 'm', 'b']
    label_list = [
        'MLN = 20', 'MLN = 100', 'MLN = 1000', 'MLN = 2000', 'MLN = None'
    ]
    figure = multiple_precision_recall_curves(y, curve_probs, color_list,
                                              label_list)
    figure.xlabel('Recall')
    figure.ylabel('Precision')
    figure.ylim([0.5, 1.05])
    figure.xlim([0.0, 1.0])
    figure.title(
        'Decision Tree Classifier with Max Leaf Nodes (MLN) \n Precision-Recall Curve '
    )
    figure.legend(loc="best")
    save_figure(figure, path + "plot/" + data_set,
                'dtc_max_leaf_nodes_plots.png')
def test_best_models_loan(X, y, path):
    """Plot precision-recall curves of one model per algorithm ('loan').

    Loads four pickled models (decision tree, boosted decision tree,
    neural network, kNN) from ``path + 'model/loan/'``, takes column 1 of
    each model's ``predict_proba(X)`` output (presumably the positive
    class -- confirm against how the models were trained) and saves the
    combined plot as ``best_models_pr_curve.png`` under
    ``path + 'plot/loan'``. No F1 score is reported here.

    Args:
        X: Samples handed to ``predict_proba``.
        y: Labels handed to ``multiple_precision_recall_curves``.
        path: Base directory prefix. It is concatenated directly with
            ``'model/'`` and ``'plot/'``, so it must end with a separator.
    """
    data_set = 'loan'
    model_dir = path + 'model/' + data_set + '/'
    # Order matters: it must line up with color_list / label_list below.
    # The SVM model ('svm_model_1') is currently excluded. To include it,
    # insert it before the kNN entry with color 'y' and label
    # 'model = SVM'.
    model_names = [
        'dtc_model_depth_2',
        'boosted_dtc_model_nodes_1',
        'neural_net_model_4',
        'kNN_model_4',
    ]

    curve_probs = []
    for model_name in model_names:
        # NOTE: pickle can execute arbitrary code while loading; only point
        # this at model files that come from a trusted source.
        # The context manager closes the handle even if unpickling fails.
        with open(model_dir + model_name, 'rb') as model_file:
            model = pickle.load(model_file)
        curve_probs.append(model.predict_proba(X)[:, 1])

    color_list = ['r', 'b', 'm', 'g']
    label_list = [
        'model = decision tree', 'model = boosted decision tree',
        'model = neural network', 'model = kNN'
    ]
    figure = multiple_precision_recall_curves(y, curve_probs, color_list,
                                              label_list)
    figure.xlabel('Recall')
    figure.ylabel('Precision')
    figure.ylim([0.0, 1.05])
    figure.xlim([0.0, 1.0])
    figure.title('Precision-Recall Curve of Best Model for each Algorithm')
    figure.legend(loc="best")
    save_figure(figure, path + "plot/" + data_set, 'best_models_pr_curve.png')
def test_neural_net_loan(X, y, path):
    """Plot precision-recall curves for the neural network models ('loan').

    Loads ``neural_net_model_1`` .. ``neural_net_model_4`` from
    ``path + 'model/loan/'``, takes column 1 of each model's
    ``predict_proba(X)`` output (presumably the positive class -- confirm
    against how the models were trained), reports each model through
    ``calculate_f1_score`` and saves the combined plot as
    ``neural_net_pr_curve.png`` under ``path + 'plot/loan'``.

    Args:
        X: Samples handed to ``predict_proba`` and ``calculate_f1_score``.
        y: Labels handed to ``calculate_f1_score`` and
            ``multiple_precision_recall_curves``.
        path: Base directory prefix. It is concatenated directly with
            ``'model/'`` and ``'plot/'``, so it must end with a separator.
    """
    data_set = 'loan'
    model_dir = path + 'model/' + data_set + '/'
    # Order matters: it must line up with color_list / label_list below.
    model_names = [
        'neural_net_model_1',
        'neural_net_model_2',
        'neural_net_model_3',
        'neural_net_model_4',
    ]

    curve_probs = []
    for model_name in model_names:
        # NOTE: pickle can execute arbitrary code while loading; only point
        # this at model files that come from a trusted source.
        # The context manager closes the handle even if unpickling fails.
        with open(model_dir + model_name, 'rb') as model_file:
            model = pickle.load(model_file)
        curve_probs.append(model.predict_proba(X)[:, 1])
        calculate_f1_score(model, X, y)

    color_list = ['r', 'b', 'm', 'y']
    label_list = [
        'HLS = 20 x 5', 'HLS = 50 x 5', 'HLS = 100 x 5', 'HLS = 500 x 5'
    ]
    figure = multiple_precision_recall_curves(y, curve_probs, color_list,
                                              label_list)
    figure.xlabel('Recall')
    figure.ylabel('Precision')
    figure.ylim([0.0, 1.05])
    figure.xlim([0.0, 1.0])
    figure.title(
        'Neural Network with Varying Hidden Layer Size (HLS) \n Precision-Recall Curve'
    )
    figure.legend(loc="best")
    save_figure(figure, path + "plot/" + data_set, 'neural_net_pr_curve.png')
def test_kNN(X, y, path):
    """Plot precision-recall curves for the pickled kNN models ('cardio').

    Loads ``kNN_model_1`` .. ``kNN_model_4`` from
    ``path + 'model/cardio/'``, takes column 1 of each model's
    ``predict_proba(X)`` output (presumably the positive class -- confirm
    against how the models were trained), reports each model through
    ``calculate_f1_score`` and saves the combined plot as
    ``kNN_pr_curve.png`` under ``path + 'plot/cardio'``.

    Args:
        X: Samples handed to ``predict_proba`` and ``calculate_f1_score``.
        y: Labels handed to ``calculate_f1_score`` and
            ``multiple_precision_recall_curves``.
        path: Base directory prefix. It is concatenated directly with
            ``'model/'`` and ``'plot/'``, so it must end with a separator.
    """
    data_set = 'cardio'
    model_dir = path + 'model/' + data_set + '/'
    # Order matters: it must line up with color_list / label_list below.
    model_names = [
        'kNN_model_1',
        'kNN_model_2',
        'kNN_model_3',
        'kNN_model_4',
    ]

    curve_probs = []
    for model_name in model_names:
        # NOTE: pickle can execute arbitrary code while loading; only point
        # this at model files that come from a trusted source.
        # The context manager closes the handle even if unpickling fails.
        with open(model_dir + model_name, 'rb') as model_file:
            model = pickle.load(model_file)
        curve_probs.append(model.predict_proba(X)[:, 1])
        calculate_f1_score(model, X, y)

    color_list = ['r', 'b', 'm', 'y']
    label_list = ['k = 25', 'k = 150', 'k = 225', 'k = 300']
    figure = multiple_precision_recall_curves(y, curve_probs, color_list,
                                              label_list)
    figure.xlabel('Recall')
    figure.ylabel('Precision')
    figure.ylim([0.5, 1.05])
    figure.xlim([0.0, 1.0])
    figure.title('k Nearest Neighbors Precision-Recall Curve')
    figure.legend(loc="best")
    save_figure(figure, path + "plot/" + data_set, 'kNN_pr_curve.png')