def train_k_NN(path, with_plots):
    """Train and save, or plot learning curves for, kNN models on 'cardio'.

    Training data is read through ``load_data`` from
    ``path + 'data/cardio/train/'``. Both modes use the same four
    ``KNeighborsClassifier`` settings (n_neighbors = 25, 150, 225, 300).

    Args:
        path: String prefix that is concatenated as-is in front of the
            'data/', 'model/' and 'plot/' sub-directories.
        with_plots: Mode switch. It is compared against the *string*
            "False": exactly that value trains every model with
            ``train_and_time`` and stores the results with ``save_model``;
            any other value draws the learning curves and stores the figure
            with ``save_figure``.

    Returns:
        None.
    """
    data_set = 'cardio'
    neighbor_counts = (25, 150, 225, 300)
    x_train, y_train = load_data(path + 'data/' + data_set + '/train/')

    if with_plots == "False":
        # Fit every configuration first, then persist them, so that the
        # sequence of train/save calls stays train*4 followed by save*4.
        fitted = [
            train_and_time(KNeighborsClassifier(n_neighbors=k),
                           x_train, y_train)
            for k in neighbor_counts
        ]
        model_dir = path + 'model/' + data_set
        for index, fitted_model in enumerate(fitted, start=1):
            save_model(fitted_model, model_dir, 'kNN_model_' + str(index))
        return

    print('Training kNN...')
    candidates = [KNeighborsClassifier(n_neighbors=k) for k in neighbor_counts]
    figure = multiple_learning_curves_plot(
        candidates,
        x_train,
        y_train,
        ["r", "y", "b", "m"],
        ['k = 25', 'k = 150', 'k = 225', 'k = 300'],
    )
    figure.title("k Nearest Neighbor \n Learning Curves")
    figure.xlabel("Training examples")
    figure.ylabel("F1 Score")
    figure.grid()
    figure.legend(loc="best")
    save_figure(figure, path + "plot/" + data_set, 'kNN_learning_curves.png')
def train_boosted_dtc(path, with_plots):
    """Train and save, or plot learning curves for, boosted trees on 'cardio'.

    Each model is an ``AdaBoostClassifier`` wrapped around a
    ``tree.DecisionTreeClassifier``. Five base-tree settings are used:
    max_depth of 5, 10, 15 and 20, plus one tree built with no arguments.

    Args:
        path: String prefix that is concatenated as-is in front of the
            'data/', 'model/' and 'plot/' sub-directories.
        with_plots: Mode switch. It is compared against the *string*
            "False": exactly that value trains every model with
            ``train_and_time`` and stores the results with ``save_model``;
            any other value draws the learning curves and stores the figure
            with ``save_figure``.

    Returns:
        None.
    """
    data_set = 'cardio'
    # Keyword arguments for each base tree; the empty dict reproduces the
    # argument-less DecisionTreeClassifier() of the last configuration.
    tree_settings = (
        {'max_depth': 5},
        {'max_depth': 10},
        {'max_depth': 15},
        {'max_depth': 20},
        {},
    )
    x_train, y_train = load_data(path + 'data/' + data_set + '/train/')

    if with_plots == "False":
        # All fits happen before the first save, as a fixed sequence.
        fitted = [
            train_and_time(
                AdaBoostClassifier(tree.DecisionTreeClassifier(**settings)),
                x_train, y_train)
            for settings in tree_settings
        ]
        file_names = (
            'boosted_dtc_model_nodes_1',
            'boosted_dtc_model_nodes_2',
            'boosted_dtc_model_nodes_3',
            'boosted_dtc_model_nodes_4',
            'boosted_dtc_none',
        )
        model_dir = path + "model/" + data_set
        for fitted_model, file_name in zip(fitted, file_names):
            save_model(fitted_model, model_dir, file_name)
        return

    print('Training boosted dtc...')
    candidates = [
        AdaBoostClassifier(tree.DecisionTreeClassifier(**settings))
        for settings in tree_settings
    ]
    figure = multiple_learning_curves_plot(
        candidates,
        x_train,
        y_train,
        ["r", "y", "g", "m", "b"],
        ["MD = 5", "MD = 10", "MD = 15", "MD = 20", "MD = None"],
    )
    figure.title(
        "Boosted Decision Tree With Max Depth (MD) \n Pruning Learning Curves")
    figure.xlabel("Training examples")
    figure.ylabel("F1 Score")
    figure.grid()
    figure.legend(loc="best")
    save_figure(figure, path + "plot/" + data_set,
                'boosted_dtc_md_learning_curves.png')
def train_dtc_loan(path, with_plots):
    """Train and save, or plot learning curves for, decision trees on 'loan'.

    In training mode two families of ``tree.DecisionTreeClassifier`` are
    fitted and stored: one varying ``max_depth`` (4, 8, 15, 30, plus a tree
    built with no arguments) and one varying ``max_leaf_nodes``
    (5, 20, 100, 300). In plotting mode only the max-depth family is drawn.

    Args:
        path: String prefix that is concatenated as-is in front of the
            'data/', 'model/' and 'plot/' sub-directories.
        with_plots: Mode switch. It is compared against the *string*
            "False": exactly that value trains and saves the models; any
            other value draws the learning curves and stores the figure
            with ``save_figure``.

    Returns:
        None.
    """
    data_set = "loan"
    print("Training Decision Tree Classifier...")
    x_train, y_train = load_data(path + 'data/' + data_set + '/train/')

    # The empty dict reproduces the argument-less DecisionTreeClassifier().
    depth_settings = (
        {'max_depth': 4},
        {'max_depth': 8},
        {'max_depth': 15},
        {'max_depth': 30},
        {},
    )

    if with_plots == "False":
        model_dir = path + "model/" + data_set

        def fit_then_save(settings_group, file_names):
            # Fit the whole group before saving any of it, keeping the
            # train-all-then-save-all order for each family.
            fitted = [
                train_and_time(tree.DecisionTreeClassifier(**settings),
                               x_train, y_train)
                for settings in settings_group
            ]
            for fitted_model, file_name in zip(fitted, file_names):
                save_model(fitted_model, model_dir, file_name)

        fit_then_save(
            depth_settings,
            ('dtc_model_depth_1', 'dtc_model_depth_2', 'dtc_model_depth_3',
             'dtc_model_depth_4', 'dtc_none'),
        )
        fit_then_save(
            [{'max_leaf_nodes': count} for count in (5, 20, 100, 300)],
            ['dtc_model_leaf_nodes_' + str(index) for index in range(1, 5)],
        )
        return

    # A second plot over max_leaf_nodes (MLN = 5, 20, 100, 300, None; file
    # 'dtc_mln_learning_curve.png') existed here only as disabled code, so
    # just the max-depth curves are produced.
    candidates = [
        tree.DecisionTreeClassifier(**settings) for settings in depth_settings
    ]
    figure = multiple_learning_curves_plot(
        candidates,
        x_train,
        y_train,
        ["r", "y", "g", "m", "b"],
        ["MD = 4", "MD = 8", "MD = 15", "MD = 30", "MD = None"],
    )
    figure.title("Decision Tree Learning Curves \n With Max Depth (MD)")
    figure.xlabel("Training examples")
    figure.ylabel("F1 Score")
    figure.grid()
    figure.legend(loc="best")
    save_figure(figure, path + "plot/" + data_set, 'dtc_md_learning_curve.png')
def train_neural_net_with_loan_data(path, with_plots):
    """Train and save, or plot learning curves for, MLP models on 'loan'.

    Four ``MLPClassifier`` networks are used, differing only in
    ``hidden_layer_sizes``: (20, 5), (50, 5), (100, 5) and (500, 5). All of
    them use solver='sgd', validation_fraction=0.0 and random_state=1.

    Args:
        path: String prefix that is concatenated as-is in front of the
            'data/', 'model/' and 'plot/' sub-directories.
        with_plots: Mode switch. It is compared against the *string*
            "False": exactly that value trains every model with
            ``train_and_time`` and stores the results with ``save_model``;
            any other value draws the learning curves and stores the figure
            with ``save_figure``.

    Returns:
        None.
    """
    data_set = 'loan'
    layer_shapes = ((20, 5), (50, 5), (100, 5), (500, 5))
    x_train, y_train = load_data(path + 'data/' + data_set + '/train/')

    def build_networks(alpha):
        # One unfitted network per hidden-layer shape, sharing all other
        # hyper-parameters.
        return [
            MLPClassifier(solver='sgd', validation_fraction=0.0, alpha=alpha,
                          hidden_layer_sizes=shape, random_state=1)
            for shape in layer_shapes
        ]

    if with_plots == "False":
        # NOTE(review): training uses alpha=1e-1 while the plotting branch
        # below uses alpha=1e-3 -- confirm the difference is intentional.
        fitted = [
            train_and_time(network, x_train, y_train)
            for network in build_networks(1e-1)
        ]
        model_dir = path + 'model/' + data_set
        for index, fitted_model in enumerate(fitted, start=1):
            save_model(fitted_model, model_dir,
                       'neural_net_model_' + str(index))
    else:
        print('Training Neural Network...')
        figure = multiple_learning_curves_plot(
            build_networks(1e-3),
            x_train,
            y_train,
            ["r", "y", "b", "m"],
            ['HLS = 20 x 5', 'HLS = 50 x 5', 'HLS = 100 x 5',
             'HLS = 500 x 5'],
        )
        figure.title(
            "Neural Network with Varying Hidden Layer Size (HLS) \n"
            " Learning Curves"
        )
        figure.xlabel("Training examples")
        figure.ylabel("F1 Score")
        figure.grid()
        figure.legend(loc="best")
        save_figure(figure, path + "plot/" + data_set,
                    'neural_net_learning_curves.png')

    # NOTE(review): the nesting level of this final print could not be
    # recovered from the collapsed source; it is assumed to run after
    # either branch -- confirm.
    print("done")