예제 #1
0
def test_svm(X, y, path):
    """Plot precision-recall curves for the two pickled 'cardio' SVM models.

    Each model is unpickled from ``path + 'model/cardio/'``, column 1 of its
    ``predict_proba(X)`` output is collected, and ``calculate_f1_score`` is
    called with the model. The collected scores are handed to
    ``multiple_precision_recall_curves``; the returned plotting object is
    decorated and passed to ``save_figure`` as ``svm_pr_curve.png`` under
    ``path + 'plot/cardio'``.

    Args:
        X: Feature matrix forwarded to each model's ``predict_proba``.
        y: Labels forwarded to the scoring and plotting helpers.
        path: Base directory prefix. It is concatenated directly with
            ``'model/'`` and ``'plot/'``, so it needs its own trailing
            separator.
    """
    data_set = 'cardio'
    model_dir = path + 'model/' + data_set + '/'
    model_names = ['svm_model_1', 'svm_model_2']

    probability_list = []
    for index, model_name in enumerate(model_names, 1):
        print("Predicting " + str(index))
        # NOTE: pickle.load can execute arbitrary code; only load trusted
        # model files. The context manager closes the handle, which the
        # previous bare open() call never did.
        with open(model_dir + model_name, 'rb') as model_file:
            model = pickle.load(model_file)
        # Column 1 of predict_proba -- presumably the positive class; verify
        # against the label encoding used at training time.
        probability_list.append(model.predict_proba(X)[:, 1])
        calculate_f1_score(model, X, y)

    color_list = ['r', 'b']
    label_list = ['kernel = linear', 'kernel = polynomial']

    # The helper returns the plotting object (presumably matplotlib.pyplot).
    plot = multiple_precision_recall_curves(y, probability_list, color_list,
                                            label_list)

    plot.xlabel('Recall')
    plot.ylabel('Precision')
    plot.ylim([0.5, 1.05])
    plot.xlim([0.0, 1.0])
    plot.title('Support Vector Machine Precision-Recall Curve')
    plot.legend(loc="best")

    save_figure(plot, path + "plot/" + data_set, 'svm_pr_curve.png')
def test_boosted_decision_tree_loan(X, y, path):
    """Plot precision-recall curves for the pickled 'loan' boosted trees.

    Five models are unpickled from ``path + 'model/loan/'`` in a fixed order.
    For each one, column 1 of ``predict_proba(X)`` is collected and
    ``calculate_f1_score`` is called. The scores are plotted through
    ``multiple_precision_recall_curves`` and the figure is handed to
    ``save_figure`` as ``boosted_dtc_max_depth_plots.png`` under
    ``path + 'plot/loan'``.

    Args:
        X: Feature matrix forwarded to each model's ``predict_proba``.
        y: Labels forwarded to the scoring and plotting helpers.
        path: Base directory prefix. It is concatenated directly with
            ``'model/'`` and ``'plot/'``, so it needs its own trailing
            separator.
    """
    data_set = 'loan'
    model_dir = path + 'model/' + data_set + '/'
    # Order matters: it must line up with color_list / label_list below.
    model_names = [
        'boosted_dtc_model_nodes_1',
        'boosted_dtc_model_nodes_2',
        'boosted_dtc_model_nodes_3',
        'boosted_dtc_model_nodes_4',
        'boosted_dtc_none',
    ]

    probability_list = []
    for model_name in model_names:
        # NOTE: pickle.load can execute arbitrary code; only load trusted
        # model files. The context manager closes the handle, which the
        # previous bare open() call never did.
        with open(model_dir + model_name, 'rb') as model_file:
            model = pickle.load(model_file)
        # Column 1 of predict_proba -- presumably the positive class; verify
        # against the label encoding used at training time.
        probability_list.append(model.predict_proba(X)[:, 1])
        calculate_f1_score(model, X, y)

    color_list = ['r', 'y', 'g', 'm', 'b']
    label_list = ['MD = 4', 'MD = 8', 'MD = 15', 'MD = 30', 'MD = None']

    # The helper returns the plotting object (presumably matplotlib.pyplot).
    plot = multiple_precision_recall_curves(y, probability_list, color_list,
                                            label_list)

    plot.xlabel('Recall')
    plot.ylabel('Precision')
    plot.ylim([0.0, 1.05])
    plot.xlim([0.0, 1.0])
    plot.title(
        'Boosted Decision Tree Classifier with Max Depth Pruning (MD) \n Precision-Recall Curve '
    )
    plot.legend(loc="best")

    save_figure(plot, path + "plot/" + data_set,
                'boosted_dtc_max_depth_plots.png')
예제 #3
0
def test_decision_tree_max_leaf(X, y, path):
    """Plot precision-recall curves for the pickled 'cardio' decision trees.

    Five models are unpickled from ``path + 'model/cardio/'`` in a fixed
    order and column 1 of each ``predict_proba(X)`` is collected.
    ``calculate_f1_score`` is called for the four ``dtc_model_leaf_nodes_*``
    models but not for ``dtc_none``. The scores are plotted through
    ``multiple_precision_recall_curves`` and the figure is handed to
    ``save_figure`` as ``dtc_max_leaf_nodes_plots.png`` under
    ``path + 'plot/cardio'``.

    Args:
        X: Feature matrix forwarded to each model's ``predict_proba``.
        y: Labels forwarded to the scoring and plotting helpers.
        path: Base directory prefix. It is concatenated directly with
            ``'model/'`` and ``'plot/'``, so it needs its own trailing
            separator.
    """
    data_set = 'cardio'
    model_dir = path + 'model/' + data_set + '/'
    unpruned_model_name = 'dtc_none'
    # Order matters: it must line up with color_list / label_list below.
    model_names = [
        'dtc_model_leaf_nodes_1',
        'dtc_model_leaf_nodes_2',
        'dtc_model_leaf_nodes_3',
        'dtc_model_leaf_nodes_4',
        unpruned_model_name,
    ]

    probability_list = []
    for model_name in model_names:
        # NOTE: pickle.load can execute arbitrary code; only load trusted
        # model files. The context manager closes the handle, which the
        # previous bare open() call never did.
        with open(model_dir + model_name, 'rb') as model_file:
            model = pickle.load(model_file)
        # Column 1 of predict_proba -- presumably the positive class; verify
        # against the label encoding used at training time.
        probability_list.append(model.predict_proba(X)[:, 1])
        # NOTE(review): the unpruned model has never been F1-scored here;
        # that behavior is preserved. Confirm whether the omission is
        # deliberate.
        if model_name != unpruned_model_name:
            calculate_f1_score(model, X, y)

    color_list = ['r', 'y', 'g', 'm', 'b']
    label_list = [
        'MLN = 20', 'MLN = 100', 'MLN = 1000', 'MLN = 2000', 'MLN = None'
    ]

    # The helper returns the plotting object (presumably matplotlib.pyplot).
    plot = multiple_precision_recall_curves(y, probability_list, color_list,
                                            label_list)

    plot.xlabel('Recall')
    plot.ylabel('Precision')
    plot.ylim([0.5, 1.05])
    plot.xlim([0.0, 1.0])
    plot.title(
        'Decision Tree Classifier with Max Leaf Nodes (MLN) \n Precision-Recall Curve '
    )
    plot.legend(loc="best")

    save_figure(plot, path + "plot/" + data_set, 'dtc_max_leaf_nodes_plots.png')
예제 #4
0
def test_best_models_loan(X, y, path):
    """Plot precision-recall curves comparing selected 'loan' models.

    Four models (decision tree, boosted decision tree, neural network, kNN)
    are unpickled from ``path + 'model/loan/'`` and column 1 of each
    ``predict_proba(X)`` is collected. No F1 score is computed here. The
    scores are plotted through ``multiple_precision_recall_curves`` and the
    figure is handed to ``save_figure`` as ``best_models_pr_curve.png`` under
    ``path + 'plot/loan'``.

    Args:
        X: Feature matrix forwarded to each model's ``predict_proba``.
        y: Labels forwarded to the plotting helper.
        path: Base directory prefix. It is concatenated directly with
            ``'model/'`` and ``'plot/'``, so it needs its own trailing
            separator.
    """
    data_set = 'loan'
    model_dir = path + 'model/' + data_set + '/'
    # Order matters: it must line up with color_list / label_list below.
    # The SVM model ('svm_model_1') is currently excluded; to include it,
    # insert it before the kNN entry with color 'y' and label 'model = SVM'.
    model_names = [
        'dtc_model_depth_2',
        'boosted_dtc_model_nodes_1',
        'neural_net_model_4',
        'kNN_model_4',
    ]

    probability_list = []
    for model_name in model_names:
        # NOTE: pickle.load can execute arbitrary code; only load trusted
        # model files. The context manager closes the handle, which the
        # previous bare open() call never did.
        with open(model_dir + model_name, 'rb') as model_file:
            model = pickle.load(model_file)
        # Column 1 of predict_proba -- presumably the positive class; verify
        # against the label encoding used at training time.
        probability_list.append(model.predict_proba(X)[:, 1])

    color_list = ['r', 'b', 'm', 'g']
    label_list = [
        'model = decision tree', 'model = boosted decision tree',
        'model = neural network', 'model = kNN'
    ]

    # The helper returns the plotting object (presumably matplotlib.pyplot).
    plot = multiple_precision_recall_curves(y, probability_list, color_list,
                                            label_list)

    plot.xlabel('Recall')
    plot.ylabel('Precision')
    plot.ylim([0.0, 1.05])
    plot.xlim([0.0, 1.0])
    plot.title('Precision-Recall Curve of Best Model for each Algorithm')
    plot.legend(loc="best")

    save_figure(plot, path + "plot/" + data_set, 'best_models_pr_curve.png')
예제 #5
0
def test_neural_net_loan(X, y, path):
    """Plot precision-recall curves for the pickled 'loan' neural networks.

    Four models are unpickled from ``path + 'model/loan/'`` in a fixed order.
    For each one, column 1 of ``predict_proba(X)`` is collected and
    ``calculate_f1_score`` is called. The scores are plotted through
    ``multiple_precision_recall_curves`` and the figure is handed to
    ``save_figure`` as ``neural_net_pr_curve.png`` under
    ``path + 'plot/loan'``.

    Args:
        X: Feature matrix forwarded to each model's ``predict_proba``.
        y: Labels forwarded to the scoring and plotting helpers.
        path: Base directory prefix. It is concatenated directly with
            ``'model/'`` and ``'plot/'``, so it needs its own trailing
            separator.
    """
    data_set = 'loan'
    model_dir = path + 'model/' + data_set + '/'
    # Order matters: it must line up with color_list / label_list below.
    model_names = [
        'neural_net_model_1',
        'neural_net_model_2',
        'neural_net_model_3',
        'neural_net_model_4',
    ]

    probability_list = []
    for model_name in model_names:
        # NOTE: pickle.load can execute arbitrary code; only load trusted
        # model files. The context manager closes the handle, which the
        # previous bare open() call never did.
        with open(model_dir + model_name, 'rb') as model_file:
            model = pickle.load(model_file)
        # Column 1 of predict_proba -- presumably the positive class; verify
        # against the label encoding used at training time.
        probability_list.append(model.predict_proba(X)[:, 1])
        calculate_f1_score(model, X, y)

    color_list = ['r', 'b', 'm', 'y']
    label_list = [
        'HLS = 20 x 5', 'HLS = 50 x 5', 'HLS = 100 x 5', 'HLS = 500 x 5'
    ]

    # The helper returns the plotting object (presumably matplotlib.pyplot).
    plot = multiple_precision_recall_curves(y, probability_list, color_list,
                                            label_list)

    plot.xlabel('Recall')
    plot.ylabel('Precision')
    plot.ylim([0.0, 1.05])
    plot.xlim([0.0, 1.0])
    plot.title(
        'Neural Network with Varying Hidden Layer Size (HLS) \n Precision-Recall Curve'
    )
    plot.legend(loc="best")

    save_figure(plot, path + "plot/" + data_set, 'neural_net_pr_curve.png')
예제 #6
0
def test_kNN(X, y, path):
    """Plot precision-recall curves for the pickled 'cardio' kNN models.

    Four models are unpickled from ``path + 'model/cardio/'`` in a fixed
    order. For each one, column 1 of ``predict_proba(X)`` is collected and
    ``calculate_f1_score`` is called. The scores are plotted through
    ``multiple_precision_recall_curves`` and the figure is handed to
    ``save_figure`` as ``kNN_pr_curve.png`` under ``path + 'plot/cardio'``.

    Args:
        X: Feature matrix forwarded to each model's ``predict_proba``.
        y: Labels forwarded to the scoring and plotting helpers.
        path: Base directory prefix. It is concatenated directly with
            ``'model/'`` and ``'plot/'``, so it needs its own trailing
            separator.
    """
    data_set = 'cardio'
    model_dir = path + 'model/' + data_set + '/'
    # Order matters: it must line up with color_list / label_list below.
    model_names = ['kNN_model_1', 'kNN_model_2', 'kNN_model_3', 'kNN_model_4']

    probability_list = []
    for model_name in model_names:
        # NOTE: pickle.load can execute arbitrary code; only load trusted
        # model files. The context manager closes the handle, which the
        # previous bare open() call never did.
        with open(model_dir + model_name, 'rb') as model_file:
            model = pickle.load(model_file)
        # Column 1 of predict_proba -- presumably the positive class; verify
        # against the label encoding used at training time.
        probability_list.append(model.predict_proba(X)[:, 1])
        calculate_f1_score(model, X, y)

    color_list = ['r', 'b', 'm', 'y']
    label_list = ['k = 25', 'k = 150', 'k = 225', 'k = 300']

    # The helper returns the plotting object (presumably matplotlib.pyplot).
    plot = multiple_precision_recall_curves(y, probability_list, color_list,
                                            label_list)

    plot.xlabel('Recall')
    plot.ylabel('Precision')
    plot.ylim([0.5, 1.05])
    plot.xlim([0.0, 1.0])
    plot.title('k Nearest Neighbors Precision-Recall Curve')
    plot.legend(loc="best")

    save_figure(plot, path + "plot/" + data_set, 'kNN_pr_curve.png')