def decision_tree_performance(X_train, y_train, X_test, y_test, eval_method,
                              threshold_list):
    '''
    This function generates the performance matrix of the decision tree model.
    Inputs:
        X_train, y_train, X_test, y_test: training and testing dataframes
        eval_method: evaluation method used in the function
        threshold_list: list of the percentiles of the population for
                        extracting thresholds from the predicted probability
    Returns: performance matrix (pandas DataFrame)
    '''
    columns = ["criterion", "splitter", "max_depth", "best_thres", "best_eval"]
    m_performance = pd.DataFrame(columns=columns)

    for crit in DECISION_TREE_HP["criterion"]:
        for split in DECISION_TREE_HP["splitter"]:
            for m_depth in DECISION_TREE_HP["max_depth"]:
                dt = decision_tree.classifier_settings_dt(
                    criterion=crit, splitter=split, max_depth=m_depth)
                y_predp = decision_tree.train_decision_tree(
                    X_train, y_train, X_test, dt)[1]
                best_thres, best_eval = eva.best_threshold(
                    y_test, y_predp, eval_method, threshold_list)
                output = {"criterion": [crit], "splitter": [split],
                          "max_depth": [m_depth], "best_thres": [best_thres],
                          "best_eval": [best_eval]}
                sub_p = pd.DataFrame(data=output)
                m_performance = pd.concat([m_performance, sub_p], join="inner")

    m_performance["method"] = "decision_tree"
    return m_performance
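

# Usage sketch (illustrative only): DECISION_TREE_HP is assumed to be a
# module-level grid mapping each hyperparameter name to the values to sweep,
# along the lines of
#
#     DECISION_TREE_HP = {"criterion": ["gini", "entropy"],
#                         "splitter": ["best", "random"],
#                         "max_depth": [3, 5, 10]}
#
# so that a call such as
#
#     decision_tree_performance(X_train, y_train, X_test, y_test,
#                               "f1", [1, 2, 5, 10, 20, 30, 50])
#
# yields one row per (criterion, splitter, max_depth) combination. The grid
# values, the "f1" metric, and the percentile list above are hypothetical.
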
def bagging_performance(X_train, y_train, X_test, y_test, base_method,
                        eval_method, threshold_list):
    '''
    This function generates the performance matrix of the bagging model.
    Inputs:
        X_train, y_train, X_test, y_test: training and testing dataframes
        base_method: the base estimator to be passed in for bagging
        eval_method: evaluation method used in the function
        threshold_list: list of the percentiles of the population for
                        extracting thresholds from the predicted probability
    Returns: performance matrix (pandas DataFrame)
    '''
    columns = ["n_estimators", "best_thres", "best_eval"]
    m_performance = pd.DataFrame(columns=columns)

    for n in BAGGING_HP["n_estimators"]:
        bag = bagging.classifier_settings_bagging(n_estimators=n,
                                                  base_estimator=base_method)
        y_predp = bagging.train_bagging(X_train, y_train, X_test, bag)[1]
        best_thres, best_eval = eva.best_threshold(
            y_test, y_predp, eval_method, threshold_list)
        output = {
            "n_estimators": [n],
            "best_thres": [best_thres],
            "best_eval": [best_eval]
        }
        sub_p = pd.DataFrame(data=output)
        m_performance = pd.concat([m_performance, sub_p], join="inner")

    m_performance["method"] = "bagging"
    return m_performance
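

# Usage sketch (illustrative only): base_method is the base estimator handed
# to bagging.classifier_settings_bagging, and BAGGING_HP supplies the
# n_estimators values to sweep. A hypothetical call might look like
#
#     base = decision_tree.classifier_settings_dt(criterion="gini",
#                                                 splitter="best", max_depth=5)
#     bagging_performance(X_train, y_train, X_test, y_test, base,
#                         "f1", [1, 2, 5, 10, 20, 30, 50])
#
# The base estimator settings, the "f1" metric, and the percentile list above
# are assumptions, not values prescribed by this module.
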
def logistics_performance(X_train, y_train, X_test, y_test, eval_method,
                          threshold_list):
    '''
    This function generates the performance matrix of the logistic regression
    model.
    Inputs:
        X_train, y_train, X_test, y_test: training and testing dataframes
        eval_method: evaluation method used in the function
        threshold_list: list of the percentiles of the population for
                        extracting thresholds from the predicted probability
    Returns: performance matrix (pandas DataFrame)
    '''
    columns = ["penalty", "C", "best_thres", "best_eval"]
    m_performance = pd.DataFrame(columns=columns)

    for pen in LOG_HP["penalty"]:
        for c in LOG_HP["C"]:
            log_m = logistic.classifier_settings_log(penalty=pen, C=c)
            y_predp = logistic.train_logistics(X_train, y_train, X_test,
                                               log_m)[1]
            best_thres, best_eval = eva.best_threshold(
                y_test, y_predp, eval_method, threshold_list)
            output = {"penalty": [pen], "C": [c],
                      "best_thres": [best_thres], "best_eval": [best_eval]}
            sub_p = pd.DataFrame(data=output)
            m_performance = pd.concat([m_performance, sub_p], join="inner")

    m_performance["method"] = "logistics"
    return m_performance


def svm_performance(X_train, y_train, X_test, y_test, eval_method,
                    threshold_list):
    '''
    This function generates the performance matrix of the SVM model.
    Inputs:
        X_train, y_train, X_test, y_test: training and testing dataframes
        eval_method: evaluation method used in the function
        threshold_list: list of the percentiles of the population for
                        extracting thresholds from the predicted probability
    Returns: performance matrix (pandas DataFrame)
    '''
    columns = ["kernel", "C", "best_thres", "best_eval"]
    m_performance = pd.DataFrame(columns=columns)

    for ker in SVM_HP["kernel"]:
        for c in SVM_HP["C"]:
            svm_m = svm.classifier_settings_svm(kernel=ker, C=c)
            y_predp = svm.train_svm(X_train, y_train, X_test, svm_m)[1]
            best_thres, best_eval = eva.best_threshold(
                y_test, y_predp, eval_method, threshold_list)
            output = {"kernel": [ker], "C": [c],
                      "best_thres": [best_thres], "best_eval": [best_eval]}
            sub_p = pd.DataFrame(data=output)
            m_performance = pd.concat([m_performance, sub_p], join="inner")

    m_performance["method"] = "svm"
    return m_performance


def knn_performance(X_train, y_train, X_test, y_test, eval_method,
                    threshold_list):
    '''
    This function generates the performance matrix of the KNN model.
    Inputs:
        X_train, y_train, X_test, y_test: training and testing dataframes
        eval_method: evaluation method used in the function
        threshold_list: list of the percentiles of the population for
                        extracting thresholds from the predicted probability
    Returns: performance matrix (pandas DataFrame)
    '''
    columns = ["weights", "metric", "n_neighbors", "best_thres", "best_eval"]
    m_performance = pd.DataFrame(columns=columns)

    for wgt in KNN_HP["weights"]:
        for met in KNN_HP["metric"]:
            for n in KNN_HP["n_neighbors"]:
                knn_m = knn.classifier_settings_knn(
                    weights=wgt, metric=met, n_neighbors=n)
                y_predp = knn.train_knn(X_train, y_train, X_test, knn_m)[1]
                best_thres, best_eval = eva.best_threshold(
                    y_test, y_predp, eval_method, threshold_list)
                # The key must be "weights" (matching the columns list above);
                # a mismatched key would be dropped by the inner join.
                output = {"weights": [wgt], "metric": [met],
                          "n_neighbors": [n], "best_thres": [best_thres],
                          "best_eval": [best_eval]}
                sub_p = pd.DataFrame(data=output)
                m_performance = pd.concat([m_performance, sub_p], join="inner")

    m_performance["method"] = "knn"
    return m_performance


def random_forest_performance(X_train, y_train, X_test, y_test, eval_method,
                              threshold_list):
    '''
    This function generates the performance matrix of the random forest model.
    Inputs:
        X_train, y_train, X_test, y_test: training and testing dataframes
        eval_method: evaluation method used in the function
        threshold_list: list of the percentiles of the population for
                        extracting thresholds from the predicted probability
    Returns: performance matrix (pandas DataFrame)
    '''
    columns = [
        "criterion", "n_estimators", "max_depth", "best_thres", "best_eval"
    ]
    m_performance = pd.DataFrame(columns=columns)

    for crit in RANDOM_FOREST_HP["criterion"]:
        for n in RANDOM_FOREST_HP["n_estimators"]:
            for m_depth in RANDOM_FOREST_HP["max_depth"]:
                rf = decision_tree.classifier_settings_rf(
                    criterion=crit, n_estimators=n, max_depth=m_depth)
                y_predp = decision_tree.train_random_forest(
                    X_train, y_train, X_test, rf)[1]
                best_thres, best_eval = eva.best_threshold(
                    y_test, y_predp, eval_method, threshold_list)
                output = {"criterion": [crit], "n_estimators": [n],
                          "max_depth": [m_depth], "best_thres": [best_thres],
                          "best_eval": [best_eval]}
                sub_p = pd.DataFrame(data=output)
                m_performance = pd.concat([m_performance, sub_p], join="inner")

    m_performance["method"] = "random_forest"
    return m_performance
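

# Usage sketch (illustrative only): because every *_performance function tags
# its rows with a "method" column, the per-model matrices can be stacked into
# one comparison table. The "f1" metric and percentile list below are
# hypothetical.
#
#     thresholds = [1, 2, 5, 10, 20, 30, 50]
#     all_perf = pd.concat([
#         decision_tree_performance(X_train, y_train, X_test, y_test,
#                                   "f1", thresholds),
#         knn_performance(X_train, y_train, X_test, y_test, "f1", thresholds),
#     ], ignore_index=True, sort=False)
#     all_perf.sort_values("best_eval", ascending=False)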