Example #1
def NuSVRRegressor(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = NuSVR()
    reg1.fit(X_train, y_train1)
    reg2 = NuSVR()
    reg2.fit(X_train, y_train2)
    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))

    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="NuSVRRegressor",
            reg=[reg1, reg2],
            metrics=metrics,
            val_metrics=val_metrics)
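
A note on scope: every snippet in these examples leans on project-local helpers (printMetrics, getMetrics, logSave / logAndSave) that are never shown. A minimal sketch of what the regression-side helpers might look like, assuming getMetrics returns an (mse, mae, r2) tuple and logSave pickles the fitted estimator(s); the real project code may differ:

# Hypothetical stand-ins for the project-local regression helpers.
import pickle

from sklearn.metrics import (mean_absolute_error, mean_squared_error,
                             r2_score)


def getMetrics(y_true, y_pred):
    # Assumed to return (mse, mae, r2) for the regression examples.
    return (mean_squared_error(y_true, y_pred),
            mean_absolute_error(y_true, y_pred),
            r2_score(y_true, y_pred))


def printMetrics(y_true, y_pred):
    mse, mae, r2 = getMetrics(y_true, y_pred)
    print(f"mse-{mse}\tmae-{mae}\tr2-{r2}")


def logSave(nameOfModel, reg, metrics, val_metrics):
    # Assumed to persist the estimator(s) and append the metrics to a log.
    with open(f"{nameOfModel}.pkl", "wb") as f:
        pickle.dump(reg, f)
    with open("log.txt", "a") as f:
        f.write(f"{nameOfModel}\t{metrics}\t{val_metrics}\n")
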
def XGBClassifierModel(splitData, X_train, X_test, y_train, y_test):
    clf = xgb.XGBClassifier(objective="binary:logistic", eval_metric="auc")
    grid_values = {
        'learning_rate': [x / 10 for x in range(1, 11)],
        'max_depth': list(range(10, 21, 1)),
        'gamma': [x / 10 for x in range(1, 11)]
    }
    grid_clf_acc = GridSearchCV(clf,
                                param_grid=grid_values,
                                scoring=['roc_auc', 'f1', 'accuracy'],
                                refit='roc_auc')
    grid_clf_acc.fit(X_train, y_train.ravel())
    clf = grid_clf_acc.best_estimator_
    if splitData:
        y_preds = clf.predict(X_test)
        # printMetrics(y_test, y_preds)
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, y_preds)
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0
    y_preds = clf.predict(X_train)
    acc, pre, recall, auc, f1 = getMetrics(y_train, y_preds)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    # print("acc-" + str(acc) + "\tprecision-" + str(pre) + "\trecall-" + str(recall) + "\tauc-" + str(auc) + "\tf1-" + str(f1) + "\tval_accuracy-" + str(val_acc) + "\tval_precision-" + str(val_pre) + "\tval_recall-" + str(val_recall) + "\tval_auc-" + str(val_auc) + "\tval_f1-" + str(val_f1) + "\n")

    logAndSave(name_of_model="XGBClassifierGS",
               clf=clf,
               metrics=metrics,
               val_metrics=val_metrics)
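
The classification functions use a different getMetrics that returns five values (accuracy, precision, recall, AUC, F1), plus logAndSave / logAndSaveV2; presumably these live in a separate module from the regression helpers. A hedged sketch of plausible counterparts:

# Hypothetical classification-side helpers; computing multiclass AUC from
# hard labels via one-vs-rest binarization is an assumption.
import pickle

import numpy as np
from sklearn.metrics import (accuracy_score, f1_score, precision_score,
                             recall_score, roc_auc_score)
from sklearn.preprocessing import label_binarize


def getMetrics(y_true, y_pred, multi_class=False):
    avg = 'weighted' if multi_class else 'binary'
    acc = accuracy_score(y_true, y_pred)
    pre = precision_score(y_true, y_pred, average=avg)
    recall = recall_score(y_true, y_pred, average=avg)
    f1 = f1_score(y_true, y_pred, average=avg)
    if multi_class:
        classes = np.unique(y_true)
        auc = roc_auc_score(label_binarize(y_true, classes=classes),
                            label_binarize(y_pred, classes=classes),
                            average='weighted')
    else:
        auc = roc_auc_score(y_true, y_pred)
    return acc, pre, recall, auc, f1


def logAndSave(name_of_model, clf, metrics, val_metrics):
    # Assumed to persist the classifier and log both metric tuples.
    with open(f"{name_of_model}.pkl", "wb") as f:
        pickle.dump(clf, f)
    with open("log.txt", "a") as f:
        f.write(f"{name_of_model}\t{metrics}\t{val_metrics}\n")
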
def XGBClassifierModelV2(X_train, X_test, y_train, y_test):
    multi_class = True
    clf = xgb.XGBClassifier(objective="multi:softmax", eval_metric="mlogloss")
    grid_values = {
        'learning_rate': [x / 10 for x in range(1, 5)],
        'max_depth': list(range(10, 21, 1))
    }
    grid_clf_acc = GridSearchCV(
        clf,
        param_grid=grid_values,
        scoring=['roc_auc_ovr_weighted', 'f1_weighted', 'accuracy'],
        refit='f1_weighted',
        n_jobs=2,
        verbose=0)
    grid_clf_acc.fit(X_train, y_train)
    clf = grid_clf_acc.best_estimator_
    # print(clf)
    y_preds = clf.predict(X_test)
    # printMetrics(y_test, y_preds, multi_class=multi_class)
    val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
        y_test, y_preds, multi_class=multi_class)

    y_preds = clf.predict(X_train)
    # printMetrics(y_train, y_preds, multi_class=multi_class)
    acc, pre, recall, auc, f1 = getMetrics(y_train,
                                           y_preds,
                                           multi_class=multi_class)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    logAndSave(name_of_model="XGBClassifierModelV2GS",
               clf=clf,
               metrics=metrics,
               val_metrics=val_metrics)

def LinearSVRRegressor(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = LinearSVR(epsilon=0.001,
                     max_iter=5000,
                     C=3,
                     loss='squared_epsilon_insensitive')
    reg1.fit(X_train, y_train1)
    reg2 = LinearSVR(epsilon=0.001,
                     max_iter=5000,
                     C=3,
                     loss='squared_epsilon_insensitive')
    reg2.fit(X_train, y_train2)
    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))

    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="LinearSVRRegressor",
            reg=[reg1, reg2],
            metrics=metrics,
            val_metrics=val_metrics)
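
The two-regressor pattern above (one LinearSVR per target column, then hstack) can also be written with scikit-learn's built-in multi-output wrapper; a sketch of the equivalent formulation:

# Equivalent multi-target formulation via MultiOutputRegressor.
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import LinearSVR

reg = MultiOutputRegressor(LinearSVR(epsilon=0.001,
                                     max_iter=5000,
                                     C=3,
                                     loss='squared_epsilon_insensitive'))
reg.fit(X_train, y_train)      # y_train keeps both target columns
y_pred = reg.predict(X_test)   # shape (n_samples, 2); no hstack needed
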
Example #5
def ExtraTreeGS(X_train, X_test, y_train, y_test):
    reg = ExtraTreeRegressor()
    grid_values = {
        'criterion': ["mse", "mae"],
        'max_depth': list(range(20, 25))
    }
    grid_reg = GridSearchCV(
        reg,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg.fit(X_train, y_train)
    reg = grid_reg.best_estimator_
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params: dict = grid_reg.best_params_
    saveBestParams(nameOfModel="ExtraTreeGS", best_params=best_params)
    logSave(nameOfModel="ExtraTreeGS",
            reg=reg,
            metrics=metrics,
            val_metrics=val_metrics)
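
Two details worth knowing here: with refit='r2' set, grid_reg.best_estimator_ has already been refit on the full training set, so the extra reg.fit call is redundant (though harmless); and the per-scorer cross-validation results remain available afterwards. A sketch, assuming it runs where grid_reg is in scope:

# Inspect the multi-metric CV results; keys follow GridSearchCV's
# 'mean_test_<scorer>' naming convention.
cv = grid_reg.cv_results_
ranked = sorted(zip(cv['params'], cv['mean_test_r2']),
                key=lambda pair: pair[1], reverse=True)
for params, r2 in ranked:
    print(params, r2)
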
Example #6
def AdaBoost(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = AdaBoostRegressor(base_estimator=LinearSVR(),
                             loss='exponential',
                             n_estimators=5)
    reg1.fit(X_train, y_train1)
    reg2 = AdaBoostRegressor(base_estimator=LinearSVR(),
                             loss='exponential',
                             n_estimators=5)
    reg2.fit(X_train, y_train2)
    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))

    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="AdaBoost",
            reg=[reg1, reg2],
            metrics=metrics,
            val_metrics=val_metrics)

def LogisticRegressionModelV2(X_train, X_test, y_train, y_test):
    multi_class = True
    clf = LogisticRegression(penalty='l2',
                             solver='lbfgs',
                             multi_class='multinomial',
                             max_iter=700,
                             class_weight='balanced')
    grid_values = {'C': [0.01, 0.09, 1, 5, 25, 50, 100, 1000]}
    grid_clf_acc = GridSearchCV(
        clf,
        param_grid=grid_values,
        scoring=['roc_auc_ovr_weighted', 'f1_weighted', 'accuracy'],
        refit='f1_weighted',
        n_jobs=2,
        verbose=0)
    grid_clf_acc.fit(X_train, y_train)
    clf = grid_clf_acc.best_estimator_
    # print(clf)
    y_preds = clf.predict(X_test)
    # printMetrics(y_test, y_preds, multi_class=multi_class)
    val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
        y_test, y_preds, multi_class=multi_class)

    y_preds = clf.predict(X_train)
    # printMetrics(y_train, y_preds, multi_class=multi_class)
    acc, pre, recall, auc, f1 = getMetrics(y_train,
                                           y_preds,
                                           multi_class=multi_class)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)

    logAndSaveV2(name_of_model="LogisticRegressionModelV2GS",
                 clf=clf,
                 metrics=metrics,
                 val_metrics=val_metrics)
Example #8
def AdaBoostModel(splitData, X_train, X_test, y_train, y_test):
    svc = SVC()
    clf = AdaBoostClassifier(base_estimator=svc, algorithm='SAMME')
    grid_values = {
        'base_estimator__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
        'base_estimator__C': [x / 10 for x in range(1, 11)],
        'base_estimator__degree': list(range(3, 5))
    }
    grid_clf_acc = GridSearchCV(clf,
                                param_grid=grid_values,
                                scoring=['roc_auc', 'f1', 'accuracy'],
                                refit='roc_auc')
    grid_clf_acc.fit(X_train, y_train.ravel())
    clf = grid_clf_acc.best_estimator_

    if splitData:
        y_preds = clf.predict(X_test)
        # printMetrics(y_test, y_preds)
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, y_preds)
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0

    y_preds = clf.predict(X_train).reshape(-1, 1)
    acc, pre, recall, auc, f1 = getMetrics(y_train, y_preds)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    # print("acc-" + str(acc) + "\tprecision-" + str(pre) + "\trecall-" + str(recall) + "\tauc-" + str(auc) + "\tf1-" + str(f1) + "\tval_accuracy-" + str(val_acc) + "\tval_precision-" + str(val_pre) + "\tval_recall-" + str(val_recall) + "\tval_auc-" + str(val_auc) + "\tval_f1-" + str(val_f1) + "\n")

    logAndSave(name_of_model="AdaBoostGS",
               clf=clf,
               metrics=metrics,
               val_metrics=val_metrics)
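
The 'base_estimator__<param>' keys route the grid values through AdaBoostClassifier to the inner SVC. With the scikit-learn version these snippets target, the accepted names can be listed directly; for reference:

# List the tunable parameter names GridSearchCV accepts for this composite.
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC

clf = AdaBoostClassifier(base_estimator=SVC(), algorithm='SAMME')
print(sorted(clf.get_params().keys()))
# includes 'base_estimator__C', 'base_estimator__degree',
# 'base_estimator__kernel', ...
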
Example #9
def NeuralNetGS(X_train, X_test, y_train, y_test):
    reg = MLPRegressor()
    grid_values = {
        'hidden_layer_sizes': [(8, 16, 32, 64, 128, 64, 32, 64, 16, 8),
                               (8, 16, 32, 64, 32, 16, 8), (8, 16, 32, 16, 8)],
        'solver': ['adam'],
        'learning_rate': ['constant', 'invscaling']
    }
    grid_reg = GridSearchCV(
        reg,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg.fit(X_train, y_train)
    reg = grid_reg.best_estimator_
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params: dict = grid_reg.best_params_
    saveBestParams(nameOfModel="NeuralNetGS", best_params=best_params)
    logSave(nameOfModel="NeuralNetGS",
            reg=reg,
            metrics=metrics,
            val_metrics=val_metrics)
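
MLPs are sensitive to feature scale, so if the inputs are not already standardized, wrapping the regressor in a Pipeline with StandardScaler is a common refinement; the step name then prefixes the grid keys. A sketch under that assumption:

# Optional refinement: standardize inputs and grid-search the pipeline.
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

pipe = Pipeline([('scale', StandardScaler()), ('mlp', MLPRegressor())])
grid_values = {
    'mlp__hidden_layer_sizes': [(8, 16, 32, 16, 8)],
    'mlp__learning_rate': ['constant', 'invscaling']
}
grid_reg = GridSearchCV(pipe, param_grid=grid_values, scoring='r2', cv=2)
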
def GradientBoosting(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = GradientBoostingRegressor(loss='huber')
    reg1.fit(X_train, y_train1)
    reg2 = GradientBoostingRegressor(loss='huber')
    reg2.fit(X_train, y_train2)
    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))

    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="GradientBoosting",
            reg=[reg1, reg2],
            metrics=metrics,
            val_metrics=val_metrics)

def NeuralNetworkModel(splitData, X_train, X_test, y_train, y_test):
    clf = MLPClassifier(alpha=1e-4, max_iter=1000)
    layers = [(4, 6), (5, 7), (8, 10)]
    grid_values = {
        'hidden_layer_sizes': layers,
        'activation': ['tanh', 'relu'],
        'learning_rate': ['constant', 'invscaling']
    }
    grid_clf_acc = GridSearchCV(clf,
                                param_grid=grid_values,
                                scoring=['roc_auc', 'f1', 'accuracy'],
                                refit='roc_auc')
    grid_clf_acc.fit(X_train, y_train.ravel())
    clf = grid_clf_acc.best_estimator_

    if splitData:
        y_preds = clf.predict(X_test)
        # printMetrics(y_test, y_preds)
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, y_preds)
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0
    y_preds = clf.predict(X_train)
    acc, pre, recall, auc, f1 = getMetrics(y_train, y_preds)

    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    # print("acc-" + str(acc) + "\tprecision-" + str(pre) + "\trecall-" + str(recall) + "\tauc-" + str(auc) + "\tf1-" + str(f1) + "\tval_accuracy-" + str(val_acc) + "\tval_precision-" + str(val_pre) + "\tval_recall-" + str(val_recall) + "\tval_auc-" + str(val_auc) + "\tval_f1-" + str(val_f1) + "\n")

    logAndSave(name_of_model="NeuralNetworkGS",
               clf=clf,
               metrics=metrics,
               val_metrics=val_metrics)
Example #12
def XgBoost(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = xg.XGBRegressor(objective='reg:squarederror')
    reg1.fit(X=X_train, y=y_train1)

    reg2 = xg.XGBRegressor(objective='reg:squarederror')
    reg2.fit(X=X_train, y=y_train2)

    y_pred1 = reg1.predict(X_test)
    y_pred2 = reg2.predict(X_test)

    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))

    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred1 = reg1.predict(X_train)
    y_pred2 = reg2.predict(X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)
    logSave(nameOfModel="XgBoost",
            reg=[reg1, reg2],
            metrics=metrics,
            val_metrics=val_metrics)
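
All of the two-target regressors in these examples expect y with shape (n_samples, 2). A synthetic smoke test, as a sketch (the data and sizes are placeholders):

# Hypothetical driver for the two-target regression functions.
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=400, n_features=10, n_targets=2,
                       noise=0.1, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=0)
XgBoost(X_train, X_test, y_train, y_test)
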
Example #13
def LogisticRegressionModelV2(X_train, X_test, y_train, y_test):
    multi_class = True
    clf = LogisticRegression(penalty='l2',
                             solver='lbfgs',
                             multi_class='multinomial',
                             max_iter=700)
    clf.fit(X_train, y_train)

    y_preds = clf.predict(X_test)
    # printMetrics(y_test, y_preds, multi_class=multi_class)
    val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
        y_test, y_preds, multi_class=multi_class)

    y_preds = clf.predict(X_train)
    # printMetrics(y_train, y_preds, multi_class=multi_class)
    acc, pre, recall, auc, f1 = getMetrics(y_train,
                                           y_preds,
                                           multi_class=multi_class)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    # print("acc-" + str(acc) + "\tprecision-" + str(pre) + "\trecall-" + str(recall) + "\tauc-" + str(auc) + "\tval_accuracy-" + str(val_acc) + "\tval_precision-" + str(val_pre) + "\tval_recall-" + str(val_recall) + "\tval_auc-" + str(val_auc) + "\n")

    logAndSaveV2(name_of_model="LogisticRegressionModelV2",
                 clf=clf,
                 metrics=metrics,
                 val_metrics=val_metrics)
Example #14
def LogisticRegressionModel(splitData, X_train, X_test, y_train, y_test):
    clf = LogisticRegression(penalty='l1',
                             solver='liblinear',
                             multi_class='ovr',
                             class_weight={
                                 0: 0.7,
                                 1: 1.5
                             })
    clf.fit(X_train, y_train.ravel())

    if splitData:
        y_preds = clf.predict(X_test)
        printMetrics(y_test, y_preds)
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, y_preds)
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0
    y_preds = clf.predict(X_train)
    acc, pre, recall, auc, f1 = getMetrics(y_train, y_preds)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    # print("acc-" + str(acc) + "\tprecision-" + str(pre) + "\trecall-" + str(recall) + "\tauc-" + str(auc) + "\tf1-" + str(f1) + "\tval_accuracy-" + str(val_acc) + "\tval_precision-" + str(val_pre) + "\tval_recall-" + str(val_recall) + "\tval_auc-" + str(val_auc) + "\tval_f1-" + str(val_f1) + "\n")

    logAndSave(name_of_model="LogisticRegression",
               clf=clf,
               metrics=metrics,
               val_metrics=val_metrics)
Example #15
def LarsRegressorGS(X_train, X_test, y_train, y_test):
    reg = Lars()
    grid_values = {
        'n_nonzero_coefs': list(range(100, 500, 100)),
    }
    grid_reg = GridSearchCV(
        reg,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg.fit(X_train, y_train)
    reg = grid_reg.best_estimator_
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params: dict = grid_reg.best_params_
    saveBestParams(nameOfModel="LarsRegressorGS", best_params=best_params)
    logSave(nameOfModel="LarsRegressorGS",
            reg=reg,
            metrics=metrics,
            val_metrics=val_metrics)
Example #16
def RidgeRegressorGS(X_train, X_test, y_train, y_test):
    reg = Ridge()
    grid_values = {
        'alpha': list(range(1, 3)) + [value * 0.01 for value in range(1, 3)],
        'solver': ['svd', 'cholesky', 'saga']
    }
    grid_reg = GridSearchCV(
        reg,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg.fit(X_train, y_train)
    reg = grid_reg.best_estimator_
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params: dict = grid_reg.best_params_
    saveBestParams(nameOfModel="RidgeRegressorGS", best_params=best_params)
    logSave(nameOfModel="RidgeRegressorGS",
            reg=reg,
            metrics=metrics,
            val_metrics=val_metrics)

def LogisticRegressionModel(splitData, X_train, X_test, y_train, y_test):
    clf = LogisticRegression(solver='liblinear',
                             multi_class='ovr',
                             class_weight={
                                 0: 0.7,
                                 1: 1.5
                             })
    grid_values = {
        'penalty': ['l1', 'l2'],
        'C': [0.01, 0.09, 1, 5, 25, 50, 100]
    }
    grid_clf_acc = GridSearchCV(clf,
                                param_grid=grid_values,
                                scoring=['roc_auc', 'f1', 'accuracy'],
                                refit='roc_auc')
    grid_clf_acc.fit(X_train, y_train.ravel())
    clf = grid_clf_acc.best_estimator_
    if splitData:
        y_preds = clf.predict(X_test)
        # printMetrics(y_test, y_preds)
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, y_preds)
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0
    y_preds = clf.predict(X_train)
    acc, pre, recall, auc, f1 = getMetrics(y_train, y_preds)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    # print("acc-" + str(acc) + "\tprecision-" + str(pre) + "\trecall-" + str(recall) + "\tauc-" + str(auc) + "\tf1-" + str(f1) + "\tval_accuracy-" + str(val_acc) + "\tval_precision-" + str(val_pre) + "\tval_recall-" + str(val_recall) + "\tval_auc-" + str(val_auc) + "\tval_f1-" + str(val_f1) + "\n")

    logAndSave(name_of_model="LogisticRegressionGS",
               clf=clf,
               metrics=metrics,
               val_metrics=val_metrics)
Example #18
def AdaBoostGS(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = AdaBoostRegressor(base_estimator=LinearSVR(), n_estimators=3)
    reg2 = AdaBoostRegressor(base_estimator=LinearSVR(), n_estimators=3)
    grid_values = {
        'base_estimator__epsilon': [value * 0.1 for value in range(0, 2)],
        'base_estimator__C':
        list(range(1, 2)),
        'base_estimator__loss':
        ['epsilon_insensitive', 'squared_epsilon_insensitive']
    }

    grid_reg1 = GridSearchCV(
        reg1,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg1.fit(X_train, y_train1)
    reg1 = grid_reg1.best_estimator_
    reg1.fit(X_train, y_train1)
    grid_reg2 = GridSearchCV(
        reg2,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg2.fit(X_train, y_train2)
    reg2 = grid_reg2.best_estimator_
    reg2.fit(X_train, y_train2)
    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))

    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    best_params = {}
    for key in best_params1.keys():
        best_params[key] = [best_params1[key], best_params2[key]]
    saveBestParams(nameOfModel="AdaBoostGS", best_params=best_params)
    logSave(nameOfModel="AdaBoostGS",
            reg=[reg1, reg2],
            metrics=metrics,
            val_metrics=val_metrics)
Example #19
def SGD_GS(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = SGDRegressor()
    reg2 = SGDRegressor()
    grid_values = {
        'alpha': [value * 0.001 for value in range(1, 3)],
        'loss': ['squared_loss', 'huber'],
        'penalty': ['l2', 'l1'],
        'l1_ratio': [value * 0.1 for value in range(0, 3)]
    }

    grid_reg1 = GridSearchCV(
        reg1,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg1.fit(X_train, y_train1)
    reg1 = grid_reg1.best_estimator_
    reg1.fit(X_train, y_train1)
    grid_reg2 = GridSearchCV(
        reg2,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg2.fit(X_train, y_train2)
    reg2 = grid_reg2.best_estimator_
    reg2.fit(X_train, y_train2)
    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))

    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    best_params = {}
    for key in best_params1.keys():
        best_params[key] = [best_params1[key], best_params2[key]]
    saveBestParams(nameOfModel="SGD_GS", best_params=best_params)
    logSave(nameOfModel="SGD_GS",
            reg=[reg1, reg2],
            metrics=metrics,
            val_metrics=val_metrics)

def GradientBoostingGS(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = GradientBoostingRegressor()
    reg2 = GradientBoostingRegressor()
    grid_values = {
        'loss': ['ls', 'huber'],
        'learning_rate': [value * 0.1 for value in range(1, 3)],
        'criterion': ["mse", "mae"],
        'alpha': [0.25, 0.5, 0.75, 0.9],
    }

    grid_reg1 = GridSearchCV(
        reg1,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg1.fit(X_train, y_train1)
    reg1 = grid_reg1.best_estimator_
    reg1.fit(X_train, y_train1)
    grid_reg2 = GridSearchCV(
        reg2,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg2.fit(X_train, y_train2)
    reg2 = grid_reg2.best_estimator_
    reg2.fit(X_train, y_train2)
    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))

    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    best_params = {}
    for key in best_params1.keys():
        best_params[key] = [best_params1[key], best_params2[key]]
    saveBestParams(nameOfModel="GradientBoostingGS", best_params=best_params)
    logSave(nameOfModel="GradientBoostingGS",
            reg=[reg1, reg2],
            metrics=metrics,
            val_metrics=val_metrics)
Example #21
def NuSVRRegressorGS(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = NuSVR()
    reg2 = NuSVR()
    grid_values = {
        'nu': [value * 0.1 for value in range(1, 3)],
        'C': list(range(1, 3)),
        'kernel': ['poly', 'rbf'],
        'degree': list(range(1, 3))
    }

    grid_reg1 = GridSearchCV(
        reg1,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg1.fit(X_train, y_train1)
    reg1 = grid_reg1.best_estimator_
    reg1.fit(X_train, y_train1)
    grid_reg2 = GridSearchCV(
        reg2,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg2.fit(X_train, y_train2)
    reg2 = grid_reg2.best_estimator_
    reg2.fit(X_train, y_train2)
    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))

    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    best_params = {}
    for key in best_params1.keys():
        best_params[key] = [best_params1[key], best_params2[key]]
    saveBestParams(nameOfModel="NuSVRRegressorGS", best_params=best_params)
    logSave(nameOfModel="NuSVRRegressorGS",
            reg=[reg1, reg2],
            metrics=metrics,
            val_metrics=val_metrics)
Example #22
def XgBoostGS(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = xg.XGBRegressor(objective='reg:squarederror')
    reg2 = xg.XGBRegressor(objective='reg:squarederror')
    grid_values = {
        'learning_rate': [x / 10 for x in range(1, 5)],
        'max_depth': list(range(11, 15))
    }
    grid_reg1 = GridSearchCV(
        reg1,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg1.fit(X_train, y_train1)
    reg1 = grid_reg1.best_estimator_
    reg1.fit(X_train, y_train1)
    grid_reg2 = GridSearchCV(
        reg2,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg2.fit(X_train, y_train2)

    reg2 = grid_reg2.best_estimator_
    reg2.fit(X_train, y_train2)
    y_pred1 = reg1.predict(X_test)
    y_pred2 = reg2.predict(X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))

    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred1 = reg1.predict(X_train)
    y_pred2 = reg2.predict(X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    best_params = {}
    for key in best_params1.keys():
        best_params[key] = [best_params1[key], best_params2[key]]
    saveBestParams(nameOfModel="XgBoostGS", best_params=best_params)
    logSave(nameOfModel="XgBoostGS",
            reg=[reg1, reg2],
            metrics=metrics,
            val_metrics=val_metrics)
Example #23
def LassoRegressor(X_train, X_test, y_train, y_test):
    reg = Lasso(alpha=0.01)
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="LassoRegressor",
            reg=reg,
            metrics=metrics,
            val_metrics=val_metrics)
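
The fixed alpha=0.01 could instead be chosen by cross-validation. A sketch with LassoCV, assuming a single target column (for the two-column targets used elsewhere, MultiTaskLassoCV is the analogue):

# Alternative: let cross-validation pick alpha instead of hard-coding it.
from sklearn.linear_model import LassoCV

reg = LassoCV(alphas=[0.001, 0.01, 0.1, 1.0], cv=5).fit(X_train, y_train)
print(reg.alpha_)  # the selected regularization strength
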
Example #24
def ElasticNetRegressor(X_train, X_test, y_train, y_test):
    reg = ElasticNet(alpha=10, l1_ratio=0.2)
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="ElasticNetRegressor",
            reg=reg,
            metrics=metrics,
            val_metrics=val_metrics)
Example #25
def NeuralNet(X_train, X_test, y_train, y_test):
    reg = MLPRegressor(hidden_layer_sizes=(32, 64, 128, 256, 128, 64))
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="NeuralNet",
            reg=reg,
            metrics=metrics,
            val_metrics=val_metrics)
Example #26
def RidgeRegressor(X_train, X_test, y_train, y_test):
    reg = Ridge()
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="RidgeRegressor",
            reg=reg,
            metrics=metrics,
            val_metrics=val_metrics)
Example #27
def AdaBoostModelV2(X_train, X_test, y_train, y_test):
    multi_class = True
    clf = AdaBoostClassifier(base_estimator=RandomForestClassifier(),
                             n_estimators=200,
                             algorithm='SAMME')
    clf.fit(X_train, y_train)

    y_preds = clf.predict(X_test)
    # printMetrics(y_test, y_preds, multi_class=multi_class)
    val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
        y_test, y_preds, multi_class=multi_class)

    y_preds = clf.predict(X_train)
    # printMetrics(y_train, y_preds, multi_class=multi_class)
    acc, pre, recall, auc, f1 = getMetrics(y_train,
                                           y_preds,
                                           multi_class=multi_class)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    # print("acc-" + str(acc) + "\tprecision-" + str(pre) + "\trecall-" + str(recall) + "\tauc-" + str(auc) + "\tval_accuracy-" + str(val_acc) + "\tval_precision-" + str(val_pre) + "\tval_recall-" + str(val_recall) + "\tval_auc-" + str(val_auc) + "\n")

    logAndSaveV2(name_of_model="AdaBoostModelV2",
                 clf=clf,
                 metrics=metrics,
                 val_metrics=val_metrics)
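
Note that scikit-learn 1.2 renamed the base_estimator argument of the AdaBoost classes to estimator (the old name was removed in 1.4), so on newer versions this construction would read:

# Same model under the post-1.2 scikit-learn API.
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier

clf = AdaBoostClassifier(estimator=RandomForestClassifier(),
                         n_estimators=200,
                         algorithm='SAMME')
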
Example #28
def DecisionTree(X_train, X_test, y_train, y_test):
    reg = DecisionTreeRegressor()
    reg.fit(X_train, y_train)
    y_pred1 = reg.predict(X_test)

    printMetrics(y_true=y_test, y_pred=y_pred1)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred1)
    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="DecisionTree",
            reg=reg,
            metrics=metrics,
            val_metrics=val_metrics)
Example #29
def AdaBoostModel(splitData, X_train, X_test, y_train, y_test):
    svc = SVC()
    clf = AdaBoostClassifier(base_estimator=svc,
                             n_estimators=100,
                             algorithm='SAMME')
    clf.fit(X_train, y_train.ravel())

    if splitData:
        y_preds = clf.predict(X_test)
        printMetrics(y_test, y_preds)
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, y_preds)
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0

    y_preds = clf.predict(X_train).reshape(-1, 1)
    acc, pre, recall, auc, f1 = getMetrics(y_train, y_preds)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    # print("acc-" + str(acc) + "\tprecision-" + str(pre) + "\trecall-" + str(recall) + "\tauc-" + str(auc) + "\tval_accuracy-" + str(val_acc) + "\tval_precision-" + str(val_pre) + "\tval_recall-" + str(val_recall) + "\tval_auc-" + str(val_auc) + "\n")

    logAndSave(name_of_model="AdaBoost",
               clf=clf,
               metrics=metrics,
               val_metrics=val_metrics)
Example #30
def RandomForestModelV2(X_train, X_test, y_train, y_test):
    multi_class = True
    clf = RandomForestClassifier()
    grid_values = {
        'n_estimators': list(range(100, 501, 50)),
        'criterion': ['gini', 'entropy'],
        'max_depth': list(range(10, 21, 1))
    }
    grid_clf_acc = GridSearchCV(
        clf,
        param_grid=grid_values,
        scoring=['roc_auc_ovr_weighted', 'f1_weighted', 'accuracy'],
        refit='f1_weighted',
        n_jobs=2,
        verbose=0)
    grid_clf_acc.fit(X_train, y_train)
    clf = grid_clf_acc.best_estimator_
    # print(clf)
    y_preds = clf.predict(X_test)
    # printMetrics(y_test, y_preds, multi_class=multi_class)
    val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
        y_test, y_preds, multi_class=multi_class)

    y_preds = clf.predict(X_train)
    # printMetrics(y_train, y_preds, multi_class=multi_class)
    acc, pre, recall, auc, f1 = getMetrics(y_train,
                                           y_preds,
                                           multi_class=multi_class)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)

    logAndSaveV2(name_of_model="RandomForestModelV2GS",
                 clf=clf,
                 metrics=metrics,
                 val_metrics=val_metrics)
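
For completeness, a hedged sketch of how one of the multiclass (V2) entry points might be driven, assuming the helper definitions sketched earlier are in scope:

# Hypothetical driver for the multiclass (V2) models.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=0)
RandomForestModelV2(X_train, X_test, y_train, y_test)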