def LogisticRegressionModelV2(X_train, X_test, y_train, y_test):
    """Tune a class-balanced multinomial logistic regression and log it.

    ``C`` is searched with ``GridSearchCV`` under three scorers; the
    estimator refit on ``f1_weighted`` is scored on the test split and then
    on the training split, and both metric tuples are passed to
    ``logAndSaveV2``. Nothing is returned.

    Args:
        X_train: Features used for the grid search and the refit.
        X_test: Features of the held-out split (reported as ``val_metrics``).
        y_train: Labels matching ``X_train``.
        y_test: Labels matching ``X_test``.
    """
    base_model = LogisticRegression(
        penalty='l2',
        solver='lbfgs',
        multi_class='multinomial',
        max_iter=700,
        class_weight='balanced',
    )
    search = GridSearchCV(
        base_model,
        param_grid={'C': [0.01, .09, 1, 5, 25, 50, 100, 1000]},
        scoring=['roc_auc_ovr_weighted', 'f1_weighted', 'accuracy'],
        refit='f1_weighted',
        n_jobs=2,
        verbose=0,
    )
    search.fit(X_train, y_train)
    best_model = search.best_estimator_

    # Held-out split is scored first, then the training split.
    test_acc, test_pre, test_rec, test_auc, test_f1 = getMetrics(
        y_test, best_model.predict(X_test), multi_class=True)
    train_acc, train_pre, train_rec, train_auc, train_f1 = getMetrics(
        y_train, best_model.predict(X_train), multi_class=True)

    logAndSaveV2(
        name_of_model="LogisticRegressionModelV2GS",
        clf=best_model,
        metrics=(train_acc, train_pre, train_rec, train_auc, train_f1),
        val_metrics=(test_acc, test_pre, test_rec, test_auc, test_f1),
    )
# Example #2
# 0
def LogisticRegressionModelV2(X_train, X_test, y_train, y_test):
    """Fit a multinomial logistic regression and log its metrics.

    The model is trained once on the training split with fixed
    hyper-parameters, scored on the test split and then on the training
    split via ``getMetrics``, and handed to ``logAndSaveV2``. Nothing is
    returned.

    Args:
        X_train: Training features.
        X_test: Held-out features (reported as ``val_metrics``).
        y_train: Labels matching ``X_train``.
        y_test: Labels matching ``X_test``.
    """
    model = LogisticRegression(
        penalty='l2',
        solver='lbfgs',
        multi_class='multinomial',
        max_iter=700,
    )
    model.fit(X_train, y_train)

    # Score the held-out split before the training split.
    held_out_pred = model.predict(X_test)
    v_acc, v_pre, v_rec, v_auc, v_f1 = getMetrics(
        y_test, held_out_pred, multi_class=True)

    fitted_pred = model.predict(X_train)
    t_acc, t_pre, t_rec, t_auc, t_f1 = getMetrics(
        y_train, fitted_pred, multi_class=True)

    logAndSaveV2(
        name_of_model="LogisticRegressionModelV2",
        clf=model,
        metrics=(t_acc, t_pre, t_rec, t_auc, t_f1),
        val_metrics=(v_acc, v_pre, v_rec, v_auc, v_f1),
    )
# Example #3
# 0
def AdaBoostModelV2(X_train, X_test, y_train, y_test):
    """Fit AdaBoost (SAMME) over random-forest base learners and log it.

    A 200-round ensemble is trained on the training split, scored on the
    test split and then on the training split via ``getMetrics``, and the
    results are passed to ``logAndSaveV2``. Nothing is returned.

    Args:
        X_train: Training features.
        X_test: Held-out features (reported as ``val_metrics``).
        y_train: Labels matching ``X_train``.
        y_test: Labels matching ``X_test``.
    """
    booster = AdaBoostClassifier(
        base_estimator=RandomForestClassifier(),
        n_estimators=200,
        algorithm='SAMME',
    )
    booster.fit(X_train, y_train)

    # Held-out split first, training split second.
    test_pred = booster.predict(X_test)
    val_acc, val_pre, val_rec, val_auc, val_f1 = getMetrics(
        y_test, test_pred, multi_class=True)

    train_pred = booster.predict(X_train)
    tr_acc, tr_pre, tr_rec, tr_auc, tr_f1 = getMetrics(
        y_train, train_pred, multi_class=True)

    logAndSaveV2(
        name_of_model="AdaBoostModelV2",
        clf=booster,
        metrics=(tr_acc, tr_pre, tr_rec, tr_auc, tr_f1),
        val_metrics=(val_acc, val_pre, val_rec, val_auc, val_f1),
    )
# Example #4
# 0
def RandomForestModelV2(X_train, X_test, y_train, y_test):
    """Grid-search a random forest and log the refit estimator.

    The search spans ``n_estimators`` (100..500 in steps of 50),
    ``criterion`` (gini/entropy) and ``max_depth`` (10..20). The estimator
    refit on ``f1_weighted`` is scored on the test split and then on the
    training split, and both metric tuples go to ``logAndSaveV2``. Nothing
    is returned.

    Args:
        X_train: Features used for the grid search and the refit.
        X_test: Held-out features (reported as ``val_metrics``).
        y_train: Labels matching ``X_train``.
        y_test: Labels matching ``X_test``.
    """
    search_space = {
        'n_estimators': list(range(100, 501, 50)),
        'criterion': ['gini', 'entropy'],
        'max_depth': list(range(10, 21, 1)),
    }
    search = GridSearchCV(
        RandomForestClassifier(),
        param_grid=search_space,
        scoring=['roc_auc_ovr_weighted', 'f1_weighted', 'accuracy'],
        refit='f1_weighted',
        n_jobs=2,
        verbose=0,
    )
    search.fit(X_train, y_train)
    forest = search.best_estimator_

    # Held-out split first, training split second.
    v_acc, v_pre, v_rec, v_auc, v_f1 = getMetrics(
        y_test, forest.predict(X_test), multi_class=True)
    t_acc, t_pre, t_rec, t_auc, t_f1 = getMetrics(
        y_train, forest.predict(X_train), multi_class=True)

    logAndSaveV2(
        name_of_model="RandomForestModelV2GS",
        clf=forest,
        metrics=(t_acc, t_pre, t_rec, t_auc, t_f1),
        val_metrics=(v_acc, v_pre, v_rec, v_auc, v_f1),
    )
# Example #5
# 0
def XGBClassifierModelV2(X_train, X_test, y_train, y_test):
    """Fit an XGBoost softmax classifier and log its metrics.

    The classifier is trained on the training split, scored on the test
    split and then on the training split via ``getMetrics``, and passed to
    ``logAndSaveV2`` together with both metric tuples. Nothing is returned.

    Args:
        X_train: Training features.
        X_test: Held-out features (reported as ``val_metrics``).
        y_train: Labels matching ``X_train``.
        y_test: Labels matching ``X_test``.
    """
    booster = xgb.XGBClassifier(
        objective="multi:softmax",
        eval_metric="mlogloss",
    )
    booster.fit(X_train, y_train)

    # Held-out split is scored before the training split.
    test_pred = booster.predict(X_test)
    test_acc, test_pre, test_rec, test_auc, test_f1 = getMetrics(
        y_test, test_pred, multi_class=True)

    train_pred = booster.predict(X_train)
    train_acc, train_pre, train_rec, train_auc, train_f1 = getMetrics(
        y_train, train_pred, multi_class=True)

    logAndSaveV2(
        name_of_model="XGBClassifierModelV2",
        clf=booster,
        metrics=(train_acc, train_pre, train_rec, train_auc, train_f1),
        val_metrics=(test_acc, test_pre, test_rec, test_auc, test_f1),
    )
def RandomForestModelV2(X_train, X_test, y_train, y_test):
    """Fit a depth-limited random forest and log its metrics.

    A forest with ``max_depth=14`` is trained on the training split, scored
    on the test split and then on the training split via ``getMetrics``,
    and passed to ``logAndSaveV2``. Nothing is returned.

    Args:
        X_train: Training features.
        X_test: Held-out features (reported as ``val_metrics``).
        y_train: Labels matching ``X_train``.
        y_test: Labels matching ``X_test``.
    """
    forest = RandomForestClassifier(max_depth=14)
    forest.fit(X_train, y_train)

    # Held-out split first, training split second.
    v_acc, v_pre, v_rec, v_auc, v_f1 = getMetrics(
        y_test, forest.predict(X_test), multi_class=True)
    t_acc, t_pre, t_rec, t_auc, t_f1 = getMetrics(
        y_train, forest.predict(X_train), multi_class=True)

    logAndSaveV2(
        name_of_model="RandomForestModelV2",
        clf=forest,
        metrics=(t_acc, t_pre, t_rec, t_auc, t_f1),
        val_metrics=(v_acc, v_pre, v_rec, v_auc, v_f1),
    )
def NeuralNetworkModelV2(X_train, X_test, y_train, y_test):
    """Grid-search an MLP classifier and log the refit estimator.

    The search spans solver, hidden-layer layout and activation under three
    scorers; the estimator refit on ``f1_weighted`` is scored on the test
    split and then on the training split via ``getMetrics``, and both
    metric tuples are passed to ``logAndSaveV2``. Nothing is returned.

    Args:
        X_train: Features used for the grid search and the refit.
        X_test: Held-out features (reported as ``val_metrics``).
        y_train: Labels matching ``X_train``.
        y_test: Labels matching ``X_test``.
    """
    multi_class = True
    clf = MLPClassifier(alpha=1e-4, max_iter=1000)
    layers = [(4, 6), (5, 7), (8, 10), (256, 128, 64, 32, 16, 8)]
    # `learning_rate` is intentionally not part of the grid: MLPClassifier
    # only reads it when solver='sgd', and neither solver searched here is
    # 'sgd'. Varying it would double the number of fits without changing
    # any of the fitted models.
    grid_values = {
        'solver': ['adam', 'lbfgs'],
        'hidden_layer_sizes': layers,
        'activation': ['tanh', 'relu', 'logistic'],
    }
    grid_clf_acc = GridSearchCV(
        clf,
        param_grid=grid_values,
        scoring=['roc_auc_ovr_weighted', 'f1_weighted', 'accuracy'],
        refit='f1_weighted',
        n_jobs=2,
        verbose=0)
    grid_clf_acc.fit(X_train, y_train)
    clf = grid_clf_acc.best_estimator_

    # Held-out split is scored first, then the training split.
    y_preds = clf.predict(X_test)
    val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
        y_test, y_preds, multi_class=multi_class)

    y_preds = clf.predict(X_train)
    acc, pre, recall, auc, f1 = getMetrics(y_train,
                                           y_preds,
                                           multi_class=multi_class)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)

    logAndSaveV2(name_of_model="NeuralNetworkModelV2GS",
                 clf=clf,
                 metrics=metrics,
                 val_metrics=val_metrics)
# Example #8
# 0
def AdaBoostModelV2(X_train, X_test, y_train, y_test):
    """Grid-search AdaBoost over random-forest base learners and log it.

    The search tunes the base forest (``n_estimators``, ``criterion``,
    ``max_depth``) and the boosting ``learning_rate``. The estimator refit
    on ``f1_weighted`` is scored on the test split and then on the training
    split, and both metric tuples are passed to ``logAndSaveV2``. Nothing
    is returned.

    Args:
        X_train: Features used for the grid search and the refit.
        X_test: Held-out features (reported as ``val_metrics``).
        y_train: Labels matching ``X_train``.
        y_test: Labels matching ``X_test``.
    """
    booster = AdaBoostClassifier(
        base_estimator=RandomForestClassifier(),
        algorithm='SAMME',
    )
    search_space = {
        'base_estimator__n_estimators': [100, 200],
        'base_estimator__criterion': ['gini', 'entropy'],
        'base_estimator__max_depth': list(range(12, 15)),
        'learning_rate': [0.01, 0.05, 0.5, 1],
    }
    search = GridSearchCV(
        booster,
        param_grid=search_space,
        scoring=['roc_auc_ovr_weighted', 'f1_weighted', 'accuracy'],
        refit='f1_weighted',
        n_jobs=2,
        verbose=0,
    )
    search.fit(X_train, y_train)
    best_booster = search.best_estimator_

    # Held-out split first, training split second.
    test_pred = best_booster.predict(X_test)
    v_acc, v_pre, v_rec, v_auc, v_f1 = getMetrics(
        y_test, test_pred, multi_class=True)

    train_pred = best_booster.predict(X_train)
    t_acc, t_pre, t_rec, t_auc, t_f1 = getMetrics(
        y_train, train_pred, multi_class=True)

    logAndSaveV2(
        name_of_model="AdaBoostModelV2GS",
        clf=best_booster,
        metrics=(t_acc, t_pre, t_rec, t_auc, t_f1),
        val_metrics=(v_acc, v_pre, v_rec, v_auc, v_f1),
    )
# Example #9
# 0
            "LightGBM", "RandomForest", "ExtraTrees", "Tree", "Bagging",
            "AdaBoost", "Linear"
        ]
    }
}
# Run the optimiser over `space`, rebuild the winning feature-selector and
# classifier, then score them on the test and training data and log the
# result. `space`, `opt`, `multi_class` and the X/y splits are defined
# outside the lines shown here.
df = {"train": pd.DataFrame(X_train), "target": pd.Series(y_train)}
# NOTE(review): the third argument (21) is presumably the evaluation budget
# of the search — confirm against the optimiser's signature.
best = opt.optimise(space, df, 21)

# Fit the feature selector with the chosen strategy on the training data and
# apply the same fitted selector to the test features.
clf_feature_selector = classification.Clf_feature_selector(
    strategy=best['fs__strategy'])
newDf = clf_feature_selector.fit_transform(df['train'], df['target'])
testdf = clf_feature_selector.transform(pd.DataFrame(X_test))

# Train the classifier with the chosen estimator strategy on the selected
# training features.
clf = classification.Classifier(strategy=best['est__strategy'])
clf.fit(newDf, df['target'])
# Test-split predictions are scored first and reported as val_* values.
y_preds = clf.predict(testdf)
# printMetrics(y_true=y_test, y_pred=y_preds, multi_class=multi_class)
val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
    y_test, y_preds, multi_class=multi_class)
# y_preds is reused for the training-split predictions.
y_preds = clf.predict(newDf)
# printMetrics(y_true=y_train, y_pred=y_preds, multi_class=multi_class)
acc, pre, recall, auc, f1 = getMetrics(y_train,
                                       y_preds,
                                       multi_class=multi_class)
val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
metrics = (acc, pre, recall, auc, f1)
# Hand the fitted classifier and both metric tuples to the project logger.
logAndSaveV2(name_of_model="MlBoxV2",
             clf=clf,
             metrics=metrics,
             val_metrics=val_metrics)
# Example #10
# 0
# Train H2O AutoML on the prepared frames, score the leader model on both
# frames and log the result. `trainFrame`, `testFrame`, `x_labels`,
# `y_labels`, `y_train`, `y_test` and `multi_class` are defined outside the
# lines shown here.

# Convert the label column(s) to factors in both frames.
# NOTE(review): presumably needed so H2O treats the task as classification
# — confirm.
trainFrame[y_labels] = trainFrame[y_labels].asfactor()
testFrame[y_labels] = testFrame[y_labels].asfactor()

# AutoML run capped at 60 seconds.
# NOTE(review): testFrame is passed as the validation frame and is also the
# data the val_* metrics below are computed on — confirm this is intended.
aml = H2OAutoML(max_runtime_secs=60)
aml.train(x=x_labels,
          y=y_labels,
          training_frame=trainFrame,
          validation_frame=testFrame)

# Predict with the leader model on the test frame, then on the training
# frame; y_predsFrame is reused for both.
y_predsFrame = aml.leader.predict(testFrame)
y_test_pred_df = y_predsFrame.as_data_frame()
y_predsFrame = aml.leader.predict(trainFrame)
y_train_pred_df = y_predsFrame.as_data_frame()

# Only the 'predict' column of each prediction frame is scored.
y_preds = y_test_pred_df['predict'].values
# printMetrics(y_true=y_test, y_pred=y_preds, multi_class=multi_class)
val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
    y_test, y_preds, multi_class=multi_class)
# print("*" * 100)
y_preds = y_train_pred_df['predict'].values
# printMetrics(y_true=y_train, y_pred=y_preds, multi_class=multi_class)
acc, pre, recall, auc, f1 = getMetrics(y_train,
                                       y_preds,
                                       multi_class=multi_class)
val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
metrics = (acc, pre, recall, auc, f1)
# The AutoML object itself (not aml.leader) is what gets logged.
logAndSaveV2(name_of_model="H2OModelV2",
             clf=aml,
             metrics=metrics,
             val_metrics=val_metrics)
import autokeras as ak
import tensorflow as tf

from Utility import getAnnealingData, printMetrics, getMetrics, logAndSaveV2

# Search for a structured-data classifier with AutoKeras on the annealing
# dataset, score it on both splits and log the result.
X_train, X_test, y_train, y_test = getAnnealingData()
multi_class = True

# Up to 20 trials, 15 epochs each; the test split is used as validation data
# during fitting.
# NOTE(review): multi_label=True is set while multi_class is True and the
# predictions below are flattened with ravel() — confirm the labels really
# are multi-label and that the flattened shape matches y_test / y_train.
clf = ak.StructuredDataClassifier(multi_label=True, metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tf.keras.metrics.AUC()], overwrite=True, max_trials=20)
clf.fit(x=X_train, y=y_train, epochs=15, validation_data=(X_test, y_test))

# The exported model is not used again in the lines shown here.
model = clf.export_model()

# Test-split predictions are scored first and reported as val_* values.
y_preds = clf.predict(X_test).ravel()
# printMetrics(y_true=y_test, y_pred=y_preds, multi_class=multi_class)
val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(y_true=y_test, y_pred=y_preds, multi_class=multi_class)

# y_preds is reused for the training-split predictions.
y_preds = clf.predict(X_train).ravel()
# printMetrics(y_true=y_train, y_pred=y_preds, multi_class=multi_class)
acc, pre, recall, auc, f1 = getMetrics(y_true=y_train, y_pred=y_preds, multi_class=multi_class)

val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
metrics = (acc, pre, recall, auc, f1)

# The AutoKeras classifier (not the exported Keras model) is what gets logged.
logAndSaveV2(name_of_model="AutoKerasV2", clf=clf, metrics=metrics, val_metrics=val_metrics)