def LogisticRegressionModelV2(X_train, X_test, y_train, y_test):
    """Grid-search an L2-penalised, class-balanced multinomial logistic
    regression over C, then log train/validation metrics for the best
    estimator via logAndSaveV2.

    Parameters are the usual train/test feature matrices and label vectors.
    Returns nothing; results are persisted by logAndSaveV2.
    """
    multi_class = True
    base_estimator = LogisticRegression(penalty='l2',
                                        solver='lbfgs',
                                        multi_class='multinomial',
                                        max_iter=700,
                                        class_weight='balanced')
    # Regularisation strengths to sweep; the search refits on weighted F1.
    param_grid = {'C': [0.01, 0.09, 1, 5, 25, 50, 100, 1000]}
    search = GridSearchCV(base_estimator,
                          param_grid=param_grid,
                          scoring=['roc_auc_ovr_weighted', 'f1_weighted', 'accuracy'],
                          refit='f1_weighted',
                          n_jobs=2,
                          verbose=0)
    search.fit(X_train, y_train)
    clf = search.best_estimator_
    # Held-out (validation) metrics first, matching the original flow.
    test_preds = clf.predict(X_test)
    val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
        y_test, test_preds, multi_class=multi_class)
    # Training-set metrics, useful as an overfitting check.
    train_preds = clf.predict(X_train)
    acc, pre, recall, auc, f1 = getMetrics(
        y_train, train_preds, multi_class=multi_class)
    logAndSaveV2(name_of_model="LogisticRegressionModelV2GS",
                 clf=clf,
                 metrics=(acc, pre, recall, auc, f1),
                 val_metrics=(val_acc, val_pre, val_recall, val_auc, val_f1))
def LogisticRegressionModelV2(X_train, X_test, y_train, y_test):
    """Fit a fixed-hyperparameter multinomial logistic regression and log
    train/validation metrics via logAndSaveV2.

    No hyperparameter search is performed in this variant.
    """
    multi_class = True
    clf = LogisticRegression(penalty='l2',
                             solver='lbfgs',
                             multi_class='multinomial',
                             max_iter=700)
    clf.fit(X_train, y_train)
    # Validation metrics on the held-out split.
    test_preds = clf.predict(X_test)
    val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
        y_test, test_preds, multi_class=multi_class)
    # Training-set metrics.
    train_preds = clf.predict(X_train)
    acc, pre, recall, auc, f1 = getMetrics(
        y_train, train_preds, multi_class=multi_class)
    logAndSaveV2(name_of_model="LogisticRegressionModelV2",
                 clf=clf,
                 metrics=(acc, pre, recall, auc, f1),
                 val_metrics=(val_acc, val_pre, val_recall, val_auc, val_f1))
def AdaBoostModelV2(X_train, X_test, y_train, y_test):
    """Train an AdaBoost (SAMME) ensemble over random-forest base learners
    with fixed hyperparameters, then log train/validation metrics via
    logAndSaveV2.
    """
    multi_class = True
    clf = AdaBoostClassifier(base_estimator=RandomForestClassifier(),
                             n_estimators=200,
                             algorithm='SAMME')
    clf.fit(X_train, y_train)
    # Validation metrics on the held-out split.
    test_preds = clf.predict(X_test)
    val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
        y_test, test_preds, multi_class=multi_class)
    # Training-set metrics.
    train_preds = clf.predict(X_train)
    acc, pre, recall, auc, f1 = getMetrics(
        y_train, train_preds, multi_class=multi_class)
    logAndSaveV2(name_of_model="AdaBoostModelV2",
                 clf=clf,
                 metrics=(acc, pre, recall, auc, f1),
                 val_metrics=(val_acc, val_pre, val_recall, val_auc, val_f1))
def RandomForestModelV2(X_train, X_test, y_train, y_test):
    """Grid-search a random forest over ensemble size, split criterion and
    tree depth, then log train/validation metrics for the best estimator
    via logAndSaveV2.
    """
    multi_class = True
    base_estimator = RandomForestClassifier()
    # 100..500 trees in steps of 50; depths 10..20 inclusive.
    param_grid = {'n_estimators': list(range(100, 501, 50)),
                  'criterion': ['gini', 'entropy'],
                  'max_depth': list(range(10, 21, 1))}
    search = GridSearchCV(base_estimator,
                          param_grid=param_grid,
                          scoring=['roc_auc_ovr_weighted', 'f1_weighted', 'accuracy'],
                          refit='f1_weighted',
                          n_jobs=2,
                          verbose=0)
    search.fit(X_train, y_train)
    clf = search.best_estimator_
    # Validation metrics on the held-out split.
    test_preds = clf.predict(X_test)
    val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
        y_test, test_preds, multi_class=multi_class)
    # Training-set metrics.
    train_preds = clf.predict(X_train)
    acc, pre, recall, auc, f1 = getMetrics(
        y_train, train_preds, multi_class=multi_class)
    logAndSaveV2(name_of_model="RandomForestModelV2GS",
                 clf=clf,
                 metrics=(acc, pre, recall, auc, f1),
                 val_metrics=(val_acc, val_pre, val_recall, val_auc, val_f1))
def XGBClassifierModelV2(X_train, X_test, y_train, y_test):
    """Train an XGBoost multi-class classifier (softmax objective) with
    default boosting settings, then log train/validation metrics via
    logAndSaveV2.
    """
    multi_class = True
    clf = xgb.XGBClassifier(objective="multi:softmax",
                            eval_metric="mlogloss")
    clf.fit(X_train, y_train)
    # Validation metrics on the held-out split.
    test_preds = clf.predict(X_test)
    val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
        y_test, test_preds, multi_class=multi_class)
    # Training-set metrics.
    train_preds = clf.predict(X_train)
    acc, pre, recall, auc, f1 = getMetrics(
        y_train, train_preds, multi_class=multi_class)
    logAndSaveV2(name_of_model="XGBClassifierModelV2",
                 clf=clf,
                 metrics=(acc, pre, recall, auc, f1),
                 val_metrics=(val_acc, val_pre, val_recall, val_auc, val_f1))
def RandomForestModelV2(X_train, X_test, y_train, y_test):
    """Train a random forest with a fixed depth cap (max_depth=14) and log
    train/validation metrics via logAndSaveV2.

    No hyperparameter search is performed in this variant.
    """
    multi_class = True
    clf = RandomForestClassifier(max_depth=14)
    clf.fit(X_train, y_train)
    # Validation metrics on the held-out split.
    test_preds = clf.predict(X_test)
    val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
        y_test, test_preds, multi_class=multi_class)
    # Training-set metrics.
    train_preds = clf.predict(X_train)
    acc, pre, recall, auc, f1 = getMetrics(
        y_train, train_preds, multi_class=multi_class)
    logAndSaveV2(name_of_model="RandomForestModelV2",
                 clf=clf,
                 metrics=(acc, pre, recall, auc, f1),
                 val_metrics=(val_acc, val_pre, val_recall, val_auc, val_f1))
def NeuralNetworkModelV2(X_train, X_test, y_train, y_test):
    """Grid-search an MLP classifier over solver, hidden-layer topology,
    activation and learning-rate schedule, then log train/validation
    metrics for the best estimator via logAndSaveV2.
    """
    multi_class = True
    base_estimator = MLPClassifier(alpha=1e-4, max_iter=1000)
    # Mix of small topologies and one deep funnel-shaped network.
    topologies = [(4, 6), (5, 7), (8, 10), (256, 128, 64, 32, 16, 8)]
    param_grid = {
        'solver': ['adam', 'lbfgs'],
        'hidden_layer_sizes': topologies,
        'activation': ['tanh', 'relu', 'logistic'],
        'learning_rate': ['constant', 'invscaling'],
    }
    search = GridSearchCV(base_estimator,
                          param_grid=param_grid,
                          scoring=['roc_auc_ovr_weighted', 'f1_weighted', 'accuracy'],
                          refit='f1_weighted',
                          n_jobs=2,
                          verbose=0)
    search.fit(X_train, y_train)
    clf = search.best_estimator_
    # Validation metrics on the held-out split.
    test_preds = clf.predict(X_test)
    val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
        y_test, test_preds, multi_class=multi_class)
    # Training-set metrics.
    train_preds = clf.predict(X_train)
    acc, pre, recall, auc, f1 = getMetrics(
        y_train, train_preds, multi_class=multi_class)
    logAndSaveV2(name_of_model="NeuralNetworkModelV2GS",
                 clf=clf,
                 metrics=(acc, pre, recall, auc, f1),
                 val_metrics=(val_acc, val_pre, val_recall, val_auc, val_f1))
def AdaBoostModelV2(X_train, X_test, y_train, y_test):
    """Grid-search an AdaBoost (SAMME) ensemble over its random-forest base
    learner's size/criterion/depth and the boosting learning rate, then log
    train/validation metrics for the best estimator via logAndSaveV2.
    """
    multi_class = True
    base_estimator = AdaBoostClassifier(base_estimator=RandomForestClassifier(),
                                        algorithm='SAMME')
    # 'base_estimator__*' keys tune the inner random forest.
    param_grid = {
        'base_estimator__n_estimators': [100, 200],
        'base_estimator__criterion': ['gini', 'entropy'],
        'base_estimator__max_depth': list(range(12, 15)),
        'learning_rate': [0.01, 0.05, 0.5, 1],
    }
    search = GridSearchCV(base_estimator,
                          param_grid=param_grid,
                          scoring=['roc_auc_ovr_weighted', 'f1_weighted', 'accuracy'],
                          refit='f1_weighted',
                          n_jobs=2,
                          verbose=0)
    search.fit(X_train, y_train)
    clf = search.best_estimator_
    # Validation metrics on the held-out split.
    test_preds = clf.predict(X_test)
    val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
        y_test, test_preds, multi_class=multi_class)
    # Training-set metrics.
    train_preds = clf.predict(X_train)
    acc, pre, recall, auc, f1 = getMetrics(
        y_train, train_preds, multi_class=multi_class)
    logAndSaveV2(name_of_model="AdaBoostModelV2GS",
                 clf=clf,
                 metrics=(acc, pre, recall, auc, f1),
                 val_metrics=(val_acc, val_pre, val_recall, val_auc, val_f1))
"LightGBM", "RandomForest", "ExtraTrees", "Tree", "Bagging", "AdaBoost", "Linear" ] } } df = {"train": pd.DataFrame(X_train), "target": pd.Series(y_train)} best = opt.optimise(space, df, 21) clf_feature_selector = classification.Clf_feature_selector( strategy=best['fs__strategy']) newDf = clf_feature_selector.fit_transform(df['train'], df['target']) testdf = clf_feature_selector.transform(pd.DataFrame(X_test)) clf = classification.Classifier(strategy=best['est__strategy']) clf.fit(newDf, df['target']) y_preds = clf.predict(testdf) # printMetrics(y_true=y_test, y_pred=y_preds, multi_class=multi_class) val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics( y_test, y_preds, multi_class=multi_class) y_preds = clf.predict(newDf) # printMetrics(y_true=y_train, y_pred=y_preds, multi_class=multi_class) acc, pre, recall, auc, f1 = getMetrics(y_train, y_preds, multi_class=multi_class) val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1) metrics = (acc, pre, recall, auc, f1) logAndSaveV2(name_of_model="MlBoxV2", clf=clf, metrics=metrics, val_metrics=val_metrics)
trainFrame[y_labels] = trainFrame[y_labels].asfactor() testFrame[y_labels] = testFrame[y_labels].asfactor() aml = H2OAutoML(max_runtime_secs=60) aml.train(x=x_labels, y=y_labels, training_frame=trainFrame, validation_frame=testFrame) y_predsFrame = aml.leader.predict(testFrame) y_test_pred_df = y_predsFrame.as_data_frame() y_predsFrame = aml.leader.predict(trainFrame) y_train_pred_df = y_predsFrame.as_data_frame() y_preds = y_test_pred_df['predict'].values # printMetrics(y_true=y_test, y_pred=y_preds, multi_class=multi_class) val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics( y_test, y_preds, multi_class=multi_class) # print("*" * 100) y_preds = y_train_pred_df['predict'].values # printMetrics(y_true=y_train, y_pred=y_preds, multi_class=multi_class) acc, pre, recall, auc, f1 = getMetrics(y_train, y_preds, multi_class=multi_class) val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1) metrics = (acc, pre, recall, auc, f1) logAndSaveV2(name_of_model="H2OModelV2", clf=aml, metrics=metrics, val_metrics=val_metrics)
import autokeras as ak
import tensorflow as tf
from Utility import getAnnealingData, printMetrics, getMetrics, logAndSaveV2

# Script: search for a structured-data classifier on the (pre-split)
# Annealing dataset with AutoKeras, then log train/validation metrics.
X_train, X_test, y_train, y_test = getAnnealingData()
multi_class = True
# NOTE(review): multi_label=True treats outputs as independent binary labels;
# if the Annealing classes are mutually exclusive this flag looks wrong —
# confirm it is intentional.
clf = ak.StructuredDataClassifier(multi_label=True, metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tf.keras.metrics.AUC()], overwrite=True, max_trials=20)
# The test split doubles as the NAS validation data here.
clf.fit(x=X_train, y=y_train, epochs=15, validation_data=(X_test, y_test))
# Exported Keras model is currently unused beyond this point.
model = clf.export_model()
# Validation metrics on the held-out split (predictions flattened to 1-D).
y_preds = clf.predict(X_test).ravel()
# printMetrics(y_true=y_test, y_pred=y_preds, multi_class=multi_class)
val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(y_true=y_test, y_pred=y_preds, multi_class=multi_class)
# Training-set metrics.
y_preds = clf.predict(X_train).ravel()
# printMetrics(y_true=y_train, y_pred=y_preds, multi_class=multi_class)
acc, pre, recall, auc, f1 = getMetrics(y_true=y_train, y_pred=y_preds, multi_class=multi_class)
val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
metrics = (acc, pre, recall, auc, f1)
logAndSaveV2(name_of_model="AutoKerasV2", clf=clf, metrics=metrics, val_metrics=val_metrics)