# --- Grid-search post-processing (fragment) ---------------------------------
# The code line below starts mid-statement: `verbose=False)` closes a call that
# was opened before this excerpt, and the `else:` that follows belongs to a
# conditional that also starts above (presumably an if/elif dispatch on
# `fuzzy_option`, judging by the printed message -- confirm).
# Statements visible on the line, in order:
#   1. else-branch: print "wrong fuzzy option".
#   2. gs_results_df  <- training_utils.get_gs_results(clf_gs)
#   3. best_param_df  <- pd.DataFrame(clf_gs.best_params_, index=[0])
#   4. clf_for_eval.set_params(**clf_gs.best_params_), then
#      training_utils.evaluate_on_cv(...) -> (metrics, std) and
#      training_utils.predict_and_pr_curve_on_cv(...) -> pr_curve,
#      both called with xgb_flag=True.
#   5. clf_best       <- clf_gs.best_estimator_
# NOTE(review): the else-branch only prints; the statements after it still read
# `clf_gs`. If `clf_gs` is assigned only in the branches above (not visible
# here), an unknown option would fail later or reuse a stale object -- confirm,
# and consider raising instead of printing.
# NOTE(review): as the text appears here, all of these statements share one
# physical line, so Python treats everything after the first `#` as a comment.
# The original line breaks and indentation need to be restored before this can
# run; the nesting cannot be determined from this excerpt.
verbose=False) else: print("wrong fuzzy option") # grid search results data frame: gs_results_df = training_utils.get_gs_results(clf_gs) # best parameters from grid search: best_param_df = pd.DataFrame(clf_gs.best_params_, index=[0]) # evaluation: clf_for_eval.set_params(**clf_gs.best_params_) metrics, std = training_utils.evaluate_on_cv(training_data, train_X_all, clf_for_eval, fuzzy_option, fuzzy_dist_column, fuzzy_err_column, xgb_flag=True) pr_curve = training_utils.predict_and_pr_curve_on_cv(training_data, train_X_all, clf_for_eval, fuzzy_option, fuzzy_dist_column, fuzzy_err_column, xgb_flag=True) # best model from grid search: clf_best = clf_gs.best_estimator_ # generalization:
# --- ExtraTrees cross-validation and fit per fuzzy option (fragment) ---------
# Statements visible on the code line below, in order:
#   * train_X / general_X: the `features` columns of `training_data` and
#     `general_data` taken as arrays via `.values`. `general_X` is not used in
#     the visible part of the line.
#   * for each `fuzzy_option` in `fuzzy_options`:
#       - print the option;
#       - build a fresh ExtraTreesClassifier (n_estimators=500, gini criterion,
#         class_weight='balanced', bootstrap=True, random_state=476, n_jobs=-1);
#       - training_utils.evaluate_on_cv(...) -> (metrics, std) and
#         training_utils.predict_and_pr_curve_on_cv(...) -> pr_curve. These
#         names are rebound on every iteration and nothing in the visible part
#         stores them (storing may happen past the end of this excerpt);
#       - fit `clf` on all of `train_X` against training_data["Y"]:
#           "normal"     -> no sample weights;
#           "fuzzy_dist" -> sample_weight=training_data[fuzzy_dist_column].values.T[0]
#                           (assumes `fuzzy_dist_column` selects a 2-D block,
#                           e.g. a list of column names, so `.T[0]` is its first
#                           column -- TODO confirm);
#           "fuzzy_err"  -> the call is cut off at the end of this excerpt.
# NOTE(review): as the text appears here, all of these statements share one
# physical line, so Python treats everything after the first `#` as a comment.
# The original line breaks and indentation need to be restored before this can
# run; the loop nesting described above is inferred from statement order only.
train_X = training_data[features].values general_X = general_data[features].values for fuzzy_option in fuzzy_options: print(fuzzy_option) clf = ExtraTreesClassifier(n_estimators=500, criterion='gini', class_weight='balanced', bootstrap=True, random_state=476, n_jobs=-1) metrics, std = training_utils.evaluate_on_cv(training_data, train_X, clf, fuzzy_option, fuzzy_dist_column, fuzzy_err_column) pr_curve = training_utils.predict_and_pr_curve_on_cv( training_data, train_X, clf, fuzzy_option, fuzzy_dist_column, fuzzy_err_column) # fit to the data: if fuzzy_option == "normal": clf.fit(X=train_X, y=training_data["Y"]) elif fuzzy_option == "fuzzy_dist": clf.fit(X=train_X, y=training_data["Y"], sample_weight=training_data[fuzzy_dist_column].values.T[0]) elif fuzzy_option == "fuzzy_err": clf.fit(X=train_X, y=training_data["Y"],