def predictMan(res):
    # Load prediction data; fall back to letting load_svmlight_file infer
    # the feature count if n_features was not usable.
    try:
        X, _ = load_svmlight_file(res.data_file, n_features=res.nfeat)
    except Exception:
        X, _ = load_svmlight_file(res.data_file)
    X = X.toarray()

    # Rebuild the fitted ensemble from the model database.
    if res.meth[0] == 'Classification':
        ens = EnsembleSelectionClassifier(db_file=res.db_file, models=None)
    elif res.meth[0] == 'Regression':
        ens = EnsembleSelectionRegressor(db_file=res.db_file, models=None)
    else:
        msg = ("Invalid method passed "
               "(-T does not conform to ['Regression', 'Classification'])")
        raise ValueError(msg)

    # Predict with either the single best model or the full ensemble.
    if res.pred_src == 'best':
        preds = ens.best_model_predict_proba(X)
    else:
        preds = ens.predict_proba(X)

    if res.meth[0] == 'Classification':
        if not res.return_probs:
            preds = np.argmax(preds, axis=1)

    # Emit one line per sample: class probabilities or a single prediction.
    for p in preds:
        if res.return_probs:
            mesg = " ".join(["%.5f" % v for v in p])
        else:
            mesg = p
        print(str(mesg))

    return preds
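# --- Usage sketch (illustrative, not part of the original script) ---
# predictMan() only reads attributes from `res`, so any object exposing them
# works; here an argparse.Namespace is built by hand.  All file names and
# values below are hypothetical placeholders.
from argparse import Namespace

demo_args = Namespace(
    data_file='test.svmlight',    # svmlight/libsvm-format feature file (hypothetical)
    nfeat=None,                   # let load_svmlight_file infer n_features
    meth=['Classification'],      # or ['Regression']
    db_file='models.db',          # model database produced by training (hypothetical)
    pred_src='ens',               # 'ens' = full ensemble, 'best' = single best model
    return_probs=True,            # print per-class probabilities, one row per sample
)
demo_preds = predictMan(demo_args)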
def trainMan(res):
    X_train, y_train = load_svmlight_file(res.data_file)
    X_train = X_train.toarray()

    # train_test_split for testing set if test_size > 0.0
    if res.test_size > 0.0:
        do_test = True
        splits = train_test_split(X_train, y_train,
                                  test_size=res.test_size,
                                  random_state=res.seed)
        X_train, X_test, y_train, y_test = splits

        # for speedups, convert to sparse matrices
        # X_train = sparse.csc_matrix(X_train)
        # y_train = sparse.csc_matrix(y_train)
        # X_test = sparse.csr_matrix(X_test)
        # y_test = sparse.csr_matrix(y_test)

        print('Train/hillclimbing set size: %d' % len(X_train))
        print('              Test set size: %d\n' % len(X_test))
    else:
        do_test = False
        print('Train/hillclimbing set size: %d' % len(X_train))

    # get model library
    models = build_model_library(res.model_types, res.seed)
    print('built %d models\n' % len(models))

    param_dict = {
        'models': models,
        'db_file': res.db_file,
        'n_best': res.n_best,
        'n_folds': res.n_folds,
        'n_bags': res.n_bags,
        'bag_fraction': res.bag_fraction,
        'prune_fraction': res.prune_fraction,
        'score_metric': res.score_metric,
        'verbose': res.verbose,
        'epsilon': res.epsilon,
        'use_epsilon': res.use_epsilon,
        'use_bootstrap': res.use_bootstrap,
        'max_models': res.max_models,
        'random_state': res.seed,
        'meth': res.meth,
        'sweight': res.sweight,
    }

    print(str(res.meth))

    try:
        if res.meth[0] == 'Classification':
            ens = EnsembleSelectionClassifier(**param_dict)
            print('fitting ensemble:\n%s\n' % ens)
        elif res.meth[0] == 'Regression':
            ens = EnsembleSelectionRegressor(**param_dict)
            print('fitting ensemble:\n%s\n' % ens)
        else:
            msg = ("Invalid method passed "
                   "(-T does not conform to ['Regression', 'Classification'])")
            raise ValueError(msg)
    except ValueError as e:
        print('ERROR: %s' % e)
        import sys
        sys.exit(1)

    # fit models, score, build ensemble
    ens.fit(X_train, y_train)

    list_of_results = {}

    # score the single best model on the training set
    preds = ens.best_model_predict(X_train)
    if res.meth[0] == 'Classification':
        score = accuracy_score(y_train, preds)
        list_of_results['best_train_score'] = score
    elif res.meth[0] == 'Regression':
        score = r2_score(y_train, preds)
        rmse = sqrt(mean_squared_error(y_train, preds))
        print('Train set RMSE from best model: %.5f' % rmse)
        list_of_results['best_train_score'] = score
        list_of_results['best_train_rmse'] = rmse
    print('Train set accuracy from best model: %.5f' % score)

    # score the full ensemble on the training set
    preds = ens.predict(X_train)
    if res.meth[0] == 'Classification':
        score = accuracy_score(y_train, preds)
        list_of_results['ens_train_score'] = score
    elif res.meth[0] == 'Regression':
        score = r2_score(y_train, preds)
        rmse = sqrt(mean_squared_error(y_train, preds))
        list_of_results['ens_train_score'] = score
        list_of_results['ens_train_rmse'] = rmse
        print('Train set RMSE from final ensemble: %.5f' % rmse)
    print('Train set accuracy from final ensemble: %.5f' % score)

    if do_test:
        # score the single best model on the held-out test set
        preds = ens.best_model_predict(X_test)
        if res.meth[0] == 'Classification':
            score = accuracy_score(y_test, preds)
            list_of_results['best_test_score'] = score
            fmt = '\n Test set classification report for best model:\n%s'
            report = classification_report(y_test, preds)
            print(fmt % report)
        elif res.meth[0] == 'Regression':
            score = r2_score(y_test, preds)
            rmse = sqrt(mean_squared_error(y_test, preds))
            list_of_results['best_test_score'] = score
            list_of_results['best_test_rmse'] = rmse
            print('Test set RMSE from best model: %.5f' % rmse)
        print('\n Test set accuracy from best model: %.5f' % score)

        # score the full ensemble on the held-out test set
        preds = ens.predict(X_test)
        if res.meth[0] == 'Classification':
            score = accuracy_score(y_test, preds)
            list_of_results['ens_test_score'] = score
        elif res.meth[0] == 'Regression':
            score = r2_score(y_test, preds)
            rmse = sqrt(mean_squared_error(y_test, preds))
            list_of_results['ens_test_score'] = score
            list_of_results['ens_test_rmse'] = rmse
            print('Test set RMSE from final ensemble: %.5f' % rmse)
        print(' Test set accuracy from final ensemble: %.5f' % score)

        if res.meth[0] == 'Classification':
            fmt = '\n Test set classification report for final ensemble:\n%s'
            report = classification_report(y_test, preds)
            print(fmt % report)

    if res.retrain:
        # refit the selected models on the full training data
        X_train, y_train = load_svmlight_file(res.data_file)
        X_train = X_train.toarray()
        print('Retraining models comprising ensemble on full training set!')
        ens.retrain_ensemble(X_train, y_train)

    if res.removal:
        # drop models that were not selected into the ensemble from the db
        try:
            db_cleanup(res.db_file)
            print("Removing unwanted models...")
        except Exception:
            print("Error pruning db_file")

    # fill any metric that was not computed with 0.0 so callers get a fixed schema
    metric_keys = ['best_train_score', 'best_train_rmse',
                   'ens_train_score', 'ens_train_rmse',
                   'best_test_score', 'best_test_rmse',
                   'ens_test_score', 'ens_test_rmse']
    if set(list_of_results.keys()) != set(metric_keys):
        for keynm in set(metric_keys) - set(list_of_results.keys()):
            list_of_results[keynm] = 0.0

    return list_of_results
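# --- Usage sketch (illustrative, not part of the original script) ---
# trainMan() reads these attributes from `res`; a hand-built argparse.Namespace
# shows the expected shape.  File names, model-type keys, and parameter values
# are hypothetical and may differ from the real CLI defaults.
from argparse import Namespace

demo_train_args = Namespace(
    data_file='train.svmlight',   # svmlight/libsvm-format training data (hypothetical)
    test_size=0.25,               # > 0.0 holds out a test split via train_test_split
    seed=1234,                    # random_state for splitting and the model library
    model_types=['svc', 'sgd'],   # hypothetical keys understood by build_model_library
    db_file='models.db',          # where fitted models are stored (hypothetical)
    n_best=5, n_folds=3, n_bags=20, bag_fraction=0.25,
    prune_fraction=0.8, score_metric='accuracy', verbose=True,
    epsilon=0.01, use_epsilon=False, use_bootstrap=False, max_models=500,
    meth=['Classification'],      # or ['Regression']
    sweight=None,                 # sample weights, if any
    retrain=False,                # refit selected models on the full training set
    removal=True,                 # prune unselected models from db_file
)
results = trainMan(demo_train_args)
print(results['ens_test_score'])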
                        help=help_fmt, default='ens')
    parser.add_argument('-p', dest='return_probs',
                        action='store_true', default=False,
                        help='predict probabilities')

    return parser.parse_args()


if __name__ == '__main__':
    res = parse_args()

    X, _ = load_svmlight_file(res.data_file)
    X = X.toarray()

    # rebuild the fitted ensemble from the model database
    ens = EnsembleSelectionClassifier(db_file=res.db_file, models=None)

    # predict with either the single best model or the full ensemble
    if res.pred_src == 'best':
        preds = ens.best_model_predict_proba(X)
    else:
        preds = ens.predict_proba(X)

    if not res.return_probs:
        preds = np.argmax(preds, axis=1)

    # emit one line per sample: class probabilities or a single label
    for p in preds:
        if res.return_probs:
            mesg = " ".join(["%.5f" % v for v in p])
        else:
            mesg = p
        print(str(mesg))