def predictMan(res):
    """Load a svmlight data file, predict with a stored ensemble, print results.

    Attributes read from ``res``: ``data_file``, ``nfeat``, ``meth``,
    ``db_file``, ``pred_src`` and ``return_probs``.

    The data file is first loaded with ``n_features=res.nfeat``; if that
    attempt raises any ``Exception`` it is loaded again without the feature
    count.

    ``res.meth[0]`` selects the ensemble class ('Classification' or
    'Regression').  ``res.pred_src == 'best'`` uses
    ``best_model_predict_proba``; any other value uses ``predict_proba``.
    For classification without ``res.return_probs`` the predictions are
    reduced with ``np.argmax`` along axis 1.

    Each prediction is printed on its own line and the predictions are
    returned.

    Raises:
        ValueError: if ``res.meth[0]`` is neither 'Classification' nor
            'Regression'.
    """
    try:
        features, _ = load_svmlight_file(res.data_file, n_features=res.nfeat)
    except Exception:
        # Retry without forcing the number of features.
        features, _ = load_svmlight_file(res.data_file)
    features = features.toarray()

    task = res.meth[0]
    is_classification = task == 'Classification'
    if not is_classification and task != 'Regression':
        msg = "Invalid method passed (-T does not conform to ['Regression','Classification']"
        raise ValueError(msg)

    if is_classification:
        ens = EnsembleSelectionClassifier(db_file=res.db_file, models=None)
    else:
        ens = EnsembleSelectionRegressor(db_file=res.db_file, models=None)

    # Pick the single best model or the full ensemble as prediction source.
    predictor = (ens.best_model_predict_proba if res.pred_src == 'best'
                 else ens.predict_proba)
    preds = predictor(features)

    if is_classification and not res.return_probs:
        preds = np.argmax(preds, axis=1)

    for row in preds:
        line = " ".join("%.5f" % v for v in row) if res.return_probs else row
        print(str(line))
    return preds
示例#2
0
        'n_best': res.n_best,
        'n_folds': res.n_folds,
        'n_bags': res.n_bags,
        'bag_fraction': res.bag_fraction,
        'prune_fraction': res.prune_fraction,
        'score_metric': res.score_metric,
        'verbose': res.verbose,
        'epsilon': res.epsilon,
        'use_epsilon': res.use_epsilon,
        'use_bootstrap': res.use_bootstrap,
        'max_models': res.max_models,
        'random_state': res.seed,
    }

    try:
        ens = EnsembleSelectionClassifier(**param_dict)
    except ValueError as e:
        print('ERROR: %s' % e)
        import sys
        sys.exit(1)

    print('fitting ensemble:\n%s\n' % ens)

    # fit models, score, build ensemble
    ens.fit(X_train, y_train)

    preds = ens.best_model_predict(X_train)
    score = accuracy_score(y_train, preds)
    print('Train set accuracy from best model: %.5f' % score)

    preds = ens.predict(X_train)
示例#3
0
def _record_scores(task, y_true, preds, prefix, results):
    """Score ``preds`` against ``y_true`` and store the metrics in ``results``.

    For 'Classification' the score is ``accuracy_score`` and is stored under
    ``<prefix>_score``.  Otherwise (the caller only reaches this point with
    'Classification' or 'Regression') the score is ``r2_score`` and the
    root of ``mean_squared_error`` is additionally stored under
    ``<prefix>_rmse``.

    Returns:
        ``(score, rmse)``; ``rmse`` is ``None`` for classification.
    """
    rmse = None
    if task == 'Classification':
        score = accuracy_score(y_true, preds)
        results[prefix + '_score'] = score
    else:
        score = r2_score(y_true, preds)
        rmse = sqrt(mean_squared_error(y_true, preds))
        results[prefix + '_score'] = score
        results[prefix + '_rmse'] = rmse
    return score, rmse


def trainMan(res):
    """Fit an ensemble on a svmlight data file and report its scores.

    Attributes read from ``res``: ``data_file``, ``test_size``, ``seed``,
    ``model_types``, ``db_file``, ``n_best``, ``n_folds``, ``n_bags``,
    ``bag_fraction``, ``prune_fraction``, ``score_metric``, ``verbose``,
    ``epsilon``, ``use_epsilon``, ``use_bootstrap``, ``max_models``,
    ``meth``, ``sweight``, ``retrain`` and ``removal``.

    When ``res.test_size > 0.0`` the data is split with
    ``train_test_split`` and the held-out part is scored as well.  In that
    case only, ``res.retrain`` re-fits the ensemble on the full data file
    and ``res.removal`` runs ``db_cleanup`` on ``res.db_file``.

    ``res.meth[0]`` must be 'Classification' or 'Regression'; any other
    value (or a ``ValueError`` raised by the ensemble constructor) prints an
    error and exits the process with status 1.

    Returns:
        dict with the keys ``best_train_score``, ``best_train_rmse``,
        ``ens_train_score``, ``ens_train_rmse``, ``best_test_score``,
        ``best_test_rmse``, ``ens_test_score`` and ``ens_test_rmse``.
        Metrics that were not computed are reported as ``0.0``.
    """
    X_train, y_train = load_svmlight_file(res.data_file)
    X_train = X_train.toarray()

    # Hold out a test set only if test_size > 0.0.
    do_test = res.test_size > 0.0
    if do_test:
        X_train, X_test, y_train, y_test = train_test_split(
            X_train, y_train,
            test_size=res.test_size,
            random_state=res.seed)
        print('Train/hillclimbing set size: %d' % len(X_train))
        print('              Test set size: %d\n' % len(X_test))
    else:
        print('Train/hillclimbing set size: %d' % len(X_train))

    # get model lib
    models = build_model_library(res.model_types, res.seed)
    print('built %d models\n' % len(models))

    param_dict = {
        'models': models,
        'db_file': res.db_file,
        'n_best': res.n_best,
        'n_folds': res.n_folds,
        'n_bags': res.n_bags,
        'bag_fraction': res.bag_fraction,
        'prune_fraction': res.prune_fraction,
        'score_metric': res.score_metric,
        'verbose': res.verbose,
        'epsilon': res.epsilon,
        'use_epsilon': res.use_epsilon,
        'use_bootstrap': res.use_bootstrap,
        'max_models': res.max_models,
        'random_state': res.seed,
        'meth': res.meth,
        'sweight': res.sweight,
    }
    print(str(res.meth))

    task = res.meth[0]
    is_classification = task == 'Classification'
    try:
        if is_classification:
            ens = EnsembleSelectionClassifier(**param_dict)
        elif task == 'Regression':
            ens = EnsembleSelectionRegressor(**param_dict)
        else:
            msg = "Invalid method passed (-T does not conform to ['Regression','Classification']"
            raise ValueError(msg)
    except ValueError as e:
        print('ERROR: %s' % e)
        import sys
        sys.exit(1)
    print('fitting ensemble:\n%s\n' % ens)

    # fit models, score, build ensemble
    ens.fit(X_train, y_train)

    results = {}

    # NOTE: the "accuracy" lines below print the R^2 score for regression.
    preds = ens.best_model_predict(X_train)
    score, rmse = _record_scores(task, y_train, preds, 'best_train', results)
    if rmse is not None:
        print('Train set RMSE from best model: %.5f' % rmse)
    print('Train set accuracy from best model: %.5f' % score)

    preds = ens.predict(X_train)
    score, rmse = _record_scores(task, y_train, preds, 'ens_train', results)
    if rmse is not None:
        print('Train set RMSE from final ensemble: %.5f' % rmse)
    print('Train set accuracy from final ensemble: %.5f' % score)

    if do_test:
        preds = ens.best_model_predict(X_test)
        score, rmse = _record_scores(task, y_test, preds, 'best_test', results)
        if is_classification:
            fmt = '\n Test set classification report for best model:\n%s'
            print(fmt % classification_report(y_test, preds))
        else:
            print('Test set RMSE from best model: %.5f' % rmse)
        print('\n Test set accuracy from best model: %.5f' % score)

        preds = ens.predict(X_test)
        score, rmse = _record_scores(task, y_test, preds, 'ens_test', results)
        if rmse is not None:
            print('Test set RMSE from final ensemble: %.5f' % rmse)
        print(' Test set accuracy from final ensemble: %.5f' % score)

        if is_classification:
            fmt = '\n Test set classification report for final ensemble:\n%s'
            print(fmt % classification_report(y_test, preds))

        if res.retrain:
            # Reload the complete file: X_train/y_train currently hold only
            # the training part of the split.
            X_train, y_train = load_svmlight_file(res.data_file)
            X_train = X_train.toarray()
            print('Retraining models comprising ensemble on full training set!')
            ens.retrain_ensemble(X_train, y_train)

        if res.removal:
            # Best-effort cleanup: report the failure but keep the results.
            try:
                db_cleanup(res.db_file)
                print("Removing unwanted models...")
            except Exception as e:
                print("Error pruning db_file: %s" % e)

    # Callers always get the full set of keys; missing metrics become 0.0.
    metric_keys = ['best_train_score', 'best_train_rmse', 'ens_train_score', 'ens_train_rmse',
                   'best_test_score', 'best_test_rmse', 'ens_test_score', 'ens_test_rmse']
    for key in metric_keys:
        results.setdefault(key, 0.0)

    return results
示例#4
0
    parser.add_argument('-p',
                        dest='return_probs',
                        action='store_true',
                        default=False,
                        help='predict probabilities')

    return parser.parse_args()


if (__name__ == '__main__'):
    res = parse_args()

    X, _ = load_svmlight_file(res.data_file)
    X = X.toarray()

    ens = EnsembleSelectionClassifier(db_file=res.db_file, models=None)

    if (res.pred_src == 'best'):
        preds = ens.best_model_predict_proba(X)
    else:
        preds = ens.predict_proba(X)

    if (not res.return_probs):
        preds = np.argmax(preds, axis=1)

    for p in preds:
        if (res.return_probs):
            mesg = " ".join(["%.8f" % v for v in p])
        else:
            mesg = p
                        help=help_fmt, default='ens')

    parser.add_argument('-p', dest='return_probs',
                        action='store_true', default=False,
                        help='predict probabilities')

    return parser.parse_args()


if (__name__ == '__main__'):
    res = parse_args()

    X, _ = load_svmlight_file(res.data_file)
    X = X.toarray()

    ens = EnsembleSelectionClassifier(db_file=res.db_file, models=None)

    if (res.pred_src == 'best'):
        preds = ens.best_model_predict_proba(X)
    else:
        preds = ens.predict_proba(X)

    if (not res.return_probs):
        preds = np.argmax(preds, axis=1)

    for p in preds:
        if (res.return_probs):
            mesg = " ".join(["%.5f" % v for v in p])
        else:
            mesg = p
        'n_best': res.n_best,
        'n_folds': res.n_folds,
        'n_bags': res.n_bags,
        'bag_fraction': res.bag_fraction,
        'prune_fraction': res.prune_fraction,
        'score_metric': res.score_metric,
        'verbose': res.verbose,
        'epsilon': res.epsilon,
        'use_epsilon': res.use_epsilon,
        'use_bootstrap': res.use_bootstrap,
        'max_models': res.max_models,
        'random_state': res.seed,
    }

    try:
        ens = EnsembleSelectionClassifier(**param_dict)
    except ValueError as e:
        print('ERROR: %s' % e)
        import sys
        sys.exit(1)

    print('fitting ensemble:\n%s\n' % ens)

    # fit models, score, build ensemble
    ens.fit(X_train, y_train)

    preds = ens.best_model_predict(X_train)
    score = accuracy_score(y_train, preds)
    print('Train set accuracy from best model: %.5f' % score)

    preds = ens.predict(X_train)