Пример #1
0
def test_rgs():
    """Fit a ``Regressor`` end to end and print its held-out MSE.

    The data returned by ``load_boston()`` is split with ``test_size=0.33``
    and ``random_state=1``. A ``Regressor`` is configured with
    ``metric='mse'``, ``evaluation='cv'``, ensemble selection and a
    ``time_limit`` of 120, then fitted, summarised, refitted and used to
    predict the held-out split.

    The scratch directory ``./data/eval_exps/soln-ml`` is created for the run
    and always removed afterwards, even when one of the steps raises.

    NOTE(review): if ``load_boston`` is ``sklearn.datasets.load_boston``, it is
    no longer available in recent scikit-learn releases -- confirm the pinned
    version.
    """
    time_limit = 120
    print('==> Start to evaluate with Budget %d' % time_limit)
    ensemble_method = 'ensemble_selection'
    eval_type = 'cv'

    boston = load_boston()
    X, y = boston.data, boston.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)
    dm = DataManager(X_train, y_train)
    train_data = dm.get_data_node(X_train, y_train)
    test_data = dm.get_data_node(X_test, y_test)

    save_dir = './data/eval_exps/soln-ml'
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(save_dir, exist_ok=True)

    try:
        rgs = Regressor(metric='mse',
                        ensemble_method=ensemble_method,
                        enable_meta_algorithm_selection=False,
                        evaluation=eval_type,
                        time_limit=time_limit,
                        output_dir=save_dir)
        rgs.fit(train_data)
        print(rgs.summary())
        rgs.refit()

        pred = rgs.predict(test_data)
        # test_data.data[1] is compared against the predictions, i.e. it is
        # used here as the ground-truth target of the held-out split.
        print(mean_squared_error(test_data.data[1], pred))
    finally:
        # Clean up even on failure so a broken run leaves no artifacts behind.
        shutil.rmtree(save_dir)
Пример #2
0
def evaluate_sys(run_id, task_type, mth, dataset, ens_method, enable_meta,
                 eval_type='holdout', time_limit=1200, seed=1, tree_id=0):
    """Run one benchmark trial and pickle its validation and test scores.

    Args:
        run_id: Index of this repetition; printed and embedded in the result
            file name.
        task_type: ``'cls'`` builds a ``Classifier`` scored with balanced
            accuracy; any other value builds a ``Regressor`` scored with MSE.
        mth: Forwarded to ``fit`` as ``opt_strategy`` and embedded in the
            result file name.
        dataset: Dataset identifier passed to ``load_train_test_data`` and to
            ``fit`` as ``dataset_id``.
        ens_method: Forwarded as ``ensemble_method``; the file name records
            whether it is ``None``.
        enable_meta: The string ``'true'`` enables meta algorithm selection;
            every other value disables it.
        eval_type: Forwarded to the estimator as ``evaluation``.
        time_limit: Forwarded to the estimator as ``time_limit``.
        seed: Accepted but not used by this function.
            NOTE(review): presumably meant to seed the estimator -- confirm
            before wiring it through.
        tree_id: Forwarded to ``fit`` and embedded in the result file name.

    Side effects:
        Writes ``[dataset, validation_score, test_score, start_time]`` as a
        pickle to ``save_folder + <name>.pkl`` and removes the estimator's
        output directory, also when the trial raises.
    """
    _task_type = MULTICLASS_CLS if task_type == 'cls' else REGRESSION
    train_data, test_data = load_train_test_data(dataset, task_type=_task_type)
    _enable_meta = enable_meta == 'true'
    if task_type == 'cls':
        from mindware.estimators import Classifier
        estimator = Classifier(time_limit=time_limit,
                               per_run_time_limit=30,
                               output_dir=save_folder,
                               ensemble_method=ens_method,
                               enable_meta_algorithm_selection=_enable_meta,
                               evaluation=eval_type,
                               metric='bal_acc',
                               # 'random_forest' used to be listed twice; each
                               # algorithm now appears exactly once.
                               include_algorithms=['extra_trees', 'random_forest',
                                                   'adaboost', 'gradient_boosting',
                                                   'k_nearest_neighbors', 'liblinear_svc',
                                                   'libsvm_svc', 'lightgbm',
                                                   'logistic_regression'],
                               n_jobs=1)
    else:
        from mindware.estimators import Regressor
        estimator = Regressor(time_limit=time_limit,
                              per_run_time_limit=90,
                              output_dir=save_folder,
                              ensemble_method=ens_method,
                              enable_meta_algorithm_selection=_enable_meta,
                              evaluation=eval_type,
                              metric='mse',
                              # include_preprocessors=['percentile_selector_regression'],
                              # include_algorithms=['random_forest'],
                              n_jobs=1)

    start_time = time.time()
    try:
        estimator.fit(train_data, opt_strategy=mth, dataset_id=dataset, tree_id=tree_id)
        pred = estimator.predict(test_data)
        # test_data.data[1] is scored against the predictions, i.e. it is used
        # here as the ground-truth target.
        if task_type == 'cls':
            test_score = balanced_accuracy_score(test_data.data[1], pred)
        else:
            test_score = mean_squared_error(test_data.data[1], pred)
        validation_score = estimator._ml_engine.solver.incumbent_perf
        # eval_dict = estimator._ml_engine.solver.get_eval_dict()
        print('Run ID         : %d' % run_id)
        print('Dataset        : %s' % dataset)
        print('Val/Test score : %f - %f' % (validation_score, test_score))

        # Plain concatenation is kept on purpose: the resulting path is
        # unchanged for existing consumers of these files.
        # NOTE(review): this relies on save_folder ending with a path
        # separator -- confirm where save_folder is defined.
        save_path = save_folder + '%s_%s_%s_%s_%d_%d_%d_%d.pkl' % (
            task_type, mth, dataset, enable_meta, time_limit, (ens_method is None), tree_id, run_id)
        with open(save_path, 'wb') as f:
            pickle.dump([dataset, validation_score, test_score, start_time], f)
    finally:
        # Delete output dir, also after a failed trial. The existence check
        # keeps a cleanup error from masking the exception that ended the run.
        # NOTE(review): the estimator was given output_dir=save_folder; confirm
        # get_output_dir() returns a run-specific sub-directory, otherwise this
        # would also remove the pickle written above.
        output_dir = estimator.get_output_dir()
        if os.path.isdir(output_dir):
            shutil.rmtree(output_dir)
Пример #3
0
# Fit a Regressor on the data returned by load_boston() and print the mean
# squared error on a held-out split.
# NOTE(review): time_limit, ensemble_method, eval_type and n_jobs are not
# defined in this fragment; presumably they are set earlier in the script
# (e.g. from command-line arguments) -- confirm.
# NOTE(review): if load_boston is sklearn.datasets.load_boston, it is no longer
# available in recent scikit-learn releases -- confirm the pinned version.
print('==> Start to evaluate with Budget %d' % time_limit)

boston = load_boston()
X, y = boston.data, boston.target
# Fixed random_state keeps the train/test split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=1)
dm = DataManager(X_train, y_train)
train_data = dm.get_data_node(X_train, y_train)
test_data = dm.get_data_node(X_test, y_test)

save_dir = './data/eval_exps/soln-ml'
# exist_ok avoids the check-then-create race of os.path.exists + makedirs.
os.makedirs(save_dir, exist_ok=True)

rgs = Regressor(metric='mse',
                dataset_name='boston',
                ensemble_method=ensemble_method,
                evaluation=eval_type,
                time_limit=time_limit,
                output_dir=save_dir,
                random_state=1,
                n_jobs=n_jobs)

rgs.fit(train_data)
pred = rgs.predict(test_data)

# test_data.data[1] is compared against the predictions, i.e. it is used here
# as the ground-truth target of the held-out split.
print(mean_squared_error(test_data.data[1], pred))