def test_rgs():
    """Smoke-test ``Regressor`` end to end on the Boston housing data.

    Holds out 33% of the samples (``random_state=1``), fits a ``Regressor``
    with ``metric='mse'``, cross-validation evaluation and the
    ``'ensemble_selection'`` ensemble under a budget of 120, prints the
    fitted summary, refits, and prints the mean squared error on the
    held-out split.

    The output directory ``./data/eval_exps/soln-ml`` is created if needed
    and is removed afterwards, including when any estimator step raises.
    """
    time_limit = 120
    print('==> Start to evaluate with Budget %d' % time_limit)
    ensemble_method = 'ensemble_selection'
    eval_type = 'cv'

    # NOTE(review): load_boston is no longer shipped by recent scikit-learn
    # releases -- confirm the pinned version before relying on this test.
    boston = load_boston()
    X, y = boston.data, boston.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=1)

    dm = DataManager(X_train, y_train)
    train_data = dm.get_data_node(X_train, y_train)
    test_data = dm.get_data_node(X_test, y_test)

    save_dir = './data/eval_exps/soln-ml'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_dir, exist_ok=True)

    try:
        rgs = Regressor(metric='mse',
                        ensemble_method=ensemble_method,
                        enable_meta_algorithm_selection=False,
                        evaluation=eval_type,
                        time_limit=time_limit,
                        output_dir=save_dir)
        rgs.fit(train_data)
        print(rgs.summary())

        rgs.refit()
        pred = rgs.predict(test_data)
        # test_data was built from (X_test, y_test); data[1] is presumably
        # the target array -- it is what the original compares against.
        print(mean_squared_error(test_data.data[1], pred))
    finally:
        # Clean up even when fit/refit/predict fails, so a broken run does
        # not leave stale artifacts for the next one.
        shutil.rmtree(save_dir)
def evaluate_sys(run_id, task_type, mth, dataset, ens_method, enable_meta,
                 eval_type='holdout', time_limit=1200, seed=1, tree_id=0):
    """Fit one estimator on ``dataset``, score it, and pickle the result.

    Args:
        run_id: Integer run index; printed and embedded in the result
            file name.
        task_type: ``'cls'`` selects a ``Classifier`` scored with balanced
            accuracy; any other value selects a ``Regressor`` scored with
            mean squared error.
        mth: Passed to ``estimator.fit`` as ``opt_strategy``; also part of
            the result file name.
        dataset: Dataset identifier handed to ``load_train_test_data`` and
            to ``fit`` as ``dataset_id``.
        ens_method: Ensemble method forwarded to the estimator. The file
            name records whether it ``is None``.
        enable_meta: The string ``'true'`` enables meta algorithm
            selection; every other value disables it.
        eval_type: Forwarded to the estimator as ``evaluation``.
        time_limit: Forwarded to the estimator as ``time_limit``; also
            part of the result file name.
        seed: Accepted but not used inside this function.
        tree_id: Passed through to ``fit``; also part of the result file
            name.

    Side effects:
        Prints the run id, dataset and validation/test scores; writes
        ``[dataset, validation_score, test_score, start_time]`` as a pickle
        under ``save_folder`` (a name read from the enclosing scope); then
        removes the directory reported by ``estimator.get_output_dir()``.
    """
    is_classification = task_type == 'cls'
    _task_type = MULTICLASS_CLS if is_classification else REGRESSION
    train_data, test_data = load_train_test_data(dataset,
                                                 task_type=_task_type)
    _enable_meta = enable_meta == 'true'

    # Pick the estimator class and the options that differ per task, then
    # build the estimator with a single constructor call.
    if is_classification:
        from mindware.estimators import Classifier as estimator_cls
        task_options = {
            'per_run_time_limit': 30,
            'metric': 'bal_acc',
            # NOTE(review): 'random_forest' is listed twice here, exactly
            # as in the original -- confirm whether that is intentional.
            'include_algorithms': ['extra_trees', 'random_forest',
                                   'adaboost', 'gradient_boosting',
                                   'k_nearest_neighbors', 'liblinear_svc',
                                   'libsvm_svc', 'lightgbm',
                                   'logistic_regression', 'random_forest'],
        }
    else:
        from mindware.estimators import Regressor as estimator_cls
        task_options = {
            'per_run_time_limit': 90,
            'metric': 'mse',
        }

    estimator = estimator_cls(time_limit=time_limit,
                              output_dir=save_folder,
                              ensemble_method=ens_method,
                              enable_meta_algorithm_selection=_enable_meta,
                              evaluation=eval_type,
                              n_jobs=1,
                              **task_options)

    # The start timestamp (not an elapsed duration) is what gets pickled.
    start_time = time.time()
    estimator.fit(train_data, opt_strategy=mth, dataset_id=dataset,
                  tree_id=tree_id)
    pred = estimator.predict(test_data)

    scorer = (balanced_accuracy_score if is_classification
              else mean_squared_error)
    test_score = scorer(test_data.data[1], pred)
    validation_score = estimator._ml_engine.solver.incumbent_perf

    print('Run ID : %d' % run_id)
    print('Dataset : %s' % dataset)
    print('Val/Test score : %f - %f' % (validation_score, test_score))

    # save_folder is concatenated as-is, so it must already end with a
    # path separator for the file to land inside that directory.
    file_name = '%s_%s_%s_%s_%d_%d_%d_%d.pkl' % (
        task_type, mth, dataset, enable_meta, time_limit,
        (ens_method is None), tree_id, run_id)
    save_path = save_folder + file_name
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, validation_score, test_score, start_time], f)

    # Delete output dir
    output_dir = os.path.join(estimator.get_output_dir())
    shutil.rmtree(output_dir)
# Script body: evaluate a Regressor on the Boston housing data.
# time_limit, ensemble_method, eval_type and n_jobs are read from names
# defined earlier in the file (not visible in this section).
print('==> Start to evaluate with Budget %d' % time_limit)

# Load the data and hold out 33% of the samples for testing.
boston = load_boston()
X, y = boston.data, boston.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=1)

# Wrap both splits as data nodes through a manager built on the train split.
dm = DataManager(X_train, y_train)
train_data = dm.get_data_node(X_train, y_train)
test_data = dm.get_data_node(X_test, y_test)

# Make sure the output directory exists before handing it to the estimator.
save_dir = './data/eval_exps/soln-ml'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

rgs = Regressor(
    metric='mse',
    dataset_name='boston',
    ensemble_method=ensemble_method,
    evaluation=eval_type,
    time_limit=time_limit,
    output_dir=save_dir,
    random_state=1,
    n_jobs=n_jobs,
)
rgs.fit(train_data)

# Score the held-out split; data[1] is presumably the target array, since
# the node was built from (X_test, y_test).
pred = rgs.predict(test_data)
test_mse = mean_squared_error(test_data.data[1], pred)
print(test_mse)