def evaluate_hmab(algorithms, dataset, run_id, trial_num, seed, time_limit=1200):
    print('%s-%s-%d: %d' % (hmab_flag, dataset, run_id, time_limit))
    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset, task_type=MULTICLASS_CLS)
    cls_task_type = BINARY_CLS if len(set(train_data.data[1])) == 2 else MULTICLASS_CLS
    balanced_acc_metric = make_scorer(balanced_accuracy)

    if is_unbalanced_dataset(train_data):
        from solnml.components.feature_engineering.transformations.preprocessor.smote_balancer import DataBalancer
        train_data = DataBalancer().operate(train_data)

    bandit = FirstLayerBandit(cls_task_type, trial_num, algorithms, train_data,
                              output_dir='logs',
                              per_run_time_limit=per_run_time_limit,
                              dataset_name=dataset,
                              ensemble_size=50,
                              inner_opt_algorithm=opt_algo,
                              metric=balanced_acc_metric,
                              fe_algo='bo',
                              seed=seed,
                              time_limit=time_limit,
                              eval_type='holdout')
    bandit.optimize()
    time_taken = time.time() - _start_time
    model_desc = [bandit.nbest_algo_ids, bandit.optimal_algo_id, bandit.final_rewards, bandit.action_sequence]

    validation_accuracy = np.max(bandit.final_rewards)
    best_pred = bandit._best_predict(test_data)
    test_accuracy = balanced_accuracy(test_data.data[1], best_pred)

    bandit.refit()
    es_pred = bandit._es_predict(test_data)
    test_accuracy_with_ens = balanced_accuracy(test_data.data[1], es_pred)

    data = [dataset, validation_accuracy, test_accuracy, test_accuracy_with_ens, time_taken, model_desc]
    print(model_desc)
    print(data)

    save_path = project_dir + '%s_%s_%s_%d_%d_%d_%d_%d.pkl' % (
        hmab_flag, opt_algo, dataset, trial_num, len(algorithms), seed, run_id, time_limit)
    with open(save_path, 'wb') as f:
        pickle.dump(data, f)
def evaluate_2rd_hmab(run_id, mth, dataset, algo, eval_type='holdout', time_limit=1200, seed=1):
    task_type = MULTICLASS_CLS
    train_data, test_data = load_train_test_data(dataset, task_type=task_type)

    from solnml.estimators import Classifier
    clf = Classifier(time_limit=time_limit,
                     per_run_time_limit=300,
                     output_dir=save_folder,
                     ensemble_method=None,
                     evaluation=eval_type,
                     enable_meta_algorithm_selection=False,
                     metric='bal_acc',
                     include_algorithms=[algo],
                     n_jobs=1)
    clf.fit(train_data, opt_strategy=mth)
    pred = clf.predict(test_data)
    test_score = balanced_accuracy_score(test_data.data[1], pred)

    timestamps, perfs = clf.get_val_stats()
    validation_score = np.max(perfs)
    print('Evaluation Num : %d' % len(perfs))
    print('Run ID : %d' % run_id)
    print('Dataset : %s' % dataset)
    print('Val/Test score : %f - %f' % (validation_score, test_score))

    save_path = save_folder + '%s_%s_%d_%d_%s.pkl' % (mth, dataset, time_limit, run_id, algo)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, validation_score, test_score], f)
def evaluate_hmab(algorithms, run_id, time_limit=600, dataset='credit', eval_type='holdout',
                  enable_ens=True, seed=1):
    task_id = '[hmab][%s-%d-%d]' % (dataset, len(algorithms), time_limit)
    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset, task_type=MULTICLASS_CLS)

    if enable_ens is True:
        ensemble_method = 'ensemble_selection'
    else:
        ensemble_method = None

    clf = Classifier(time_limit=time_limit,
                     amount_of_resource=None,
                     output_dir=save_dir,
                     ensemble_method=ensemble_method,
                     evaluation=eval_type,
                     metric='bal_acc',
                     n_jobs=1)
    clf.fit(train_data)
    clf.refit()
    pred = clf.predict(test_data)
    test_score = balanced_accuracy_score(test_data.data[1], pred)

    timestamps, perfs = clf.get_val_stats()
    validation_score = np.max(perfs)
    print('Dataset : %s' % dataset)
    print('Validation/Test score : %f - %f' % (validation_score, test_score))

    save_path = save_dir + '%s-%d.pkl' % (task_id, run_id)
    with open(save_path, 'wb') as f:
        stats = [timestamps, perfs]
        pickle.dump([validation_score, test_score, stats], f)
def evaluate_hpsklearn(dataset, run_id, time_limit, seed=1):
    from solnml.utils.hpsklearn_config import tpe_classifier

    # TODO: Specify max_evals.
    automl = HyperoptEstimator(preprocessing=None,
                               ex_preprocs=None,
                               classifier=tpe_classifier(),
                               algo=tpe.suggest,
                               max_evals=200,
                               trial_timeout=time_limit,
                               seed=seed)

    raw_data, test_raw_data = load_train_test_data(dataset)
    X_train, y_train = raw_data.data
    X_test, y_test = test_raw_data.data
    X_train, y_train = X_train.astype('float64'), y_train.astype('int')
    X_test, y_test = X_test.astype('float64'), y_test.astype('int')

    automl.fit(X_train, y_train)
    y_hat = automl.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_hat)
    print("%d-th Evaluation: accuracy score => %.4f" % (run_id, test_accuracy))

    save_path = save_dir + 'hpsklearn-%s-%d-%d.pkl' % (dataset, time_limit, run_id)
    with open(save_path, 'wb') as f:
        pickle.dump([test_accuracy], f)
def evaluate_hmab(algorithms, run_id, dataset='credit', trial_num=200, seed=1,
                  eval_type='holdout', enable_ens=False):
    task_id = '%s-hmab-%d-%d' % (dataset, len(algorithms), trial_num)
    _start_time = time.time()
    raw_data, test_raw_data = load_train_test_data(dataset)

    bandit = FirstLayerBandit(trial_num, algorithms, raw_data,
                              output_dir='logs/%s/' % task_id,
                              per_run_time_limit=per_run_time_limit,
                              dataset_name='%s-%d' % (dataset, run_id),
                              seed=seed,
                              eval_type=eval_type)
    bandit.optimize()
    time_cost = int(time.time() - _start_time)
    print(bandit.final_rewards)
    print(bandit.action_sequence)

    validation_accuracy = np.max(bandit.final_rewards)
    test_accuracy = bandit.score(test_raw_data, metric_func=balanced_accuracy)
    test_accuracy_with_ens = EnsembleBuilder(bandit).score(test_raw_data, metric_func=balanced_accuracy)
    print('Dataset : %s' % dataset)
    print('Validation/Test score : %f - %f' % (validation_accuracy, test_accuracy))
    print('Test score with ensem : %f' % test_accuracy_with_ens)

    save_path = save_dir + '%s-%d.pkl' % (task_id, run_id)
    with open(save_path, 'wb') as f:
        stats = [time_cost, test_accuracy_with_ens, bandit.time_records, bandit.final_rewards]
        pickle.dump([validation_accuracy, test_accuracy, stats], f)
    return time_cost
def get_configspace():
    if benchmark == 'hpo':
        cs = _classifiers[algo_name].get_hyperparameter_search_space()
        model = UnParametrizedHyperparameter("estimator", algo_name)
        cs.add_hyperparameter(model)
        return cs

    train_data, test_data = load_train_test_data('splice', task_type=MULTICLASS_CLS)
    cs = _classifiers[algo_name].get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", algo_name)
    cs.add_hyperparameter(model)
    default_hpo_config = cs.get_default_configuration()

    fe_evaluator = ClassificationEvaluator(default_hpo_config, scorer=metric,
                                           name='fe', resampling_strategy='holdout',
                                           seed=1)
    fe_optimizer = BayesianOptimizationOptimizer(task_type=CLASSIFICATION,
                                                 input_data=train_data,
                                                 evaluator=fe_evaluator,
                                                 model_id=algo_name,
                                                 time_limit_per_trans=600,
                                                 mem_limit_per_trans=5120,
                                                 number_of_unit_resource=10,
                                                 seed=1)
    hyper_space = fe_optimizer.hyperparameter_space
    return hyper_space
def evaluate_tpot(dataset, run_id, time_limit, seed=1, use_fe=True):
    n_job = args.n_job

    # Construct the ML model.
    config = None
    if not use_fe:
        from solnml.utils.tpot_config import classifier_config_dict
        config = classifier_config_dict
    automl = TPOTClassifier(config_dict=config,
                            generations=10000,
                            population_size=20,
                            verbosity=2,
                            n_jobs=n_job,
                            cv=0.2,
                            max_eval_time_mins=2.5,
                            max_time_mins=int(time_limit / 60),
                            random_state=seed)

    raw_data, test_raw_data = load_train_test_data(dataset)
    X_train, y_train = raw_data.data
    X_test, y_test = test_raw_data.data
    X_train, y_train = X_train.astype('float64'), y_train.astype('int')
    X_test, y_test = X_test.astype('float64'), y_test.astype('int')

    automl.fit(X_train, y_train)
    y_hat = automl.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_hat)
    print("%d-th Evaluation: accuracy score => %.4f" % (run_id, test_accuracy))

    save_path = save_dir + 'tpot-%s-%d-%d.pkl' % (dataset, time_limit, run_id)
    with open(save_path, 'wb') as f:
        pickle.dump([test_accuracy], f)
def check_datasets(datasets, task_type=None):
    for _dataset in datasets:
        try:
            _, _ = load_train_test_data(_dataset, random_state=1, task_type=task_type)
        except Exception as e:
            raise ValueError('Dataset - %s does not exist!' % _dataset)
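# Hypothetical usage sketch (not part of the original scripts): verify that every dataset in an
# experiment list can be loaded before launching any of the long-running evaluations above.
# The dataset names below are placeholders reused from defaults elsewhere in this file, and
# MULTICLASS_CLS is the task-type constant already imported by these scripts.
def _check_datasets_example():
    candidate_datasets = ['pc4', 'credit', 'splice']
    check_datasets(candidate_datasets, task_type=MULTICLASS_CLS)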
def evaluate_autosklearn(algorithms, rep_id, trial_num=100, dataset='credit', time_limit=1200, seed=1,
                         enable_ens=True, enable_meta_learning=False):
    print('%s\nDataset: %s, Run_id: %d, Budget: %d.\n%s' % ('=' * 50, dataset, rep_id, time_limit, '=' * 50))
    ausk_id = 'ausk' if enable_ens else 'ausk-no-ens'
    ausk_id += '-meta' if enable_meta_learning else ''
    task_id = '%s-%s-%d-%d' % (dataset, ausk_id, len(algorithms), trial_num)

    if enable_ens:
        ensemble_size, ensemble_nbest = 50, 50
    else:
        ensemble_size, ensemble_nbest = 1, 1
    if enable_meta_learning:
        init_config_via_metalearning = 25
    else:
        init_config_via_metalearning = 0

    include_models = algorithms
    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=int(time_limit),
        per_run_time_limit=per_run_time_limit,
        n_jobs=1,
        include_estimators=include_models,
        ensemble_memory_limit=12288,
        ml_memory_limit=12288,
        ensemble_size=ensemble_size,
        ensemble_nbest=ensemble_nbest,
        initial_configurations_via_metalearning=init_config_via_metalearning,
        seed=seed,
        resampling_strategy='holdout',
        resampling_strategy_arguments={'train_size': 0.8}
    )
    print(automl)

    raw_data, test_raw_data = load_train_test_data(dataset, random_state=seed)
    X, y = raw_data.data
    X_test, y_test = test_raw_data.data
    feat_type = ['Categorical' if _type == CATEGORICAL else 'Numerical'
                 for _type in raw_data.feature_types]
    automl.fit(X.copy(), y.copy(), feat_type=feat_type)

    model_desc = automl.show_models()
    str_stats = automl.sprint_statistics()
    valid_results = automl.cv_results_['mean_test_score']
    time_records = automl.cv_results_['mean_fit_time']
    validation_accuracy = np.max(valid_results)

    # Test performance.
    predictions = automl.predict(X_test)
    test_accuracy = accuracy_score(y_test, predictions)

    # Print statistics about the auto-sklearn run, such as the number of
    # iterations and the number of models that failed with a timeout.
    print(str_stats)
    print(model_desc)
    print('Validation Accuracy:', validation_accuracy)
    print("Test Accuracy :", test_accuracy)

    save_path = save_dir + '%s-%d.pkl' % (task_id, rep_id)
    with open(save_path, 'wb') as f:
        stats = [model_desc, str_stats, valid_results, time_records, time_limit]
        pickle.dump([validation_accuracy, test_accuracy, stats], f)
def evaluate_hmab(algorithms, dataset, run_id, trial_num, seed, time_limit=1200):
    print('%s-%s-%d: %d' % (hmab_flag, dataset, run_id, time_limit))

    exclude_datasets = ['gina_prior2', 'pc2', 'abalone', 'wind', 'waveform-5000(2)',
                        'page-blocks(1)', 'winequality_white', 'pollen']
    alad = AlgorithmAdvisor(task_type=MULTICLASS_CLS, n_algorithm=9,
                            metric='bal_acc', exclude_datasets=exclude_datasets)
    n_algo = 5
    assert dataset in exclude_datasets
    meta_infos = alad.fit_meta_learner()
    assert dataset not in meta_infos

    model_candidates = alad.fetch_algorithm_set(dataset)
    include_models = list()
    print(model_candidates)
    for algo in model_candidates:
        if algo in algorithms and len(include_models) < n_algo:
            include_models.append(algo)
    print('After algorithm recommendation', include_models)

    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset, task_type=MULTICLASS_CLS)
    cls_task_type = BINARY_CLS if len(set(train_data.data[1])) == 2 else MULTICLASS_CLS
    balanced_acc_metric = make_scorer(balanced_accuracy)

    if is_unbalanced_dataset(train_data):
        from solnml.components.feature_engineering.transformations.balancer.smote_balancer import DataBalancer
        train_data = DataBalancer().operate(train_data)

    bandit = FirstLayerBandit(cls_task_type, trial_num, include_models, train_data,
                              output_dir='logs',
                              per_run_time_limit=per_run_time_limit,
                              dataset_name=dataset,
                              ensemble_size=50,
                              inner_opt_algorithm=opt_algo,
                              metric=balanced_acc_metric,
                              fe_algo='bo',
                              seed=seed,
                              time_limit=time_limit,
                              eval_type='holdout')
    bandit.optimize()
    time_taken = time.time() - _start_time
    model_desc = [bandit.nbest_algo_ids, bandit.optimal_algo_id, bandit.final_rewards, bandit.action_sequence]

    validation_accuracy = np.max(bandit.final_rewards)
    best_pred = bandit._best_predict(test_data)
    test_accuracy = balanced_accuracy(test_data.data[1], best_pred)

    bandit.refit()
    es_pred = bandit._es_predict(test_data)
    test_accuracy_with_ens = balanced_accuracy(test_data.data[1], es_pred)

    data = [dataset, validation_accuracy, test_accuracy, test_accuracy_with_ens, time_taken, model_desc]
    print(model_desc)
    print(data)

    save_path = project_dir + '%s_%s_%s_%d_%d_%d_%d_%d.pkl' % (
        hmab_flag, opt_algo, dataset, trial_num, len(algorithms), seed, run_id, time_limit)
    with open(save_path, 'wb') as f:
        pickle.dump(data, f)
def evaluate_bo_optimizer(dataset, time_limit, run_id, seed):
    from solnml.components.fe_optimizers.bo_optimizer import BayesianOptimizationOptimizer

    # Prepare the configuration for random forest.
    from ConfigSpace.hyperparameters import UnParametrizedHyperparameter
    from autosklearn.pipeline.components.classification.random_forest import RandomForest
    cs = RandomForest.get_hyperparameter_search_space()
    clf_hp = UnParametrizedHyperparameter("estimator", 'random_forest')
    cs.add_hyperparameter(clf_hp)
    print(cs.get_default_configuration())

    evaluator = ClassificationEvaluator(cs.get_default_configuration(), name='fe',
                                        seed=seed, resampling_strategy='holdout')
    train_data, test_data = load_train_test_data(dataset)
    cls_task_type = BINARY_CLS if len(set(train_data.data[1])) == 2 else MULTICLASS_CLS
    optimizer = BayesianOptimizationOptimizer(cls_task_type, train_data, evaluator,
                                              'random_forest', 300, 10000, seed,
                                              time_budget=time_limit)
    optimizer.optimize()
    inc = optimizer.incumbent_config
    val_score = 1 - optimizer.evaluate_function(inc)
    print(val_score)
    print(optimizer.incumbent_score)

    optimizer.fetch_nodes(n=10)
    print("Refit finished!")

    final_train_data = optimizer.apply(train_data, optimizer.incumbent, phase='train')
    X_train, y_train = final_train_data.data
    final_test_data = optimizer.apply(test_data, optimizer.incumbent)
    X_test, y_test = final_test_data.data

    clf = fetch_predict_estimator(cls_task_type, cs.get_default_configuration(),
                                  X_train, y_train,
                                  weight_balance=final_train_data.enable_balance,
                                  data_balance=final_train_data.data_balance)
    y_pred = clf.predict(X_test)

    from solnml.components.metrics.cls_metrics import balanced_accuracy
    test_score = balanced_accuracy(y_test, y_pred)
    print('==> Test score', test_score)

    save_path = save_dir + 'bo_fe_%s_%d_%d.pkl' % (dataset, time_limit, run_id)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, val_score, test_score], f)
def evaluate_sys(run_id, task_type, mth, dataset, ens_method, enable_meta,
                 eval_type='holdout', time_limit=1200, seed=1):
    _task_type = MULTICLASS_CLS if task_type == 'cls' else REGRESSION
    train_data, test_data = load_train_test_data(dataset, task_type=_task_type)
    _enable_meta = True if enable_meta == 'true' else False

    if task_type == 'cls':
        from solnml.estimators import Classifier
        estimator = Classifier(time_limit=time_limit,
                               per_run_time_limit=300,
                               output_dir=save_folder,
                               ensemble_method=ens_method,
                               enable_meta_algorithm_selection=_enable_meta,
                               evaluation=eval_type,
                               metric='bal_acc',
                               include_algorithms=['random_forest'],
                               include_preprocessors=['extra_trees_based_selector',
                                                      'generic_univariate_selector',
                                                      'liblinear_based_selector',
                                                      'percentile_selector'],
                               n_jobs=1)
    else:
        from solnml.estimators import Regressor
        estimator = Regressor(time_limit=time_limit,
                              per_run_time_limit=300,
                              output_dir=save_folder,
                              ensemble_method=ens_method,
                              enable_meta_algorithm_selection=_enable_meta,
                              evaluation=eval_type,
                              metric='mse',
                              include_algorithms=['random_forest'],
                              include_preprocessors=['extra_trees_based_selector_regression',
                                                     'generic_univariate_selector',
                                                     'liblinear_based_selector',
                                                     'percentile_selector_regression'],
                              n_jobs=1)

    start_time = time.time()
    estimator.fit(train_data, opt_strategy=mth, dataset_id=dataset)
    pred = estimator.predict(test_data)
    if task_type == 'cls':
        test_score = balanced_accuracy_score(test_data.data[1], pred)
    else:
        test_score = mean_squared_error(test_data.data[1], pred)

    validation_score = estimator._ml_engine.solver.incumbent_perf
    eval_dict = estimator._ml_engine.solver.get_eval_dict()
    print('Run ID : %d' % run_id)
    print('Dataset : %s' % dataset)
    print('Val/Test score : %f - %f' % (validation_score, test_score))

    save_path = save_folder + 'extremely_small_%s_%s_%s_%s_%d_%d_%d.pkl' % (
        task_type, mth, dataset, enable_meta, time_limit, (ens_method is None), run_id)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, validation_score, test_score, start_time, eval_dict], f)

    # Delete output dir.
    shutil.rmtree(os.path.join(estimator.get_output_dir()))
def conduct_fe(dataset='pc4', classifier_id='random_forest', iter_num=100, run_id=0, seed=1):
    from autosklearn.pipeline.components.classification import _classifiers

    clf_class = _classifiers[classifier_id]
    cs = clf_class.get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", classifier_id)
    cs.add_hyperparameter(model)
    default_config = cs.get_default_configuration()

    raw_data, test_raw_data = load_train_test_data(dataset, random_state=seed)
    evaluator = ClassificationEvaluator(default_config, name='fe', data_node=raw_data,
                                        resampling_strategy='holdout', seed=seed)
    val_acc = evaluator(default_config)
    estimator = fetch_predict_estimator(default_config, raw_data.data[0], raw_data.data[1])
    pred = estimator.predict(test_raw_data.data[0])
    test_acc = balanced_accuracy(test_raw_data.data[1], pred)

    optimizer = EvaluationBasedOptimizer(task_type='classification',
                                         input_data=raw_data,
                                         evaluator=evaluator,
                                         model_id=classifier_id,
                                         time_limit_per_trans=240,
                                         mem_limit_per_trans=10000,
                                         seed=seed)

    task_id = 'fe-%s-%s-%d' % (dataset, classifier_id, iter_num)
    val_acc_list, test_acc_list = [], []
    val_acc_list.append(val_acc)
    test_acc_list.append(test_acc)

    for _iter in range(iter_num):
        perf, _, incumbent = optimizer.iterate()
        val_acc_list.append(perf)
        train_node = optimizer.apply(raw_data, incumbent)
        test_node = optimizer.apply(test_raw_data, incumbent)
        estimator = fetch_predict_estimator(default_config, train_node.data[0], train_node.data[1])
        pred = estimator.predict(test_node.data[0])
        test_perf = balanced_accuracy(test_node.data[1], pred)
        test_acc_list.append(test_perf)

    print(val_acc_list)
    print(test_acc_list)

    save_path = save_dir + '%s-%d.pkl' % (task_id, run_id)
    with open(save_path, 'wb') as f:
        pickle.dump([val_acc_list, test_acc_list], f)
def evaluate_autosklearn(algorithms, dataset, run_id, trial_num, seed, time_limit=1200):
    print('%s-%s-%d: %d' % (hmab_flag, dataset, run_id, time_limit))
    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset, task_type=MULTICLASS_CLS)
    cls_task_type = BINARY_CLS if len(set(train_data.data[1])) == 2 else MULTICLASS_CLS
    balanced_acc_metric = make_scorer(balanced_accuracy)

    if is_unbalanced_dataset(train_data):
        from solnml.components.feature_engineering.transformations.balancer.smote_balancer import DataBalancer
        train_data = DataBalancer().operate(train_data)

    bandit = FirstLayerBandit(cls_task_type, trial_num, algorithms, train_data,
                              output_dir='logs',
                              per_run_time_limit=per_run_time_limit,
                              dataset_name=dataset,
                              ensemble_size=50,
                              inner_opt_algorithm=opt_algo,
                              metric=balanced_acc_metric,
                              fe_algo='bo',
                              seed=seed,
                              time_limit=time_limit,
                              eval_type='holdout')

    while time.time() - _start_time < time_limit:
        bandit.sub_bandits['random_forest'].optimizer['hpo'].iterate()

    # bandit.optimize()
    # fe_exp_output = bandit.sub_bandits['random_forest'].exp_output['fe']
    # hpo_exp_output = bandit.sub_bandits['random_forest'].exp_output['hpo']
    fe_exp_output = dict()
    hpo_exp_output = bandit.sub_bandits['random_forest'].optimizer['hpo'].exp_output

    inc_config = bandit.sub_bandits['random_forest'].optimizer['hpo'].incumbent_config.get_dictionary()
    inc_config.pop('estimator')
    from solnml.components.models.classification.random_forest import RandomForest
    rf = RandomForest(**inc_config)
    rf.fit(train_data.data[0], train_data.data[1])

    validation_accuracy = bandit.sub_bandits['random_forest'].optimizer['hpo'].incumbent_perf
    best_pred = rf.predict(test_data.data[0])
    test_accuracy = balanced_accuracy(test_data.data[1], best_pred)
    # es_pred = bandit._es_predict(test_data)
    # test_accuracy_with_ens = balanced_accuracy(test_data.data[1], es_pred)

    data = [dataset, validation_accuracy, test_accuracy, fe_exp_output, hpo_exp_output, _start_time]
    save_path = project_dir + '%s_%s_%s_%d_%d_%d_%d_%d.pkl' % (
        ausk_flag, opt_algo, dataset, trial_num, len(algorithms), seed, run_id, time_limit)
    with open(save_path, 'wb') as f:
        pickle.dump(data, f)

    # Clean up the temporary log files.
    del_path = './logs/'
    for i in os.listdir(del_path):
        file_data = del_path + "/" + i
        if os.path.isfile(file_data):
            os.remove(file_data)
def evaluate_2rd_layered_bandit(run_id, mth='rb', dataset='pc4', algo='libsvm_svc',
                                cv='holdout', time_limit=120000, seed=1):
    train_data, test_data = load_train_test_data(dataset)
    bandit = SecondLayerBandit(algo, train_data, dataset_id=dataset, mth=mth, seed=seed, eval_type=cv)

    _start_time = time.time()
    _iter_id = 0
    stats = list()

    while True:
        if time.time() > time_limit + _start_time or bandit.early_stopped_flag:
            break
        res = bandit.play_once()
        print('Iteration %d - %.4f' % (_iter_id, res))
        stats.append([_iter_id, time.time() - _start_time, res])
        _iter_id += 1

    print(bandit.final_rewards)
    print(bandit.action_sequence)
    print(np.mean(bandit.evaluation_cost['fe']))
    print(np.mean(bandit.evaluation_cost['hpo']))

    fe_optimizer = bandit.optimizer['fe']
    final_train_data = fe_optimizer.apply(train_data, bandit.inc['fe'])
    assert final_train_data == bandit.inc['fe']
    final_test_data = fe_optimizer.apply(test_data, bandit.inc['fe'])

    config = bandit.inc['hpo']
    evaluator = ClassificationEvaluator(config, name='fe', seed=seed, resampling_strategy='holdout')
    val_score = evaluator(None, data_node=final_train_data)
    print('==> Best validation score', val_score, res)

    X_train, y_train = final_train_data.data
    clf = fetch_predict_estimator(config, X_train, y_train)
    X_test, y_test = final_test_data.data
    y_pred = clf.predict(X_test)
    test_score = balanced_accuracy(y_test, y_pred)
    print('==> Test score', test_score)

    # Alleviate overfitting.
    y_pred1 = bandit.predict(test_data.data[0])
    test_score1 = balanced_accuracy(y_test, y_pred1)
    print('==> Test score with average ensemble', test_score1)

    y_pred2 = bandit.predict(test_data.data[0], is_weighted=True)
    test_score2 = balanced_accuracy(y_test, y_pred2)
    print('==> Test score with weighted ensemble', test_score2)

    save_path = save_folder + '%s_%s_%d_%d_%s.pkl' % (mth, dataset, time_limit, run_id, algo)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, val_score, test_score, test_score1, test_score2], f)
def evaluate_autosklearn(algorithms, dataset, run_id, trial_num, seed, time_limit=1200):
    print('AUSK-%s-%d: %d' % (dataset, run_id, time_limit))
    include_models = algorithms
    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=time_limit,
        per_run_time_limit=per_run_time_limit,
        include_preprocessors=None,
        exclude_preprocessors=None,
        n_jobs=1,
        include_estimators=include_models,
        ensemble_memory_limit=8192,
        ml_memory_limit=8192,
        ensemble_size=1,
        ensemble_nbest=1,
        initial_configurations_via_metalearning=0,
        seed=int(seed),
        resampling_strategy='holdout',
        resampling_strategy_arguments={'train_size': 0.67})
    print(automl)

    train_data, test_data = load_train_test_data(dataset)
    X, y = train_data.data
    feat_type = ['Categorical' if _type == CATEGORICAL else 'Numerical'
                 for _type in train_data.feature_types]

    from autosklearn.metrics import balanced_accuracy
    automl.fit(X.copy(), y.copy(), metric=balanced_accuracy, feat_type=feat_type)
    model_desc = automl.show_models()
    print(model_desc)

    val_result = np.max(automl.cv_results_['mean_test_score'])
    print('Best validation accuracy', val_result)

    X_test, y_test = test_data.data
    automl.refit(X.copy(), y.copy())
    y_pred = automl.predict(X_test)
    test_result = balanced_accuracy(y_test, y_pred)
    print('Test accuracy', test_result)

    save_path = project_dir + 'data/%s_%s_%d_%d_%d_%d.pkl' % (
        ausk_flag, dataset, trial_num, len(algorithms), seed, run_id)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, val_result, test_result, model_desc], f)
def conduct_hpo(dataset='pc4', classifier_id='random_forest', iter_num=100, run_id=0, seed=1):
    from autosklearn.pipeline.components.classification import _classifiers

    clf_class = _classifiers[classifier_id]
    cs = clf_class.get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", classifier_id)
    cs.add_hyperparameter(model)

    raw_data, test_raw_data = load_train_test_data(dataset, random_state=seed)
    evaluator = ClassificationEvaluator(cs.get_default_configuration(), name='hpo',
                                        data_node=raw_data, resampling_strategy='holdout',
                                        seed=seed)

    default_config = cs.get_default_configuration()
    val_acc = 1. - evaluator(default_config)
    estimator = fetch_predict_estimator(default_config, raw_data.data[0], raw_data.data[1])
    pred = estimator.predict(test_raw_data.data[0])
    test_acc = balanced_accuracy(test_raw_data.data[1], pred)

    optimizer = SMACOptimizer(evaluator, cs, trials_per_iter=2,
                              output_dir='logs', per_run_time_limit=180)
    task_id = 'hpo-%s-%s-%d' % (dataset, classifier_id, iter_num)

    val_acc_list, test_acc_list = [], []
    val_acc_list.append(val_acc)
    test_acc_list.append(test_acc)

    for _iter in range(iter_num):
        perf, _, config = optimizer.iterate()
        val_acc_list.append(perf)
        estimator = fetch_predict_estimator(config, raw_data.data[0], raw_data.data[1])
        pred = estimator.predict(test_raw_data.data[0])
        test_perf = balanced_accuracy(test_raw_data.data[1], pred)
        test_acc_list.append(test_perf)

    print(val_acc_list)
    print(test_acc_list)

    save_path = save_dir + '%s-%d.pkl' % (task_id, run_id)
    with open(save_path, 'wb') as f:
        pickle.dump([val_acc_list, test_acc_list], f)
def evaluate_issue_source(classifier_id, dataset, opt_type='hpo'):
    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset)

    from autosklearn.pipeline.components.classification import _classifiers
    clf_class = _classifiers[classifier_id]
    cs = clf_class.get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", classifier_id)
    cs.add_hyperparameter(model)
    default_config = cs.get_default_configuration()

    seed = 2343
    if opt_type == 'hpo':
        evaluator = Evaluator(default_config, data_node=train_data, name='hpo',
                              resampling_strategy='holdout', seed=seed)
        optimizer = SMACOptimizer(evaluator, cs, output_dir='logs/',
                                  per_run_time_limit=300, trials_per_iter=5, seed=seed)
    else:
        evaluator = Evaluator(default_config, name='fe',
                              resampling_strategy='holdout', seed=seed)
        optimizer = EvaluationBasedOptimizer('classification', train_data, evaluator,
                                             classifier_id, 300, 1024, seed)

    perf_result = list()
    for iter_id in range(20):
        optimizer.iterate()
        print('=' * 30)
        print('ITERATION: %d' % iter_id)
        if opt_type == 'hpo':
            config = optimizer.incumbent_config
            perf = evaluate(train_data, test_data, config)
        else:
            fe_train_data = optimizer.incumbent
            fe_test_data = optimizer.apply(test_data, fe_train_data)
            perf = evaluate(fe_train_data, fe_test_data, default_config)
        print(perf)
        print('=' * 30)
        perf_result.append(perf)

    print(perf_result)
def evaluate_imbalanced(algorithms, dataset, run_id, trial_num, seed, time_limit=1200):
    print('%s-%s-%d: %d' % (hmab_flag, dataset, run_id, time_limit))
    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset)
    cls_task_type = BINARY_CLS if len(set(train_data.data[1])) == 2 else MULTICLASS_CLS
    # ACC or Balanced_ACC
    balanced_acc_metric = make_scorer(balanced_accuracy)

    bandit = FirstLayerBandit(cls_task_type, trial_num, algorithms, train_data,
                              output_dir='logs',
                              per_run_time_limit=per_run_time_limit,
                              dataset_name=dataset,
                              ensemble_size=50,
                              opt_algo=opt_algo,
                              metric=balanced_acc_metric,
                              fe_algo='bo',
                              seed=seed)
    bandit.optimize()
    model_desc = [bandit.nbest_algo_ids, bandit.optimal_algo_id, bandit.final_rewards, bandit.action_sequence]
    time_taken = time.time() - _start_time

    validation_accuracy = np.max(bandit.final_rewards)
    best_pred = bandit._best_predict(test_data)
    test_accuracy = balanced_accuracy(test_data.data[1], best_pred)
    es_pred = bandit._es_predict(test_data)
    test_accuracy_with_ens = balanced_accuracy(test_data.data[1], es_pred)

    data = [dataset, validation_accuracy, test_accuracy, test_accuracy_with_ens, time_taken, model_desc]
    print(model_desc)
    print(data[:4])

    save_path = project_dir + 'data/%s_%s_%s_%d_%d_%d_%d.pkl' % (
        hmab_flag, opt_algo, dataset, trial_num, len(algorithms), seed, run_id)
    with open(save_path, 'wb') as f:
        pickle.dump(data, f)
def evaluate_ml_algorithm(dataset, algo, run_id, obj_metric, total_resource=20, seed=1, task_type=None):
    print('EVALUATE-%s-%s-%s: run_id=%d' % (dataset, algo, obj_metric, run_id))
    train_data, test_data = load_train_test_data(dataset, task_type=task_type)
    if task_type in CLS_TASKS:
        task_type = BINARY_CLS if len(set(train_data.data[1])) == 2 else MULTICLASS_CLS
    print(set(train_data.data[1]))
    metric = get_metric(obj_metric)

    bandit = SecondLayerBandit(task_type, algo, train_data, metric,
                               per_run_time_limit=300,
                               seed=seed,
                               eval_type='holdout',
                               fe_algo='bo',
                               total_resource=total_resource)
    bandit.optimize_fixed_pipeline()

    val_score = bandit.incumbent_perf
    best_config = bandit.inc['hpo']

    fe_optimizer = bandit.optimizer['fe']
    fe_optimizer.fetch_nodes(10)
    best_data_node = fe_optimizer.incumbent
    test_data_node = fe_optimizer.apply(test_data, best_data_node)

    estimator = fetch_predict_estimator(task_type, best_config,
                                        best_data_node.data[0], best_data_node.data[1],
                                        weight_balance=best_data_node.enable_balance,
                                        data_balance=best_data_node.data_balance)
    score = metric(estimator, test_data_node.data[0], test_data_node.data[1]) * metric._sign
    print('Test score', score)

    save_path = save_dir + '%s-%s-%s-%d-%d.pkl' % (dataset, algo, obj_metric, run_id, total_resource)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, algo, score, val_score, task_type], f)
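# Hypothetical usage sketch (assumption, not from the original script): evaluate the
# second-layer bandit above on a single algorithm/dataset pair. The values 'pc4',
# 'random_forest', and 'bal_acc' are placeholders reused from defaults elsewhere in this
# file; run_id and total_resource are arbitrary choices for illustration.
def _evaluate_ml_algorithm_example():
    evaluate_ml_algorithm(dataset='pc4', algo='random_forest', run_id=0,
                          obj_metric='bal_acc', total_resource=20, seed=1,
                          task_type=MULTICLASS_CLS)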
def evaluate_ausk(run_id, mth, dataset, algo, eval_type='holdout', time_limit=1200, seed=1):
    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=int(time_limit),
        per_run_time_limit=300,
        n_jobs=1,
        include_estimators=[algo],
        ensemble_memory_limit=16384,
        ml_memory_limit=16384,
        ensemble_size=1,
        ensemble_nbest=1,
        initial_configurations_via_metalearning=0,
        delete_tmp_folder_after_terminate=False,
        delete_output_folder_after_terminate=False,
        seed=int(seed),
        resampling_strategy='holdout',
        resampling_strategy_arguments={'train_size': 0.67}
    )
    print(automl)

    task_type = MULTICLASS_CLS
    train_data, test_data = load_train_test_data(dataset, task_type=task_type)
    X, y = train_data.data
    X_test, y_test = test_data.data
    feat_type = ['Categorical' if _type == CATEGORICAL else 'Numerical'
                 for _type in train_data.feature_types]

    from autosklearn.metrics import balanced_accuracy as balanced_acc
    automl.fit(X.copy(), y.copy(), feat_type=feat_type, metric=balanced_acc)

    valid_results = automl.cv_results_['mean_test_score']
    validation_score = np.max(valid_results)
    # automl.refit(X.copy(), y.copy())
    predictions = automl.predict(X_test)
    test_score = balanced_accuracy_score(y_test, predictions)
    model_desc = automl.show_models()
    str_stats = automl.sprint_statistics()

    print('=' * 10)
    print(model_desc)
    print(str_stats)
    print('=' * 10)
    print('Validation score', validation_score)
    print('Test score', test_score)
    print(automl.show_models())

    save_path = save_folder + '%s_%s_%d_%d_%s.pkl' % (mth, dataset, time_limit, run_id, algo)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, validation_score, test_score], f)
def evaluate_ausk_fe(dataset, time_limit, run_id, seed):
    print('[Run ID: %d] Start to Evaluate' % run_id, dataset, 'Budget', time_limit)
    from solnml.utils.models.default_random_forest import DefaultRandomForest

    # Add the random forest classifier (with default hyperparameters) as a component to auto-sklearn.
    autosklearn.pipeline.components.classification.add_classifier(DefaultRandomForest)
    include_models = ['DefaultRandomForest']

    # Construct the ML model.
    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=time_limit,
        include_preprocessors=None,
        n_jobs=1,
        include_estimators=include_models,
        ensemble_memory_limit=8192,
        ml_memory_limit=8192,
        ensemble_size=1,
        initial_configurations_via_metalearning=0,
        per_run_time_limit=300,
        seed=int(seed),
        resampling_strategy='holdout',
        resampling_strategy_arguments={'train_size': 0.67})
    print(automl)

    train_data, test_data = load_train_test_data(dataset)
    X, y = train_data.data
    X_test, y_test = test_data.data

    from autosklearn.metrics import balanced_accuracy
    automl.fit(X.copy(), y.copy(), metric=balanced_accuracy)
    model_desc = automl.show_models()
    print(model_desc)
    # print(automl.cv_results_)

    val_result = np.max(automl.cv_results_['mean_test_score'])
    print('Best validation accuracy', val_result)

    # automl.refit(X.copy(), y.copy())
    test_result = automl.score(X_test, y_test)
    print('Test accuracy', test_result)

    save_path = save_dir + 'ausk_fe_%s_%d_%d.pkl' % (dataset, time_limit, run_id)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, val_result, test_result, model_desc], f)
def evaluate_hmab(algorithms, run_id, dataset='credit', trial_num=200, n_jobs=1,
                  meta_configs=0, seed=1, eval_type='holdout'):
    task_id = '%s-hmab-%d-%d' % (dataset, len(algorithms), trial_num)
    _start_time = time.time()
    raw_data, test_raw_data = load_train_test_data(dataset)

    bandit = FirstLayerBandit(trial_num, algorithms, raw_data,
                              output_dir='logs/%s/' % task_id,
                              per_run_time_limit=per_run_time_limit,
                              dataset_name='%s-%d' % (dataset, run_id),
                              n_jobs=n_jobs,
                              meta_configs=meta_configs,
                              seed=seed,
                              eval_type=eval_type)
    bandit.optimize()
    time_cost = int(time.time() - _start_time)
    print(bandit.final_rewards)
    print(bandit.action_sequence)

    validation_accuracy = np.max(bandit.final_rewards)
    # validation_accuracy_without_ens = bandit.validate()
    # assert np.isclose(validation_accuracy, validation_accuracy_without_ens)
    test_accuracy_with_ens = EnsembleBuilder(bandit, n_jobs=n_jobs).score(test_raw_data)

    print('Dataset : %s' % dataset)
    print('Validation score without ens: %f' % validation_accuracy)
    print("Test score with ensemble : %f" % test_accuracy_with_ens)

    save_path = save_dir + '%s-%d.pkl' % (task_id, run_id)
    with open(save_path, 'wb') as f:
        stats = [time_cost, 0., bandit.time_records, bandit.final_rewards]
        pickle.dump([validation_accuracy, test_accuracy_with_ens, stats], f)

    del bandit
    return time_cost
def test_balancer():
    dataset = 'winequality_red'
    sys.path.append(os.getcwd())
    from solnml.datasets.utils import load_train_test_data
    raw_data, test_raw_data = load_train_test_data(dataset)
    # data = (np.random.random((10, 4)), np.array([0, 0, 0, 0, 0, 2, 2, 2, 2, 2]))
    # feature_type = [NUMERICAL, NUMERICAL, DISCRETE, DISCRETE]
    # datanode = DataNode(data, feature_type)
    print(raw_data, test_raw_data)

    from solnml.components.feature_engineering.transformations.balancer.data_balancer import DataBalancer
    balancer = DataBalancer()
    a = balancer.operate(raw_data)
    b = balancer.operate(raw_data)
    c = balancer.operate(raw_data)
    # The balancer should be deterministic: repeated calls on the same data yield identical nodes.
    assert a == b and b == c
    print(balancer.operate(raw_data))

    test_data = test_raw_data.copy_()
    test_data.data[1] = None
    print(balancer.operate(test_data))
def evaluate_1stlayer_bandit(algorithms, run_id, dataset='credit', trial_num=200,
                             n_jobs=1, meta_configs=0, seed=1):
    task_id = '%s-hmab-%d-%d' % (dataset, len(algorithms), trial_num)
    _start_time = time.time()
    raw_data, test_raw_data = load_train_test_data(dataset, random_state=seed)

    bandit = FirstLayerBandit(trial_num, algorithms, raw_data,
                              output_dir='logs/%s/' % task_id,
                              per_run_time_limit=per_run_time_limit,
                              dataset_name='%s-%d' % (dataset, run_id),
                              n_jobs=n_jobs,
                              meta_configs=meta_configs,
                              seed=seed,
                              eval_type='holdout')
    bandit.optimize()
    time_cost = int(time.time() - _start_time)
    print(bandit.final_rewards)
    print(bandit.action_sequence)

    validation_accuracy_without_ens0 = np.max(bandit.final_rewards)
    validation_accuracy_without_ens1 = bandit.validate()
    assert np.isclose(validation_accuracy_without_ens0, validation_accuracy_without_ens1)
    test_accuracy_without_ens = bandit.score(test_raw_data)

    # For debug.
    mode = True
    if mode:
        test_accuracy_with_ens0 = ensemble_implementation_examples(bandit, test_raw_data)
        test_accuracy_with_ens1 = EnsembleBuilder(bandit).score(test_raw_data)

        print('Dataset : %s' % dataset)
        print('Validation score without ens: %f - %f' % (
            validation_accuracy_without_ens0, validation_accuracy_without_ens1))
        print("Test score without ensemble : %f" % test_accuracy_without_ens)
        print("Test score with ensemble : %f - %f" % (test_accuracy_with_ens0, test_accuracy_with_ens1))

        save_path = save_dir + '%s-%d.pkl' % (task_id, run_id)
        with open(save_path, 'wb') as f:
            stats = [time_cost, test_accuracy_with_ens0, test_accuracy_with_ens1, test_accuracy_without_ens]
            pickle.dump([validation_accuracy_without_ens0, test_accuracy_with_ens1, stats], f)

    del bandit
    return time_cost
def evaluate_fe_bugs(dataset, run_id, time_limit, seed):
    algorithms = ['lda', 'k_nearest_neighbors', 'libsvm_svc', 'sgd',
                  'adaboost', 'random_forest', 'extra_trees', 'decision_tree']
    algo_id = np.random.choice(algorithms, 1)[0]
    task_id = '%s-fe-%s-%d' % (dataset, algo_id, run_id)
    print(task_id)

    # Prepare the configuration for the randomly chosen classifier.
    clf_class = _classifiers[algo_id]
    cs = clf_class.get_hyperparameter_search_space()
    clf_hp = UnParametrizedHyperparameter("estimator", algo_id)
    cs.add_hyperparameter(clf_hp)
    evaluator = ClassificationEvaluator(cs.get_default_configuration(), name='fe',
                                        seed=seed, resampling_strategy='holdout')

    pipeline = FEPipeline(fe_enabled=True, optimizer_type='eval_base',
                          time_budget=time_limit, evaluator=evaluator,
                          seed=seed, model_id=algo_id,
                          time_limit_per_trans=per_run_time_limit,
                          task_id=task_id)

    raw_data, test_raw_data = load_train_test_data(dataset)
    train_data = pipeline.fit_transform(raw_data.copy_())
    test_data = pipeline.transform(test_raw_data.copy_())
    train_data_new = pipeline.transform(raw_data.copy_())

    assert (train_data.data[0] == train_data_new.data[0]).all()
    assert (train_data.data[1] == train_data_new.data[1]).all()
    assert train_data_new == train_data

    score = evaluator(None, data_node=test_data)
    print('==> Test score', score)
def evaluate(dataset):
    train_data, test_data = load_train_test_data(dataset, test_size=0.3, task_type=MULTICLASS_CLS)
    cs = _classifiers[algo_name].get_hyperparameter_search_space()
    default_hpo_config = cs.get_default_configuration()
    metric = get_metric('bal_acc')

    fe_cs = get_task_hyperparameter_space(0, algo_name)
    default_fe_config = fe_cs.get_default_configuration()

    evaluator = ClassificationEvaluator(default_hpo_config, default_fe_config, algo_name,
                                        data_node=train_data,
                                        scorer=metric,
                                        name='hpo',
                                        resampling_strategy='holdout',
                                        output_dir='./data/exp_sys',
                                        seed=1)

    from solnml.components.optimizers.tlbo_optimizer import TlboOptimizer
    optimizer = TlboOptimizer(evaluator, cs, time_limit=300, name='hpo')
    optimizer.run()
def evaluate_base_model(classifier_id, dataset):
    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset)

    from autosklearn.pipeline.components.classification import _classifiers
    clf_class = _classifiers[classifier_id]
    cs = clf_class.get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", classifier_id)
    cs.add_hyperparameter(model)
    default_config = cs.get_default_configuration()

    X_train, y_train = train_data.data
    X_test, y_test = test_data.data
    print('X_train/test shapes: %s, %s' % (str(X_train.shape), str(X_test.shape)))

    # Build the ML estimator.
    from solnml.components.evaluators.cls_evaluator import fetch_predict_estimator
    estimator = fetch_predict_estimator(default_config, X_train, y_train)
    y_pred = estimator.predict(X_test)
    print(balanced_accuracy(y_test, y_pred))
    print(balanced_accuracy(y_pred, y_test))
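# Hypothetical usage sketch (assumption, not from the original script): score a single base
# learner with its default configuration, as a baseline for the optimized runs above.
# 'random_forest' and 'pc4' are placeholders reused from defaults elsewhere in this file.
def _evaluate_base_model_example():
    evaluate_base_model(classifier_id='random_forest', dataset='pc4')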
def evaluate_autosklearn(algorithms, dataset, run_id, trial_num, seed, time_limit=1200):
    print('AUSK-%s-%d: %d' % (dataset, run_id, time_limit))
    ausk_flag = 'ausk_full'

    if ausk_flag == 'ausk_alad':
        alad = AlgorithmAdvisor(task_type=MULTICLASS_CLS, n_algorithm=9, metric='acc')
        meta_infos = alad.fit_meta_learner()
        assert dataset not in meta_infos
        model_candidates = alad.fetch_algorithm_set(dataset)
        include_models = list()
        print(model_candidates)
        for algo in model_candidates:
            if algo in algorithms and len(include_models) < 3:
                include_models.append(algo)
        print('After algorithm recommendation', include_models)
        n_config_meta_learning = 0
        ensemble_size = 1
    elif ausk_flag == 'ausk_no_meta':
        include_models = algorithms
        n_config_meta_learning = 25
        ensemble_size = 1
    elif ausk_flag == 'ausk_full':
        include_models = algorithms
        n_config_meta_learning = 25
        ensemble_size = 50
    else:
        include_models = algorithms
        n_config_meta_learning = 0
        ensemble_size = 1

    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=time_limit,
        per_run_time_limit=per_run_time_limit,
        include_preprocessors=None,
        exclude_preprocessors=None,
        n_jobs=1,
        include_estimators=include_models,
        ensemble_memory_limit=8192,
        ml_memory_limit=8192,
        ensemble_size=ensemble_size,
        ensemble_nbest=ensemble_size,
        initial_configurations_via_metalearning=n_config_meta_learning,
        seed=int(seed),
        resampling_strategy='holdout',
        resampling_strategy_arguments={'train_size': 0.67}
    )
    print(automl)

    train_data, test_data = load_train_test_data(dataset, task_type=MULTICLASS_CLS)
    X, y = train_data.data
    feat_type = ['Categorical' if _type == CATEGORICAL else 'Numerical'
                 for _type in train_data.feature_types]

    from autosklearn.metrics import balanced_accuracy
    automl.fit(X.copy(), y.copy(), metric=balanced_accuracy, feat_type=feat_type)
    model_desc = automl.show_models()
    print(model_desc)

    val_result = np.max(automl.cv_results_['mean_test_score'])
    print('Trial number', len(automl.cv_results_['mean_test_score']))
    print('Best validation accuracy', val_result)

    X_test, y_test = test_data.data
    automl.refit(X.copy(), y.copy())
    y_pred = automl.predict(X_test)
    metric = balanced_accuracy
    test_result = metric(y_test, y_pred)
    print('Test accuracy', test_result)

    save_path = project_dir + '%s_%s_%d_%d_%d_%d_%d.pkl' % (
        ausk_flag, dataset, trial_num, len(algorithms), seed, run_id, time_limit)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, val_result, test_result, model_desc], f)
def evaluate(mode, dataset, run_id, metric):
    print(mode, dataset, run_id, metric)
    metric = get_metric(metric)
    train_data, test_data = load_train_test_data(dataset, task_type=MULTICLASS_CLS)

    cs = _classifiers[algo_name].get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", algo_name)
    cs.add_hyperparameter(model)
    default_hpo_config = cs.get_default_configuration()

    fe_evaluator = ClassificationEvaluator(default_hpo_config, scorer=metric,
                                           name='fe', resampling_strategy='holdout',
                                           seed=1)
    hpo_evaluator = ClassificationEvaluator(default_hpo_config, scorer=metric,
                                            data_node=train_data, name='hpo',
                                            resampling_strategy='holdout',
                                            seed=1)
    fe_optimizer = BayesianOptimizationOptimizer(task_type=CLASSIFICATION,
                                                 input_data=train_data,
                                                 evaluator=fe_evaluator,
                                                 model_id=algo_name,
                                                 time_limit_per_trans=600,
                                                 mem_limit_per_trans=5120,
                                                 number_of_unit_resource=10,
                                                 seed=1)

    def objective_function(config):
        if benchmark == 'fe':
            return fe_optimizer.evaluate_function(config)
        else:
            return hpo_evaluator(config)

    if mode == 'bo':
        bo = BO(objective_function, config_space, max_runs=max_runs, surrogate_model='prob_rf')
        bo.run()
        print('BO result')
        print(bo.get_incumbent())
        perf = bo.history_container.incumbent_value
        runs = [bo.configurations, bo.perfs]
    elif mode == 'lite_bo':
        from litebo.facade.bo_facade import BayesianOptimization
        bo = BayesianOptimization(objective_function, config_space, max_runs=max_runs)
        bo.run()
        print('BO result')
        print(bo.get_incumbent())
        perf = bo.history_container.incumbent_value
        runs = [bo.configurations, bo.perfs]
    elif mode.startswith('tlbo'):
        _, gp_fusion = mode.split('_')
        meta_feature_vec = metafeature_dict[dataset]
        past_datasets = test_datasets.copy()
        if dataset in past_datasets:
            past_datasets.remove(dataset)
        past_history = load_runhistory(past_datasets)
        gp_models = [gp_models_dict[dataset_name] for dataset_name in past_datasets]
        tlbo = TLBO(objective_function, config_space, past_history,
                    gp_models=gp_models,
                    dataset_metafeature=meta_feature_vec,
                    max_runs=max_runs,
                    gp_fusion=gp_fusion)
        tlbo.run()
        print('TLBO result')
        print(tlbo.get_incumbent())
        runs = [tlbo.configurations, tlbo.perfs]
        perf = tlbo.history_container.incumbent_value
    else:
        raise ValueError('Invalid mode.')

    file_saved = '%s_%s_%s_result_%d_%d_%s.pkl' % (mode, algo_name, dataset, max_runs, run_id, benchmark)
    with open(data_dir + file_saved, 'wb') as f:
        pk.dump([perf, runs], f)