import time
import pickle

import numpy as np
from ConfigSpace.hyperparameters import UnParametrizedHyperparameter

# Project-level helpers (load_train_test_data, ClassificationEvaluator,
# fetch_predict_estimator, balanced_accuracy, EvaluationBasedOptimizer,
# SMACOptimizer, SecondLayerBandit) and the output paths `save_dir`/`save_folder`
# are assumed to be imported/defined elsewhere in this script.


def conduct_fe(dataset='pc4', classifier_id='random_forest', iter_num=100, run_id=0, seed=1):
    from autosklearn.pipeline.components.classification import _classifiers

    # Build the hyperparameter space for the chosen algorithm and pin the
    # estimator choice so only feature engineering varies.
    clf_class = _classifiers[classifier_id]
    cs = clf_class.get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", classifier_id)
    cs.add_hyperparameter(model)
    default_config = cs.get_default_configuration()

    raw_data, test_raw_data = load_train_test_data(dataset, random_state=seed)
    evaluator = ClassificationEvaluator(default_config, name='fe',
                                        data_node=raw_data,
                                        resampling_strategy='holdout',
                                        seed=seed)

    # Baseline: default configuration on the raw (untransformed) features.
    val_acc = evaluator(default_config)
    estimator = fetch_predict_estimator(default_config, raw_data.data[0], raw_data.data[1])
    pred = estimator.predict(test_raw_data.data[0])
    test_acc = balanced_accuracy(test_raw_data.data[1], pred)

    optimizer = EvaluationBasedOptimizer(task_type='classification',
                                         input_data=raw_data,
                                         evaluator=evaluator,
                                         model_id=classifier_id,
                                         time_limit_per_trans=240,
                                         mem_limit_per_trans=10000,
                                         seed=seed)

    task_id = 'fe-%s-%s-%d' % (dataset, classifier_id, iter_num)
    val_acc_list, test_acc_list = [], []
    val_acc_list.append(val_acc)
    test_acc_list.append(test_acc)

    for _iter in range(iter_num):
        perf, _, incumbent = optimizer.iterate()
        val_acc_list.append(perf)

        # Apply the incumbent feature-engineering pipeline to both splits,
        # then refit the default configuration and score the test split.
        train_node = optimizer.apply(raw_data, incumbent)
        test_node = optimizer.apply(test_raw_data, incumbent)
        estimator = fetch_predict_estimator(default_config, train_node.data[0], train_node.data[1])
        pred = estimator.predict(test_node.data[0])
        test_perf = balanced_accuracy(test_node.data[1], pred)
        test_acc_list.append(test_perf)

    print(val_acc_list)
    print(test_acc_list)

    save_path = save_dir + '%s-%d.pkl' % (task_id, run_id)
    with open(save_path, 'wb') as f:
        pickle.dump([val_acc_list, test_acc_list], f)


def conduct_hpo(dataset='pc4', classifier_id='random_forest', iter_num=100, run_id=0, seed=1):
    from autosklearn.pipeline.components.classification import _classifiers

    # Build the hyperparameter space and pin the estimator choice so only
    # hyperparameter optimization varies.
    clf_class = _classifiers[classifier_id]
    cs = clf_class.get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", classifier_id)
    cs.add_hyperparameter(model)
    default_config = cs.get_default_configuration()

    raw_data, test_raw_data = load_train_test_data(dataset, random_state=seed)
    evaluator = ClassificationEvaluator(default_config, name='hpo',
                                        data_node=raw_data,
                                        resampling_strategy='holdout',
                                        seed=seed)

    # Baseline: default configuration on the raw features.
    val_acc = 1. - evaluator(default_config)
    estimator = fetch_predict_estimator(default_config, raw_data.data[0], raw_data.data[1])
    pred = estimator.predict(test_raw_data.data[0])
    test_acc = balanced_accuracy(test_raw_data.data[1], pred)

    optimizer = SMACOptimizer(evaluator, cs, trials_per_iter=2,
                              output_dir='logs', per_run_time_limit=180)

    task_id = 'hpo-%s-%s-%d' % (dataset, classifier_id, iter_num)
    val_acc_list, test_acc_list = [], []
    val_acc_list.append(val_acc)
    test_acc_list.append(test_acc)

    for _iter in range(iter_num):
        perf, _, config = optimizer.iterate()
        val_acc_list.append(perf)

        # Refit the incumbent configuration on the raw features and score it
        # on the held-out test split.
        estimator = fetch_predict_estimator(config, raw_data.data[0], raw_data.data[1])
        pred = estimator.predict(test_raw_data.data[0])
        test_perf = balanced_accuracy(test_raw_data.data[1], pred)
        test_acc_list.append(test_perf)

    print(val_acc_list)
    print(test_acc_list)

    save_path = save_dir + '%s-%d.pkl' % (task_id, run_id)
    with open(save_path, 'wb') as f:
        pickle.dump([val_acc_list, test_acc_list], f)
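

# Hypothetical usage sketch (not part of the original script): run the two
# single-component loops above and reload their pickled accuracy curves for a
# side-by-side comparison. The helper name `_compare_fe_vs_hpo` is illustrative;
# it assumes `save_dir` is defined at module level as in the functions above.
def _compare_fe_vs_hpo(dataset='pc4', classifier_id='random_forest', iter_num=100, run_id=0):
    conduct_fe(dataset=dataset, classifier_id=classifier_id, iter_num=iter_num, run_id=run_id)
    conduct_hpo(dataset=dataset, classifier_id=classifier_id, iter_num=iter_num, run_id=run_id)
    for prefix in ('fe', 'hpo'):
        # Mirror the task_id and save-path conventions used above.
        task_id = '%s-%s-%s-%d' % (prefix, dataset, classifier_id, iter_num)
        with open(save_dir + '%s-%d.pkl' % (task_id, run_id), 'rb') as f:
            val_acc_list, test_acc_list = pickle.load(f)
        print('%s: best val %.4f, best test %.4f' % (prefix, max(val_acc_list), max(test_acc_list)))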


def evaluate(train_data, test_data, config):
    X_train, y_train = train_data.data
    X_test, y_test = test_data.data
    print('X_train/test shapes: %s, %s' % (str(X_train.shape), str(X_test.shape)))

    # Build the ML estimator.
    from solnml.components.evaluators.evaluator import fetch_predict_estimator
    estimator = fetch_predict_estimator(config, X_train, y_train)
    y_pred = estimator.predict(X_test)
    return balanced_accuracy(y_test, y_pred)
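

# Hypothetical usage sketch for evaluate(): build the default configuration for
# a classifier the same way conduct_fe/conduct_hpo do, then score it on a
# dataset. The helper name `_evaluate_default_config` is illustrative; it
# assumes load_train_test_data and UnParametrizedHyperparameter are available
# at module level as above.
def _evaluate_default_config(dataset='pc4', classifier_id='random_forest', seed=1):
    from autosklearn.pipeline.components.classification import _classifiers
    cs = _classifiers[classifier_id].get_hyperparameter_search_space()
    cs.add_hyperparameter(UnParametrizedHyperparameter("estimator", classifier_id))
    train_data, test_data = load_train_test_data(dataset, random_state=seed)
    score = evaluate(train_data, test_data, cs.get_default_configuration())
    print('Balanced accuracy on %s: %.4f' % (dataset, score))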


def evaluate_2rd_layered_bandit(run_id, mth='rb', dataset='pc4', algo='libsvm_svc',
                                cv='holdout', time_limit=120000, seed=1):
    train_data, test_data = load_train_test_data(dataset)
    bandit = SecondLayerBandit(algo, train_data, dataset_id=dataset,
                               mth=mth, seed=seed, eval_type=cv)

    _start_time = time.time()
    _iter_id = 0
    stats = list()

    # Pull arms until the time budget is exhausted or the bandit stops early.
    while True:
        if time.time() > time_limit + _start_time or bandit.early_stopped_flag:
            break
        res = bandit.play_once()
        print('Iteration %d - %.4f' % (_iter_id, res))
        stats.append([_iter_id, time.time() - _start_time, res])
        _iter_id += 1

    print(bandit.final_rewards)
    print(bandit.action_sequence)
    print(np.mean(bandit.evaluation_cost['fe']))
    print(np.mean(bandit.evaluation_cost['hpo']))

    # Apply the incumbent feature-engineering pipeline to both splits.
    fe_optimizer = bandit.optimizer['fe']
    final_train_data = fe_optimizer.apply(train_data, bandit.inc['fe'])
    assert final_train_data == bandit.inc['fe']
    final_test_data = fe_optimizer.apply(test_data, bandit.inc['fe'])

    config = bandit.inc['hpo']
    evaluator = ClassificationEvaluator(config, name='fe', seed=seed,
                                        resampling_strategy='holdout')
    val_score = evaluator(None, data_node=final_train_data)
    print('==> Best validation score', val_score, res)

    # Refit the incumbent configuration on the transformed training data and
    # score the transformed test split.
    X_train, y_train = final_train_data.data
    clf = fetch_predict_estimator(config, X_train, y_train)
    X_test, y_test = final_test_data.data
    y_pred = clf.predict(X_test)
    test_score = balanced_accuracy(y_test, y_pred)
    print('==> Test score', test_score)

    # Alleviate overfitting via the bandit's built-in ensembles.
    y_pred1 = bandit.predict(test_data.data[0])
    test_score1 = balanced_accuracy(y_test, y_pred1)
    print('==> Test score with average ensemble', test_score1)

    y_pred2 = bandit.predict(test_data.data[0], is_weighted=True)
    test_score2 = balanced_accuracy(y_test, y_pred2)
    print('==> Test score with weighted ensemble', test_score2)

    save_path = save_folder + '%s_%s_%d_%d_%s.pkl' % (mth, dataset, time_limit, run_id, algo)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, val_score, test_score, test_score1, test_score2], f)


def evaluate_base_model(classifier_id, dataset):
    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset)

    from autosklearn.pipeline.components.classification import _classifiers
    clf_class = _classifiers[classifier_id]
    cs = clf_class.get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", classifier_id)
    cs.add_hyperparameter(model)
    default_config = cs.get_default_configuration()

    X_train, y_train = train_data.data
    X_test, y_test = test_data.data
    print('X_train/test shapes: %s, %s' % (str(X_train.shape), str(X_test.shape)))

    # Build the ML estimator.
    from solnml.components.evaluators.cls_evaluator import fetch_predict_estimator
    estimator = fetch_predict_estimator(default_config, X_train, y_train)
    y_pred = estimator.predict(X_test)
    print(balanced_accuracy(y_test, y_pred))
    # Note: the second call swaps the (y_true, y_pred) argument order.
    print(balanced_accuracy(y_pred, y_test))


def evaluate_2rd_bandit(dataset, algo, time_limit, run_id, seed):
    print('HMAB-%s-%s: run_id=%d' % (dataset, algo, run_id))
    print('==> Start to Evaluate', dataset, 'Budget', time_limit)
    train_data, test_data = load_train_test_data(dataset)

    enable_intersect = True
    bandit = SecondLayerBandit(algo, train_data, per_run_time_limit=300,
                               seed=seed, eval_type='holdout', mth='alter_hpo',
                               enable_intersection=enable_intersect)
    mth_id = 'hmab' if enable_intersect else 'hmab0'

    _start_time = time.time()
    _iter_id = 0
    stats = list()

    # Pull arms until the time budget is exhausted or the bandit stops early.
    while True:
        if time.time() > time_limit + _start_time or bandit.early_stopped_flag:
            break
        res = bandit.play_once()
        print('Iteration %d - %.4f' % (_iter_id, res))
        stats.append([_iter_id, time.time() - _start_time, res])
        _iter_id += 1

    print(bandit.final_rewards)
    print(bandit.action_sequence)
    print(np.mean(bandit.evaluation_cost['fe']))
    print(np.mean(bandit.evaluation_cost['hpo']))

    # Apply the incumbent feature-engineering pipeline to both splits.
    fe_optimizer = bandit.optimizer['fe']
    final_train_data = fe_optimizer.apply(train_data, bandit.inc['fe'])
    assert final_train_data == bandit.inc['fe']
    final_test_data = fe_optimizer.apply(test_data, bandit.inc['fe'])

    config = bandit.inc['hpo']
    evaluator = ClassificationEvaluator(config, name='fe', seed=seed,
                                        resampling_strategy='holdout')
    val_score = evaluator(None, data_node=final_train_data)
    print('==> Best validation score', val_score, res)

    # Refit the incumbent configuration on the transformed training data and
    # score the transformed test split.
    X_train, y_train = final_train_data.data
    clf = fetch_predict_estimator(config, X_train, y_train)
    X_test, y_test = final_test_data.data
    y_pred = clf.predict(X_test)
    test_score = balanced_accuracy(y_test, y_pred)
    print('==> Test score', test_score)

    # Alleviate overfitting via the bandit's built-in ensembles.
    y_pred1 = bandit.predict(test_data.data[0])
    test_score1 = balanced_accuracy(y_test, y_pred1)
    print('==> Test score with average ensemble', test_score1)

    y_pred2 = bandit.predict(test_data.data[0], is_weighted=True)
    test_score2 = balanced_accuracy(y_test, y_pred2)
    print('==> Test score with weighted ensemble', test_score2)

    save_path = save_dir + '%s_2rd_bandit_%s_%d_%d_%s.pkl' % (
        mth_id, dataset, time_limit, run_id, algo)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, val_score, test_score, test_score1, test_score2], f)
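

# Hypothetical entry point: a minimal __main__ sketch that runs one bandit
# evaluation per dataset. The dataset list, algorithm, and budget are
# illustrative values, not taken from the original script.
if __name__ == '__main__':
    for _dataset in ['pc4']:
        evaluate_2rd_bandit(_dataset, 'random_forest', time_limit=600, run_id=0, seed=1)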