def evaluate_hmab(algorithms, dataset, run_id, trial_num, seed, time_limit=1200):
    # hmab_flag, opt_algo, per_run_time_limit and project_dir are module-level globals.
    print('%s-%s-%d: %d' % (hmab_flag, dataset, run_id, time_limit))
    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset, task_type=MULTICLASS_CLS)
    cls_task_type = BINARY_CLS if len(set(train_data.data[1])) == 2 else MULTICLASS_CLS
    balanced_acc_metric = make_scorer(balanced_accuracy)

    if is_unbalanced_dataset(train_data):
        from solnml.components.feature_engineering.transformations.preprocessor.smote_balancer import DataBalancer
        train_data = DataBalancer().operate(train_data)

    bandit = FirstLayerBandit(cls_task_type, trial_num, algorithms, train_data,
                              output_dir='logs',
                              per_run_time_limit=per_run_time_limit,
                              dataset_name=dataset,
                              ensemble_size=50,
                              inner_opt_algorithm=opt_algo,
                              metric=balanced_acc_metric,
                              fe_algo='bo',
                              seed=seed,
                              time_limit=time_limit,
                              eval_type='holdout')
    bandit.optimize()
    time_taken = time.time() - _start_time
    model_desc = [bandit.nbest_algo_ids, bandit.optimal_algo_id,
                  bandit.final_rewards, bandit.action_sequence]

    validation_accuracy = np.max(bandit.final_rewards)
    best_pred = bandit._best_predict(test_data)
    test_accuracy = balanced_accuracy(test_data.data[1], best_pred)

    bandit.refit()
    es_pred = bandit._es_predict(test_data)
    test_accuracy_with_ens = balanced_accuracy(test_data.data[1], es_pred)

    data = [dataset, validation_accuracy, test_accuracy, test_accuracy_with_ens,
            time_taken, model_desc]
    print(model_desc)
    print(data)

    save_path = project_dir + '%s_%s_%s_%d_%d_%d_%d_%d.pkl' % (
        hmab_flag, opt_algo, dataset, trial_num, len(algorithms), seed, run_id, time_limit)
    with open(save_path, 'wb') as f:
        pickle.dump(data, f)
def evaluate_hmab(algorithms, dataset, run_id, trial_num, seed, time_limit=1200):
    print('%s-%s-%d: %d' % (hmab_flag, dataset, run_id, time_limit))

    # Leave-one-out meta-learning: the target dataset is held out from the
    # meta-learner's training data before algorithms are recommended for it.
    exclude_datasets = ['gina_prior2', 'pc2', 'abalone', 'wind', 'waveform-5000(2)',
                        'page-blocks(1)', 'winequality_white', 'pollen']
    alad = AlgorithmAdvisor(task_type=MULTICLASS_CLS, n_algorithm=9,
                            metric='bal_acc', exclude_datasets=exclude_datasets)
    n_algo = 5
    assert dataset in exclude_datasets
    meta_infos = alad.fit_meta_learner()
    assert dataset not in meta_infos

    # Keep at most n_algo recommended algorithms that also appear in the candidate pool.
    model_candidates = alad.fetch_algorithm_set(dataset)
    include_models = list()
    print(model_candidates)
    for algo in model_candidates:
        if algo in algorithms and len(include_models) < n_algo:
            include_models.append(algo)
    print('After algorithm recommendation', include_models)

    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset, task_type=MULTICLASS_CLS)
    cls_task_type = BINARY_CLS if len(set(train_data.data[1])) == 2 else MULTICLASS_CLS
    balanced_acc_metric = make_scorer(balanced_accuracy)

    if is_unbalanced_dataset(train_data):
        from solnml.components.feature_engineering.transformations.balancer.smote_balancer import DataBalancer
        train_data = DataBalancer().operate(train_data)

    bandit = FirstLayerBandit(cls_task_type, trial_num, include_models, train_data,
                              output_dir='logs',
                              per_run_time_limit=per_run_time_limit,
                              dataset_name=dataset,
                              ensemble_size=50,
                              inner_opt_algorithm=opt_algo,
                              metric=balanced_acc_metric,
                              fe_algo='bo',
                              seed=seed,
                              time_limit=time_limit,
                              eval_type='holdout')
    bandit.optimize()
    time_taken = time.time() - _start_time
    model_desc = [bandit.nbest_algo_ids, bandit.optimal_algo_id,
                  bandit.final_rewards, bandit.action_sequence]

    validation_accuracy = np.max(bandit.final_rewards)
    best_pred = bandit._best_predict(test_data)
    test_accuracy = balanced_accuracy(test_data.data[1], best_pred)

    bandit.refit()
    es_pred = bandit._es_predict(test_data)
    test_accuracy_with_ens = balanced_accuracy(test_data.data[1], es_pred)

    data = [dataset, validation_accuracy, test_accuracy, test_accuracy_with_ens,
            time_taken, model_desc]
    print(model_desc)
    print(data)

    save_path = project_dir + '%s_%s_%s_%d_%d_%d_%d_%d.pkl' % (
        hmab_flag, opt_algo, dataset, trial_num, len(algorithms), seed, run_id, time_limit)
    with open(save_path, 'wb') as f:
        pickle.dump(data, f)
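# The helper below is a minimal standalone sketch (not part of the original script) of the
# leave-one-out recommendation step used in evaluate_hmab above: the target dataset is
# excluded from meta-training, the advisor is fit, and its recommendation is intersected
# with the candidate pool. It reuses only names already used above; the helper name and
# its defaults are illustrative.
def _inspect_recommendation(dataset, candidate_algorithms, n_algo=5):
    exclude_datasets = [dataset]  # hold out the target dataset from meta-training
    alad = AlgorithmAdvisor(task_type=MULTICLASS_CLS, n_algorithm=9,
                            metric='bal_acc', exclude_datasets=exclude_datasets)
    alad.fit_meta_learner()
    recommended = alad.fetch_algorithm_set(dataset)
    # Keep the top-n recommended algorithms that are also in the candidate pool.
    return [algo for algo in recommended if algo in candidate_algorithms][:n_algo]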
def evaluate_imbalanced(algorithms, dataset, run_id, trial_num, seed, time_limit=1200):
    print('%s-%s-%d: %d' % (hmab_flag, dataset, run_id, time_limit))
    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset)
    cls_task_type = BINARY_CLS if len(set(train_data.data[1])) == 2 else MULTICLASS_CLS

    # ACC or Balanced_ACC
    balanced_acc_metric = make_scorer(balanced_accuracy)

    bandit = FirstLayerBandit(cls_task_type, trial_num, algorithms, train_data,
                              output_dir='logs',
                              per_run_time_limit=per_run_time_limit,
                              dataset_name=dataset,
                              ensemble_size=50,
                              opt_algo=opt_algo,
                              metric=balanced_acc_metric,
                              fe_algo='bo',
                              seed=seed)
    bandit.optimize()
    model_desc = [bandit.nbest_algo_ids, bandit.optimal_algo_id,
                  bandit.final_rewards, bandit.action_sequence]
    time_taken = time.time() - _start_time

    validation_accuracy = np.max(bandit.final_rewards)
    best_pred = bandit._best_predict(test_data)
    test_accuracy = balanced_accuracy(test_data.data[1], best_pred)
    es_pred = bandit._es_predict(test_data)
    test_accuracy_with_ens = balanced_accuracy(test_data.data[1], es_pred)

    data = [dataset, validation_accuracy, test_accuracy, test_accuracy_with_ens,
            time_taken, model_desc]
    print(model_desc)
    print(data[:4])

    save_path = project_dir + 'data/%s_%s_%s_%d_%d_%d_%d.pkl' % (
        hmab_flag, opt_algo, dataset, trial_num, len(algorithms), seed, run_id)
    with open(save_path, 'wb') as f:
        pickle.dump(data, f)
def evaluate_hmab(algorithms, dataset, run_id, trial_num, seed, time_limit=1200):
    print('%s-%s-%d: %d' % (hmab_flag, dataset, run_id, time_limit))
    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset, task_type=MULTICLASS_CLS)
    cls_task_type = BINARY_CLS if len(set(train_data.data[1])) == 2 else MULTICLASS_CLS
    balanced_acc_metric = make_scorer(balanced_accuracy)

    if is_unbalanced_dataset(train_data):
        from solnml.components.feature_engineering.transformations.preprocessor.smote_balancer import DataBalancer
        train_data = DataBalancer().operate(train_data)

    bandit = FirstLayerBandit(cls_task_type, trial_num, algorithms, train_data,
                              output_dir='logs',
                              per_run_time_limit=per_run_time_limit,
                              dataset_name=dataset,
                              ensemble_size=50,
                              inner_opt_algorithm=opt_algo,
                              metric=balanced_acc_metric,
                              fe_algo='bo',
                              seed=seed,
                              time_limit=time_limit,
                              eval_type='partial')
    # while time.time() - _start_time < time_limit:
    #     bandit.sub_bandits['random_forest'].optimizer['fe'].iterate()
    # print(bandit.sub_bandits['random_forest'].optimizer['hpo'].exp_output)
    bandit.optimize()

    # Intermediate optimization traces of the random_forest sub-bandit.
    fe_exp_output = bandit.sub_bandits['random_forest'].optimizer['fe'].exp_output
    hpo_exp_output = bandit.sub_bandits['random_forest'].optimizer['hpo'].exp_output

    validation_accuracy = np.max(bandit.final_rewards)
    best_pred = bandit._best_predict(test_data)
    test_accuracy = balanced_accuracy(test_data.data[1], best_pred)

    bandit.refit()
    es_pred = bandit._es_predict(test_data)
    test_accuracy_with_ens = balanced_accuracy(test_data.data[1], es_pred)

    data = [dataset, validation_accuracy, test_accuracy, test_accuracy_with_ens,
            fe_exp_output, hpo_exp_output, _start_time]

    save_path = project_dir + '%s_%s_%s_%d_%d_%d_%d_%d.pkl' % (
        hmab_flag, opt_algo, dataset, trial_num, len(algorithms), seed, run_id, time_limit)
    with open(save_path, 'wb') as f:
        pickle.dump(data, f)

    # Clean up per-run log files.
    del_path = './logs/'
    for i in os.listdir(del_path):
        file_data = os.path.join(del_path, i)
        if os.path.isfile(file_data):
            os.remove(file_data)
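# The block below is a minimal, hypothetical driver sketch (not part of the original script)
# showing how an evaluation function defined above might be invoked. It assumes the
# module-level globals referenced above (hmab_flag, opt_algo, per_run_time_limit,
# project_dir) and fills them with illustrative values; the algorithm list, dataset,
# trial budget, and number of repetitions are placeholders, not values from the source.
if __name__ == '__main__':
    hmab_flag = 'hmab'
    opt_algo = 'rb'               # assumed identifier for the inner optimization strategy
    per_run_time_limit = 150      # seconds per candidate evaluation, illustrative value
    project_dir = './data/'       # output prefix for the pickled results, illustrative value

    algorithms = ['random_forest', 'liblinear_svc', 'k_nearest_neighbors']  # placeholder candidates
    for run_id in range(3):
        evaluate_hmab(algorithms, 'pc2', run_id=run_id, trial_num=100,
                      seed=1, time_limit=1200)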