def evaluate(mth, run_i, seed):
    print(mth, run_i, seed, '===== start =====', flush=True)

    def objective_function(config):
        y = problem.evaluate_config(config)
        res = dict()
        res['config'] = config
        res['objs'] = (y,)
        res['constraints'] = None
        return res

    bo = SMBO(objective_function, cs,
              surrogate_type=surrogate_type,            # default: gp
              acq_optimizer_type=acq_optimizer_type,    # default: random_scipy
              initial_runs=initial_runs,                # default: 3
              init_strategy=init_strategy,              # default: random_explore_first
              max_runs=max_runs,
              time_limit_per_trial=time_limit_per_trial,
              task_id=task_id,
              random_state=seed)
    # bo.run()
    time_list = []
    global_start_time = time.time()
    for i in range(max_runs):
        config, trial_state, _, objs = bo.iterate()
        global_time = time.time() - global_start_time
        print(seed, i, objs, config, trial_state, 'time=', global_time)
        time_list.append(global_time)
    config_list = bo.get_history().configurations
    perf_list = bo.get_history().perfs
    return config_list, perf_list, time_list
def evaluate(mth, run_i, seed):
    print(mth, run_i, seed, '===== start =====', flush=True)

    def objective_function(config):
        y = problem.evaluate_config(config)
        res = dict()
        # res['config'] = config
        res['objs'] = (y,)
        # res['constraints'] = None
        return res

    task_id = '%s_%s_%d' % (mth, problem_str, seed)
    bo = SMBO(
        objective_function, cs,
        advisor_type=advisor_type,              # choices: default, tpe
        surrogate_type=surrogate_type,          # choices: gp, gp_mcmc, prf, lightgbm
        acq_optimizer_type=acq_optimizer_type,  # default: local_random
        initial_runs=initial_runs,              # default: 3
        init_strategy=init_strategy,            # default: random_explore_first
        max_runs=max_runs,
        time_limit_per_trial=time_limit_per_trial,
        task_id=task_id,
        random_state=seed)
    if advisor_type == 'tpe':
        bo.config_advisor.num_samples = tpe_num_samples
    bo.run()

    config_list = bo.get_history().configurations
    perf_list = bo.get_history().perfs
    time_list = bo.get_history().update_times
    return config_list, perf_list, time_list
def evaluate(mth, run_i, seed):
    print(mth, run_i, seed, '===== start =====', flush=True)

    def objective_function(config):
        res = problem.evaluate_config(config)
        res['config'] = config
        return res

    bo = SMBO(
        objective_function, cs,
        num_objs=problem.num_objs,
        num_constraints=0,
        surrogate_type=surrogate_type,          # default: gp
        acq_type=acq_type,                      # default: ehvi
        acq_optimizer_type=acq_optimizer_type,  # default: random_scipy
        initial_runs=initial_runs,              # default: 2 * (problem.dim + 1)
        init_strategy=init_strategy,            # default: sobol
        max_runs=max_runs,
        ref_point=problem.ref_point,
        time_limit_per_trial=time_limit_per_trial,
        task_id=task_id,
        random_state=seed)
    # bo.run()
    hv_diffs = []
    time_list = []
    global_start_time = time.time()
    for i in range(max_runs):
        config, trial_state, _, objs = bo.iterate()
        global_time = time.time() - global_start_time
        print(seed, i, objs, config, trial_state, 'time=', global_time)
        hv = Hypervolume(problem.ref_point).compute(bo.get_history().get_pareto_front())
        hv_diff = problem.max_hv - hv
        print(seed, i, 'hypervolume =', hv)
        print(seed, i, 'hv diff =', hv_diff)
        hv_diffs.append(hv_diff)
        time_list.append(global_time)
    config_list = bo.get_history().configurations
    perf_list = bo.get_history().perfs
    pf = np.asarray(bo.get_history().get_pareto_front())

    # plot for debugging
    if plot_mode == 1:
        Y_init = None
        plot_pf(problem, problem_str, mth, pf, Y_init)

    return hv_diffs, pf, config_list, perf_list, time_list
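# ---------------------------------------------------------------------------
# The three `evaluate` variants above read module-level globals. Below is a
# minimal sketch of that setup, using the defaults stated in the comments
# above; `problem` stands in for a benchmark wrapper (defined elsewhere) that
# exposes evaluate_config(config), num_objs, dim, ref_point and max_hv, and
# `cs` is its ConfigurationSpace. All concrete values here are illustrative,
# not taken from the original scripts.
# ---------------------------------------------------------------------------
import time
import numpy as np
from openbox.optimizer.generic_smbo import SMBO

surrogate_type = 'gp'
acq_optimizer_type = 'random_scipy'
initial_runs = 3
init_strategy = 'random_explore_first'
max_runs = 100
time_limit_per_trial = 180
plot_mode = 0
task_id = 'benchmark_run'   # illustrative
# problem, cs, problem_str, Hypervolume and plot_pf come from the surrounding
# benchmark code and are not reconstructed here.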
# NOTE: this snippet begins mid-function; the imports and the objective header
# below (including the `params = config.get_dictionary()` line) are assumed.
from lightgbm import LGBMClassifier
from sklearn.metrics import balanced_accuracy_score


def objective_function(config):
    params = config.get_dictionary()
    model = LGBMClassifier(**params)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    loss = 1 - balanced_accuracy_score(y_test, y_pred)  # minimize
    return dict(objs=(loss,))


from openbox.optimizer.generic_smbo import SMBO
import matplotlib.pyplot as plt

# Run Optimization
bo = SMBO(objective_function,
          get_configspace(),
          num_objs=1,
          num_constraints=0,
          max_runs=100,
          surrogate_type='prf',
          time_limit_per_trial=180,
          task_id='so_hpo')
bo.run()

history = bo.get_history()
print(history)

history.plot_convergence()
# plt.show()
plt.savefig('logs/plot_convergence_hpo.png')

history.visualize_jupyter()
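# A minimal sketch of the `get_configspace` helper referenced above, assuming a
# small LightGBM search space (hyperparameter names and ranges here are
# illustrative; the original script's space may differ):
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import (UniformFloatHyperparameter,
                                         UniformIntegerHyperparameter)


def get_configspace():
    cs = ConfigurationSpace()
    cs.add_hyperparameters([
        UniformIntegerHyperparameter('n_estimators', 100, 1000, default_value=500),
        UniformIntegerHyperparameter('num_leaves', 31, 2047, default_value=128),
        UniformFloatHyperparameter('learning_rate', 1e-3, 0.3,
                                   default_value=0.1, log=True),
    ])
    return cs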
# NOTE: this snippet begins mid-call; the call head below is assumed, based on
# the matching `bo_r = SMBO(...)` call at the end of the snippet.
bo = SMBO(multi_objective_func, cs, num_objs=num_objs,
          # ... (earlier keyword arguments elided in the source)
          time_limit_per_trial=60, task_id='mo', random_state=seed)
bo.config_advisor.optimizer.random_chooser.prob = rand_prob      # set rand_prob, default 0
bo.config_advisor.acquisition_function.sample_num = sample_num   # set sample_num
# bo.config_advisor.acquisition_function.random_state = seed     # set random_state
bo.config_advisor.optimizer.num_mc = 1000    # MESMO optimizer only
bo.config_advisor.optimizer.num_opt = 100    # MESMO optimizer only
print(mth, '===== start =====')

# bo.run()
hv_diffs = []
for i in range(max_runs):
    config, trial_state, objs, trial_info = bo.iterate()
    print(i, objs, config)
    hv = Hypervolume(referencePoint).compute(bo.get_history().get_pareto_front())
    print(i, 'hypervolume =', hv)
    hv_diff = real_hv - hv
    hv_diffs.append(hv_diff)
    print(i, 'hv diff =', hv_diff)

# Print result
pf = np.asarray(bo.get_history().get_pareto_front())
print(mth, 'pareto num:', pf.shape[0])
print('real hv =', real_hv)
print('hv_diffs:', hv_diffs)

# Evaluate the random search.
bo_r = SMBO(multi_objective_func, cs, num_objs=num_objs,
            # ... (remaining arguments elided in the source)
# NOTE: this snippet begins mid-call; the `bo = SMBO(prob.evaluate,` head is assumed.
bo = SMBO(prob.evaluate,
          prob.config_space,
          num_objs=prob.num_objs,
          num_constraints=0,
          acq_type='ehvi',
          acq_optimizer_type='random_scipy',
          surrogate_type='gp',
          ref_point=prob.ref_point,
          max_runs=50,
          initial_runs=2 * (dim + 1),
          init_strategy='sobol',
          task_id='mo',
          random_state=1)
bo.run()

# plot pareto front
pareto_front = np.asarray(bo.get_history().get_pareto_front())
if pareto_front.shape[-1] in (2, 3):
    if pareto_front.shape[-1] == 2:
        plt.scatter(pareto_front[:, 0], pareto_front[:, 1])
        plt.xlabel('Objective 1')
        plt.ylabel('Objective 2')
    elif pareto_front.shape[-1] == 3:
        ax = plt.axes(projection='3d')
        ax.scatter3D(pareto_front[:, 0], pareto_front[:, 1], pareto_front[:, 2])
        ax.set_xlabel('Objective 1')
        ax.set_ylabel('Objective 2')
        ax.set_zlabel('Objective 3')
    plt.title('Pareto Front')
    plt.savefig('logs/plot_pareto_front_zdt2.png')
    plt.show()
bc_params = {'float': {'x1': (0, 1, 0.5), 'x2': (0, 1, 0.5)}}
bc_cs = ConfigurationSpace()
bc_cs.add_hyperparameters([
    UniformFloatHyperparameter(e, *bc_params['float'][e])
    for e in bc_params['float']
])
bc_max_hv = 59.36011874867746
bc_ref_point = [18., 6.]

bo = SMBO(branin_currin, bc_cs,
          advisor_type='mcadvisor',
          task_id='mcparego',
          num_objs=2,
          acq_type='mcparego',
          ref_point=bc_ref_point,
          max_runs=100,
          random_state=2)
bo.run()

hvs = bo.get_history().hv_data
log_hv_diff = np.log10(bc_max_hv - np.asarray(hvs))

import matplotlib.pyplot as plt
plt.plot(log_hv_diff)
# plt.savefig('plt.pdf')
plt.show()
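# A minimal sketch of the `branin_currin` objective used above, following the
# standard BraninCurrin benchmark on [0, 1]^2 (both objectives minimized; the
# exact scaling in the original script may differ):
import numpy as np


def branin_currin(config):
    x1, x2 = config['x1'], config['x2']
    # Branin, rescaled from [0, 1]^2 to its usual domain [-5, 10] x [0, 15]
    u, v = 15 * x1 - 5, 15 * x2
    branin = ((v - 5.1 / (4 * np.pi ** 2) * u ** 2 + 5 / np.pi * u - 6) ** 2
              + 10 * (1 - 1 / (8 * np.pi)) * np.cos(u) + 10)
    # Currin exponential function; the factor tends to 1 as x2 -> 0+
    factor = 1 - np.exp(-1 / (2 * x2)) if x2 > 0 else 1.0
    currin = (factor
              * (2300 * x1 ** 3 + 1900 * x1 ** 2 + 2092 * x1 + 60)
              / (100 * x1 ** 3 + 500 * x1 ** 2 + 4 * x1 + 20))
    return dict(objs=(branin, currin))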
def evaluate(dataset, method, algo, space_size, max_run, step_size, seed):
    if algo == 'xgboost':
        model_class = XGBoost
    elif algo == 'lightgbm':
        model_class = LightGBM
    elif algo == 'adaboost':
        model_class = Adaboost
    elif algo == 'random_forest':
        model_class = RandomForest
    elif algo == 'extra_trees':
        model_class = ExtraTrees
    else:
        raise ValueError('Invalid algorithm: %s!' % algo)
    cs = model_class.get_hyperparameter_search_space(space_size=space_size)

    x_train, y_train, x_val, y_val = load_data(dataset, solnml_path)

    def objective_func(config):
        conf_dict = config.get_dictionary()
        if algo == 'xgboost':
            model = XGBoost(**conf_dict, n_jobs=n_jobs, seed=1)
        elif algo == 'lightgbm':
            model = LightGBM(**conf_dict, n_jobs=n_jobs, random_state=1)
        elif algo == 'adaboost':
            model = Adaboost(**conf_dict, random_state=1)
        elif algo == 'random_forest':
            model = RandomForest(**conf_dict, n_jobs=n_jobs, random_state=1)
        elif algo == 'extra_trees':
            model = ExtraTrees(**conf_dict, n_jobs=n_jobs, random_state=1)
        else:
            raise ValueError('Invalid algorithm: %s' % algo)
        model.fit(x_train, y_train)

        from sklearn.metrics import balanced_accuracy_score
        # evaluate on validation data
        y_pred = model.predict(x_val)
        perf = -balanced_accuracy_score(y_val, y_pred)  # minimize
        return perf

    if method == 'random-search':
        # tuner = RandomTuner(objective_func, cs, max_run=max_run, random_state=seed)
        # tuner.run()
        # print(tuner.get_incumbent())
        # config_list = list(tuner.history_dict.keys())
        # perf_list = list(tuner.history_dict.values())
        from openbox.optimizer.generic_smbo import SMBO
        task_id = 'tuning-random-%s-%s-%s-%d' % (dataset, algo, space_size, seed)
        bo = SMBO(objective_func, cs,
                  advisor_type='random',
                  max_runs=max_run,
                  task_id=task_id, logging_dir='logs',
                  random_state=seed)
        bo.run()
        print(bo.get_incumbent())
        history = bo.get_history()
        config_list = history.configurations
        perf_list = history.perfs
    elif method == 'ada-bo':
        if algo == 'xgboost':
            importance_list = ['n_estimators', 'learning_rate', 'max_depth',
                               'colsample_bytree', 'gamma', 'min_child_weight',
                               'reg_alpha', 'reg_lambda', 'subsample']
        elif algo == 'lightgbm':
            importance_list = ['n_estimators', 'learning_rate', 'num_leaves',
                               'reg_alpha', 'colsample_bytree', 'min_child_weight',
                               'reg_lambda', 'subsample', 'max_depth']
        elif algo == 'adaboost':
            importance_list = ['n_estimators', 'learning_rate', 'max_depth', 'algorithm']
        elif algo == 'random_forest':
            importance_list = ['n_estimators', 'max_depth', 'max_features',
                               'min_samples_leaf', 'min_samples_split', 'bootstrap',
                               'criterion', 'max_leaf_nodes', 'min_impurity_decrease',
                               'min_weight_fraction_leaf']
        elif algo == 'extra_trees':
            importance_list = ['n_estimators', 'max_depth', 'max_features',
                               'min_samples_leaf', 'min_samples_split', 'bootstrap',
                               'criterion', 'max_leaf_nodes', 'min_impurity_decrease',
                               'min_weight_fraction_leaf']
        else:
            raise ValueError('Invalid algorithm: %s!' % algo)
        print('Previous importance list is', ','.join(importance_list))
        if use_meta_order == "yes":
            data_, scaler_ = load_meta_data(algorithm=algo, dataset_ids=None,
                                            include_scaler=True)
            X, y, labels = data_
            from automlspace.ranknet import RankNetAdvisor
            advisor = RankNetAdvisor(algorithm_id=algo)
            advisor.fit(X, y)
            new_embedding = load_meta_feature(dataset_id=dataset)
            new_embedding = scaler_.transform([new_embedding])[0]
            importance_list = advisor.predict_ranking(new_embedding, rank_objs=labels)
            print('New importance list is', ','.join(importance_list))

        tuner = AdaptiveTuner(objective_func, cs, importance_list,
                              strategy=strategy,
                              max_run=max_run,
                              step_size=step_size,
                              random_state=seed)
        tuner.run()
        print(tuner.get_incumbent())
        config_list = list(tuner.history_dict.keys())
        perf_list = list(tuner.history_dict.values())
    elif method == 'openbox':
        from openbox.optimizer.generic_smbo import SMBO
        task_id = 'tuning-openbox-%s-%s-%s-%d' % (dataset, algo, space_size, seed)
        bo = SMBO(objective_func, cs,
                  advisor_type='default',
                  max_runs=max_run,
                  task_id=task_id, logging_dir='logs',
                  random_state=seed)
        bo.run()
        print(bo.get_incumbent())
        history = bo.get_history()
        config_list = history.configurations
        perf_list = history.perfs
    elif method == 'tpe':
        from openbox.optimizer.generic_smbo import SMBO
        task_id = 'tuning-tpe-%s-%s-%s-%d' % (dataset, algo, space_size, seed)
        bo = SMBO(objective_func, cs,
                  advisor_type='tpe',
                  max_runs=max_run,
                  task_id=task_id, logging_dir='logs',
                  random_state=seed)
        bo.run()
        print(bo.get_incumbent())
        history = bo.get_history()
        config_list = history.configurations
        perf_list = history.perfs
    else:
        raise ValueError('Invalid method id - %s.' % args.method)

    if len(config_list) > max_run:
        print('len of result: %d. max_run: %d. cut off.' % (len(config_list), max_run))
        config_list = config_list[:max_run]
        perf_list = perf_list[:max_run]
    if len(config_list) < max_run:
        print('===== WARNING: len of result: %d. max_run: %d.' % (len(config_list), max_run))

    return config_list, perf_list
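# ---------------------------------------------------------------------------
# `evaluate` above also reads several module-level globals. A sketch of those
# plus an example driver call; every concrete value here is illustrative and
# not taken from the original script:
# ---------------------------------------------------------------------------
n_jobs = 4                 # threads per model fit
solnml_path = './data'     # data root passed to load_data
use_meta_order = 'no'      # 'yes' enables the RankNet meta-learned ordering
strategy = 'default'       # AdaptiveTuner strategy

if __name__ == '__main__':
    config_list, perf_list = evaluate(dataset='pc4', method='openbox',
                                      algo='lightgbm', space_size='large',
                                      max_run=50, step_size=4, seed=1)
    print('best validation perf:', min(perf_list))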
class RandomSearchOptimizer(BaseOptimizer):
    def __init__(self, evaluator, config_space, name, eval_type, time_limit=None,
                 evaluation_limit=None, per_run_time_limit=300, output_dir='./',
                 timestamp=None, inner_iter_num_per_iter=1, seed=1, n_jobs=1):
        super().__init__(evaluator, config_space, name, eval_type=eval_type,
                         timestamp=timestamp, output_dir=output_dir, seed=seed)
        self.time_limit = time_limit
        self.evaluation_num_limit = evaluation_limit
        self.inner_iter_num_per_iter = inner_iter_num_per_iter
        self.per_run_time_limit = per_run_time_limit
        # self.per_run_mem_limit = per_run_mem_limit

        if n_jobs == 1:
            self.optimizer = RandomSearch(objective_function=self.evaluator,
                                          config_space=config_space,
                                          advisor_type='random',
                                          task_id='Default',
                                          time_limit_per_trial=self.per_run_time_limit,
                                          random_state=self.seed)
        else:
            self.optimizer = pRandomSearch(objective_function=self.evaluator,
                                           config_space=config_space,
                                           sample_strategy='random',
                                           batch_size=n_jobs,
                                           task_id='Default',
                                           time_limit_per_trial=self.per_run_time_limit,
                                           random_state=self.seed)

        self.trial_cnt = 0
        self.configs = list()
        self.perfs = list()
        self.exp_output = dict()
        self.incumbent_perf = float("-INF")
        self.incumbent_config = self.config_space.get_default_configuration()

        hp_num = len(self.config_space.get_hyperparameters())
        if hp_num == 0:
            self.config_num_threshold = 0
        else:
            _threshold = int(len(set(self.config_space.sample_configuration(5000))))
            self.config_num_threshold = _threshold
        self.logger.debug("The maximum trial number in HPO is: %d" % self.config_num_threshold)
        self.maximum_config_num = min(1500, self.config_num_threshold)
        self.eval_dict = {}
        self.n_jobs = n_jobs

    def run(self):
        while True:
            evaluation_num = len(self.perfs)
            if self.evaluation_num_limit is not None and evaluation_num > self.evaluation_num_limit:
                break
            if self.time_limit is not None and time.time() - self.start_time > self.time_limit:
                break
            self.iterate()
        return np.max(self.perfs)

    def iterate(self, budget=MAX_INT):
        _start_time = time.time()

        if len(self.configs) == 0 and self.init_hpo_iter_num is not None:
            inner_iter_num = self.init_hpo_iter_num
            print('initial hpo trial num is set to %d' % inner_iter_num)
        else:
            inner_iter_num = self.inner_iter_num_per_iter

        if self.n_jobs == 1:
            for _ in range(inner_iter_num):
                if len(self.configs) >= self.maximum_config_num:
                    self.early_stopped_flag = True
                    self.logger.warning('Already explored 70 percent of the hyperparameter '
                                        'space or maximum configuration number met: %d!'
                                        % self.maximum_config_num)
                    break
                if time.time() - _start_time > budget:
                    self.logger.warning('Time limit exceeded!')
                    break
                _config, _status, _, _perf = self.optimizer.iterate()
                self.update_saver([_config], [_perf[0]])
                if _status == SUCCESS:
                    self.exp_output[time.time()] = (_config, _perf[0])
                    self.configs.append(_config)
                    self.perfs.append(-_perf[0])
        else:
            if len(self.configs) >= self.maximum_config_num:
                self.early_stopped_flag = True
                self.logger.warning('Already explored 70 percent of the hyperparameter '
                                    'space or maximum configuration number met: %d!'
                                    % self.maximum_config_num)
            elif time.time() - _start_time > budget:
                self.logger.warning('Time limit exceeded!')
            else:
                _config_list, _status_list, _, _perf_list = self.optimizer.async_iterate(n=inner_iter_num)
                self.update_saver(_config_list, _perf_list)
                for i, _config in enumerate(_config_list):
                    if _status_list[i] == SUCCESS:
                        self.exp_output[time.time()] = (_config, _perf_list[i])
                        self.configs.append(_config)
                        self.perfs.append(-_perf_list[i])

        run_history = self.optimizer.get_history()
        if self.name == 'hpo':
            if hasattr(self.evaluator, 'fe_config'):
                fe_config = self.evaluator.fe_config
            else:
                fe_config = None
            self.eval_dict = {(fe_config, hpo_config): [-run_history.perfs[i], time.time(),
                                                        run_history.trial_states[i]]
                              for i, hpo_config in enumerate(run_history.configurations)}
        else:
            if hasattr(self.evaluator, 'hpo_config'):
                hpo_config = self.evaluator.hpo_config
            else:
                hpo_config = None
            self.eval_dict = {(fe_config, hpo_config): [-run_history.perfs[i], time.time(),
                                                        run_history.trial_states[i]]
                              for i, fe_config in enumerate(run_history.configurations)}

        if len(run_history.get_incumbents()) > 0:
            self.incumbent_config, self.incumbent_perf = run_history.get_incumbents()[0]
            self.incumbent_perf = -self.incumbent_perf
        iteration_cost = time.time() - _start_time
        return self.incumbent_perf, iteration_cost, self.incumbent_config
# NOTE: this snippet begins mid-call; the objective and space names in the
# assumed `bo = SMBO(...)` head below are placeholders.
bo = SMBO(objective_function, cs,
          num_objs=2,
          num_constraints=0,
          max_runs=50,
          surrogate_type='gp',
          acq_type='ehvi',
          acq_optimizer_type='random_scipy',
          initial_runs=6,
          init_strategy='sobol',
          ref_point=ref_point,
          time_limit_per_trial=10,
          task_id='mo',
          random_state=1)
bo.run()

# plot pareto front
pareto_front = np.asarray(bo.get_history().get_pareto_front())
if pareto_front.shape[-1] in (2, 3):
    if pareto_front.shape[-1] == 2:
        plt.scatter(pareto_front[:, 0], pareto_front[:, 1])
        plt.xlabel('Objective 1')
        plt.ylabel('Objective 2')
    elif pareto_front.shape[-1] == 3:
        ax = plt.axes(projection='3d')
        ax.scatter3D(pareto_front[:, 0], pareto_front[:, 1], pareto_front[:, 2])
        ax.set_xlabel('Objective 1')
        ax.set_ylabel('Objective 2')
        ax.set_zlabel('Objective 3')
    plt.title('Pareto Front')
    plt.show()
import os
import sys

import numpy as np
import matplotlib.pyplot as plt

sys.path.insert(0, os.getcwd())
from openbox.optimizer.generic_smbo import SMBO
from openbox.benchmark.objective_functions.synthetic import BraninCurrin

prob = BraninCurrin()
bo = SMBO(prob.evaluate, prob.config_space,
          advisor_type='mcadvisor',
          task_id='mcehvi',
          num_objs=prob.num_objs,
          num_constraints=prob.num_constraints,
          acq_type='mcehvi',
          ref_point=prob.ref_point,
          max_runs=100,
          random_state=2)
bo.run()

hvs = bo.get_history().hv_data
log_hv_diff = np.log10(prob.max_hv - np.asarray(hvs))

pf = np.asarray(bo.get_history().get_pareto_front())
plt.scatter(pf[:, 0], pf[:, 1])
# plt.plot(log_hv_diff)
# plt.show()
import matplotlib.pyplot as plt  # needed by the plotting calls below

from openbox.optimizer.generic_smbo import SMBO
from openbox.benchmark.objective_functions.synthetic import Ackley

num_inputs = 10
acq_optimizer_type = 'random_scipy'
seed = 1
prob = Ackley(dim=num_inputs, constrained=False)
initial_runs = 2 * (num_inputs + 1)
max_runs = 250

bo = SMBO(prob.evaluate, prob.config_space,
          task_id='turbo',
          advisor_type='mcadvisor',
          num_objs=prob.num_objs,
          num_constraints=prob.num_constraints,
          acq_type='mcei',
          acq_optimizer_type=acq_optimizer_type,
          use_trust_region=True,
          surrogate_type='gp',
          max_runs=max_runs,
          initial_runs=initial_runs,
          init_strategy='latin_hypercube',
          random_state=seed)
bo.run()

values = list(bo.get_history().data.values())
plt.plot(values)
plt.show()