def evaluate(mth, run_i, seed):
    """Drive one SMBO run iteration by iteration and time every step.

    The optimizer settings (``problem``, ``cs``, ``surrogate_type``,
    ``acq_optimizer_type``, ``initial_runs``, ``init_strategy``, ``max_runs``,
    ``time_limit_per_trial`` and ``task_id``) are read from the enclosing
    module scope.

    Args:
        mth: Method label; only used in the start banner.
        run_i: Run index; only used in the start banner.
        seed: Passed to ``SMBO`` as ``random_state`` and echoed in the logs.

    Returns:
        A tuple ``(config_list, perf_list, time_list)`` where the first two
        come from the optimizer history and ``time_list`` holds the elapsed
        seconds since the loop started, sampled after each iteration.
    """
    print(mth, run_i, seed, '===== start =====', flush=True)

    def objective_function(config):
        # Package the value returned by the problem into a result dict.
        value = problem.evaluate_config(config)
        return {
            'config': config,
            'objs': (value,),
            'constraints': None,
        }

    smbo_settings = dict(
        surrogate_type=surrogate_type,            # default: gp
        acq_optimizer_type=acq_optimizer_type,    # default: random_scipy
        initial_runs=initial_runs,                # default: 3
        init_strategy=init_strategy,              # default: random_explore_first
        max_runs=max_runs,
        time_limit_per_trial=time_limit_per_trial,
        task_id=task_id,
        random_state=seed,
    )
    bo = SMBO(objective_function, cs, **smbo_settings)

    # Step manually (instead of bo.run()) so elapsed time can be logged per trial.
    time_list = []
    loop_started = time.time()
    for step in range(max_runs):
        config, trial_state, _, objs = bo.iterate()
        elapsed = time.time() - loop_started
        print(seed, step, objs, config, trial_state, 'time=', elapsed)
        time_list.append(elapsed)

    history = bo.get_history()
    return history.configurations, history.perfs, time_list
示例#2
0
def evaluate(mth, run_i, seed):
    """Run a complete SMBO optimization and return its recorded history.

    The optimizer settings (``problem``, ``problem_str``, ``cs``,
    ``advisor_type``, ``surrogate_type``, ``acq_optimizer_type``,
    ``initial_runs``, ``init_strategy``, ``max_runs``,
    ``time_limit_per_trial`` and ``tpe_num_samples``) are read from the
    enclosing module scope.

    Args:
        mth: Method label; printed and embedded in the task id.
        run_i: Run index; only used in the start banner.
        seed: Passed to ``SMBO`` as ``random_state`` and embedded in the task id.

    Returns:
        A tuple ``(config_list, perf_list, time_list)`` taken from the
        optimizer history (``configurations``, ``perfs``, ``update_times``).
    """
    print(mth, run_i, seed, '===== start =====', flush=True)

    def objective_function(config):
        # Only the objective tuple is reported back to the optimizer.
        return {'objs': (problem.evaluate_config(config), )}

    task_id = '%s_%s_%d' % (mth, problem_str, seed)
    smbo_settings = dict(
        advisor_type=advisor_type,  # choices: default, tpe
        surrogate_type=surrogate_type,  # choices: gp, gp_mcmc, prf, lightgbm
        acq_optimizer_type=acq_optimizer_type,  # default: local_random
        initial_runs=initial_runs,  # default: 3
        init_strategy=init_strategy,  # default: random_explore_first
        max_runs=max_runs,
        time_limit_per_trial=time_limit_per_trial,
        task_id=task_id,
        random_state=seed,
    )
    bo = SMBO(objective_function, cs, **smbo_settings)

    # The TPE advisor gets its sample count overridden after construction.
    if advisor_type == 'tpe':
        bo.config_advisor.num_samples = tpe_num_samples

    bo.run()

    history = bo.get_history()
    return history.configurations, history.perfs, history.update_times
def evaluate(mth, run_i, seed):
    """Run a multi-objective SMBO experiment and track the hypervolume gap.

    The optimizer settings (``problem``, ``problem_str``, ``cs``,
    ``surrogate_type``, ``acq_type``, ``acq_optimizer_type``,
    ``initial_runs``, ``init_strategy``, ``max_runs``,
    ``time_limit_per_trial``, ``task_id`` and ``plot_mode``) are read from
    the enclosing module scope.

    Args:
        mth: Method label; printed and forwarded to ``plot_pf``.
        run_i: Run index; only used in the start banner.
        seed: Passed to ``SMBO`` as ``random_state`` and echoed in the logs.

    Returns:
        A tuple ``(hv_diffs, pf, config_list, perf_list, time_list)``:
        ``problem.max_hv`` minus the hypervolume after each iteration, the
        final Pareto front as an array, the history's configurations and
        perfs, and the elapsed seconds since the loop started after each
        iteration.
    """
    print(mth, run_i, seed, '===== start =====', flush=True)

    def objective_function(config):
        # The problem already returns a dict; attach the evaluated config.
        outcome = problem.evaluate_config(config)
        outcome['config'] = config
        return outcome

    smbo_settings = dict(
        num_objs=problem.num_objs,
        num_constraints=0,
        surrogate_type=surrogate_type,  # default: gp
        acq_type=acq_type,  # default: ehvi
        acq_optimizer_type=acq_optimizer_type,  # default: random_scipy
        initial_runs=initial_runs,  # default: 2 * (problem.dim + 1)
        init_strategy=init_strategy,  # default: sobol
        max_runs=max_runs,
        ref_point=problem.ref_point,
        time_limit_per_trial=time_limit_per_trial,
        task_id=task_id,
        random_state=seed,
    )
    bo = SMBO(objective_function, cs, **smbo_settings)

    # Step manually (instead of bo.run()) to record hypervolume per trial.
    hv_diffs, time_list = [], []
    loop_started = time.time()
    for step in range(max_runs):
        config, trial_state, _, objs = bo.iterate()
        elapsed = time.time() - loop_started
        print(seed, step, objs, config, trial_state, 'time=', elapsed)

        current_front = bo.get_history().get_pareto_front()
        hv = Hypervolume(problem.ref_point).compute(current_front)
        gap = problem.max_hv - hv
        print(seed, step, 'hypervolume =', hv)
        print(seed, step, 'hv diff =', gap)

        hv_diffs.append(gap)
        time_list.append(elapsed)

    history = bo.get_history()
    config_list = history.configurations
    perf_list = history.perfs
    pf = np.asarray(history.get_pareto_front())

    # plot for debugging
    if plot_mode == 1:
        plot_pf(problem, problem_str, mth, pf, None)

    return hv_diffs, pf, config_list, perf_list, time_list
示例#4
0
    model = LGBMClassifier(**params)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    loss = 1 - balanced_accuracy_score(y_test, y_pred)  # minimize
    return dict(objs=(loss, ))


from openbox.optimizer.generic_smbo import SMBO
import matplotlib.pyplot as plt

# Run Optimization
# Single-objective, unconstrained run: 100 trials, 'prf' surrogate,
# time_limit_per_trial=180 (presumably seconds -- confirm against SMBO docs).
bo = SMBO(objective_function,
          get_configspace(),
          num_objs=1,
          num_constraints=0,
          max_runs=100,
          surrogate_type='prf',
          time_limit_per_trial=180,
          task_id='so_hpo')
bo.run()

# Print the recorded optimization history.
history = bo.get_history()
print(history)

# Draw the convergence plot and write it to disk instead of showing it.
# NOTE(review): the 'logs/' directory presumably has to exist already -- confirm.
history.plot_convergence()
#plt.show()
plt.savefig('logs/plot_convergence_hpo.png')

history.visualize_jupyter()
示例#5
0
    time_limit_per_trial=60,
    task_id='mo',
    random_state=seed)
# Tune advisor internals after construction; rand_prob, sample_num, mth,
# max_runs, referencePoint and real_hv are defined outside this excerpt.
bo.config_advisor.optimizer.random_chooser.prob = rand_prob  # set rand_prob, default 0
bo.config_advisor.acquisition_function.sample_num = sample_num  # set sample_num
#bo.config_advisor.acquisition_function.random_state = seed      # set random_state
bo.config_advisor.optimizer.num_mc = 1000  # MESMO optimizer only
bo.config_advisor.optimizer.num_opt = 100  # MESMO optimizer only
print(mth, '===== start =====')
# bo.run()
# Step manually so the hypervolume gap can be recorded after every trial.
hv_diffs = []
for i in range(max_runs):
    # NOTE(review): the tuple is unpacked as (config, trial_state, objs,
    # trial_info); verify this order against the installed SMBO.iterate().
    config, trial_state, objs, trial_info = bo.iterate()
    print(i, objs, config)
    # Hypervolume of the current Pareto front w.r.t. referencePoint.
    hv = Hypervolume(referencePoint).compute(
        bo.get_history().get_pareto_front())
    print(i, 'hypervolume =', hv)
    # Gap between real_hv and the hypervolume reached so far.
    hv_diff = real_hv - hv
    hv_diffs.append(hv_diff)
    print(i, 'hv diff =', hv_diff)

# Print result
pf = np.asarray(bo.get_history().get_pareto_front())
print(mth, 'pareto num:', pf.shape[0])
print('real hv =', real_hv)
print('hv_diffs:', hv_diffs)

# Evaluate the random search.
bo_r = SMBO(multi_objective_func,
            cs,
            num_objs=num_objs,
示例#6
0
          prob.config_space,
          num_objs=prob.num_objs,
          num_constraints=0,
          acq_type='ehvi',
          acq_optimizer_type='random_scipy',
          surrogate_type='gp',
          ref_point=prob.ref_point,
          max_runs=50,
          initial_runs=2 * (dim + 1),
          init_strategy='sobol',
          task_id='mo',
          random_state=1)
# Execute the optimization configured above.
bo.run()

# plot pareto front
# Only 2-D and 3-D fronts are plotted; for any other dimensionality nothing is
# drawn, saved or shown.
pareto_front = np.asarray(bo.get_history().get_pareto_front())
if pareto_front.shape[-1] in (2, 3):
    if pareto_front.shape[-1] == 2:
        plt.scatter(pareto_front[:, 0], pareto_front[:, 1])
        plt.xlabel('Objective 1')
        plt.ylabel('Objective 2')
    elif pareto_front.shape[-1] == 3:
        ax = plt.axes(projection='3d')
        ax.scatter3D(pareto_front[:, 0], pareto_front[:, 1], pareto_front[:,
                                                                          2])
        ax.set_xlabel('Objective 1')
        ax.set_ylabel('Objective 2')
        ax.set_zlabel('Objective 3')
    plt.title('Pareto Front')
    # The figure is written to disk first and then displayed.
    # NOTE(review): the 'logs/' directory presumably has to exist -- confirm.
    plt.savefig('logs/plot_pareto_front_zdt2.png')
    plt.show()

# Search space for the two-objective run below: two float hyperparameters.
# Each tuple is unpacked positionally after the name; presumably
# (lower, upper, default) -- confirm against UniformFloatHyperparameter.
bc_params = {'float': {'x1': (0, 1, 0.5), 'x2': (0, 1, 0.5)}}
bc_cs = ConfigurationSpace()
bc_cs.add_hyperparameters([
    UniformFloatHyperparameter(e, *bc_params['float'][e])
    for e in bc_params['float']
])
# Constant the per-iteration hypervolumes are subtracted from below
# (presumably the best attainable hypervolume for this problem -- confirm).
bc_max_hv = 59.36011874867746
bc_ref_point = [18., 6.]

# Two-objective run with the 'mcadvisor' advisor and 'mcparego' acquisition.
bo = SMBO(branin_currin,
          bc_cs,
          advisor_type='mcadvisor',
          task_id='mcparego',
          num_objs=2,
          acq_type='mcparego',
          ref_point=bc_ref_point,
          max_runs=100,
          random_state=2)
bo.run()

# log10 of the remaining hypervolume gap, one value per entry in hv_data.
hvs = bo.get_history().hv_data
log_hv_diff = np.log10(bc_max_hv - np.asarray(hvs))

import matplotlib.pyplot as plt

plt.plot(log_hv_diff)
# plt.savefig('plt.pdf')
plt.show()
def evaluate(dataset, method, algo, space_size, max_run, step_size, seed):
    """Tune one algorithm on one dataset with the chosen search method.

    The objective trains the model selected by ``algo`` on the training split
    and returns the negated balanced accuracy on the validation split, so the
    tuners minimize it.

    Module-level names used: ``XGBoost``, ``LightGBM``, ``Adaboost``,
    ``RandomForest``, ``ExtraTrees``, ``load_data``, ``solnml_path``,
    ``n_jobs`` and, for ``'ada-bo'``, ``AdaptiveTuner``, ``strategy``,
    ``use_meta_order``, ``load_meta_data`` and ``load_meta_feature``.

    Args:
        dataset: Dataset identifier handed to ``load_data`` (and to
            ``load_meta_feature`` when the meta order is used).
        method: One of ``'random-search'``, ``'ada-bo'``, ``'openbox'``,
            ``'tpe'``.
        algo: One of ``'xgboost'``, ``'lightgbm'``, ``'adaboost'``,
            ``'random_forest'``, ``'extra_trees'``.
        space_size: Forwarded to ``get_hyperparameter_search_space``.
        max_run: Number of evaluations requested; results are cut to this
            length.
        step_size: Forwarded to ``AdaptiveTuner`` (``'ada-bo'`` only).
        seed: Random state for the tuner; also part of the SMBO task id.

    Returns:
        A tuple ``(config_list, perf_list)`` with at most ``max_run`` entries
        each.

    Raises:
        ValueError: If ``algo`` or ``method`` is not one of the supported
            values. Both are checked before any data is loaded.
    """
    supported_algos = ('xgboost', 'lightgbm', 'adaboost', 'random_forest',
                       'extra_trees')
    if algo not in supported_algos:
        raise ValueError('Invalid algorithm: %s!' % algo)

    # SMBO-backed methods: method name -> (advisor_type, tag used in task_id).
    smbo_methods = {
        'random-search': ('random', 'random'),
        'openbox': ('default', 'openbox'),
        'tpe': ('tpe', 'tpe'),
    }
    if method != 'ada-bo' and method not in smbo_methods:
        # Report the offending argument itself; the previous code formatted
        # ``args.method``, which is not defined inside this function.
        raise ValueError('Invalid method id - %s.' % method)

    # Resolve the model class lazily so only the selected name has to exist.
    if algo == 'xgboost':
        model_class = XGBoost
    elif algo == 'lightgbm':
        model_class = LightGBM
    elif algo == 'adaboost':
        model_class = Adaboost
    elif algo == 'random_forest':
        model_class = RandomForest
    else:
        model_class = ExtraTrees
    cs = model_class.get_hyperparameter_search_space(space_size=space_size)

    x_train, y_train, x_val, y_val = load_data(dataset, solnml_path)

    def objective_func(config):
        conf_dict = config.get_dictionary()
        # Fixed (non-tuned) constructor arguments differ per model wrapper.
        if algo == 'xgboost':
            fixed_args = dict(n_jobs=n_jobs, seed=1)
        elif algo == 'adaboost':
            fixed_args = dict(random_state=1)
        else:
            fixed_args = dict(n_jobs=n_jobs, random_state=1)
        model = model_class(**conf_dict, **fixed_args)

        model.fit(x_train, y_train)

        from sklearn.metrics import balanced_accuracy_score
        # evaluate on validation data
        y_pred = model.predict(x_val)
        return -balanced_accuracy_score(y_val, y_pred)  # minimize

    if method in smbo_methods:
        from openbox.optimizer.generic_smbo import SMBO
        advisor_type, tag = smbo_methods[method]
        task_id = 'tuning-%s-%s-%s-%s-%d' % (tag, dataset, algo, space_size,
                                             seed)
        bo = SMBO(objective_func,
                  cs,
                  advisor_type=advisor_type,
                  max_runs=max_run,
                  task_id=task_id,
                  logging_dir='logs',
                  random_state=seed)
        bo.run()
        print(bo.get_incumbent())
        history = bo.get_history()
        config_list = history.configurations
        perf_list = history.perfs
    else:  # method == 'ada-bo'
        # Default hyperparameter ordering per algorithm, handed to the tuner
        # unless it is replaced by the meta-learned ranking below.
        default_orders = {
            'xgboost': [
                'n_estimators', 'learning_rate', 'max_depth',
                'colsample_bytree', 'gamma', 'min_child_weight', 'reg_alpha',
                'reg_lambda', 'subsample'
            ],
            'lightgbm': [
                'n_estimators', 'learning_rate', 'num_leaves', 'reg_alpha',
                'colsample_bytree', 'min_child_weight', 'reg_lambda',
                'subsample', 'max_depth'
            ],
            'adaboost': [
                'n_estimators', 'learning_rate', 'max_depth', 'algorithm'
            ],
            'random_forest': [
                'n_estimators', 'max_depth', 'max_features',
                'min_samples_leaf', 'min_samples_split', 'bootstrap',
                'criterion', 'max_leaf_nodes', 'min_impurity_decrease',
                'min_weight_fraction_leaf'
            ],
            'extra_trees': [
                'n_estimators', 'max_depth', 'max_features',
                'min_samples_leaf', 'min_samples_split', 'bootstrap',
                'criterion', 'max_leaf_nodes', 'min_impurity_decrease',
                'min_weight_fraction_leaf'
            ],
        }
        importance_list = default_orders[algo]
        print('Previous important list is', ','.join(importance_list))

        if use_meta_order == "yes":
            # Replace the default order with a ranking predicted from meta data.
            data_, scaler_ = load_meta_data(algorithm=algo,
                                            dataset_ids=None,
                                            include_scaler=True)
            X, y, labels = data_

            from automlspace.ranknet import RankNetAdvisor
            advisor = RankNetAdvisor(algorithm_id=algo)
            advisor.fit(X, y)

            new_embedding = load_meta_feature(dataset_id=dataset)
            new_embedding = scaler_.transform([new_embedding])[0]
            importance_list = advisor.predict_ranking(new_embedding,
                                                      rank_objs=labels)
            print('New important list is', ','.join(importance_list))

        tuner = AdaptiveTuner(objective_func,
                              cs,
                              importance_list,
                              strategy=strategy,
                              max_run=max_run,
                              step_size=step_size,
                              random_state=seed)
        tuner.run()
        print(tuner.get_incumbent())
        config_list = list(tuner.history_dict.keys())
        perf_list = list(tuner.history_dict.values())

    # Normalize the result length: cut extra entries, warn when short.
    if len(config_list) > max_run:
        print('len of result: %d. max_run: %d. cut off.' %
              (len(config_list), max_run))
        config_list = config_list[:max_run]
        perf_list = perf_list[:max_run]
    if len(config_list) < max_run:
        print('===== WARNING: len of result: %d. max_run: %d.' %
              (len(config_list), max_run))
    return config_list, perf_list
示例#9
0
class RandomSearchOptimizer(BaseOptimizer):
    """Random-search optimizer wrapping ``RandomSearch`` / ``pRandomSearch``.

    With ``n_jobs == 1`` a ``RandomSearch`` instance is driven one trial at a
    time; otherwise a ``pRandomSearch`` instance is driven through
    ``async_iterate``. Performances coming back from the wrapped optimizer
    are negated before they are stored in ``self.perfs``, ``self.eval_dict``
    and ``self.incumbent_perf``.
    """

    def __init__(self, evaluator, config_space, name, eval_type, time_limit=None, evaluation_limit=None,
                 per_run_time_limit=300, output_dir='./', timestamp=None,
                 inner_iter_num_per_iter=1, seed=1, n_jobs=1):
        """Create the wrapped optimizer and initialise the bookkeeping state.

        Args:
            evaluator: Objective passed to the wrapped optimizer.
            config_space: Configuration space to sample from.
            name: Optimizer role; ``'hpo'`` selects how ``eval_dict`` keys
                are built in ``iterate``.
            eval_type: Forwarded to ``BaseOptimizer``.
            time_limit: Optional overall limit checked in ``run``.
            evaluation_limit: Optional limit on successful evaluations
                checked in ``run``.
            per_run_time_limit: Forwarded as ``time_limit_per_trial``.
            output_dir: Forwarded to ``BaseOptimizer``.
            timestamp: Forwarded to ``BaseOptimizer``.
            inner_iter_num_per_iter: Trials requested per ``iterate`` call.
            seed: Forwarded to ``BaseOptimizer``; ``self.seed`` is used as
                the wrapped optimizer's random state.
            n_jobs: ``1`` for the serial optimizer, otherwise the batch size
                of the parallel one.
        """
        super().__init__(evaluator, config_space, name, eval_type=eval_type, timestamp=timestamp, output_dir=output_dir,
                         seed=seed)
        self.time_limit = time_limit
        self.evaluation_num_limit = evaluation_limit
        self.inner_iter_num_per_iter = inner_iter_num_per_iter
        self.per_run_time_limit = per_run_time_limit

        if n_jobs == 1:
            self.optimizer = RandomSearch(objective_function=self.evaluator,
                                          config_space=config_space,
                                          advisor_type='random',
                                          task_id='Default',
                                          time_limit_per_trial=self.per_run_time_limit,
                                          random_state=self.seed)
        else:
            self.optimizer = pRandomSearch(objective_function=self.evaluator,
                                           config_space=config_space,
                                           sample_strategy='random',
                                           batch_size=n_jobs,
                                           task_id='Default',
                                           time_limit_per_trial=self.per_run_time_limit,
                                           random_state=self.seed)

        self.trial_cnt = 0
        self.configs = list()
        self.perfs = list()
        self.exp_output = dict()
        self.incumbent_perf = float("-INF")
        self.incumbent_config = self.config_space.get_default_configuration()

        # Estimate how many distinct configurations exist by counting the
        # unique ones among 5000 samples; an empty space has none to explore.
        hp_num = len(self.config_space.get_hyperparameters())
        if hp_num == 0:
            self.config_num_threshold = 0
        else:
            self.config_num_threshold = len(set(self.config_space.sample_configuration(5000)))

        self.logger.debug("The maximum trial number in HPO is :%d" % self.config_num_threshold)
        self.maximum_config_num = min(1500, self.config_num_threshold)
        self.eval_dict = {}
        self.n_jobs = n_jobs

    def run(self):
        """Call ``iterate`` until the evaluation or time limit is exceeded.

        Returns:
            The maximum of ``self.perfs``.
        """
        # NOTE(review): np.max raises on an empty self.perfs, and the loop has
        # no exit when both limits are None -- confirm callers always set one.
        while True:
            evaluation_num = len(self.perfs)
            if self.evaluation_num_limit is not None and evaluation_num > self.evaluation_num_limit:
                break
            if self.time_limit is not None and time.time() - self.start_time > self.time_limit:
                break
            self.iterate()
        return np.max(self.perfs)

    def _stop_on_config_limit(self):
        """Set the early-stop flag and warn once the configuration cap is hit; return whether it was."""
        if len(self.configs) < self.maximum_config_num:
            return False
        self.early_stopped_flag = True
        self.logger.warning('Already explored 70 percentage of the '
                            'hyperspace or maximum configuration number met: %d!' % self.maximum_config_num)
        return True

    def iterate(self, budget=MAX_INT):
        """Run one batch of random-search trials and refresh the incumbent.

        Args:
            budget: Time allowance for this call, compared against
                ``time.time()`` differences.

        Returns:
            A tuple ``(incumbent_perf, iteration_cost, incumbent_config)``
            where ``iteration_cost`` is the wall-clock time spent in this
            call.
        """
        _start_time = time.time()

        # The very first call may use a dedicated initial trial count.
        if len(self.configs) == 0 and self.init_hpo_iter_num is not None:
            inner_iter_num = self.init_hpo_iter_num
            print('initial hpo trial num is set to %d' % inner_iter_num)
        else:
            inner_iter_num = self.inner_iter_num_per_iter

        if self.n_jobs == 1:
            for _ in range(inner_iter_num):
                if self._stop_on_config_limit():
                    break
                if time.time() - _start_time > budget:
                    self.logger.warning('Time limit exceeded!')
                    break
                _config, _status, _, _perf = self.optimizer.iterate()
                self.update_saver([_config], [_perf[0]])
                # Only successful trials are recorded, with the sign flipped.
                if _status == SUCCESS:
                    self.exp_output[time.time()] = (_config, _perf[0])
                    self.configs.append(_config)
                    self.perfs.append(-_perf[0])
        elif not self._stop_on_config_limit():
            if time.time() - _start_time > budget:
                self.logger.warning('Time limit exceeded!')
            else:
                _config_list, _status_list, _, _perf_list = self.optimizer.async_iterate(n=inner_iter_num)
                self.update_saver(_config_list, _perf_list)
                for i, _config in enumerate(_config_list):
                    if _status_list[i] == SUCCESS:
                        self.exp_output[time.time()] = (_config, _perf_list[i])
                        self.configs.append(_config)
                        self.perfs.append(-_perf_list[i])

        # Rebuild eval_dict from the complete history. Keys are
        # (fe_config, hpo_config); the half not being searched comes from the
        # evaluator when it exposes it, otherwise it is None.
        run_history = self.optimizer.get_history()
        if self.name == 'hpo':
            fe_config = getattr(self.evaluator, 'fe_config', None)
            self.eval_dict = {(fe_config, hpo_config): [-run_history.perfs[i], time.time(), run_history.trial_states[i]]
                              for i, hpo_config in enumerate(run_history.configurations)}
        else:
            hpo_config = getattr(self.evaluator, 'hpo_config', None)
            # Fixed: this branch previously read the misspelled attribute
            # ``configurationsa``.
            self.eval_dict = {(fe_config, hpo_config): [-run_history.perfs[i], time.time(), run_history.trial_states[i]]
                              for i, fe_config in enumerate(run_history.configurations)}

        incumbents = run_history.get_incumbents()
        if len(incumbents) > 0:
            self.incumbent_config, self.incumbent_perf = incumbents[0]
            self.incumbent_perf = -self.incumbent_perf
        iteration_cost = time.time() - _start_time
        return self.incumbent_perf, iteration_cost, self.incumbent_config
示例#10
0
              num_objs=2,
              num_constraints=0,
              max_runs=50,
              surrogate_type='gp',
              acq_type='ehvi',
              acq_optimizer_type='random_scipy',
              initial_runs=6,
              init_strategy='sobol',
              ref_point=ref_point,
              time_limit_per_trial=10,
              task_id='mo',
              random_state=1)
    bo.run()

    # plot pareto front
    pareto_front = np.asarray(bo.get_history().get_pareto_front())
    if pareto_front.shape[-1] in (2, 3):
        if pareto_front.shape[-1] == 2:
            plt.scatter(pareto_front[:, 0], pareto_front[:, 1])
            plt.xlabel('Objective 1')
            plt.ylabel('Objective 2')
        elif pareto_front.shape[-1] == 3:
            ax = plt.axes(projection='3d')
            ax.scatter3D(pareto_front[:, 0], pareto_front[:, 1],
                         pareto_front[:, 2])
            ax.set_xlabel('Objective 1')
            ax.set_ylabel('Objective 2')
            ax.set_zlabel('Objective 3')
        plt.title('Pareto Front')
        plt.show()
示例#11
0
import os
import sys
import numpy as np
import matplotlib.pyplot as plt

sys.path.insert(0, os.getcwd())

from openbox.optimizer.generic_smbo import SMBO
from openbox.benchmark.objective_functions.synthetic import BraninCurrin


# Two-objective benchmark problem; it supplies the objective, search space,
# reference point and constraint/objective counts used below.
prob = BraninCurrin()
# 100-trial run with the 'mcadvisor' advisor and 'mcehvi' acquisition.
bo = SMBO(prob.evaluate, prob.config_space,
          advisor_type='mcadvisor',
          task_id='mcehvi',
          num_objs=prob.num_objs,
          num_constraints=prob.num_constraints,
          acq_type='mcehvi',
          ref_point=prob.ref_point,
          max_runs=100, random_state=2)
bo.run()

# log10 of the gap between prob.max_hv and each recorded hypervolume.
# Only consumed by the commented-out plot call below.
hvs = bo.get_history().hv_data
log_hv_diff = np.log10(prob.max_hv - np.asarray(hvs))

# Scatter the first two objectives of the final Pareto front.
# NOTE(review): with plt.show() commented out, nothing in this excerpt
# displays or saves the figure.
pf = np.asarray(bo.get_history().get_pareto_front())
plt.scatter(pf[:, 0], pf[:, 1])
# plt.plot(log_hv_diff)
# plt.show()
示例#12
0
from openbox.optimizer.generic_smbo import SMBO
from openbox.benchmark.objective_functions.synthetic import Ackley

# Experiment settings: unconstrained Ackley problem with num_inputs
# dimensions, 2 * (num_inputs + 1) initial runs out of 250 total.
num_inputs = 10
acq_optimizer_type = 'random_scipy'
seed = 1
prob = Ackley(dim=num_inputs, constrained=False)
initial_runs = 2 * (num_inputs + 1)
max_runs = 250

# 'mcadvisor' advisor with 'mcei' acquisition, a 'gp' surrogate and
# use_trust_region=True; initial points use the 'latin_hypercube' strategy.
bo = SMBO(prob.evaluate,
          prob.config_space,
          task_id='turbo',
          advisor_type='mcadvisor',
          num_objs=prob.num_objs,
          num_constraints=prob.num_constraints,
          acq_type='mcei',
          acq_optimizer_type=acq_optimizer_type,
          use_trust_region=True,
          surrogate_type='gp',
          max_runs=max_runs,
          initial_runs=initial_runs,
          init_strategy='latin_hypercube',
          random_state=seed)
bo.run()

# Plot the values stored in the history's data mapping, in its iteration order.
# NOTE(review): plt is not imported in the visible part of this snippet --
# confirm matplotlib.pyplot is imported elsewhere.
values = list(bo.get_history().data.values())
plt.plot(values)
plt.show()