Exemplo n.º 1
0
    def __init__(self,
                 config_space,
                 min_points_in_model=None,
                 top_n_percent=15,
                 num_samples=64,
                 random_fraction=1 / 3,
                 bandwidth_factor=3,
                 min_bandwidth=1e-3,
                 task_id=None,
                 output_dir='logs',
                 random_state=1):
        self.top_n_percent = top_n_percent
        self.config_space = config_space
        self.config_space.seed(random_state)
        self.bw_factor = bandwidth_factor
        self.min_bandwidth = min_bandwidth

        self.history_container = HistoryContainer(task_id,
                                                  config_space=config_space)
        self.output_dir = output_dir

        self.min_points_in_model = min_points_in_model
        if min_points_in_model is None:
            self.min_points_in_model = len(
                self.config_space.get_hyperparameters()) + 1

        if self.min_points_in_model < len(
                self.config_space.get_hyperparameters()) + 1:
            self.min_points_in_model = len(
                self.config_space.get_hyperparameters()) + 1

        self.num_samples = num_samples
        self.random_fraction = random_fraction

        self.random_state = random_state
        self.rng = np.random.RandomState(random_state)

        hps = self.config_space.get_hyperparameters()

        self.kde_vartypes = ""
        self.vartypes = []

        for h in hps:
            if hasattr(h, 'choices'):
                self.kde_vartypes += 'u'
                self.vartypes += [len(h.choices)]
            else:
                self.kde_vartypes += 'c'
                self.vartypes += [0]

        self.vartypes = np.array(self.vartypes, dtype=int)

        # store precomputed probs for the categorical parameters
        self.cat_probs = []

        self.good_config_rankings = dict()
        self.kde_models = dict()
        self.logger = logging.getLogger(self.__class__.__name__)
Exemplo n.º 2
0
    def __init__(self,
                 config_space,
                 initial_trials=10,
                 initial_configurations=None,
                 init_strategy='random_explore_first',
                 history_bo_data=None,
                 optimization_strategy='bo',
                 surrogate_type='prf',
                 output_dir='logs',
                 task_id=None,
                 rng=None):

        # Create output (logging) directory.
        # Init logging module.
        # Random seed generator.
        self.init_strategy = init_strategy
        self.output_dir = output_dir
        if rng is None:
            run_id, rng = get_rng()
        self.rng = rng
        self.logger = get_logger(self.__class__.__name__)

        # Basic components in Advisor.
        self.optimization_strategy = optimization_strategy
        self.configurations = list()
        self.failed_configurations = list()
        self.perfs = list()
        self.scale_perc = 5
        self.perc = None
        self.min_y = None
        self.max_y = None

        # Init the basic ingredients in Bayesian optimization.
        self.history_bo_data = history_bo_data
        self.surrogate_type = surrogate_type
        self.init_num = initial_trials
        self.config_space = config_space
        self.config_space.seed(rng.randint(MAXINT))

        if initial_configurations is not None and len(
                initial_configurations) > 0:
            self.initial_configurations = initial_configurations
            self.init_num = len(initial_configurations)
        else:
            self.initial_configurations = self.create_initial_design(
                self.init_strategy)
            self.init_num = len(self.initial_configurations)
        self.history_container = HistoryContainer(task_id)

        self.surrogate_model = None
        self.acquisition_function = None
        self.optimizer = None
        self.setup_bo_basics()
Exemplo n.º 3
0
    def __init__(self, config_space, task_id, output_dir):
        self.output_dir = output_dir
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)

        self.logger = None
        self.history_container = HistoryContainer(task_id)
        self.config_space = config_space
        self.scale_perc = 5
        self.perc = None
        self.min_y = None
        self.max_y = None
Exemplo n.º 4
0
    def __init__(
            self,
            config_space,
            num_objs=1,
            num_constraints=0,
            population_size=30,
            subset_size=20,
            epsilon=0.2,
            strategy='worst',  # 'worst', 'oldest'
            optimization_strategy='ea',
            batch_size=1,
            output_dir='logs',
            task_id='default_task_id',
            random_state=None):

        # Create output (logging) directory.
        # Init logging module.
        # Random seed generator.
        self.num_objs = num_objs
        self.num_constraints = num_constraints
        assert self.num_objs == 1 and self.num_constraints == 0
        self.output_dir = output_dir
        self.rng = check_random_state(random_state)
        self.config_space = config_space
        self.config_space_seed = self.rng.randint(MAXINT)
        self.config_space.seed(self.config_space_seed)
        self.logger = get_logger(self.__class__.__name__)

        # Init parallel settings
        self.batch_size = batch_size
        self.init_num = batch_size  # for compatibility in pSMBO
        self.running_configs = list()

        # Basic components in Advisor.
        self.optimization_strategy = optimization_strategy

        # Init the basic ingredients
        self.all_configs = set()
        self.age = 0
        self.population = list()
        self.population_size = population_size
        self.subset_size = subset_size
        assert 0 < self.subset_size <= self.population_size
        self.epsilon = epsilon
        self.strategy = strategy
        assert self.strategy in ['worst', 'oldest']

        # init history container
        self.history_container = HistoryContainer(
            task_id, self.num_constraints, config_space=self.config_space)
Exemplo n.º 5
0
class BaseFacade(object, metaclass=abc.ABCMeta):
    def __init__(self, config_space, task_id, output_dir):
        self.output_dir = output_dir
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)

        self.logger = None
        self.history_container = HistoryContainer(task_id)
        self.config_space = config_space
        self.scale_perc = 5
        self.perc = None
        self.min_y = None
        self.max_y = None

    @abc.abstractmethod
    def run(self):
        raise NotImplementedError()

    @abc.abstractmethod
    def iterate(self):
        raise NotImplementedError()

    def get_history(self):
        return self.history_container

    def get_incumbent(self):
        return self.history_container.get_incumbents()

    def _get_logger(self, name):
        logger_name = 'open-box-%s' % name
        setup_logger(os.path.join(self.output_dir, '%s.log' % str(logger_name)))
        return get_logger(logger_name)
Exemplo n.º 6
0
class EA_Advisor(object, metaclass=abc.ABCMeta):
    """
    Evolutionary Algorithm Advisor
    """
    def __init__(
            self,
            config_space,
            num_objs=1,
            num_constraints=0,
            population_size=30,
            subset_size=20,
            epsilon=0.2,
            strategy='worst',  # 'worst', 'oldest'
            optimization_strategy='ea',
            batch_size=1,
            output_dir='logs',
            task_id='default_task_id',
            random_state=None):

        # Create output (logging) directory.
        # Init logging module.
        # Random seed generator.
        self.num_objs = num_objs
        self.num_constraints = num_constraints
        assert self.num_objs == 1 and self.num_constraints == 0
        self.output_dir = output_dir
        self.rng = check_random_state(random_state)
        self.config_space = config_space
        self.config_space_seed = self.rng.randint(MAXINT)
        self.config_space.seed(self.config_space_seed)
        self.logger = get_logger(self.__class__.__name__)

        # Init parallel settings
        self.batch_size = batch_size
        self.init_num = batch_size  # for compatibility in pSMBO
        self.running_configs = list()

        # Basic components in Advisor.
        self.optimization_strategy = optimization_strategy

        # Init the basic ingredients
        self.all_configs = set()
        self.age = 0
        self.population = list()
        self.population_size = population_size
        self.subset_size = subset_size
        assert 0 < self.subset_size <= self.population_size
        self.epsilon = epsilon
        self.strategy = strategy
        assert self.strategy in ['worst', 'oldest']

        # init history container
        self.history_container = HistoryContainer(
            task_id, self.num_constraints, config_space=self.config_space)

    def get_suggestion(self, history_container=None):
        """
        Generate a configuration (suggestion) for this query.
        Returns
        -------
        A configuration.
        """
        if history_container is None:
            history_container = self.history_container

        if len(self.population) < self.population_size:
            # Initialize population
            next_config = self.sample_random_config(
                excluded_configs=self.all_configs)
        else:
            # Select a parent by subset tournament and epsilon greedy
            if self.rng.random() < self.epsilon:
                parent_config = random.sample(self.population, 1)[0]['config']
            else:
                subset = random.sample(self.population, self.subset_size)
                subset.sort(key=lambda x: x['perf'])  # minimize
                parent_config = subset[0]['config']

            # Mutation to 1-step neighbors
            next_config = None
            neighbors_gen = get_one_exchange_neighbourhood(
                parent_config, seed=self.rng.randint(MAXINT))
            for neighbor in neighbors_gen:
                if neighbor not in self.all_configs:
                    next_config = neighbor
                    break
            if next_config is None:  # If all the neighors are evaluated, sample randomly!
                next_config = self.sample_random_config(
                    excluded_configs=self.all_configs)

        self.all_configs.add(next_config)
        self.running_configs.append(next_config)
        return next_config

    def get_suggestions(self, batch_size=None, history_container=None):
        if batch_size is None:
            batch_size = self.batch_size

        configs = list()
        for i in range(batch_size):
            config = self.get_suggestion(history_container)
            configs.append(config)
        return configs

    def update_observation(self, observation: Observation):
        """
        Update the current observations.
        Parameters
        ----------
        observation

        Returns
        -------

        """

        config = observation.config
        perf = observation.objs[0]
        trial_state = observation.trial_state

        assert config in self.running_configs
        self.running_configs.remove(config)

        # update population
        if trial_state == SUCCESS and perf < MAXINT:
            self.population.append(dict(config=config, age=self.age,
                                        perf=perf))
            self.age += 1

        # Eliminate samples
        if len(self.population) > self.population_size:
            if self.strategy == 'oldest':
                self.population.sort(key=lambda x: x['age'])
                self.population.pop(0)
            elif self.strategy == 'worst':
                self.population.sort(key=lambda x: x['perf'])
                self.population.pop(-1)
            else:
                raise ValueError('Unknown strategy: %s' % self.strategy)

        return self.history_container.update_observation(observation)

    def sample_random_config(self, excluded_configs=None):
        if excluded_configs is None:
            excluded_configs = set()

        sample_cnt = 0
        max_sample_cnt = 1000
        while True:
            config = self.config_space.sample_configuration()
            sample_cnt += 1
            if config not in excluded_configs:
                break
            if sample_cnt >= max_sample_cnt:
                self.logger.warning(
                    'Cannot sample non duplicate configuration after %d iterations.'
                    % max_sample_cnt)
                break
        return config

    def get_history(self):
        return self.history_container
Exemplo n.º 7
0
    def __init__(self, config_space,
                 task_info,
                 initial_trials=10,
                 initial_configurations=None,
                 init_strategy='random_explore_first',
                 history_bo_data=None,
                 optimization_strategy='bo',
                 surrogate_type=None,
                 acq_type=None,
                 acq_optimizer_type='local_random',
                 ref_point=None,
                 output_dir='logs',
                 task_id=None,
                 random_state=None):

        # Create output (logging) directory.
        # Init logging module.
        # Random seed generator.
        self.task_info = task_info
        self.num_objs = task_info['num_objs']
        self.num_constraints = task_info['num_constraints']
        self.init_strategy = init_strategy
        self.output_dir = output_dir
        self.rng = np.random.RandomState(random_state)
        self.logger = get_logger(self.__class__.__name__)

        history_folder = os.path.join(self.output_dir, 'bo_history')
        if not os.path.exists(history_folder):
            os.makedirs(history_folder)
        self.history_file = os.path.join(history_folder, 'bo_history_%s.json' % task_id)

        # Basic components in Advisor.
        self.optimization_strategy = optimization_strategy

        # Init the basic ingredients in Bayesian optimization.
        self.history_bo_data = history_bo_data
        self.surrogate_type = surrogate_type
        self.constraint_surrogate_type = None
        self.acq_type = acq_type
        self.acq_optimizer_type = acq_optimizer_type
        self.init_num = initial_trials
        self.config_space = config_space
        self.config_space_seed = self.rng.randint(MAXINT)
        self.config_space.seed(self.config_space_seed)
        self.ref_point = ref_point

        # init history container
        if self.num_objs == 1:
            self.history_container = HistoryContainer(task_id, self.num_constraints, config_space=self.config_space)
        else:  # multi-objectives
            self.history_container = MOHistoryContainer(task_id, self.num_objs, self.num_constraints, ref_point)

        # initial design
        if initial_configurations is not None and len(initial_configurations) > 0:
            self.initial_configurations = initial_configurations
            self.init_num = len(initial_configurations)
        else:
            self.initial_configurations = self.create_initial_design(self.init_strategy)
            self.init_num = len(self.initial_configurations)

        self.surrogate_model = None
        self.constraint_models = None
        self.acquisition_function = None
        self.optimizer = None
        self.check_setup()
        self.setup_bo_basics()
Exemplo n.º 8
0
class TPE_Advisor:
    # TODO:Add warm start
    def __init__(self,
                 config_space,
                 min_points_in_model=None,
                 top_n_percent=15,
                 num_samples=64,
                 random_fraction=1 / 3,
                 bandwidth_factor=3,
                 min_bandwidth=1e-3,
                 task_id=None,
                 output_dir='logs',
                 random_state=1):
        self.top_n_percent = top_n_percent
        self.config_space = config_space
        self.config_space.seed(random_state)
        self.bw_factor = bandwidth_factor
        self.min_bandwidth = min_bandwidth

        self.history_container = HistoryContainer(task_id,
                                                  config_space=config_space)
        self.output_dir = output_dir

        self.min_points_in_model = min_points_in_model
        if min_points_in_model is None:
            self.min_points_in_model = len(
                self.config_space.get_hyperparameters()) + 1

        if self.min_points_in_model < len(
                self.config_space.get_hyperparameters()) + 1:
            self.min_points_in_model = len(
                self.config_space.get_hyperparameters()) + 1

        self.num_samples = num_samples
        self.random_fraction = random_fraction

        self.random_state = random_state
        self.rng = np.random.RandomState(random_state)

        hps = self.config_space.get_hyperparameters()

        self.kde_vartypes = ""
        self.vartypes = []

        for h in hps:
            if hasattr(h, 'choices'):
                self.kde_vartypes += 'u'
                self.vartypes += [len(h.choices)]
            else:
                self.kde_vartypes += 'c'
                self.vartypes += [0]

        self.vartypes = np.array(self.vartypes, dtype=int)

        # store precomputed probs for the categorical parameters
        self.cat_probs = []

        self.good_config_rankings = dict()
        self.kde_models = dict()
        self.logger = logging.getLogger(self.__class__.__name__)

    def update_observation(self, observation: Observation):
        self.history_container.update_observation(observation)

    def get_suggestion(self, history_container=None):
        if history_container is None:
            history_container = self.history_container

        # use default as first config
        num_config_evaluated = len(history_container.configurations)
        if num_config_evaluated == 0:
            return self.config_space.get_default_configuration()

        # fit
        self.fit_kde_models(history_container)

        # If no model is available, sample random config
        if len(self.kde_models.keys()
               ) == 0 or self.rng.rand() < self.random_fraction:
            return self.sample_random_configs(1, history_container)[0]

        best = np.inf
        best_vector = None

        try:
            l = self.kde_models['good'].pdf
            g = self.kde_models['bad'].pdf

            minimize_me = lambda x: max(1e-32, g(x)) / max(l(x), 1e-32)

            kde_good = self.kde_models['good']
            kde_bad = self.kde_models['bad']

            for i in range(self.num_samples):
                idx = self.rng.randint(0, len(kde_good.data))
                datum = kde_good.data[idx]
                vector = []

                for m, bw, t in zip(datum, kde_good.bw, self.vartypes):

                    bw = max(bw, self.min_bandwidth)
                    if t == 0:
                        bw = self.bw_factor * bw
                        try:
                            vector.append(
                                sps.truncnorm.rvs(-m / bw, (1 - m) / bw,
                                                  loc=m,
                                                  scale=bw))
                        except:
                            self.logger.warning(
                                "Truncated Normal failed for:\ndatum=%s\nbandwidth=%s\nfor entry with value %s"
                                % (datum, kde_good.bw, m))
                            self.logger.warning("data in the KDE:\n%s" %
                                                kde_good.data)
                    else:

                        if self.rng.rand() < (1 - bw):
                            vector.append(int(m))
                        else:
                            vector.append(self.rng.randint(t))
                val = minimize_me(vector)

                if not np.isfinite(val):
                    self.logger.warning('sampled vector: %s has EI value %s' %
                                        (vector, val))
                    self.logger.warning("data in the KDEs:\n%s\n%s" %
                                        (kde_good.data, kde_bad.data))
                    self.logger.warning("bandwidth of the KDEs:\n%s\n%s" %
                                        (kde_good.bw, kde_bad.bw))
                    self.logger.warning("l(x) = %s" % (l(vector)))
                    self.logger.warning("g(x) = %s" % (g(vector)))

                    # right now, this happens because a KDE does not contain all values for a categorical parameter
                    # this cannot be fixed with the statsmodels KDE, so for now, we are just going to evaluate this one
                    # if the good_kde has a finite value, i.e. there is no config with that value in the bad kde, so it shouldn't be terrible.
                    if np.isfinite(l(vector)):
                        best_vector = vector
                        break

                if val < best:
                    best = val
                    best_vector = vector

            if best_vector is None:
                self.logger.debug(
                    "Sampling based optimization with %i samples failed -> using random configuration"
                    % self.num_samples)
                sample = self.sample_random_configs(
                    1, history_container)[0].get_dictionary()
            else:
                self.logger.debug('best_vector: {}, {}, {}, {}'.format(
                    best_vector, best, l(best_vector), g(best_vector)))
                for i, hp_value in enumerate(best_vector):
                    if isinstance(
                            self.config_space.get_hyperparameter(
                                self.config_space.get_hyperparameter_by_idx(
                                    i)), ConfigSpace.hyperparameters.
                            CategoricalHyperparameter):
                        best_vector[i] = int(np.rint(best_vector[i]))
                sample = ConfigSpace.Configuration(
                    self.config_space, vector=best_vector).get_dictionary()

                try:
                    sample = ConfigSpace.util.deactivate_inactive_hyperparameters(
                        configuration_space=self.config_space,
                        configuration=sample)

                except Exception as e:
                    self.logger.warning(("=" * 50 + "\n") * 3 + \
                                        "Error converting configuration:\n%s" % sample + \
                                        "\n here is a traceback:" + \
                                        traceback.format_exc())
                    raise e

        except:
            self.logger.warning(
                "Sampling based optimization with %i samples failed\n %s \nUsing random configuration"
                % (self.num_samples, traceback.format_exc()))
            sample = self.sample_random_configs(1, history_container)[0]

        return sample

    def impute_conditional_data(self, array):

        return_array = np.empty_like(array)

        for i in range(array.shape[0]):
            datum = np.copy(array[i])
            nan_indices = np.argwhere(np.isnan(datum)).flatten()

            while np.any(nan_indices):
                nan_idx = nan_indices[0]
                valid_indices = np.argwhere(np.isfinite(
                    array[:, nan_idx])).flatten()

                if len(valid_indices) > 0:
                    # pick one of them at random and overwrite all NaN values
                    row_idx = self.rng.choice(valid_indices)
                    datum[nan_indices] = array[row_idx, nan_indices]

                else:
                    # no good point in the data has this value activated, so fill it with a valid but random value
                    t = self.vartypes[nan_idx]
                    if t == 0:
                        datum[nan_idx] = self.rng.rand()
                    else:
                        datum[nan_idx] = self.rng.randint(t)

                nan_indices = np.argwhere(np.isnan(datum)).flatten()
            return_array[i, :] = datum
        return return_array

    def fit_kde_models(self, history_container):
        num_config_successful = len(history_container.successful_perfs)
        if num_config_successful <= self.min_points_in_model - 1:
            self.logger.debug(
                "Only %i run(s) available, need more than %s -> can't build model!"
                % (num_config_successful, self.min_points_in_model + 1))
            return

        train_configs = convert_configurations_to_array(
            history_container.configurations)
        train_losses = history_container.get_transformed_perfs()

        n_good = max(self.min_points_in_model,
                     (self.top_n_percent * train_configs.shape[0]) // 100)
        # n_bad = min(max(self.min_points_in_model, ((100-self.top_n_percent)*train_configs.shape[0])//100), 10)
        n_bad = max(self.min_points_in_model,
                    ((100 - self.top_n_percent) * train_configs.shape[0]) //
                    100)

        # Refit KDE for the current budget
        idx = np.argsort(train_losses)

        train_data_good = self.impute_conditional_data(
            train_configs[idx[:n_good]])
        train_data_bad = self.impute_conditional_data(
            train_configs[idx[n_good:n_good + n_bad]])

        if train_data_good.shape[0] <= train_data_good.shape[1]:
            return
        if train_data_bad.shape[0] <= train_data_bad.shape[1]:
            return

        # more expensive crossvalidation method
        # bw_estimation = 'cv_ls'

        # quick rule of thumb
        bw_estimation = 'normal_reference'

        bad_kde = sm.nonparametric.KDEMultivariate(data=train_data_bad,
                                                   var_type=self.kde_vartypes,
                                                   bw=bw_estimation)
        good_kde = sm.nonparametric.KDEMultivariate(data=train_data_good,
                                                    var_type=self.kde_vartypes,
                                                    bw=bw_estimation)

        bad_kde.bw = np.clip(bad_kde.bw, self.min_bandwidth, None)
        good_kde.bw = np.clip(good_kde.bw, self.min_bandwidth, None)

        self.kde_models = {'good': good_kde, 'bad': bad_kde}

        # update probs for the categorical parameters for later sampling
        self.logger.debug(
            'done building a new model based on %i/%i split\nBest loss for this budget:%f\n\n\n\n\n'
            % (n_good, n_bad, np.min(train_losses)))

    def sample_random_configs(self, num_configs=1, history_container=None):
        """
        Sample a batch of random configurations.
        Parameters
        ----------
        num_configs

        history_container

        Returns
        -------

        """
        if history_container is None:
            history_container = self.history_container

        configs = list()
        sample_cnt = 0
        max_sample_cnt = 1000
        while len(configs) < num_configs:
            config = self.config_space.sample_configuration()
            sample_cnt += 1
            if config not in (history_container.configurations + configs):
                configs.append(config)
                sample_cnt = 0
                continue
            if sample_cnt >= max_sample_cnt:
                self.logger.warning(
                    'Cannot sample non duplicate configuration after %d iterations.'
                    % max_sample_cnt)
                configs.append(config)
                sample_cnt = 0
        return configs
Exemplo n.º 9
0
    def __init__(self,
                 objective_func,
                 config_space: ConfigurationSpace,
                 R,
                 eta=3,
                 skip_outer_loop=0,
                 rand_prob=0.3,
                 use_bohb=False,
                 init_weight=None,
                 update_enable=True,
                 weight_method='rank_loss_p_norm',
                 fusion_method='idp',
                 power_num=3,
                 random_state=1,
                 method_id='mqAsyncMFES',
                 restart_needed=True,
                 time_limit_per_trial=600,
                 runtime_limit=None,
                 seed=1,
                 ip='',
                 port=13579,
                 authkey=b'abc'):
        super().__init__(objective_func,
                         config_space,
                         R,
                         eta=eta,
                         skip_outer_loop=skip_outer_loop,
                         random_state=random_state,
                         method_id=method_id,
                         restart_needed=restart_needed,
                         time_limit_per_trial=time_limit_per_trial,
                         runtime_limit=runtime_limit,
                         ip=ip,
                         port=port,
                         authkey=authkey)
        self.seed = seed
        self.last_n_iteration = None
        self.use_bohb_strategy = use_bohb
        self.update_enable = update_enable
        self.fusion_method = fusion_method
        # Parameter for weight method `rank_loss_p_norm`.
        self.power_num = power_num
        # Specify the weight learning method.
        self.weight_method = weight_method
        self.weight_update_id = 0
        self.weight_changed_cnt = 0

        if init_weight is None:
            init_weight = [1. / self.s_max] * self.s_max + [0.]
            assert len(init_weight) == (self.s_max + 1)
        self.logger.info("Initialize weight to %s" %
                         init_weight[:self.s_max + 1])
        types, bounds = get_types(config_space)

        if not self.use_bohb_strategy:
            self.surrogate = RandomForestEnsemble(types, bounds, self.s_max,
                                                  self.eta, init_weight,
                                                  self.fusion_method)
        else:
            self.surrogate = RandomForestWithInstances(types,
                                                       bounds,
                                                       seed=self.seed)
        self.acquisition_function = EI(model=self.surrogate)

        self.iterate_id = 0
        self.iterate_r = list()
        self.hist_weights = list()

        # Saving evaluation statistics in Hyperband.
        self.target_x = dict()
        self.target_y = dict()
        for index, item in enumerate(
                np.logspace(0, self.s_max, self.s_max + 1, base=self.eta)):
            r = int(item)
            self.iterate_r.append(r)
            self.target_x[r] = list()
            self.target_y[r] = list()

        # BO optimizer settings.
        self.history_container = HistoryContainer(task_id=self.method_name)
        self.sls_max_steps = None
        self.n_sls_iterations = 5
        self.sls_n_steps_plateau_walk = 10
        self.rng = np.random.RandomState(seed=self.seed)
        self.acq_optimizer = InterleavedLocalAndRandomSearch(
            acquisition_function=self.acquisition_function,
            config_space=self.config_space,
            rng=self.rng,
            max_steps=self.sls_max_steps,
            n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
            n_sls_iterations=self.n_sls_iterations,
            rand_prob=0.0,
        )
        self.random_configuration_chooser = ChooserProb(prob=rand_prob,
                                                        rng=self.rng)
        self.random_check_idx = 0
Exemplo n.º 10
0
class async_mqMFES(async_mqHyperband):
    """
    The implementation of Asynchronous MFES (combine ASHA and MFES)
    """
    def __init__(self,
                 objective_func,
                 config_space: ConfigurationSpace,
                 R,
                 eta=3,
                 skip_outer_loop=0,
                 rand_prob=0.3,
                 use_bohb=False,
                 init_weight=None,
                 update_enable=True,
                 weight_method='rank_loss_p_norm',
                 fusion_method='idp',
                 power_num=3,
                 random_state=1,
                 method_id='mqAsyncMFES',
                 restart_needed=True,
                 time_limit_per_trial=600,
                 runtime_limit=None,
                 seed=1,
                 ip='',
                 port=13579,
                 authkey=b'abc'):
        super().__init__(objective_func,
                         config_space,
                         R,
                         eta=eta,
                         skip_outer_loop=skip_outer_loop,
                         random_state=random_state,
                         method_id=method_id,
                         restart_needed=restart_needed,
                         time_limit_per_trial=time_limit_per_trial,
                         runtime_limit=runtime_limit,
                         ip=ip,
                         port=port,
                         authkey=authkey)
        self.seed = seed
        self.last_n_iteration = None
        self.use_bohb_strategy = use_bohb
        self.update_enable = update_enable
        self.fusion_method = fusion_method
        # Parameter for weight method `rank_loss_p_norm`.
        self.power_num = power_num
        # Specify the weight learning method.
        self.weight_method = weight_method
        self.weight_update_id = 0
        self.weight_changed_cnt = 0

        if init_weight is None:
            init_weight = [1. / self.s_max] * self.s_max + [0.]
            assert len(init_weight) == (self.s_max + 1)
        self.logger.info("Initialize weight to %s" %
                         init_weight[:self.s_max + 1])
        types, bounds = get_types(config_space)

        if not self.use_bohb_strategy:
            self.surrogate = RandomForestEnsemble(types, bounds, self.s_max,
                                                  self.eta, init_weight,
                                                  self.fusion_method)
        else:
            self.surrogate = RandomForestWithInstances(types,
                                                       bounds,
                                                       seed=self.seed)
        self.acquisition_function = EI(model=self.surrogate)

        self.iterate_id = 0
        self.iterate_r = list()
        self.hist_weights = list()

        # Saving evaluation statistics in Hyperband.
        self.target_x = dict()
        self.target_y = dict()
        for index, item in enumerate(
                np.logspace(0, self.s_max, self.s_max + 1, base=self.eta)):
            r = int(item)
            self.iterate_r.append(r)
            self.target_x[r] = list()
            self.target_y[r] = list()

        # BO optimizer settings.
        self.history_container = HistoryContainer(task_id=self.method_name)
        self.sls_max_steps = None
        self.n_sls_iterations = 5
        self.sls_n_steps_plateau_walk = 10
        self.rng = np.random.RandomState(seed=self.seed)
        self.acq_optimizer = InterleavedLocalAndRandomSearch(
            acquisition_function=self.acquisition_function,
            config_space=self.config_space,
            rng=self.rng,
            max_steps=self.sls_max_steps,
            n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
            n_sls_iterations=self.n_sls_iterations,
            rand_prob=0.0,
        )
        self.random_configuration_chooser = ChooserProb(prob=rand_prob,
                                                        rng=self.rng)
        self.random_check_idx = 0

    def update_observation(self, config, perf, n_iteration):
        rung_id = self.get_rung_id(self.bracket, n_iteration)

        updated = False
        for job in self.bracket[rung_id]['jobs']:
            _job_status, _config, _perf, _extra_conf = job
            if _config == config:
                assert _job_status == RUNNING
                job[0] = COMPLETED
                job[2] = perf
                updated = True
                break
        assert updated
        # print('=== bracket after update_observation:', self.get_bracket_status(self.bracket))

        configs_running = list()
        for _config in self.bracket[rung_id]['configs']:
            if _config not in self.target_x[n_iteration]:
                configs_running.append(_config)
        value_imputed = np.median(self.target_y[n_iteration])

        n_iteration = int(n_iteration)
        self.target_x[n_iteration].append(config)
        self.target_y[n_iteration].append(perf)

        if n_iteration == self.R:
            self.incumbent_configs.append(config)
            self.incumbent_perfs.append(perf)
            # Update history container.
            self.history_container.add(config, perf)

        # Refit the ensemble surrogate model.
        configs_train = self.target_x[n_iteration] + configs_running
        results_train = self.target_y[n_iteration] + [value_imputed
                                                      ] * len(configs_running)
        results_train = np.array(std_normalization(results_train),
                                 dtype=np.float64)
        if not self.use_bohb_strategy:
            self.surrogate.train(
                convert_configurations_to_array(configs_train),
                results_train,
                r=n_iteration)
        else:
            if n_iteration == self.R:
                self.surrogate.train(
                    convert_configurations_to_array(configs_train),
                    results_train)

    def choose_next(self):
        """
        sample a config according to MFES. give iterations according to Hyperband strategy.
        """
        next_config = None
        next_n_iteration = self.get_next_n_iteration()
        next_rung_id = self.get_rung_id(self.bracket, next_n_iteration)

        # Update weight when the inner loop of hyperband is finished
        if self.last_n_iteration != next_n_iteration and not self.use_bohb_strategy:
            if self.update_enable and self.weight_update_id > self.s_max:
                self.update_weight()
            self.weight_update_id += 1
        self.last_n_iteration = next_n_iteration

        # sample config
        excluded_configs = self.bracket[next_rung_id]['configs']
        if len(self.target_y[self.iterate_r[-1]]) == 0:
            next_config = sample_configuration(
                self.config_space, excluded_configs=excluded_configs)
        else:
            # Like BOHB, sample a fixed percentage of random configurations.
            self.random_check_idx += 1
            if self.random_configuration_chooser.check(self.random_check_idx):
                next_config = sample_configuration(
                    self.config_space, excluded_configs=excluded_configs)
            else:
                acq_configs = self.get_bo_candidates()
                for config in acq_configs:
                    if config not in self.bracket[next_rung_id]['configs']:
                        next_config = config
                        break
                if next_config is None:
                    self.logger.warning(
                        'Cannot get a non duplicate configuration from bo candidates. '
                        'Sample a random one.')
                    next_config = sample_configuration(
                        self.config_space, excluded_configs=excluded_configs)

        next_extra_conf = {}
        return next_config, next_n_iteration, next_extra_conf

    def get_bo_candidates(self):
        std_incumbent_value = np.min(
            std_normalization(self.target_y[self.iterate_r[-1]]))
        # Update surrogate model in acquisition function.
        self.acquisition_function.update(model=self.surrogate,
                                         eta=std_incumbent_value,
                                         num_data=len(self.incumbent_configs))

        challengers = self.acq_optimizer.maximize(
            runhistory=self.history_container,
            num_points=5000,
        )
        return challengers.challengers

    @staticmethod
    def calculate_preserving_order_num(y_pred, y_true):
        array_size = len(y_pred)
        assert len(y_true) == array_size

        total_pair_num, order_preserving_num = 0, 0
        for idx in range(array_size):
            for inner_idx in range(idx + 1, array_size):
                if bool(y_true[idx] > y_true[inner_idx]) == bool(
                        y_pred[idx] > y_pred[inner_idx]):
                    order_preserving_num += 1
                total_pair_num += 1
        return order_preserving_num, total_pair_num

    def update_weight(self):
        start_time = time.time()

        max_r = self.iterate_r[-1]
        incumbent_configs = self.target_x[max_r]
        test_x = convert_configurations_to_array(incumbent_configs)
        test_y = np.array(self.target_y[max_r], dtype=np.float64)

        r_list = self.surrogate.surrogate_r
        K = len(r_list)

        if len(test_y) >= 3:
            # Get previous weights
            if self.weight_method == 'rank_loss_p_norm':
                preserving_order_p = list()
                preserving_order_nums = list()
                for i, r in enumerate(r_list):
                    fold_num = 5
                    if i != K - 1:
                        mean, var = self.surrogate.surrogate_container[
                            r].predict(test_x)
                        tmp_y = np.reshape(mean, -1)
                        preorder_num, pair_num = self.calculate_preserving_order_num(
                            tmp_y, test_y)
                        preserving_order_p.append(preorder_num / pair_num)
                        preserving_order_nums.append(preorder_num)
                    else:
                        if len(test_y) < 2 * fold_num:
                            preserving_order_p.append(0)
                        else:
                            # 5-fold cross validation.
                            kfold = KFold(n_splits=fold_num)
                            cv_pred = np.array([0] * len(test_y))
                            for train_idx, valid_idx in kfold.split(test_x):
                                train_configs, train_y = test_x[
                                    train_idx], test_y[train_idx]
                                valid_configs, valid_y = test_x[
                                    valid_idx], test_y[valid_idx]
                                types, bounds = get_types(self.config_space)
                                _surrogate = RandomForestWithInstances(
                                    types=types, bounds=bounds)
                                _surrogate.train(train_configs, train_y)
                                pred, _ = _surrogate.predict(valid_configs)
                                cv_pred[valid_idx] = pred.reshape(-1)
                            preorder_num, pair_num = self.calculate_preserving_order_num(
                                cv_pred, test_y)
                            preserving_order_p.append(preorder_num / pair_num)
                            preserving_order_nums.append(preorder_num)

                trans_order_weight = np.array(preserving_order_p)
                power_sum = np.sum(np.power(trans_order_weight,
                                            self.power_num))
                new_weights = np.power(trans_order_weight,
                                       self.power_num) / power_sum

            elif self.weight_method == 'rank_loss_prob':
                # For basic surrogate i=1:K-1.
                mean_list, var_list = list(), list()
                for i, r in enumerate(r_list[:-1]):
                    mean, var = self.surrogate.surrogate_container[r].predict(
                        test_x)
                    mean_list.append(np.reshape(mean, -1))
                    var_list.append(np.reshape(var, -1))
                sample_num = 100
                min_probability_array = [0] * K
                for _ in range(sample_num):
                    order_preseving_nums = list()

                    # For basic surrogate i=1:K-1.
                    for idx in range(K - 1):
                        sampled_y = self.rng.normal(mean_list[idx],
                                                    var_list[idx])
                        _num, _ = self.calculate_preserving_order_num(
                            sampled_y, test_y)
                        order_preseving_nums.append(_num)

                    fold_num = 5
                    # For basic surrogate i=K. cv
                    if len(test_y) < 2 * fold_num:
                        order_preseving_nums.append(0)
                    else:
                        # 5-fold cross validation.
                        kfold = KFold(n_splits=fold_num)
                        cv_pred = np.array([0] * len(test_y))
                        for train_idx, valid_idx in kfold.split(test_x):
                            train_configs, train_y = test_x[train_idx], test_y[
                                train_idx]
                            valid_configs, valid_y = test_x[valid_idx], test_y[
                                valid_idx]
                            types, bounds = get_types(self.config_space)
                            _surrogate = RandomForestWithInstances(
                                types=types, bounds=bounds)
                            _surrogate.train(train_configs, train_y)
                            _pred, _var = _surrogate.predict(valid_configs)
                            sampled_pred = self.rng.normal(
                                _pred.reshape(-1), _var.reshape(-1))
                            cv_pred[valid_idx] = sampled_pred
                        _num, _ = self.calculate_preserving_order_num(
                            cv_pred, test_y)
                        order_preseving_nums.append(_num)
                    max_id = np.argmax(order_preseving_nums)
                    min_probability_array[max_id] += 1
                new_weights = np.array(min_probability_array) / sample_num
            else:
                raise ValueError('Invalid weight method: %s!' %
                                 self.weight_method)
        else:
            old_weights = list()
            for i, r in enumerate(r_list):
                _weight = self.surrogate.surrogate_weight[r]
                old_weights.append(_weight)
            new_weights = old_weights.copy()

        self.logger.info(
            '[%s] %d-th Updating weights: %s' %
            (self.weight_method, self.weight_changed_cnt, str(new_weights)))

        # Assign the weight to each basic surrogate.
        for i, r in enumerate(r_list):
            self.surrogate.surrogate_weight[r] = new_weights[i]
        self.weight_changed_cnt += 1
        # Save the weight data.
        self.hist_weights.append(new_weights)
        dir_path = os.path.join(self.data_directory, 'saved_weights')
        file_name = 'mfes_weights_%s.npy' % (self.method_name, )
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)
        np.save(os.path.join(dir_path, file_name),
                np.asarray(self.hist_weights))
        self.logger.info(
            'update_weight() cost %.2fs. new weights are saved to %s' %
            (time.time() - start_time, os.path.join(dir_path, file_name)))

    def get_weights(self):
        return self.hist_weights
Exemplo n.º 11
0
    def __init__(self, objective_func,
                 config_space: ConfigurationSpace,
                 R,
                 eta=3,
                 num_iter=10000,
                 rand_prob=0.3,
                 init_weight=None, update_enable=True,
                 weight_method='rank_loss_p_norm', fusion_method='idp',
                 power_num=3,
                 random_state=1,
                 method_id='mqMFES',
                 restart_needed=True,
                 time_limit_per_trial=600,
                 runtime_limit=None,
                 ip='',
                 port=13579,
                 authkey=b'abc',):
        max_queue_len = 3 * R  # conservative design
        super().__init__(objective_func, method_name=method_id,
                         restart_needed=restart_needed, time_limit_per_trial=time_limit_per_trial,
                         runtime_limit=runtime_limit,
                         max_queue_len=max_queue_len, ip=ip, port=port, authkey=authkey)
        self.seed = random_state
        self.config_space = config_space
        self.config_space.seed(self.seed)

        self.R = R
        self.eta = eta
        self.logeta = lambda x: log(x) / log(self.eta)
        self.s_max = int(self.logeta(self.R))
        self.B = (self.s_max + 1) * self.R
        self.num_iter = num_iter

        self.update_enable = update_enable
        self.fusion_method = fusion_method
        # Parameter for weight method `rank_loss_p_norm`.
        self.power_num = power_num
        # Specify the weight learning method.
        self.weight_method = weight_method
        self.weight_update_id = 0
        self.weight_changed_cnt = 0

        if init_weight is None:
            init_weight = [0.]
            init_weight.extend([1. / self.s_max] * self.s_max)
        assert len(init_weight) == (self.s_max + 1)
        self.logger.info('Weight method & flag: %s-%s' % (self.weight_method, str(self.update_enable)))
        self.logger.info("Initial weight is: %s" % init_weight[:self.s_max + 1])
        types, bounds = get_types(config_space)

        self.weighted_surrogate = WeightedRandomForestCluster(
            types, bounds, self.s_max, self.eta, init_weight, self.fusion_method
        )
        self.acquisition_function = EI(model=self.weighted_surrogate)

        self.incumbent_configs = []
        self.incumbent_perfs = []

        self.iterate_id = 0
        self.iterate_r = []
        self.hist_weights = list()

        # Saving evaluation statistics in Hyperband.
        self.target_x = dict()
        self.target_y = dict()
        for index, item in enumerate(np.logspace(0, self.s_max, self.s_max + 1, base=self.eta)):
            r = int(item)
            self.iterate_r.append(r)
            self.target_x[r] = []
            self.target_y[r] = []

        # BO optimizer settings.
        self.configs = list()
        self.history_container = HistoryContainer(task_id=self.method_name)
        self.sls_max_steps = None
        self.n_sls_iterations = 5
        self.sls_n_steps_plateau_walk = 10
        self.rng = np.random.RandomState(seed=self.seed)
        self.acq_optimizer = InterleavedLocalAndRandomSearch(
            acquisition_function=self.acquisition_function,
            config_space=self.config_space,
            rng=self.rng,
            max_steps=self.sls_max_steps,
            n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
            n_sls_iterations=self.n_sls_iterations,
            rand_prob=0.0,
        )
        self.random_configuration_chooser = ChooserProb(prob=rand_prob, rng=self.rng)
Exemplo n.º 12
0
class mqMFES(mqBaseFacade):
    """
    MFES-HB: https://arxiv.org/abs/2012.03011
    """
    def __init__(self, objective_func,
                 config_space: ConfigurationSpace,
                 R,
                 eta=3,
                 num_iter=10000,
                 rand_prob=0.3,
                 init_weight=None, update_enable=True,
                 weight_method='rank_loss_p_norm', fusion_method='idp',
                 power_num=3,
                 random_state=1,
                 method_id='mqMFES',
                 restart_needed=True,
                 time_limit_per_trial=600,
                 runtime_limit=None,
                 ip='',
                 port=13579,
                 authkey=b'abc',):
        max_queue_len = 3 * R  # conservative design
        super().__init__(objective_func, method_name=method_id,
                         restart_needed=restart_needed, time_limit_per_trial=time_limit_per_trial,
                         runtime_limit=runtime_limit,
                         max_queue_len=max_queue_len, ip=ip, port=port, authkey=authkey)
        self.seed = random_state
        self.config_space = config_space
        self.config_space.seed(self.seed)

        self.R = R
        self.eta = eta
        self.logeta = lambda x: log(x) / log(self.eta)
        self.s_max = int(self.logeta(self.R))
        self.B = (self.s_max + 1) * self.R
        self.num_iter = num_iter

        self.update_enable = update_enable
        self.fusion_method = fusion_method
        # Parameter for weight method `rank_loss_p_norm`.
        self.power_num = power_num
        # Specify the weight learning method.
        self.weight_method = weight_method
        self.weight_update_id = 0
        self.weight_changed_cnt = 0

        if init_weight is None:
            init_weight = [0.]
            init_weight.extend([1. / self.s_max] * self.s_max)
        assert len(init_weight) == (self.s_max + 1)
        self.logger.info('Weight method & flag: %s-%s' % (self.weight_method, str(self.update_enable)))
        self.logger.info("Initial weight is: %s" % init_weight[:self.s_max + 1])
        types, bounds = get_types(config_space)

        self.weighted_surrogate = WeightedRandomForestCluster(
            types, bounds, self.s_max, self.eta, init_weight, self.fusion_method
        )
        self.acquisition_function = EI(model=self.weighted_surrogate)

        self.incumbent_configs = []
        self.incumbent_perfs = []

        self.iterate_id = 0
        self.iterate_r = []
        self.hist_weights = list()

        # Saving evaluation statistics in Hyperband.
        self.target_x = dict()
        self.target_y = dict()
        for index, item in enumerate(np.logspace(0, self.s_max, self.s_max + 1, base=self.eta)):
            r = int(item)
            self.iterate_r.append(r)
            self.target_x[r] = []
            self.target_y[r] = []

        # BO optimizer settings.
        self.configs = list()
        self.history_container = HistoryContainer(task_id=self.method_name)
        self.sls_max_steps = None
        self.n_sls_iterations = 5
        self.sls_n_steps_plateau_walk = 10
        self.rng = np.random.RandomState(seed=self.seed)
        self.acq_optimizer = InterleavedLocalAndRandomSearch(
            acquisition_function=self.acquisition_function,
            config_space=self.config_space,
            rng=self.rng,
            max_steps=self.sls_max_steps,
            n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
            n_sls_iterations=self.n_sls_iterations,
            rand_prob=0.0,
        )
        self.random_configuration_chooser = ChooserProb(prob=rand_prob, rng=self.rng)

    def iterate(self, skip_last=0):

        for s in reversed(range(self.s_max + 1)):

            if self.update_enable and self.weight_update_id > self.s_max:
                self.update_weight()
            self.weight_update_id += 1

            # Set initial number of configurations
            n = int(ceil(self.B / self.R / (s + 1) * self.eta ** s))
            # initial number of iterations per config
            r = int(self.R * self.eta ** (-s))

            # Choose a batch of configurations in different mechanisms.
            start_time = time.time()
            T = self.choose_next(n)
            time_elapsed = time.time() - start_time
            self.logger.info("[%s] Choosing next configurations took %.2f sec." % (self.method_name, time_elapsed))

            extra_info = None
            last_run_num = None

            for i in range((s + 1) - int(skip_last)):  # changed from s + 1

                # Run each of the n configs for <iterations>
                # and keep best (n_configs / eta) configurations

                n_configs = n * self.eta ** (-i)
                n_iteration = r * self.eta ** (i)

                n_iter = n_iteration
                if last_run_num is not None and not self.restart_needed:
                    n_iter -= last_run_num
                last_run_num = n_iteration

                self.logger.info("%s: %d configurations x %d iterations each" %
                                 (self.method_name, int(n_configs), int(n_iteration)))

                ret_val, early_stops = self.run_in_parallel(T, n_iter, extra_info)
                val_losses = [item['loss'] for item in ret_val]
                ref_list = [item['ref_id'] for item in ret_val]

                self.target_x[int(n_iteration)].extend(T)
                self.target_y[int(n_iteration)].extend(val_losses)

                if int(n_iteration) == self.R:
                    self.incumbent_configs.extend(T)
                    self.incumbent_perfs.extend(val_losses)
                    # Update history container.
                    for _config, _perf in zip(T, val_losses):
                        self.history_container.add(_config, _perf)

                # Select a number of best configurations for the next loop.
                # Filter out early stops, if any.
                indices = np.argsort(val_losses)
                if len(T) == sum(early_stops):
                    break
                if len(T) >= self.eta:
                    indices = [i for i in indices if not early_stops[i]]
                    T = [T[i] for i in indices]
                    extra_info = [ref_list[i] for i in indices]
                    reduced_num = int(n_configs / self.eta)
                    T = T[0:reduced_num]
                    extra_info = extra_info[0:reduced_num]
                else:
                    T = [T[indices[0]]]     # todo: confirm no filter early stops?
                    extra_info = [ref_list[indices[0]]]
                val_losses = [val_losses[i] for i in indices][0:len(T)]  # update: sorted
                incumbent_loss = val_losses[0]
                self.add_stage_history(self.stage_id, min(self.global_incumbent, incumbent_loss))
                self.stage_id += 1
            # self.remove_immediate_model()

            for item in self.iterate_r[self.iterate_r.index(r):]:
                # NORMALIZE Objective value: normalization
                normalized_y = std_normalization(self.target_y[item])
                self.weighted_surrogate.train(convert_configurations_to_array(self.target_x[item]),
                                              np.array(normalized_y, dtype=np.float64), r=item)

    def run(self, skip_last=0):
        try:
            for iter in range(1, 1 + self.num_iter):
                self.logger.info('-' * 50)
                self.logger.info("%s algorithm: %d/%d iteration starts" % (self.method_name, iter, self.num_iter))
                start_time = time.time()
                self.iterate(skip_last=skip_last)
                time_elapsed = (time.time() - start_time) / 60
                self.logger.info("%d/%d-Iteration took %.2f min." % (iter, self.num_iter, time_elapsed))
                self.iterate_id += 1
                self.save_intemediate_statistics()
        except Exception as e:
            print(e)
            self.logger.error(str(e))
            # Clean the immediate results.
            # self.remove_immediate_model()

    def get_bo_candidates(self, num_configs):
        # todo: parallel methods
        std_incumbent_value = np.min(std_normalization(self.target_y[self.iterate_r[-1]]))
        # Update surrogate model in acquisition function.
        self.acquisition_function.update(model=self.weighted_surrogate, eta=std_incumbent_value,
                                         num_data=len(self.history_container.data))

        challengers = self.acq_optimizer.maximize(
            runhistory=self.history_container,
            num_points=5000,
        )
        return challengers.challengers[:num_configs]

    def choose_next(self, num_config):
        if len(self.target_y[self.iterate_r[-1]]) == 0:
            configs = sample_configurations(self.config_space, num_config)
            self.configs.extend(configs)
            return configs

        config_candidates = list()
        acq_configs = self.get_bo_candidates(num_configs=2 * num_config)
        acq_idx = 0
        for idx in range(1, 1 + 2 * num_config):
            # Like BOHB, sample a fixed percentage of random configurations.
            if self.random_configuration_chooser.check(idx):
                _config = self.config_space.sample_configuration()
            else:
                _config = acq_configs[acq_idx]
                acq_idx += 1
            if _config not in config_candidates:
                config_candidates.append(_config)
            if len(config_candidates) >= num_config:
                break

        if len(config_candidates) < num_config:
            config_candidates = expand_configurations(config_candidates, self.config_space, num_config)

        _config_candidates = []
        for config in config_candidates:
            if config not in self.configs:  # Check if evaluated
                _config_candidates.append(config)
        self.configs.extend(_config_candidates)
        return _config_candidates

    @staticmethod
    def calculate_ranking_loss(y_pred, y_true):
        length = len(y_pred)
        y_pred = np.reshape(y_pred, -1)
        y_pred1 = np.tile(y_pred, (length, 1))
        y_pred2 = np.transpose(y_pred1)
        diff = y_pred1 - y_pred2
        y_true = np.reshape(y_true, -1)
        y_true1 = np.tile(y_true, (length, 1))
        y_true2 = np.transpose(y_true1)
        y_mask = (y_true1 - y_true2 > 0) + 0
        loss = np.sum(np.log(1 + np.exp(-diff)) * y_mask) / length
        return loss

    @staticmethod
    def calculate_preserving_order_num(y_pred, y_true):
        array_size = len(y_pred)
        assert len(y_true) == array_size

        total_pair_num, order_preserving_num = 0, 0
        for idx in range(array_size):
            for inner_idx in range(idx + 1, array_size):
                if bool(y_true[idx] > y_true[inner_idx]) == bool(y_pred[idx] > y_pred[inner_idx]):
                    order_preserving_num += 1
                total_pair_num += 1
        return order_preserving_num, total_pair_num

    def update_weight(self):
        start_time = time.time()

        max_r = self.iterate_r[-1]
        incumbent_configs = self.target_x[max_r]
        test_x = convert_configurations_to_array(incumbent_configs)
        test_y = np.array(self.target_y[max_r], dtype=np.float64)

        r_list = self.weighted_surrogate.surrogate_r
        K = len(r_list)

        if len(test_y) >= 3:
            # Get previous weights
            if self.weight_method == 'rank_loss_p_norm':
                preserving_order_p = list()
                preserving_order_nums = list()
                for i, r in enumerate(r_list):
                    fold_num = 5
                    if i != K - 1:
                        mean, var = self.weighted_surrogate.surrogate_container[r].predict(test_x)
                        tmp_y = np.reshape(mean, -1)
                        preorder_num, pair_num = self.calculate_preserving_order_num(tmp_y, test_y)
                        preserving_order_p.append(preorder_num / pair_num)
                        preserving_order_nums.append(preorder_num)
                    else:
                        if len(test_y) < 2 * fold_num:
                            preserving_order_p.append(0)
                        else:
                            # 5-fold cross validation.
                            kfold = KFold(n_splits=fold_num)
                            cv_pred = np.array([0] * len(test_y))
                            for train_idx, valid_idx in kfold.split(test_x):
                                train_configs, train_y = test_x[train_idx], test_y[train_idx]
                                valid_configs, valid_y = test_x[valid_idx], test_y[valid_idx]
                                types, bounds = get_types(self.config_space)
                                _surrogate = RandomForestWithInstances(types=types, bounds=bounds)
                                _surrogate.train(train_configs, train_y)
                                pred, _ = _surrogate.predict(valid_configs)
                                cv_pred[valid_idx] = pred.reshape(-1)
                            preorder_num, pair_num = self.calculate_preserving_order_num(cv_pred, test_y)
                            preserving_order_p.append(preorder_num / pair_num)
                            preserving_order_nums.append(preorder_num)

                trans_order_weight = np.array(preserving_order_p)
                power_sum = np.sum(np.power(trans_order_weight, self.power_num))
                new_weights = np.power(trans_order_weight, self.power_num) / power_sum

            elif self.weight_method == 'rank_loss_prob':
                # For basic surrogate i=1:K-1.
                mean_list, var_list = list(), list()
                for i, r in enumerate(r_list[:-1]):
                    mean, var = self.weighted_surrogate.surrogate_container[r].predict(test_x)
                    mean_list.append(np.reshape(mean, -1))
                    var_list.append(np.reshape(var, -1))
                sample_num = 100
                min_probability_array = [0] * K
                for _ in range(sample_num):
                    order_preseving_nums = list()

                    # For basic surrogate i=1:K-1.
                    for idx in range(K - 1):
                        sampled_y = self.rng.normal(mean_list[idx], var_list[idx])
                        _num, _ = self.calculate_preserving_order_num(sampled_y, test_y)
                        order_preseving_nums.append(_num)

                    fold_num = 5
                    # For basic surrogate i=K. cv
                    if len(test_y) < 2 * fold_num:
                        order_preseving_nums.append(0)
                    else:
                        # 5-fold cross validation.
                        kfold = KFold(n_splits=fold_num)
                        cv_pred = np.array([0] * len(test_y))
                        for train_idx, valid_idx in kfold.split(test_x):
                            train_configs, train_y = test_x[train_idx], test_y[train_idx]
                            valid_configs, valid_y = test_x[valid_idx], test_y[valid_idx]
                            types, bounds = get_types(self.config_space)
                            _surrogate = RandomForestWithInstances(types=types, bounds=bounds)
                            _surrogate.train(train_configs, train_y)
                            _pred, _var = _surrogate.predict(valid_configs)
                            sampled_pred = self.rng.normal(_pred.reshape(-1), _var.reshape(-1))
                            cv_pred[valid_idx] = sampled_pred
                        _num, _ = self.calculate_preserving_order_num(cv_pred, test_y)
                        order_preseving_nums.append(_num)
                    max_id = np.argmax(order_preseving_nums)
                    min_probability_array[max_id] += 1
                new_weights = np.array(min_probability_array) / sample_num
            else:
                raise ValueError('Invalid weight method: %s!' % self.weight_method)
        else:
            old_weights = list()
            for i, r in enumerate(r_list):
                _weight = self.weighted_surrogate.surrogate_weight[r]
                old_weights.append(_weight)
            new_weights = old_weights.copy()

        self.logger.info('[%s] %d-th Updating weights: %s' % (
            self.weight_method, self.weight_changed_cnt, str(new_weights)))

        # Assign the weight to each basic surrogate.
        for i, r in enumerate(r_list):
            self.weighted_surrogate.surrogate_weight[r] = new_weights[i]
        self.weight_changed_cnt += 1
        # Save the weight data.
        self.hist_weights.append(new_weights)
        dir_path = os.path.join(self.data_directory, 'saved_weights')
        file_name = 'mfes_weights_%s.npy' % (self.method_name,)
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)
        np.save(os.path.join(dir_path, file_name), np.asarray(self.hist_weights))
        self.logger.info('update_weight() cost %.2fs. new weights are saved to %s'
                         % (time.time()-start_time, os.path.join(dir_path, file_name)))

    def get_incumbent(self, num_inc=1):
        assert (len(self.incumbent_perfs) == len(self.incumbent_configs))
        indices = np.argsort(self.incumbent_perfs)
        configs = [self.incumbent_configs[i] for i in indices[0:num_inc]]
        perfs = [self.incumbent_perfs[i] for i in indices[0: num_inc]]
        return configs, perfs

    def get_weights(self):
        return self.hist_weights
Exemplo n.º 13
0
    def __init__(self,
                 config_space,
                 num_objs=1,
                 num_constraints=0,
                 initial_trials=3,
                 initial_configurations=None,
                 init_strategy='random_explore_first',
                 history_bo_data=None,
                 rand_prob=0.1,
                 optimization_strategy='bo',
                 surrogate_type='auto',
                 acq_type='auto',
                 acq_optimizer_type='auto',
                 ref_point=None,
                 output_dir='logs',
                 task_id='default_task_id',
                 random_state=None,
                 **kwargs):

        # Create output (logging) directory.
        # Init logging module.
        # Random seed generator.
        self.num_objs = num_objs
        self.num_constraints = num_constraints
        self.init_strategy = init_strategy
        self.output_dir = output_dir
        self.task_id = task_id
        self.rng = check_random_state(random_state)
        self.logger = get_logger(self.__class__.__name__)

        # Basic components in Advisor.
        self.rand_prob = rand_prob
        self.optimization_strategy = optimization_strategy

        # Init the basic ingredients in Bayesian optimization.
        self.history_bo_data = history_bo_data
        self.surrogate_type = surrogate_type
        self.constraint_surrogate_type = None
        self.acq_type = acq_type
        self.acq_optimizer_type = acq_optimizer_type
        self.init_num = initial_trials
        self.config_space = config_space
        self.config_space_seed = self.rng.randint(MAXINT)
        self.config_space.seed(self.config_space_seed)
        self.ref_point = ref_point

        # init history container
        if self.num_objs == 1:
            self.history_container = HistoryContainer(
                task_id, self.num_constraints, config_space=self.config_space)
        else:  # multi-objectives
            self.history_container = MOHistoryContainer(
                task_id, self.num_objs, self.num_constraints, ref_point)

        # initial design
        if initial_configurations is not None and len(
                initial_configurations) > 0:
            self.initial_configurations = initial_configurations
            self.init_num = len(initial_configurations)
        else:
            self.initial_configurations = self.create_initial_design(
                self.init_strategy)
            self.init_num = len(self.initial_configurations)

        self.surrogate_model = None
        self.constraint_models = None
        self.acquisition_function = None
        self.optimizer = None
        self.auto_alter_model = False
        self.algo_auto_selection()
        self.check_setup()
        self.setup_bo_basics()
Exemplo n.º 14
0
class Advisor(object, metaclass=abc.ABCMeta):
    def __init__(self,
                 config_space,
                 initial_trials=10,
                 initial_configurations=None,
                 init_strategy='random_explore_first',
                 history_bo_data=None,
                 optimization_strategy='bo',
                 surrogate_type='prf',
                 output_dir='logs',
                 task_id=None,
                 rng=None):

        # Create output (logging) directory.
        # Init logging module.
        # Random seed generator.
        self.init_strategy = init_strategy
        self.output_dir = output_dir
        if rng is None:
            run_id, rng = get_rng()
        self.rng = rng
        self.logger = get_logger(self.__class__.__name__)

        # Basic components in Advisor.
        self.optimization_strategy = optimization_strategy
        self.configurations = list()
        self.failed_configurations = list()
        self.perfs = list()
        self.scale_perc = 5
        self.perc = None
        self.min_y = None
        self.max_y = None

        # Init the basic ingredients in Bayesian optimization.
        self.history_bo_data = history_bo_data
        self.surrogate_type = surrogate_type
        self.init_num = initial_trials
        self.config_space = config_space
        self.config_space.seed(rng.randint(MAXINT))

        if initial_configurations is not None and len(
                initial_configurations) > 0:
            self.initial_configurations = initial_configurations
            self.init_num = len(initial_configurations)
        else:
            self.initial_configurations = self.create_initial_design(
                self.init_strategy)
            self.init_num = len(self.initial_configurations)
        self.history_container = HistoryContainer(task_id)

        self.surrogate_model = None
        self.acquisition_function = None
        self.optimizer = None
        self.setup_bo_basics()

    def setup_bo_basics(self,
                        acq_type='ei',
                        acq_optimizer_type='local_random'):
        self.surrogate_model = build_surrogate(
            func_str=self.surrogate_type,
            config_space=self.config_space,
            rng=self.rng,
            history_hpo_data=self.history_bo_data)

        self.acquisition_function = build_acq_func(func_str=acq_type,
                                                   model=self.surrogate_model)

        self.optimizer = build_optimizer(func_str=acq_optimizer_type,
                                         acq_func=self.acquisition_function,
                                         config_space=self.config_space,
                                         rng=self.rng)

    def create_initial_design(self, init_strategy='random'):
        default_config = self.config_space.get_default_configuration()
        if init_strategy == 'random':
            num_random_config = self.init_num - 1
            initial_configs = [
                default_config
            ] + self.sample_random_configs(num_random_config)
            return initial_configs
        elif init_strategy == 'random_explore_first':
            num_random_config = self.init_num - 1
            candidate_configs = self.sample_random_configs(100)
            return self.max_min_distance(default_config, candidate_configs,
                                         num_random_config)
        else:
            raise ValueError('Unknown initial design strategy: %s.' %
                             init_strategy)

    def max_min_distance(self, default_config, src_configs, num):
        min_dis = list()
        initial_configs = list()
        initial_configs.append(default_config)

        for config in src_configs:
            dis = np.linalg.norm(
                config.get_array() -
                default_config.get_array())  # get_array may have NaN problems
            min_dis.append(dis)
        min_dis = np.array(min_dis)

        for i in range(num):
            furthest_config = src_configs[np.argmax(min_dis)]
            initial_configs.append(furthest_config)
            min_dis[np.argmax(min_dis)] = -1

            for j in range(len(src_configs)):
                if src_configs[j] in initial_configs:
                    continue
                updated_dis = np.linalg.norm(src_configs[j].get_array() -
                                             furthest_config.get_array())
                min_dis[j] = min(updated_dis, min_dis[j])

        return initial_configs

    def get_suggestion(self):
        if len(self.configurations) == 0:
            X = np.array([])
        else:
            failed_configs = list(
            ) if self.max_y is None else self.failed_configurations.copy()
            X = convert_configurations_to_array(self.configurations +
                                                failed_configs)

        num_failed_trial = len(self.failed_configurations)
        failed_perfs = list() if self.max_y is None else [self.max_y
                                                          ] * num_failed_trial
        Y = np.array(self.perfs + failed_perfs, dtype=np.float64)

        num_config_evaluated = len(self.perfs + self.failed_configurations)
        if num_config_evaluated < self.init_num:
            return self.initial_configurations[num_config_evaluated]

        if self.optimization_strategy == 'random':
            return self.sample_random_configs(1)[0]
        elif self.optimization_strategy == 'bo':
            self.surrogate_model.train(X, Y)
            incumbent_value = self.history_container.get_incumbents()[0][1]
            self.acquisition_function.update(model=self.surrogate_model,
                                             eta=incumbent_value,
                                             num_data=num_config_evaluated)
            challengers = self.optimizer.maximize(
                runhistory=self.history_container, num_points=5000)
            is_repeated_config = True
            repeated_time = 0
            cur_config = None
            while is_repeated_config:
                cur_config = challengers.challengers[repeated_time]
                if cur_config in (self.configurations +
                                  self.failed_configurations):
                    repeated_time += 1
                else:
                    is_repeated_config = False
            return cur_config
        else:
            raise ValueError('Unknown optimization strategy: %s.' %
                             self.optimization_strategy)

    def update_observation(self, observation):
        config, perf, trial_state = observation
        if trial_state == SUCCESS and perf < MAXINT:
            self.configurations.append(config)
            self.perfs.append(perf)
            self.history_container.add(config, perf)

            self.perc = np.percentile(self.perfs, self.scale_perc)
            self.min_y = np.min(self.perfs)
            self.max_y = np.max(self.perfs)
        else:
            self.failed_configurations.append(config)

    def sample_random_configs(self, num_configs=1):
        configs = list()
        sample_cnt = 0
        max_sample_cnt = 1000
        while len(configs) < num_configs:
            config = self.config_space.sample_configuration()
            sample_cnt += 1
            if config not in (self.configurations +
                              self.failed_configurations + configs):
                configs.append(config)
                sample_cnt = 0
                continue
            if sample_cnt >= max_sample_cnt:
                self.logger.warning(
                    'Cannot sample non duplicate configuration after %d iterations.'
                    % max_sample_cnt)
                configs.append(config)
                sample_cnt = 0
        return configs

    def get_suggestions(self):
        raise NotImplementedError