Пример #1
0
class mqSMBO(BOBase):
    """Master side of a message-queue driven, runtime-limited optimization.

    The master obtains configurations from a batch advisor, sends them to
    workers through a ``MasterMessager`` and reports every returned
    observation back to the advisor.  Both run modes stop issuing new work
    once ``runtime_limit`` seconds have passed since construction.

    Constructor arguments are either forwarded to ``BOBase.__init__``
    (``task_id``, ``logging_dir`` as ``output_dir``, ``random_state``,
    ``initial_runs``, ``runtime_limit``, ``sample_strategy``,
    ``time_limit_per_trial``, ``history_bo_data``), forwarded to the advisor
    (``batch_size``, ``batch_strategy``, ``initial_runs`` as
    ``initial_trials``, ``initial_configurations``, ``init_strategy``,
    ``history_bo_data``, ``sample_strategy`` as ``optimization_strategy``,
    ``surrogate_type``, ``acq_type``, ``acq_optimizer_type``, ``ref_point``,
    ``task_id``, ``logging_dir``, ``random_state``) or used to open the
    message queue (``ip``, ``port``, ``authkey``).

    Raises:
        ValueError: if ``parallel_strategy`` is neither ``'sync'`` nor
            ``'async'``.
    """

    def __init__(
        self,
        objective_function,
        config_space,
        eval_type='holdout',
        parallel_strategy='async',
        batch_size=4,
        batch_strategy='median_imputation',
        num_constraints=0,
        num_objs=1,
        sample_strategy: str = 'bo',
        runtime_limit=600,
        time_limit_per_trial=180,
        surrogate_type=None,
        acq_type=None,
        acq_optimizer_type='local_random',
        initial_runs=3,
        init_strategy='random_explore_first',
        initial_configurations=None,
        ref_point=None,
        history_bo_data: List[OrderedDict] = None,
        logging_dir='logs',
        task_id='default',
        random_state=1,
        ip="",
        port=13579,
        authkey=b'abc',
    ):
        self.task_info = dict(num_constraints=num_constraints,
                              num_objs=num_objs)
        # Objective vector substituted for trials that returned no objectives.
        self.FAILED_PERF = [MAXINT] * num_objs

        # The run is bounded by wall-clock time, so the run count passed to
        # the base class is set to a very large number.
        super().__init__(objective_function,
                         config_space,
                         task_id=task_id,
                         output_dir=logging_dir,
                         random_state=random_state,
                         initial_runs=initial_runs,
                         max_runs=int(1e10),
                         runtime_limit=runtime_limit,
                         sample_strategy=sample_strategy,
                         time_limit_per_trial=time_limit_per_trial,
                         history_bo_data=history_bo_data)

        # Both advisors take the same arguments; only the class differs.
        if parallel_strategy == 'sync':
            advisor_cls = SyncBatchAdvisor
        elif parallel_strategy == 'async':
            advisor_cls = AsyncBatchAdvisor
        else:
            raise ValueError('Invalid parallel strategy - %s.' %
                             parallel_strategy)
        self.config_advisor = advisor_cls(
            config_space,
            self.task_info,
            batch_size=batch_size,
            batch_strategy=batch_strategy,
            initial_trials=initial_runs,
            initial_configurations=initial_configurations,
            init_strategy=init_strategy,
            history_bo_data=history_bo_data,
            optimization_strategy=sample_strategy,
            surrogate_type=surrogate_type,
            acq_type=acq_type,
            acq_optimizer_type=acq_optimizer_type,
            ref_point=ref_point,
            task_id=task_id,
            output_dir=logging_dir,
            random_state=random_state)

        self.eval_type = eval_type
        self.parallel_strategy = parallel_strategy
        self.batch_size = batch_size
        # The same capacity is passed twice to the messager.
        queue_capacity = max(100, 3 * batch_size)
        self.master_messager = MasterMessager(ip, port, authkey,
                                              queue_capacity, queue_capacity)
        self.start_time = time.time()

        self.configs = []
        self.perfs = []
        # Incumbent is tracked on the negated first objective.
        self.incumbent_perf = float("-INF")
        self.incumbent_config = self.config_space.get_default_configuration()
        self.eval_dict = {}
        self.workers = {}

    def async_run(self):
        """Dispatch and collect trials asynchronously until time runs out.

        Each outer iteration tops up the job queue while the advisor reports
        fewer than ``batch_size`` running configurations, then consumes every
        result currently available and sleeps one second once the result
        queue is empty.
        """
        advisor = self.config_advisor
        messager = self.master_messager
        sent_count = 0
        received_count = 0

        while time.time() - self.start_time < self.runtime_limit:
            # Add jobs to masterQueue.
            while (len(advisor.running_configs) < self.batch_size
                   and sent_count < self.max_iterations):
                sent_count += 1
                suggestion = advisor.get_suggestion()
                job = [suggestion, self.time_limit_per_trial]
                self.logger.info("Master: Add config %d." % sent_count)
                messager.send_message(job)

            # Get results from workerQueue until it is empty.
            observation = messager.receive_message()
            while observation is not None:
                received_count += 1
                (config, trial_state, constraints, objs, elapsed_time,
                 worker_info, extra_info) = observation

                # Remember every distinct worker that has reported back.
                if worker_info not in self.workers.values():
                    self.workers[len(self.workers)] = worker_info

                if objs is None:
                    perf = float("INF")
                else:
                    perf = objs[0]
                self.configs.append(config)
                self.perfs.append(perf)
                negated_perf = -perf
                self.eval_dict[config] = [
                    negated_perf, time.time(), trial_state
                ]

                if negated_perf > self.incumbent_perf:
                    self.incumbent_perf = negated_perf
                    self.incumbent_config = config

                if objs is None:
                    # Failed trial: report the placeholder objectives instead.
                    reported = Observation(config,
                                           trial_state,
                                           constraints,
                                           self.FAILED_PERF,
                                           elapsed_time,
                                           worker_info=worker_info,
                                           extra=extra_info)
                else:
                    reported = observation
                advisor.update_observation(reported)

                self.logger.info('Master: Get %d observation: %s' %
                                 (received_count, str(reported)))
                observation = messager.receive_message()

            # Nothing left to read: wait for workers.
            time.sleep(1)

    def sync_run(self):
        """Run batch after batch, waiting for each batch to finish.

        A new batch is only started while the runtime limit has not been
        reached; a batch that has been sent is always waited for in full.
        """
        advisor = self.config_advisor
        messager = self.master_messager
        batch_id = 0

        while time.time() - self.start_time < self.runtime_limit:
            batch = advisor.get_suggestions()
            # Add batch configs to masterQueue.
            for suggestion in batch:
                messager.send_message([suggestion, self.time_limit_per_trial])
            self.logger.info('Master: %d-th batch. %d configs sent.' %
                             (batch_id, len(batch)))

            # Get batch results from workerQueue.
            received = 0
            expected = len(batch)
            while True:
                observation = messager.receive_message()
                if observation is not None:
                    received += 1
                    (config, trial_state, constraints, objs, elapsed_time,
                     worker_info, extra_info) = observation
                    if objs is None:
                        # Failed trial: report the placeholder objectives.
                        reported = Observation(config, trial_state,
                                               constraints, self.FAILED_PERF,
                                               elapsed_time, worker_info,
                                               extra_info)
                    else:
                        reported = observation
                    advisor.update_observation(reported)
                    self.logger.info(
                        'Master: In the %d-th batch [%d], observation is: %s'
                        % (batch_id, received, str(reported)))
                    if received == expected:
                        break
                else:
                    # Wait for workers.
                    time.sleep(1)
            batch_id += 1

    def run(self):
        """Run the configured strategy and return ``self.get_history()``."""
        if self.parallel_strategy == 'async':
            runner = self.async_run
        else:
            runner = self.sync_run
        runner()

        return self.get_history()
Пример #2
0
class async_mqBaseFacade(object):
    """Base class for an asynchronous master that feeds jobs to workers.

    The master answers every message received from a worker with a new job
    obtained from :meth:`get_job`.  Subclasses must implement
    :meth:`get_job` and :meth:`update_observation`.

    Args:
        objective_func: stored on the instance; not called by this class.
        restart_needed: initial value of ``self.restart_needed``.
        need_lc: stored as ``self.record_lc``.
        method_name: name used for the log file; must not be ``None``.
        log_directory: directory for the log file (created if missing).
        data_directory: data directory (created if missing).
        time_limit_per_trial: value sent to workers with every job.
        runtime_limit: wall-clock budget in seconds for :meth:`run`;
            must not be ``None``.
        max_queue_len: queue length passed to the messager; values below
            300 are raised to 300.
        ip, port, authkey: passed to ``MasterMessager``.
        sleep_time: seconds to sleep when no worker message is available.

    Raises:
        ValueError: if ``method_name`` is ``None``.
        AssertionError: if ``runtime_limit`` is ``None``.
    """

    def __init__(self, objective_func,
                 restart_needed=False,
                 need_lc=False,
                 method_name='default_method_name',
                 log_directory='logs',
                 data_directory='data',
                 time_limit_per_trial=600,
                 runtime_limit=None,
                 max_queue_len=300,
                 ip='',
                 port=13579,
                 authkey=b'abc',
                 sleep_time=0.1,):
        # Validate before touching the filesystem or the logger, so that an
        # invalid call leaves no directories or log setup behind.
        if method_name is None:
            raise ValueError('Method name must be specified! NOT NONE.')
        # Kept as an assert so the exception type seen by callers is unchanged.
        assert runtime_limit is not None, 'runtime_limit must be specified.'

        # exist_ok avoids the check-then-create race when several processes
        # start with the same directories.
        self.log_directory = log_directory
        os.makedirs(self.log_directory, exist_ok=True)
        self.data_directory = data_directory
        os.makedirs(self.data_directory, exist_ok=True)

        self.logger = self._get_logger(method_name)

        self.objective_func = objective_func
        self.trial_statistics = list()
        self.recorder = list()

        self.global_start_time = time.time()
        self._history = {"time_elapsed": list(), "performance": list(),
                         "best_trial_id": list(), "configuration": list()}
        self.global_incumbent = 1e10
        self.global_incumbent_configuration = None
        self.global_trial_counter = 0
        self.restart_needed = restart_needed
        self.record_lc = need_lc
        self.method_name = method_name
        # evaluation metrics
        self.stage_id = 1
        self.stage_history = {'stage_id': list(), 'performance': list()}
        self.grid_search_perf = list()

        # Intermediate record dumping is off until
        # set_save_intermediate_record() is called.
        self.save_intermediate_record = False
        self.save_intermediate_record_id = 0
        self.save_intermediate_record_path = None

        self.time_limit_per_trial = time_limit_per_trial
        self.runtime_limit = runtime_limit

        max_queue_len = max(300, max_queue_len)
        self.master_messager = MasterMessager(ip, port, authkey, max_queue_len, max_queue_len)
        self.sleep_time = sleep_time

    def set_restart(self):
        """Set ``self.restart_needed`` to ``True``."""
        self.restart_needed = True

    def set_method_name(self, name):
        """Replace ``self.method_name`` (the logger is not reconfigured)."""
        self.method_name = name

    def add_stage_history(self, stage_id, performance):
        """Append one ``(stage_id, performance)`` pair to the stage history."""
        self.stage_history['stage_id'].append(stage_id)
        self.stage_history['performance'].append(performance)

    def add_history(self, time_elapsed, performance, trial_id, config):
        """Append one entry to each list of ``self._history``."""
        self._history['time_elapsed'].append(time_elapsed)
        self._history['performance'].append(performance)
        self._history['best_trial_id'].append(trial_id)
        self._history['configuration'].append(config)

    def run(self):
        """Serve workers until the runtime budget is used up.

        Every worker message is a 4-tuple
        ``(return_info, time_taken, trial_id, config)``.  A message whose
        ``config`` is ``None`` is treated as a worker announcing itself;
        any other message is a finished trial that is passed to
        :meth:`update_observation` and appended to ``self.recorder``.
        In both cases a new job from :meth:`get_job` is sent back.

        Any exception raised inside the loop is printed and logged, after
        which the method returns instead of propagating it.
        """
        try:
            worker_num = 0
            while True:
                if self.runtime_limit is not None and time.time() - self.global_start_time > self.runtime_limit:
                    self.logger.info('RUNTIME BUDGET is RUNNING OUT.')
                    return

                # Get observation from worker
                observation = self.master_messager.receive_message()  # return_info, time_taken, trial_id, config
                if observation is None:
                    # Wait for workers.
                    time.sleep(self.sleep_time)
                    continue

                return_info, time_taken, trial_id, config = observation
                # worker init
                if config is None:
                    worker_num += 1
                    self.logger.info("Worker %d init." % (worker_num, ))
                # update observation
                else:
                    global_time = time.time() - self.global_start_time
                    self.logger.info('Master get observation: %s. Global time=%.2fs.' % (str(observation), global_time))
                    n_iteration = return_info['n_iteration']
                    perf = return_info['loss']
                    t = time.time()
                    self.update_observation(config, perf, n_iteration)
                    self.logger.info('update_observation() cost %.2fs.' % (time.time() - t,))
                    self.recorder.append({'trial_id': trial_id, 'time_consumed': time_taken,
                                          'configuration': config, 'n_iteration': n_iteration,
                                          'return_info': return_info, 'global_time': global_time})
                    # Dump after every trial when the subclass defines no
                    # ``R`` attribute, otherwise only for trials whose
                    # n_iteration equals ``R``.
                    if (not hasattr(self, 'R')) or n_iteration == self.R:
                        self.save_intermediate_statistics()

                # Send new job
                t = time.time()
                config, n_iteration, extra_conf = self.get_job()
                self.logger.info('get_job() cost %.2fs.' % (time.time()-t, ))
                msg = [config, extra_conf, self.time_limit_per_trial, n_iteration, self.global_trial_counter]
                self.master_messager.send_message(msg)
                self.global_trial_counter += 1
                self.logger.info('Master send job: %s.' % (msg,))

        except Exception as e:
            # Best effort: report the failure on stdout and in the log.
            print(e)
            print(traceback.format_exc())
            self.logger.error(traceback.format_exc())

    def get_job(self):
        """Return ``(config, n_iteration, extra_conf)`` for the next job.

        Must be implemented by subclasses.
        """
        raise NotImplementedError

    def update_observation(self, config, perf, n_iteration):
        """Consume the result of a finished trial.

        Must be implemented by subclasses.
        """
        raise NotImplementedError

    def set_save_intermediate_record(self, dir_path, file_name):
        """Enable dumping of ``self.recorder`` to ``dir_path``.

        ``dir_path`` is created if missing.  A trailing ``.pkl`` in
        ``file_name`` is stripped because :meth:`save_intermediate_statistics`
        appends ``_<id>.pkl`` itself.
        """
        os.makedirs(dir_path, exist_ok=True)
        self.save_intermediate_record = True
        if file_name.endswith('.pkl'):
            file_name = file_name[:-4]
        self.save_intermediate_record_path = os.path.join(dir_path, file_name)
        self.logger.info('set save_intermediate_record to True. path: %s.' % (self.save_intermediate_record_path,))

    def save_intermediate_statistics(self):
        """Pickle ``self.recorder`` to the next numbered file, if enabled.

        Does nothing unless :meth:`set_save_intermediate_record` was called.
        """
        if self.save_intermediate_record:
            self.save_intermediate_record_id += 1
            path = '%s_%d.pkl' % (self.save_intermediate_record_path, self.save_intermediate_record_id)
            with open(path, 'wb') as f:
                pkl.dump(self.recorder, f)
            global_time = time.time() - self.global_start_time
            self.logger.info('Intermediate record %s saved! global_time=%.2fs.' % (path, global_time))

    def _get_logger(self, name):
        """Call ``setup_logger`` for ``<log_directory>/<name>.log`` and
        return ``get_logger`` for this class's name."""
        logger_name = name
        setup_logger(os.path.join(self.log_directory, '%s.log' % str(logger_name)), None)
        return get_logger(self.__class__.__name__)
Пример #3
0
class mqBaseFacade(object):
    """Base class for a master that evaluates batches of configs on workers.

    :meth:`run_in_parallel` sends a batch of configurations through a
    ``MasterMessager``, waits for the results and records them.

    Args:
        objective_func: stored on the instance; not called by this class.
        restart_needed: initial value of ``self.restart_needed``.
        need_lc: stored as ``self.record_lc`` and sent to workers as
            ``need_lc``.
        method_name: name used for the log file and sent to workers;
            must not be ``None``.
        log_directory: directory for the log file (created if missing).
        data_directory: data directory (created if missing).
        time_limit_per_trial: value sent to workers with every job.
        runtime_limit: optional wall-clock budget in seconds, measured from
            construction; ``None`` disables the check.
        max_queue_len: queue length passed to the messager; values below
            300 are raised to 300.
        ip, port, authkey: passed to ``MasterMessager``.

    Raises:
        ValueError: if ``method_name`` is ``None``.
    """

    def __init__(self, objective_func,
                 restart_needed=False,
                 need_lc=False,
                 method_name='default_method_name',
                 log_directory='logs',
                 data_directory='data',
                 time_limit_per_trial=600,
                 runtime_limit=None,
                 max_queue_len=300,
                 ip='',
                 port=13579,
                 authkey=b'abc',):
        # Validate before touching the filesystem or the logger, so that an
        # invalid call leaves no directories or log setup behind.
        if method_name is None:
            raise ValueError('Method name must be specified! NOT NONE.')

        # exist_ok avoids the check-then-create race when several processes
        # start with the same directories.
        self.log_directory = log_directory
        os.makedirs(self.log_directory, exist_ok=True)
        self.data_directory = data_directory
        os.makedirs(self.data_directory, exist_ok=True)

        self.logger = self._get_logger(method_name)

        self.objective_func = objective_func
        self.trial_statistics = []
        self.recorder = []

        self.global_start_time = time.time()
        self._history = {"time_elapsed": [], "performance": [], "best_trial_id": [], "configuration": []}
        self.global_incumbent = 1e10
        self.global_incumbent_configuration = None
        self.global_trial_counter = 0
        self.restart_needed = restart_needed
        self.record_lc = need_lc
        self.method_name = method_name
        # evaluation metrics
        self.stage_id = 1
        self.stage_history = {'stage_id': [], 'performance': []}
        self.grid_search_perf = []

        self.time_limit_per_trial = time_limit_per_trial
        self.runtime_limit = runtime_limit

        max_queue_len = max(300, max_queue_len)
        self.master_messager = MasterMessager(ip, port, authkey, max_queue_len, max_queue_len)

    def set_restart(self):
        """Set ``self.restart_needed`` to ``True``."""
        self.restart_needed = True

    def set_method_name(self, name):
        """Replace ``self.method_name`` (the logger is not reconfigured)."""
        self.method_name = name

    def add_stage_history(self, stage_id, performance):
        """Append one ``(stage_id, performance)`` pair to the stage history."""
        self.stage_history['stage_id'].append(stage_id)
        self.stage_history['performance'].append(performance)

    def add_history(self, time_elapsed, performance, trial_id, config):
        """Append one entry to each list of ``self._history``."""
        self._history['time_elapsed'].append(time_elapsed)
        self._history['performance'].append(performance)
        self._history['best_trial_id'].append(trial_id)
        self._history['configuration'].append(config)

    def run_in_parallel(self, configurations, n_iteration, extra_info=None):
        """Evaluate ``configurations`` on the workers and collect the results.

        Args:
            configurations: hashable configurations to evaluate.
            n_iteration: value sent with every job and stored in the records.
            extra_info: optional sequence indexed like ``configurations``;
                element ``i`` is sent to the worker as ``reference``.

        Returns:
            ``(performance_result, early_stops)``: the ``return_info`` dict
            of every received result ordered by trial id, and the matching
            ``early_stop`` flags (``False`` when absent).

        Raises:
            ValueError: if the runtime budget is exceeded by the time the
                received results have been recorded.
        """
        performance_result = []
        early_stops = []

        # TODO: need systematic tests.
        # Count how often each configuration occurs in this batch.
        count_dict = dict()
        for config in configurations:
            count_dict[config] = count_dict.get(config, 0) + 1

        # incorporate ref info.
        conf_list = []
        for index, config in enumerate(configurations):
            extra_conf_dict = dict()
            # Repeated configurations get a 'uid' (the remaining count) on
            # all but their last occurrence.
            if count_dict[config] > 1:
                extra_conf_dict['uid'] = count_dict[config]
                count_dict[config] -= 1

            if extra_info is not None:
                extra_conf_dict['reference'] = extra_info[index]
            extra_conf_dict['need_lc'] = self.record_lc
            extra_conf_dict['method_name'] = self.method_name
            conf_list.append((config, extra_conf_dict))

        # Add batch configs to masterQueue.
        for config, extra_conf in conf_list:
            msg = [config, extra_conf, self.time_limit_per_trial, n_iteration, self.global_trial_counter]
            self.master_messager.send_message(msg)
            self.global_trial_counter += 1
        self.logger.info('Master: %d configs sent.' % (len(conf_list)))
        # Get batch results from workerQueue.
        result_num = 0
        result_needed = len(conf_list)
        while True:
            if self.runtime_limit is not None and time.time() - self.global_start_time > self.runtime_limit:
                break
            observation = self.master_messager.receive_message()    # return_info, time_taken, trial_id, config
            if observation is None:
                # Wait for workers.
                time.sleep(1)
                continue
            # Report result.
            result_num += 1
            global_time = time.time() - self.global_start_time
            self.trial_statistics.append((observation, global_time))
            self.logger.info('Master: Get the [%d] result, observation is %s.' % (result_num, str(observation)))
            if result_num == result_needed:
                break

        # Results arrive in completion order; sort by trial_id so the output
        # follows the order in which the jobs were sent.
        self.trial_statistics.sort(key=lambda x: x[0][2])

        # get the evaluation statistics
        for observation, global_time in self.trial_statistics:
            return_info, time_taken, trial_id, config = observation

            performance = return_info['loss']
            if performance < self.global_incumbent:
                self.global_incumbent = performance
                self.global_incumbent_configuration = config

            self.add_history(global_time, self.global_incumbent, trial_id,
                             self.global_incumbent_configuration)
            # TODO: old version => performance_result.append(performance)
            performance_result.append(return_info)
            early_stops.append(return_info.get('early_stop', False))
            self.recorder.append({'trial_id': trial_id, 'time_consumed': time_taken,
                                  'configuration': config, 'n_iteration': n_iteration,
                                  'return_info': return_info, 'global_time': global_time})

        self.trial_statistics.clear()

        self.save_intemediate_statistics()
        # Partial results are recorded above before the budget error is raised.
        if self.runtime_limit is not None and time.time() - self.global_start_time > self.runtime_limit:
            raise ValueError('Runtime budget meets!')
        return performance_result, early_stops

    def save_intemediate_statistics(self, save_stage=False):
        """Placeholder hook called after every batch; currently a no-op.

        The former implementation (saving history arrays, the incumbent
        configuration, the recorder, optional stage history and a plot into
        the data directory) is disabled.  The misspelled name is kept because
        it is part of the public interface.
        """
        return

    def _get_logger(self, name):
        """Call ``setup_logger`` for ``<log_directory>/<name>.log`` and
        return ``get_logger`` for this class's name."""
        logger_name = name
        setup_logger(os.path.join(self.log_directory, '%s.log' % str(logger_name)), None)
        return get_logger(self.__class__.__name__)
Пример #4
0
class mqSMBO(BOBase):
    """Master side of a message-queue driven, run-count-limited optimization.

    The master obtains configurations from a batch advisor, sends them to
    workers through a ``MasterMessager`` and reports every returned
    observation back to the advisor.  The amount of work is bounded by
    ``self.max_iterations``.

    Constructor arguments are either forwarded to ``BOBase.__init__``
    (``task_id``, ``logging_dir`` as ``output_dir``, ``random_state``,
    ``initial_runs``, ``max_runs``, ``sample_strategy``,
    ``time_limit_per_trial``, ``history_bo_data``), forwarded to the advisor
    (``num_objs``, ``num_constraints``, ``batch_size``, ``batch_strategy``,
    ``initial_runs`` as ``initial_trials``, ``initial_configurations``,
    ``init_strategy``, ``history_bo_data``, ``sample_strategy`` as
    ``optimization_strategy``, ``surrogate_type``, ``acq_type``,
    ``acq_optimizer_type``, ``ref_point``, ``task_id``, ``logging_dir``,
    ``random_state`` and everything in ``advisor_kwargs``) or used to open
    the message queue (``ip``, ``port``, ``authkey``).

    Raises:
        ValueError: if ``task_id`` is ``None`` or ``parallel_strategy`` is
            neither ``'sync'`` nor ``'async'``.
    """

    def __init__(
        self,
        objective_function,
        config_space,
        parallel_strategy='async',
        batch_size=4,
        batch_strategy='default',
        num_constraints=0,
        num_objs=1,
        sample_strategy: str = 'bo',
        max_runs=200,
        time_limit_per_trial=180,
        surrogate_type='auto',
        acq_type='auto',
        acq_optimizer_type='auto',
        initial_runs=3,
        init_strategy='random_explore_first',
        initial_configurations=None,
        ref_point=None,
        history_bo_data: List[OrderedDict] = None,
        logging_dir='logs',
        task_id='default_task_id',
        random_state=None,
        advisor_kwargs: dict = None,
        ip="",
        port=13579,
        authkey=b'abc',
    ):
        if task_id is None:
            raise ValueError(
                'Task id is not SPECIFIED. Please input task id first.')

        self.num_objs = num_objs
        self.num_constraints = num_constraints
        # Objective vector substituted for trials that returned no objectives.
        self.FAILED_PERF = [MAXINT] * num_objs
        super().__init__(objective_function,
                         config_space,
                         task_id=task_id,
                         output_dir=logging_dir,
                         random_state=random_state,
                         initial_runs=initial_runs,
                         max_runs=max_runs,
                         sample_strategy=sample_strategy,
                         time_limit_per_trial=time_limit_per_trial,
                         history_bo_data=history_bo_data)

        self.parallel_strategy = parallel_strategy
        self.batch_size = batch_size
        # The same capacity is passed twice to the messager.
        queue_capacity = max(100, 3 * batch_size)
        self.master_messager = MasterMessager(ip, port, authkey,
                                              queue_capacity, queue_capacity)

        extra_options = advisor_kwargs or {}
        # Both advisors take the same arguments; only the class differs.
        if parallel_strategy == 'sync':
            advisor_cls = SyncBatchAdvisor
        elif parallel_strategy == 'async':
            advisor_cls = AsyncBatchAdvisor
        else:
            raise ValueError('Invalid parallel strategy - %s.' %
                             parallel_strategy)
        # extra_options is unpacked separately so that a key clashing with
        # one of the explicit keywords still raises TypeError.
        self.config_advisor = advisor_cls(
            config_space,
            num_objs=num_objs,
            num_constraints=num_constraints,
            batch_size=batch_size,
            batch_strategy=batch_strategy,
            initial_trials=initial_runs,
            initial_configurations=initial_configurations,
            init_strategy=init_strategy,
            history_bo_data=history_bo_data,
            optimization_strategy=sample_strategy,
            surrogate_type=surrogate_type,
            acq_type=acq_type,
            acq_optimizer_type=acq_optimizer_type,
            ref_point=ref_point,
            task_id=task_id,
            output_dir=logging_dir,
            random_state=random_state,
            **extra_options)

    def async_run(self):
        """Dispatch and collect trials asynchronously.

        Loops until ``self.max_iterations`` results have been received.
        Each outer iteration tops up the job queue while the advisor reports
        fewer than ``batch_size`` running configurations, then consumes every
        result currently available and sleeps one second once the result
        queue is empty.
        """
        advisor = self.config_advisor
        messager = self.master_messager
        sent_count = 0
        received_count = 0

        while received_count < self.max_iterations:
            # Add jobs to masterQueue.
            while (len(advisor.running_configs) < self.batch_size
                   and sent_count < self.max_iterations):
                sent_count += 1
                suggestion = advisor.get_suggestion()
                job = [suggestion, self.time_limit_per_trial]
                self.logger.info("Master: Add config %d." % sent_count)
                messager.send_message(job)

            # Get results from workerQueue until it is empty.
            observation = messager.receive_message()
            while observation is not None:
                received_count += 1
                if observation.objs is None:
                    # Failed trial: report the placeholder objectives instead.
                    observation = Observation(
                        config=observation.config,
                        objs=self.FAILED_PERF,
                        constraints=observation.constraints,
                        trial_state=observation.trial_state,
                        elapsed_time=observation.elapsed_time,
                    )
                advisor.update_observation(observation)
                self.logger.info('Master: Get %d observation: %s' %
                                 (received_count, str(observation)))
                observation = messager.receive_message()

            # Nothing left to read: wait for workers.
            time.sleep(1)

    def sync_run(self):
        """Run a fixed number of batches, waiting for each one to finish.

        The batch count is ``max_iterations`` divided by ``batch_size``,
        rounded up, plus one extra batch when ``batch_size`` exceeds the
        advisor's ``init_num``.
        """
        advisor = self.config_advisor
        messager = self.master_messager

        rounded_up = self.max_iterations + self.batch_size - 1
        batch_num = rounded_up // self.batch_size
        if self.batch_size > advisor.init_num:
            batch_num += 1  # fix bug

        batch_id = 0
        while batch_id < batch_num:
            batch = advisor.get_suggestions()
            # Add batch configs to masterQueue.
            for suggestion in batch:
                messager.send_message([suggestion, self.time_limit_per_trial])
            self.logger.info('Master: %d-th batch. %d configs sent.' %
                             (batch_id, len(batch)))

            # Get batch results from workerQueue.
            received = 0
            expected = len(batch)
            while True:
                observation = messager.receive_message()
                if observation is not None:
                    received += 1
                    if observation.objs is None:
                        # Failed trial: report the placeholder objectives.
                        observation = Observation(
                            config=observation.config,
                            objs=self.FAILED_PERF,
                            constraints=observation.constraints,
                            trial_state=observation.trial_state,
                            elapsed_time=observation.elapsed_time,
                        )
                    advisor.update_observation(observation)
                    self.logger.info(
                        'Master: In the %d-th batch [%d], observation is: %s'
                        % (batch_id, received, str(observation)))
                    if received == expected:
                        break
                else:
                    # Wait for workers.
                    time.sleep(1)
            batch_id += 1

    def run(self):
        """Run the configured strategy and return ``self.get_history()``."""
        if self.parallel_strategy == 'async':
            runner = self.async_run
        else:
            runner = self.sync_run
        runner()
        return self.get_history()