Example #1
0
    def _fit_in_hpo_way(self, algorithm_candidates, train_data, **kwargs):
        """Fit by joint HPO over a single combined configuration space.

        Builds one pipeline-level config space covering all candidate
        architectures, optimizes it with a single HPO solver, records the
        incumbent in ``self.best_algo_id`` / ``self.best_algo_config``, and
        (for tasks other than object detection) optionally fits an ensemble.

        :param algorithm_candidates: architecture ids to include in the
            pipeline configuration space.
        :param train_data: training dataset handed to the evaluator and,
            if enabled, the ensemble builder.
        :param kwargs: forwarded to ``EnsembleBuilder``.
        """
        cs = self.get_pipeline_config_space(algorithm_candidates)
        hpo_evaluator = DLEvaluator(cs.get_default_configuration(),
                                    self.task_type,
                                    max_epoch=self.max_epoch,
                                    scorer=self.metric,
                                    dataset=train_data,
                                    device=self.device,
                                    image_size=self.image_size,
                                    seed=self.seed,
                                    timestamp=self.timestamp)
        optimizer = build_hpo_optimizer(self.evaluation_type,
                                        hpo_evaluator,
                                        cs,
                                        output_dir=self.output_dir,
                                        per_run_time_limit=100000,
                                        seed=self.seed,
                                        n_jobs=self.n_jobs)
        self.solvers['hpo_solver'] = optimizer
        self.evaluators['hpo_solver'] = hpo_evaluator

        # Control flow: time-budgeted iteration when trial_num is None,
        # otherwise a fixed number of trials.
        _start_time = time.time()
        if self.trial_num is None:
            while True:
                _time_elapsed = time.time() - _start_time
                if _time_elapsed >= self.time_limit:
                    break
                _budget_left = self.time_limit - _time_elapsed
                self.solvers['hpo_solver'].iterate(budget=_budget_left)
        else:
            # BUGFIX: iterate `trial_num` times; the previous code tried to
            # iterate over the integer itself (`for _ in self.trial_num`),
            # which raises TypeError.
            for _ in range(self.trial_num):
                self.solvers['hpo_solver'].iterate()

        # Best model id / configuration: the incumbent of the single solver.
        self.best_algo_id = 'hpo_solver'
        solver_ = self.solvers[self.best_algo_id]
        inc_idx = np.argmax(solver_.perfs)
        self.best_algo_config = solver_.configs[inc_idx]

        # Skip ensembling for object detection.
        if self.task_type == OBJECT_DET:
            return

        if self.ensemble_method is not None:
            stats = self.fetch_ensemble_members(algorithm_candidates)

            # Ensembling all intermediate/ultimate models found in above optimization process.
            self.es = EnsembleBuilder(stats=stats,
                                      ensemble_method=self.ensemble_method,
                                      ensemble_size=self.ensemble_size,
                                      task_type=self.task_type,
                                      max_epoch=self.max_epoch,
                                      metric=self.metric,
                                      device=self.device,
                                      output_dir=self.output_dir,
                                      **kwargs)
            self.es.fit(data=train_data)
Example #2
0
    def prepare_optimizer(self, _arm):
        """Rebuild the optimizer for one bandit arm.

        For the 'fe' arm, construct a feature-engineering optimizer that
        evaluates transformations under a copy of the current HPO incumbent;
        for any other arm, construct an HPO optimizer that evaluates
        configurations on a copy of the current FE incumbent data node.

        :param _arm: arm identifier, 'fe' or 'hpo'.
        :raises ValueError: if the task type is neither a classification
            nor a regression task.
        """
        if _arm == 'fe':
            # Feature-engineering arm: reset the node id and evaluate with a
            # deep copy of the incumbent hyperparameter configuration.
            self.original_data._node_id = -1
            inc_hpo = copy.deepcopy(self.inc['hpo'])
            if self.task_type in CLS_TASKS:
                evaluator_cls = ClassificationEvaluator
            elif self.task_type in REG_TASKS:
                evaluator_cls = RegressionEvaluator
            else:
                raise ValueError('Invalid task type!')
            fe_evaluator = evaluator_cls(
                inc_hpo,
                scorer=self.metric,
                name='fe',
                resampling_strategy=self.evaluation_type,
                seed=self.seed)
            self.optimizer[_arm] = build_fe_optimizer(
                self.fe_algo,
                self.evaluation_type,
                self.task_type,
                self.inc['fe'],
                fe_evaluator,
                self.estimator_id,
                self.per_run_time_limit,
                self.per_run_mem_limit,
                self.seed,
                shared_mode=self.share_fe,
                n_jobs=self.n_jobs)
        else:
            # HPO arm: budget a fixed number of trials per iteration and
            # evaluate on a copy of the incumbent FE data node.
            trials_per_iter = self.one_unit_of_resource * self.number_of_unit_resource
            if self.task_type in CLS_TASKS:
                evaluator_cls = ClassificationEvaluator
            elif self.task_type in REG_TASKS:
                evaluator_cls = RegressionEvaluator
            else:
                raise ValueError('Invalid task type!')
            hpo_evaluator = evaluator_cls(
                self.default_config,
                scorer=self.metric,
                data_node=self.inc['fe'].copy_(),
                name='hpo',
                resampling_strategy=self.evaluation_type,
                seed=self.seed)
            self.optimizer[_arm] = build_hpo_optimizer(
                self.evaluation_type,
                hpo_evaluator,
                self.config_space,
                output_dir=self.output_dir,
                per_run_time_limit=self.per_run_time_limit,
                trials_per_iter=trials_per_iter,
                seed=self.seed)

        banner = '=' * 30
        self.logger.debug(banner)
        self.logger.debug('UPDATE OPTIMIZER: %s' % _arm)
        self.logger.debug(banner)
Example #3
0
    def __init__(self,
                 task_type,
                 estimator_id: str,
                 data: DataNode,
                 metric,
                 share_fe=False,
                 output_dir='logs',
                 per_run_time_limit=120,
                 per_run_mem_limit=5120,
                 dataset_id='default',
                 eval_type='holdout',
                 mth='rb',
                 sw_size=3,
                 n_jobs=1,
                 seed=1,
                 fe_algo='tree_based',
                 enable_intersection=True,
                 number_of_unit_resource=2,
                 total_resource=30):
        """Set up a two-armed bandit over feature engineering ('fe') and
        hyperparameter optimization ('hpo') for a single estimator.

        Builds the estimator's hyperparameter search space, then one
        optimizer per arm: a feature-engineering optimizer over the original
        data and an HPO optimizer over the search space.

        :param task_type: task constant; must be in CLS_TASKS or REG_TASKS,
            otherwise ValueError is raised.
        :param estimator_id: algorithm id used to look up the model class in
            the classification/regression registries (or their add-ons).
        :param data: input DataNode; a copy is stored as the original data.
        :param metric: scorer passed to both evaluators.
        :param share_fe: whether the FE optimizer runs in shared mode.
        :param output_dir: output directory for the HPO optimizer.
        :param per_run_time_limit: time limit per evaluation run.
        :param per_run_mem_limit: memory limit per evaluation run.
        :param dataset_id: only used to compose the logger's task id.
        :param eval_type: resampling strategy (e.g. 'holdout') used by
            the evaluators.
        :param mth: bandit method identifier (stored, not used here).
        :param sw_size: sliding window size (stored, not used here).
        :param n_jobs: parallelism for the underlying optimizers.
        :param seed: random seed; also seeds numpy and the config space.
        :param fe_algo: feature-engineering optimizer algorithm.
        :param enable_intersection: flag stored for later use.
        :param number_of_unit_resource: units of resource per pull; one unit
            corresponds to ``one_unit_of_resource`` trials per iteration.
        :param total_resource: total resource budget (stored, not used here).
        """
        self.task_type = task_type
        self.metric = metric
        self.number_of_unit_resource = number_of_unit_resource
        # One unit of resource, that's, the number of trials per iteration.
        self.one_unit_of_resource = 5
        self.total_resource = total_resource
        self.per_run_time_limit = per_run_time_limit
        self.per_run_mem_limit = per_run_mem_limit
        self.estimator_id = estimator_id
        self.evaluation_type = eval_type
        self.original_data = data.copy_()
        self.share_fe = share_fe
        self.output_dir = output_dir
        self.n_jobs = n_jobs
        self.mth = mth
        self.seed = seed
        self.sliding_window_size = sw_size
        # Logger is named per dataset, seed and estimator so parallel runs
        # can be told apart.
        task_id = '%s-%d-%s' % (dataset_id, seed, estimator_id)
        self.logger = get_logger(self.__class__.__name__ + '-' + task_id)
        np.random.seed(self.seed)

        # Bandit settings.
        # self.arms = ['fe', 'hpo']
        self.arms = ['hpo', 'fe']
        self.rewards = dict()
        self.optimizer = dict()
        self.evaluation_cost = dict()
        self.update_flag = dict()
        # Global incumbent.
        self.inc = dict()
        self.local_inc = dict()
        self.local_hist = {'fe': [], 'hpo': []}
        for arm in self.arms:
            self.rewards[arm] = list()
            self.update_flag[arm] = False
            self.evaluation_cost[arm] = list()
        self.pull_cnt = 0
        self.action_sequence = list()
        self.final_rewards = list()
        self.incumbent_perf = float("-INF")
        self.early_stopped_flag = False
        self.enable_intersection = enable_intersection

        # Fetch hyperparameter space.
        if self.task_type in CLS_TASKS:
            from solnml.components.models.classification import _classifiers, _addons
            if estimator_id in _classifiers:
                clf_class = _classifiers[estimator_id]
            elif estimator_id in _addons.components:
                clf_class = _addons.components[estimator_id]
            else:
                raise ValueError("Algorithm %s not supported!" % estimator_id)
            cs = clf_class.get_hyperparameter_search_space()
            # Pin the estimator id into the space as a constant hyperparameter.
            model = UnParametrizedHyperparameter("estimator", estimator_id)
            cs.add_hyperparameter(model)
        elif self.task_type in REG_TASKS:
            from solnml.components.models.regression import _regressors, _addons
            if estimator_id in _regressors:
                reg_class = _regressors[estimator_id]
            elif estimator_id in _addons.components:
                reg_class = _addons.components[estimator_id]
            else:
                raise ValueError("Algorithm %s not supported!" % estimator_id)
            cs = reg_class.get_hyperparameter_search_space()
            # Pin the estimator id into the space as a constant hyperparameter.
            model = UnParametrizedHyperparameter("estimator", estimator_id)
            cs.add_hyperparameter(model)
        else:
            raise ValueError("Unknown task type %s!" % self.task_type)

        self.config_space = cs
        self.default_config = cs.get_default_configuration()
        self.config_space.seed(self.seed)

        # Build the Feature Engineering component.
        if self.task_type in CLS_TASKS:
            fe_evaluator = ClassificationEvaluator(
                self.default_config,
                scorer=self.metric,
                name='fe',
                resampling_strategy=self.evaluation_type,
                seed=self.seed)
            hpo_evaluator = ClassificationEvaluator(
                self.default_config,
                scorer=self.metric,
                data_node=self.original_data,
                name='hpo',
                resampling_strategy=self.evaluation_type,
                seed=self.seed)
        elif self.task_type in REG_TASKS:
            fe_evaluator = RegressionEvaluator(
                self.default_config,
                scorer=self.metric,
                name='fe',
                resampling_strategy=self.evaluation_type,
                seed=self.seed)
            hpo_evaluator = RegressionEvaluator(
                self.default_config,
                scorer=self.metric,
                data_node=self.original_data,
                name='hpo',
                resampling_strategy=self.evaluation_type,
                seed=self.seed)
        else:
            raise ValueError('Invalid task type!')

        self.fe_algo = fe_algo
        self.optimizer['fe'] = build_fe_optimizer(self.fe_algo,
                                                  self.evaluation_type,
                                                  self.task_type,
                                                  self.original_data,
                                                  fe_evaluator,
                                                  estimator_id,
                                                  per_run_time_limit,
                                                  per_run_mem_limit,
                                                  self.seed,
                                                  shared_mode=self.share_fe,
                                                  n_jobs=n_jobs)

        # The FE arm's incumbents start out as the untouched input data.
        self.inc['fe'], self.local_inc[
            'fe'] = self.original_data, self.original_data

        # Build the HPO component.
        # trials_per_iter = max(len(self.optimizer['fe'].trans_types), 20)
        trials_per_iter = self.one_unit_of_resource * self.number_of_unit_resource

        self.optimizer['hpo'] = build_hpo_optimizer(
            self.evaluation_type,
            hpo_evaluator,
            cs,
            output_dir=output_dir,
            per_run_time_limit=per_run_time_limit,
            trials_per_iter=trials_per_iter,
            seed=self.seed,
            n_jobs=n_jobs)

        # The HPO arm's incumbents start out as the default configuration.
        self.inc['hpo'], self.local_inc[
            'hpo'] = self.default_config, self.default_config
        self.init_config = cs.get_default_configuration()
        self.local_hist['fe'].append(self.original_data)
        self.local_hist['hpo'].append(self.default_config)
Example #4
0
    def fit(self, train_data: DLDataset, **kwargs):
        """Automatically search for the best architecture and configuration.

        Depending on ``kwargs['opt_method']`` ('see' by default, 'hpo', or
        anything else for per-architecture round-robin HPO), this selects a
        neural architecture plus its hyperparameter configuration and stores
        the result in ``self.best_algo_id`` / ``self.best_algo_config``; for
        tasks other than object detection it may also fit an ensemble.

        :param train_data: training dataset.
        :param kwargs: may contain 'opt_method' and (for image
            classification) 'image_size'; also forwarded to the evaluators
            and the ensemble builder.
        :raises ValueError: if profiling or NAS removes every candidate.
        """
        _start_time = time.time()
        # Optimization strategy; defaults to 'see'.
        if 'opt_method' in kwargs:
            self.optalgo = kwargs['opt_method']
        else:
            self.optalgo = 'see'

        if self.task_type == IMG_CLS:
            # Image classification requires an explicit image size.
            self.image_size = kwargs['image_size']

        if self.config_file_path is not None:
            config_parser = ConfigParser(logger=self.logger)
            self.update_cs = config_parser.read(self.config_file_path)

        # TODO: For first-time user, download pretrained params here!
        algorithm_candidates = self.include_algorithms.copy()
        num_train_samples = train_data.get_train_samples_num()
        if self.optalgo == 'hpo':
            # Joint HPO over one combined space; nothing more to do here.
            self._fit_in_hpo_way(algorithm_candidates, train_data, **kwargs)
            return

        # Initialize one HPO solver/evaluator pair per architecture.
        for estimator_id in self.include_algorithms:
            cs = self.get_model_config_space(estimator_id)
            default_config = cs.get_default_configuration()
            cs.seed(self.seed)

            hpo_evaluator = DLEvaluator(default_config,
                                        self.task_type,
                                        max_epoch=self.max_epoch,
                                        scorer=self.metric,
                                        dataset=train_data,
                                        device=self.device,
                                        seed=self.seed,
                                        timestamp=self.timestamp,
                                        **kwargs)
            optimizer = build_hpo_optimizer(self.evaluation_type,
                                            hpo_evaluator,
                                            cs,
                                            output_dir=self.output_dir,
                                            per_run_time_limit=100000,
                                            seed=self.seed,
                                            n_jobs=self.n_jobs)
            self.solvers[estimator_id] = optimizer
            self.evaluators[estimator_id] = hpo_evaluator

        # Execute profiling procedure to prune infeasible architectures.
        if not self.skip_profile:
            algorithm_candidates = self.profile_models(num_train_samples)
            if len(algorithm_candidates) == 0:
                raise ValueError(
                    'After profiling, no arch is in the candidates!')
            else:
                self.logger.info('After profiling, arch candidates={%s}' %
                                 ','.join(algorithm_candidates))

        # Execute neural architecture selection.
        self.logger.info('Before NAS, arch candidates={%s}' %
                         ','.join(algorithm_candidates))

        dl_evaluator = DLEvaluator(None,
                                   self.task_type,
                                   max_epoch=self.max_epoch,
                                   scorer=self.metric,
                                   dataset=train_data,
                                   device=self.device,
                                   seed=self.seed,
                                   timestamp=self.timestamp,
                                   **kwargs)
        if self.optalgo == 'see':
            from solnml.components.hpo_optimizer.cashp_optimizer import CashpOptimizer
            self.see_optimizer = CashpOptimizer(self.task_type,
                                                algorithm_candidates,
                                                self.time_limit,
                                                n_jobs=self.n_jobs)
            inc_config, inc_perf = self.see_optimizer.run(dl_evaluator)
            self.best_algo_config = inc_config
            self.best_algo_id = inc_config['estimator']
            return

        algorithm_candidates = self.select_network_architectures(
            algorithm_candidates, dl_evaluator, num_arch=1, **kwargs)
        self.logger.info('After NAS, arch candidates={%s}' %
                         ','.join(algorithm_candidates))
        if len(algorithm_candidates) == 0:
            # Explicit raise instead of `assert`: asserts vanish under -O.
            raise ValueError('After NAS, no arch is in the candidates!')

        # Control flow via round robin over the remaining candidates.
        n_algorithm = len(algorithm_candidates)
        if self.trial_num is None:
            algo_id = 0
            while True:
                _time_elapsed = time.time() - _start_time
                if _time_elapsed >= self.time_limit:
                    break
                _budget_left = self.time_limit - _time_elapsed
                self.solvers[algorithm_candidates[algo_id]].iterate(
                    budget=_budget_left)
                algo_id = (algo_id + 1) % n_algorithm
        else:
            # BUGFIX: iterate `trial_num` times; the previous code iterated
            # over the integer itself (`for id in self.trial_num`), which
            # raises TypeError (and shadowed the builtin `id`).
            for trial_idx in range(self.trial_num):
                self.solvers[
                    algorithm_candidates[trial_idx % n_algorithm]].iterate()

        # Best architecture id: highest observed perf per candidate
        # (-inf for candidates without any evaluation).
        best_scores_ = list()
        for estimator_id in algorithm_candidates:
            if estimator_id in self.solvers:
                solver_ = self.solvers[estimator_id]
                if len(solver_.perfs) > 0:
                    best_scores_.append(np.max(solver_.perfs))
                else:
                    best_scores_.append(-np.inf)
            else:
                best_scores_.append(-np.inf)
        # Use the module logger instead of a bare print for debug output.
        self.logger.info('Arch candidates=%s, best scores=%s' %
                         (algorithm_candidates, best_scores_))

        if len(best_scores_) > 1 and (np.array(best_scores_) > -np.inf).any():
            self.best_algo_id = algorithm_candidates[np.argmax(best_scores_)]
            # Best model configuration from that solver's incumbent.
            solver_ = self.solvers[self.best_algo_id]
            inc_idx = np.argmax(solver_.perfs)
            self.best_algo_config = solver_.configs[inc_idx]
        else:
            # Fallback: single candidate or no finite score. Prefer the
            # evaluation history at the largest resource level if available.
            self.best_algo_id = algorithm_candidates[0]
            rs = list(self.eval_hist_perfs.keys())
            set_flag = False
            if len(rs) > 0:
                max_resource = np.max(rs)
                if max_resource in self.eval_hist_configs:
                    idxs = [
                        idx for (idx, config) in enumerate(
                            self.eval_hist_configs[max_resource])
                        if config['estimator'] == self.best_algo_id
                    ]
                    if idxs:
                        # BUGFIX: `np.argmax` over the filtered perf list
                        # returns a position WITHIN `idxs`; it must be mapped
                        # back through `idxs` before indexing the unfiltered
                        # config list. Also guard against an empty `idxs`,
                        # where argmax would raise.
                        best_pos = int(np.argmax([
                            self.eval_hist_perfs[max_resource][idx]
                            for idx in idxs
                        ]))
                        best_idx = idxs[best_pos]
                        self.best_algo_config = self.eval_hist_configs[
                            max_resource][best_idx]
                        set_flag = True
            if not set_flag:
                solver_ = self.solvers[self.best_algo_id]
                inc_idx = np.argmax(solver_.perfs)
                self.best_algo_config = solver_.configs[inc_idx]

        self.logger.info('Best configuration found: %s' %
                         self.best_algo_config)
        # Skip ensembling for object detection.
        if self.task_type == OBJECT_DET:
            return

        if self.ensemble_method is not None:
            stats = self.fetch_ensemble_members(algorithm_candidates)

            # Ensembling all intermediate/ultimate models found in above optimization process.
            self.es = EnsembleBuilder(stats=stats,
                                      ensemble_method=self.ensemble_method,
                                      ensemble_size=self.ensemble_size,
                                      task_type=self.task_type,
                                      max_epoch=self.max_epoch,
                                      metric=self.metric,
                                      timestamp=self.timestamp,
                                      device=self.device,
                                      output_dir=self.output_dir,
                                      **kwargs)
            self.es.fit(data=train_data)