def get_hpo_cs(estimator_id, task_type=REGRESSION):
    """Return the hyperparameter search space of a regression algorithm.

    :param estimator_id: identifier of the regressor to look up.
    :param task_type: task-type flag (kept for interface compatibility;
        not consulted by the lookup itself).
    :return: the ConfigurationSpace declared by the regressor class.
    :raises ValueError: if ``estimator_id`` is not a known regressor.
    """
    candidate_pool = get_combined_candidtates(_regressors, _addons)
    if estimator_id not in candidate_pool:
        raise ValueError("Algorithm %s not supported!" % estimator_id)
    return candidate_pool[estimator_id].get_hyperparameter_search_space()
def get_hpo_cs(estimator_id, task_type=CLASSIFICATION):
    """Return the hyperparameter search space of a classification algorithm.

    :param estimator_id: identifier of the classifier to look up.
    :param task_type: task-type flag (kept for interface compatibility;
        not consulted by the lookup itself).
    :return: the ConfigurationSpace declared by the classifier class.
    :raises ValueError: if ``estimator_id`` is not a known classifier.
    """
    candidate_pool = get_combined_candidtates(_classifiers, _addons)
    if estimator_id not in candidate_pool:
        raise ValueError("Algorithm %s not supported!" % estimator_id)
    return candidate_pool[estimator_id].get_hyperparameter_search_space()
def get_estimator(config, estimator_id):
    """Instantiate a classifier from a flat hyperparameter configuration.

    :param config: mapping of hyperparameter name -> value; copied, never
        mutated in place.
    :param estimator_id: identifier of the classifier class to build.
    :return: tuple ``(estimator_id, fitted-ready estimator instance)``.
    """
    from solnml.components.models.classification import _classifiers, _addons
    candidate_models = get_combined_candidtates(_classifiers, _addons)

    # Work on a copy so the caller's configuration is left untouched;
    # pin the random state for reproducibility.
    hyperparams = config.copy()
    hyperparams['random_state'] = 1

    model = candidate_models[estimator_id](**hyperparams)
    # Force single-threaded execution when the model exposes n_jobs.
    if hasattr(model, 'n_jobs'):
        model.n_jobs = 1
    return estimator_id, model
def get_estimator(config, estimator_id):
    """Instantiate a regressor from a prefix-namespaced configuration.

    Keys in ``config`` are expected to look like
    ``"<estimator_id>:<param>"``; only entries whose prefix matches
    ``estimator_id`` are forwarded (with the prefix stripped) to the
    regressor's constructor.

    :param config: mapping of prefixed hyperparameter name -> value;
        copied, never mutated in place.
    :param estimator_id: identifier of the regressor class to build.
    :return: tuple ``(estimator_id, estimator instance)``.
    """
    prefixed_config = config.copy()
    # Pin the random state (namespaced like every other parameter).
    prefixed_config['%s:random_state' % estimator_id] = 1

    # Strip the "<estimator_id>:" prefix from the matching entries.
    hpo_config = dict()
    for full_key in prefixed_config:
        parts = full_key.split(':')
        if parts[0] == estimator_id:
            hpo_config[parts[1]] = prefixed_config[full_key]

    model_classes = get_combined_candidtates(_regressors, _addons)
    model = model_classes[estimator_id](**hpo_config)
    # Force single-threaded execution when the model exposes n_jobs.
    if hasattr(model, 'n_jobs'):
        model.n_jobs = 1
    return estimator_id, model
def get_cash_cs(include_algorithms=None, task_type=CLASSIFICATION):
    """Build the joint CASH (algorithm selection + HPO) configuration space.

    A top-level categorical hyperparameter ``'algorithm'`` selects the
    estimator; each estimator's own search space is attached as a child
    space conditioned on that choice.

    :param include_algorithms: optional whitelist of algorithm ids; when
        given, the candidate set is restricted to its intersection with
        the known algorithms.
    :param task_type: task-type flag (kept for interface compatibility).
    :return: the combined ConfigurationSpace.
    :raises ValueError: if the whitelist matches no known algorithm.
    """
    available = get_combined_candidtates(_classifiers, _addons)
    if include_algorithms is not None:
        available = set(include_algorithms).intersection(set(available.keys()))
        if len(available) == 0:
            raise ValueError(
                "No algorithms included! Please check the spelling of the included algorithms!"
            )

    space = ConfigurationSpace()
    algo_choice = CategoricalHyperparameter('algorithm', list(available))
    space.add_hyperparameter(algo_choice)

    # Attach each estimator's sub-space, active only when it is selected.
    for estimator_id in available:
        space.add_configuration_space(
            estimator_id,
            get_hpo_cs(estimator_id),
            parent_hyperparameter={'parent': algo_choice, 'value': estimator_id})
    return space
def __init__(self, task_type, estimator_id: str, data: DataNode, metric,
             include_preprocessors=None, share_fe=False, output_dir='logs',
             per_run_time_limit=120, per_run_mem_limit=5120,
             dataset_id='default', eval_type='holdout', mth='rb', sw_size=3,
             n_jobs=1, seed=1, enable_fe=True, fe_algo='bo',
             number_of_unit_resource=2, total_resource=30, timestamp=None):
    """Set up a two-armed bandit over feature engineering ('fe') and
    hyperparameter optimization ('hpo') for a single estimator.

    Builds the estimator's hyperparameter space, the FE space, one
    evaluator per arm, and one optimizer per arm — unless ``mth`` is
    ``'combined'``, in which case a single joint (FE + HPO) space,
    evaluator, and optimizer are built instead.

    :param task_type: one of CLS_TASKS or RGS_TASKS; anything else raises.
    :param estimator_id: identifier of the algorithm being tuned.
    :param data: training DataNode; a copy is kept, the original is untouched.
    :param metric: scorer handed to the evaluators.
    :param mth: bandit method; ``'combined'`` switches to the joint space.
    :raises ValueError: unknown ``estimator_id`` or unknown ``task_type``.
    """
    self.task_type = task_type
    self.metric = metric
    self.number_of_unit_resource = number_of_unit_resource
    # One unit of resource, i.e., the number of trials per iteration.
    self.one_unit_of_resource = 5
    self.total_resource = total_resource
    self.per_run_time_limit = per_run_time_limit
    self.per_run_mem_limit = per_run_mem_limit
    self.estimator_id = estimator_id
    self.include_preprocessors = include_preprocessors
    self.evaluation_type = eval_type
    # Defensive copy: downstream transformations must not mutate the
    # caller's data node.
    self.original_data = data.copy_()
    self.share_fe = share_fe
    self.output_dir = output_dir
    self.n_jobs = n_jobs
    self.mth = mth
    self.seed = seed
    self.sliding_window_size = sw_size
    task_id = '%s-%d-%s' % (dataset_id, seed, estimator_id)
    self.logger = get_logger(self.__class__.__name__ + '-' + task_id)

    # Bandit settings: the pull order is 'hpo' first, then 'fe'.
    # self.arms = ['fe', 'hpo']
    self.arms = ['hpo', 'fe']
    self.rewards = dict()
    self.optimizer = dict()
    self.evaluation_cost = dict()
    self.update_flag = dict()
    # Global incumbent.
    self.inc = dict()
    self.local_inc = dict()
    self.local_hist = {'fe': [], 'hpo': []}
    self.inc_record = {'fe': list(), 'hpo': list()}
    self.exp_output = dict()
    self.eval_dict = {'fe': dict(), 'hpo': dict()}
    # Per-arm bookkeeping containers.
    for arm in self.arms:
        self.rewards[arm] = list()
        self.update_flag[arm] = False
        self.evaluation_cost[arm] = list()
        self.exp_output[arm] = dict()
    self.pull_cnt = 0
    self.action_sequence = list()
    self.final_rewards = list()
    self.incumbent_config = None
    self.incumbent_perf = float("-INF")
    self.early_stopped_flag = False
    self.first_start = True
    # Modality flags derived from the data node's declared feature types.
    self.include_text = True if TEXT in self.original_data.feature_types else False
    self.include_image = True if IMAGE in self.original_data.feature_types else False

    # Fetch hyperparameter space for the chosen estimator.
    if self.task_type in CLS_TASKS:
        from solnml.components.models.classification import _classifiers, _addons
        _candidates = get_combined_candidtates(_classifiers, _addons)
        if estimator_id in _candidates:
            clf_class = _candidates[estimator_id]
        else:
            raise ValueError("Algorithm %s not supported!" % estimator_id)
        cs = clf_class.get_hyperparameter_search_space()
    elif self.task_type in RGS_TASKS:
        from solnml.components.models.regression import _regressors, _addons
        _candidates = get_combined_candidtates(_regressors, _addons)
        if estimator_id in _candidates:
            reg_class = _candidates[estimator_id]
        else:
            raise ValueError("Algorithm %s not supported!" % estimator_id)
        cs = reg_class.get_hyperparameter_search_space()
    else:
        raise ValueError("Unknown task type %s!" % self.task_type)
    self.config_space = cs
    self.default_config = cs.get_default_configuration()
    self.config_space.seed(self.seed)

    # Feature-engineering search space, shaped by modality/imbalance flags.
    self.if_imbal = is_imbalanced_dataset(self.original_data)
    self.fe_config_space = get_task_hyperparameter_space(
        self.task_type, self.estimator_id,
        include_preprocessors=self.include_preprocessors,
        include_text=self.include_text,
        include_image=self.include_image,
        if_imbal=self.if_imbal)
    self.fe_default_config = self.fe_config_space.get_default_configuration()
    self.timestamp = timestamp

    # Build one evaluator per arm. Each arm evaluates against the other
    # arm's current default: the 'fe' evaluator holds the model config
    # fixed, the 'hpo' evaluator holds the FE config fixed.
    if self.task_type in CLS_TASKS:
        fe_evaluator = ClassificationEvaluator(
            self.default_config, self.fe_default_config, estimator_id,
            scorer=self.metric, data_node=self.original_data, name='fe',
            resampling_strategy=self.evaluation_type,
            if_imbal=self.if_imbal, seed=self.seed,
            output_dir=self.output_dir, timestamp=self.timestamp)
        hpo_evaluator = ClassificationEvaluator(
            self.default_config, self.fe_default_config, estimator_id,
            scorer=self.metric, data_node=self.original_data, name='hpo',
            resampling_strategy=self.evaluation_type,
            if_imbal=self.if_imbal, seed=self.seed,
            output_dir=self.output_dir, timestamp=self.timestamp)
    elif self.task_type in RGS_TASKS:
        # NOTE(review): regression evaluators take no if_imbal argument,
        # unlike the classification ones above.
        fe_evaluator = RegressionEvaluator(
            self.default_config, self.fe_default_config, estimator_id,
            scorer=self.metric, data_node=self.original_data, name='fe',
            resampling_strategy=self.evaluation_type, seed=self.seed,
            output_dir=self.output_dir, timestamp=self.timestamp)
        hpo_evaluator = RegressionEvaluator(
            self.default_config, self.fe_default_config, estimator_id,
            scorer=self.metric, data_node=self.original_data, name='hpo',
            resampling_strategy=self.evaluation_type, seed=self.seed,
            output_dir=self.output_dir, timestamp=self.timestamp)
    else:
        raise ValueError('Invalid task type!')

    if self.mth != 'combined':
        # Alternating mode: one optimizer per arm over its own space.
        self.enable_fe = enable_fe
        trials_per_iter = self.one_unit_of_resource * self.number_of_unit_resource
        self.optimizer['fe'] = build_fe_optimizer(
            self.evaluation_type, fe_evaluator, self.fe_config_space,
            per_run_time_limit=per_run_time_limit,
            per_run_mem_limit=per_run_mem_limit,
            inner_iter_num_per_iter=trials_per_iter,
            output_dir=output_dir, seed=self.seed, n_jobs=n_jobs)
        self.inc['fe'], self.local_inc['fe'] = self.fe_default_config, self.fe_default_config

        # Build the HPO component.
        # trials_per_iter = max(len(self.optimizer['fe'].trans_types), 20)
        trials_per_iter = self.one_unit_of_resource * self.number_of_unit_resource
        self.optimizer['hpo'] = build_hpo_optimizer(
            self.evaluation_type, hpo_evaluator, cs,
            output_dir=output_dir,
            per_run_time_limit=per_run_time_limit,
            inner_iter_num_per_iter=trials_per_iter,
            seed=self.seed, n_jobs=n_jobs)
        self.inc['hpo'], self.local_inc['hpo'] = self.default_config, self.default_config
        self.init_config = cs.get_default_configuration()
        self.local_hist['fe'].append(self.fe_default_config)
        self.local_hist['hpo'].append(self.default_config)
    else:
        # Combined mode: a single optimizer over the joint FE+HPO space;
        # the per-arm containers built above are replaced by flat ones.
        self.rewards = list()
        self.evaluation_cost = list()
        self.eval_dict = {}
        trials_per_iter = self.one_unit_of_resource * self.number_of_unit_resource
        if self.task_type in CLS_TASKS:
            from solnml.utils.combined_cls_evaluator import get_combined_cs
            from solnml.utils.combined_cls_evaluator import CombinedClassificationEvaluator as CombinedEvaluator
        else:
            from solnml.utils.combined_rgs_evaluator import get_combined_cs
            from solnml.utils.combined_rgs_evaluator import CombinedRegressionEvaluator as CombinedEvaluator
        self.evaluator = CombinedEvaluator(
            estimator_id, scorer=self.metric,
            data_node=self.original_data, if_imbal=self.if_imbal,
            timestamp=self.timestamp, output_dir=self.output_dir,
            resampling_strategy=self.evaluation_type)
        # Joint space supersedes the estimator-only `cs` built earlier.
        cs = get_combined_cs(
            self.estimator_id, self.task_type,
            include_image=self.include_image,
            include_text=self.include_text,
            include_preprocessors=self.include_preprocessors,
            if_imbal=self.if_imbal)
        self.optimizer = build_hpo_optimizer(
            self.evaluation_type, self.evaluator, cs,
            output_dir=self.output_dir,
            per_run_time_limit=self.per_run_time_limit,
            inner_iter_num_per_iter=trials_per_iter,
            seed=self.seed, n_jobs=self.n_jobs)