def test_uniformfloat_transform(self):
    """Check that float values survive a JSON round trip unchanged.

    The effect under test only happens when values are sampled through the
    configuration space (not when a hyperparameter is sampled alone), so
    both sampled configurations and configurations built from a dictionary
    are serialized to JSON and deserialized again.
    """
    space = ConfigurationSpace()
    hp_a = space.add_hyperparameter(UniformFloatHyperparameter('a', -5, 10))
    hp_b = space.add_hyperparameter(
        NormalFloatHyperparameter('b', 1, 2, log=True))

    for _ in range(100):
        sampled = space.sample_configuration()
        before = OrderedDict(sorted(sampled.get_dictionary().items()))
        restored = byteify(json.loads(json.dumps(before)))
        after = OrderedDict(sorted(restored.items()))
        self.assertEqual(repr(before), repr(after))

    # Next, test whether the truncation also works when initializing the
    # Configuration with a dictionary
    for _ in range(100):
        rng = np.random.RandomState(1)
        values_dict = {'a': hp_a.sample(rng), 'b': hp_b.sample(rng)}
        from_dict = Configuration(space, values=values_dict)
        serialized = json.dumps(from_dict.get_dictionary())
        restored = byteify(json.loads(serialized))
        self.assertEqual(values_dict, restored)
def test_check_neighbouring_config_diamond_str(self):
    """Change ``head`` in a diamond-shaped space and check the new vector.

    ``left`` and ``right`` are conditional on ``head == 'red'``; ``bottom``
    requires both ``left`` and ``right`` to be ``'green'``. After calling
    ``change_hp_value`` for ``head`` with value 1 the vector is expected to
    be ``[1, nan, nan, nan]``.
    """
    space = ConfigurationSpace()
    head = CategoricalHyperparameter('head', ['red', 'green'])
    left = CategoricalHyperparameter('left', ['red', 'green'])
    right = CategoricalHyperparameter(
        'right', ['red', 'green', 'blue', 'yellow'])
    bottom = CategoricalHyperparameter('bottom', ['red', 'green'])
    space.add_hyperparameters([head, left, right, bottom])

    space.add_condition(EqualsCondition(left, head, 'red'))
    space.add_condition(EqualsCondition(right, head, 'red'))
    bottom_needs_both = AndConjunction(
        EqualsCondition(bottom, left, 'green'),
        EqualsCondition(bottom, right, 'green'),
    )
    space.add_condition(bottom_needs_both)

    start = Configuration(
        space,
        {'bottom': 'red', 'head': 'red', 'left': 'green', 'right': 'green'},
    )

    changed_name = "head"
    changed_idx = space.get_idx_by_hyperparameter_name(changed_name)
    changed_value = 1
    result = ConfigSpace.c_util.change_hp_value(
        space, start.get_array(), changed_name, changed_value, changed_idx
    )

    np.testing.assert_almost_equal(
        result, np.array([1, np.nan, np.nan, np.nan]))
def test_check_forbidden_with_sampled_vector_configuration(self):
    """A vector-built configuration hitting a forbidden clause must raise."""
    space = ConfigurationSpace()
    metric_hp = CategoricalHyperparameter("metric", ["minkowski", "other"])
    space.add_hyperparameter(metric_hp)
    space.add_forbidden_clause(ForbiddenEqualsClause(metric_hp, "other"))

    config = Configuration(space, vector=np.ones(1, dtype=float))
    # Fetch the array outside the guarded call so that only
    # ``_check_forbidden`` itself is expected to raise.
    vector = config.get_array()
    with self.assertRaisesRegex(ValueError, "violates forbidden clause"):
        space._check_forbidden(vector)
def test_sample_configuration(self):
    """Sampling must be reproducible after re-seeding the space.

    A small parent/child space is validated first by constructing a
    configuration directly. Then a space with nested conjunctions is sampled
    100 times in each of five rounds, re-seeding with 1 before every round;
    each round must reproduce the previous one.
    """
    simple = ConfigurationSpace()
    parent = CategoricalHyperparameter("parent", [0, 1])
    simple.add_hyperparameter(parent)
    child = UniformIntegerHyperparameter("child", 0, 10)
    simple.add_hyperparameter(child)
    simple.add_condition(EqualsCondition(child, parent, 0))
    # This automatically checks the configuration!
    Configuration(simple, dict(parent=0, child=5))

    # and now for something more complicated
    cs = ConfigurationSpace(seed=1)
    inputs = []
    for name in ("input1", "input2", "input3", "input4", "input5"):
        categorical = CategoricalHyperparameter(name, [0, 1])
        cs.add_hyperparameter(categorical)
        inputs.append(categorical)
    in1, in2, in3, in4, in5 = inputs
    and_hp = Constant("AND", "True")
    cs.add_hyperparameter(and_hp)

    and_on_in1 = EqualsCondition(and_hp, in1, 1)
    and_on_not_in2 = NotEqualsCondition(and_hp, in2, 1)
    and_on_in3 = InCondition(and_hp, in3, [1])
    in5_on_in3 = EqualsCondition(in5, in3, 1)
    in4_on_in5 = EqualsCondition(in4, in5, 1)
    and_on_in4 = EqualsCondition(and_hp, in4, 1)
    and_on_in5 = EqualsCondition(and_hp, in5, 1)

    inner_and = AndConjunction(and_on_in1, and_on_not_in2)
    inner_or = OrConjunction(inner_and, and_on_in3)
    outer_and = AndConjunction(inner_or, and_on_in4, and_on_in5)
    cs.add_condition(in5_on_in3)
    cs.add_condition(in4_on_in5)
    cs.add_condition(outer_and)

    rounds = []
    for round_idx in range(5):
        cs.seed(1)
        current = []
        rounds.append(current)
        for _ in range(100):
            current.append(cs.sample_configuration())

        if round_idx > 0:
            previous = rounds[-2]
            for new, old in zip(current, previous):
                self.assertEqual(new, old)
def test_merge_foreign_data(self):
    """Test ``merge_foreign_data`` with a foreign scenario and run history.

    Two foreign runs are merged into an empty run history; both must be
    stored, but they must neither contribute to the configuration's cost nor
    be returned by ``get_runs_for_config``. Adding a run on an instance
    unknown to the foreign scenario must make a further merge raise
    ``ValueError``.
    """
    scenario = Scenario(self.test_scenario_dict)
    foreign_scenario = Scenario(self.test_scenario_dict)
    foreign_scenario.feature_dict = {"inst_new": [4]}

    # init cs
    cs = ConfigurationSpace()
    for hp_name in ('a', 'b'):
        cs.add_hyperparameter(
            UniformIntegerHyperparameter(name=hp_name, lower=0, upper=100))

    # build runhistory
    foreign_rh = RunHistory(aggregate_func=average_cost)
    config = Configuration(cs, values={'a': 1, 'b': 2})
    foreign_rh.add(config=config, instance_id="inst_new", cost=10, time=20,
                   status=StatusType.SUCCESS, seed=None,
                   additional_info=None)
    # "d" is an instance in <scenario>
    foreign_rh.add(config=config, instance_id="d", cost=5, time=20,
                   status=StatusType.SUCCESS, seed=None,
                   additional_info=None)

    # build empty rh
    base_rh = RunHistory(aggregate_func=average_cost)

    merge_foreign_data(scenario=scenario,
                       runhistory=base_rh,
                       in_scenario_list=[foreign_scenario],
                       in_runhistory_list=[foreign_rh])

    # both runs should be in the runhistory
    # but we should not use the data to update the cost of config
    self.assertTrue(len(base_rh.data) == 2)
    self.assertTrue(np.isnan(base_rh.get_cost(config)))

    # we should not get direct access to external run data
    visible_runs = base_rh.get_runs_for_config(config)
    self.assertTrue(len(visible_runs) == 0)

    foreign_rh.add(config=config, instance_id="inst_new_2", cost=10,
                   time=20, status=StatusType.SUCCESS, seed=None,
                   additional_info=None)

    with self.assertRaises(ValueError):
        merge_foreign_data(scenario=scenario,
                           runhistory=base_rh,
                           in_scenario_list=[foreign_scenario],
                           in_runhistory_list=[foreign_rh])
def _get_incumbent(self, i):
    """Return the final incumbent of the ``i``-th result as a Configuration.

    A result stored as a string is first converted with
    ``logged_results_to_HBS_result``. The incumbent is the last entry of the
    incumbent trajectory, looked up in the id-to-config mapping.
    """
    run = self.results[i]
    space = self.config_spaces[i]
    if isinstance(run, str):
        run = logged_results_to_HBS_result(run)

    configs_by_id = run.get_id2config_mapping()
    trajectory = run.get_incumbent_trajectory(
        bigger_is_better=self.bigger_is_better,
        non_decreasing_budget=self.bigger_is_better,
    )
    final_id = trajectory["config_ids"][-1]
    return Configuration(space, configs_by_id[final_id]["config"])
def impute_inactive_values(
        configuration: Configuration,
        strategy: Union[str, float] = 'default') -> Configuration:
    """Return a copy of ``configuration`` with inactive values filled in.

    Parameters
    ----------
    configuration : Configuration
        Configuration whose hyperparameters are iterated. A hyperparameter
        whose value is ``None`` is treated as inactive.
    strategy : string or float, optional (default='default')
        The imputation strategy.

        - If 'default', replace inactive parameters by their default.
        - If float, replace inactive parameters by the given float value,
          which should be able to be split apart by a tree-based model.

    Returns
    -------
    Configuration
        New configuration over the same configuration space, created with
        ``allow_inactive_with_values=True``.

    Raises
    ------
    ValueError
        If an inactive value is encountered and ``strategy`` is neither
        'default' nor a float.
    """
    space = configuration.configuration_space
    imputed = {}
    for name in configuration:
        current = configuration.get(name)
        if current is not None:
            imputed[name] = current
            continue

        # The strategy is only inspected once an inactive value is found.
        if strategy == 'default':
            imputed[name] = space.get_hyperparameter(name).default
        elif isinstance(strategy, float):
            imputed[name] = strategy
        else:
            raise ValueError('Unknown imputation strategy %s' % str(strategy))

    return Configuration(space,
                         values=imputed,
                         allow_inactive_with_values=True)
def test_bounds_on_crash(self):
    """Objective bounds are expected to ignore the costs of a crashed run.

    Three runs with two objectives are added; the middle one is CRASHED
    with costs ``[100, 100]``. The expected bounds ``(0, 10)`` and
    ``(50, 150)`` only match the two successful runs.
    """
    rh = RunHistory()
    cs = get_config_space()
    first = Configuration(cs, values={"a": 1, "b": 2})
    second = Configuration(cs, values={"a": 2, "b": 3})
    third = Configuration(cs, values={"a": 3, "b": 4})

    # (config, cost, time, status) for each run, in insertion order.
    planned_runs = (
        (first, [10, 50], 5, StatusType.SUCCESS),
        (second, [100, 100], 10, StatusType.CRASHED),
        (third, [0, 150], 15, StatusType.SUCCESS),
    )
    for config, cost, runtime, status in planned_runs:
        rh.add(
            config=config,
            cost=cost,
            time=runtime,
            status=status,
            instance_id=1,
            seed=1,
            budget=1,
        )

    lower_upper_first, lower_upper_second = (
        rh.objective_bounds[0],
        rh.objective_bounds[1],
    )
    self.assertEqual(lower_upper_first, (0, 10))
    self.assertEqual(lower_upper_second, (50, 150))
def test_get_configs_per_budget(self):
    """``get_all_configs_per_budget([1])`` returns only budget-1 configs.

    Two configurations are run on budget 1 and a third on budget 3; only
    the first two are expected, in insertion order.
    """
    rh = RunHistory()
    cs = get_config_space()

    # (hyperparameter value for both a and b, cost, time, budget)
    plan = (
        (1, [10, 20], 10, 1),
        (2, [20, 30], 20, 1),
        (3, [30, 40], 30, 3),
    )
    created = []
    for value, cost, runtime, budget in plan:
        config = Configuration(cs, values={"a": value, "b": value})
        rh.add(
            config=config,
            cost=cost,
            time=runtime,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=1,
            budget=budget,
        )
        created.append(config)

    on_budget_one = rh.get_all_configs_per_budget([1])
    self.assertListEqual(on_budget_one, [created[0], created[1]])
def setUp(self):
    """Build the fixtures: run history, configs, scenario, stats and logger."""
    unittest.TestCase.setUp(self)

    self.rh = RunHistory()
    self.cs = get_config_space()
    self.config1 = Configuration(self.cs, values={"a": 0, "b": 100})
    self.config2 = Configuration(self.cs, values={"a": 100, "b": 0})
    self.config3 = Configuration(self.cs, values={"a": 100, "b": 100})

    scenario_options = {
        "cutoff_time": 2,
        "cs": self.cs,
        "run_obj": "runtime",
        "output_dir": "",
        "deterministic": False,
        "limit_resources": True,
    }
    self.scen = Scenario(scenario_options)

    # Timing is started right away on the freshly created stats object.
    self.stats = Stats(scenario=self.scen)
    self.stats.start_timing()

    logger_name = ".".join((self.__module__, self.__class__.__name__))
    self.logger = logging.getLogger(logger_name)
def setUp(self):
    """Build the fixtures: run history, five configs, types/bounds, scenario.

    ``self.scen`` is assigned twice; the second scenario additionally sets
    ``'output_dir': ''`` and is the one left in place.
    """
    unittest.TestCase.setUp(self)

    self.rh = runhistory.RunHistory()
    self.cs = get_config_space()

    space = self.cs
    self.config1 = Configuration(space, values={'a': 0, 'b': 100})
    self.config2 = Configuration(space, values={'a': 100, 'b': 0})
    self.config3 = Configuration(space, values={'a': 100, 'b': 100})
    self.config4 = Configuration(space, values={'a': 23, 'b': 23})
    self.config5 = Configuration(space, values={'a': 5, 'b': 10})

    base_options = {
        'run_obj': 'runtime',
        'cutoff_time': 20,
        'cs': space,
    }
    self.scen = Scenario(base_options)
    self.types, self.bounds = get_types(space, None)

    options_with_output_dir = {
        'run_obj': 'runtime',
        'cutoff_time': 20,
        'cs': space,
        'output_dir': '',
    }
    self.scen = Scenario(options_with_output_dir)
def __init__(self, config=None, pipeline=None, dataset_properties=None,
             include=None, exclude=None, random_state=None,
             init_params=None, incremental_learning=False):
    """Set up the pipeline steps, search space and configuration.

    ``config`` may be ``None`` (the default configuration of the search
    space is used), a dict (converted to a ``Configuration``) or a
    ``Configuration``. A configuration from a different configuration space
    raises ``ValueError`` with a unified diff of both spaces.
    ``random_state=None`` falls back to ``check_random_state(1)``.
    """
    if init_params is None:
        self._init_params = {}
    else:
        self._init_params = init_params

    if include is None:
        self.include_ = {}
    else:
        self.include_ = include

    if exclude is None:
        self.exclude_ = {}
    else:
        self.exclude_ = exclude

    if dataset_properties is None:
        self.dataset_properties_ = {}
    else:
        self.dataset_properties_ = dataset_properties
    # Note that this writes into a caller-supplied dict when one was given.
    self.dataset_properties_['incremental_learning'] = incremental_learning

    self.steps = self._get_pipeline() if pipeline is None else pipeline
    self.config_space = self.get_hyperparameter_search_space()

    if config is not None:
        if isinstance(config, dict):
            config = Configuration(self.config_space, config)
        if self.config_space != config.configuration_space:
            print(self.config_space._children)
            print(config.configuration_space._children)
            import difflib
            own_lines = str(self.config_space).splitlines()
            other_lines = str(config.configuration_space).splitlines()
            diff = '\n'.join(difflib.unified_diff(own_lines, other_lines))
            raise ValueError('Configuration passed does not come from the '
                             'same configuration space. Differences are: '
                             '%s' % diff)
        self.configuration_ = config
    else:
        self.configuration_ = self.config_space.get_default_configuration()

    # The raw ``init_params`` argument (possibly None) is forwarded here.
    self.set_hyperparameters(self.configuration_, init_params=init_params)

    seed_source = 1 if random_state is None else random_state
    self.random_state = check_random_state(seed_source)

    super().__init__(steps=self.steps)
    self._additional_run_info = {}
def __call__(
    self,
    scenario_dict,
    seed,
    ta,
    ta_kwargs,
    metalearning_configurations,
    n_jobs,
    dask_client,
):
    """Build a ``SMAC4AC`` object that uses successive halving.

    Portfolio members that cannot be turned into a ``Configuration`` for the
    scenario's configuration space (``ValueError``) are skipped. The
    remaining ones are passed as initial configurations.
    ``metalearning_configurations`` is accepted but not used here.
    ``ta_kwargs`` is modified in place: ``'budget_type'`` is set.
    """
    from smac.facade.smac_ac_facade import SMAC4AC
    from smac.intensification.successive_halving import SuccessiveHalving
    from smac.runhistory.runhistory2epm import RunHistory2EPM4LogCost
    from smac.scenario.scenario import Scenario

    scenario = Scenario(scenario_dict)

    portfolio_configs = []
    for portfolio_member in self.portfolio.values():
        try:
            candidate = Configuration(
                configuration_space=scenario.cs, values=portfolio_member)
        except ValueError:
            # Deliberately best-effort: drop members that do not fit.
            continue
        portfolio_configs.append(candidate)

    ta_kwargs['budget_type'] = self.budget_type

    halving_options = {
        'initial_budget': self.initial_budget,
        'max_budget': 100,
        'eta': self.eta,
        'min_chall': 1,
    }
    facade = SMAC4AC(
        scenario=scenario,
        rng=seed,
        runhistory2epm=RunHistory2EPM4LogCost,
        tae_runner=ta,
        tae_runner_kwargs=ta_kwargs,
        initial_configurations=portfolio_configs,
        run_id=seed,
        intensifier=SuccessiveHalving,
        intensifier_kwargs=halving_options,
        dask_client=dask_client,
        n_jobs=n_jobs,
    )

    # Half the number of hyperparameters, rounded down.
    num_hyperparameters = len(scenario.cs.get_hyperparameters())
    facade.solver.epm_chooser.min_samples_model = int(num_hyperparameters / 2)
    return facade
def set_hyperparameters(self,
                        configuration: Configuration,
                        init_params: Optional[Dict] = None
                        ) -> 'Pipeline':
    """Distribute a configuration over the steps of the pipeline.

    For every step, the entries of ``configuration`` (and of
    ``init_params``, if given) whose name starts with ``'<step name>:'`` are
    collected with that prefix removed and handed to the step's own
    ``set_hyperparameters``.

    Args:
        configuration (Configuration): configuration object to search and
            overwrite in the pertinent spaces
        init_params (Optional[Dict]): optional initial settings for the
            config

    Returns:
        The pipeline itself.

    Raises:
        NotImplementedError: if a step is not an ``autoPyTorchChoice``,
            ``autoPyTorchComponent`` or ``BasePipeline``.
    """
    self.configuration = configuration

    for step_name, step in self.steps:
        prefix = '%s:' % step_name
        step_space = step.get_hyperparameter_search_space(
            self.dataset_properties)

        step_values = {
            key.replace(prefix, '', 1): configuration[key]
            for key in configuration
            if key.startswith(prefix)
        }
        step_configuration = Configuration(step_space, values=step_values)

        step_init_params = None
        if init_params is not None:
            step_init_params = {
                key.replace(prefix, '', 1): init_params[key]
                for key in init_params
                if key.startswith(prefix)
            }

        supported = (autoPyTorchChoice, autoPyTorchComponent, BasePipeline)
        if not isinstance(step, supported):
            raise NotImplementedError('Not supported yet!')
        step.set_hyperparameters(
            configuration=step_configuration,
            init_params=step_init_params,
        )

    return self
def test_add(self):
    """Add a few runs to a run history and transform it.

    Also checks that ``RunHistory2EPM4LogCost`` cannot be constructed
    without arguments (``TypeError``).
    """
    rh = runhistory.RunHistory()
    cs = get_config_space()
    first = Configuration(cs, values={'a': 1, 'b': 2})
    second = Configuration(cs, values={'a': 1, 'b': 25})
    third = Configuration(cs, values={'a': 2, 'b': 2})

    rh.add(config=first, cost=10, time=20,
           status=StatusType.SUCCESS, instance_id=23,
           seed=None,
           additional_info=None)
    rh.add(config=second, cost=10, time=20,
           status=StatusType.SUCCESS, instance_id=1,
           seed=12354,
           additional_info={"start_time": 10})
    rh.add(config=third, cost=10, time=20,
           status=StatusType.TIMEOUT, instance_id=1,
           seed=45,
           additional_info={"start_time": 10})

    scen = Scenario({"cutoff_time": 10})

    with self.assertRaises(TypeError):
        runhistory2epm.RunHistory2EPM4LogCost()

    converter = runhistory2epm.RunHistory2EPM4LogCost(
        num_params=2, scenario=scen)
    # The result is not inspected; the call only has to succeed.
    rhArr = converter.transform(rh)
def test_add_multiple_times(self):
    """Adding the same config/instance/seed five times keeps one entry.

    The stored cost is expected to be 1, i.e. the cost of the first add.
    """
    rh = RunHistory()
    cs = get_config_space()
    config = Configuration(cs, values={'a': 1, 'b': 2})

    for value in range(1, 6):
        rh.add(config=config, cost=value, time=value,
               status=StatusType.SUCCESS, instance_id=None,
               seed=12345, additional_info=None)

    self.assertEqual(len(rh.data), 1)
    max_budget_runs = rh.get_runs_for_config(
        config, only_max_observed_budget=True)
    self.assertEqual(len(max_budget_runs), 1)
    self.assertEqual(len(rh._configid_to_inst_seed_budget[1]), 1)
    stored_values = list(rh.data.values())
    self.assertEqual(stored_values[0].cost, 1)
def get_config(self, budget) -> Tuple[dict, dict]:
    """Return the next configuration to evaluate for ``budget``.

    User-defined initial points are consumed first, in order; points that
    already exist for this budget are skipped. Once they are exhausted (or
    none were given), the result of ``self.get_config_`` is returned.
    """
    # get max_budget
    # calc by budget2epm
    max_budget = self.get_available_max_budget()

    # initial points
    if self.initial_points is not None:
        while self.initial_points_index < len(self.initial_points):
            used_index = self.initial_points_index
            candidate = Configuration(
                self.config_space, self.initial_points[used_index])
            # Advance before the existence check so a skipped point is not
            # visited again.
            self.initial_points_index += 1
            candidate.origin = "User Defined"
            if self.is_config_exist(budget, candidate):
                continue
            self.logger.debug(
                f"Using initial points [{self.initial_points_index - 1}]"
            )
            return self.process_config_info_pair(candidate, {}, budget)

    return self.get_config_(budget, max_budget)
def _invert_bilog_logit(self, x):
    """Map transformed values back and return a Configuration.

    ``x`` is either a dict (shallow-copied) or an object providing
    ``get_dictionary()``. For every key listed in ``self._par`` the inverse
    transformation from ``INV_TRANS`` is applied and the result is clipped
    to the bounds of the hyperparameter in ``self.original_cs``.
    """
    if isinstance(x, dict):
        values = copy.copy(x)
    else:
        values = x.get_dictionary()

    for name, transformed in values.items():
        if name not in self._par:
            continue
        hp = self.original_cs.get_hyperparameter(name)
        inverse = INV_TRANS[self._par[name]]
        values[name] = np.clip(inverse(transformed), hp.lower, hp.upper)
        # need to check original configspace since bilog-int are converted to float
        original_hp = self.original_cs.get_hyperparameter(name)
        if isinstance(original_hp, UniformIntegerHyperparameter):
            values[name] = int(np.rint(values[name]))

    return Configuration(self.original_cs, values=values)
def suggest_configuration(self):
    """Suggest the next configuration to evaluate.

    With no observations, or with exactly one, a point is drawn uniformly
    at random within ``[self.X_lower, self.X_upper]``. Otherwise a DNGO
    model is trained on ``self.X`` / ``self.Y`` and the maximizer of the
    integrated LogEI acquisition function is used.

    Returns
    -------
    The result of ``configspace_config_to_sacred`` for the suggested
    configuration.
    """
    no_data = self.X is None and self.Y is None
    if no_data or self.X.shape[0] == 1:
        # We need at least 2 data points to train the model. The
        # single-observation case used to store its sample under a
        # different name, which left ``new_x`` undefined below.
        new_x = init_random_uniform(self.X_lower, self.X_upper,
                                    N=1, rng=self.rng)
    else:
        prior = DNGOPrior()
        model = DNGO(batch_size=100, num_epochs=20000,
                     learning_rate=0.1, momentum=0.9,
                     l2=1e-16, adapt_epoch=5000,
                     n_hypers=20, prior=prior,
                     do_optimize=True, do_mcmc=True)

        lo = np.ones([model.n_units_3]) * -1
        up = np.ones([model.n_units_3])
        ei = LogEI(model, lo, up)

        acquisition_func = IntegratedAcquisition(
            model, ei, self.X_lower, self.X_upper)
        maximizer = Direct(acquisition_func, self.X_lower, self.X_upper)

        model.train(self.X, self.Y)
        acquisition_func.update(model)
        new_x = maximizer.maximize()

    # Map from [0, 1]^D space back to original space
    next_config = Configuration(self.config_space, vector=new_x[0, :])

    # Transform to sacred configuration
    return configspace_config_to_sacred(next_config)
def fit(self, scenario: ASlibScenario, config: Configuration):
    """Fit an ``Imputer`` on the feature data of an ASlib scenario.

    The imputer is stored as ``self.imputer``.

    Arguments
    ---------
    scenario: data.aslib_scenario.ASlibScenario
        ASlib Scenario with all data in pandas
    config: ConfigSpace.Configuration
        configuration; its ``"imputer_strategy"`` entry is passed to the
        imputer as ``strategy``
    """
    chosen_strategy = config.get("imputer_strategy")
    self.imputer = Imputer(strategy=chosen_strategy)
    feature_matrix = scenario.feature_data.values
    self.imputer.fit(feature_matrix)
def test_get_config_runs(self):
    """``get_runs_for_config`` returns only the runs of the given config.

    ``config1`` is run on instances 1 and 2, ``config2`` on instance 1;
    the two runs of ``config1`` are expected in insertion order.
    """
    rh = RunHistory(aggregate_func=average_cost)
    cs = get_config_space()
    config1 = Configuration(cs, values={'a': 1, 'b': 2})
    config2 = Configuration(cs, values={'a': 1, 'b': 3})

    # (config, instance_id, seed) in insertion order.
    for config, instance, seed in ((config1, 1, 1),
                                   (config2, 1, 1),
                                   (config1, 2, 2)):
        rh.add(config=config, cost=10, time=20,
               status=StatusType.SUCCESS, instance_id=instance,
               seed=seed)

    runs_of_config1 = rh.get_runs_for_config(config=config1)

    self.assertEqual(len(runs_of_config1), 2)
    first_run, second_run = runs_of_config1[0], runs_of_config1[1]
    self.assertEqual(first_run.instance, 1)
    self.assertEqual(second_run.instance, 2)
def new_result(self, job: Job, update_model=True):
    """Record the outcome of a finished job and optionally update the model.

    The loss is ``+inf`` when the job has no result or when its loss is not
    finite (so a loss of minus infinity also counts as bad). The
    configuration, its vector and the loss are appended to the observations
    of the job's budget. ``self._new_result`` is only called when
    ``update_model`` is true.
    """
    # ---- 1. update observations ----
    if job.result is None:
        # One could skip crashed results, but we decided to
        # assign a +inf loss and count them as bad configurations
        loss = np.inf
    else:
        # same for non numeric losses.
        reported = job.result["loss"]
        loss = reported if np.isfinite(reported) else np.inf

    budget = job.kwargs["budget"]
    config_dict = job.kwargs["config"]
    run_key = (get_hash_of_config(config_dict), budget)

    if run_key not in self.runId2info:
        self.logger.error(f"runId {run_key} not in runId2info, it's impossible!!!")
    else:
        self.runId2info[run_key]["end_time"] = time()
        self.runId2info[run_key]["loss"] = loss

    config = Configuration(self.config_space, config_dict)

    # add lock (It may be added twice, but it does not affect)
    self.budget2obvs[budget]["locks"].append(config.get_array().copy())
    self.budget2obvs[budget]["configs"].append(deepcopy(config))
    self.budget2obvs[budget]["vectors"].append(config.get_array())
    self.budget2obvs[budget]["losses"].append(loss)

    all_losses = np.array(self.budget2obvs[budget]["losses"])
    all_vectors = np.array(self.budget2obvs[budget]["vectors"])

    # ---- 2. judge whether the EPM training conditions are satisfied ----
    if update_model:
        self._new_result(budget, all_vectors, all_losses)
def __init__(self,
             config: Optional[Configuration] = None,
             steps: Optional[List[Tuple[str, autoPyTorchChoice]]] = None,
             dataset_properties: Optional[Dict[str, Any]] = None,
             include: Optional[Dict[str, Any]] = None,
             exclude: Optional[Dict[str, Any]] = None,
             random_state: Optional[np.random.RandomState] = None,
             init_params: Optional[Dict[str, Any]] = None):
    """Set up the pipeline steps, search space and configuration.

    ``config`` may be ``None`` (the default configuration of the search
    space is used), a dict (converted to a ``Configuration``) or a
    ``Configuration``. A configuration from a different configuration space
    triggers two warnings and a ``ValueError`` carrying a unified diff of
    both spaces. ``random_state=None`` falls back to
    ``check_random_state(1)``.
    """
    if init_params is None:
        self.init_params = {}
    else:
        self.init_params = init_params

    if dataset_properties is None:
        self.dataset_properties = {}
    else:
        self.dataset_properties = dataset_properties

    if include is None:
        self.include = {}
    else:
        self.include = include

    if exclude is None:
        self.exclude = {}
    else:
        self.exclude = exclude

    if steps is not None:
        self.steps = steps
    else:
        # The raw argument (possibly None) is forwarded here.
        self.steps = self._get_pipeline_steps(dataset_properties)

    self.config_space = self.get_hyperparameter_search_space()

    if config is not None:
        if isinstance(config, dict):
            config = Configuration(self.config_space, config)
        if self.config_space != config.configuration_space:
            warnings.warn(self.config_space._children)
            warnings.warn(config.configuration_space._children)
            import difflib
            own_lines = str(self.config_space).splitlines()
            other_lines = str(config.configuration_space).splitlines()
            diff_msg = '\n'.join(
                difflib.unified_diff(own_lines, other_lines))
            raise ValueError('Configuration passed does not come from the '
                             'same configuration space. Differences are: '
                             '%s' % diff_msg)
        self.config = config
    else:
        self.config = self.config_space.get_default_configuration()

    self.set_hyperparameters(self.config, init_params=init_params)

    seed_source = 1 if random_state is None else random_state
    self.random_state = check_random_state(seed_source)

    super().__init__(steps=self.steps)

    self._additional_run_info = {}  # type: Dict[str, str]
def suggest_configuration(self):
    """Suggest the next configuration to evaluate.

    With no observations, or with exactly one, a point is drawn uniformly
    at random within ``[self.lower, self.upper]``. Otherwise a Gaussian
    process with a Matern-5/2 kernel and MCMC over its hyperparameters is
    trained on ``self.X`` / ``self.y`` and the maximizer of the marginalized
    LogEI acquisition function is used.

    Returns
    -------
    The result of ``configspace_config_to_sacred`` for the suggested
    configuration.
    """
    nothing_observed = self.X is None and self.y is None
    # We need at least 2 data points to train a GP
    if nothing_observed or self.X.shape[0] == 1:
        random_points = init_random_uniform(
            self.lower, self.upper, n_points=1, rng=self.rng)
        new_x = random_points[0, :]
    else:
        cov_amp = 1
        n_dims = self.lower.shape[0]

        length_scales = np.ones([n_dims])
        matern = george.kernels.Matern52Kernel(length_scales, ndim=n_dims)
        kernel = cov_amp * matern

        prior = DefaultPrior(len(kernel) + 1)

        gp = GaussianProcessMCMC(
            kernel,
            prior=prior,
            n_hypers=self.n_hypers,
            chain_length=self.chain_length,
            burnin_steps=self.burnin,
            normalize_input=False,
            normalize_output=True,
            rng=self.rng,
            lower=self.lower,
            upper=self.upper,
        )

        acquisition_func = MarginalizationGPMCMC(LogEI(gp))
        optimizer = Direct(
            acquisition_func, self.lower, self.upper, verbose=False)

        gp.train(self.X, self.y)
        acquisition_func.update(gp)
        new_x = optimizer.maximize()

    suggested = Configuration(self.config_space, vector=new_x)

    # Transform to sacred configuration
    return configspace_config_to_sacred(suggested)
def get_id_of_config(config: Configuration):
    """Return an MD5 hex digest identifying a configuration.

    The digest covers the raw buffer of ``config.get_array()`` followed by
    the UTF-8 encoded string of its shape. An array that is not
    C-contiguous is replaced by a contiguous copy of its transpose first.
    """
    # todo:, instance="", seed=0
    vector: np.ndarray = config.get_array()
    if not vector.flags['C_CONTIGUOUS']:
        vector = np.ascontiguousarray(vector.T)

    digest = hashlib.md5()
    digest.update(vector.data)
    digest.update(str(vector.shape).encode('utf8'))
    return digest.hexdigest()
def extract_configuration(self, job):
    """Extract configuration, budget and loss from a finished job.

    A job without a result is not skipped: a warning is logged and it is
    given a loss of ``+inf`` so that it counts as a bad configuration.

    Parameters
    ----------
    job
        Object providing ``result`` (``None`` or a mapping with a ``"loss"``
        entry), ``kwargs`` (with ``"budget"`` and ``"config"`` entries),
        ``id`` and ``exception``.

    Returns
    -------
    tuple
        ``(configuration, budget, loss)`` where ``configuration`` is a
        ``Configuration`` in ``self.config_space``.
    """
    if job.result is None:
        # Pass the values as logging arguments: the message uses %-style
        # placeholders, which ``str.format`` would leave unfilled.
        logger.warning("Job %s failed with exception\n%s",
                       job.id, job.exception)
        loss = np.inf
    else:
        loss = job.result["loss"]

    budget = job.kwargs["budget"]
    # We want to get a numerical representation of the configuration in the
    # original space
    configuration = Configuration(self.config_space, job.kwargs["config"])
    return configuration, budget, loss
def test_config_decorator(self):
    """``check_parameters`` hands dicts to the wrapped function.

    A ``Configuration`` and ``fidelity=None`` are passed in; both values
    returned by the wrapped function must be dicts, and the fidelity must
    contain ``'fidelity1'`` equal to 1.0.
    """
    @AbstractBenchmark.check_parameters
    def tmp(_, configuration: Union[Dict, np.ndarray], fidelity: Dict, **kwargs):
        return configuration, fidelity

    hyperparameter_values = {'hp1': 0.25, 'hp2': 1.25, 'hp3': 2.25}
    configuration = Configuration(
        self.foo.configuration_space, hyperparameter_values)

    returned = tmp(self=self.foo, configuration=configuration, fidelity=None)
    checked_config, checked_fidelity = returned

    assert isinstance(checked_config, Dict)
    assert isinstance(checked_fidelity, Dict)
    assert checked_fidelity['fidelity1'] == 1.0
def split(self, config_ext: CS.Configuration, as_dict: bool=False) -> \
        (Union[CS.Configuration, dict], int):
    """
    Separate an extended config into the plain config and its resource.

    The resource value is read from the entry named
    ``self.resource_attr_name`` and converted to ``int``.

    :param config_ext: Extended config
    :param as_dict: Return config as dict?
    :return: (config, resource_value)
    """
    remaining = copy.copy(config_ext.get_dictionary())
    resource_value = int(remaining.pop(self.resource_attr_name))
    if as_dict:
        return remaining, resource_value
    plain_config = CS.Configuration(
        self.hp_ranges.config_space, values=remaining)
    return plain_config, resource_value
def test_multi_config_design(self):
    """Run an ``InitialDesign`` over two explicit configurations.

    Four target-algorithm runs and four run-history entries are expected
    (two runs per config), and the returned incumbent must have cost 4.
    """
    stats = Stats(scenario=self.scenario)
    stats.start_timing()
    self.ta.stats = stats

    traj_logger = TrajLogger(output_dir=None, stats=stats)
    run_history = RunHistory(aggregate_func=average_cost)
    self.ta.runhistory = run_history
    rng = np.random.RandomState(seed=12345)

    intensifier = Intensifier(
        tae_runner=self.ta,
        stats=stats,
        traj_logger=traj_logger,
        rng=rng,
        instances=[None],
        run_obj_time=False,
    )

    configs = [
        Configuration(configuration_space=self.cs, values={"x1": 4}),
        Configuration(configuration_space=self.cs, values={"x1": 2}),
    ]
    design = InitialDesign(
        tae_runner=self.ta,
        scenario=self.scenario,
        stats=stats,
        traj_logger=traj_logger,
        runhistory=run_history,
        rng=rng,
        configs=configs,
        intensifier=intensifier,
        aggregate_func=average_cost,
    )

    incumbent = design.run()

    # two runs per config
    self.assertTrue(stats.ta_runs == 4)
    self.assertTrue(len(run_history.data) == 4)
    self.assertTrue(run_history.get_cost(incumbent) == 4)
def test_init_with_values(self):
    """A config built from values equals the one built from its vector."""
    from_values = Configuration(
        self.cs, values={'parent': 1, 'child': 2, 'friend': 3})

    # Pay attention that the vector does not necessarily have an intuitive
    # sorting!
    # Values are a little bit higher than one would expect because
    # an integer range of [0,10] is transformed to [-0.499,10.499].
    entry_by_name = {
        'parent': 1,
        'child': 0.22727223140405708,
        'friend': 0.583333611112037,
    }
    vector = [None] * 3
    for name, position in self.cs._hyperparameter_idx.items():
        vector[position] = entry_by_name[name]
    from_vector = Configuration(self.cs, vector=vector)

    # This tests
    # a) that the vector representation of both are the same
    # b) that the dictionary representation of both are the same
    self.assertEqual(from_values, from_vector)
def fit(self, scenario: ASlibScenario, config: Configuration):
    """Fit a ``StandardScaler`` on the feature data of an ASlib scenario.

    Nothing happens unless the ``"StandardScaler"`` entry of ``config`` is
    truthy; otherwise the fitted scaler is stored as ``self.scaler``.

    Arguments
    ---------
    scenario: data.aslib_scenario.ASlibScenario
        ASlib Scenario with all data in pandas
    config: ConfigSpace.Configuration
        configuration
    """
    if not config.get("StandardScaler"):
        return
    self.scaler = StandardScaler()
    feature_matrix = scenario.feature_data.values
    self.scaler.fit(feature_matrix)
def test_full_update(self):
    """``compute_all_costs`` recomputes costs, optionally per instance.

    Recomputing over everything must leave the cost of ``config2``
    unchanged; restricting to instance 2 must change it to 20.
    """
    rh = RunHistory(aggregate_func=average_cost)
    cs = get_config_space()
    config1 = Configuration(cs, values={'a': 1, 'b': 2})
    config2 = Configuration(cs, values={'a': 1, 'b': 3})

    # (config, cost, instance_id, seed) in insertion order.
    for config, cost, instance, seed in ((config1, 10, 1, 1),
                                         (config2, 10, 1, 1),
                                         (config2, 20, 2, 2)):
        rh.add(config=config, cost=cost, time=20,
               status=StatusType.SUCCESS, instance_id=instance,
               seed=seed)

    initial_cost = rh.get_cost(config2)

    rh.compute_all_costs()
    after_full_recompute = rh.get_cost(config2)
    self.assertTrue(initial_cost == after_full_recompute)

    rh.compute_all_costs(instances=[2])
    after_restricted_recompute = rh.get_cost(config2)
    self.assertTrue(initial_cost != after_restricted_recompute)
    self.assertTrue(after_restricted_recompute == 20)
def remove_resource(
        self, config_ext: CS.Configuration,
        as_dict: bool = False) -> Union[CS.Configuration, dict]:
    """
    Drop the resource attribute from an extended config.

    The entry named ``self.resource_attr_name`` is removed from a shallow
    copy of the config's dictionary.

    :param config_ext: Extended config
    :param as_dict: Return as dict?
    :return: config_ext without resource attribute
    """
    stripped = copy.copy(config_ext.get_dictionary())
    del stripped[self.resource_attr_name]
    if not as_dict:
        return CS.Configuration(self.hp_ranges.config_space, values=stripped)
    return stripped
def impute_inactive_values(configuration: Configuration,
                           strategy: Union[str, float]='default') -> Configuration:
    """Fill in values for inactive hyperparameters.

    A hyperparameter whose value in ``configuration`` is ``None`` is treated
    as inactive.

    Parameters
    ----------
    configuration : Configuration
        Configuration whose hyperparameters are iterated.
    strategy : string or float, optional (default='default')
        The imputation strategy.

        - If 'default', replace inactive parameters by their default.
        - If float, replace inactive parameters by the given float value,
          which should be able to be split apart by a tree-based model.

    Returns
    -------
    Configuration
        New configuration over the same configuration space, created with
        ``allow_inactive_with_values=True``.

    Raises
    ------
    ValueError
        If an inactive value is encountered and ``strategy`` is neither
        'default' nor a float.
    """
    def _replacement(hp_name):
        # Only called for inactive values, so an unknown strategy is
        # reported lazily, exactly when it would first be needed.
        if strategy == 'default':
            hp = configuration.configuration_space.get_hyperparameter(
                hp_name)
            return hp.default
        if isinstance(strategy, float):
            return strategy
        raise ValueError('Unknown imputation strategy %s' % str(strategy))

    filled = dict()
    for hp_name in configuration:
        existing = configuration.get(hp_name)
        filled[hp_name] = (
            _replacement(hp_name) if existing is None else existing)

    return Configuration(configuration.configuration_space,
                         values=filled,
                         allow_inactive_with_values=True)
def get_random_neighbor(configuration: Configuration, seed: int) -> Configuration:
    """Draw a random neighbor by changing one parameter of a configuration.

    * If the parameter is categorical, it changes it to another value.
    * If the parameter is ordinal, it changes it to the next higher or lower
      value.
    * If parameter is a float, draw a random sample

    If the changed values do not form a valid configuration (constructing
    the ``Configuration`` raises ``ValueError``), the change is undone and
    another hyperparameter/neighbor is tried.

    Parameters
    ----------
    configuration : Configuration
    seed : int
        Used to generate a random state.

    Returns
    -------
    Configuration
        The new neighbor.

    Raises
    ------
    ValueError
        If, within one attempt, more than 10000 draws are needed to find an
        active hyperparameter that has neighbors.
    """
    random = np.random.RandomState(seed)
    space = configuration.configuration_space
    num_hyperparameters = configuration._num_hyperparameters
    values = copy.deepcopy(configuration.get_dictionary())

    while True:
        # First, choose an active hyperparameter
        active = False
        iteration = 0
        while not active:
            iteration += 1
            if num_hyperparameters > 1:
                # ``randint`` excludes the upper bound, so the bound must be
                # the number of hyperparameters for the last index to be
                # reachable.
                rand_idx = random.randint(0, num_hyperparameters)
            else:
                rand_idx = 0

            value = configuration.get_array()[rand_idx]
            if np.isfinite(value):
                hp_name = space.get_hyperparameter_by_idx(rand_idx)
                hp = space.get_hyperparameter(hp_name)
                # Only choose if there is a possibility of finding a neighbor
                active = bool(hp.has_neighbors())

            if iteration > 10000:
                raise ValueError('Probably caught in an infinite loop.')

        # Get a neighbor and adapt the rest of the configuration if necessary
        neighbor = hp.get_neighbors(value, random, number=1,
                                    transform=True)[0]
        previous_value = values[hp.name]
        values[hp.name] = neighbor
        try:
            return Configuration(space, values=values)
        except ValueError:
            # Rejected: restore the old value and try again.
            values[hp.name] = previous_value
def get_one_exchange_neighbourhood(configuration, seed):
    """Return all configurations in a one-exchange neighborhood.

    The method is implemented as defined by:
    Frank Hutter, Holger H. Hoos and Kevin Leyton-Brown
    Sequential Model-Based Optimization for General Algorithm Configuration
    In: Proceedings of the conference on Learning and Intelligent
    OptimizatioN (LION 5)

    For every hyperparameter whose entry in ``configuration.get_array()`` is
    finite, neighbor values are requested from the hyperparameter. If
    ``get_num_neighbors()`` is infinite, sampling is repeated until 4 valid
    neighbors were collected or more than 1000 rounds were tried; otherwise
    ``get_neighbors`` is called exactly once. After a value is exchanged,
    the descendants of the changed hyperparameter are re-evaluated: newly
    active ones are set to their default, inactive ones to NaN.

    Parameters
    ----------
    configuration : Configuration
        The configuration for which neighbors are generated.

    seed : int
        Used to generate a random state.

    Returns
    -------
    list
        The neighboring configurations. Candidates for which constructing
        or validating the ``Configuration`` raises ``ValueError`` are
        skipped.
    """
    random = np.random.RandomState(seed)
    neighbourhood = []
    for i, hp_name in enumerate(configuration):
        number_of_sampled_neighbors = 0
        array = configuration.get_array()

        # Inactive hyperparameters have no neighbors
        if not np.isfinite(array[i]):
            continue

        iteration = 0
        while True:
            hp = configuration.configuration_space.get_hyperparameter(hp_name)
            configuration._populate_values()
            num_neighbors = hp.get_num_neighbors()

            # Obtain neighbors differently for different possible numbers of
            # neighbors
            if num_neighbors == 0:
                break
            # No infinite loops
            elif iteration > 1000:
                break
            elif np.isinf(num_neighbors):
                if number_of_sampled_neighbors >= 4:
                    break
                num_samples_to_go = 4 - number_of_sampled_neighbors
                neighbors = hp.get_neighbors(array[i], random,
                                             number=num_samples_to_go)
            else:
                if iteration > 0:
                    break
                neighbors = hp.get_neighbors(array[i], random)

            # Check all newly obtained neighbors
            for neighbor in neighbors:
                new_array = array.copy()
                new_array[i] = neighbor
                neighbor_value = hp._transform(neighbor)

                # Activate hyperparameters if their parent node got activated
                children = configuration.configuration_space.get_children_of(
                    hp_name)
                if len(children) > 0:
                    to_visit = deque()
                    to_visit.extendleft(children)
                    visited = set()
                    activated_values = dict()
                    while len(to_visit) > 0:
                        current = to_visit.pop()
                        if current.name in visited:
                            continue
                        visited.add(current.name)

                        current_idx = configuration.configuration_space. \
                            get_idx_by_hyperparameter_name(current.name)
                        current_value = new_array[current_idx]

                        conditions = configuration.configuration_space.\
                            _get_parent_conditions_of(current.name)

                        active = True
                        for condition in conditions:
                            parent_names = [
                                c.parent.name for c in
                                condition.get_descendant_literal_conditions()]

                            parents = {parent_name: configuration[parent_name]
                                       for parent_name in parent_names}

                            # parents come from the original configuration.
                            # We change at least one parameter. In order set
                            # other parameters which are conditional on this,
                            # we have to activate this
                            if hp_name in parents:
                                parents[hp_name] = neighbor_value
                            # Hyperparameters which are in depth 1 of the
                            # hyperparameter tree might have children which
                            # have to be activated as well. Once we set hp in
                            # level 1 to active, it's value changes from the
                            # value of the original configuration and this
                            # must be done here
                            for parent_name in parent_names:
                                if parent_name in activated_values:
                                    parents[parent_name] = activated_values[
                                        parent_name]

                            # if one of the parents is None, the
                            # hyperparameter cannot be active! Else we have
                            # to check this
                            if any([parent_value is None
                                    for parent_value in parents.values()]):
                                active = False
                            else:
                                if not condition.evaluate(parents):
                                    active = False

                        if active and (current_value is None
                                       or not np.isfinite(current_value)):
                            default = current._inverse_transform(
                                current.default)
                            new_array[current_idx] = default
                            # Descendants of a freshly activated
                            # hyperparameter must be examined as well.
                            grandchildren = configuration.configuration_space \
                                .get_children_of(current.name)
                            if len(grandchildren) > 0:
                                to_visit.extendleft(grandchildren)
                            activated_values[current.name] = current.default

                        # NOTE(review): current_value is read from the array,
                        # so ``is not None`` presumably always holds and this
                        # branch runs whenever the hyperparameter is inactive
                        # -- confirm whether ``and`` was intended.
                        if not active and (current_value is not None
                                           or np.isfinite(current_value)):
                            # np.nan instead of np.NaN: the capitalised alias
                            # was removed in NumPy 2.0.
                            new_array[current_idx] = np.nan

                try:
                    # Populating a configuration from an array does not check
                    # if it is a legal configuration - check this (slow)
                    new_configuration = Configuration(
                        configuration.configuration_space, vector=new_array)
                    new_configuration.is_valid_configuration()
                    neighbourhood.append(new_configuration)
                    number_of_sampled_neighbors += 1
                except ValueError:
                    # Illegal candidate: deliberately skipped.
                    pass

            # Count iterations to not run into an infinite loop when
            # sampling floats/ints and there is large amount of forbidden
            # values; also to find out if we tried to get a neighbor for
            # a categorical hyperparameter, and the only possible
            # neighbor is forbidden together with another active
            # value/default hyperparameter
            iteration += 1

    return neighbourhood
def get_one_exchange_neighbourhood(configuration: Configuration, seed: int) -> List[Configuration]:
    """Lazily yield the configurations of a one-exchange neighborhood.

    The method is implemented as defined by:
    Frank Hutter, Holger H. Hoos and Kevin Leyton-Brown
    Sequential Model-Based Optimization for General Algorithm Configuration
    In: Proceedings of the conference on Learning and Intelligent
    OptimizatioN (LION 5)

    This function is a generator. It repeatedly draws a random
    hyperparameter. The first time an active hyperparameter (finite entry in
    ``configuration.get_array()``) is drawn, its neighbors are computed,
    stored and one of them is yielded; later draws of the same
    hyperparameter yield the remaining stored neighbors in random order.
    Iteration stops once every active hyperparameter is exhausted.

    If ``get_num_neighbors`` is infinite, sampling is repeated until 4 valid
    neighbors were collected or more than 100 rounds were tried; otherwise
    ``get_neighbors`` is called exactly once per computation.

    Parameters
    ----------
    configuration : Configuration
        The configuration for which neighbors are generated.

    seed : int
        Used to generate a random state.

    Yields
    ------
    Configuration
        One neighboring configuration at a time. Candidates for which
        ``ForbiddenValueError`` is raised are skipped.
    """
    # NOTE(review): the return annotation says List, but the body yields, so
    # callers receive a generator. The signature is kept for compatibility.
    random = np.random.RandomState(seed)
    hyperparameters_list = list(configuration.keys())
    hyperparameters_list_length = len(hyperparameters_list)
    neighbors_to_return = dict()
    # A set, not a list: a hyperparameter without neighbors can be drawn
    # several times, and counting it more than once would end the loop
    # before the other hyperparameters were exhausted.
    hyperparameters_used = set()  # type: Set[str]
    number_of_usable_hyperparameters = sum(
        np.isfinite(configuration.get_array()))
    configuration_space = configuration.configuration_space

    while len(hyperparameters_used) < number_of_usable_hyperparameters:
        index = random.randint(hyperparameters_list_length)
        hp_name = hyperparameters_list[index]
        if hp_name in neighbors_to_return:
            # Neighbors were already computed: hand out the next one.
            random.shuffle(neighbors_to_return[hp_name])
            n_ = neighbors_to_return[hp_name].pop()
            if len(neighbors_to_return[hp_name]) == 0:
                del neighbors_to_return[hp_name]
                hyperparameters_used.add(hp_name)
            yield n_

        else:
            # NOTE: an already exhausted hyperparameter can be drawn again.
            # Its neighborhood is then recomputed (which still consumes
            # random draws) but nothing is yielded for it.
            neighbourhood = []
            number_of_sampled_neighbors = 0
            array = configuration.get_array()

            # Inactive hyperparameters have no neighbors
            if not np.isfinite(array[index]):
                continue

            iteration = 0
            while True:
                hp = configuration_space.get_hyperparameter(hp_name)
                configuration._populate_values()
                num_neighbors = hp.get_num_neighbors(
                    configuration.get(hp_name))

                # Obtain neighbors differently for different possible
                # numbers of neighbors
                if num_neighbors == 0:
                    break
                # No infinite loops
                elif iteration > 100:
                    break
                elif np.isinf(num_neighbors):
                    if number_of_sampled_neighbors >= 4:
                        break
                    num_samples_to_go = 4 - number_of_sampled_neighbors
                    neighbors = hp.get_neighbors(array[index], random,
                                                 number=num_samples_to_go)
                else:
                    if iteration > 0:
                        break
                    neighbors = hp.get_neighbors(array[index], random)

                # Check all newly obtained neighbors
                for neighbor in neighbors:
                    new_array = array.copy()
                    new_array[index] = neighbor
                    neighbor_value = hp._transform(neighbor)
                    # Hyperparameters which are going to be set to inactive:
                    # array indices for the final NaN assignment, names for
                    # the membership test while walking the tree.
                    disabled = []
                    disabled_names = set()  # type: Set[str]

                    # Activate hyperparameters if their parent node got
                    # activated
                    children = configuration_space._children_of[hp_name]
                    if len(children) > 0:
                        to_visit = deque()  # type: deque
                        to_visit.extendleft(children)
                        visited = set()  # type: Set[str]
                        activated_values = dict()  # type: Dict[str, Union[int, float, str]]
                        while len(to_visit) > 0:
                            current = to_visit.pop()
                            if current.name in visited:
                                continue
                            visited.add(current.name)
                            # Compare names with names; ``disabled`` itself
                            # holds indices and can never contain a name.
                            if current.name in disabled_names:
                                continue

                            current_idx = configuration_space \
                                .get_idx_by_hyperparameter_name(current.name)
                            current_value = new_array[current_idx]

                            conditions = configuration.configuration_space.\
                                _parent_conditions_of[current.name]

                            active = True
                            for condition in conditions:
                                parent_names = [
                                    parent.name for parent in
                                    configuration_space._parents_of[
                                        current.name]]

                                parents = {
                                    parent_name: configuration[parent_name]
                                    for parent_name in parent_names}

                                # parents come from the original
                                # configuration. We change at least one
                                # parameter. In order set other parameters
                                # which are conditional on this, we have to
                                # activate this
                                if hp_name in parents:
                                    parents[hp_name] = neighbor_value
                                # Hyperparameters which are in depth 1 of the
                                # hyperparameter tree might have children
                                # which have to be activated as well. Once we
                                # set hp in level 1 to active, it's value
                                # changes from the value of the original
                                # configuration and this must be done here
                                for parent_name in parent_names:
                                    if parent_name in activated_values:
                                        parents[parent_name] = \
                                            activated_values[parent_name]

                                # if one of the parents is None, the
                                # hyperparameter cannot be active! Else we
                                # have to check this
                                if any([parent_value is None
                                        for parent_value in parents.values()]):
                                    active = False
                                    break
                                else:
                                    if not condition.evaluate(parents):
                                        active = False
                                        break

                            if active and (current_value is None
                                           or not np.isfinite(current_value)):
                                default = current._inverse_transform(
                                    current.default)
                                new_array[current_idx] = default
                                children_ = configuration_space._children_of[
                                    current.name]
                                if len(children_) > 0:
                                    to_visit.extendleft(children_)
                                activated_values[current.name] = \
                                    current.default

                            # If the hyperparameter was made inactive,
                            # all its children need to be deactivated as well
                            # NOTE(review): current_value is read from the
                            # array, so ``is not None`` presumably always
                            # holds and this branch runs whenever the
                            # hyperparameter is inactive -- confirm whether
                            # ``and`` was intended.
                            if not active and (current_value is not None
                                               or np.isfinite(current_value)):
                                # np.nan instead of np.NaN: the capitalised
                                # alias was removed in NumPy 2.0.
                                new_array[current_idx] = np.nan

                                deactivated_children = \
                                    configuration_space._children_of[
                                        current.name]

                                if len(deactivated_children) > 0:
                                    to_disable = set()
                                    for ch in deactivated_children:
                                        to_disable.add(ch.name)
                                    # Collect all descendants transitively.
                                    while len(to_disable) > 0:
                                        child = to_disable.pop()
                                        child_idx = configuration_space \
                                            .get_idx_by_hyperparameter_name(
                                                child)
                                        disabled.append(child_idx)
                                        disabled_names.add(child)
                                        for ch in configuration_space \
                                                ._children_of[child]:
                                            to_disable.add(ch.name)

                    for idx in disabled:
                        new_array[idx] = np.nan

                    try:
                        # Populating a configuration from an array does not
                        # check if it is a legal configuration - check this
                        # (slow)
                        new_configuration = Configuration(
                            configuration_space, vector=new_array)
                        new_configuration.is_valid_configuration()
                        neighbourhood.append(new_configuration)
                        number_of_sampled_neighbors += 1
                    # todo: investigate why tests fail when
                    # ForbiddenValueError is caught here
                    except ForbiddenValueError:
                        pass

                # Count iterations to not run into an infinite loop when
                # sampling floats/ints and there is large amount of forbidden
                # values; also to find out if we tried to get a neighbor for
                # a categorical hyperparameter, and the only possible
                # neighbor is forbidden together with another active
                # value/default hyperparameter
                iteration += 1

            if len(neighbourhood) == 0:
                # Nothing to hand out for this hyperparameter (idempotent
                # because hyperparameters_used is a set).
                hyperparameters_used.add(hp_name)
            else:
                if hp_name not in hyperparameters_used:
                    neighbors_to_return[hp_name] = neighbourhood
                    random.shuffle(neighbors_to_return[hp_name])
                    n_ = neighbors_to_return[hp_name].pop()
                    if len(neighbors_to_return[hp_name]) == 0:
                        del neighbors_to_return[hp_name]
                        hyperparameters_used.add(hp_name)
                    yield n_