def test_uniformfloat_transform(self):
        """This checks whether a value sampled through the configuration
        space (it does not happend when the variable is sampled alone) stays
        equal when it is serialized via JSON and the deserialized again."""

        cs = ConfigurationSpace()
        a = cs.add_hyperparameter(UniformFloatHyperparameter('a', -5, 10))
        b = cs.add_hyperparameter(NormalFloatHyperparameter('b', 1, 2,
                                                            log=True))
        for i in range(100):
            config = cs.sample_configuration()
            value = OrderedDict(sorted(config.get_dictionary().items()))
            string = json.dumps(value)
            saved_value = json.loads(string)
            saved_value = OrderedDict(sorted(byteify(saved_value).items()))
            self.assertEqual(repr(value), repr(saved_value))

        # Next, test whether the truncation also works when initializing the
        # Configuration with a dictionary
        for i in range(100):
            rs = np.random.RandomState(1)
            value_a = a.sample(rs)
            value_b = b.sample(rs)
            values_dict = {'a': value_a, 'b': value_b}
            config = Configuration(cs, values=values_dict)
            string = json.dumps(config.get_dictionary())
            saved_value = json.loads(string)
            saved_value = byteify(saved_value)
            self.assertEqual(values_dict, saved_value)
Example #2
    def test_check_neighbouring_config_diamond_str(self):
        diamond = ConfigurationSpace()
        head = CategoricalHyperparameter('head', ['red', 'green'])
        left = CategoricalHyperparameter('left', ['red', 'green'])
        right = CategoricalHyperparameter('right', ['red', 'green', 'blue', 'yellow'])
        bottom = CategoricalHyperparameter('bottom', ['red', 'green'])
        diamond.add_hyperparameters([head, left, right, bottom])
        diamond.add_condition(EqualsCondition(left, head, 'red'))
        diamond.add_condition(EqualsCondition(right, head, 'red'))
        diamond.add_condition(AndConjunction(EqualsCondition(bottom, left, 'green'),
                                             EqualsCondition(bottom, right, 'green')))

        config = Configuration(diamond, {'bottom': 'red', 'head': 'red', 'left': 'green', 'right': 'green'})
        hp_name = "head"
        index = diamond.get_idx_by_hyperparameter_name(hp_name)
        neighbor_value = 1

        new_array = ConfigSpace.c_util.change_hp_value(
            diamond,
            config.get_array(),
            hp_name,
            neighbor_value,
            index
        )
        expected_array = np.array([1, np.nan, np.nan, np.nan])

        np.testing.assert_almost_equal(new_array, expected_array)
    def test_check_forbidden_with_sampled_vector_configuration(self):
        cs = ConfigurationSpace()
        metric = CategoricalHyperparameter("metric", ["minkowski", "other"])
        cs.add_hyperparameter(metric)

        forbidden = ForbiddenEqualsClause(metric, "other")
        cs.add_forbidden_clause(forbidden)
        configuration = Configuration(cs, vector=np.ones(1, dtype=float))
        self.assertRaisesRegex(ValueError, "violates forbidden clause",
                               cs._check_forbidden, configuration.get_array())
    def test_sample_configuration(self):
        cs = ConfigurationSpace()
        hp1 = CategoricalHyperparameter("parent", [0, 1])
        cs.add_hyperparameter(hp1)
        hp2 = UniformIntegerHyperparameter("child", 0, 10)
        cs.add_hyperparameter(hp2)
        cond1 = EqualsCondition(hp2, hp1, 0)
        cs.add_condition(cond1)
        # This automatically checks the configuration!
        Configuration(cs, dict(parent=0, child=5))

        # and now for something more complicated
        cs = ConfigurationSpace(seed=1)
        hp1 = CategoricalHyperparameter("input1", [0, 1])
        cs.add_hyperparameter(hp1)
        hp2 = CategoricalHyperparameter("input2", [0, 1])
        cs.add_hyperparameter(hp2)
        hp3 = CategoricalHyperparameter("input3", [0, 1])
        cs.add_hyperparameter(hp3)
        hp4 = CategoricalHyperparameter("input4", [0, 1])
        cs.add_hyperparameter(hp4)
        hp5 = CategoricalHyperparameter("input5", [0, 1])
        cs.add_hyperparameter(hp5)
        hp6 = Constant("AND", "True")
        cs.add_hyperparameter(hp6)

        cond1 = EqualsCondition(hp6, hp1, 1)
        cond2 = NotEqualsCondition(hp6, hp2, 1)
        cond3 = InCondition(hp6, hp3, [1])
        cond4 = EqualsCondition(hp5, hp3, 1)
        cond5 = EqualsCondition(hp4, hp5, 1)
        cond6 = EqualsCondition(hp6, hp4, 1)
        cond7 = EqualsCondition(hp6, hp5, 1)

        conj1 = AndConjunction(cond1, cond2)
        conj2 = OrConjunction(conj1, cond3)
        conj3 = AndConjunction(conj2, cond6, cond7)
        cs.add_condition(cond4)
        cs.add_condition(cond5)
        cs.add_condition(conj3)

        samples = []
        for i in range(5):
            cs.seed(1)
            samples.append([])
            for j in range(100):
                sample = cs.sample_configuration()
                samples[-1].append(sample)

            if i > 0:
                for j in range(100):
                    self.assertEqual(samples[-1][j], samples[-2][j])
Example #5
    def test_merge_foreign_data(self):
        ''' test smac.utils.merge_foreign_data '''

        scenario = Scenario(self.test_scenario_dict)
        scenario_2 = Scenario(self.test_scenario_dict)
        scenario_2.feature_dict = {"inst_new": [4]}

        # init cs
        cs = ConfigurationSpace()
        cs.add_hyperparameter(UniformIntegerHyperparameter(name='a',
                                                           lower=0,
                                                           upper=100))
        cs.add_hyperparameter(UniformIntegerHyperparameter(name='b',
                                                           lower=0,
                                                           upper=100))
        # build runhistory
        rh_merge = RunHistory(aggregate_func=average_cost)
        config = Configuration(cs, values={'a': 1, 'b': 2})

        rh_merge.add(config=config, instance_id="inst_new", cost=10, time=20,
                     status=StatusType.SUCCESS,
                     seed=None,
                     additional_info=None)

        # "d" is an instance in <scenario>
        rh_merge.add(config=config, instance_id="d", cost=5, time=20,
                     status=StatusType.SUCCESS,
                     seed=None,
                     additional_info=None)

        # build empty rh
        rh_base = RunHistory(aggregate_func=average_cost)

        merge_foreign_data(scenario=scenario, runhistory=rh_base,
                           in_scenario_list=[scenario_2], in_runhistory_list=[rh_merge])

        # both runs should be in the runhistory
        # but we should not use the data to update the cost of config
        self.assertTrue(len(rh_base.data) == 2)
        self.assertTrue(np.isnan(rh_base.get_cost(config)))

        # we should not get direct access to external run data
        runs = rh_base.get_runs_for_config(config)
        self.assertTrue(len(runs) == 0)

        rh_merge.add(config=config, instance_id="inst_new_2", cost=10, time=20,
                     status=StatusType.SUCCESS,
                     seed=None,
                     additional_info=None)

        self.assertRaises(ValueError, merge_foreign_data, **{
                          "scenario": scenario, "runhistory": rh_base, "in_scenario_list": [scenario_2], "in_runhistory_list": [rh_merge]})
Example #6
    def _get_incumbent(self, i):
        result = self.results[i]
        config_space = self.config_spaces[i]

        if isinstance(result, str):
            result = logged_results_to_HBS_result(result)
        id2config = result.get_id2config_mapping()
        trajectory = result.get_incumbent_trajectory(
            bigger_is_better=self.bigger_is_better,
            non_decreasing_budget=self.bigger_is_better)

        incumbent = id2config[trajectory["config_ids"][-1]]["config"]
        return Configuration(config_space, incumbent)
Example #7
def impute_inactive_values(
        configuration: Configuration,
        strategy: Union[str, float] = 'default') -> Configuration:
    """Impute inactive parameters.

    Parameters
    ----------
    strategy : string, optional (default='default')
        The imputation strategy.

        - If 'default', replace inactive parameters by their default.
        - If float, replace inactive parameters by the given float value,
          which should be easy for a tree-based model to split apart.
    """
    values = dict()
    for hp_name in configuration:
        value = configuration.get(hp_name)
        if value is None:

            if strategy == 'default':
                hp = configuration.configuration_space.get_hyperparameter(
                    hp_name)
                new_value = hp.default

            elif isinstance(strategy, float):
                new_value = strategy

            else:
                raise ValueError('Unknown imputation strategy %s' %
                                 str(strategy))

            value = new_value

        values[hp_name] = value

    new_configuration = Configuration(configuration.configuration_space,
                                      values=values,
                                      allow_inactive_with_values=True)
    return new_configuration
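
A minimal usage sketch for the helper above, written against the same (older) ConfigSpace API the snippet assumes (e.g. hp.default); the hyperparameter names are made up for illustration.

from ConfigSpace import Configuration, ConfigurationSpace
from ConfigSpace.conditions import EqualsCondition
from ConfigSpace.hyperparameters import CategoricalHyperparameter, UniformFloatHyperparameter

cs = ConfigurationSpace()
use_reg = cs.add_hyperparameter(CategoricalHyperparameter('use_reg', ['no', 'yes']))
alpha = cs.add_hyperparameter(UniformFloatHyperparameter('alpha', 0.0, 1.0))
cs.add_condition(EqualsCondition(alpha, use_reg, 'yes'))

# 'alpha' is inactive in this configuration, so configuration.get('alpha') is None
config = Configuration(cs, values={'use_reg': 'no'})

imputed_default = impute_inactive_values(config)          # inactive -> hyperparameter default
imputed_constant = impute_inactive_values(config, -1.0)   # inactive -> the given float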
Example #8
    def test_bounds_on_crash(self):
        rh = RunHistory()
        cs = get_config_space()
        config1 = Configuration(cs, values={"a": 1, "b": 2})
        config2 = Configuration(cs, values={"a": 2, "b": 3})
        config3 = Configuration(cs, values={"a": 3, "b": 4})

        rh.add(
            config=config1,
            cost=[10, 50],
            time=5,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=1,
            budget=1,
        )

        rh.add(
            config=config2,
            cost=[100, 100],
            time=10,
            status=StatusType.CRASHED,
            instance_id=1,
            seed=1,
            budget=1,
        )

        rh.add(
            config=config3,
            cost=[0, 150],
            time=15,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=1,
            budget=1,
        )

        self.assertEqual(rh.objective_bounds[0], (0, 10))
        self.assertEqual(rh.objective_bounds[1], (50, 150))
Example #9
    def test_get_configs_per_budget(self):
        rh = RunHistory()
        cs = get_config_space()

        config1 = Configuration(cs, values={"a": 1, "b": 1})
        rh.add(
            config=config1,
            cost=[10, 20],
            time=10,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=1,
            budget=1,
        )

        config2 = Configuration(cs, values={"a": 2, "b": 2})
        rh.add(
            config=config2,
            cost=[20, 30],
            time=20,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=1,
            budget=1,
        )

        config3 = Configuration(cs, values={"a": 3, "b": 3})
        rh.add(
            config=config3,
            cost=[30, 40],
            time=30,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=1,
            budget=3,
        )

        configs = rh.get_all_configs_per_budget([1])
        self.assertListEqual(configs, [config1, config2])
Example #10
    def setUp(self):
        unittest.TestCase.setUp(self)

        self.rh = RunHistory()
        self.cs = get_config_space()
        self.config1 = Configuration(self.cs, values={"a": 0, "b": 100})
        self.config2 = Configuration(self.cs, values={"a": 100, "b": 0})
        self.config3 = Configuration(self.cs, values={"a": 100, "b": 100})

        self.scen = Scenario({
            "cutoff_time": 2,
            "cs": self.cs,
            "run_obj": "runtime",
            "output_dir": "",
            "deterministic": False,
            "limit_resources": True,
        })
        self.stats = Stats(scenario=self.scen)
        self.stats.start_timing()

        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)
Example #11
    def setUp(self):
        unittest.TestCase.setUp(self)

        self.rh = runhistory.RunHistory()
        self.cs = get_config_space()
        self.config1 = Configuration(self.cs, values={'a': 0, 'b': 100})
        self.config2 = Configuration(self.cs, values={'a': 100, 'b': 0})
        self.config3 = Configuration(self.cs, values={'a': 100, 'b': 100})
        self.config4 = Configuration(self.cs, values={'a': 23, 'b': 23})
        self.config5 = Configuration(self.cs, values={'a': 5, 'b': 10})
        self.scen = Scenario({
            'run_obj': 'runtime',
            'cutoff_time': 20,
            'cs': self.cs
        })
        self.types, self.bounds = get_types(self.cs, None)
        self.scen = Scenario({
            'run_obj': 'runtime',
            'cutoff_time': 20,
            'cs': self.cs,
            'output_dir': ''
        })
Example #12
    def __init__(self,
                 config=None,
                 pipeline=None,
                 dataset_properties=None,
                 include=None,
                 exclude=None,
                 random_state=None,
                 init_params=None,
                 incremental_learning=False):

        self._init_params = init_params if init_params is not None else {}
        self.include_ = include if include is not None else {}
        self.exclude_ = exclude if exclude is not None else {}
        self.dataset_properties_ = dataset_properties if \
            dataset_properties is not None else {}
        self.dataset_properties_['incremental_learning'] = incremental_learning

        if pipeline is None:
            self.steps = self._get_pipeline()
        else:
            self.steps = pipeline

        self.config_space = self.get_hyperparameter_search_space()

        if config is None:
            self.configuration_ = self.config_space.get_default_configuration()
        else:
            if isinstance(config, dict):
                config = Configuration(self.config_space, config)
            if self.config_space != config.configuration_space:
                print(self.config_space._children)
                print(config.configuration_space._children)
                import difflib
                diff = difflib.unified_diff(
                    str(self.config_space).splitlines(),
                    str(config.configuration_space).splitlines())
                diff = '\n'.join(diff)
                raise ValueError('Configuration passed does not come from the '
                                 'same configuration space. Differences are: '
                                 '%s' % diff)
            self.configuration_ = config

        self.set_hyperparameters(self.configuration_, init_params=init_params)

        if random_state is None:
            self.random_state = check_random_state(1)
        else:
            self.random_state = check_random_state(random_state)
        super().__init__(steps=self.steps)

        self._additional_run_info = {}
Example #13
    def __call__(
        self,
        scenario_dict,
        seed,
        ta,
        ta_kwargs,
        metalearning_configurations,
        n_jobs,
        dask_client,
    ):
        from smac.facade.smac_ac_facade import SMAC4AC
        from smac.intensification.successive_halving import SuccessiveHalving
        from smac.runhistory.runhistory2epm import RunHistory2EPM4LogCost
        from smac.scenario.scenario import Scenario

        scenario = Scenario(scenario_dict)

        initial_configurations = []
        for member in self.portfolio.values():
            try:
                initial_configurations.append(
                    Configuration(configuration_space=scenario.cs, values=member)
                )
            except ValueError:
                pass

        rh2EPM = RunHistory2EPM4LogCost
        ta_kwargs['budget_type'] = self.budget_type

        smac4ac = SMAC4AC(
            scenario=scenario,
            rng=seed,
            runhistory2epm=rh2EPM,
            tae_runner=ta,
            tae_runner_kwargs=ta_kwargs,
            initial_configurations=initial_configurations,
            run_id=seed,
            intensifier=SuccessiveHalving,
            intensifier_kwargs={
                'initial_budget': self.initial_budget,
                'max_budget': 100,
                'eta': self.eta,
                'min_chall': 1,
            },
            dask_client=dask_client,
            n_jobs=n_jobs,
        )
        smac4ac.solver.epm_chooser.min_samples_model = int(
            len(scenario.cs.get_hyperparameters()) / 2
        )
        return smac4ac
Example #14
    def set_hyperparameters(self,
                            configuration: Configuration,
                            init_params: Optional[Dict] = None) -> 'Pipeline':
        """Method to set the hyperparameter configuration of the pipeline.

        It iterates over the components of the pipeline and applies a given
        configuration accordingly.

        Args:
            configuration (Configuration): configuration object to search and overwrite in
                the pertinent spaces
            init_params (Optional[Dict]): optional initial settings for the config

        """
        self.configuration = configuration

        for node_idx, n_ in enumerate(self.steps):
            node_name, node = n_

            sub_configuration_space = node.get_hyperparameter_search_space(
                self.dataset_properties)
            sub_config_dict = {}
            for param in configuration:
                if param.startswith('%s:' % node_name):
                    value = configuration[param]
                    new_name = param.replace('%s:' % node_name, '', 1)
                    sub_config_dict[new_name] = value

            sub_configuration = Configuration(sub_configuration_space,
                                              values=sub_config_dict)

            if init_params is not None:
                sub_init_params_dict = {}
                for param in init_params:
                    if param.startswith('%s:' % node_name):
                        value = init_params[param]
                        new_name = param.replace('%s:' % node_name, '', 1)
                        sub_init_params_dict[new_name] = value

            if isinstance(
                    node,
                (autoPyTorchChoice, autoPyTorchComponent, BasePipeline)):
                node.set_hyperparameters(
                    configuration=sub_configuration,
                    init_params=None
                    if init_params is None else sub_init_params_dict,
                )
            else:
                raise NotImplementedError('Not supported yet!')

        return self
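
As a side note, the 'node_name:parameter' prefix convention handled above can be illustrated in isolation; the sketch below uses made-up step and parameter names and plain dictionaries rather than the autoPyTorch API.

flat_config = {
    'encoder:strategy': 'one_hot',        # hypothetical step/parameter names
    'classifier:learning_rate': 0.01,
    'classifier:max_depth': 6,
}

def params_for_step(flat, step_name):
    # keep only the entries belonging to one pipeline step and strip its prefix
    prefix = '%s:' % step_name
    return {k.replace(prefix, '', 1): v for k, v in flat.items() if k.startswith(prefix)}

print(params_for_step(flat_config, 'classifier'))  # {'learning_rate': 0.01, 'max_depth': 6}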
Example #15
    def test_add(self):
        '''
            simply adding some rundata to runhistory
        '''
        rh = runhistory.RunHistory()
        cs = get_config_space()
        config1 = Configuration(cs, values={'a': 1, 'b': 2})
        config2 = Configuration(cs, values={'a': 1, 'b': 25})
        config3 = Configuration(cs, values={'a': 2, 'b': 2})
        rh.add(config=config1,
               cost=10,
               time=20,
               status=StatusType.SUCCESS,
               instance_id=23,
               seed=None,
               additional_info=None)
        rh.add(config=config2,
               cost=10,
               time=20,
               status=StatusType.SUCCESS,
               instance_id=1,
               seed=12354,
               additional_info={"start_time": 10})
        rh.add(config=config3,
               cost=10,
               time=20,
               status=StatusType.TIMEOUT,
               instance_id=1,
               seed=45,
               additional_info={"start_time": 10})

        scen = Scenario({"cutoff_time": 10})

        self.assertRaises(TypeError, runhistory2epm.RunHistory2EPM4LogCost)

        rh2epm = runhistory2epm.RunHistory2EPM4LogCost(num_params=2,
                                                       scenario=scen)
        rhArr = rh2epm.transform(rh)
Example #16
    def test_add_multiple_times(self):
        rh = RunHistory()
        cs = get_config_space()
        config = Configuration(cs, values={'a': 1, 'b': 2})

        for i in range(5):
            rh.add(config=config, cost=i + 1, time=i + 1,
                   status=StatusType.SUCCESS, instance_id=None,
                   seed=12345, additional_info=None)

        self.assertEqual(len(rh.data), 1)
        self.assertEqual(len(rh.get_runs_for_config(config, only_max_observed_budget=True)), 1)
        self.assertEqual(len(rh._configid_to_inst_seed_budget[1]), 1)
        self.assertEqual(list(rh.data.values())[0].cost, 1)
Example #17
 def get_config(self, budget) -> Tuple[dict, dict]:
     # get max_budget
     # calc by budget2epm
     max_budget = self.get_available_max_budget()
     # initial points
     if self.initial_points is not None and self.initial_points_index < len(
             self.initial_points):
         while True:
             if self.initial_points_index >= len(self.initial_points):
                 break
             initial_point_dict = self.initial_points[
                 self.initial_points_index]
             initial_point = Configuration(self.config_space,
                                           initial_point_dict)
             self.initial_points_index += 1
             initial_point.origin = "User Defined"
             if not self.is_config_exist(budget, initial_point):
                 self.logger.debug(
                     f"Using initial points [{self.initial_points_index - 1}]"
                 )
                 return self.process_config_info_pair(
                     initial_point, {}, budget)
     return self.get_config_(budget, max_budget)
Example #18
 def _invert_bilog_logit(self, x):
     dictionary = copy.copy(x) if isinstance(x,
                                             dict) else x.get_dictionary()
     for k, v in dictionary.items():
         if k in self._par:
             hp = self.original_cs.get_hyperparameter(k)
             _fun = INV_TRANS[self._par[k]]
             dictionary[k] = np.clip(_fun(v), hp.lower, hp.upper)
             # need to check the original configspace since bilog ints are converted to float
             if isinstance(self.original_cs.get_hyperparameter(k),
                           UniformIntegerHyperparameter):
                 dictionary[k] = int(np.rint(dictionary[k]))
     x = Configuration(self.original_cs, values=dictionary)
     return x
Example #19
    def suggest_configuration(self):
        if self.X is None and self.Y is None:
            new_x = init_random_uniform(self.X_lower,
                                        self.X_upper,
                                        N=1,
                                        rng=self.rng)

        elif self.X.shape[0] == 1:
            # We need at least 2 data points to train a GP
            Xopt = init_random_uniform(self.X_lower,
                                       self.X_upper,
                                       N=1,
                                       rng=self.rng)

        else:
            prior = DNGOPrior()
            model = DNGO(batch_size=100,
                         num_epochs=20000,
                         learning_rate=0.1,
                         momentum=0.9,
                         l2=1e-16,
                         adapt_epoch=5000,
                         n_hypers=20,
                         prior=prior,
                         do_optimize=True,
                         do_mcmc=True)

            #acquisition_func = EI(model, task.X_lower, task.X_upper)
            lo = np.ones([model.n_units_3]) * -1
            up = np.ones([model.n_units_3])
            ei = LogEI(model, lo, up)

            acquisition_func = IntegratedAcquisition(model, ei, self.X_lower,
                                                     self.X_upper)

            maximizer = Direct(acquisition_func, self.X_lower, self.X_upper)

            model.train(self.X, self.Y)

            acquisition_func.update(model)

            new_x = maximizer.maximize()

        # Map from [0, 1]^D space back to original space
        next_config = Configuration(self.config_space, vector=new_x[0, :])

        # Transform to sacred configuration
        result = configspace_config_to_sacred(next_config)

        return result
Example #20
    def fit(self, scenario: ASlibScenario, config: Configuration):
        '''
            fit pca object to ASlib scenario data

            Arguments
            ---------
            scenario: data.aslib_scenario.ASlibScenario
                ASlib Scenario with all data in pandas
            config: ConfigSpace.Configuration
                configuration
        '''

        self.imputer = Imputer(strategy=config.get("imputer_strategy"))
        self.imputer.fit(scenario.feature_data.values)
Example #21
    def test_get_config_runs(self):
        '''
            get some config runs from runhistory
        '''

        rh = RunHistory(aggregate_func=average_cost)
        cs = get_config_space()
        config1 = Configuration(cs, values={'a': 1, 'b': 2})
        config2 = Configuration(cs, values={'a': 1, 'b': 3})
        rh.add(config=config1,
               cost=10,
               time=20,
               status=StatusType.SUCCESS,
               instance_id=1,
               seed=1)

        rh.add(config=config2,
               cost=10,
               time=20,
               status=StatusType.SUCCESS,
               instance_id=1,
               seed=1)

        rh.add(config=config1,
               cost=10,
               time=20,
               status=StatusType.SUCCESS,
               instance_id=2,
               seed=2)

        ist = rh.get_runs_for_config(config=config1)
        #print(ist)
        #print(ist[0])
        #print(ist[1])
        self.assertEqual(len(ist), 2)
        self.assertEqual(ist[0].instance, 1)
        self.assertEqual(ist[1].instance, 2)
Example #22
    def new_result(self, job: Job, update_model=True):

        ##############################
        ### 1. update observations ###
        ##############################
        if job.result is None:
            # One could skip crashed results, but we decided to
            # assign a +inf loss and count them as bad configurations
            loss = np.inf
        else:
            # same for non-numeric losses.
            # Note that this means losses of minus infinity will count as bad!
            loss = job.result["loss"] if np.isfinite(job.result["loss"]) else np.inf
        budget = job.kwargs["budget"]
        config_dict = job.kwargs["config"]
        configId = get_hash_of_config(config_dict)
        runId = (configId, budget)
        if runId in self.runId2info:
            self.runId2info[runId]["end_time"] = time()
            self.runId2info[runId]["loss"] = loss
        else:
            self.logger.error(f"runId {runId} not in runId2info, it's impossible!!!")
        # config_info = job.kwargs["config_info"]
        config = Configuration(self.config_space, config_dict)
        # add lock (it may be added twice, but that does not matter)
        self.budget2obvs[budget]["locks"].append(config.get_array().copy())
        self.budget2obvs[budget]["configs"].append(deepcopy(config))
        self.budget2obvs[budget]["vectors"].append(config.get_array())
        self.budget2obvs[budget]["losses"].append(loss)
        losses = np.array(self.budget2obvs[budget]["losses"])
        vectors = np.array(self.budget2obvs[budget]["vectors"])
        ###################################################################
        ### 2. Judge whether the EPM training conditions are satisfied  ###
        ###################################################################
        if not update_model:
            return
        self._new_result(budget, vectors, losses)
Example #23
    def __init__(self,
                 config: Optional[Configuration] = None,
                 steps: Optional[List[Tuple[str, autoPyTorchChoice]]] = None,
                 dataset_properties: Optional[Dict[str, Any]] = None,
                 include: Optional[Dict[str, Any]] = None,
                 exclude: Optional[Dict[str, Any]] = None,
                 random_state: Optional[np.random.RandomState] = None,
                 init_params: Optional[Dict[str, Any]] = None):

        self.init_params = init_params if init_params is not None else {}
        self.dataset_properties = dataset_properties if \
            dataset_properties is not None else {}
        self.include = include if include is not None else {}
        self.exclude = exclude if exclude is not None else {}

        if steps is None:
            self.steps = self._get_pipeline_steps(dataset_properties)
        else:
            self.steps = steps

        self.config_space = self.get_hyperparameter_search_space()

        if config is None:
            self.config = self.config_space.get_default_configuration()
        else:
            if isinstance(config, dict):
                config = Configuration(self.config_space, config)
            if self.config_space != config.configuration_space:
                warnings.warn(self.config_space._children)
                warnings.warn(config.configuration_space._children)
                import difflib
                diff = difflib.unified_diff(
                    str(self.config_space).splitlines(),
                    str(config.configuration_space).splitlines())
                diff_msg = '\n'.join(diff)
                raise ValueError('Configuration passed does not come from the '
                                 'same configuration space. Differences are: '
                                 '%s' % diff_msg)
            self.config = config

        self.set_hyperparameters(self.config, init_params=init_params)

        if random_state is None:
            self.random_state = check_random_state(1)
        else:
            self.random_state = check_random_state(random_state)
        super().__init__(steps=self.steps)

        self._additional_run_info = {}  # type: Dict[str, str]
    def suggest_configuration(self):
        if self.X is None and self.y is None:
            new_x = init_random_uniform(self.lower, self.upper,
                                        n_points=1, rng=self.rng)[0, :]

        elif self.X.shape[0] == 1:
            # We need at least 2 data points to train a GP
            new_x = init_random_uniform(self.lower, self.upper,
                                        n_points=1, rng=self.rng)[0, :]

        else:
            cov_amp = 1
            n_dims = self.lower.shape[0]

            initial_ls = np.ones([n_dims])
            exp_kernel = george.kernels.Matern52Kernel(initial_ls,
                                                       ndim=n_dims)
            kernel = cov_amp * exp_kernel

            prior = DefaultPrior(len(kernel) + 1)

            model = GaussianProcessMCMC(kernel, prior=prior,
                                        n_hypers=self.n_hypers,
                                        chain_length=self.chain_length,
                                        burnin_steps=self.burnin,
                                        normalize_input=False,
                                        normalize_output=True,
                                        rng=self.rng,
                                        lower=self.lower,
                                        upper=self.upper)

            a = LogEI(model)

            acquisition_func = MarginalizationGPMCMC(a)

            max_func = Direct(acquisition_func, self.lower, self.upper, verbose=False)

            model.train(self.X, self.y)

            acquisition_func.update(model)

            new_x = max_func.maximize()

        next_config = Configuration(self.config_space, vector=new_x)

        # Transform to sacred configuration
        result = configspace_config_to_sacred(next_config)

        return result
Example #25
def get_id_of_config(config: Configuration):
    # TODO: instance="", seed=0
    X: np.ndarray = config.get_array()
    m = hashlib.md5()
    if X.flags['C_CONTIGUOUS']:
        m.update(X.data)
        m.update(str(X.shape).encode('utf8'))
    else:
        X_tmp = np.ascontiguousarray(X.T)
        m.update(X_tmp.data)
        m.update(str(X_tmp.shape).encode('utf8'))
    # m.update(instance.encode())
    # m.update(str(seed).encode())
    hash_value = m.hexdigest()
    return hash_value
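
A small sanity check for the hashing helper above (a sketch, assuming the usual ConfigSpace imports and an illustrative hyperparameter): configurations with equal underlying vectors hash to the same id.

from ConfigSpace import Configuration, ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter

cs = ConfigurationSpace()
cs.add_hyperparameter(UniformFloatHyperparameter('x', 0.0, 1.0))

c1 = Configuration(cs, values={'x': 0.5})
c2 = Configuration(cs, values={'x': 0.5})

# same vector representation -> same md5 digest
assert get_id_of_config(c1) == get_id_of_config(c2)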
Example #26
    def extract_configuration(self, job):
        # One could skip crashed results, but we decided to assign a +inf loss
        # We count them as bad configurations
        if job.result is None:
            logger.warning("Job %s failed with exception\n%s".format(job.id, job.exception))
            loss = np.inf
        else:
            loss = job.result["loss"]

        budget = job.kwargs["budget"]

        # We want to get a numerical representation of the configuration in the original space
        configuration = Configuration(self.config_space, job.kwargs["config"])

        return configuration, budget, loss
Example #27
    def test_config_decorator(self):
        @AbstractBenchmark.check_parameters
        def tmp(_, configuration: Union[Dict, np.ndarray], fidelity: Dict,
                **kwargs):
            return configuration, fidelity

        hps = dict(hp1=0.25, hp2=1.25, hp3=2.25)
        configuration = Configuration(self.foo.configuration_space, hps)
        config, fidel = tmp(self=self.foo,
                            configuration=configuration,
                            fidelity=None)

        assert isinstance(config, Dict)
        assert isinstance(fidel, Dict)
        assert fidel['fidelity1'] == 1.0
Example #28
    def split(self, config_ext: CS.Configuration, as_dict: bool=False) -> \
            (Union[CS.Configuration, dict], int):
        """
        Split extended config into normal config and resource value.

        :param config_ext: Extended config
        :param as_dict: Return config as dict?
        :return: (config, resource_value)
        """
        x_res = copy.copy(config_ext.get_dictionary())
        resource_value = int(x_res[self.resource_attr_name])
        del x_res[self.resource_attr_name]
        if not as_dict:
            x_res = CS.Configuration(self.hp_ranges.config_space, values=x_res)
        return x_res, resource_value
    def test_multi_config_design(self):
        stats = Stats(scenario=self.scenario)
        stats.start_timing()
        self.ta.stats = stats
        tj = TrajLogger(output_dir=None, stats=stats)
        rh = RunHistory(aggregate_func=average_cost)
        self.ta.runhistory = rh
        rng = np.random.RandomState(seed=12345)

        intensifier = Intensifier(
            tae_runner=self.ta,
            stats=stats,
            traj_logger=tj,
            rng=rng,
            instances=[None],
            run_obj_time=False,
        )

        configs = [Configuration(configuration_space=self.cs, values={"x1": 4}),
                   Configuration(configuration_space=self.cs, values={"x1": 2})]
        dc = InitialDesign(
            tae_runner=self.ta,
            scenario=self.scenario,
            stats=stats,
            traj_logger=tj,
            runhistory=rh,
            rng=rng,
            configs=configs,
            intensifier=intensifier,
            aggregate_func=average_cost,
        )

        inc = dc.run()
        self.assertTrue(stats.ta_runs == 4)  # two runs per config
        self.assertTrue(len(rh.data) == 4)  # two runs per config
        self.assertTrue(rh.get_cost(inc) == 4)
Example #30
 def test_init_with_values(self):
     c1 = Configuration(self.cs,
                        values={
                            'parent': 1,
                            'child': 2,
                            'friend': 3
                        })
     # Note that the vector does not necessarily have an intuitive
     # sorting!
     # Values are a little higher than one would expect because an
     # integer range of [0, 10] is transformed to [-0.499, 10.499].
     vector_values = {
         'parent': 1,
         'child': 0.22727223140405708,
         'friend': 0.583333611112037
     }
     vector = [None] * 3
     for name in self.cs._hyperparameter_idx:
         vector[self.cs._hyperparameter_idx[name]] = vector_values[name]
     c2 = Configuration(self.cs, vector=vector)
     # This tests
     # a) that the vector representation of both are the same
     # b) that the dictionary representation of both are the same
     self.assertEqual(c1, c2)
Example #31
    def fit(self, scenario: ASlibScenario, config: Configuration):
        '''
            fit StandardScaler object to ASlib scenario data

            Arguments
            ---------
            scenario: data.aslib_scenario.ASlibScenario
                ASlib Scenario with all data in pandas
            config: ConfigSpace.Configuration
                configuration
        '''

        if config.get("StandardScaler"):
            self.scaler = StandardScaler()
            self.scaler.fit(scenario.feature_data.values)
Example #32
    def test_full_update(self):
        rh = RunHistory(aggregate_func=average_cost)
        cs = get_config_space()
        config1 = Configuration(cs, values={'a': 1, 'b': 2})
        config2 = Configuration(cs, values={'a': 1, 'b': 3})
        rh.add(config=config1,
               cost=10,
               time=20,
               status=StatusType.SUCCESS,
               instance_id=1,
               seed=1)

        rh.add(config=config2,
               cost=10,
               time=20,
               status=StatusType.SUCCESS,
               instance_id=1,
               seed=1)

        rh.add(config=config2,
               cost=20,
               time=20,
               status=StatusType.SUCCESS,
               instance_id=2,
               seed=2)

        cost_config2 = rh.get_cost(config2)

        rh.compute_all_costs()
        updated_cost_config2 = rh.get_cost(config2)
        self.assertTrue(cost_config2 == updated_cost_config2)

        rh.compute_all_costs(instances=[2])
        updated_cost_config2 = rh.get_cost(config2)
        self.assertTrue(cost_config2 != updated_cost_config2)
        self.assertTrue(updated_cost_config2 == 20)
Example #33
    def remove_resource(
            self,
            config_ext: CS.Configuration,
            as_dict: bool = False) -> Union[CS.Configuration, dict]:
        """
        Strips away resource attribute and returns normal config

        :param config_ext: Extended config
        :param as_dict: Return as dict?
        :return: config_ext without resource attribute
        """
        x_dct = copy.copy(config_ext.get_dictionary())
        del x_dct[self.resource_attr_name]
        if as_dict:
            return x_dct
        else:
            return CS.Configuration(self.hp_ranges.config_space, values=x_dct)
Example #34
def impute_inactive_values(configuration: Configuration, strategy: Union[str, float]='default') -> Configuration:
    """Impute inactive parameters.

    Parameters
    ----------
    strategy : string, optional (default='default')
        The imputation strategy.

        - If 'default', replace inactive parameters by their default.
        - If float, replace inactive parameters by the given float value,
          which should be easy for a tree-based model to split apart.
    """
    values = dict()
    for hp_name in configuration:
        value = configuration.get(hp_name)
        if value is None:

            if strategy == 'default':
                hp = configuration.configuration_space.get_hyperparameter(
                    hp_name)
                new_value = hp.default

            elif isinstance(strategy, float):
                new_value = strategy

            else:
                raise ValueError('Unknown imputation strategy %s' % str(strategy))

            value = new_value

        values[hp_name] = value

    new_configuration = Configuration(configuration.configuration_space,
                                      values=values,
                                      allow_inactive_with_values=True)
    return new_configuration
Example #35
def get_random_neighbor(configuration: Configuration, seed: int) -> Configuration:
    """Draw a random neighbor by changing one parameter of a configuration.

    * If the parameter is categorical, it changes it to another value.
    * If the parameter is ordinal, it changes it to the next higher or lower
      value.
    * If the parameter is a float, draw a random sample.

    If changing a parameter activates new parameters or deactivates
    previously active parameters, the configuration will be rejected. If more
    than 10000 configurations were rejected, this function raises a
    ValueError.

    Parameters
    ----------
    configuration : Configuration

    seed : int
        Used to generate a random state.

    Returns
    -------
    Configuration
        The new neighbor.

    """
    random = np.random.RandomState(seed)
    rejected = True
    values = copy.deepcopy(configuration.get_dictionary())

    while rejected:
        # First, choose an active hyperparameter
        active = False
        iteration = 0
        while not active:
            iteration += 1
            if configuration._num_hyperparameters > 1:
                rand_idx = random.randint(0,
                                          configuration._num_hyperparameters - 1)
            else:
                rand_idx = 0

            value = configuration.get_array()[rand_idx]
            if np.isfinite(value):
                active = True

                hp_name = configuration.configuration_space \
                    .get_hyperparameter_by_idx(rand_idx)
                hp = configuration.configuration_space.get_hyperparameter(hp_name)

                # Only choose it if there is a possibility of finding a neighbour
                if not hp.has_neighbors():
                    active = False

            if iteration > 10000:
                raise ValueError('Probably caught in an infinite loop.')
        # Get a neighbour and adapt the rest of the configuration if necessary
        neighbor = hp.get_neighbors(value, random, number=1, transform=True)[0]
        previous_value = values[hp.name]
        values[hp.name] = neighbor

        try:
            new_configuration = Configuration(
                configuration.configuration_space, values=values)
            rejected = False
        except ValueError as e:
            values[hp.name] = previous_value

    return new_configuration
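
A minimal usage sketch, again assuming the older ConfigSpace API used in the function above; the hyperparameter names are illustrative.

from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter, UniformFloatHyperparameter

cs = ConfigurationSpace(seed=1)
cs.add_hyperparameter(CategoricalHyperparameter('kernel', ['rbf', 'linear']))
cs.add_hyperparameter(UniformFloatHyperparameter('C', 0.1, 10.0))

config = cs.get_default_configuration()
neighbor = get_random_neighbor(config, seed=1)
# neighbor differs from config in exactly one (active) hyperparameter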
Example #36
def get_one_exchange_neighbourhood(configuration, seed):
    """Return all configurations in a one-exchange neighborhood.

    The method is implemented as defined by:
    Frank Hutter, Holger H. Hoos and Kevin Leyton-Brown
    Sequential Model-Based Optimization for General Algorithm Configuration
    In: Proceedings of the conference on Learning and Intelligent OptimizatioN (LION 5)
    """
    random = np.random.RandomState(seed)
    neighbourhood = []
    for i, hp_name in enumerate(configuration):
        number_of_sampled_neighbors = 0
        array = configuration.get_array()

        if not np.isfinite(array[i]):
            continue

        iteration = 0
        while True:
            hp = configuration.configuration_space.get_hyperparameter(hp_name)
            configuration._populate_values()
            num_neighbors = hp.get_num_neighbors()

            # Obtain neighbors differently for different possible numbers of
            # neighbors
            if num_neighbors == 0:
                break
            # No infinite loops
            elif iteration > 1000:
                break
            elif np.isinf(num_neighbors):
                if number_of_sampled_neighbors >= 4:
                    break
                num_samples_to_go = 4 - number_of_sampled_neighbors
                neighbors = hp.get_neighbors(array[i], random,
                                             number=num_samples_to_go)
            else:
                if iteration > 0:
                    break
                neighbors = hp.get_neighbors(array[i], random)

            # Check all newly obtained neighbors
            for neighbor in neighbors:
                new_array = array.copy()
                new_array[i] = neighbor
                neighbor_value = hp._transform(neighbor)

                # Activate hyperparameters if their parent node got activated
                children = configuration.configuration_space.get_children_of(
                    hp_name)

                if len(children) > 0:
                    to_visit = deque()
                    to_visit.extendleft(children)
                    visited = set()
                    activated_values = dict()
                    while len(to_visit) > 0:
                        current = to_visit.pop()
                        if current.name in visited:
                            continue
                        visited.add(current.name)

                        current_idx = configuration.configuration_space. \
                            get_idx_by_hyperparameter_name(current.name)
                        current_value = new_array[current_idx]

                        conditions = configuration.configuration_space.\
                            _get_parent_conditions_of(current.name)

                        active = True
                        for condition in conditions:
                            parent_names = [c.parent.name for c in
                                            condition.get_descendant_literal_conditions()]

                            parents = {parent_name: configuration[parent_name] for
                                       parent_name in parent_names}

                            # parents come from the original configuration.
                            # We change at least one parameter. In order to
                            # set other parameters which are conditional on
                            # this one, we have to activate it
                            if hp_name in parents:
                                parents[hp_name] = neighbor_value
                            # Hyperparameters which are in depth 1 of the
                            # hyperparameter tree might have children which
                            # have to be activated as well. Once we set hp in
                            #  level 1 to active, its value changes from the
                            #  value of the original configuration and this
                            # must be done here
                            for parent_name in parent_names:
                                if parent_name in activated_values:
                                    parents[parent_name] = activated_values[
                                        parent_name]

                            # if one of the parents is None, the hyperparameter
                            # cannot be active; otherwise we have to evaluate the condition
                            if any([parent_value is None for parent_value in
                                    parents.values()]):
                                active = False
                            else:
                                if not condition.evaluate(parents):
                                    active = False

                        if active and (current_value is None or
                                       not np.isfinite(current_value)):
                            default = current._inverse_transform(current.default)
                            new_array[current_idx] = default
                            children = configuration.configuration_space.get_children_of(
                                current.name)
                            if len(children) > 0:
                                to_visit.extendleft(children)
                            activated_values[current.name] = current.default

                        if not active and (current_value is not None
                                           or np.isfinite(current_value)):
                            new_array[current_idx] = np.NaN

                try:
                    # Populating a configuration from an array does not check
                    #  if it is a legal configuration - check this (slow)
                    new_configuration = Configuration(
                        configuration.configuration_space, vector=new_array)
                    new_configuration.is_valid_configuration()
                    neighbourhood.append(new_configuration)
                    number_of_sampled_neighbors += 1
                except ValueError as e:
                    pass

                # Count iterations to avoid running into an infinite loop when
                # sampling floats/ints and there is a large amount of forbidden
                #  values; also to find out if we tried to get a neighbor for
                #  a categorical hyperparameter, and the only possible
                # neighbor is forbidden together with another active
                # value/default hyperparameter
                iteration += 1

    return neighbourhood
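
A short usage sketch for the list-returning variant above (illustrative hyperparameter names, same ConfigSpace assumptions as the snippet):

from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter, UniformIntegerHyperparameter

cs = ConfigurationSpace(seed=1)
cs.add_hyperparameter(CategoricalHyperparameter('activation', ['relu', 'tanh']))
cs.add_hyperparameter(UniformIntegerHyperparameter('layers', 1, 5))

config = cs.sample_configuration()
for neighbor in get_one_exchange_neighbourhood(config, seed=1):
    # each neighbor differs from config in exactly one hyperparameter
    print(neighbor.get_dictionary())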
Example #37
def get_one_exchange_neighbourhood(configuration: Configuration, seed: int) -> List[Configuration]:
    """Return all configurations in a one-exchange neighborhood.

    The method is implemented as defined by:
    Frank Hutter, Holger H. Hoos and Kevin Leyton-Brown
    Sequential Model-Based Optimization for General Algorithm Configuration
    In: Proceedings of the conference on Learning and Intelligent OptimizatioN (LION 5)
    """
    random = np.random.RandomState(seed)
    hyperparameters_list = list(configuration.keys())
    hyperparameters_list_length = len(hyperparameters_list)
    neighbors_to_return = dict()
    hyperparameters_used = list()
    number_of_usable_hyperparameters = sum(np.isfinite(configuration.get_array()))
    configuration_space = configuration.configuration_space

    while len(hyperparameters_used) != number_of_usable_hyperparameters:
        index = random.randint(hyperparameters_list_length)
        hp_name = hyperparameters_list[index]
        if hp_name in neighbors_to_return:
            random.shuffle(neighbors_to_return[hp_name])
            n_ = neighbors_to_return[hp_name].pop()
            if len(neighbors_to_return[hp_name]) == 0:
                del neighbors_to_return[hp_name]
                hyperparameters_used.append(hp_name)
            yield n_

        else:
            neighbourhood = []
            number_of_sampled_neighbors = 0
            array = configuration.get_array()

            if not np.isfinite(array[index]):
                continue

            iteration = 0
            while True:
                hp = configuration_space.get_hyperparameter(hp_name)
                configuration._populate_values()
                num_neighbors = hp.get_num_neighbors(configuration.get(hp_name))

                # Obtain neighbors differently for different possible numbers of
                # neighbors
                if num_neighbors == 0:
                    break
                # No infinite loops
                elif iteration > 100:
                    break
                elif np.isinf(num_neighbors):
                    if number_of_sampled_neighbors >= 4:
                        break
                    num_samples_to_go = 4 - number_of_sampled_neighbors
                    neighbors = hp.get_neighbors(array[index], random,
                                                 number=num_samples_to_go)
                else:
                    if iteration > 0:
                        break
                    neighbors = hp.get_neighbors(array[index], random)


                # Check all newly obtained neighbors
                for neighbor in neighbors:
                    new_array = array.copy()
                    new_array[index] = neighbor
                    neighbor_value = hp._transform(neighbor)
                    # Hyperparameters which are going to be set to inactive
                    disabled = []

                    # Activate hyperparameters if their parent node got activated
                    children = configuration_space._children_of[hp_name]
                    if len(children) > 0:
                        to_visit = deque()  #type: deque
                        to_visit.extendleft(children)
                        visited = set()  #type: Set[str]
                        activated_values = dict()  #type: Dict[str, Union[int, float, str]]
                        while len(to_visit) > 0:
                            current = to_visit.pop()
                            if current.name in visited:
                                continue
                            visited.add(current.name)
                            if current.name in disabled:
                                continue

                            current_idx = configuration_space.get_idx_by_hyperparameter_name(current.name)
                            current_value = new_array[current_idx]

                            conditions = configuration.configuration_space.\
                                _parent_conditions_of[current.name]

                            active = True
                            for condition in conditions:
                                parent_names = [parent.name for parent in
                                                configuration_space._parents_of[current.name]]
                                parents = {parent_name: configuration[parent_name] for
                                           parent_name in parent_names}

                                # parents come from the original configuration.
                                # We change at least one parameter. In order to
                                # set other parameters which are conditional on
                                # this one, we have to activate it
                                if hp_name in parents:
                                    parents[hp_name] = neighbor_value
                                # Hyperparameters which are in depth 1 of the
                                # hyperparameter tree might have children which
                                # have to be activated as well. Once we set hp in
                                #  level 1 to active, its value changes from the
                                #  value of the original configuration and this
                                # must be done here
                                for parent_name in parent_names:
                                    if parent_name in activated_values:
                                        parents[parent_name] = activated_values[
                                            parent_name]

                                # if one of the parents is None, the hyperparameter
                                # cannot be active; otherwise we have to evaluate the condition
                                if any([parent_value is None for parent_value in
                                        parents.values()]):
                                    active = False
                                    break
                                else:
                                    if not condition.evaluate(parents):
                                        active = False
                                        break

                            if active and (current_value is None or
                                           not np.isfinite(current_value)):
                                default = current._inverse_transform(current.default)
                                new_array[current_idx] = default
                                children_ = configuration_space._children_of[current.name]
                                if len(children_) > 0:
                                    to_visit.extendleft(children_)
                                activated_values[current.name] = current.default

                            # If the hyperparameter was made inactive,
                            # all its children need to be deactivated as well
                            if not active and (current_value is not None
                                               or np.isfinite(current_value)):
                                new_array[current_idx] = np.NaN

                                children = configuration.configuration_space._children_of[current.name]

                                if len(children) > 0:
                                    to_disable = set()
                                    for ch in children:
                                        to_disable.add(ch.name)
                                    while len(to_disable) > 0:
                                        child = to_disable.pop()
                                        child_idx = configuration.configuration_space. \
                                            get_idx_by_hyperparameter_name(child)
                                        disabled.append(child_idx)
                                        children = configuration.configuration_space._children_of[child]

                                        for ch in children:
                                            to_disable.add(ch.name)

                    for idx in disabled:
                        new_array[idx] = np.NaN
                    try:
                        # Populating a configuration from an array does not check
                        #  if it is a legal configuration - check this (slow)
                        new_configuration = Configuration(configuration_space, vector=new_array)
                        new_configuration.is_valid_configuration()
                        neighbourhood.append(new_configuration)
                        number_of_sampled_neighbors += 1
                    # todo: investigate why tests fail when ForbiddenValueError is caught here
                    except ForbiddenValueError as e:
                        pass

                    # Count iterations to avoid running into an infinite loop when
                    # sampling floats/ints and there is a large amount of forbidden
                    #  values; also to find out if we tried to get a neighbor for
                    #  a categorical hyperparameter, and the only possible
                    # neighbor is forbidden together with another active
                    # value/default hyperparameter
                    iteration += 1
            if len(neighbourhood) == 0:
                hyperparameters_used.append(hp_name)
            else:
                if hp_name not in hyperparameters_used:
                    neighbors_to_return[hp_name] = neighbourhood
                    random.shuffle(neighbors_to_return[hp_name])
                    n_ = neighbors_to_return[hp_name].pop()
                    if len(neighbors_to_return[hp_name]) == 0:
                        del neighbors_to_return[hp_name]
                        hyperparameters_used.append(hp_name)
                    yield n_
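
Unlike the previous variant, this version is a generator and yields neighbors lazily, so callers can stop early; a usage sketch under the same assumptions (illustrative hyperparameter name):

import itertools

from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter

cs = ConfigurationSpace(seed=1)
cs.add_hyperparameter(UniformFloatHyperparameter('lr', 1e-4, 1e-1, log=True))

config = cs.sample_configuration()
# take only the first three neighbors without materializing the whole neighbourhood
first_three = list(itertools.islice(get_one_exchange_neighbourhood(config, seed=1), 3))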