def _select_configurations(self) -> "typing.List[Configuration]":
    """Build the initial design: default, centre point and all corner points.

    For every hyperparameter of ``self.scenario.cs`` a list of "extreme"
    values is collected (the single value of a constant, lower/upper bound
    of a numerical, every choice of a categorical, first/last element of an
    ordinal).  The cartesian product of these lists is the factorial design.

    Returns
    -------
    configs: typing.List[Configuration]
        The default configuration, followed by the middle point of the
        space, followed by one configuration per corner point.

    Raises
    ------
    ValueError
        If the configuration space contains a hyperparameter that is none
        of the four handled kinds.
    """
    cs = self.scenario.cs
    params = cs.get_hyperparameters()

    values = []
    mid = []
    for param in params:
        if isinstance(param, Constant):
            v = [param.value]
            mid.append(param.value)
        elif isinstance(param, NumericalHyperparameter):
            v = [param.lower, param.upper]
            mid.append(np.average([param.lower, param.upper]))
        elif isinstance(param, CategoricalHyperparameter):
            v = list(param.choices)
            mid.append(param.choices[0])
        elif isinstance(param, OrdinalHyperparameter):
            v = [param.sequence[0], param.sequence[-1]]
            mid.append(param.sequence[len(param.sequence) // 2])
        else:
            # Without this branch ``v`` would be unbound (or silently reuse
            # the previous parameter's values) and ``mid`` would fall out of
            # step with ``params``.
            raise ValueError(
                "Hyperparameter %s of type %s is not supported in "
                "factorial design" % (param.name, type(param).__name__))
        values.append(v)

    factorial_design = itertools.product(*values)

    self.logger.debug("Initial Design")
    configs = [cs.get_default_configuration()]

    # add middle point in space
    conf_dict = {p.name: v for p, v in zip(params, mid)}
    middle_conf = deactivate_inactive_hyperparameters(conf_dict, cs)
    configs.append(middle_conf)

    # add corner points
    for design in factorial_design:
        conf_dict = {p.name: v for p, v in zip(params, design)}
        conf = deactivate_inactive_hyperparameters(conf_dict, cs)
        conf.origin = "Factorial Design"
        configs.append(conf)
        self.logger.debug(conf)

    self.logger.debug("Size of factorial design: %d", len(configs))

    return configs
def inverse_transform(
        self,
        array: np.ndarray,
        return_vector=False
) -> Union[np.ndarray, None, Configuration]:
    """Map a batch of (encoded) vectors back to configurations.

    Parameters
    ----------
    array: np.ndarray
        2-D array with one row per sample.  If ``self.encoder`` is set, its
        ``inverse_transform`` is applied first.
    return_vector: bool
        When true, return the full-width vector matrix instead of building
        configuration objects.

    Returns
    -------
    np.ndarray or list
        With ``return_vector`` the ``(N, len(self.mask))`` matrix; otherwise
        a list with one configuration per row that could be converted.
        Rows whose conversion raises are skipped, so the list may be shorter
        than ``N``.
    """
    # Local import keeps this block self-contained.
    import logging

    if self.encoder is not None:
        array = self.encoder.inverse_transform(array)
    array = np.array(array)
    for i, n_choices in enumerate(self.n_choices_list):
        if n_choices == 2:
            # binary column: threshold back to {0, 1}
            array[:, i] = (array[:, i] > 0.5).astype("float64")
        if self.is_ordinal_list[i]:
            # ordinal column: snap to the nearest valid sequence index
            sequence = self.sequence_mapper[i]
            array[:, i] = np.clip(np.round(array[:, i]), 0, len(sequence) - 1)
    n_rows, _ = array.shape  # unpacking also enforces a 2-D input
    result = np.zeros([n_rows, len(self.mask)])
    result[:, self.mask] = array
    if return_vector:
        return result
    configs = []
    for row_idx in range(n_rows):
        try:
            config = deactivate(self.config_space, result[row_idx, :])
            config = deactivate_inactive_hyperparameters(
                configuration_space=self.config_space,
                configuration=config)
        except Exception as exc:
            # Best effort: an unconvertible row is dropped, but leave a
            # trace instead of swallowing the error silently.
            logging.getLogger(__name__).debug(
                "inverse_transform: skipping row %d: %r", row_idx, exc)
            continue
        configs.append(config)
    return configs
def _select_configurations(self) -> typing.List[Configuration]:
    """Create the initial design from a latin hypercube sample.

    ``self.init_budget`` points are drawn with ``lhs`` and every column is
    rescaled to the ``[lower, upper]`` range of the matching hyperparameter.

    Returns
    -------
    configs: typing.List[Configuration]
        One configuration per sampled point, each tagged with origin "LHD".

    Raises
    ------
    ValueError
        If any hyperparameter is not a ``FloatHyperparameter``.
    """
    cs = self.scenario.cs
    hyperparameters = cs.get_hyperparameters()

    lhd = lhs(n=len(hyperparameters), samples=self.init_budget)

    for col, hp in enumerate(hyperparameters):
        if not isinstance(hp, FloatHyperparameter):
            raise ValueError("only FloatHyperparameters supported in LHD")
        lhd[:, col] = lhd[:, col] * (hp.upper - hp.lower) + hp.lower

    self.logger.debug("Initial Design")

    names = [hp.name for hp in hyperparameters]
    configs = []
    # turn every sampled point into a configuration
    for point in lhd:
        conf = deactivate_inactive_hyperparameters(dict(zip(names, point)), cs)
        conf.origin = "LHD"
        configs.append(conf)
        self.logger.debug(conf)

    self.logger.debug("Size of lhd: %d", len(configs))

    return configs
def _check_and_cast_configuration(configuration: Union[Dict, ConfigSpace.Configuration],
                                  configuration_space: ConfigSpace.ConfigurationSpace) \
        -> ConfigSpace.Configuration:
    """ Helper-function to evaluate the given configuration.
    Cast it to a ConfigSpace.Configuration and evaluate if it violates its boundaries.

    Note:
        We remove inactive hyperparameters from the given configuration. Inactive hyperparameters are
        hyperparameters that are not relevant for a configuration, e.g. hyperparameter A is only relevant if
        hyperparameter B=1 and if B!=1 then A is inactive and will be removed from the configuration.
        Since the authors of the benchmark removed those parameters explicitly, they should also handle the
        cases that inactive parameters are not present in the input-configuration.

    Parameters
    ----------
    configuration : Dict or ConfigSpace.Configuration
        The configuration to validate.
    configuration_space : ConfigSpace.ConfigurationSpace
        The space the configuration has to belong to.

    Returns
    -------
    ConfigSpace.Configuration
        The configuration with inactive hyperparameters removed.

    Raises
    ------
    TypeError
        If ``configuration`` is neither a dict nor a ConfigSpace.Configuration.
    """
    if isinstance(configuration, dict):
        configuration = ConfigSpace.Configuration(configuration_space, configuration,
                                                  allow_inactive_with_values=True)
    elif not isinstance(configuration, ConfigSpace.Configuration):
        # Only these two input types are handled; the message says exactly that.
        raise TypeError('Configuration has to be of type dict or '
                        f'ConfigSpace.Configuration but was {type(configuration)}')

    all_hps = set(configuration_space.get_hyperparameter_names())
    active_hps = configuration_space.get_active_hyperparameters(configuration)
    inactive_hps = all_hps - active_hps

    if inactive_hps:
        logger.debug(f'There are {len(inactive_hps)} inactive hyperparameters: {inactive_hps}. '
                     'Going to remove them from the configuration.')

    configuration = deactivate_inactive_hyperparameters(configuration, configuration_space)
    configuration_space.check_configuration(configuration)

    return configuration
def test_deactivate_inactive_hyperparameters(self):
    """Deactivating inactive values must always yield a valid configuration.

    Checked on a diamond-shaped condition graph (head -> left/right ->
    bottom) joined with an AND and with an OR conjunction, and on a plain
    space without any condition.
    """

    def build_diamond(conjunction):
        # head activates left and right; bottom depends on both of them,
        # joined by the given conjunction class.
        space = ConfigurationSpace()
        head = CategoricalHyperparameter('head', [0, 1])
        left = CategoricalHyperparameter('left', [0, 1])
        right = CategoricalHyperparameter('right', [0, 1])
        bottom = CategoricalHyperparameter('bottom', [0, 1])
        space.add_hyperparameters([head, left, right, bottom])
        space.add_condition(EqualsCondition(left, head, 0))
        space.add_condition(EqualsCondition(right, head, 0))
        space.add_condition(conjunction(EqualsCondition(bottom, left, 0),
                                        EqualsCondition(bottom, right, 0)))
        return space

    diamond = build_diamond(AndConjunction)
    for head_value, left_value in ((0, 0), (1, 0), (0, 1)):
        c = deactivate_inactive_hyperparameters(
            {'head': head_value, 'left': left_value, 'right': 0, 'bottom': 0},
            diamond)
        diamond._check_configuration_rigorous(c)

    diamond = build_diamond(OrConjunction)
    for head_value, left_value in ((0, 0), (1, 1), (0, 1)):
        c = deactivate_inactive_hyperparameters(
            {'head': head_value, 'left': left_value, 'right': 0, 'bottom': 0},
            diamond)
        diamond._check_configuration_rigorous(c)

    # A space without conditions: nothing to deactivate.
    plain = ConfigurationSpace()
    plain.add_hyperparameters([UniformIntegerHyperparameter('a', 0, 10),
                               UniformIntegerHyperparameter('b', 0, 10)])
    c = deactivate_inactive_hyperparameters({'a': 5, 'b': 6}, plain)
    plain.check_configuration(c)
def test_deactivate_inactive_hyperparameters(self):
    """Configurations cleaned by the helper pass the space's own checks."""
    # (conjunction joining the two conditions on 'bottom', raw value dicts)
    scenarios = (
        (AndConjunction, (
            {'head': 0, 'left': 0, 'right': 0, 'bottom': 0},
            {'head': 1, 'left': 0, 'right': 0, 'bottom': 0},
            {'head': 0, 'left': 1, 'right': 0, 'bottom': 0},
        )),
        (OrConjunction, (
            {'head': 0, 'left': 0, 'right': 0, 'bottom': 0},
            {'head': 1, 'left': 1, 'right': 0, 'bottom': 0},
            {'head': 0, 'left': 1, 'right': 0, 'bottom': 0},
        )),
    )

    for conjunction_cls, raw_configs in scenarios:
        # Diamond: head -> {left, right} -> bottom
        diamond = ConfigurationSpace()
        head = CategoricalHyperparameter('head', [0, 1])
        left = CategoricalHyperparameter('left', [0, 1])
        right = CategoricalHyperparameter('right', [0, 1])
        bottom = CategoricalHyperparameter('bottom', [0, 1])
        diamond.add_hyperparameters([head, left, right, bottom])
        diamond.add_condition(EqualsCondition(left, head, 0))
        diamond.add_condition(EqualsCondition(right, head, 0))
        on_left = EqualsCondition(bottom, left, 0)
        on_right = EqualsCondition(bottom, right, 0)
        diamond.add_condition(conjunction_cls(on_left, on_right))

        for raw in raw_configs:
            cleaned = deactivate_inactive_hyperparameters(raw, diamond)
            diamond._check_configuration_rigorous(cleaned)

    # Unconditional space as a sanity check.
    plain = ConfigurationSpace()
    a = UniformIntegerHyperparameter('a', 0, 10)
    b = UniformIntegerHyperparameter('b', 0, 10)
    plain.add_hyperparameters([a, b])
    cleaned = deactivate_inactive_hyperparameters({'a': 5, 'b': 6}, plain)
    plain.check_configuration(cleaned)
def evaluate_wrapper(self, config):
    """Evaluate ``config`` after filling in every hyperparameter it lacks.

    Keys present in ``self.defaults`` but absent from the configuration's
    dictionary are set to their default value.  The completed dictionary is
    turned into a configuration of ``self.config_space`` (inactive values
    removed) and handed to ``self.objective_function``.

    Returns
    -------
    dict
        ``{'objs': (objective_value,)}``
    """
    config_dict = config.get_dictionary()

    # Impute the missing hyper-parameters with default values.
    missing_keys = set(self.defaults.keys()) - set(config_dict.keys())
    for key in missing_keys:
        config_dict[key] = self.defaults[key]

    full_config = deactivate_inactive_hyperparameters(
        configuration_space=self.config_space,
        configuration=config_dict)
    objective_value = self.objective_function(full_config)
    return {'objs': (objective_value, )}
def get_runhistory(self, cs):
    """Load a SMAC2 runhistory and its configurations.

    Expects the following files (located via ``self.get_glob_file`` in
    ``self.folder``):

    - `runs_and_results(...).csv`
    - `paramstrings(...).txt`

    As a side effect, the parsed configurations are stored in
    ``self.id_to_config``.

    Parameters
    ----------
    cs: ConfigurationSpace
        configuration space the configurations belong to

    Returns
    -------
    rh: RunHistory
        runhistory
    """
    rh_fn = self.get_glob_file(self.folder, 'runs_and_results*.csv')
    self.logger.debug("Runhistory loaded as csv from %s", rh_fn)
    configs_fn = self.get_glob_file(self.folder, 'paramstrings*.txt')
    self.logger.debug("Configurations loaded from %s", configs_fn)

    # Translate smac2 to csv
    csv_data = load_csv_to_pandaframe(rh_fn, self.logger)
    data = pd.DataFrame()
    data["config_id"] = csv_data["Run History Configuration ID"]
    # instance ids in the csv are 1-based indices into the training instances
    data["instance_id"] = csv_data["Instance ID"].apply(
        lambda x: self.scen.train_insts[x - 1])
    data["seed"] = csv_data["Seed"]
    data["time"] = csv_data["Runtime"]
    if self.scen.run_obj == 'runtime':
        data["cost"] = csv_data["Runtime"]
    else:
        data["cost"] = csv_data["Run Quality"]
    data["status"] = csv_data["Run Result"]

    # Load configurations
    with open(configs_fn, 'r') as csv_file:
        csv_data = list(
            csv.reader(csv_file, delimiter=',', skipinitialspace=True))

    # Each row reads "<id>: name='value', name='value', ...".
    # The patterns are loop-invariant, so compile them once.
    id_pattern = re.compile(r'^(\d*):')
    first_param_pattern = re.compile(r'^\d*: (.*)')
    assignment_pattern = re.compile(r'(.*)=\'(.*)\'')

    id_to_config = {}
    for row in csv_data:
        config_id = int(id_pattern.match(row[0]).group(1))
        # the first cell carries the id prefix in front of its assignment
        params = [first_param_pattern.match(row[0]).group(1)]
        params.extend(row[1:])
        matches = [assignment_pattern.match(p) for p in params]
        values = {m.group(1): m.group(2) for m in matches}
        values = deactivate_inactive_hyperparameters(
            fix_types(values, cs), cs).get_dictionary()
        id_to_config[config_id] = Configuration(cs, values=values)
    self.id_to_config = id_to_config

    feats = self.scen.feature_dict
    rh = CSV2RH().read_csv_to_rh(data,
                                 cs=cs,
                                 id_to_config=id_to_config,
                                 train_inst=self.scen.train_insts,
                                 test_inst=self.scen.test_insts,
                                 instance_features=feats)

    return rh
def add_config(row):
    """Attach a ``config_id`` to ``row`` and return the row.

    The configuration is built from the non-empty parameter cells of the
    row.  Configurations seen for the first time get the next free id
    (the current size of ``config_to_id``).
    """
    non_empty = {}
    for name in parameters:
        if row[name] != '':
            non_empty[name] = row[name]
    config = deactivate_inactive_hyperparameters(fix_types(non_empty, cs), cs)
    # setdefault only inserts for unseen configurations; the candidate id is
    # computed before any insertion, i.e. from the size without this config.
    row['config_id'] = config_to_id.setdefault(config, len(config_to_id))
    return row
def iterate(self):
    """Run one SMBO round on the current configuration space.

    The space and the list of previous ``(config_dict, perf)`` pairs come
    from ``self.get_configspace()``.  Previous results are fed into the
    optimizer as successful observations before it runs for
    ``self.step_size`` evaluations.  Afterwards ``self.history_dict`` is
    rebuilt from the optimizer's history and ``self._hp_cnt`` is advanced by
    ``self._delta``, capped at ``self.hp_size``.
    """
    config_space, hist_list = self.get_configspace()

    # Initial points are only requested when there is no history to start from.
    init_num = 0 if len(hist_list) > 0 else 3
    iter_num = self.step_size

    smbo = SMBO(self.evaluate_wrapper,
                config_space,
                advisor_type=self.strategy,
                max_runs=iter_num,
                init_num=init_num,
                task_id='smbo%d' % self._hp_cnt,
                random_state=self.random_state)

    # Set the history trials.
    for previous_dict, previous_perf in hist_list:
        previous_config = deactivate_inactive_hyperparameters(
            configuration_space=config_space, configuration=previous_dict)
        smbo.config_advisor.history_container.update_observation(
            Observation(previous_config, SUCCESS, None, (previous_perf, ), None))

    smbo.run()

    # Save the runhistory.
    self.history_dict = OrderedDict()
    container = smbo.config_advisor.history_container
    evaluated = zip(container.configurations, container.perfs)
    for evaluated_config, evaluated_perf in evaluated:
        self.history_dict[evaluated_config] = evaluated_perf

    self._hp_cnt = min(self._hp_cnt + self._delta, self.hp_size)
def read_config_file(fn:str, cs:ConfigurationSpace):
    """Read configurations from a csv file.

    The first csv column is used as index.  Columns whose name starts with
    "dummy_non_parameter" are dropped; every remaining row is type-fixed and
    converted into a configuration of ``cs`` with inactive values removed.

    Returns
    -------
    list
        One configuration per csv row, in file order.
    """
    config_pd = pd.read_csv(fn, header=0, index_col=0, dtype=object)

    dummy_columns = [name for name in config_pd
                     if name.startswith("dummy_non_parameter")]
    for name in dummy_columns:
        del config_pd[name]

    configs = []
    for _, row in config_pd.iterrows():
        typed_values = fix_types(configuration=row.to_dict(),
                                 configuration_space=cs)
        configs.append(
            deactivate_inactive_hyperparameters(configuration=typed_values,
                                                configuration_space=cs))
    return configs
def load_config_csv(path, cs, logger):
    """ Load configurations.csv in the following format:

    +-----------+-----------------+-----------------+-----+
    | CONFIG_ID | parameter_name1 | parameter_name2 | ... |
    +===========+=================+=================+=====+
    | 0         | value1          | value2          | ... |
    +-----------+-----------------+-----------------+-----+
    | ...       | ...             | ...             | ... |
    +-----------+-----------------+-----------------+-----+

    Parameters
    ----------
    path: str
        path to csv-file
    cs: ConfigurationSpace
        configspace with matching parameters
    logger: Logger
        logger for debugs

    Returns
    -------
    (parameters, id_to_config): (columns, dict)
        the parameter columns of the csv and a dict mapping ids to
        Configurations

    Raises
    ------
    ValueError
        if the csv columns and the parameters of ``cs`` differ
    """
    logger.debug("Trying to read configuration-csv-file: %s.", path)
    config_data = load_csv_to_pandaframe(path, logger, apply_numeric=False)
    config_data['CONFIG_ID'] = config_data['CONFIG_ID'].apply(pd.to_numeric)
    config_data.set_index('CONFIG_ID', inplace=True)

    csv_params = config_data.columns
    pcs_params = cs.get_hyperparameter_names()
    logger.debug("Found parameters: %s", csv_params)
    logger.debug("Parameters in pcs: %s", pcs_params)

    # parameters known to only one of the two sides
    diff = set(csv_params) ^ set(pcs_params)
    if diff:
        raise ValueError("Provided pcs does not match configuration-file "
                         "\'%s\' (check parameters %s)" % (path, diff))

    id_to_config = {}
    for config_id, row in config_data.iterrows():
        # cells with a falsy value (e.g. an empty string) are left out
        given = {}
        for name in csv_params:
            if row[name]:
                given[name] = row[name]
        id_to_config[config_id] = deactivate_inactive_hyperparameters(
            fix_types(given, cs), cs)

    return config_data.columns, id_to_config
def _transform_continuous_designs(
        self,
        design: np.ndarray,
        origin: str,
        cs: ConfigurationSpace) -> typing.List[Configuration]:
    """Turn a continuous design matrix into configurations of ``cs``.

    Columns are processed in the order of ``cs.get_hyperparameters()``:
    numerical columns are left untouched, a zero column is inserted for
    every constant, and categorical/ordinal columns are scaled by their
    number of levels and truncated to an integer index.

    Parameters
    ----------
    design: np.ndarray
        2-D array with one row per design point
        (assumes values in [0, 1] -- TODO confirm against callers).
        Categorical/ordinal columns are modified in place.
    origin: str
        Label stored in the ``origin`` attribute of every configuration.
    cs: ConfigurationSpace
        Space the configurations are created in.

    Returns
    -------
    typing.List[Configuration]
        One configuration per design row; rows that raise
        ``ForbiddenValueError`` are skipped.

    Raises
    ------
    ValueError
        For hyperparameter types other than the four handled ones.
    """
    params = cs.get_hyperparameters()
    for idx, param in enumerate(params):
        if isinstance(param, NumericalHyperparameter):
            continue
        elif isinstance(param, Constant):
            # add a vector with zeros
            design_ = np.zeros(np.array(design.shape) + np.array((0, 1)))
            design_[:, :idx] = design[:, :idx]
            design_[:, idx + 1:] = design[:, idx:]
            design = design_
        elif isinstance(param, CategoricalHyperparameter):
            v_design = design[:, idx]
            # nudge exact 1.0 down so the scaled index stays below len(choices)
            v_design[v_design == 1] = 1 - 10**-10
            # builtin int: the ``np.int`` alias no longer exists in NumPy >= 1.24
            design[:, idx] = np.array(v_design * len(param.choices), dtype=int)
        elif isinstance(param, OrdinalHyperparameter):
            v_design = design[:, idx]
            v_design[v_design == 1] = 1 - 10**-10
            design[:, idx] = np.array(v_design * len(param.sequence), dtype=int)
        else:
            raise ValueError("Hyperparameter not supported in LHD")

    self.logger.debug("Initial Design")
    configs = []
    for vector in design:
        try:
            conf = deactivate_inactive_hyperparameters(
                configuration=None, configuration_space=cs, vector=vector)
        except ForbiddenValueError:
            # forbidden combinations are dropped from the design
            continue
        conf.origin = origin
        configs.append(conf)
        self.logger.debug(conf)

    self.logger.debug("Size of initial design: %d", len(configs))

    return configs
def inverse_transform(
        self,
        array: np.ndarray,
        return_vector=False
) -> Union[np.ndarray, None, Configuration]:
    """Build a configuration (or its full-width vector) from one vector.

    ``array`` is written into the ``self.mask`` positions of a zero vector
    of length ``len(self.mask)``.  With ``return_vector`` that vector is
    returned directly; otherwise it is converted into a configuration of
    ``self.config_space``.  ``None`` is returned when the conversion raises.
    """
    # TODO: the part involving one-hot encoding (OHE) is not handled yet.
    # FIXME: typically used after sampling from l(x) of a KDE or TPE, to
    #        build a Configuration from the sampled vector.
    assert self.ohe == False
    full_vector = np.zeros([len(self.mask)])
    full_vector[self.mask] = array
    if return_vector:
        return full_vector
    try:
        candidate = deactivate(self.config_space, full_vector)
        candidate = deactivate_inactive_hyperparameters(
            configuration_space=self.config_space,
            configuration=candidate
        )
    except Exception:
        # vectors that cannot be converted yield no configuration
        return None
    return candidate
def get_config_from_dict(config_dict: dict, config_space: ConfigurationSpace):
    """Create a configuration of ``config_space`` from a plain dict.

    Thin wrapper around ``deactivate_inactive_hyperparameters``.
    """
    return deactivate_inactive_hyperparameters(
        configuration_space=config_space,
        configuration=config_dict,
    )
def get_validated_runhistory(self, cs):
    """Load the SMAC2 validation results as a runhistory.

    Expects the following files:

    - `self.folder/validate-time-train/validationCallStrings(...).csv`
    - `self.folder/validate-time-train/validationRunResultLineMatrix(...).csv`

    Only the `validate-time-train` folder is read here.

    Parameters
    ----------
    cs: ConfigurationSpace
        configuration space the configurations belong to

    Returns
    -------
    validated_rh: RunHistory or None
        validated runhistory; None (after a warning) if one of the two
        files cannot be found
    """
    self.logger.debug("Loading validation-data")
    folder = os.path.join(self.folder, 'validate-time-train')
    # file names are searched in the string form of the directory listing
    listing = str(os.listdir(folder))

    configs_fn = re.search(r'validationCallStrings.*?\.csv', listing)
    if not configs_fn:
        self.logger.warning(
            "Specified validation_format is \'SMAC2\', but no "
            "\'validationCallStrings(...).csv\'-file could be found "
            "in %s" % folder)
        return
    configs_fn = os.path.join(folder, configs_fn.group())

    results_fn = re.search(r'validationRunResultLineMatrix.*?\.csv', listing)
    if not results_fn:
        self.logger.warning(
            "Specified validation_format is \'SMAC2\', but no "
            "\'validationRunResultLineMatrix(...).csv\'-file could be found "
            "in %s" % folder)
        return
    results_fn = os.path.join(folder, results_fn.group())

    self.logger.debug("Configurations loaded from %s", configs_fn)
    self.logger.debug("Runhistory loaded as csv from %s", results_fn)

    # Load configurations
    csv_data = load_csv_to_pandaframe(configs_fn, self.logger, False)
    id_to_config = {}
    for idx, row in csv_data.iterrows():
        config_id = int(row[0])
        # call string alternates "-name" and "'value'" tokens
        configuration = row[1].split()
        params = [p.lstrip('-') for p in configuration[::2]]  # names
        values = [v.strip('\'') for v in configuration[1::2]]  # values
        param_values = dict(zip(params, values))
        param_values = deactivate_inactive_hyperparameters(
            fix_types(param_values, cs), cs).get_dictionary()
        id_to_config[config_id] = Configuration(cs, values=param_values)
    feats = self.scen.feature_dict

    # Translate smac2-validation (RunResultString-matrix) to csv
    csv_data = load_csv_to_pandaframe(results_fn,
                                      self.logger,
                                      delimiter='\",\"')
    column_pattern = re.compile(
        r'^Run result line of validation config #(\d*)$')
    runs_as_cost = self.scen.run_obj == 'runtime'
    # Collect rows first and build the frame once: DataFrame.append copied
    # the whole frame per row and no longer exists in pandas >= 2.0.
    records = []
    for idx, row in csv_data.iterrows():
        instance, seed = row[0], row[1]
        for column in csv_data.columns[2:]:
            config_id = int(column_pattern.match(column).group(1))
            result = [e.strip() for e in row[column].split(',')]
            records.append({
                "config_id": config_id,
                "instance_id": instance,
                "seed": seed,
                "time": result[1],
                "cost": result[1] if runs_as_cost else result[3],
                "status": result[0]
            })
    data = pd.DataFrame(records)

    rh = CSV2RH().read_csv_to_rh(data,
                                 cs=cs,
                                 id_to_config=id_to_config,
                                 train_inst=self.scen.train_insts,
                                 test_inst=self.scen.test_insts,
                                 instance_features=feats)

    self.logger.debug(
        "%d datapoints for %d configurations found in validated rh.",
        len(rh.data), len(rh.get_all_configs()))

    return rh
def rvs(self, n_samples=1, random_state=None):
    """Draw random samples.

    The samples are in the original space. They need to be transformed
    before being passed to a model or minimizer by `space.transform()`.

    Parameters
    ----------
    n_samples : int, default=1
        Number of samples to be drawn from the space.

    random_state : int, RandomState instance, or None (default)
        Set random state to something other than None
        for reproducible results.

    Returns
    -------
    points : list of lists, shape=(n_points, n_dims)
       Points sampled from the space.
    """
    rng = check_random_state(random_state)

    if not self.is_config_space:
        if self.model_sdv is None:
            # Draw every dimension independently
            columns = [dim.rvs(n_samples=n_samples, random_state=rng)
                       for dim in self.dimensions]
        else:
            # Take what the SDV model provides, draw the rest
            sampled = self.model_sdv.sample(n_samples)
            columns = []
            for dim in self.dimensions:
                if dim.name in sampled.columns:
                    columns.append(sampled[dim.name].values.tolist())
                else:
                    columns.append(
                        dim.rvs(n_samples=n_samples, random_state=rng))
        # Transpose
        return _transpose_list_array(columns)

    hps_names = self.config_space.get_hyperparameter_names()

    if self.model_sdv is None:
        confs = self.config_space.sample_configuration(n_samples)
        if n_samples == 1:
            # a single draw is not wrapped in a list by the sampler
            confs = [confs]
    else:
        confs = self.model_sdv.sample(n_samples)
        sdv_names = confs.columns
        new_hps_names = list(set(hps_names) - set(sdv_names))

        # hyperparameters missing from the SDV sample are drawn at random
        for name in new_hps_names:
            hp = self.config_space.get_hyperparameter(name)
            confs[name] = [hp._transform(hp._sample(rng))
                           for _ in range(n_samples)]

        # reorder the columns to match the hyperparameter order
        confs = confs[hps_names]
        confs = confs.to_dict("records")
        for idx, conf in enumerate(confs):
            cf = deactivate_inactive_hyperparameters(conf, self.config_space)
            confs[idx] = cf.get_dictionary()

    req_points = []
    for conf in confs:
        point = []
        for hps_name in hps_names:
            # placeholder for values absent from the configuration
            if self.hps_type[hps_name] == "Categorical":
                placeholder = "NA"
            else:
                placeholder = np.nan
            if hps_name in conf.keys():
                point.append(conf[hps_name])
            else:
                point.append(placeholder)
        req_points.append(point)

    return req_points