def workon(self, fct, max_trials=infinity, **kwargs):
    """Optimize a given function

    Parameters
    ----------
    fct: callable
        Function to optimize. Must take arguments provided by trial.params. Additional constant
        parameter can be passed as ``**kwargs`` to `workon`. Function must return the final
        objective.
    max_trials: int, optional
        Maximum number of trials to execute within `workon`. If the experiment or algorithm
        reach status is_done before, the execution of `workon` terminates.
    **kwargs
        Constant argument to pass to `fct` in addition to trial.params. If values in kwargs are
        present in trial.params, the latter takes precedence.

    Returns
    -------
    int
        Number of trials executed by this call.

    Raises
    ------
    `ValueError`
         If results returned by `fct` have invalid format

    """
    constants = flatten(kwargs)
    trials = 0
    while not self.is_done and trials < max_trials:
        trial = self.suggest()
        if trial is None:
            log.warning('Algorithm could not sample new points')
            return trials

        # Build the arguments from scratch for every trial: updating the
        # constants in place would carry the parameters of one trial over
        # to the calls made for the following trials.
        call_kwargs = {**constants, **flatten(trial.params)}
        results = fct(**unflatten(call_kwargs))
        self.observe(trial, results=results)
        trials += 1

    return trials
def dict_to_trial(data, space):
    """Create a `orion.core.worker.trial.Trial` object from `data`,
    filling only parameter information from `data`.

    A dimension that is absent from `data` takes its default value.

    :param data: A dict representing a sample point from `space`.
    :param space: Definition of problem's domain.
    :type space: `orion.algo.space.Space`
    :raises ValueError: If a dimension is absent from `data` and has no
       default value, or if the resulting trial is not contained in `space`.
    """
    flat_data = flatten(data)

    trial_params = []
    for dim_name, dimension in space.items():
        fallback = dimension.default_value
        if dim_name not in flat_data and fallback is dimension.NO_DEFAULT_VALUE:
            raise ValueError(
                "Dimension {} not specified and does not have a default value.".format(
                    dim_name
                )
            )

        trial_params.append(
            dict(
                name=dimension.name,
                type=dimension.type,
                value=flat_data.get(dim_name, fallback),
            )
        )

    trial = Trial(params=trial_params)
    if trial in space:
        return trial

    error_msg = f"Parameters values {trial.params} are outside of space {space}"
    raise ValueError(error_msg)
def dict_to_trial(data, space):
    """Create a `orion.core.worker.trial.Trial` object from `data`,
    filling only parameter information from `data`.

    A dimension that is absent from `data` takes its default value. Every
    value is checked against its own dimension before the trial is built.

    :param data: A dict representing a sample point from `space`.
    :param space: Definition of problem's domain.
    :type space: `orion.algo.space.Space`
    :raises ValueError: If a dimension is absent from `data` and has no
       default value, or if a value is not contained in its dimension.
    """
    flat_data = flatten(data)

    trial_params = []
    for dim_name, dimension in space.items():
        if dim_name not in flat_data and dimension.default_value is dimension.NO_DEFAULT_VALUE:
            raise ValueError(
                'Dimension {} not specified and does not have a default value.'
                .format(dim_name))

        candidate = flat_data.get(dim_name, dimension.default_value)
        if candidate not in dimension:
            error_msg = "Dimension {} value {} is outside of prior {}".format(
                dim_name, candidate, dimension.get_prior_string())
            raise ValueError(error_msg)

        trial_params.append(
            dict(name=dimension.name, type=dimension.type, value=candidate)
        )

    # Sanity check: exactly one parameter was produced per dimension.
    assert len(trial_params) == len(space)
    return Trial(params=trial_params)
def build(self, configuration):
    """Create a definition of the problem's search space.

    The configuration is flattened and every expression that should be built
    is turned into a dimension by ``self.dimbuilder`` and registered in a new
    `Space`, which is also stored on ``self.space``.

    Parameters
    ----------
    configuration: OrderedDict
        An OrderedDict containing the name and the expression of the parameters.

    Returns
    -------
    `orion.algo.space.Space`
        The problem's search space definition.

    Raises
    ------
    ValueError
        If registering a dimension fails; the original error is chained.

    """
    self.space = Space()

    for namespace, raw_expression in flatten(configuration).items():
        if not _should_not_be_built(raw_expression):
            cleaned = _remove_marker(raw_expression)
            dimension = self.dimbuilder.build(namespace, cleaned)

            try:
                self.space.register(dimension)
            except ValueError as exc:
                error_msg = "Conflict for name '{}' in parameters".format(namespace)
                raise ValueError(error_msg) from exc

    return self.space
def transform(self, trial):
    """Transform a point that was in the original space to be in this one.

    Each dimension of this space transforms the value stored under its name
    in the flattened ``trial.params``. The transformed values, ordered like
    ``self.items()``, are passed to ``change_trial_params`` along with the
    trial and this space, and its result is returned.
    """
    # Flatten once: the flattened params do not depend on the dimension, so
    # recomputing them for every dimension is wasted work.
    flat_params = flatten(trial.params)
    transformed_point = tuple(
        dim.transform(flat_params[name]) for name, dim in self.items()
    )

    return change_trial_params(trial, transformed_point, self)
def load_yaml(self, path):
    """Load yaml file and set global default configuration

    Every flattened key of the file is written to the ``<key>._yaml`` entry
    of this configuration. When the option is deprecated and names an
    alternative, the value is also written to ``<alternative>._yaml``.

    Parameters
    ----------
    path: str
        Path to the global configuration file.

    Raises
    ------
    ConfigurationError
        If some option in the yaml file does not exist in the config

    """
    with open(path) as f:
        cfg = yaml.safe_load(f)

    # An empty file yields None: nothing to load.
    if cfg is None:
        return

    # implies that yaml must be in dict form
    for key, value in flatten(cfg).items():
        default = self[key + '._default']
        deprecated = self[key + '._deprecated']
        logger.debug('Overwritting "%s" default %s with %s', key, default, value)
        self[key + '._yaml'] = value

        alternative = deprecated.get('alternative') if deprecated else None
        if alternative:
            # Report the option that is actually written here; the previous
            # message repeated the one above and named the deprecated key.
            logger.debug(
                'Also setting "%s" to %s (alternative of deprecated "%s")',
                alternative, value, key)
            self[alternative + '._yaml'] = value
def _build_configuration(self, trial):
    """Return a deep copy of the parser arguments overlaid with the trial params.

    Trial parameters are flattened and any leading ``/`` is stripped from
    their names before they are stored in the copy.
    """
    overlay = copy.deepcopy(self.parser.arguments)

    for raw_name, value in flatten(trial.params).items():
        overlay[raw_name.lstrip("/")] = value

    return overlay
def _optimize(trial, fct, trial_arg, **kwargs):
    """Execute a trial on a worker

    The flattened trial params override ``kwargs``. When ``trial_arg`` is
    truthy, the trial itself is passed under that name. The merged arguments
    are unflattened and forwarded to ``fct``, whose result is returned.
    """
    arguments = {**kwargs, **flatten(trial.params)}

    if trial_arg:
        arguments[trial_arg] = trial

    return fct(**unflatten(arguments))
def update(self, data):
    """Update the values of the document.

    Without ``$set``, the flattened ``data`` is merged into the stored data.
    With ``$set``, the stored data is unflattened, each ``$set`` entry is
    assigned under its key (dict values are flattened first), and the result
    is flattened again and replaces the stored data.

    Parameters
    ----------
    data: dict
        Dictionary of data to update the document.
        If `$set` is in the data, the corresponding `data[$set]` will be used instead.

    """
    if "$set" not in data:
        self._data.update(flatten(data))
        return

    nested = unflatten(self._data)
    for key, new_value in data["$set"].items():
        nested[key] = flatten(new_value) if isinstance(new_value, dict) else new_value

    self._data = flatten(nested)
def match(self, query=None):
    """Test if the document corresponds to a given query

    A missing or empty query matches every document. Otherwise the query is
    flattened and every (key, value) pair must satisfy ``self.match_key``.
    """
    if query is None or query == {}:
        return True

    return all(
        self.match_key(key, value) for key, value in flatten(query).items()
    )
def update(self, data):
    """Update the values of the document.

    The update payload is flattened and merged into the stored data.

    Parameters
    ----------
    data: dict
        Dictionary of data to update the document.
        If `$set` is in the data, the corresponding `data[$set]` will be used instead.

    """
    payload = data.get("$set", data)
    self._data.update(flatten(payload))
def promote(self, num: int) -> list[Trial]:
    """Promote up to ``num`` candidates from the first rung that can promote

    Nothing is promoted when the bracket is done. Rungs are visited from the
    lowest index up. A rung whose next rung is already filled is skipped.
    If the first remaining rung is not ready, nothing is promoted. Otherwise
    each of its candidates is branched as a ``"new"`` trial carrying the
    resources of the next rung. Branched trials the owner has not already
    suggested are collected and at most ``num`` of them are returned.

    Parameters
    ----------
    num: int
        Maximum number of promoted trials to return.

    Returns
    -------
    list of Trial
        The promoted trials; empty when no promotion is possible.

    """
    if self.is_done:
        return []

    for rung_id in range(len(self.rungs)):
        next_rung_id = rung_id + 1

        # No more promotion possible, skip to next rung
        if self.has_rung_filled(next_rung_id):
            continue

        if not self.is_ready(rung_id):
            return []

        promoted = []
        for candidate in self.get_candidates(rung_id):
            past_fidelity = flatten(candidate.params)[self.owner.fidelity_index]
            new_fidelity = self.rungs[next_rung_id]["resources"]

            # pylint: disable=logging-format-interpolation
            logger.debug(
                "Promoting {trial} from rung {past_rung} with fidelity {past_fidelity} to "
                "rung {new_rung} with fidelity {new_fidelity}".format(
                    trial=candidate,
                    past_rung=rung_id,
                    past_fidelity=past_fidelity,
                    new_rung=next_rung_id,
                    new_fidelity=new_fidelity,
                )
            )

            branched = candidate.branch(
                status="new",
                params={
                    self.owner.fidelity_index: self.rungs[next_rung_id]["resources"]
                },
            )

            if not self.owner.has_suggested(branched):
                promoted.append(branched)

        return promoted[:num]

    return []
def _params(self):
    """See `~orion.core.worker.trial.Trial`

    When an in-memory trial is set, its ``_params`` are returned. Otherwise
    one ``OrionTrial.Param`` is built per entry of the stored
    ``params_types`` metadata, reading values from the flattened stored
    parameters (``None`` when a name has no value).
    """
    if self.memory is not None:
        return self.memory._params

    param_types = self.storage.metadata["params_types"]
    flat_values = flatten(self.storage.parameters)

    built = []
    for name, vtype in param_types.items():
        built.append(
            OrionTrial.Param(
                name=add_leading_slash(name),
                value=flat_values.get(name),
                type=vtype,
            )
        )
    return built
def reverse(self, transformed_trial):
    """Reverse the transformation so that a point from this
    `TransformedSpace` is expressed in the original one.

    Each dimension of this space reverses the value stored under its name in
    the flattened ``transformed_trial.params``. The reversed values, ordered
    like ``self.items()``, are passed to ``change_trial_params`` along with
    the trial and this space, and its result is returned.
    """
    # Flatten once: the flattened params do not depend on the dimension, so
    # recomputing them for every dimension is wasted work.
    flat_params = flatten(transformed_trial.params)
    reversed_point = tuple(
        dim.reverse(flat_params[name]) for name, dim in self.items()
    )

    return change_trial_params(
        transformed_trial,
        reversed_point,
        self,
    )
def _get_results(self, trial: Trial) -> dict:
    """Return the results of the rung whose resources equal the trial's fidelity

    Raises
    ------
    IndexError
        If no rung has resources equal to the fidelity of the trial.

    """
    fidelity = flatten(trial.params)[self.owner.fidelity_index]

    matching_results = [
        rung["results"] for rung in self.rungs if rung["resources"] == fidelity
    ]
    if matching_results:
        return matching_results[0]

    budgets = [rung["resources"] for rung in self.rungs]
    raise IndexError(
        REGISTRATION_ERROR.format(
            fidelity=fidelity, budgets=budgets, params=trial.params
        )
    )
def test_perturb_hierarchical_params(self, hspace):
    """Check that perturbing nested params keeps the nesting and stays in the space"""
    # Deterministic stand-ins for the random generator methods.
    rng = RNGStub()
    rng.randint = lambda low, high, size: [1]
    rng.random = lambda: 1.0
    rng.normal = lambda mean, variance: 0.0
    rng.choice = lambda choices: choices[0]

    explore = PerturbExplore()
    params = {"numerical": {"x": 1.0, "y": 2, "f": 10}, "z": 0}

    new_params = explore(rng, hspace, params)

    assert "numerical" in new_params
    assert "x" in new_params["numerical"]

    flat_new_params = flatten(new_params)
    for key in hspace.keys():
        assert flat_new_params[key] in hspace[key]
def select(self, keys):
    """Only select or only drop the specified keys

    For a pair (key, value) in the dictionnary, value=0 means the key will not be included
    while value=1 means it will. All specified keys should be 0 or 1. They cannot have different
    values with the exception of _id which can be specified to 0 while the others are at 1.
    The _id field is always returned unless specified with 0.

    NOTE(review): the rules above are presumably enforced by ``_validate_keys``,
    which is not visible here. This method only copies the entries whose
    validated value is truthy; a selected key without any matching entry is
    returned as ``None``.

    Parameters
    ----------
    keys: dict
        Pairs of keys and 0 or 1s. When a key is associated with 1, it is kept in the selection,
        otherwise it is dropped.

    Returns
    -------
    dict
        Unflattened selection, or the whole unflattened document when `keys` is empty.

    """
    if not keys:
        return unflatten(self._data)

    keys = self._validate_keys(flatten(keys))

    def key_is_match(key, selected_key):
        """Test if key is the selected key or one of its nested fields

        key_is_match(abc.def.ghi, abc.def.ghi) -> True
        key_is_match(abc.def.ghi, abc.def) -> True
        key_is_match(abc.def.ghi, abc.de) -> False
        key_is_match(abc.def.ghi, xyz) -> False
        key_is_match(abab.c, ab) -> False
        """
        # Compare against the prefix followed by the separator. Removing
        # every occurrence of `selected_key` (as done previously) wrongly
        # matched keys such as `abab.c` for `ab`, and raised IndexError for
        # `abab`.
        return key == selected_key or key.startswith(selected_key + ".")

    selection = {}
    for selected_key, include in keys.items():
        if not include:
            continue

        matched = {
            key: value
            for key, value in self._data.items()
            if key_is_match(key, selected_key)
        }
        if matched:
            selection.update(matched)
        else:
            # Selected but absent from the document: report it as None.
            selection[selected_key] = None

    return unflatten(selection)
def trial_to_tuple(trial, space):
    """Extract a parameter tuple from a `orion.core.worker.trial.Trial`.

    The order within the tuple is dictated by the defined
    `orion.algo.space.Space` object.

    :raises ValueError: If the flattened parameter names of the trial are not
       exactly the names of the dimensions of `space`.
    """
    params = flatten(trial.params)
    trial_keys = set(params)
    space_keys = set(space.keys())

    if trial_keys != space_keys:
        # The message used to open with `""""`, which put a stray `"` at the
        # start of the error text.
        raise ValueError(
            "The trial {} has wrong params:\n"
            "Trial params: {}\n"
            "Space dims: {}".format(
                trial.id, sorted(trial_keys), sorted(space_keys)
            )
        )

    return tuple(params[name] for name in space.keys())
def __call__(self, rng, space, params):
    """Execute perturbation

    The parameters are flattened and each dimension of the space is handled
    according to its type: ``real`` values go through ``perturb_real`` and
    ``integer`` values through ``perturb_int`` (both receive the interval of
    the dimension), ``categorical`` values go through ``perturb_cat`` (which
    receives the dimension) and ``fidelity`` values are kept unchanged.

    Parameters
    ----------
    rng: numpy.random.Generator
        A random number generator. It is not contained in ``BaseExplore`` because the explore
        class must be stateless.
    space: Space
        The search space optimized by the algorithm.
    params: dict
        Dictionary representing the parameters of the current trial under examination
        (`trial.params`).

    Returns
    -------
    ``dict``
        The new set of parameters for the trial to be branched.

    Raises
    ------
    ValueError
        If a dimension has a type other than the four listed above.

    """
    flat_params = flatten(params)
    perturbed = {}

    for dim in space.values():
        value = flat_params[dim.name]
        kind = dim.type

        if kind == "real":
            value = self.perturb_real(rng, value, dim.interval())
        elif kind == "integer":
            value = self.perturb_int(rng, value, dim.interval())
        elif kind == "categorical":
            value = self.perturb_cat(rng, value, dim)
        elif kind != "fidelity":
            raise ValueError(f"Unsupported dimension type {dim.type}")
        # A fidelity dimension falls through with its value untouched.

        perturbed[dim.name] = value

    return unflatten(perturbed)
def observe(self, trials):
    """Observe the `trials` new state of result.

    For every trial, inside a single Ax client context:

    * a trial that was not suggested is first attached to Ax and its Ax
      trial index is recorded in ``self._trials_map``;
    * a trial that was not observed yet is reported to Ax when its status is
      ``completed`` (objective, statistics listed in
      ``self.extra_objectives`` and constraints) or ``broken`` (logged as a
      failure), and is then registered. Other statuses are only registered.

    Parameters
    ----------
    trials: list of ``orion.core.worker.trial.Trial``
       Trials from a `orion.algo.space.Space`.

    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # line breaks were lost; `self.register(trial)` is assumed to belong to
    # the `not self.has_observed(trial)` branch -- confirm against the file.
    with self.get_client() as ax_client:
        for trial in trials:
            # Trials unknown to this algorithm must be attached to Ax before
            # they can be completed or failed.
            if not self.has_suggested(trial):
                _, trial_index = ax_client.attach_trial(
                    AxOptimizer.transform_params(flatten(trial.params), self.space))
                self._trials_map[self.get_id(trial)] = trial_index

            if not self.has_observed(trial):
                # Check the trial status
                trial_status = trial.status

                # If the trial status is `completed`
                if trial_status == "completed":
                    # Complete it in Ax
                    ax_trial_index = self._trials_map[self.get_id(trial)]
                    # Later entries override earlier ones on name clashes.
                    raw_data = {
                        "objective": trial.objective.value,
                        **{
                            s.name: s.value
                            for s in trial.statistics
                            if s.name in self.extra_objectives
                        },
                        **{r.name: r.value for r in trial.constraints},
                    }
                    ax_client.complete_trial(trial_index=ax_trial_index,
                                             raw_data=raw_data)

                # If the trial status is `broken`
                elif trial_status == "broken":
                    # Mark it as broken in Ax
                    ax_trial_index = self._trials_map[self.get_id(trial)]
                    ax_client.log_trial_failure(ax_trial_index)

                # Register the unobserved trial
                self.register(trial)
def to_pandas(self, with_evc_tree=False):
    """Builds a dataframe with the trials of the experiment

    Each row holds the id, experiment, status, submit/start/end times and
    objective value (``None`` when the trial has no objective) of a trial,
    followed by one column per dimension of the space.

    Parameters
    ----------
    with_evc_tree: bool, optional
        Fetch all trials from the EVC tree.
        Default: False

    """
    rows = []
    for trial in self.fetch_trials(with_evc_tree=with_evc_tree):
        record = [
            trial.id,
            trial.experiment,
            trial.status,
            trial.submit_time,
            trial.start_time,
            trial.end_time,
            trial.objective.value if trial.objective else None,
        ]

        flat_params = flatten(trial.params)
        record.extend(flat_params[name] for name in self.space.keys())
        rows.append(record)

    columns = [
        "id",
        "experiment_id",
        "status",
        "suggested",
        "reserved",
        "completed",
        "objective",
        *self.space.keys(),
    ]

    # With no trials `rows` is an empty list, which yields an empty frame
    # that still carries every column.
    return pandas.DataFrame(rows, columns=columns)
def force_observe(hyperband, trial):
    """Register `trial`, make sure it is mapped to a bracket, then observe it

    When the trial (ignoring fidelity) has no bracket yet, it is mapped to
    the first bracket whose base rung has resources equal to the fidelity of
    the trial.
    """
    hyperband.register(trial)

    trial_key = hyperband.get_id(trial, ignore_fidelity=True)
    bracket_index = hyperband.trial_to_brackets.get(trial_key, None)

    if bracket_index is None:
        fidelity = flatten(trial.params)[hyperband.fidelity_index]
        matching_brackets = [
            index
            for index, bracket in enumerate(hyperband.brackets)
            if bracket.rungs[0]["resources"] == fidelity
        ]
        bracket_index = matching_brackets[0]

    hyperband.trial_to_brackets[trial_key] = bracket_index

    hyperband.observe([trial])
def __contains__(self, key_or_trial):
    """Check whether `trial` is within the bounds of the space.
    Or check if a name for a dimension is registered in this space.

    A trial is contained when every dimension of the space has a value in
    the flattened params of the trial, each value is contained in its
    dimension, and the trial has no parameter beyond those dimensions.

    Parameters
    ----------
    key_or_trial: str or `orion.core.worker.trial.Trial`
        If str, test if the string is a dimension part of the search space.
        If a Trial, test if trial's hyperparameters fit the current search space.

    """
    if isinstance(key_or_trial, str):
        return super(Space, self).__contains__(key_or_trial)

    flat_params = flatten(key_or_trial.params)
    unmatched = set(flat_params.keys())

    for dim_name, dim in self.items():
        if dim_name not in unmatched:
            return False
        if flat_params[dim_name] not in dim:
            return False
        unmatched.remove(dim_name)

    # Any name left over is a parameter the space does not define.
    return not unmatched
def generate_olh_perf_table(self, trials: list[Trial]) -> pd.DataFrame:
    """
    Build an orthogonal Latin hypercube (OLH) performance table from trial parameters

    Only completed trials are used. Their parameters, restricted to the
    dimensions of ``self.space``, are clipped to the bounds of
    ``self.sk_space`` and transformed by it; the objective value is appended
    as the last column.

    Parameters
    ----------
    trials: list of orion.core.worker.trial.Trial objects
        Completed trials

    Returns
    -------
    pandas.DataFrame
        One row per completed trial, one column per dimension plus ``objective``.

    """
    # TODO: deal with categoricals

    param_rows = []
    objective_values = []
    for trial in trials:
        if trial.status == "completed":
            # Take subset in self.space only
            flat_params = flatten(trial.params)
            param_rows.append([flat_params[key] for key in self.space])
            objective_values.append(trial.objective.value)

    # Normalize
    lower_bounds = [bound[0] for bound in self.sk_space.bounds]
    upper_bounds = [bound[1] for bound in self.sk_space.bounds]
    clipped = np.clip(np.array(param_rows), a_min=lower_bounds, a_max=upper_bounds)
    normalized = self.sk_space.transform(clipped)

    objective_column = np.array(objective_values)[:, None]
    table = np.hstack([normalized, objective_column])

    column_names = list(self.space.keys()) + ["objective"]
    return pd.DataFrame(table, columns=column_names)
def _generate_offspring(self, trial):
    """Try to promote or fork a given trial.

    ``self.exploit_func`` selects the trial to continue from. If it returns
    `trial` itself, the trial is promoted with unchanged parameters;
    otherwise the selected trial is forked with parameters produced by
    ``self.explore_func``. In both cases the fidelity is upgraded to the next
    level. This is repeated until the new trial was not suggested before or
    ``self.fork_timeout`` seconds have elapsed.

    Returns
    -------
    tuple
        ``(trial_to_branch, new_trial)``, or ``(None, None)`` when
        ``self.exploit_func`` returns ``None``.

    Raises
    ------
    RuntimeError
        If `trial` was not registered yet, or if no new unique trial could
        be generated before the timeout.

    """
    new_trial = trial

    if not self.has_suggested(new_trial):
        raise RuntimeError(
            "Trying to fork a trial that was not registered yet. This should never happen"
        )

    attempts = 0
    start = time.perf_counter()
    while (
        self.has_suggested(new_trial)
        and time.perf_counter() - start <= self.fork_timeout
    ):
        trial_to_explore = self.exploit_func(
            self.rng,
            trial,
            self.lineages,
        )

        if trial_to_explore is None:
            return None, None
        elif trial_to_explore is trial:
            new_params = {}
            trial_to_branch = trial
            logger.debug("Promoting trial %s, parameters stay the same.", trial)
        else:
            new_params = flatten(
                self.explore_func(self.rng, self.space, trial_to_explore.params)
            )
            trial_to_branch = trial_to_explore
            logger.debug(
                "Forking trial %s with new parameters %s",
                trial_to_branch,
                new_params,
            )

        # Set next level of fidelity. `new_params` is keyed by flattened
        # names, so the current fidelity is read from the flattened params as
        # well; indexing the nested `params` fails for a nested fidelity name.
        current_fidelity = flatten(trial_to_branch.params)[self.fidelity_index]
        new_params[self.fidelity_index] = self.fidelity_upgrades[current_fidelity]

        new_trial = trial_to_branch.branch(params=new_params)
        # Round-trip through the space (reverse, then transform) before
        # checking whether the new trial was already suggested.
        new_trial = self.space.transform(self.space.reverse(new_trial))

        logger.debug("Attempt %s - Creating new trial %s", attempts, new_trial)

        attempts += 1

    if (
        self.has_suggested(new_trial)
        and time.perf_counter() - start > self.fork_timeout
    ):
        raise RuntimeError(
            f"Could not generate unique new parameters for trial {trial.id} in "
            f"less than {self.fork_timeout} seconds. Attempted {attempts} times."
        )

    return trial_to_branch, new_trial
def call(self, **kwargs) -> List[Dict]: """Get the value of the sampled objective function at the given point (hyper-parameters). If `self.with_grad` is set, also returns the gradient of the objective function with respect to the inputs. Parameters ---------- **kwargs Dictionary of hyper-parameters. Returns ------- List[Dict] Result dictionaries: objective and optionally gradient. Raises ------ ValueError If the input isn't of a supported type. """ # A bit of gymnastics to convert the params Dict into a PyTorch tensor. trial = dict_to_trial(kwargs, self._space) flattened_trial = self.transformed_space.transform(trial) flattened_params = flatten(flattened_trial.params) flattened_point = np.array( [flattened_params[key] for key in self.transformed_space.keys()]) x_tensor = torch.as_tensor(flattened_point).type_as(self.h_tensor) if self.with_grad: x_tensor = x_tensor.requires_grad_(True) p_tensor = torch.cat([x_tensor, self.h_tensor]) p_tensor = torch.atleast_2d(p_tensor) devices = [] if self.device.type == "cpu" else [self.device] # NOTE: Currently no way to locally seed the rng of torch distributions, hence forking the # rng for torch only here. with torch.random.fork_rng(devices=devices): torch.random.manual_seed(self.seed) if torch.cuda.is_available(): torch.cuda.manual_seed_all(self.seed) # Forward pass: out = self.net(p_tensor) y_mean, y_log_std = out[0, 0], out[0, 1] y_std = torch.exp(y_log_std) # NOTE: Here we create a distribution over `y`, and use `rsample()`, so that we get can # also return the gradients if need be. y_dist = Normal(loc=y_mean, scale=y_std) y_sample = y_dist.rsample() logger.debug(f"y_sample: {y_sample}") results: List[dict] = [ dict(name=self.name, type="objective", value=y_sample.detach().cpu().item()) ] if self.with_grad: self.net.zero_grad() y_sample.backward() assert x_tensor.grad is not None results.append( dict(name=self.name, type="gradient", value=x_tensor.grad.cpu().numpy())) return results
def test_basic():
    """Check that flatten joins nested keys with dots"""
    nested = {"a": {"b": 2, "c": 3}, "c": {"d": 3, "e": 4}}
    expected = {"a.b": 2, "a.c": 3, "c.d": 3, "c.e": 4}

    assert flatten(nested) == expected
def flatten(self, dictionary):
    """Return `dictionary` flattened by the `flatten` helper in scope"""
    flattened = flatten(dictionary)
    return flattened
def __init__(self, data):
    """Store a flattened version of `data` as the content of the document"""
    flattened = flatten(data)
    self._data = flattened
def test_unflatten():
    """Check that unflatten(flatten(x)) gives back x for a nested dictionary"""
    leaf = {"b": 2, "c": 3}
    nested = {"a": leaf, "d": {"e": leaf}}

    round_trip = unflatten(flatten(nested))

    assert round_trip == nested