def __init__(
    self,
    *,
    candidates_func: Optional[
        Callable[
            [
                "torch.Tensor",
                "torch.Tensor",
                Optional["torch.Tensor"],
                "torch.Tensor",
            ],
            "torch.Tensor",
        ]
    ] = None,
    constraints_func: Optional[Callable[[FrozenTrial], Sequence[float]]] = None,
    n_startup_trials: int = 10,
    independent_sampler: Optional[BaseSampler] = None,
) -> None:
    """Store the sampler configuration after verifying the optional imports.

    Args:
        candidates_func:
            Optional callable taking four tensors (the third may be :obj:`None`) and
            returning a tensor. Stored as-is; :obj:`None` is kept when omitted.
        constraints_func:
            Optional callable mapping a :class:`FrozenTrial` to a sequence of floats.
        n_startup_trials:
            Stored as the startup-trial threshold of the sampler.
        independent_sampler:
            Sampler stored for independent sampling. A fresh ``RandomSampler`` is
            created when the argument is falsy.
    """
    # Fail early if the deferred (optional) imports are not available.
    _imports.check()

    self._candidates_func = candidates_func
    self._constraints_func = constraints_func
    # Truthiness (not an ``is None`` test) decides the fallback, as before.
    self._independent_sampler = independent_sampler or RandomSampler()
    self._n_startup_trials = n_startup_trials

    # Filled in lazily; ``None`` means no study has been seen yet.
    self._study_id: Optional[int] = None
    self._search_space = IntersectionSearchSpace()
def __init__(
    self,
    consider_prior: bool = True,
    prior_weight: float = 1.0,
    consider_magic_clip: bool = True,
    consider_endpoints: bool = False,
    n_startup_trials: int = 10,
    n_ei_candidates: int = 24,
    gamma: Callable[[int], int] = default_gamma,
    weights: Callable[[int], np.ndarray] = default_weights,
    seed: Optional[int] = None,
    *,
    multivariate: bool = False,
    warn_independent_sampling: bool = True,
) -> None:
    """Record the TPE settings and build the helper objects used for sampling.

    An ``ExperimentalWarning`` is emitted when ``multivariate`` is enabled.
    """
    # Plain configuration values, stored verbatim.
    self._prior_weight = prior_weight
    self._n_startup_trials = n_startup_trials
    self._n_ei_candidates = n_ei_candidates
    self._gamma = gamma
    self._weights = weights
    self._multivariate = multivariate
    self._warn_independent_sampling = warn_independent_sampling

    # Settings bundle handed to the Parzen estimators.
    self._parzen_estimator_parameters = _ParzenEstimatorParameters(
        consider_prior,
        prior_weight,
        consider_magic_clip,
        consider_endpoints,
        weights,
    )

    # Both random sources are seeded with the same ``seed``.
    self._rng = np.random.RandomState(seed)
    self._random_sampler = RandomSampler(seed=seed)

    self._search_space = IntersectionSearchSpace()

    if not multivariate:
        return

    warnings.warn(
        "``multivariate`` option is an experimental feature."
        " The interface can change in the future.",
        ExperimentalWarning,
    )
def test_intersection_search_space_class_with_different_studies() -> None:
    """Reusing one ``IntersectionSearchSpace`` for a second study must raise."""
    shared_space = IntersectionSearchSpace()

    with StorageSupplier("sqlite") as storage:
        first_study = create_study(storage=storage)
        second_study = create_study(storage=storage)

        # The first call is expected to succeed.
        shared_space.calculate(first_study)

        # An `IntersectionSearchSpace` instance isn't supposed to be used for multiple studies.
        with pytest.raises(ValueError):
            shared_space.calculate(second_study)
class TPESampler(BaseSampler):
    """Sampler using TPE (Tree-structured Parzen Estimator) algorithm.

    This sampler is based on *independent sampling*.
    See also :class:`~optuna.samplers.BaseSampler` for more details of 'independent sampling'.

    On each trial, for each parameter, TPE fits one Gaussian Mixture Model (GMM) ``l(x)`` to
    the set of parameter values associated with the best objective values, and another GMM
    ``g(x)`` to the remaining parameter values. It chooses the parameter value ``x`` that
    maximizes the ratio ``l(x)/g(x)``.

    For further information about TPE algorithm, please refer to the following papers:

    - `Algorithms for Hyper-Parameter Optimization
      <https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf>`_
    - `Making a Science of Model Search: Hyperparameter Optimization in Hundreds of
      Dimensions for Vision Architectures <http://proceedings.mlr.press/v28/bergstra13.pdf>`_

    Example:

        .. testcode::

            import optuna
            from optuna.samplers import TPESampler


            def objective(trial):
                x = trial.suggest_uniform("x", -10, 10)
                return x ** 2


            study = optuna.create_study(sampler=TPESampler())
            study.optimize(objective, n_trials=10)

    Args:
        consider_prior:
            Enhance the stability of Parzen estimator by imposing a Gaussian prior when
            :obj:`True`. The prior is only effective if the sampling distribution is
            either :class:`~optuna.distributions.UniformDistribution`,
            :class:`~optuna.distributions.DiscreteUniformDistribution`,
            :class:`~optuna.distributions.LogUniformDistribution`,
            :class:`~optuna.distributions.IntUniformDistribution`,
            or :class:`~optuna.distributions.IntLogUniformDistribution`.
        prior_weight:
            The weight of the prior. This argument is used in
            :class:`~optuna.distributions.UniformDistribution`,
            :class:`~optuna.distributions.DiscreteUniformDistribution`,
            :class:`~optuna.distributions.LogUniformDistribution`,
            :class:`~optuna.distributions.IntUniformDistribution`,
            :class:`~optuna.distributions.IntLogUniformDistribution`, and
            :class:`~optuna.distributions.CategoricalDistribution`.
        consider_magic_clip:
            Enable a heuristic to limit the smallest variances of Gaussians used in
            the Parzen estimator.
        consider_endpoints:
            Take endpoints of domains into account when calculating variances of Gaussians
            in Parzen estimator. See the original paper for details on the heuristics
            to calculate the variances.
        n_startup_trials:
            The random sampling is used instead of the TPE algorithm until the given number
            of trials finish in the same study.
        n_ei_candidates:
            Number of candidate samples used to calculate the expected improvement.
        gamma:
            A function that takes the number of finished trials and returns the number
            of trials to form a density function for samples with low grains.
            See the original paper for more details.
        weights:
            A function that takes the number of finished trials and returns a weight for them.
            See `Making a Science of Model Search: Hyperparameter Optimization in Hundreds of
            Dimensions for Vision Architectures
            <http://proceedings.mlr.press/v28/bergstra13.pdf>`_ for more details.
        seed:
            Seed for random number generator.
        multivariate:
            If this is :obj:`True`, the multivariate TPE is used when suggesting parameters.
            The multivariate TPE is reported to outperform the independent TPE. See `BOHB: Robust
            and Efficient Hyperparameter Optimization at Scale
            <http://proceedings.mlr.press/v80/falkner18a.html>`_ for more details.

            .. note::
                Added in v2.2.0 as an experimental feature. The interface may change in newer
                versions without prior notice. See
                https://github.com/optuna/optuna/releases/tag/v2.2.0.
        warn_independent_sampling:
            If this is :obj:`True` and ``multivariate=True``, a warning message is emitted when
            the value of a parameter is sampled by using an independent sampler.
            If ``multivariate=False``, this flag has no effect.
    """

    def __init__(
        self,
        consider_prior: bool = True,
        prior_weight: float = 1.0,
        consider_magic_clip: bool = True,
        consider_endpoints: bool = False,
        n_startup_trials: int = 10,
        n_ei_candidates: int = 24,
        gamma: Callable[[int], int] = default_gamma,
        weights: Callable[[int], np.ndarray] = default_weights,
        seed: Optional[int] = None,
        *,
        multivariate: bool = False,
        warn_independent_sampling: bool = True,
    ) -> None:

        self._parzen_estimator_parameters = _ParzenEstimatorParameters(
            consider_prior, prior_weight, consider_magic_clip, consider_endpoints, weights
        )
        self._prior_weight = prior_weight
        self._n_startup_trials = n_startup_trials
        self._n_ei_candidates = n_ei_candidates
        self._gamma = gamma
        self._weights = weights

        self._warn_independent_sampling = warn_independent_sampling
        # The NumPy generator and the fallback random sampler share the same seed.
        self._rng = np.random.RandomState(seed)
        self._random_sampler = RandomSampler(seed=seed)

        self._multivariate = multivariate
        self._search_space = IntersectionSearchSpace()

        if multivariate:
            warnings.warn(
                "``multivariate`` option is an experimental feature."
                " The interface can change in the future.",
                ExperimentalWarning,
            )

    def reseed_rng(self) -> None:
        """Replace the internal RNG with an unseeded one and reseed the random sampler."""

        self._rng = np.random.RandomState()
        self._random_sampler.reseed_rng()

    def infer_relative_search_space(
        self, study: Study, trial: FrozenTrial
    ) -> Dict[str, BaseDistribution]:
        """Return the search space handled by multivariate sampling.

        Returns an empty dict unless ``multivariate`` was enabled. Otherwise returns the
        entries of the intersection search space whose distribution is an instance of
        ``_DISTRIBUTION_CLASSES``; other parameters are skipped and, optionally, reported
        via :meth:`_log_independent_sampling`.
        """

        if not self._multivariate:
            return {}

        search_space: Dict[str, BaseDistribution] = {}
        for name, distribution in self._search_space.calculate(study).items():
            if not isinstance(distribution, _DISTRIBUTION_CLASSES):
                if self._warn_independent_sampling:
                    # NOTE(review): `get_trials` is called without any state filter here, so
                    # this presumably counts every trial of the study rather than only the
                    # completed ones -- confirm that this is intended.
                    all_trials = study.get_trials(deepcopy=False)
                    if len(all_trials) >= self._n_startup_trials:
                        self._log_independent_sampling(trial, name)
                continue
            search_space[name] = distribution

        return search_space

    def _log_independent_sampling(self, trial: FrozenTrial, param_name: str) -> None:
        """Log a warning that ``param_name`` is sampled independently in ``trial``."""

        _logger.warning(
            "The parameter '{}' in trial#{} is sampled independently "
            "instead of being sampled by multivariate TPE sampler. "
            "(optimization performance may be degraded). "
            "You can suppress this warning by setting `warn_independent_sampling` "
            "to `False` in the constructor of `TPESampler`, "
            "if this independent sampling is intended behavior.".format(
                param_name, trial.number
            )
        )

    def sample_relative(
        self, study: Study, trial: FrozenTrial, search_space: Dict[str, BaseDistribution]
    ) -> Dict[str, Any]:
        """Jointly sample all parameters of ``search_space``.

        Returns an empty dict when ``search_space`` is empty or when fewer than
        ``n_startup_trials`` observations are available.
        """

        self._raise_error_if_multi_objective(study)

        if search_space == {}:
            return {}

        param_names = list(search_space.keys())
        values, scores = _get_multivariate_observation_pairs(study, param_names)

        # If the number of samples is insufficient, we run random trial.
        n = len(scores)
        if n < self._n_startup_trials:
            return {}

        # We divide data into below and above.
        below, above = self._split_multivariate_observation_pairs(values, scores)

        # We then sample by maximizing log likelihood ratio.
        mpe_below = _MultivariateParzenEstimator(
            below, search_space, self._parzen_estimator_parameters
        )
        mpe_above = _MultivariateParzenEstimator(
            above, search_space, self._parzen_estimator_parameters
        )
        samples_below = mpe_below.sample(self._rng, self._n_ei_candidates)
        log_likelihoods_below = mpe_below.log_pdf(samples_below)
        log_likelihoods_above = mpe_above.log_pdf(samples_below)
        ret = TPESampler._compare_multivariate(
            samples_below, log_likelihoods_below, log_likelihoods_above
        )

        for param_name, dist in search_space.items():
            ret[param_name] = dist.to_external_repr(ret[param_name])

        return ret

    def sample_independent(
        self,
        study: Study,
        trial: FrozenTrial,
        param_name: str,
        param_distribution: BaseDistribution,
    ) -> Any:
        """Sample a single parameter, dispatching on the type of its distribution.

        Falls back to the random sampler while fewer than ``n_startup_trials``
        observations exist.

        Raises:
            NotImplementedError: If ``param_distribution`` is of an unsupported type.
        """

        self._raise_error_if_multi_objective(study)

        values, scores = _get_observation_pairs(study, param_name)

        n = len(values)

        if n < self._n_startup_trials:
            return self._random_sampler.sample_independent(
                study, trial, param_name, param_distribution
            )

        below_param_values, above_param_values = self._split_observation_pairs(values, scores)

        if isinstance(param_distribution, distributions.UniformDistribution):
            return self._sample_uniform(param_distribution, below_param_values, above_param_values)
        elif isinstance(param_distribution, distributions.LogUniformDistribution):
            return self._sample_loguniform(
                param_distribution, below_param_values, above_param_values
            )
        elif isinstance(param_distribution, distributions.DiscreteUniformDistribution):
            return self._sample_discrete_uniform(
                param_distribution, below_param_values, above_param_values
            )
        elif isinstance(param_distribution, distributions.IntUniformDistribution):
            return self._sample_int(param_distribution, below_param_values, above_param_values)
        elif isinstance(param_distribution, distributions.IntLogUniformDistribution):
            return self._sample_int_loguniform(
                param_distribution, below_param_values, above_param_values
            )
        elif isinstance(param_distribution, distributions.CategoricalDistribution):
            index = self._sample_categorical_index(
                param_distribution, below_param_values, above_param_values
            )
            return param_distribution.choices[index]
        else:
            distribution_list = [
                distributions.UniformDistribution.__name__,
                distributions.LogUniformDistribution.__name__,
                distributions.DiscreteUniformDistribution.__name__,
                distributions.IntUniformDistribution.__name__,
                distributions.IntLogUniformDistribution.__name__,
                distributions.CategoricalDistribution.__name__,
            ]
            raise NotImplementedError(
                "The distribution {} is not implemented. "
                "The parameter distribution should be one of the {}".format(
                    param_distribution, distribution_list
                )
            )

    def _split_observation_pairs(
        self, config_vals: List[Optional[float]], loss_vals: List[Tuple[float, float]]
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Split parameter values into the ``gamma``-selected "below" set and the rest.

        :obj:`None` entries are dropped from both returned arrays.
        """

        config_vals = np.asarray(config_vals)
        loss_vals = np.asarray(loss_vals, dtype=[("step", float), ("score", float)])

        n_below = self._gamma(len(config_vals))
        loss_ascending = np.argsort(loss_vals)
        # `np.sort` on the selected indices restores their original ordering.
        below = config_vals[np.sort(loss_ascending[:n_below])]
        below = np.asarray([v for v in below if v is not None], dtype=float)
        above = config_vals[np.sort(loss_ascending[n_below:])]
        above = np.asarray([v for v in above if v is not None], dtype=float)
        return below, above

    def _split_multivariate_observation_pairs(
        self,
        config_vals: Dict[str, List[Optional[float]]],
        loss_vals: List[Tuple[float, float]],
    ) -> Tuple[Dict[str, np.ndarray], Dict[str, np.ndarray]]:
        """Per-parameter variant of :meth:`_split_observation_pairs` (no ``None`` filtering)."""

        config_vals = {k: np.asarray(v, dtype=float) for k, v in config_vals.items()}
        loss_vals = np.asarray(loss_vals, dtype=[("step", float), ("score", float)])

        n_below = self._gamma(len(loss_vals))
        index_loss_ascending = np.argsort(loss_vals)
        # `np.sort` is used to keep chronological order.
        index_below = np.sort(index_loss_ascending[:n_below])
        index_above = np.sort(index_loss_ascending[n_below:])
        below = {}
        above = {}
        for param_name, param_val in config_vals.items():
            below[param_name] = param_val[index_below]
            above[param_name] = param_val[index_above]

        return below, above

    def _sample_uniform(
        self, distribution: distributions.UniformDistribution, below: np.ndarray, above: np.ndarray
    ) -> float:
        """Sample a value within ``[distribution.low, distribution.high]``."""

        low = distribution.low
        high = distribution.high
        return self._sample_numerical(low, high, below, above)

    def _sample_loguniform(
        self,
        distribution: distributions.LogUniformDistribution,
        below: np.ndarray,
        above: np.ndarray,
    ) -> float:
        """Same as :meth:`_sample_uniform` but the sampling is carried out in log space."""

        low = distribution.low
        high = distribution.high
        return self._sample_numerical(low, high, below, above, is_log=True)

    def _sample_discrete_uniform(
        self,
        distribution: distributions.DiscreteUniformDistribution,
        below: np.ndarray,
        above: np.ndarray,
    ) -> float:
        """Sample a value quantized by ``distribution.q`` and clipped to the domain.

        The input arrays are not modified.
        """

        q = distribution.q
        r = distribution.high - distribution.low
        # [low, high] is shifted to [0, r] to align sampled values at regular intervals.
        low = 0 - 0.5 * q
        high = r + 0.5 * q

        # Shift below and above to [0, r]. New arrays are created on purpose: an in-place
        # `-=` would silently modify the caller's arrays and fails for integer arrays
        # when `distribution.low` is a float.
        above = above - distribution.low
        below = below - distribution.low

        best_sample = self._sample_numerical(low, high, below, above, q=q) + distribution.low
        return min(max(best_sample, distribution.low), distribution.high)

    def _sample_int(
        self,
        distribution: distributions.IntUniformDistribution,
        below: np.ndarray,
        above: np.ndarray,
    ) -> int:
        """Sample an integer by delegating to :meth:`_sample_discrete_uniform`."""

        d = distributions.DiscreteUniformDistribution(
            low=distribution.low, high=distribution.high, q=distribution.step
        )
        return int(self._sample_discrete_uniform(d, below, above))

    def _sample_int_loguniform(
        self,
        distribution: distributions.IntLogUniformDistribution,
        below: np.ndarray,
        above: np.ndarray,
    ) -> int:
        """Sample in log space, round to the nearest integer and clip to the domain."""

        # The domain is widened by 0.5 on both sides before rounding.
        low = distribution.low - 0.5
        high = distribution.high + 0.5

        sample = self._sample_numerical(low, high, below, above, is_log=True)
        best_sample = np.round(sample)

        return int(min(max(best_sample, distribution.low), distribution.high))

    def _sample_numerical(
        self,
        low: float,
        high: float,
        below: np.ndarray,
        above: np.ndarray,
        q: Optional[float] = None,
        is_log: bool = False,
    ) -> float:
        """Draw candidates from the "below" estimator and return the best-scoring one.

        Candidates are scored by the difference of the log-densities of the "below"
        and "above" estimators. With ``is_log=True`` everything is computed on
        log-transformed values and the result is mapped back with ``exp``.
        """

        if is_log:
            low = np.log(low)
            high = np.log(high)
            below = np.log(below)
            above = np.log(above)

        size = (self._n_ei_candidates,)

        parzen_estimator_below = _ParzenEstimator(
            mus=below, low=low, high=high, parameters=self._parzen_estimator_parameters
        )
        samples_below = self._sample_from_gmm(
            parzen_estimator=parzen_estimator_below, low=low, high=high, q=q, size=size
        )
        log_likelihoods_below = self._gmm_log_pdf(
            samples=samples_below,
            parzen_estimator=parzen_estimator_below,
            low=low,
            high=high,
            q=q,
        )

        parzen_estimator_above = _ParzenEstimator(
            mus=above, low=low, high=high, parameters=self._parzen_estimator_parameters
        )

        log_likelihoods_above = self._gmm_log_pdf(
            samples=samples_below,
            parzen_estimator=parzen_estimator_above,
            low=low,
            high=high,
            q=q,
        )

        ret = float(
            TPESampler._compare(
                samples=samples_below, log_l=log_likelihoods_below, log_g=log_likelihoods_above
            )[0]
        )
        return math.exp(ret) if is_log else ret

    def _sample_categorical_index(
        self,
        distribution: distributions.CategoricalDistribution,
        below: np.ndarray,
        above: np.ndarray,
    ) -> int:
        """Return the index (into ``distribution.choices``) of the best-scoring candidate."""

        choices = distribution.choices
        below = list(map(int, below))
        above = list(map(int, above))
        upper = len(choices)

        # We can use `np.arange(len(distribution.choices))` instead of sampling from `l(x)`
        # when the cardinality of categorical parameters is lower than `n_ei_candidates`.
        # Though it seems to be theoretically correct, it leads to performance degradation
        # on the NAS benchmark experiment in https://arxiv.org/abs/1902.09635.
        # See https://github.com/optuna/optuna/pull/1603 for more details.
        size = (self._n_ei_candidates,)

        weights_below = self._weights(len(below))
        counts_below = np.bincount(below, minlength=upper, weights=weights_below)
        weighted_below = counts_below + self._prior_weight
        weighted_below /= weighted_below.sum()
        samples_below = self._sample_from_categorical_dist(weighted_below, size)
        log_likelihoods_below = TPESampler._categorical_log_pdf(samples_below, weighted_below)

        weights_above = self._weights(len(above))
        counts_above = np.bincount(above, minlength=upper, weights=weights_above)
        weighted_above = counts_above + self._prior_weight
        weighted_above /= weighted_above.sum()
        log_likelihoods_above = TPESampler._categorical_log_pdf(samples_below, weighted_above)

        return int(
            TPESampler._compare(
                samples=samples_below, log_l=log_likelihoods_below, log_g=log_likelihoods_above
            )[0]
        )

    def _sample_from_gmm(
        self,
        parzen_estimator: _ParzenEstimator,
        low: float,
        high: float,
        q: Optional[float] = None,
        size: Tuple = (),
    ) -> np.ndarray:
        """Draw ``size`` samples from the truncated mixture described by ``parzen_estimator``.

        Samples are rounded to multiples of ``q`` when ``q`` is given.

        Raises:
            ValueError: If ``low >= high``.
        """

        weights = parzen_estimator.weights
        mus = parzen_estimator.mus
        sigmas = parzen_estimator.sigmas
        weights, mus, sigmas = map(np.asarray, (weights, mus, sigmas))

        if low >= high:
            raise ValueError(
                "The 'low' should be lower than the 'high'. "
                "But (low, high) = ({}, {}).".format(low, high)
            )

        # Pick one mixture component per sample.
        active = np.argmax(self._rng.multinomial(1, weights, size=size), axis=-1)
        trunc_low = (low - mus[active]) / sigmas[active]
        trunc_high = (high - mus[active]) / sigmas[active]
        # Start with an out-of-range sentinel so that the loop body runs at least once;
        # entries that are still `>= high` are redrawn until none remain.
        samples = np.full((), fill_value=high + 1.0, dtype=np.float64)
        while (samples >= high).any():
            samples = np.where(
                samples < high,
                samples,
                truncnorm.rvs(
                    trunc_low,
                    trunc_high,
                    size=size,
                    loc=mus[active],
                    scale=sigmas[active],
                    random_state=self._rng,
                ),
            )

        if q is None:
            return samples
        else:
            return np.round(samples / q) * q

    def _gmm_log_pdf(
        self,
        samples: np.ndarray,
        parzen_estimator: _ParzenEstimator,
        low: float,
        high: float,
        q: Optional[float] = None,
    ) -> np.ndarray:
        """Return the log-density of ``samples`` under the truncated mixture.

        With ``q`` given, the probability mass of the ``q``-wide bin around each sample
        (clipped to ``[low, high]``) is used instead of the point density.

        Raises:
            ValueError: If the estimator's weights, mus or sigmas are not 1-dimensional.
        """

        weights = parzen_estimator.weights
        mus = parzen_estimator.mus
        sigmas = parzen_estimator.sigmas
        samples, weights, mus, sigmas = map(np.asarray, (samples, weights, mus, sigmas))
        if samples.size == 0:
            return np.asarray([], dtype=float)
        if weights.ndim != 1:
            raise ValueError(
                "The 'weights' should be 1-dimension. "
                "But weights.shape = {}".format(weights.shape)
            )
        if mus.ndim != 1:
            raise ValueError(
                "The 'mus' should be 1-dimension. But mus.shape = {}".format(mus.shape)
            )
        if sigmas.ndim != 1:
            raise ValueError(
                "The 'sigmas' should be 1-dimension. But sigmas.shape = {}".format(sigmas.shape)
            )

        # Mixture mass inside [low, high]; used to renormalize the truncated density.
        p_accept = np.sum(
            weights
            * (
                TPESampler._normal_cdf(high, mus, sigmas)
                - TPESampler._normal_cdf(low, mus, sigmas)
            )
        )

        if q is None:
            distance = samples[..., None] - mus
            mahalanobis = (distance / np.maximum(sigmas, EPS)) ** 2
            Z = np.sqrt(2 * np.pi) * sigmas
            coefficient = weights / Z / p_accept
            return TPESampler._logsum_rows(-0.5 * mahalanobis + np.log(coefficient))
        else:
            cdf_func = TPESampler._normal_cdf
            upper_bound = np.minimum(samples + q / 2.0, high)
            lower_bound = np.maximum(samples - q / 2.0, low)
            probabilities = np.sum(
                weights[..., None]
                * (
                    cdf_func(upper_bound[None], mus[..., None], sigmas[..., None])
                    - cdf_func(lower_bound[None], mus[..., None], sigmas[..., None])
                ),
                axis=0,
            )
            return np.log(probabilities + EPS) - np.log(p_accept + EPS)

    def _sample_from_categorical_dist(
        self, probabilities: np.ndarray, size: Tuple[int]
    ) -> np.ndarray:
        """Draw category indices with the given ``probabilities``; result has shape ``size``."""

        if size == (0,):
            return np.asarray([], dtype=float)
        assert len(size)

        if probabilities.size == 1 and isinstance(probabilities[0], np.ndarray):
            probabilities = probabilities[0]
        assert probabilities.ndim == 1

        n_draws = np.prod(size).item()
        # Each draw is a one-hot row; the dot product below converts it to an index.
        sample = self._rng.multinomial(n=1, pvals=probabilities, size=n_draws)
        assert sample.shape == size + probabilities.shape
        return_val = np.dot(sample, np.arange(probabilities.size)).reshape(size)
        return return_val

    @classmethod
    def _categorical_log_pdf(cls, sample: np.ndarray, p: np.ndarray) -> np.ndarray:
        """Return ``log(p[sample])``, or an empty array for an empty ``sample``."""

        if sample.size:
            return np.log(np.asarray(p)[sample])
        else:
            return np.asarray([])

    @classmethod
    def _compare(cls, samples: np.ndarray, log_l: np.ndarray, log_g: np.ndarray) -> np.ndarray:
        """Return the sample maximizing ``log_l - log_g``, repeated ``samples.size`` times.

        Returns an empty array for empty ``samples``.

        Raises:
            ValueError: If the number of samples and scores differ.
        """

        samples, log_l, log_g = map(np.asarray, (samples, log_l, log_g))
        if samples.size:
            score = log_l - log_g
            if samples.size != score.size:
                raise ValueError(
                    "The size of the 'samples' and that of the 'score' "
                    "should be same. "
                    "But (samples.size, score.size) = ({}, {})".format(samples.size, score.size)
                )

            best = np.argmax(score)
            return np.asarray([samples[best]] * samples.size)
        else:
            return np.asarray([])

    @classmethod
    def _compare_multivariate(
        cls,
        multivariate_samples: Dict[str, np.ndarray],
        log_l: np.ndarray,
        log_g: np.ndarray,
    ) -> Dict[str, Union[float, int]]:
        """Return, per parameter, the value of the candidate maximizing ``log_l - log_g``.

        Raises:
            ValueError: If there are no samples, or if the number of samples and scores
                differ.
        """

        sample_size = next(iter(multivariate_samples.values())).size
        if sample_size:
            score = log_l - log_g
            if sample_size != score.size:
                raise ValueError(
                    "The size of the 'samples' and that of the 'score' "
                    "should be same. "
                    "But (samples.size, score.size) = ({}, {})".format(sample_size, score.size)
                )
            best = np.argmax(score)
            return {k: v[best].item() for k, v in multivariate_samples.items()}
        else:
            raise ValueError(
                "The size of 'samples' should be more than 0. "
                "But samples.size = {}".format(sample_size)
            )

    @classmethod
    def _logsum_rows(cls, x: np.ndarray) -> np.ndarray:
        """Return ``log(sum(exp(x), axis=1))``, subtracting the row maximum before ``exp``."""

        x = np.asarray(x)
        m = x.max(axis=1)
        return np.log(np.exp(x - m[:, None]).sum(axis=1)) + m

    @classmethod
    def _normal_cdf(cls, x: float, mu: np.ndarray, sigma: np.ndarray) -> np.ndarray:
        """Return the normal CDF at ``x`` for the given ``mu`` and ``sigma``."""

        mu, sigma = map(np.asarray, (mu, sigma))
        numerator = x - mu
        # Guard against division by zero for degenerate (zero-width) components.
        denominator = np.maximum(np.sqrt(2) * sigma, EPS)
        z = numerator / denominator
        return 0.5 * (1 + scipy.special.erf(z))

    @staticmethod
    def hyperopt_parameters() -> Dict[str, Any]:
        """Return the default parameters of hyperopt (v0.1.2).

        :class:`~optuna.samplers.TPESampler` can be instantiated with the parameters returned
        by this method.

        Example:

            Create a :class:`~optuna.samplers.TPESampler` instance with the default
            parameters of `hyperopt <https://github.com/hyperopt/hyperopt/tree/0.1.2>`_.

            .. testcode::

                import optuna
                from optuna.samplers import TPESampler


                def objective(trial):
                    x = trial.suggest_uniform("x", -10, 10)
                    return x ** 2


                sampler = TPESampler(**TPESampler.hyperopt_parameters())
                study = optuna.create_study(sampler=sampler)
                study.optimize(objective, n_trials=10)

        Returns:
            A dictionary containing the default parameters of hyperopt.

        """

        return {
            "consider_prior": True,
            "prior_weight": 1.0,
            "consider_magic_clip": True,
            "consider_endpoints": False,
            "n_startup_trials": 20,
            "n_ei_candidates": 24,
            "gamma": hyperopt_default_gamma,
            "weights": default_weights,
        }
class BoTorchSampler(BaseSampler):
    """A sampler that uses BoTorch, a Bayesian optimization library built on top of PyTorch.

    This sampler allows using BoTorch's optimization algorithms from Optuna to suggest parameter
    configurations. Parameters are transformed to continuous space and passed to BoTorch, and then
    transformed back to Optuna's representations. Categorical parameters are one-hot encoded.

    .. seealso::
        See an `example <https://github.com/optuna/optuna/blob/master/examples/
        multi_objective/botorch_simple.py>`_ how to use the sampler.

    .. seealso::
        See the `BoTorch <https://botorch.org/>`_ homepage for details and for how to implement
        your own ``candidates_func``.

    .. note::
        An instance of this sampler *should not be used with different studies* when used with
        constraints. Instead, a new instance should be created for each new study. The reason for
        this is that the sampler is stateful keeping all the computed constraints.

    Args:
        candidates_func:
            An optional function that suggests the next candidates. It must take the training
            data, the objectives, the constraints, the search space bounds and return the next
            candidates. The arguments are of type ``torch.Tensor``. The return value must be a
            ``torch.Tensor``. However, if ``constraints_func`` is omitted, constraints will be
            :obj:`None`. For any constraints that failed to compute, the tensor will contain
            NaN.

            If omitted, is determined automatically based on the number of objectives. If the
            number of objectives is one, Quasi MC-based batch Expected Improvement (qEI) is used.
            If the number of objectives is larger than one but smaller than four, Quasi MC-based
            batch Expected Hypervolume Improvement (qEHVI) is used. Otherwise, for larger number
            of objectives, the faster Quasi MC-based extended ParEGO (qParEGO) is used.

            The function should assume *maximization* of the objective.

            .. seealso::
                See :func:`optuna.integration.botorch.qei_candidates_func` for an example.
        constraints_func:
            An optional function that computes the objective constraints. It must take a
            :class:`~optuna.trial.FrozenTrial` and return the constraints. The return value must
            be a sequence of :obj:`float` s. A value strictly larger than 0 means that a
            constraint is violated. A value equal to or smaller than 0 is considered feasible.

            If omitted, no constraints will be passed to ``candidates_func`` nor taken into
            account during suggestion if ``candidates_func`` is omitted.
        n_startup_trials:
            Number of initial trials, that is the number of trials to resort to independent
            sampling.
        independent_sampler:
            An independent sampler to use for the initial trials and for parameters that are
            conditional.
    """

    def __init__(
        self,
        *,
        candidates_func: Optional[
            Callable[
                [
                    "torch.Tensor",
                    "torch.Tensor",
                    Optional["torch.Tensor"],
                    "torch.Tensor",
                ],
                "torch.Tensor",
            ]
        ] = None,
        constraints_func: Optional[Callable[[FrozenTrial], Sequence[float]]] = None,
        n_startup_trials: int = 10,
        independent_sampler: Optional[BaseSampler] = None,
    ) -> None:
        # Fail early if the deferred (optional) imports are not available.
        _imports.check()

        self._candidates_func = candidates_func
        self._constraints_func = constraints_func
        self._independent_sampler = independent_sampler or RandomSampler()
        self._n_startup_trials = n_startup_trials

        # ID of the first study seen by `infer_relative_search_space`; `None` until then.
        self._study_id: Optional[int] = None
        self._search_space = IntersectionSearchSpace()

    def infer_relative_search_space(
        self,
        study: Study,
        trial: FrozenTrial,
    ) -> Dict[str, BaseDistribution]:
        """Return the intersection search space of ``study`` as an ordered dict.

        Raises:
            RuntimeError: If the sampler was previously used with a study of a different ID.
        """
        if self._study_id is None:
            self._study_id = study._study_id
        if self._study_id != study._study_id:
            # Note that the check below is meaningless when `InMemoryStorage` is used
            # because `InMemoryStorage.create_new_study` always returns the same study ID.
            raise RuntimeError("BoTorchSampler cannot handle multiple studies.")

        return self._search_space.calculate(study, ordered_dict=True)  # type: ignore

    def sample_relative(
        self,
        study: Study,
        trial: FrozenTrial,
        search_space: Dict[str, BaseDistribution],
    ) -> Dict[str, Any]:
        """Suggest parameters by passing the completed trials to ``candidates_func``.

        Returns an empty dict when ``search_space`` is empty or when fewer than
        ``n_startup_trials`` trials have completed.

        Raises:
            RuntimeError: If stored constraints of different trials differ in length.
            TypeError: If ``candidates_func`` does not return a ``torch.Tensor``.
            ValueError: If the returned candidates have an unsupported shape.
        """
        assert isinstance(search_space, OrderedDict)

        if len(search_space) == 0:
            return {}

        trials = [t for t in study.get_trials(deepcopy=False) if t.state == TrialState.COMPLETE]

        n_trials = len(trials)
        if n_trials < self._n_startup_trials:
            return {}

        trans = _SearchSpaceTransform(search_space)

        n_objectives = len(study.directions)
        values = numpy.empty((n_trials, n_objectives), dtype=numpy.float64)
        params = numpy.empty((n_trials, trans.bounds.shape[0]), dtype=numpy.float64)
        # Allocated lazily once the first stored constraints are found.
        con = None
        bounds = trans.bounds

        # The loop variable is deliberately not called `trial` so that it does not shadow the
        # `trial` argument of this method.
        for trial_idx, completed_trial in enumerate(trials):
            params[trial_idx] = trans.transform(completed_trial.params)
            assert len(study.directions) == len(completed_trial.values)

            for obj_idx, (direction, value) in enumerate(
                zip(study.directions, completed_trial.values)
            ):
                assert value is not None
                if direction == StudyDirection.MINIMIZE:  # BoTorch always assumes maximization.
                    value *= -1
                values[trial_idx, obj_idx] = value

            if self._constraints_func is not None:
                constraints = study._storage.get_trial_system_attrs(
                    completed_trial._trial_id
                ).get("botorch:constraints")
                if constraints is not None:
                    n_constraints = len(constraints)

                    if con is None:
                        # Rows of trials without stored constraints stay NaN.
                        con = numpy.full((n_trials, n_constraints), numpy.nan, dtype=numpy.float64)
                    elif n_constraints != con.shape[1]:
                        raise RuntimeError(
                            f"Expected {con.shape[1]} constraints but received {n_constraints}."
                        )

                    con[trial_idx] = constraints

        if self._constraints_func is not None:
            if con is None:
                warnings.warn(
                    "`constraints_func` was given but no call to it correctly computed "
                    "constraints. Constraints passed to `candidates_func` will be `None`."
                )
            elif numpy.isnan(con).any():
                warnings.warn(
                    "`constraints_func` was given but some calls to it did not correctly compute "
                    "constraints. Constraints passed to `candidates_func` will contain NaN."
                )

        values = torch.from_numpy(values)
        params = torch.from_numpy(params)
        if con is not None:
            con = torch.from_numpy(con)
        bounds = torch.from_numpy(bounds)

        if con is not None:
            if con.dim() == 1:
                con.unsqueeze_(-1)
        bounds.transpose_(0, 1)

        if self._candidates_func is None:
            # The default is chosen once and then reused for subsequent calls.
            self._candidates_func = _get_default_candidates_func(n_objectives=n_objectives)
        candidates = self._candidates_func(params, values, con, bounds)

        if not isinstance(candidates, torch.Tensor):
            raise TypeError("Candidates must be a torch.Tensor.")
        if candidates.dim() == 2:
            if candidates.size(0) != 1:
                raise ValueError(
                    "Candidates batch optimization is not supported and the first dimension must "
                    "have size 1 if candidates is a two-dimensional tensor. Actual: "
                    f"{candidates.size()}."
                )
            # Batch size is one. Get rid of the batch dimension.
            candidates = candidates.squeeze(0)
        if candidates.dim() != 1:
            raise ValueError("Candidates must be one or two-dimensional.")
        if candidates.size(0) != bounds.size(1):
            raise ValueError(
                "Candidates size must match with the given bounds. Actual candidates: "
                f"{candidates.size(0)}, bounds: {bounds.size(1)}."
            )

        candidates = candidates.numpy()

        params = trans.untransform(candidates)

        return params

    def sample_independent(
        self,
        study: Study,
        trial: FrozenTrial,
        param_name: str,
        param_distribution: BaseDistribution,
    ) -> Any:
        """Delegate to the configured independent sampler."""
        return self._independent_sampler.sample_independent(
            study, trial, param_name, param_distribution
        )

    def reseed_rng(self) -> None:
        """Reseed the configured independent sampler."""
        self._independent_sampler.reseed_rng()

    def after_trial(
        self,
        study: Study,
        trial: FrozenTrial,
        state: TrialState,
        values: Optional[Sequence[float]],
    ) -> None:
        """Compute the constraints of ``trial`` and store them as a trial system attribute.

        Does nothing when no ``constraints_func`` was given. If computing the constraints
        raises, :obj:`None` is stored and the exception propagates to the caller.
        """
        if self._constraints_func is not None:
            constraints = None

            try:
                con = self._constraints_func(trial)
                if not isinstance(con, (tuple, list)):
                    warnings.warn(
                        f"Constraints should be a sequence of floats but got {type(con).__name__}."
                    )
                constraints = tuple(con)
            finally:
                # Runs on success and on failure alike, so the attribute is always written.
                assert constraints is None or isinstance(constraints, tuple)

                study._storage.set_trial_system_attr(
                    trial._trial_id,
                    "botorch:constraints",
                    constraints,
                )
def test_intersection_search_space() -> None:
    """Walk a study through several trials and check the cached intersection each time."""
    search_space = IntersectionSearchSpace()
    study = create_study()

    y_wide = UniformDistribution(low=-3, high=3)
    x_int = IntUniformDistribution(low=0, high=10)

    def check(expected: Dict[str, BaseDistribution]) -> None:
        # The class result must match both the literal expectation and the function API.
        assert search_space.calculate(study) == expected
        assert search_space.calculate(study) == intersection_search_space(study)

    def suggest_y_and_x(t: Trial) -> float:
        return t.suggest_float("y", -3, 3) + t.suggest_int("x", 0, 10)

    def suggest_y_only(t: Trial) -> float:
        return t.suggest_float("y", -3, 3)

    def suggest_narrow_y(t: Trial) -> float:
        return t.suggest_float("y", -1, 1)

    # No trial.
    check({})

    # First trial.
    study.optimize(suggest_y_and_x, n_trials=1)
    check({"x": x_int, "y": y_wide})

    # Returning sorted `OrderedDict` instead of `dict`.
    expected_ordered = OrderedDict([("x", x_int), ("y", y_wide)])
    assert search_space.calculate(study, ordered_dict=True) == expected_ordered
    assert search_space.calculate(study, ordered_dict=True) == intersection_search_space(
        study, ordered_dict=True
    )

    # Second trial (only 'y' parameter is suggested in this trial).
    study.optimize(suggest_y_only, n_trials=1)
    check({"y": y_wide})

    # Failed or pruned trials are not considered in the calculation of
    # an intersection search space.
    def objective(trial: Trial, exception: Exception) -> float:
        trial.suggest_float("z", 0, 1)
        raise exception

    study.optimize(lambda t: objective(t, RuntimeError()), n_trials=1, catch=(RuntimeError,))
    study.optimize(lambda t: objective(t, TrialPruned()), n_trials=1)
    check({"y": y_wide})

    # If two parameters have the same name but different distributions,
    # those are regarded as different parameters.
    study.optimize(suggest_narrow_y, n_trials=1)
    check({})

    # The search space remains empty once it is empty.
    study.optimize(suggest_y_and_x, n_trials=1)
    check({})