def test_match_batch_shape(self):
    X = torch.rand(3, 2)
    Y = torch.rand(1, 3, 2)
    X_tf = match_batch_shape(X, Y)
    self.assertTrue(torch.equal(X_tf, X.unsqueeze(0)))

    X = torch.rand(1, 3, 2)
    Y = torch.rand(2, 3, 2)
    X_tf = match_batch_shape(X, Y)
    self.assertTrue(torch.equal(X_tf, X.repeat(2, 1, 1)))

    X = torch.rand(2, 3, 2)
    Y = torch.rand(1, 3, 2)
    with self.assertRaises(RuntimeError):
        match_batch_shape(X, Y)
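# For reference, a minimal sketch of the broadcasting behavior these tests exercise.
# The helper name and its one-line body below are reconstructed from the test cases
# above (and mirror what `botorch.utils.transforms.match_batch_shape` is expected to
# do); they are not copied from the source.
import torch
from torch import Tensor


def match_batch_shape_sketch(X: Tensor, Y: Tensor) -> Tensor:
    """Expand the batch dimensions of `X` to match those of `Y`.

    Leaves the trailing `q x d` dimensions of `X` untouched and raises a
    RuntimeError if the batch shapes are not broadcast-compatible.
    """
    return X.expand(X.shape[: -Y.dim()] + Y.shape[:-2] + X.shape[-2:])


# e.g. a `3 x 2` tensor matched against a `1 x 3 x 2` tensor gains a batch dim:
assert match_batch_shape_sketch(torch.rand(3, 2), torch.rand(1, 3, 2)).shape == (1, 3, 2)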
def forward(self, X: Tensor) -> Tensor: r"""Evaluate qNoisyExpectedImprovement on the candidate set `X`. Args: X: A `batch_shape x q x d`-dim Tensor of t-batches with `q` `d`-dim design points each. Returns: A `batch_shape'`-dim Tensor of Noisy Expected Improvement values at the given design points `X`, where `batch_shape'` is the broadcasted batch shape of model and input `X`. """ q = X.shape[-2] X_full = torch.cat([match_batch_shape(self.X_baseline, X), X], dim=-2) # TODO: Implement more efficient way to compute posterior over both training and # test points in GPyTorch (https://github.com/cornellius-gp/gpytorch/issues/567) posterior = self.model.posterior( X_full, posterior_transform=self.posterior_transform) if self._cache_root: diffs = self._forward_cached(posterior=posterior, X_full=X_full, q=q) else: samples = self.sampler(posterior) obj = self.objective(samples, X=X_full) diffs = obj[..., -q:].max(dim=-1).values - obj[..., :-q].max( dim=-1).values return diffs.clamp_min(0).mean(dim=0)
def _sample_max_values(self):
    r"""Sample max values for MC approximation of the expectation in MES"""
    with torch.no_grad():
        # Append X_pending to candidate set
        if self.X_pending is None:
            X_pending = torch.tensor(
                [], dtype=self.candidate_set.dtype, device=self.candidate_set.device
            )
        else:
            X_pending = self.X_pending
        X_pending = match_batch_shape(X_pending, self.candidate_set)
        candidate_set = torch.cat([self.candidate_set, X_pending], dim=0)

        # project the candidate_set to the highest fidelity,
        # which is needed for the multi-fidelity MES
        try:
            candidate_set = self.project(candidate_set)
        except AttributeError:
            pass

        # sample max values
        if self.use_gumbel:
            self.posterior_max_values = _sample_max_value_Gumbel(
                self.model, candidate_set, self.num_mv_samples, self.maximize
            )
        else:
            self.posterior_max_values = _sample_max_value_Thompson(
                self.model, candidate_set, self.num_mv_samples, self.maximize
            )
def get_multi_step_tree_input_representation(self, X: Tensor) -> List[Tensor]:
    r"""Get the multi-step tree representation of X.

    Args:
        X: A `batch_shape x q' x d`-dim Tensor with `q'` design points for
            each batch, where `q' = q_0 + f_1 q_1 + f_2 f_1 q_2 + ...`. Here
            `q_i` is the number of candidates jointly considered in look-ahead
            step `i`, and `f_i` is the respective number of fantasies.

    Returns:
        A list `[X_j, ..., X_k]` of tensors, where `X_i` has shape
        `f_i x ... x f_1 x batch_shape x q_i x d`.
    """
    batch_shape, shapes, sizes = self.get_split_shapes(X=X)
    # Each X_i in Xsplit has shape batch_shape x qtilde x d with
    # qtilde = f_i * ... * f_1 * q_i
    Xsplit = torch.split(X, sizes, dim=-2)
    # now reshape (need to permute batch_shape and qtilde dimensions for i > 0)
    perm = [-2] + list(range(len(batch_shape))) + [-1]
    X0 = Xsplit[0].reshape(shapes[0])
    Xother = [
        X.permute(*perm).reshape(shape)
        for X, shape in zip(Xsplit[1:], shapes[1:])
    ]
    # concatenate in pending points
    if self.X_pending is not None:
        X0 = torch.cat([X0, match_batch_shape(self.X_pending, X0)], dim=-2)
    return [X0] + Xother
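# A concrete illustration of the `q'` decomposition in the docstring above. The
# per-step batch sizes and fantasy counts below are hypothetical: with
# q_i = [1, 1, 1] and f_i = [4, 3] we get q' = 1 + 4*1 + 3*4*1 = 17.
import math

q = [1, 1, 1]  # q_0, q_1, q_2
f = [4, 3]     # f_1, f_2

# size of each block of X along the q'-dimension: q_i * f_i * ... * f_1
sizes = [q[0]] + [q[i] * math.prod(f[:i]) for i in range(1, len(q))]
assert sizes == [1, 4, 12] and sum(sizes) == 17  # q' = 17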
def __init__(
    self,
    model: Model,
    candidate_set: Tensor,
    num_fantasies: int = 16,
    num_mv_samples: int = 10,
    num_y_samples: int = 128,
    use_gumbel: bool = True,
    maximize: bool = True,
    X_pending: Optional[Tensor] = None,
    train_inputs: Optional[Tensor] = None,
    **kwargs: Any,
) -> None:
    r"""Single-outcome max-value entropy search acquisition function.

    Args:
        model: A fitted single-outcome model.
        candidate_set: A `n x d` Tensor including `n` candidate points to
            discretize the design space. Max values are sampled from the
            (joint) model posterior over these points.
        num_fantasies: Number of fantasies to generate. The higher this
            number the more accurate the model (at the expense of model
            complexity, wall time and memory). Ignored if `X_pending` is `None`.
        num_mv_samples: Number of max value samples.
        num_y_samples: Number of posterior samples at specific design point `X`.
        use_gumbel: If True, use Gumbel approximation to sample the max values.
        maximize: If True, consider the problem a maximization problem.
        X_pending: A `m x d`-dim Tensor of `m` design points that have been
            submitted for function evaluation but have not yet been evaluated.
        train_inputs: A `n_train x d` Tensor that the model has been fitted on.
            Not required if the model is an exact GP model.
    """
    sampler = SobolQMCNormalSampler(num_y_samples)
    super().__init__(model=model, sampler=sampler)

    # Batch GP models (e.g. fantasized models) are not currently supported
    if train_inputs is None:
        train_inputs = self.model.train_inputs[0]
    if train_inputs.ndim > 2:
        raise NotImplementedError(
            "Batch GP models (e.g. fantasized models) "
            "are not yet supported by qMaxValueEntropy"
        )
    self._init_model = model  # only used for the `fantasize()` in `set_X_pending()`
    train_inputs = match_batch_shape(train_inputs, candidate_set)
    self.candidate_set = torch.cat([candidate_set, train_inputs], dim=0)
    self.fantasies_sampler = SobolQMCNormalSampler(num_fantasies)
    self.num_fantasies = num_fantasies
    self.use_gumbel = use_gumbel
    self.num_mv_samples = num_mv_samples
    self.maximize = maximize
    self.weight = 1.0 if maximize else -1.0

    # If we moved `self._sample_max_values()` into `set_X_pending()`, it would
    # throw errors when the initial `super().__init__()` is called, since some
    # members required by `_sample_max_values()` are not yet initialized.
    if X_pending is None:
        self._sample_max_values()
    else:
        self.set_X_pending(X_pending)
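# A minimal usage sketch of this constructor. The toy model and candidate set are
# illustrative, not from the source; evaluating with q=1 avoids the fantasizing path.
import torch
from botorch.models import SingleTaskGP
from botorch.acquisition.max_value_entropy_search import qMaxValueEntropy

train_X = torch.rand(10, 2, dtype=torch.double)
train_Y = train_X.norm(dim=-1, keepdim=True)
model = SingleTaskGP(train_X, train_Y)

# discretization of the design space from which max values are sampled
candidate_set = torch.rand(1000, 2, dtype=torch.double)
mes = qMaxValueEntropy(model, candidate_set)
vals = mes(torch.rand(5, 1, 2, dtype=torch.double))  # q=1, batch of 5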
def forward(self, X: Tensor) -> Tensor: r"""Evaluate qKnowledgeGradient on the candidate set `X`. Args: X: A `b x (q + num_fantasies) x d` Tensor with `b` t-batches of `q + num_fantasies` design points each. We split this X tensor into two parts in the `q` dimension (`dim=-2`). The first `q` are the q-batch of design points and the last num_fantasies are the current solutions of the inner optimization problem. `X_fantasies = X[..., -num_fantasies:, :]` `X_fantasies.shape = b x num_fantasies x d` `X_actual = X[..., :-num_fantasies, :]` `X_actual.shape = b x q x d` Returns: A Tensor of shape `b`. For t-batch b, the q-KG value of the design `X_actual[b]` is averaged across the fantasy models, where `X_fantasies[b, i]` is chosen as the final selection for the `i`-th fantasy model. NOTE: If `current_value` is not provided, then this is not the true KG value of `X_actual[b]`, and `X_fantasies[b, : ]` must be maximized at fixed `X_actual[b]`. """ X_actual, X_fantasies = _split_fantasy_points(X=X, n_f=self.num_fantasies) # We only concatenate X_pending into the X part after splitting if self.X_pending is not None: X_actual = torch.cat( [X_actual, match_batch_shape(self.X_pending, X_actual)], dim=-2) # construct the fantasy model of shape `num_fantasies x b` fantasy_model = self.model.fantasize(X=X_actual, sampler=self.sampler, observation_noise=True) # get the value function value_function = _get_value_function( model=fantasy_model, objective=self.objective, posterior_transform=self.posterior_transform, sampler=self.inner_sampler, ) # make sure to propagate gradients to the fantasy model train inputs with settings.propagate_grads(True): values = value_function(X=X_fantasies) # num_fantasies x b if self.current_value is not None: values = values - self.current_value # return average over the fantasy samples return values.mean(dim=0)
def test_match_batch_shape_multi_dim(self):
    X = torch.rand(1, 3, 2)
    Y = torch.rand(5, 4, 3, 2)
    X_tf = match_batch_shape(X, Y)
    self.assertTrue(torch.equal(X_tf, X.expand(5, 4, 3, 2)))

    X = torch.rand(4, 3, 2)
    Y = torch.rand(5, 4, 3, 2)
    X_tf = match_batch_shape(X, Y)
    self.assertTrue(torch.equal(X_tf, X.repeat(5, 1, 1, 1)))

    X = torch.rand(2, 1, 3, 2)
    Y = torch.rand(2, 4, 3, 2)
    X_tf = match_batch_shape(X, Y)
    self.assertTrue(torch.equal(X_tf, X.repeat(1, 4, 1, 1)))

    X = torch.rand(4, 2, 3, 2)
    Y = torch.rand(4, 3, 3, 2)
    with self.assertRaises(RuntimeError):
        match_batch_shape(X, Y)
def __init__(
    self,
    model: Model,
    candidate_set: Tensor,
    num_mv_samples: int = 10,
    posterior_transform: Optional[PosteriorTransform] = None,
    use_gumbel: bool = True,
    maximize: bool = True,
    X_pending: Optional[Tensor] = None,
    train_inputs: Optional[Tensor] = None,
) -> None:
    r"""Single-outcome MES-like acquisition functions based on discrete MV sampling.

    Args:
        model: A fitted single-outcome model.
        candidate_set: A `n x d` Tensor including `n` candidate points to
            discretize the design space. Max values are sampled from the
            (joint) model posterior over these points.
        num_mv_samples: Number of max value samples.
        posterior_transform: A PosteriorTransform. If using a multi-output
            model, a PosteriorTransform that transforms the multi-output
            posterior into a single-output posterior is required.
        use_gumbel: If True, use Gumbel approximation to sample the max values.
        maximize: If True, consider the problem a maximization problem.
        X_pending: A `m x d`-dim Tensor of `m` design points that have been
            submitted for function evaluation but have not yet been evaluated.
        train_inputs: A `n_train x d` Tensor that the model has been fitted
            on. Not required if the model is an instance of a GPyTorch
            ExactGP model.
    """
    self.use_gumbel = use_gumbel

    if train_inputs is None and hasattr(model, "train_inputs"):
        train_inputs = model.train_inputs[0]
    if train_inputs is not None:
        if train_inputs.ndim > 2:
            raise NotImplementedError(
                "Batch GP models (e.g. fantasized models) "
                "are not yet supported by `MaxValueBase`"
            )
        train_inputs = match_batch_shape(train_inputs, candidate_set)
        candidate_set = torch.cat([candidate_set, train_inputs], dim=0)

    self.candidate_set = candidate_set

    super().__init__(
        model=model,
        num_mv_samples=num_mv_samples,
        posterior_transform=posterior_transform,
        maximize=maximize,
        X_pending=X_pending,
    )
def forward(self, X: Tensor) -> Tensor:
    X_full = torch.cat([match_batch_shape(self.X_baseline, X), X], dim=-2)
    # Note: it is important to compute the full posterior over `(X_baseline, X)`
    # to ensure that we properly sample `f(X)` from the joint distribution
    # `f(X_baseline, X) ~ P(f | D)`, given that we have already fixed the sampled
    # function values for `f(X_baseline)`
    # TODO: improve efficiency by not recomputing baseline-baseline
    # covariance matrix
    posterior = self.model.posterior(X_full)
    q = X.shape[-2]
    self._set_sampler(q=q, posterior=posterior)
    samples = self._get_f_X_samples(posterior=posterior, q=q)
    # add previous nehvi from pending points
    return self._compute_qehvi(samples=samples) + self._prev_nehvi
def forward(self, X: Tensor) -> Tensor:
    X_full = torch.cat([match_batch_shape(self.X_baseline, X), X], dim=-2)
    # Note: it is important to compute the full posterior over `(X_baseline, X)`
    # to ensure that we properly sample `f(X)` from the joint distribution
    # `f(X_baseline, X) ~ P(f | D)`, given that we have already fixed the sampled
    # function values for `f(X_baseline)`.
    # TODO: improve efficiency by not recomputing baseline-baseline
    # covariance matrix.
    posterior = self.model.posterior(X_full)
    # Account for possible one-to-many transform.
    n_w = posterior.event_shape[X_full.dim() - 2] // X_full.shape[-2]
    q_in = X.shape[-2] * n_w
    self._set_sampler(q_in=q_in, posterior=posterior)
    samples = self._get_f_X_samples(posterior=posterior, q_in=q_in)
    # Add previous nehvi from pending points.
    return self._compute_qehvi(samples=samples, X=X) + self._prev_nehvi
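# A minimal usage sketch for the noisy expected hypervolume improvement forward pass
# above. The two-objective toy model and the `ref_point` are illustrative assumptions,
# not taken from the source.
import torch
from botorch.models import SingleTaskGP
from botorch.acquisition.multi_objective import qNoisyExpectedHypervolumeImprovement

train_X = torch.rand(10, 2, dtype=torch.double)
# two toy objectives stacked as a 10 x 2 outcome tensor
train_Y = torch.stack([train_X.sum(dim=-1), -train_X.prod(dim=-1)], dim=-1)
model = SingleTaskGP(train_X, train_Y)

acqf = qNoisyExpectedHypervolumeImprovement(
    model=model,
    ref_point=[0.0, -1.0],  # a point no better than the observed objectives
    X_baseline=train_X,
)
vals = acqf(torch.rand(5, 2, 2, dtype=torch.double))  # batch of 5, q=2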
def _sample_max_values(
    self, num_samples: int, X_pending: Optional[Tensor] = None
) -> Tensor:
    r"""Draw samples from the posterior over maximum values on a discrete set.

    These samples are used to compute Monte Carlo approximations of
    expectations over the posterior over the function maximum.

    Args:
        num_samples: The number of samples to draw.
        X_pending: A `m x d`-dim Tensor of `m` design points that have been
            submitted for function evaluation but have not yet been evaluated.

    Returns:
        A `num_samples x num_fantasies` Tensor of posterior max value samples
        (`num_fantasies=1` for non-fantasized models).
    """
    if self.use_gumbel:
        sample_max_values = _sample_max_value_Gumbel
    else:
        sample_max_values = _sample_max_value_Thompson
    candidate_set = self.candidate_set

    with torch.no_grad():
        if X_pending is not None:
            # Append X_pending to candidate set
            X_pending = match_batch_shape(X_pending, self.candidate_set)
            candidate_set = torch.cat([self.candidate_set, X_pending], dim=0)

        # project the candidate_set to the highest fidelity,
        # which is needed for the multi-fidelity MES
        try:
            candidate_set = self.project(candidate_set)
        except AttributeError:
            pass

        self.posterior_max_values = sample_max_values(
            model=self.model,
            candidate_set=candidate_set,
            num_samples=self.num_mv_samples,
            maximize=self.maximize,
        )
def forward(self, X: Tensor) -> Tensor: r"""Evaluate qNoisyExpectedImprovement on the candidate set `X`. Args: X: A `batch_shape x q x d`-dim Tensor of t-batches with `q` `d`-dim design points each. Returns: A `batch_shape'`-dim Tensor of Noisy Expected Improvement values at the given design points `X`, where `batch_shape'` is the broadcasted batch shape of model and input `X`. """ q = X.shape[-2] X_full = torch.cat([X, match_batch_shape(self.X_baseline, X)], dim=-2) # TODO (T41248036): Implement more efficient way to compute posterior # over both training and test points in GPyTorch posterior = self.model.posterior(X_full) samples = self.sampler(posterior) obj = self.objective(samples) diffs = obj[:, :, :q].max(dim=-1)[0] - obj[:, :, q:].max(dim=-1)[0] return diffs.clamp_min(0).mean(dim=0)
def forward(self, X: Tensor) -> Tensor: r"""Evaluate analytical EUBO on the candidate set X. Args: X: A `batch_shape x q x d`-dim Tensor, where `q = 2` if `previous_winner` is not `None`, and `q = 1` otherwise. Returns: The acquisition value for each batch as a tensor of shape `batch_shape`. """ if not ((X.shape[-2] == 2) or ((X.shape[-2] == 1) and (self.previous_winner is not None))): raise UnsupportedError( f"{self.__class__.__name__} only support q=2 or q=1" "with a previous winner specified") Y = X if self.outcome_model is None else self.outcome_model(X) if self.previous_winner is not None: Y = torch.cat([Y, match_batch_shape(self.previous_winner, Y)], dim=-2) # Calling forward directly instead of posterior here to # obtain the full covariance matrix pref_posterior = self.model(Y) pref_mean = pref_posterior.mean pref_cov = pref_posterior.covariance_matrix delta = pref_mean[..., 0] - pref_mean[..., 1] sigma = torch.sqrt(pref_cov[..., 0, 0] + pref_cov[..., 1, 1] - pref_cov[..., 0, 1] - pref_cov[..., 1, 0]) u = delta / sigma ucdf = self.std_norm.cdf(u) updf = torch.exp(self.std_norm.log_prob(u)) acqf_val = sigma * (updf + u * ucdf) if self.previous_winner is None: acqf_val = acqf_val + pref_mean[..., 1] return acqf_val
def forward(self, X: Tensor) -> Tensor: r"""Evaluate qMultiFidelityKnowledgeGradient on the candidate set `X`. Args: X: A `b x (q + num_fantasies) x d` Tensor with `b` t-batches of `q + num_fantasies` design points each. We split this X tensor into two parts in the `q` dimension (`dim=-2`). The first `q` are the q-batch of design points and the last num_fantasies are the current solutions of the inner optimization problem. `X_fantasies = X[..., -num_fantasies:, :]` `X_fantasies.shape = b x num_fantasies x d` `X_actual = X[..., :-num_fantasies, :]` `X_actual.shape = b x q x d` In addition, `X` may be augmented with fidelity parameteres as part of thee `d`-dimension. Projecting fidelities to the target fidelity is handled by `project`. Returns: A Tensor of shape `b`. For t-batch b, the q-KG value of the design `X_actual[b]` is averaged across the fantasy models, where `X_fantasies[b, i]` is chosen as the final selection for the `i`-th fantasy model. NOTE: If `current_value` is not provided, then this is not the true KG value of `X_actual[b]`, and `X_fantasies[b, : ]` must be maximized at fixed `X_actual[b]`. """ X_actual, X_fantasies = _split_fantasy_points(X=X, n_f=self.num_fantasies) # We only concatenate X_pending into the X part after splitting if self.X_pending is not None: X_eval = torch.cat( [X_actual, match_batch_shape(self.X_pending, X_actual)], dim=-2 ) else: X_eval = X_actual # construct the fantasy model of shape `num_fantasies x b` # expand X (to potentially add trace observations) fantasy_model = self.model.fantasize( X=self.expand(X_eval), sampler=self.sampler, observation_noise=True ) # get the value function value_function = _get_value_function( model=fantasy_model, objective=self.objective, sampler=self.inner_sampler ) # make sure to propagate gradients to the fantasy model train inputs # project the fantasy points with settings.propagate_grads(True): values = value_function(X=self.project(X_fantasies)) # num_fantasies x b if self.current_value is not None: values = values - self.current_value if self.cost_aware_utility is not None: values = self.cost_aware_utility( X=X_actual, deltas=values, sampler=self.cost_sampler ) # return average over the fantasy samples return values.mean(dim=0)