def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> Posterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `batch_shape x q x d`-dim Tensor, where `d` is the dimension
            of the feature space and `q` is the number of points considered
            jointly.
        output_indices: As defined in the parent Model class; not used for
            this model.
        observation_noise: If True, add observation noise to the posterior.

    Returns:
        A `Posterior` object, representing joint distributions over `q`
        points. Includes observation noise if specified.
    """
    self.eval()  # make sure model is in eval mode
    if output_indices is not None:
        raise RuntimeError(
            "output_indices is not None. PairwiseGP should not be a "
            "multi-output model."
        )
    post = self(X)
    if observation_noise:
        noise_module = self.noise_module(shape=post.mean.shape).evaluate()
        post = MultivariateNormal(
            post.mean, post.covariance_matrix + noise_module
        )
    return GPyTorchPosterior(post)
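# A minimal usage sketch of the `posterior` method above, assuming the
# surrounding class is a botorch PairwiseGP matching this implementation;
# the constructor arguments follow botorch's documented API, but the data
# below are illustrative.
import torch
from botorch.models.pairwise_gp import PairwiseGP

datapoints = torch.rand(6, 2)  # n=6 points, d=2 features
comparisons = torch.tensor([[0, 1], [2, 3], [4, 5]])  # (winner, loser) index pairs
model = PairwiseGP(datapoints, comparisons)

X_test = torch.rand(4, 2)  # q=4 points evaluated jointly
post = model.posterior(X_test)
noisy_post = model.posterior(X_test, observation_noise=True)
# Observation noise only inflates the covariance; the means agree.
assert torch.allclose(post.mean, noisy_post.mean, atol=1e-6)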
def __call__(self, x: Tensor) -> MultivariateNormal:
    """If the model is non-batch, just make a prediction. If the model has
    multiple batches, these are samples from the kernel hyperparameter
    posterior and we integrate over them with moment matching. The shape
    of the MVN that this outputs will be the same regardless of whether
    the model is batched or not.

    Args:
        x: Point to be predicted.

    Returns:
        MultivariateNormal distribution of the prediction.
    """
    if len(self._aug_batch_shape) == 0:
        return super().__call__(x)
    # Else, approximately integrate over batches with moment matching.
    # Take x as (b) x q x d, and expand to (b) x ns x q x d
    if x.ndim > 3:  # pyre-ignore
        raise ValueError("Don't know how to predict this shape")  # pragma: no cover
    x = x.unsqueeze(-3).expand(
        x.shape[:-2]
        + torch.Size([self._aug_batch_shape[0]])  # pyre-ignore
        + x.shape[-2:]
    )
    mvn_b = super().__call__(x)
    mu = mvn_b.mean.mean(dim=-2)
    C = (
        mvn_b.covariance_matrix.mean(dim=-3)
        + torch.matmul(mvn_b.mean.transpose(-2, -1), mvn_b.mean)
        / mvn_b.mean.shape[-2]
        - torch.matmul(mu.unsqueeze(-1), mu.unsqueeze(-2))
    )  # Law of Total Covariance
    mvn = MultivariateNormal(mu, C)
    return mvn
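# A self-contained sketch (illustrative, not part of the model) of the moment
# matching above: given per-batch means/covariances from `ns` hyperparameter
# samples, the law of total covariance
#     Cov(y) = E[Cov(y | theta)] + Cov(E[y | theta])
# is computed exactly as in `__call__`.
import torch

ns, q = 8, 3  # number of hyperparameter samples, number of joint points
means = torch.randn(ns, q)  # per-sample posterior means
covs = torch.eye(q).expand(ns, q, q)  # per-sample posterior covariances

mu = means.mean(dim=-2)  # average over hyperparameter samples
C = (
    covs.mean(dim=-3)  # E[Cov(y | theta)]
    + means.transpose(-2, -1) @ means / ns  # E[m m^T]
    - mu.unsqueeze(-1) @ mu.unsqueeze(-2)  # - E[m] E[m]^T
)
# The last two terms form the (biased) empirical covariance of the means.
emp_cov = torch.cov(means.T, correction=0)
assert torch.allclose(C - covs.mean(dim=-3), emp_cov, atol=1e-6)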
def _predict_full(self, torch_inputs):
    with torch.no_grad():
        pred_dist = self(torch_inputs)
        # Un-standardize the predictions: scale by the per-output label std
        # and shift by the per-output label mean.
        mean = pred_dist.mean * self.label_std.view(
            self.label_dim, 1
        ) + self.label_mean.view(self.label_dim, 1)
        covar = pred_dist.lazy_covariance_matrix * self.label_std.pow(2).view(
            self.label_dim, 1, 1
        )
        return MultivariateNormal(mean, covar)
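# A small illustrative check of the un-standardization above (assumed
# convention: labels were standardized as z = (y - mean) / std per output
# dimension, so predictions map back via y = std * z + mean; the numbers
# below are made up).
import torch

label_mean, label_std = torch.tensor([2.0]), torch.tensor([3.0])
z_mean = torch.randn(1, 5)  # standardized predictive mean: 1 output x 5 points
z_cov = torch.eye(5)  # standardized predictive covariance

mean = z_mean * label_std.view(1, 1) + label_mean.view(1, 1)
covar = z_cov * label_std.pow(2).view(1, 1, 1)
# Variances scale by std**2; means scale by std plus an offset.
assert torch.allclose(covar[0].diagonal(), label_std.pow(2).expand(5))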
def forward(self, x: Tensor) -> MultivariateNormal:
    x_basic, task_idcs = self._split_inputs(x)
    # Compute base mean and covariance
    mean_x = self.mean_module(x_basic)
    covar_x = self.covar_module(x_basic)
    # Compute task covariances
    covar_i = self.task_covar_matrix(task_idcs)
    covar = covar_x.mul(covar_i)
    return MultivariateNormal(mean_x, covar)
def forward(self, features):
    """
    Args:
        features (torch.Tensor): [n x feature_dim]

    Returns:
        GPyTorch MultivariateNormal distribution
    """
    mean = self.mean_module(features)
    covar = self.covar_module(features)
    return MultivariateNormal(mean, covar)
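# A minimal standalone sketch of the mean-module/covar-module -> MVN pattern
# used by the simple forward methods here (standard gpytorch API; the specific
# mean and kernel choices below are illustrative).
import torch
import gpytorch

mean_module = gpytorch.means.ConstantMean()
covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

features = torch.rand(10, 3)  # n=10 points, feature_dim=3
mvn = gpytorch.distributions.MultivariateNormal(
    mean_module(features), covar_module(features)
)
print(mvn.batch_shape, mvn.event_shape)  # torch.Size([]) torch.Size([10])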
def forward(self, x_in: Tensor) -> MultivariateNormal:
    """
    This method is not strictly needed, because we won't train this model
    as a NN, nor use autograd for it. However, it's left here for
    compatibility and is also used in a few places.
    """
    return MultivariateNormal(
        mean=self.mean_module(x_in),
        covariance_matrix=self.covar_module(x_in),
    )
def forward(self, x: Tensor) -> MultivariateNormal:
    x = self.transform_inputs(x)
    x_basic, task_idcs = self._split_inputs(x)
    # Compute base mean and covariance
    mean_x = self.mean_module(x_basic)
    covar_x = self.covar_module(x_basic)
    # Compute task covariances
    covar_i = self.task_covar_module(task_idcs)
    # Combine the two in an ICM fashion
    covar = covar_x.mul(covar_i)
    return MultivariateNormal(mean_x, covar)
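# An illustrative numeric sketch of the ICM combination above: the joint
# covariance is the elementwise (Hadamard) product of the data kernel and the
# task kernel evaluated at each point's task index, i.e.
#     K[(x, i), (x', j)] = K_data(x, x') * K_task(i, j).
import torch

K_data = torch.tensor([[1.0, 0.5], [0.5, 1.0]])  # data kernel for 2 points
B = torch.tensor([[1.0, 0.3], [0.3, 1.0]])  # 2x2 inter-task covariance matrix
task_idcs = torch.tensor([0, 1])  # point 0 -> task 0, point 1 -> task 1

K_task = B[task_idcs][:, task_idcs]  # task kernel at the task indices
K = K_data * K_task  # covar_x.mul(covar_i)
# Off-diagonal entries are damped by the inter-task correlation (0.5 * 0.3).
assert torch.allclose(K, torch.tensor([[1.0, 0.15], [0.15, 1.0]]))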
def forward(self, datapoints: Tensor) -> MultivariateNormal:
    r"""Calculate a posterior or prior prediction.

    During training mode, forward is implemented solely for gradient-based
    hyperparameter optimization. Essentially, it re-calculates the utility
    f using its analytical form at f_map so that we are able to obtain
    gradients of the hyperparameters.

    We only take in the single parameter `datapoints`, without the
    comparisons, for compatibility with other gpytorch/botorch APIs. It is
    assumed that `datapoints` is the same as `self.datapoints`; that is
    what "Must train on training data" means.

    Args:
        datapoints: A `batch_shape x n x d` Tensor; should be the same as
            `self.datapoints`.

    Returns:
        A MultivariateNormal object, one of the following:
            1. Posterior centered at MAP points for training data
               (training mode)
            2. Prior predictions (prior mode)
            3. Predictive posterior (eval mode)
    """
    # Training mode: optimizing
    if self.training:
        if self._has_no_data():
            raise RuntimeError(
                "datapoints and comparisons cannot be None in training mode. "
                "Call .eval() for prior predictions, "
                "or call .set_train_data() to add training data."
            )
        if datapoints is not self.datapoints:
            raise RuntimeError("Must train on training data")
        self.set_train_data(datapoints, self.comparisons, update_model=True)
        # Take a Newton step on the posterior MAP point to fill
        # in gradients for pytorch
        self.utility = self._util_newton_updates(self.utility, max_iter=1)
        hl = self.likelihood_hess = self._hess_likelihood_f_sum(
            self.utility, self.D, self.DT, self.std_noise
        )
        covar = self.covar
        # Apply the matrix inversion lemma to the equation on page 27 of
        # [Brochu2010tutorial]_:
        # (A + B)^-1 = A^-1 - A^-1 @ (I + B A^-1)^-1 @ B A^-1
        # where A = covar_inv, B = hl
        hl_cov = hl @ covar
        eye = torch.eye(
            hl_cov.size(-1),
            dtype=self.datapoints.dtype,
            device=self.datapoints.device,
        ).expand(hl_cov.shape)
        hl_cov_I = hl_cov + eye  # add I to hl_cov
        train_covar_map = covar - covar @ torch.solve(hl_cov, hl_cov_I).solution
        output_mean, output_covar = self.utility, train_covar_map
    # Prior mode
    elif settings.prior_mode.on() or self._has_no_data():
        X_new = datapoints
        # If we don't have any data yet, use the prior GP to make predictions
        output_mean, output_covar = self._prior_predict(X_new)
    # Posterior mode
    else:
        # self.utility might be None if an exception was raised and _update
        # failed to be called during hyperparameter optimization
        # procedures (e.g., fit_gpytorch_scipy)
        if self.utility is None:
            self._update()
        if self.pred_cov_fac_need_update:
            self._update_utility_derived_values()
        datapoints = datapoints.to(self.datapoints)
        X, X_new = self._transform_batch_shape(self.datapoints, datapoints)
        covar_chol, _ = self._transform_batch_shape(self.covar_chol, X_new)
        hl, _ = self._transform_batch_shape(self.likelihood_hess, X_new)
        hlcov_eye, _ = self._transform_batch_shape(self.hlcov_eye, X_new)
        # Compute predictive mean and covariance
        covar_xnew_x = self._calc_covar(X_new, X)
        covar_x_xnew = covar_xnew_x.transpose(-1, -2)
        covar_xnew = self._calc_covar(X_new, X_new)
        p = self.utility - self._prior_mean(X)
        covar_inv_p = torch.cholesky_solve(p.unsqueeze(-1), covar_chol)
        pred_mean = (covar_xnew_x @ covar_inv_p).squeeze(-1)
        pred_mean = pred_mean + self._prior_mean(X_new)
        # [Brochu2010tutorial]_ page 27
        # Predictive covariance factor: hlcov_eye = (K + C^-1)
        # fac = (K + C^-1)^-1 @ k = pred_cov_fac_inv @ covar_x_xnew
        # The substitution method is used here to calculate fac
        fac = torch.solve(hl @ covar_x_xnew, hlcov_eye).solution
        pred_covar = covar_xnew - (covar_xnew_x @ fac)
        output_mean, output_covar = pred_mean, pred_covar

    try:
        if self.datapoints is None:
            diag_jitter = torch.eye(output_covar.size(-1))
        else:
            diag_jitter = torch.eye(
                output_covar.size(-1),
                dtype=self.datapoints.dtype,
                device=self.datapoints.device,
            )
        diag_jitter = diag_jitter.expand(output_covar.shape)
        diag_jitter = diag_jitter * self._jitter
        # Preemptively add jitter to the diagonal to prevent the use of
        # _add_jitter, given that torch.cholesky may be very slow on non-PD
        # matrix input.
        # See https://github.com/pytorch/pytorch/issues/34272
        # TODO: remove this once the torch.cholesky issue is resolved
        output_covar = output_covar + diag_jitter
        post = MultivariateNormal(output_mean, output_covar)
    except RuntimeError:
        output_covar = self._add_jitter(output_covar)
        post = MultivariateNormal(output_mean, output_covar)
    return post
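# A numeric sanity check (illustrative) of the matrix inversion lemma used in
# training mode above:
#     (A + B)^-1 = A^-1 - A^-1 (I + B A^-1)^-1 B A^-1,
# which lets the code reuse `covar` (playing the role of A^-1) without ever
# forming or inverting A + B directly.
import torch

torch.manual_seed(0)
n = 4
A = torch.randn(n, n); A = A @ A.T + n * torch.eye(n)  # symmetric positive definite
B = torch.randn(n, n); B = B @ B.T  # symmetric positive semi-definite
A_inv = torch.inverse(A)

lhs = torch.inverse(A + B)
rhs = A_inv - A_inv @ torch.inverse(torch.eye(n) + B @ A_inv) @ (B @ A_inv)
assert torch.allclose(lhs, rhs, atol=1e-5)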
def forward(self, x: Tensor) -> MultivariateNormal:
    mean_x = self.mean_module(x)
    covar_x = self.covar_module(x)
    return MultivariateNormal(mean_x, covar_x)
def predictive(self, x_in):
    # x_in is a `num_restarts x q x d` tensor of initial conditions.
    x_in = self._error_checking_x_in(x_in)
    if self.train_x_sorted.shape[0] == 0:  # No data case: fall back to the prior
        return self.forward(x_in)
    else:
        with torch.no_grad():
            k_Xxp = self.covar_module(self.train_x_sorted, x_in).evaluate()
            k_xpxp = self.covar_module(x_in).evaluate()
            K_XX_inv_k_Xxp = torch.solve(
                input=k_Xxp, A=self.Kprior_cov.evaluate()
            )[0]
            mean_pred = torch.matmul(
                K_XX_inv_k_Xxp.t(), self.expectation_posterior
            )
            cov_pred = (
                k_xpxp
                - torch.matmul(k_Xxp.t(), K_XX_inv_k_Xxp)
                + torch.matmul(
                    K_XX_inv_k_Xxp.t(),
                    torch.matmul(self.covariance_posterior, K_XX_inv_k_Xxp),
                )
            )
            # Round-trip through numpy kept for an optional singular-matrix
            # repair (self.gauss_tools.fix_singular_matrix), currently disabled.
            cov_pred_numpy = cov_pred.cpu().numpy()
            cov_pred = torch.from_numpy(cov_pred_numpy).to(device=device, dtype=dtype)

            # Re-shape mean.
            # TODO: This might not be needed anymore, since we're using
            # _get_posterior_reimplemented in EIC.
            if x_in.dim() == 3:
                batch_shape = torch.Size([1])
            elif x_in.dim() == 2:
                batch_shape = torch.Size([])
            else:
                raise ValueError(f"Unexpected input shape: {x_in.shape}")
            test_shape = torch.Size([x_in.shape[0]])
            mean_pred = mean_pred.view(*batch_shape, *test_shape).contiguous()

            try:
                mvn = MultivariateNormal(mean=mean_pred, covariance_matrix=cov_pred)
            except Exception as inst:
                logger.info(
                    "type: {0:s} | args: {1:s}".format(str(type(inst)), str(inst.args))
                )
                # Retry with a small amount of jitter on the diagonal.
                mvn = MultivariateNormal(
                    mean=mean_pred,
                    covariance_matrix=cov_pred + 1e-6 * torch.eye(x_in.shape[0]),
                )
            return mvn
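# An illustrative check of the predictive equations above: with a Gaussian
# approximate posterior N(m, S) over f at the training inputs, the predictive is
#     mu* = k*^T K^-1 m
#     Cov* = k** - k*^T K^-1 k* + (K^-1 k*)^T S (K^-1 k*).
# Sanity check: when the "posterior" equals the prior (m = 0, S = K), the
# covariance collapses back to the prior k**. (This sketch uses
# torch.linalg.solve rather than the deprecated torch.solve in the method.)
import torch

torch.manual_seed(0)
K = torch.randn(5, 5); K = K @ K.T + 5 * torch.eye(5)  # prior covar at train points
k_star = torch.randn(5, 2)  # train-vs-test covariance
k_ss = torch.randn(2, 2); k_ss = k_ss @ k_ss.T  # test covariance

S = K.clone()  # posterior covariance set equal to the prior
K_inv_k = torch.linalg.solve(K, k_star)
cov_pred = k_ss - k_star.T @ K_inv_k + K_inv_k.T @ S @ K_inv_k
assert torch.allclose(cov_pred, k_ss, atol=1e-5)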
def forward(self, x: Tensor) -> MultivariateNormal:
    if self.training:
        x = self.transform_inputs(x)
    mean_x = self.mean_module(x)
    covar_x = self.covar_module(x)
    return MultivariateNormal(mean_x, covar_x)
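# Why the transform is applied only in training mode: in botorch, `posterior()`
# applies `transform_inputs` before calling the model in eval mode, so
# transforming again inside `forward` would apply the transform twice. A sketch
# of the effect, using botorch's Normalize transform illustratively:
import torch
from botorch.models.transforms.input import Normalize

tf = Normalize(d=2, bounds=torch.tensor([[0.0, 0.0], [10.0, 10.0]]))
X = torch.rand(4, 2) * 10
X_once = tf(X)  # what posterior() would hand to forward in eval mode
X_twice = tf(X_once)  # what forward would see if it also transformed
assert not torch.allclose(X_once, X_twice)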