def test_posterior(self):
    """Check that `self.model.posterior` yields a `GPyTorchPosterior`.

    The checks are repeated for float and double precision and for two
    `max_cholesky_size` values (800 and 10). For every combination the
    posterior is requested without observation noise, with observation
    noise, and under `skip_posterior_variances`, where the largest reported
    variance must not exceed 1e-6.
    """
    # same iteration order as nesting the cholesky loop inside the dtype loop
    configurations = [
        (dtype, cholesky_cap)
        for dtype in (torch.float, torch.double)
        for cholesky_cap in (800, 10)
    ]
    for dtype, cholesky_cap in configurations:
        # re-seed so every configuration draws the same test points
        torch.random.manual_seed(0)
        with max_cholesky_size(cholesky_cap):
            test_x = torch.rand(2, 12, 1).to(device=self.device, dtype=dtype)
            self.model.to(dtype)

            # toggling train -> eval clears the model's caches
            self.model.train()
            self.model.eval()

            # the posterior works both without and with observation noise
            for extra_kwargs in ({}, {"observation_noise": True}):
                posterior = self.model.posterior(test_x, **extra_kwargs)
                self.assertIsInstance(posterior, GPyTorchPosterior)

            # the posterior works with variances skipped; the bound is a
            # tolerance rather than exact zero because (per the original
            # note) MVN registration leaves the variance slightly non-zero
            with skip_posterior_variances():
                posterior = self.model.posterior(test_x)
                self.assertIsInstance(posterior, GPyTorchPosterior)
                self.assertLessEqual(posterior.variance.max(), 1e-6)
def sm_partial_mll(ski_gp, new_x, new_y, num_seen):
    """Evaluate the partial marginal-likelihood term for one new observation.

    The cached matrix ``M`` (``prediction_cache['pred_cov']``) and the cached
    ``W'y`` vector (``_kernel_cache["interpolation_cache"]``) are read from
    ``ski_gp`` and detached. The new point enters through its interpolation
    weights ``w`` as a rank-one correction with divisor ``1 + v'w``, where
    ``v = M w`` (the form of a Sherman-Morrison update).

    Args:
        ski_gp: Model exposing ``prediction_cache``, ``_kernel_cache``,
            ``covar_module``, ``has_learnable_noise`` and ``likelihood``.
        new_x: New input, passed to ``ski_gp.covar_module``.
        new_y: New target; multiplies ``w`` elementwise (broadcasting).
        num_seen: Number of previously seen observations; the result is
            divided by ``num_seen + 1``.

    Returns:
        ``((quad_term - log(1 + v'w)) / 2) / (num_seen + 1)`` as a tensor.
    """
    # M := (K_{uu}^{-1} + W'W)^{-1}, per the original derivation notes
    with skip_posterior_variances(False):
        cached_M = ski_gp.prediction_cache['pred_cov'].detach()
    cached_W_y = ski_gp._kernel_cache["interpolation_cache"].detach()

    # w := w(x'), the interpolation weights of the new input
    w = _get_wmat_from_kernel(ski_gp.covar_module(new_x).evaluate_kernel())
    if w.ndim < 3:
        # add a leading dimension so the `bmm` calls below get 3-D inputs
        w = w.unsqueeze(0)

    updated_W_y = cached_W_y + w * new_y

    # a single product with M handles both right-hand sides: [w, W'y]
    M_times_rhs = cached_M.matmul(torch.cat([w, updated_W_y], dim=-1))
    # v := Mw is the first column, M W'y is the remainder
    v_t = M_times_rhs[..., :1].transpose(-1, -2)
    M_W_y = M_times_rhs[..., 1:]

    sm_divisor = 1 + v_t.bmm(w)

    # quad_term := y'W M W'y - (y'W v v'W'y) / (1 + v'w)
    base_quad = updated_W_y.transpose(-1, -2).matmul(M_W_y)
    rank_one_quad = (v_t.bmm(updated_W_y) ** 2) / sm_divisor
    quad_term = base_quad - rank_one_quad
    if ski_gp.has_learnable_noise:
        noise = ski_gp.likelihood.second_noise_covar.noise.detach()
        quad_term = quad_term / noise

    # \log|A_t| = \log|A_{t-1}| - \log(1 + v'w), so only the update is needed
    logdet_term = torch.log(sm_divisor)

    return ((quad_term - logdet_term) / 2) / (num_seen + 1)
def test_posterior(self):
    """Check that `self.model.posterior` yields a `GPyTorchPosterior`.

    The posterior is requested without observation noise, with observation
    noise, and under `skip_posterior_variances`, where the largest reported
    variance must not exceed 1e-6.
    """
    torch.random.manual_seed(0)
    test_x = torch.rand(2, 30, 1).to(device=self.device)

    # the posterior works both without and with observation noise
    for extra_kwargs in ({}, {"observation_noise": True}):
        posterior = self.model.posterior(test_x, **extra_kwargs)
        self.assertIsInstance(posterior, GPyTorchPosterior)

    # the posterior works with variances skipped; the bound is a tolerance
    # rather than exact zero because (per the original note) MVN
    # registration leaves the variance slightly non-zero
    with skip_posterior_variances():
        posterior = self.model.posterior(test_x)
        self.assertIsInstance(posterior, GPyTorchPosterior)
        self.assertLessEqual(posterior.variance.max(), 1e-6)
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: Union[bool, Tensor] = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    """Compute the posterior of the model at the points ``X``.

    The model is switched to eval mode and evaluated with posterior
    variances skipped. Unless `skip_posterior_variances` was already active
    when this method was called, the variances are then computed separately
    via `make_posterior_variances`. The result is packaged, together with
    the covariances needed for sampling, into a `HigherOrderGPPosterior`.

    Args:
        X: Test inputs. The output shape is ``X.shape[:-1]`` followed by
            ``self.target_shape``. If ``X`` has more dimensions than the
            training inputs, ``X.shape[:-2]`` is treated as a batch shape
            and the training inputs are repeated to match it.
        output_indices: Not used by this implementation.
        observation_noise: Any value other than ``False`` passes the
            predictive distribution through ``self.likelihood``. A tensor
            value is not treated specially (see the TODO in the body).
        **kwargs: Not used by this implementation.

    Returns:
        The `HigherOrderGPPosterior`, passed through
        ``self.outcome_transform.untransform_posterior`` when the model has
        an ``outcome_transform`` attribute.
    """
    self.eval()  # posterior computations require eval mode
    # remember the caller's setting; it is overridden inside the block below
    skip_variances = skip_posterior_variances._state

    # the model call must itself skip posterior variances
    with gpt_posterior_settings(), fast_pred_var(True), skip_posterior_variances(True):
        mvn = self(X)
        if observation_noise is not False:
            # TODO: implement Kronecker + diagonal solves so that
            # tensor-valued and fixed observation noise can be supported;
            # until then the likelihood's own noise is always used.
            mvn = self.likelihood(mvn, X)

        # the lazy covariance matrix includes the interpolated version of
        # the full covariance matrix, so the training inputs are first
        # brought to the batch shape of X
        train_x = self.train_inputs[0]
        if X.ndimension() > train_x.ndimension():
            lead_shape = X.shape[:-2]
            ones_lead = [1] * len(lead_shape)
            ones_train = [1] * train_x.ndimension()
            train_x = train_x.reshape(*ones_lead, *train_x.shape)
            train_x = train_x.repeat(*lead_shape, *ones_train)

        data_kernel = self.covar_modules[0]
        full_covar = data_kernel(torch.cat((train_x, X), dim=-2))

        if not skip_variances:
            joint_covar = self._get_joint_covariance([X])
            pred_variance = self.make_posterior_variances(joint_covar)
            # only in this branch is the full covariance extended with the
            # latent Kronecker factors
            full_covar = KroneckerProductLazyTensor(
                full_covar, *joint_covar.lazy_tensors[1:]
            )
        else:
            pred_variance = mvn.variance

        # test/train cross-covariance: data factor plus one factor per
        # latent kernel, each repeated to the data factor's batch shape
        data_test_train = data_kernel(X, train_x)
        batch_shape = data_test_train.batch_shape
        kron_factors = [data_test_train]
        latent_pairs = zip(self.covar_modules[1:], self.latent_parameters)
        for latent_kernel, latent in latent_pairs:
            latent_covar = latent_kernel(latent)
            if latent_covar.batch_shape != batch_shape:
                latent_covar = BatchRepeatLazyTensor(latent_covar, batch_shape)
            kron_factors.append(latent_covar)
        test_train_covar = KroneckerProductLazyTensor(*kron_factors)

        # mean and variance get reshaped into the target shape
        out_shape = (*X.shape[:-1], *self.target_shape)
        reshaped_mean = mvn.mean.reshape(*out_shape)
        if skip_variances:
            reshaped_covar = ZeroLazyTensor(*out_shape, self.target_shape[-1])
        else:
            reshaped_covar = DiagLazyTensor(pred_variance.reshape(*out_shape))
        mvn = MultivariateNormal(reshaped_mean, reshaped_covar)

        # return a specialized Posterior to allow for sampling
        posterior = HigherOrderGPPosterior(
            mvn=mvn,
            train_targets=self.train_targets.unsqueeze(-1),
            train_train_covar=self.prediction_strategy.lik_train_train_covar,
            test_train_covar=test_train_covar,
            joint_covariance_matrix=full_covar,
            output_shape=Size(out_shape),
            num_outputs=self._num_outputs,
        )
        if hasattr(self, "outcome_transform"):
            posterior = self.outcome_transform.untransform_posterior(posterior)

        return posterior
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: Union[bool, Tensor] = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    """Compute the posterior of the model at the points ``X``.

    The model is switched to eval mode, ``X`` is passed through
    ``self.transform_inputs``, and the model is evaluated with posterior
    variances skipped. Unless `skip_posterior_variances` was already active
    when this method was called, the variances are then computed from the
    joint train/test covariance via `make_posterior_variances`. The result
    is packaged, together with the covariances needed for sampling, into a
    `HigherOrderGPPosterior`.

    Args:
        X: Test inputs. After the input transform, the output shape is
            ``X.shape[:-1]`` followed by ``self.target_shape``. If ``X`` has
            more dimensions than the training inputs, ``X.shape[:-2]`` is
            treated as a batch shape and the training inputs are repeated
            to match it.
        output_indices: Not used by this implementation.
        observation_noise: Any value other than ``False`` passes the
            predictive distribution through ``self.likelihood``. A tensor
            value is not treated specially.
        **kwargs: Not used by this implementation.

    Returns:
        The `HigherOrderGPPosterior`, passed through
        ``self.outcome_transform.untransform_posterior`` when the model has
        an ``outcome_transform`` attribute.
    """
    self.eval()  # posterior computations require eval mode
    # input transforms are applied here in `eval` mode, and in
    # `model.forward()` at training time
    X = self.transform_inputs(X)
    # remember the caller's setting; it is overridden inside the block below
    skip_variances = skip_posterior_variances._state

    # the model call must itself skip posterior variances
    with gpt_posterior_settings(), fast_pred_var(True), skip_posterior_variances(True):
        mvn = self(X)
        if observation_noise is not False:
            # TODO: ensure that this still works for structured noise solves.
            mvn = self.likelihood(mvn, X)

        # the lazy covariance matrix includes the interpolated version of
        # the full covariance matrix, so the training inputs are first
        # brought to the batch shape of X
        train_x = self.train_inputs[0]
        if X.ndimension() > train_x.ndimension():
            lead_shape = X.shape[:-2]
            ones_lead = [1] * len(lead_shape)
            ones_train = [1] * train_x.ndimension()
            train_x = train_x.reshape(*ones_lead, *train_x.shape)
            train_x = train_x.repeat(*lead_shape, *ones_train)

        # data covariances for the training data, the test data, their
        # cross-covariance, and the joint of all three
        train_train_covar = self.prediction_strategy.lik_train_train_covar.detach()
        kron_train_covar = train_train_covar.lazy_tensor

        data_train_covar = kron_train_covar.lazy_tensors[0]
        data_kernel = self.covar_modules[0]
        data_test_train = data_kernel(X, train_x)
        data_test_test = data_kernel(X)
        data_joint_covar = data_train_covar.cat_rows(
            cross_mat=data_test_train,
            new_mat=data_test_test,
        )

        # the latents are detached so that they don't cause gradient errors
        # TODO: Can we enable backprop through the latent covariances?
        batch_shape = data_test_train.batch_shape
        latent_factors = [
            (
                latent
                if latent.batch_shape == batch_shape
                else BatchRepeatLazyTensor(latent, batch_shape)
            ).detach()
            for latent in kron_train_covar.lazy_tensors[1:]
        ]

        joint_covar = KroneckerProductLazyTensor(data_joint_covar, *latent_factors)
        test_train_covar = KroneckerProductLazyTensor(
            data_test_train, *latent_factors
        )

        # compute the posterior variance only if the caller wants it
        pred_variance = (
            mvn.variance
            if skip_variances
            else self.make_posterior_variances(joint_covar)
        )

        # mean and variance get reshaped into the target shape
        out_dims = (*X.shape[:-1], *self.target_shape)
        reshaped_mean = mvn.mean.reshape(*out_dims)
        if skip_variances:
            reshaped_covar = ZeroLazyTensor(*out_dims, self.target_shape[-1])
        else:
            reshaped_covar = DiagLazyTensor(pred_variance.reshape(*out_dims))
        mvn = MultivariateNormal(reshaped_mean, reshaped_covar)

        # return a specialized Posterior to allow for sampling;
        # cloning the joint covariance allows backpropagation through it
        posterior = HigherOrderGPPosterior(
            mvn=mvn,
            train_targets=self.train_targets.unsqueeze(-1),
            train_train_covar=train_train_covar,
            test_train_covar=test_train_covar,
            joint_covariance_matrix=joint_covar.clone(),
            output_shape=X.shape[:-1] + self.target_shape,
            num_outputs=self._num_outputs,
        )
        if hasattr(self, "outcome_transform"):
            posterior = self.outcome_transform.untransform_posterior(posterior)

        return posterior