def kernel2(self, Kxx, V, U):
    """Build the (M1, M2) = (0, 0) covariance block ``[k_** ⊗ B] ⊗ A``.

    ``Kxx`` is Kronecker-multiplied with ``V`` and the result is then
    Kronecker-multiplied with ``U``.  (``B`` and ``A`` in the formula
    presumably denote ``V`` and ``U`` -- confirm against the model notes.)

    Returns:
        The nested ``KroneckerProductLazyTensor``.
    """
    # TODO: Make this a separate module
    data_task_covar = KroneckerProductLazyTensor(Kxx, V)
    return KroneckerProductLazyTensor(data_task_covar, U)
def test_matmul_vec_random_rectangular(self):
    """Check lazy Kronecker matmul against a dense product for one RHS column.

    Three random rectangular batched factors are multiplied lazily and
    densely; the outputs and every input gradient must agree.
    """
    lazy_a = torch.randn(4, 2, 3, requires_grad=True)
    lazy_b = torch.randn(4, 5, 2, requires_grad=True)
    lazy_c = torch.randn(4, 6, 4, requires_grad=True)
    lazy_rhs = torch.randn(4, 3 * 2 * 4, 1)
    # Normalise before enabling gradients so the RHS stays a leaf tensor.
    lazy_rhs = (lazy_rhs / torch.norm(lazy_rhs)).requires_grad_(True)

    lazy_leaves = (lazy_a, lazy_b, lazy_c, lazy_rhs)
    dense_leaves = tuple(
        leaf.clone().detach().requires_grad_(True) for leaf in lazy_leaves
    )
    dense_a, dense_b, dense_c, dense_rhs = dense_leaves

    kp_lazy_var = KroneckerProductLazyTensor(
        NonLazyTensor(lazy_a), NonLazyTensor(lazy_b), NonLazyTensor(lazy_c)
    )
    lazy_out = kp_lazy_var.matmul(lazy_rhs)

    dense_mat = kron(kron(dense_a, dense_b), dense_c)
    dense_out = dense_mat.matmul(dense_rhs)
    self.assertTrue(approx_equal(lazy_out, dense_out))

    dense_out.sum().backward()
    lazy_out.sum().backward()
    for dense_leaf, lazy_leaf in zip(dense_leaves, lazy_leaves):
        self.assertTrue(approx_equal(dense_leaf.grad, lazy_leaf.grad))
def kernel1(self, Kxx, H1, H2, V, U):
    """Build the (M1, M2) = (1, 1) covariance block.

    Evaluates ``(H1 @ (Kxx ⊗ V) @ H2.t()) ⊗ U``.  The original note writes
    this as ``H₁ᵀ [K ⊗ B] H₂ ⊗ A``; the transposes there do not literally
    match the code (``H1`` is used as-is and ``H2`` is transposed), so the
    code is taken as authoritative -- NOTE(review): confirm the convention.

    Returns:
        The resulting ``KroneckerProductLazyTensor``.
    """
    # TODO: Make this a separate module
    data_task_covar = KroneckerProductLazyTensor(Kxx, V)
    projected = H1 @ data_task_covar @ H2.t()
    return KroneckerProductLazyTensor(projected, U)
def correlation_kernel_12(self, Kxx, H1, V, U):
    """Build the (M1, M2) = (1, 0) cross-covariance block.

    Evaluates ``(H1 @ (Kxx ⊗ V)) ⊗ U``; the original note writes this as
    ``H₁ᵀ [k_x* ⊗ B] ⊗ A``.

    Returns:
        The resulting ``KroneckerProductLazyTensor``.
    """
    # TODO: Make this a separate module
    data_task_covar = KroneckerProductLazyTensor(Kxx, V)
    left_projected = H1 @ data_task_covar
    return KroneckerProductLazyTensor(left_projected, U)
def correlation_kernel_12(self, Kxx, H1, Sigma):
    """Build the (M1, M2) = (1, 0) cross-covariance block.

    Evaluates ``(H1 ⊗ Iₙ) @ (Kxx ⊗ Σ)``, where ``Iₙ`` is an identity matrix
    whose size is the second entry of ``self.decoder.sizes`` and whose
    dtype/device follow ``H1``.  The original note writes the left factor
    as ``(H₁ᵀ ⊗ Iₙ)``.

    Returns:
        The lazy matrix product of the two Kronecker products.
    """
    # TODO: Make this a separate module
    # `sizes` must unpack into exactly three entries; only the middle one
    # is needed here.
    _, x_dim, _ = self.decoder.sizes
    identity = torch.eye(x_dim, dtype=H1.dtype, device=H1.device)
    projector = KroneckerProductLazyTensor(H1, identity)
    base_covar = KroneckerProductLazyTensor(Kxx, Sigma)
    return projector @ base_covar
def kernel1(self, Kxx, H1, H2, Sigma):
    """Build the (M1, M2) = (1, 1) covariance block.

    Evaluates ``(H1 ⊗ Iₙ) @ (Kxx ⊗ Σ) @ (H2.t() ⊗ Iₙ)``, where ``Iₙ`` is an
    identity matrix whose size is the second entry of ``self.decoder.sizes``
    and whose dtype/device follow ``H1``.

    Returns:
        The lazy matrix product of the three Kronecker products.
    """
    # TODO: Make this a separate module
    # `sizes` must unpack into exactly three entries; only the middle one
    # is needed here.
    _, x_dim, _ = self.decoder.sizes
    identity = torch.eye(x_dim, dtype=H1.dtype, device=H1.device)
    left = KroneckerProductLazyTensor(H1, identity)
    middle = KroneckerProductLazyTensor(Kxx, Sigma)
    right = KroneckerProductLazyTensor(H2.t(), identity)
    return left @ middle @ right
def create_lazy_tensor(self):
    """Return a ``SumKroneckerLazyTensor`` of two fixed Kronecker products.

    Each product pairs a 3x3 factor with a 2x2 factor; all four factors are
    float leaf tensors with gradients enabled.
    """

    def leaf(rows):
        # Float leaf tensor that tracks gradients.
        return torch.tensor(rows, dtype=torch.float, requires_grad=True)

    a = leaf([[4, 0, 2], [0, 3, -1], [2, -1, 3]])
    b = leaf([[2, 1], [1, 2]])
    c = leaf([[4, 0.5, 1], [0.5, 4, -1], [1, -1, 3]])
    d = leaf([[1.2, 0.75], [0.75, 1.2]])

    first_term = KroneckerProductLazyTensor(NonLazyTensor(a), NonLazyTensor(b))
    second_term = KroneckerProductLazyTensor(NonLazyTensor(c), NonLazyTensor(d))
    return SumKroneckerLazyTensor(first_term, second_term)
def _create_marginal_input(self, batch_shape=torch.Size([])):
    """Create a random ``MultitaskMultivariateNormal`` with Kronecker covariance.

    The covariance is the Kronecker product of two ``RootLazyTensor``s built
    from random ``5 x 5`` and ``4 x 4`` roots; the mean is random with shape
    ``batch_shape + (5, 4)``.
    """
    # Draw order (5x5 root, 4x4 root, then the mean) is kept so seeded runs
    # produce the same values.
    data_root = torch.randn(*batch_shape, 5, 5)
    task_root = torch.randn(*batch_shape, 4, 4)
    kron_covar = KroneckerProductLazyTensor(
        RootLazyTensor(data_root), RootLazyTensor(task_root)
    )
    mean = torch.randn(*batch_shape, 5, 4)
    return MultitaskMultivariateNormal(mean, kron_covar)
def forward(self, input):
    """Look up embeddings from a weight assembled out of ``self.weight_leafs``.

    The effective weight is rebuilt on every call and stored on
    ``self.weight``:

    * lazy mode (``self.lazy`` truthy): the leaves are wrapped in a
      ``KroneckerProductLazyTensor`` that is summed over dim 0, and the
      requested rows are evaluated and truncated to ``self.embedding_dim``
      columns;
    * dense mode: the leaves are combined pairwise with
      ``self.knocker_product``, summed over dim 0, and passed to
      ``F.embedding``.

    Args:
        input: Index tensor.  Lazy mode supports only 1-D and 2-D inputs.

    Returns:
        The looked-up embeddings.  In lazy mode a 2-D input yields a tensor
        of shape ``(input.shape[0], input.shape[1], -1)``.

    Raises:
        NotImplementedError: In lazy mode, when ``input`` is neither 1-D
            nor 2-D.  (Previously a bare ``Exception``;
            ``NotImplementedError`` is still caught by ``except Exception``.)
    """
    # Here I should calculate the (final) weight first using tensor products
    # and the rest is exactly the same
    # w = nn.BatchNorm2d(self.weight_leafs.shape[1]).cuda()(self.weight_leafs)
    w = self.weight_leafs

    if not self.lazy:
        weight_leafs_product = w[0]
        for i in range(1, self.order):
            weight_leafs_product = self.knocker_product(weight_leafs_product, w[i])
        self.weight = weight_leafs_product.sum(dim=0)
        return F.embedding(input, self.weight, self.padding_idx, self.max_norm,
                           self.norm_type, self.scale_grad_by_freq, self.sparse)

    # get the sum of the batch of product
    self.weight = KroneckerProductLazyTensor(*NonLazyTensor(w)).sum(dim=0)
    # %-style args defer string formatting until the record is emitted.
    logging.debug('self.weight.shape: %s', self.weight.shape)

    n_dims = input.dim()
    if n_dims == 1:
        flat_index = input
    elif n_dims == 2:
        # Flatten to 1-D for the lookup; the result is reshaped back below.
        flat_index = input.contiguous().view(1, -1)[0]
    else:
        raise NotImplementedError(
            f'This input dimension is not yet implemented: {n_dims}')

    # https://github.com/cornellius-gp/gpytorch/pull/871
    # TODO: Not sure if this selection (self.embedding_dim) is correct in here.
    rows = self.weight[flat_index].base_lazy_tensor.evaluate().sum(
        dim=-3)[:, :self.embedding_dim]
    if n_dims == 1:
        return rows
    return rows.view(input.shape[0], input.shape[1], -1)
def forward(self, x1, x2, diag=False, last_dim_is_batch=False, **params):
    """Return the Kronecker product of the data and task covariances.

    Args:
        x1, x2: Inputs forwarded to ``self.data_covar_module.forward``.
        diag: When truthy, return only the diagonal of the product.
        last_dim_is_batch: Not supported; any truthy value raises.
        **params: Extra keyword arguments for the data covariance module.

    Raises:
        RuntimeError: If ``last_dim_is_batch`` is truthy.
    """
    if last_dim_is_batch:
        raise RuntimeError(
            "MultitaskKernel does not accept the last_dim_is_batch argument."
        )

    task_covar = self.task_covar_module.covar_matrix
    leading_shape = x1.shape[:-2]
    if len(leading_shape):
        # Tile the task covariance over the leading batch dims of x1.
        task_covar = task_covar.repeat(*leading_shape, 1, 1)

    if self.bias_only:
        # task covariance now all one so it shares covariance but still
        # as multitask mean
        all_ones = torch.ones_like(task_covar.evaluate())
        task_covar = lazify(all_ones)

    data_covar = lazify(self.data_covar_module.forward(x1, x2, **params))
    kron_covar = KroneckerProductLazyTensor(data_covar, task_covar)
    if diag:
        return kron_covar.diag()
    return kron_covar
def forward(self, X: Tensor) -> MultivariateNormal:
    """Return a zero-mean prior whose covariance is a Kronecker product.

    The first covariance module is evaluated on the transformed inputs;
    each remaining module is evaluated on its paired entry of
    ``self.latent_parameters``.  If the first latent covariance has a
    different batch shape from the data covariance, every latent covariance
    is wrapped in a ``BatchRepeatLazyTensor`` with the data batch shape.
    """
    X = self.transform_inputs(X)
    data_covar = self.covar_modules[0](X)
    latent_covars = [
        module(latent)
        for module, latent in zip(self.covar_modules[1:], self.latent_parameters)
    ]

    # check batch_shapes
    data_batch_shape = data_covar.batch_shape
    if data_batch_shape != latent_covars[0].batch_shape:
        latent_covars = [
            BatchRepeatLazyTensor(covar, data_batch_shape)
            for covar in latent_covars
        ]

    kronecker_covariance = KroneckerProductLazyTensor(data_covar, *latent_covars)

    # TODO: expand options for the mean module via batch shaping?
    zero_mean = torch.zeros(
        *data_batch_shape,
        kronecker_covariance.shape[-1],
        device=kronecker_covariance.device,
        dtype=kronecker_covariance.dtype,
    )
    return MultivariateNormal(zero_mean, kronecker_covariance)
def create_lazy_tensor(self):
    """Return a Kronecker product plus a Kronecker-structured constant diagonal.

    The dense part has 3x3, 2x2 and 4x4 factors; the diagonal part is the
    Kronecker product of three ``ConstantDiagLazyTensor``s with
    ``diag_shape`` 3, 2 and 4.  Every tensor is a float leaf with gradients
    enabled.
    """

    def leaf(values):
        # Float leaf tensor that tracks gradients.
        return torch.tensor(values, dtype=torch.float, requires_grad=True)

    a = leaf([[4, 0, 2], [0, 3, -1], [2, -1, 3]])
    b = leaf([[2, 1], [1, 2]])
    c = leaf([[4, 0.5, 1, 0], [0.5, 4, -1, 0], [1, -1, 3, 0], [0, 0, 0, 4]])
    d = leaf([2])
    e = leaf([5])
    f = leaf([2.5])

    dense_part = KroneckerProductLazyTensor(
        NonLazyTensor(a), NonLazyTensor(b), NonLazyTensor(c)
    )
    diag_part = KroneckerProductDiagLazyTensor(
        ConstantDiagLazyTensor(d, diag_shape=3),
        ConstantDiagLazyTensor(e, diag_shape=2),
        ConstantDiagLazyTensor(f, diag_shape=4),
    )
    return KroneckerProductAddedDiagLazyTensor(dense_part, diag_part)
def create_lazy_tensor(self):
    """Return a Kronecker product of three random rectangular factors.

    The factors have shapes ``(2, 3)``, ``(5, 2)`` and ``(6, 4)`` and track
    gradients.
    """
    # Shapes are drawn in this order so seeded runs stay reproducible.
    factor_shapes = ((2, 3), (5, 2), (6, 4))
    factors = [torch.randn(*shape, requires_grad=True) for shape in factor_shapes]
    return KroneckerProductLazyTensor(*[NonLazyTensor(t) for t in factors])
def create_lazy_tensor(self):
    """Return a Kronecker product of fixed 3x3, 2x2 and 4x4 factors.

    All factors are float leaf tensors with gradients enabled.
    """
    factor_values = (
        [[4, 0, 2], [0, 3, -1], [2, -1, 3]],
        [[2, 1], [1, 2]],
        [[4, 0.5, 1, 0], [0.5, 4, -1, 0], [1, -1, 3, 0], [0, 0, 0, 4]],
    )
    factors = [
        torch.tensor(values, dtype=torch.float, requires_grad=True)
        for values in factor_values
    ]
    return KroneckerProductLazyTensor(*[NonLazyTensor(t) for t in factors])
def create_lazy_tensor(self):
    """Return a Kronecker product plus a constant diagonal of 0.25.

    The dense part has fixed 3x3, 2x2 and 4x4 factors; the diagonal is
    sized from the last dimension of that product.
    """

    def leaf(values):
        # Float leaf tensor that tracks gradients.
        return torch.tensor(values, dtype=torch.float, requires_grad=True)

    a = leaf([[4, 0, 2], [0, 3, -1], [2, -1, 3]])
    b = leaf([[2, 1], [1, 2]])
    c = leaf([[4, 0.5, 1, 0], [0.5, 4, -1, 0], [1, -1, 3, 0], [0, 0, 0, 4]])

    dense_part = KroneckerProductLazyTensor(
        NonLazyTensor(a), NonLazyTensor(b), NonLazyTensor(c)
    )
    diag_value = leaf([0.25])
    diag_part = ConstantDiagLazyTensor(diag_value, dense_part.shape[-1])
    return KroneckerProductAddedDiagLazyTensor(dense_part, diag_part)
def create_lazy_tensor(self):
    """Return a Kronecker product plus a random non-constant diagonal.

    The dense part has fixed 3x3, 2x2 and 4x4 factors; the diagonal holds
    24 values drawn as ``0.5 * torch.rand(24)``.
    """

    def leaf(values):
        # Float leaf tensor that tracks gradients.
        return torch.tensor(values, dtype=torch.float, requires_grad=True)

    a = leaf([[4, 0, 2], [0, 3, -1], [2, -1, 3]])
    b = leaf([[2, 1], [1, 2]])
    c = leaf([[4, 0.5, 1, 0], [0.5, 4, -1, 0], [1, -1, 3, 0], [0, 0, 0, 4]])
    # Scale first, then enable gradients, so the diagonal stays a leaf tensor.
    diag_values = (0.5 * torch.rand(24, dtype=torch.float)).requires_grad_(True)

    dense_part = KroneckerProductLazyTensor(
        NonLazyTensor(a), NonLazyTensor(b), NonLazyTensor(c)
    )
    return KroneckerProductAddedDiagLazyTensor(dense_part, DiagLazyTensor(diag_values))
def make_posterior_variances(self, joint_covariance_matrix: LazyTensor) -> Tensor:
    r"""Compute posterior variances from a Kronecker-structured joint covariance.

    The first factor of ``joint_covariance_matrix`` is evaluated and split
    into train/train, test/train and test/test blocks, using the number of
    training points from ``self.train_inputs[0].shape[-2]``.  The remaining
    factors are reused unchanged.

    Args:
        joint_covariance_matrix: Object exposing its Kronecker factors as
            ``lazy_tensors``; the first factor must support
            ``evaluate_kernel()``.

    Returns:
        The test/test diagonal minus the variance explained by the training
        data.
    """
    # TODO: use the exposed joint covariances from the prediction strategy
    factors = joint_covariance_matrix.lazy_tensors
    data_joint = factors[0].evaluate_kernel()
    latent_factors = factors[1:]

    n_train = self.train_inputs[0].shape[-2]
    train_part = slice(None, n_train)
    test_part = slice(n_train, None)
    test_train = data_joint[..., test_part, train_part]
    train_train = data_joint[..., train_part, train_part]
    test_test = data_joint[..., test_part, test_part]

    kron_train_train = KroneckerProductLazyTensor(train_train, *latent_factors)
    kron_test_test = KroneckerProductLazyTensor(test_test, *latent_factors)
    cross_factors = [test_train, *latent_factors]

    evals, evecs = kron_train_train.symeig(eigenvectors=True)

    # (\kron \Lambda_i + \sigma^2 I)^{-1}
    inv_evals = DiagLazyTensor(1.0 / (evals + self.likelihood.noise))

    # compute K_i S_i \hadamard K_i S_i
    squared_projections = []
    for cross_factor, evec_factor in zip(cross_factors, evecs.lazy_tensors):
        squared_projections.append(cross_factor.matmul(evec_factor).evaluate() ** 2)
    hadamard = KroneckerProductLazyTensor(*squared_projections)

    # and compute the column sums of
    # (\kron K_i S_i * K_i S_i) \tilde{\Lambda}^{-1}
    explained = hadamard.matmul(inv_evals).sum(dim=-1)

    return kron_test_test.diag() - explained
def test_matmul_batch_mat(self):
    """Check batched lazy Kronecker matmul against a dense product.

    The module-level factors ``a``, ``b`` and ``c`` are repeated into a
    batch of three; outputs and all input gradients must agree with the
    dense computation.
    """
    lazy_a = a.repeat(3, 1, 1).requires_grad_(True)
    lazy_b = b.repeat(3, 1, 1).requires_grad_(True)
    lazy_c = c.repeat(3, 1, 1).requires_grad_(True)
    lazy_rhs = torch.randn(3, 24, 5, requires_grad=True)

    kp_lazy_var = KroneckerProductLazyTensor(
        NonLazyTensor(lazy_a), NonLazyTensor(lazy_b), NonLazyTensor(lazy_c)
    )
    lazy_out = kp_lazy_var.matmul(lazy_rhs)

    lazy_leaves = (lazy_a, lazy_b, lazy_c, lazy_rhs)
    dense_leaves = tuple(
        leaf.clone().detach().requires_grad_(True) for leaf in lazy_leaves
    )
    dense_a, dense_b, dense_c, dense_rhs = dense_leaves
    dense_out = kron(kron(dense_a, dense_b), dense_c).matmul(dense_rhs)
    self.assertTrue(approx_equal(lazy_out, dense_out))

    dense_out.sum().backward()
    lazy_out.sum().backward()
    for dense_leaf, lazy_leaf in zip(dense_leaves, lazy_leaves):
        self.assertTrue(approx_equal(dense_leaf.grad, lazy_leaf.grad))
def test_matmul_mat_random_rectangular(self):
    """Check lazy Kronecker matmul against a dense product for a 2-column RHS.

    Three random rectangular batched factors are multiplied densely and
    lazily; outputs and all input gradients must agree.
    """
    lazy_a = torch.randn(4, 2, 3, requires_grad=True)
    lazy_b = torch.randn(4, 5, 2, requires_grad=True)
    lazy_c = torch.randn(4, 6, 4, requires_grad=True)
    lazy_rhs = torch.randn(4, 3 * 2 * 4, 2, requires_grad=True)

    lazy_leaves = (lazy_a, lazy_b, lazy_c, lazy_rhs)
    dense_leaves = tuple(
        leaf.clone().detach().requires_grad_(True) for leaf in lazy_leaves
    )
    dense_a, dense_b, dense_c, dense_rhs = dense_leaves

    # The dense reference is computed before the lazy result, as in the
    # original ordering.
    dense_out = kron(kron(dense_a, dense_b), dense_c).matmul(dense_rhs)

    kp_lazy_var = KroneckerProductLazyTensor(
        NonLazyTensor(lazy_a), NonLazyTensor(lazy_b), NonLazyTensor(lazy_c)
    )
    lazy_out = kp_lazy_var.matmul(lazy_rhs)
    self.assertTrue(approx_equal(lazy_out, dense_out))

    dense_out.sum().backward()
    lazy_out.sum().backward()
    for dense_leaf, lazy_leaf in zip(dense_leaves, lazy_leaves):
        self.assertTrue(approx_equal(dense_leaf.grad, lazy_leaf.grad))
def test_matmul_vec(self):
    """Check lazy Kronecker matmul against a dense product for a 1-D vector.

    Uses clones of the module-level factors ``a``, ``b`` and ``c``; outputs
    and all input gradients must agree with the dense computation.
    """
    lazy_a = a.clone().requires_grad_(True)
    lazy_b = b.clone().requires_grad_(True)
    lazy_c = c.clone().requires_grad_(True)
    lazy_vec = torch.randn(24, requires_grad=True)

    kp_lazy_var = KroneckerProductLazyTensor(
        NonLazyTensor(lazy_a), NonLazyTensor(lazy_b), NonLazyTensor(lazy_c)
    )
    lazy_out = kp_lazy_var.matmul(lazy_vec)

    # The dense factors are cloned from the module-level tensors, not from
    # the lazy-side clones.
    dense_a = a.clone().requires_grad_(True)
    dense_b = b.clone().requires_grad_(True)
    dense_c = c.clone().requires_grad_(True)
    dense_vec = lazy_vec.clone().detach().requires_grad_(True)
    dense_out = kron(kron(dense_a, dense_b), dense_c).matmul(dense_vec)
    self.assertTrue(approx_equal(lazy_out, dense_out))

    dense_out.sum().backward()
    lazy_out.sum().backward()
    grad_pairs = (
        (dense_a, lazy_a),
        (dense_b, lazy_b),
        (dense_c, lazy_c),
        (dense_vec, lazy_vec),
    )
    for dense_leaf, lazy_leaf in grad_pairs:
        self.assertTrue(approx_equal(dense_leaf.grad, lazy_leaf.grad))
def forward(self, input):
    r"""Add task noise and shared noise to the covariance of ``input``.

    A task-noise matrix :math:`D_{t}` is built either as a diagonal from
    ``self.log_task_noises.exp()`` (when that attribute exists) or as a
    ``RootLazyTensor`` of ``self.task_noise_covar_factor``.  It is combined
    with an identity :math:`I_{n}` of size ``covar.size(-1) // self.n_tasks``
    through a :obj:`gpytorch.lazy.KroneckerProductLazyTensor`, added to the
    input covariance, and finally ``self.log_noise.exp()`` is added to the
    diagonal.

    The resulting covariance is
    :math:`K + D_{t} \otimes I_{n} + \sigma^{2}I_{nt}`.

    Args:
        input: Random variable providing ``representation()``, which returns
            its mean and covariance.

    Returns:
        A new instance of ``input``'s class with the same mean and the
        augmented covariance.
    """
    mean, covar = input.representation()

    if hasattr(self, "log_task_noises"):
        task_noise = DiagLazyTensor(self.log_task_noises.exp())
    else:
        task_noise = RootLazyTensor(self.task_noise_covar_factor)

    points_per_task = covar.size(-1) // self.n_tasks
    identity = DiagLazyTensor(
        torch.ones(points_per_task, device=self.log_noise.device))

    structured_noise = KroneckerProductLazyTensor(task_noise, identity)
    noisy_covar = add_diag(covar + structured_noise, self.log_noise.exp())
    return input.__class__(mean, noisy_covar)
def test_evaluate(self):
    """Check ``evaluate()`` against a dense Kronecker product.

    Runs once on the module-level factors ``a``, ``b``, ``c`` and once on
    those factors repeated into a batch of three.
    """

    def check(avar, bvar, cvar):
        # Compare the lazy evaluation with the explicit dense product.
        kp_lazy_var = KroneckerProductLazyTensor(
            NonLazyTensor(avar), NonLazyTensor(bvar), NonLazyTensor(cvar)
        )
        lazy_dense = kp_lazy_var.evaluate()
        reference = kron(kron(avar, bvar), cvar)
        self.assertTrue(approx_equal(lazy_dense, reference))

    check(a, b, c)
    check(a.repeat(3, 1, 1), b.repeat(3, 1, 1), c.repeat(3, 1, 1))
def test_diag(self):
    """Check ``diag()`` against the diagonal of a dense Kronecker product.

    Runs once on the module-level factors ``a``, ``b``, ``c`` and once on
    those factors repeated into a batch of three.
    """
    # Non-batch case.
    single_lazy = KroneckerProductLazyTensor(
        NonLazyTensor(a), NonLazyTensor(b), NonLazyTensor(c)
    )
    single_diag = single_lazy.diag()
    single_reference = kron(kron(a, b), c).diag()
    self.assertTrue(approx_equal(single_diag, single_reference))

    # Batch case: the reference stacks the diagonal of each batch entry.
    batch_a = a.repeat(3, 1, 1)
    batch_b = b.repeat(3, 1, 1)
    batch_c = c.repeat(3, 1, 1)
    batch_lazy = KroneckerProductLazyTensor(
        NonLazyTensor(batch_a), NonLazyTensor(batch_b), NonLazyTensor(batch_c)
    )
    batch_diag = batch_lazy.diag()
    batch_dense = kron(kron(batch_a, batch_b), batch_c)
    batch_reference = torch.stack([batch_dense[i].diag() for i in range(3)])
    self.assertTrue(approx_equal(batch_diag, batch_reference))
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: Union[bool, Tensor] = False,
    posterior_transform: Optional[PosteriorTransform] = None,
    **kwargs: Any,
) -> MultitaskGPPosterior:
    """Compute the multitask posterior at ``X`` using Kronecker structure.

    The model is switched to eval mode.  The predictive mean comes from the
    Kronecker test/train kernel applied to ``self.predictive_mean_cache``;
    the predictive variance is the test/test diagonal minus a data term
    built from eigendecompositions of the data and task covariances.

    Args:
        X: Test inputs; passed through ``self.transform_inputs``.
        output_indices: Not read by this method body.
        observation_noise: If truthy, the likelihood is applied to the
            predictive distribution and a test-noise covariance is built.
        posterior_transform: Must be ``None``.
        **kwargs: Not read by this method body.

    Returns:
        A ``MultitaskGPPosterior``, passed through
        ``self.outcome_transform.untransform_posterior`` when the model has
        an ``outcome_transform`` attribute.

    Raises:
        NotImplementedError: If ``posterior_transform`` is not ``None``.
    """
    self.eval()
    if posterior_transform is not None:
        # this could be very costly, disallow for now
        raise NotImplementedError(
            "Posterior transforms currently not supported for "
            f"{self.__class__.__name__}")
    X = self.transform_inputs(X)
    train_x = self.transform_inputs(self.train_inputs[0])
    # construct Ktt
    task_covar = self._task_covar_matrix
    task_rootlt = self._task_covar_matrix.root_decomposition(
        method="diagonalization")
    task_root = task_rootlt.root
    # Repeat the task covariance (and its root) over the batch dims of X
    # when their batch shapes differ.
    if task_covar.batch_shape != X.shape[:-2]:
        task_covar = BatchRepeatLazyTensor(task_covar, batch_repeat=X.shape[:-2])
        task_root = BatchRepeatLazyTensor(lazify(task_root), batch_repeat=X.shape[:-2])
    task_covar_rootlt = RootLazyTensor(task_root)
    # construct RR' \approx Kxx
    data_data_covar = self.train_full_covar.lazy_tensors[0]
    # populate the diagonalization caches for the root and inverse root
    # decomposition
    data_data_evals, data_data_evecs = data_data_covar.diagonalization()
    # pad the eigenvalue and eigenvectors with zeros if we are using lanczos
    if data_data_evecs.shape[-1] < data_data_evecs.shape[-2]:
        cols_to_add = data_data_evecs.shape[-2] - data_data_evecs.shape[-1]
        zero_evecs = torch.zeros(
            *data_data_evecs.shape[:-1],
            cols_to_add,
            dtype=data_data_evals.dtype,
            device=data_data_evals.device,
        )
        zero_evals = torch.zeros(
            *data_data_evecs.shape[:-2],
            cols_to_add,
            dtype=data_data_evals.dtype,
            device=data_data_evals.device,
        )
        data_data_evecs = CatLazyTensor(
            data_data_evecs,
            lazify(zero_evecs),
            dim=-1,
            output_device=data_data_evals.device,
        )
        data_data_evals = torch.cat((data_data_evals, zero_evals), dim=-1)
    # construct K_{xt, x}
    test_data_covar = self.covar_module.data_covar_module(X, train_x)
    # construct K_{xt, xt}
    test_test_covar = self.covar_module.data_covar_module(X)
    # now update root so that \tilde{R}\tilde{R}' \approx K_{(x,xt), (x,xt)}
    # cloning preserves the gradient history
    updated_lazy_tensor = data_data_covar.cat_rows(
        cross_mat=test_data_covar.clone(),
        new_mat=test_test_covar,
        method="diagonalization",
    )
    updated_root = updated_lazy_tensor.root_decomposition().root
    # occasionally, there's device errors so enforce this comes out right
    updated_root = updated_root.to(data_data_covar.device)
    # build a root decomposition of the joint train/test covariance matrix
    # construct (\tilde{R} \otimes M)(\tilde{R} \otimes M)' \approx
    # (K_{(x,xt), (x,xt)} \otimes Ktt)
    joint_covar = RootLazyTensor(
        KroneckerProductLazyTensor(updated_root, task_covar_rootlt.root.detach()))
    # construct K_{xt, x} \otimes Ktt
    test_obs_kernel = KroneckerProductLazyTensor(test_data_covar, task_covar)
    # collect y - \mu(x) and \mu(X)
    train_diff = self.train_targets - self.mean_module(train_x)
    if detach_test_caches.on():
        train_diff = train_diff.detach()
    test_mean = self.mean_module(X)
    train_noise = self.likelihood._shaped_noise_covar(train_x.shape)
    # The noise type is recorded before the optional detach below; it
    # selects which variance branch runs.
    diagonal_noise = isinstance(train_noise, DiagLazyTensor)
    if detach_test_caches.on():
        train_noise = train_noise.detach()
    test_noise = (self.likelihood._shaped_noise_covar(X.shape) if observation_noise else None)
    # predictive mean and variance for the mvn
    # first the predictive mean
    pred_mean = (test_obs_kernel.matmul(
        self.predictive_mean_cache).reshape_as(test_mean) + test_mean)
    # next the predictive variance, assume diagonal noise
    test_var_term = KroneckerProductLazyTensor(test_test_covar, task_covar).diag()
    if diagonal_noise:
        task_evals, task_evecs = self._task_covar_matrix.diagonalization()
        # TODO: make this be the default KPMatmulLT diagonal method in gpytorch
        full_data_inv_evals = (KroneckerProductDiagLazyTensor(
            DiagLazyTensor(data_data_evals), DiagLazyTensor(task_evals)) + train_noise).inverse()
        test_train_hadamard = KroneckerProductLazyTensor(
            test_data_covar.matmul(data_data_evecs).evaluate()**2,
            task_covar.matmul(task_evecs).evaluate()**2,
        )
        data_var_term = test_train_hadamard.matmul(
            full_data_inv_evals).sum(dim=-1)
    else:
        # if non-diagonal noise (but still kronecker structured), we have to pull
        # across the noise because the inverse is not closed form
        # should be a kronecker lt, R = \Sigma_X^{-1/2} \kron \Sigma_T^{-1/2}
        # TODO: enforce the diagonalization to return a KPLT for all shapes in
        # gpytorch or dense linear algebra for small shapes
        data_noise, task_noise = train_noise.lazy_tensors
        data_noise_root = data_noise.root_inv_decomposition(
            method="diagonalization")
        task_noise_root = task_noise.root_inv_decomposition(
            method="diagonalization")
        # ultimately we need to compute the diagonal of
        # (K_{x* X} \kron K_T)(K_{XX} \kron K_T + \Sigma_X \kron \Sigma_T)^{-1}
        # (K_{x* X} \kron K_T)^T
        # = (K_{x* X} \Sigma_X^{-1/2} Q_R)(\Lambda_R + I)^{-1}
        # (K_{x* X} \Sigma_X^{-1/2} Q_R)^T
        # where R = (\Sigma_X^{-1/2T}K_{XX}\Sigma_X^{-1/2} \kron
        # \Sigma_T^{-1/2T}K_{T}\Sigma_T^{-1/2})
        # first we construct the components of R's eigen-decomposition
        # TODO: make this be the default KPMatmulLT diagonal method in gpytorch
        whitened_data_covar = (data_noise_root.transpose(
            -1, -2).matmul(data_data_covar).matmul(data_noise_root))
        w_data_evals, w_data_evecs = whitened_data_covar.diagonalization()
        whitened_task_covar = (task_noise_root.transpose(-1, -2).matmul(
            self._task_covar_matrix).matmul(task_noise_root))
        w_task_evals, w_task_evecs = whitened_task_covar.diagonalization()
        # we add one to the eigenvalues as above (not just for stability)
        full_data_inv_evals = (KroneckerProductDiagLazyTensor(
            DiagLazyTensor(w_data_evals), DiagLazyTensor(w_task_evals)).add_jitter(1.0).inverse())
        test_data_comp = (test_data_covar.matmul(data_noise_root).matmul(
            w_data_evecs).evaluate()**2)
        task_comp = (task_covar.matmul(task_noise_root).matmul(
            w_task_evecs).evaluate()**2)
        test_train_hadamard = KroneckerProductLazyTensor(
            test_data_comp, task_comp)
        data_var_term = test_train_hadamard.matmul(
            full_data_inv_evals).sum(dim=-1)
    pred_variance = test_var_term - data_var_term
    # Only the predictive variances are stored (as a diagonal); the joint
    # covariance is handed to the posterior separately.
    specialized_mvn = MultitaskMultivariateNormal(
        pred_mean, DiagLazyTensor(pred_variance))
    if observation_noise:
        specialized_mvn = self.likelihood(specialized_mvn)
    posterior = MultitaskGPPosterior(
        mvn=specialized_mvn,
        joint_covariance_matrix=joint_covar,
        test_train_covar=test_obs_kernel,
        train_diff=train_diff,
        test_mean=test_mean,
        train_train_covar=self.train_full_covar,
        train_noise=train_noise,
        test_noise=test_noise,
    )
    if hasattr(self, "outcome_transform"):
        posterior = self.outcome_transform.untransform_posterior(posterior)
    return posterior
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: Union[bool, Tensor] = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    """Compute the posterior at ``X`` with Kronecker-structured covariances.

    The model is switched to eval mode and called on the transformed
    inputs with posterior variances skipped.  Joint and test/train
    covariances are rebuilt as Kronecker products of the data covariance
    with the detached latent covariances taken from the prediction
    strategy.

    Args:
        X: Test inputs; passed through ``self.transform_inputs``.
        output_indices: Not read by this method body.
        observation_noise: Unless it is exactly ``False``, the likelihood
            is applied to the predictive distribution.
        **kwargs: Not read by this method body.

    Returns:
        A ``HigherOrderGPPosterior``, passed through
        ``self.outcome_transform.untransform_posterior`` when the model has
        an ``outcome_transform`` attribute.
    """
    self.eval()  # make sure we're calling a posterior
    # input transforms are applied at `posterior` in `eval` mode, and at
    # `model.forward()` at the training time
    X = self.transform_inputs(X)
    # Read the caller's setting before the context below forces skipping.
    no_pred_variance = skip_posterior_variances._state
    # NOTE(review): the source was collapsed onto long lines; the nesting of
    # everything below inside the `with` block is reconstructed -- confirm.
    with ExitStack() as es:
        es.enter_context(gpt_posterior_settings())
        es.enter_context(fast_pred_var(True))
        # we need to skip posterior variances here
        es.enter_context(skip_posterior_variances(True))
        mvn = self(X)
        if observation_noise is not False:
            # TODO: ensure that this still works for structured noise solves.
            mvn = self.likelihood(mvn, X)
        # lazy covariance matrix includes the interpolated version of the full
        # covariance matrix so we can actually grab that instead.
        if X.ndimension() > self.train_inputs[0].ndimension():
            # Prepend singleton dims to the training inputs, then repeat
            # them across X's batch shape.
            X_batch_shape = X.shape[:-2]
            train_inputs = self.train_inputs[0].reshape(
                *[1] * len(X_batch_shape), *self.train_inputs[0].shape
            )
            train_inputs = train_inputs.repeat(
                *X_batch_shape, *[1] * self.train_inputs[0].ndimension()
            )
        else:
            train_inputs = self.train_inputs[0]
        # we now compute the data covariances for the training data, the testing
        # data, the joint covariances, and the test train cross-covariance
        train_train_covar = self.prediction_strategy.lik_train_train_covar.detach()
        base_train_train_covar = train_train_covar.lazy_tensor
        data_train_covar = base_train_train_covar.lazy_tensors[0]
        data_covar = self.covar_modules[0]
        # Despite the "train_test" name, this is evaluated as (X, train_inputs).
        data_train_test_covar = data_covar(X, train_inputs)
        data_test_test_covar = data_covar(X)
        data_joint_covar = data_train_covar.cat_rows(
            cross_mat=data_train_test_covar,
            new_mat=data_test_test_covar,
        )
        # we detach the latents so that they don't cause gradient errors
        # TODO: Can we enable backprop through the latent covariances?
        batch_shape = data_train_test_covar.batch_shape
        latent_covar_list = []
        for latent_covar in base_train_train_covar.lazy_tensors[1:]:
            if latent_covar.batch_shape != batch_shape:
                latent_covar = BatchRepeatLazyTensor(latent_covar, batch_shape)
            latent_covar_list.append(latent_covar.detach())
        joint_covar = KroneckerProductLazyTensor(
            data_joint_covar, *latent_covar_list
        )
        test_train_covar = KroneckerProductLazyTensor(
            data_train_test_covar, *latent_covar_list
        )
        # compute the posterior variance if necessary
        if no_pred_variance:
            # This value is not used below: the skipped-variance path
            # builds a ZeroLazyTensor instead.
            pred_variance = mvn.variance
        else:
            pred_variance = self.make_posterior_variances(joint_covar)
        # mean and variance get reshaped into the target shape
        new_mean = mvn.mean.reshape(*X.shape[:-1], *self.target_shape)
        if not no_pred_variance:
            new_variance = pred_variance.reshape(*X.shape[:-1], *self.target_shape)
            new_variance = DiagLazyTensor(new_variance)
        else:
            new_variance = ZeroLazyTensor(
                *X.shape[:-1], *self.target_shape, self.target_shape[-1]
            )
        mvn = MultivariateNormal(new_mean, new_variance)
        # return a specialized Posterior to allow for sampling
        # cloning the full covar allows backpropagation through it
        posterior = HigherOrderGPPosterior(
            mvn=mvn,
            train_targets=self.train_targets.unsqueeze(-1),
            train_train_covar=train_train_covar,
            test_train_covar=test_train_covar,
            joint_covariance_matrix=joint_covar.clone(),
            output_shape=X.shape[:-1] + self.target_shape,
            num_outputs=self._num_outputs,
        )
        if hasattr(self, "outcome_transform"):
            posterior = self.outcome_transform.untransform_posterior(posterior)
        return posterior
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: Union[bool, Tensor] = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    """Compute the posterior at ``X`` with Kronecker-structured covariances.

    The model is switched to eval mode and called on ``X`` with posterior
    variances skipped.  The data covariance is evaluated on the stacked
    train/test inputs, and the test/train covariance is rebuilt as a
    Kronecker product of the data cross-covariance with the latent
    covariances.

    Args:
        X: Test inputs.
        output_indices: Not read by this method body.
        observation_noise: Unless it is exactly ``False``, the likelihood
            is applied to the predictive distribution.
        **kwargs: Not read by this method body.

    Returns:
        A ``HigherOrderGPPosterior``, passed through
        ``self.outcome_transform.untransform_posterior`` when the model has
        an ``outcome_transform`` attribute.
    """
    self.eval()  # make sure we're calling a posterior
    # Read the caller's setting before the context below forces skipping.
    no_pred_variance = skip_posterior_variances._state
    # NOTE(review): the source was collapsed onto long lines; the nesting of
    # everything below inside the `with` block is reconstructed -- confirm.
    with ExitStack() as es:
        es.enter_context(gpt_posterior_settings())
        es.enter_context(fast_pred_var(True))
        # we need to skip posterior variances here
        es.enter_context(skip_posterior_variances(True))
        mvn = self(X)
        if observation_noise is not False:
            # TODO: implement Kronecker + diagonal solves so that this is possible.
            # if torch.is_tensor(observation_noise):
            #     # TODO: Validate noise shape
            #     # make observation_noise `batch_shape x q x n`
            #     obs_noise = observation_noise.transpose(-1, -2)
            #     mvn = self.likelihood(mvn, X, noise=obs_noise)
            # elif isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
            #     noise = self.likelihood.noise.mean().expand(X.shape[:-1])
            #     mvn = self.likelihood(mvn, X, noise=noise)
            # else:
            mvn = self.likelihood(mvn, X)
        # lazy covariance matrix includes the interpolated version of the full
        # covariance matrix so we can actually grab that instead.
        if X.ndimension() > self.train_inputs[0].ndimension():
            # Prepend singleton dims to the training inputs, then repeat
            # them across X's batch shape.
            X_batch_shape = X.shape[:-2]
            train_inputs = self.train_inputs[0].reshape(
                *[1] * len(X_batch_shape), *self.train_inputs[0].shape
            )
            train_inputs = train_inputs.repeat(
                *X_batch_shape, *[1] * self.train_inputs[0].ndimension()
            )
        else:
            train_inputs = self.train_inputs[0]
        # Data covariance over the stacked train and test inputs.
        full_covar = self.covar_modules[0](torch.cat((train_inputs, X), dim=-2))
        if no_pred_variance:
            # This value is not used below: the skipped-variance path
            # builds a ZeroLazyTensor instead.  `full_covar` also stays the
            # data-only covariance on this path.
            pred_variance = mvn.variance
        else:
            joint_covar = self._get_joint_covariance([X])
            pred_variance = self.make_posterior_variances(joint_covar)
            # `joint_covar` exists only on this branch, so the Kronecker
            # expansion of `full_covar` is read as belonging here.
            full_covar = KroneckerProductLazyTensor(
                full_covar, *joint_covar.lazy_tensors[1:]
            )
        joint_covar_list = [self.covar_modules[0](X, train_inputs)]
        batch_shape = joint_covar_list[0].batch_shape
        for cm, param in zip(self.covar_modules[1:], self.latent_parameters):
            covar = cm(param)
            if covar.batch_shape != batch_shape:
                covar = BatchRepeatLazyTensor(covar, batch_shape)
            joint_covar_list.append(covar)
        test_train_covar = KroneckerProductLazyTensor(*joint_covar_list)
        # mean and variance get reshaped into the target shape
        new_mean = mvn.mean.reshape(*X.shape[:-1], *self.target_shape)
        if not no_pred_variance:
            new_variance = pred_variance.reshape(*X.shape[:-1], *self.target_shape)
            new_variance = DiagLazyTensor(new_variance)
        else:
            new_variance = ZeroLazyTensor(
                *X.shape[:-1], *self.target_shape, self.target_shape[-1]
            )
        mvn = MultivariateNormal(new_mean, new_variance)
        # return a specialized Posterior to allow for sampling
        posterior = HigherOrderGPPosterior(
            mvn=mvn,
            train_targets=self.train_targets.unsqueeze(-1),
            train_train_covar=self.prediction_strategy.lik_train_train_covar,
            test_train_covar=test_train_covar,
            joint_covariance_matrix=full_covar,
            output_shape=Size(
                (
                    *X.shape[:-1],
                    *self.target_shape,
                )
            ),
            num_outputs=self._num_outputs,
        )
        if hasattr(self, "outcome_transform"):
            posterior = self.outcome_transform.untransform_posterior(posterior)
        return posterior
def covar_matrix(self):
    """Return the Kronecker product ``V ⊗ U`` of the two component covariances.

    ``self.U.covar_matrix`` is read first and ``self.V.covar_matrix``
    second; ``V`` is the left factor of the product.
    """
    u_covar = self.U.covar_matrix
    v_covar = self.V.covar_matrix
    return KroneckerProductLazyTensor(v_covar, u_covar)
def kernel2(self, Kxx, Sigma):
    """Build the (M1, M2) = (0, 0) covariance block ``[k_** ⊗ Σ]``.

    Returns:
        The ``KroneckerProductLazyTensor`` of ``Kxx`` and ``Sigma``.
    """
    # TODO: Make this a separate module
    return KroneckerProductLazyTensor(Kxx, Sigma)