def _create_marginal_input(self, batch_shape=torch.Size([])):
    mat = torch.randn(*batch_shape, 5, 5)
    mat2 = torch.randn(*batch_shape, 4, 4)
    covar = KroneckerProductLazyTensor(RootLazyTensor(mat), RootLazyTensor(mat2))
    return MultitaskMultivariateNormal(torch.randn(*batch_shape, 5, 4), covar)

def test_batch_mode_matmul_batch_mat_with_five_matrices(self):
    mats = make_random_mat(6, rank=4, batch_size=30)
    vec = torch.randn(5, 6, 7, requires_grad=True)
    mats_copy = mats.clone().detach().requires_grad_(True)
    vec_copy = vec.clone().detach().requires_grad_(True)

    # Forward
    res = RootLazyTensor(mats).mul_batch(mul_batch_size=6).matmul(vec)
    reshaped_mats_copy = mats_copy.view(5, 6, 6, 4)
    actual = prod(
        [
            reshaped_mats_copy[:, 0].matmul(reshaped_mats_copy[:, 0].transpose(-1, -2)).view(5, 6, 6),
            reshaped_mats_copy[:, 1].matmul(reshaped_mats_copy[:, 1].transpose(-1, -2)).view(5, 6, 6),
            reshaped_mats_copy[:, 2].matmul(reshaped_mats_copy[:, 2].transpose(-1, -2)).view(5, 6, 6),
            reshaped_mats_copy[:, 3].matmul(reshaped_mats_copy[:, 3].transpose(-1, -2)).view(5, 6, 6),
            reshaped_mats_copy[:, 4].matmul(reshaped_mats_copy[:, 4].transpose(-1, -2)).view(5, 6, 6),
            reshaped_mats_copy[:, 5].matmul(reshaped_mats_copy[:, 5].transpose(-1, -2)).view(5, 6, 6),
        ]
    ).matmul(vec_copy)
    self.assertLess(torch.max(((res - actual) / actual).abs()), 0.01)

    # Backward
    res.sum().backward()
    actual.sum().backward()
    self.assertLess(torch.max(((mats.grad - mats_copy.grad) / mats_copy.grad).abs()), 0.05)
    self.assertLess(torch.max(((vec.grad - vec_copy.grad) / vec_copy.grad).abs()), 0.05)

def test_batch_get_indices(self):
    root = torch.randn(2, 5, 1)
    actual = root.matmul(root.transpose(-1, -2))
    res = RootLazyTensor(root)

    batch_indices = torch.tensor([0, 1, 0, 1], dtype=torch.long)
    left_indices = torch.tensor([1, 2, 4, 0], dtype=torch.long)
    right_indices = torch.tensor([0, 1, 3, 2], dtype=torch.long)
    self.assertTrue(
        approx_equal(
            actual[batch_indices, left_indices, right_indices],
            res._batch_get_indices(batch_indices, left_indices, right_indices),
        )
    )

    batch_indices = torch.tensor([0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1], dtype=torch.long)
    left_indices = torch.tensor([1, 2, 4, 0, 1, 2, 3, 1, 2, 2, 1, 1, 0, 0, 4, 4, 4, 4], dtype=torch.long)
    right_indices = torch.tensor([0, 1, 3, 2, 3, 4, 2, 2, 1, 1, 2, 1, 2, 4, 4, 3, 3, 0], dtype=torch.long)
    self.assertTrue(
        approx_equal(
            actual[batch_indices, left_indices, right_indices],
            res._batch_get_indices(batch_indices, left_indices, right_indices),
        )
    )

def create_lazy_tensor(self):
    mat1 = make_random_mat(40, rank=5, batch_size=2)
    mat2 = make_random_mat(40, rank=5, batch_size=2)
    mat3 = make_random_mat(40, rank=5, batch_size=2)
    mat4 = make_random_mat(40, rank=5, batch_size=2)
    mat5 = make_random_mat(40, rank=5, batch_size=2)
    res = MulLazyTensor(
        RootLazyTensor(mat1), RootLazyTensor(mat2), RootLazyTensor(mat3), RootLazyTensor(mat4), RootLazyTensor(mat5)
    )
    return res.add_diag(torch.tensor(0.5))

def create_lazy_tensor(self):
    mat1 = make_random_mat(30, 3)
    mat2 = make_random_mat(30, 3)
    mat3 = make_random_mat(30, 3)
    mat4 = make_random_mat(30, 3)
    mat5 = make_random_mat(30, 3)
    res = MulLazyTensor(
        RootLazyTensor(mat1), RootLazyTensor(mat2), RootLazyTensor(mat3), RootLazyTensor(mat4), RootLazyTensor(mat5)
    )
    return res.add_diag(torch.tensor(1.0))

def forward(self, x1, x2):
    if x1.size() == x2.size() and torch.equal(x1, x2):
        # Use RootLazyTensor when x1 == x2 for efficiency when composing
        # with other kernels
        prod = RootLazyTensor(x1 - self.offset)
    else:
        prod = MatmulLazyTensor(x1 - self.offset, (x2 - self.offset).transpose(2, 1))
    return prod + self.variance.expand(prod.size())

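# A minimal plain-torch sketch (not part of the kernel above) of why the x1 == x2
# branch is valid: RootLazyTensor(R) represents R @ R.T, which for R = x - offset is
# exactly the Gram matrix that the MatmulLazyTensor branch would build.
import torch

torch.manual_seed(0)
x = torch.randn(10, 3)
offset = 0.5
root = x - offset                     # the root R
full = (x - offset) @ (x - offset).T  # what the x1 != x2 branch would compute
assert torch.allclose(root @ root.T, full)
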
def _test_inv_quad_logdet(self, inv_quad_rhs=None, logdet=False, improper_logdet=False):
    # Set up
    mat = torch.randn(*self.__class__.matrix_shape).requires_grad_(True)
    mat_clone = mat.detach().clone().requires_grad_(True)
    if inv_quad_rhs is not None:
        inv_quad_rhs.requires_grad_(True)
        inv_quad_rhs_clone = inv_quad_rhs.detach().clone().requires_grad_(True)

    # Compute actual values
    actual_tensor = mat_clone @ mat_clone.transpose(-1, -2)

    if inv_quad_rhs is not None:
        actual_inv_quad = actual_tensor.inverse().matmul(inv_quad_rhs_clone).mul(inv_quad_rhs_clone)
        actual_inv_quad = actual_inv_quad.sum([-1, -2]) if inv_quad_rhs.dim() >= 2 else actual_inv_quad.sum()
    if logdet:
        flattened_tensor = actual_tensor.view(-1, *actual_tensor.shape[-2:])
        logdets = torch.cat([mat.logdet().unsqueeze(0) for mat in flattened_tensor])
        if actual_tensor.dim() > 2:
            actual_logdet = logdets.view(*actual_tensor.shape[:-2])
        else:
            actual_logdet = logdets.squeeze()

    # Compute values with LazyTensor
    _wrapped_cg = MagicMock(wraps=gpytorch.utils.linear_cg)
    with gpytorch.settings.num_trace_samples(2000), \
            gpytorch.settings.max_cholesky_size(0), \
            gpytorch.settings.cg_tolerance(1e-5), \
            gpytorch.settings.skip_logdet_forward(improper_logdet), \
            patch("gpytorch.utils.linear_cg", new=_wrapped_cg) as linear_cg_mock:
        lazy_tensor = RootLazyTensor(mat)
        res_inv_quad, res_logdet = lazy_tensor.inv_quad_logdet(inv_quad_rhs=inv_quad_rhs, logdet=logdet)

        # Compare forward pass
        if inv_quad_rhs is not None:
            self.assertAllClose(res_inv_quad, actual_inv_quad, rtol=1e-2)
        if logdet:
            if improper_logdet:
                self.assertAlmostEqual(res_logdet.norm().item(), 0)
            else:
                self.assertAllClose(res_logdet, actual_logdet, rtol=1e-1, atol=2e-1)

        # Backward
        if inv_quad_rhs is not None:
            actual_inv_quad.sum().backward(retain_graph=True)
            res_inv_quad.sum().backward(retain_graph=True)
        if logdet:
            actual_logdet.sum().backward()
            res_logdet.sum().backward()

        self.assertAllClose(mat_clone.grad, mat.grad, rtol=1e-1, atol=2e-1)
        if inv_quad_rhs is not None:
            self.assertAllClose(inv_quad_rhs.grad, inv_quad_rhs_clone.grad, rtol=1e-2)

        # Make sure CG was called
        self.assertTrue(linear_cg_mock.called)

def create_lazy_tensor(self):
    mat1 = make_random_mat(6, rank=6, batch_shape=torch.Size((2, 3)))
    mat2 = make_random_mat(6, rank=6, batch_shape=torch.Size((2, 3)))
    res = RootLazyTensor(mat1) * RootLazyTensor(mat2)
    return res.add_diag(torch.tensor(0.5))

def test_batch_diag(self):
    root = torch.randn(4, 5, 3)
    actual = root.matmul(root.transpose(-1, -2))
    actual_diag = torch.cat(
        [
            actual[0].diag().unsqueeze(0),
            actual[1].diag().unsqueeze(0),
            actual[2].diag().unsqueeze(0),
            actual[3].diag().unsqueeze(0),
        ]
    )
    res = RootLazyTensor(root)
    self.assertTrue(approx_equal(actual_diag, res.diag()))

def _get_covariance(self, x1, x2):
    k_ux1 = delazify(self.base_kernel(x1, self.inducing_points))
    if torch.equal(x1, x2):
        covar = RootLazyTensor(k_ux1.matmul(self._inducing_inv_root))

        # Diagonal correction for predictive posterior
        correction = (self.base_kernel(x1, x2, diag=True) - covar.diag()).clamp(0, math.inf)
        covar = PsdSumLazyTensor(covar, DiagLazyTensor(correction))
    else:
        k_ux2 = delazify(self.base_kernel(x2, self.inducing_points))
        covar = MatmulLazyTensor(
            k_ux1.matmul(self._inducing_inv_root), k_ux2.matmul(self._inducing_inv_root).transpose(-1, -2)
        )
    return covar

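# A dense-torch sketch of the Nystrom structure in _get_covariance above; `rbf` is a
# hypothetical stand-in for base_kernel, and `inv_root` plays the role of
# self._inducing_inv_root. The clamped correction restores the exact prior variances
# on the diagonal of the low-rank approximation.
import math
import torch

torch.manual_seed(0)

def rbf(a, b):  # hypothetical kernel, for illustration only
    return torch.exp(-0.5 * torch.cdist(a, b) ** 2)

x = torch.randn(50, 2)
u = torch.randn(10, 2)  # inducing points
k_uu = rbf(u, u) + 1e-4 * torch.eye(10)
inv_root = torch.linalg.cholesky(torch.linalg.inv(k_uu))  # R with R R^T = K_uu^{-1}
root = rbf(x, u) @ inv_root
approx = root @ root.T  # the RootLazyTensor part: K_ux K_uu^{-1} K_xu
correction = (rbf(x, x).diag() - approx.diag()).clamp(0, math.inf)
assert torch.allclose(approx.diag() + correction, rbf(x, x).diag(), atol=1e-3)
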
def exact_predictive_covar(self, test_test_covar, test_train_covar):
    """
    Computes the posterior predictive covariance of a GP

    Args:
        test_train_covar (:obj:`gpytorch.lazy.LazyTensor`):
            Covariance matrix between test and train inputs
        test_test_covar (:obj:`gpytorch.lazy.LazyTensor`):
            Covariance matrix between test inputs

    Returns:
        :obj:`gpytorch.lazy.LazyTensor`: A LazyTensor representing the predictive
        posterior covariance of the test points
    """
    if settings.fast_pred_var.on():
        self._last_test_train_covar = test_train_covar

    if settings.skip_posterior_variances.on():
        return ZeroLazyTensor(*test_test_covar.size())

    if settings.fast_pred_var.off():
        # the original fell through without returning the superclass result
        return super().exact_predictive_covar(test_test_covar, test_train_covar)
    else:
        # compute J^T Cache as our root tensor
        features_xstar = test_train_covar.evaluate_kernel().get_root(dim=-2)
        j_star_covar = features_xstar.t() @ self.covar_cache
        covar_expanded = RootLazyTensor(j_star_covar)
        return self.noise * covar_expanded

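# A dense-torch sketch (standard GP algebra, not the cached fast_pred_var path above)
# of the quantity exact_predictive_covar computes:
#     Sigma_* = K_** - K_*x (K_xx + sigma^2 I)^{-1} K_x*
import torch

torch.manual_seed(0)
n_train, n_test = 20, 5
joint = torch.randn(n_train + n_test, n_train + n_test)
joint = joint @ joint.T + 1e-2 * torch.eye(n_train + n_test)  # a valid joint covariance
k_xx = joint[:n_train, :n_train]  # train/train
k_sx = joint[n_train:, :n_train]  # test_train_covar
k_ss = joint[n_train:, n_train:]  # test_test_covar
noise = 0.1
pred_covar = k_ss - k_sx @ torch.linalg.solve(k_xx + noise * torch.eye(n_train), k_sx.T)
assert torch.linalg.eigvalsh(pred_covar).min() > -1e-5  # still positive semi-definite
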
def block_logdet(self, var, cov_mat_root):
    var = flatten(var)
    cov_mat_lt = RootLazyTensor(cov_mat_root.t())
    var_lt = DiagLazyTensor(var + 1e-6)
    covar_lt = AddedDiagLazyTensor(var_lt, cov_mat_lt)
    return covar_lt.log_det()

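# A plain-torch sketch of what block_logdet evaluates: log det(D + U U^T), which the
# AddedDiagLazyTensor above represents without forming the dense matrix. Checked here
# densely against the matrix determinant lemma:
#     log det(D + U U^T) = log det(I_k + U^T D^{-1} U) + log det(D)
import torch

torch.manual_seed(0)
n, k = 100, 5
var = torch.rand(n) + 0.5  # plays the role of var + 1e-6
u = torch.randn(n, k)      # plays the role of cov_mat_root.t()
dense = torch.diag(var) + u @ u.T
lemma = torch.logdet(torch.eye(k) + u.T @ torch.diag(1.0 / var) @ u) + var.log().sum()
assert torch.allclose(torch.logdet(dense), lemma, atol=1e-2)
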
def root_inv_decomposition(self, method=None, initial_vectors=None, test_vectors=None):
    from gpytorch.lazy import RootLazyTensor

    # return a dense root decomposition if the matrix is small
    if self.shape[-1] <= settings.max_cholesky_size.value():
        return super().root_inv_decomposition()

    root_list = [lt.root_inv_decomposition().root for lt in self.lazy_tensors]
    kronecker_root = KroneckerProductLazyTensor(*root_list)
    return RootLazyTensor(kronecker_root)

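# A plain-torch sketch of the identity the method above exploits:
# (A \otimes B)^{-1} = A^{-1} \otimes B^{-1}, so an inverse root of each Kronecker
# factor assembles into an inverse root of the whole product.
import torch

torch.manual_seed(0)
a = torch.randn(4, 4)
a = a @ a.T + 0.1 * torch.eye(4)
b = torch.randn(3, 3)
b = b @ b.T + 0.1 * torch.eye(3)
root_a = torch.linalg.cholesky(torch.linalg.inv(a))  # R_a with R_a R_a^T = A^{-1}
root_b = torch.linalg.cholesky(torch.linalg.inv(b))
kron_root = torch.kron(root_a, root_b)
assert torch.allclose(kron_root @ kron_root.T, torch.linalg.inv(torch.kron(a, b)), atol=1e-3)
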
def test_diag(self):
    mat1 = make_random_mat(20, rank=4)
    mat2 = make_random_mat(20, rank=4)
    mat3 = make_random_mat(20, rank=4)
    mat1_copy = mat1.clone().detach().requires_grad_(True)
    mat2_copy = mat2.clone().detach().requires_grad_(True)
    mat3_copy = mat3.clone().detach().requires_grad_(True)

    # Forward
    res = MulLazyTensor(RootLazyTensor(mat1), RootLazyTensor(mat2), RootLazyTensor(mat3)).diag()
    actual = prod(
        [
            mat1_copy.matmul(mat1_copy.transpose(-1, -2)),
            mat2_copy.matmul(mat2_copy.transpose(-1, -2)),
            mat3_copy.matmul(mat3_copy.transpose(-1, -2)),
        ]
    ).diag()
    self.assertLess(torch.max(((res - actual) / actual).abs()), 0.01)

def test_matmul(self):
    root = torch.randn(5, 3, requires_grad=True)
    covar = RootLazyTensor(root)
    mat = torch.eye(5)
    res = covar.matmul(mat)

    root_clone = root.clone().detach().requires_grad_(True)
    mat_clone = mat.clone().detach().requires_grad_(True)
    actual = root_clone.matmul(root_clone.transpose(-1, -2)).matmul(mat_clone)
    self.assertTrue(approx_equal(res, actual))

    gradient = torch.randn(5, 5)
    actual.backward(gradient=gradient)
    res.backward(gradient=gradient)
    self.assertTrue(approx_equal(root.grad, root_clone.grad))

def test_mul_adding_another_variable(self):
    mat1 = make_random_mat(20, rank=4, batch_size=5)
    mat2 = make_random_mat(20, rank=4, batch_size=5)
    mat3 = make_random_mat(20, rank=4, batch_size=5)
    mat1_copy = mat1.clone().detach().requires_grad_(True)
    mat2_copy = mat2.clone().detach().requires_grad_(True)
    mat3_copy = mat3.clone().detach().requires_grad_(True)

    # Forward
    res = MulLazyTensor(RootLazyTensor(mat1), RootLazyTensor(mat2))
    res = res * RootLazyTensor(mat3)
    actual = prod(
        [
            mat1_copy.matmul(mat1_copy.transpose(-1, -2)),
            mat2_copy.matmul(mat2_copy.transpose(-1, -2)),
            mat3_copy.matmul(mat3_copy.transpose(-1, -2)),
        ]
    )
    self.assertLess(torch.max(((res.evaluate() - actual) / actual).abs()), 0.01)

def _get_covariance(self, x1, x2):
    k_ux1 = self.base_kernel_module(x1, self.inducing_points).evaluate()
    if torch.equal(x1, x2):
        covar = RootLazyTensor(k_ux1.matmul(self._inducing_inv_root))
    else:
        k_ux2 = self.base_kernel_module(x2, self.inducing_points).evaluate()
        covar = MatmulLazyTensor(
            k_ux1.matmul(self._inducing_inv_root), k_ux2.matmul(self._inducing_inv_root).transpose(-1, -2)
        )
    return covar

def test_batch_diag(self):
    mat1 = make_random_mat(20, rank=4, batch_size=5)
    mat2 = make_random_mat(20, rank=4, batch_size=5)
    mat3 = make_random_mat(20, rank=4, batch_size=5)
    mat1_copy = mat1.clone().detach().requires_grad_(True)
    mat2_copy = mat2.clone().detach().requires_grad_(True)
    mat3_copy = mat3.clone().detach().requires_grad_(True)

    # Forward
    res = MulLazyTensor(RootLazyTensor(mat1), RootLazyTensor(mat2), RootLazyTensor(mat3)).diag()
    actual = prod(
        [
            mat1_copy.matmul(mat1_copy.transpose(-1, -2)),
            mat2_copy.matmul(mat2_copy.transpose(-1, -2)),
            mat3_copy.matmul(mat3_copy.transpose(-1, -2)),
        ]
    )
    actual = torch.cat([actual[i].diag().unsqueeze(0) for i in range(5)])
    self.assertLess(torch.max(((res - actual) / actual).abs()), 0.01)

def test_precond_solve(self):
    seed = 4
    torch.random.manual_seed(seed)

    tensor = torch.randn(1000, 800)
    diag = torch.abs(torch.randn(1000))

    standard_lt = AddedDiagLazyTensor(RootLazyTensor(tensor), DiagLazyTensor(diag))
    evals, evecs = standard_lt.symeig(eigenvectors=True)

    # this preconditioner is a simple example of near deflation
    def nonstandard_preconditioner(self):
        top_100_evecs = evecs[:, :100]
        top_100_evals = evals[:100] + 0.2 * torch.randn(100)
        precond_lt = RootLazyTensor(top_100_evecs @ torch.diag(top_100_evals ** 0.5))
        logdet = top_100_evals.log().sum()

        def precond_closure(rhs):
            rhs2 = top_100_evecs.t() @ rhs
            return top_100_evecs @ torch.diag(1.0 / top_100_evals) @ rhs2

        return precond_closure, precond_lt, logdet

    overrode_lt = AddedDiagLazyTensor(
        RootLazyTensor(tensor), DiagLazyTensor(diag), preconditioner_override=nonstandard_preconditioner
    )

    # compute a solve - mostly to make sure that we can actually perform the solve
    rhs = torch.randn(1000, 1)
    standard_solve = standard_lt.inv_matmul(rhs)
    overrode_solve = overrode_lt.inv_matmul(rhs)

    # gut checking that our preconditioner is not breaking anything
    self.assertEqual(standard_solve.shape, overrode_solve.shape)
    self.assertLess(torch.norm(standard_solve - overrode_solve) / standard_solve.norm(), 1.0)

def compute_ll_for_block(self, vec, mean, var, cov_mat_root):
    vec = flatten(vec)
    mean = flatten(mean)
    var = flatten(var)
    cov_mat_lt = RootLazyTensor(cov_mat_root.t())
    var_lt = DiagLazyTensor(var + 1e-6)
    covar_lt = AddedDiagLazyTensor(var_lt, cov_mat_lt)
    qdist = MultivariateNormal(mean, covar_lt)
    # the settings must be comma-separated: joining context managers with `and`
    # silently enters only one of them
    with gpytorch.settings.num_trace_samples(1), gpytorch.settings.max_cg_iterations(25):
        return qdist.log_prob(vec)

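# A dense sanity sketch (plain torch.distributions, no CG or trace estimation) of the
# quantity compute_ll_for_block approximates: the log density of vec under
# N(mean, diag(var) + U U^T), where U plays the role of cov_mat_root.t().
import torch
from torch.distributions import LowRankMultivariateNormal
from torch.distributions import MultivariateNormal as DenseMVN

torch.manual_seed(0)
n, k = 50, 4
mean = torch.randn(n)
var = torch.rand(n) + 0.5
u = torch.randn(n, k)
vec = torch.randn(n)
lowrank = LowRankMultivariateNormal(mean, cov_factor=u, cov_diag=var + 1e-6)
dense = DenseMVN(mean, covariance_matrix=torch.diag(var + 1e-6) + u @ u.T)
assert torch.allclose(lowrank.log_prob(vec), dense.log_prob(vec), atol=1e-2)
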
def test_batch_matmul_mat_with_five_matrices(self):
    mat1 = make_random_mat(20, rank=4, batch_size=5)
    mat2 = make_random_mat(20, rank=4, batch_size=5)
    mat3 = make_random_mat(20, rank=4, batch_size=5)
    mat4 = make_random_mat(20, rank=4, batch_size=5)
    mat5 = make_random_mat(20, rank=4, batch_size=5)
    vec = torch.randn(5, 20, 7, requires_grad=True)
    mat1_copy = mat1.clone().detach().requires_grad_(True)
    mat2_copy = mat2.clone().detach().requires_grad_(True)
    mat3_copy = mat3.clone().detach().requires_grad_(True)
    mat4_copy = mat4.clone().detach().requires_grad_(True)
    mat5_copy = mat5.clone().detach().requires_grad_(True)
    vec_copy = vec.clone().detach().requires_grad_(True)

    # Forward
    res = MulLazyTensor(
        RootLazyTensor(mat1), RootLazyTensor(mat2), RootLazyTensor(mat3), RootLazyTensor(mat4), RootLazyTensor(mat5)
    ).matmul(vec)
    actual = prod(
        [
            mat1_copy.matmul(mat1_copy.transpose(-1, -2)),
            mat2_copy.matmul(mat2_copy.transpose(-1, -2)),
            mat3_copy.matmul(mat3_copy.transpose(-1, -2)),
            mat4_copy.matmul(mat4_copy.transpose(-1, -2)),
            mat5_copy.matmul(mat5_copy.transpose(-1, -2)),
        ]
    ).matmul(vec_copy)
    self.assertLess(torch.max(((res - actual) / actual).abs()), 0.01)

    # Backward
    res.sum().backward()
    actual.sum().backward()
    self.assertLess(torch.max(((mat1.grad - mat1_copy.grad) / mat1_copy.grad).abs()), 0.01)
    self.assertLess(torch.max(((mat2.grad - mat2_copy.grad) / mat2_copy.grad).abs()), 0.01)
    self.assertLess(torch.max(((mat3.grad - mat3_copy.grad) / mat3_copy.grad).abs()), 0.01)
    self.assertLess(torch.max(((mat4.grad - mat4_copy.grad) / mat4_copy.grad).abs()), 0.01)
    self.assertLess(torch.max(((mat5.grad - mat5_copy.grad) / mat5_copy.grad).abs()), 0.01)
    self.assertLess(torch.max(((vec.grad - vec_copy.grad) / vec_copy.grad).abs()), 0.01)

def test_matmul_mat_with_two_matrices(self):
    mat1 = make_random_mat(20, 5)
    mat2 = make_random_mat(20, 5)
    vec = torch.randn(20, 7, requires_grad=True)
    mat1_copy = mat1.clone().detach().requires_grad_(True)
    mat2_copy = mat2.clone().detach().requires_grad_(True)
    vec_copy = vec.clone().detach().requires_grad_(True)

    # Forward
    res = MulLazyTensor(RootLazyTensor(mat1), RootLazyTensor(mat2)).matmul(vec)
    actual = prod(
        [mat1_copy.matmul(mat1_copy.transpose(-1, -2)), mat2_copy.matmul(mat2_copy.transpose(-1, -2))]
    ).matmul(vec_copy)
    self.assertLess(torch.max(((res - actual) / actual).abs()), 0.01)

    # Backward
    res.sum().backward()
    actual.sum().backward()
    self.assertLess(torch.max(((mat1.grad - mat1_copy.grad) / mat1_copy.grad).abs()), 0.01)
    self.assertLess(torch.max(((mat2.grad - mat2_copy.grad) / mat2_copy.grad).abs()), 0.01)
    self.assertLess(torch.max(((vec.grad - vec_copy.grad) / vec_copy.grad).abs()), 0.01)

def test_getitem(self):
    mat1 = make_random_mat(20, rank=4)
    mat2 = make_random_mat(20, rank=4)
    mat3 = make_random_mat(20, rank=4)
    mat1_copy = mat1.clone().detach().requires_grad_(True)
    mat2_copy = mat2.clone().detach().requires_grad_(True)
    mat3_copy = mat3.clone().detach().requires_grad_(True)

    # Forward
    res = MulLazyTensor(RootLazyTensor(mat1), RootLazyTensor(mat2), RootLazyTensor(mat3))
    actual = prod(
        [
            mat1_copy.matmul(mat1_copy.transpose(-1, -2)),
            mat2_copy.matmul(mat2_copy.transpose(-1, -2)),
            mat3_copy.matmul(mat3_copy.transpose(-1, -2)),
        ]
    )

    self.assertLess(torch.max(((res[5, 3:5] - actual[5, 3:5]) / actual[5, 3:5]).abs()), 0.01)
    self.assertLess(torch.max(((res[3:5, 2:].evaluate() - actual[3:5, 2:]) / actual[3:5, 2:]).abs()), 0.01)
    self.assertLess(torch.max(((res[2:, 3:5].evaluate() - actual[2:, 3:5]) / actual[2:, 3:5]).abs()), 0.01)

def test_base_sample_shape(self):
    a = torch.randn(5, 10)
    lazy_square_a = RootLazyTensor(lazify(a))
    dist = MultivariateNormal(torch.zeros(5), lazy_square_a)

    # check that providing the base samples is okay
    samples = dist.rsample(torch.Size((16,)), base_samples=torch.randn(16, 10))
    self.assertEqual(samples.shape, torch.Size((16, 5)))

    # check that an event shape of base samples fails
    self.assertRaises(RuntimeError, dist.rsample, torch.Size((16,)), base_samples=torch.randn(16, 5))

    # check that the proper event shape of base samples is okay for
    # a non root lt
    nonlazy_square_a = lazify(lazy_square_a.evaluate())
    dist = MultivariateNormal(torch.zeros(5), nonlazy_square_a)
    samples = dist.rsample(torch.Size((16,)), base_samples=torch.randn(16, 5))
    self.assertEqual(samples.shape, torch.Size((16, 5)))

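# A plain-torch sketch of why the RootLazyTensor-backed MVN above takes base samples
# of size 10 (the rank) rather than 5 (the event size): with covariance R R^T, a
# reparameterized draw is mu + R @ eps for eps ~ N(0, I_rank).
import torch

torch.manual_seed(0)
r = torch.randn(5, 10) / 10 ** 0.5  # root; covariance is r @ r.T (5 x 5)
eps = torch.randn(100000, 10)       # base samples live in the rank-10 space
samples = eps @ r.T                 # zero-mean draws with covariance r @ r.T
emp_cov = samples.T @ samples / samples.shape[0]
assert torch.allclose(emp_cov, r @ r.T, atol=0.05)
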
def root_decomposition(self, method: Optional[str] = None):
    from gpytorch.lazy import RootLazyTensor

    if method == "symeig" or method is None:
        evals, evecs = self._symeig(eigenvectors=True, return_evals_as_lazy=True)
        # TODO: only use non-zero evals (req. dealing w/ batches...)
        f_list = [
            evec * eval.diag().clamp(0.0).sqrt().unsqueeze(-2)
            for eval, evec in zip(evals.lazy_tensors, evecs.lazy_tensors)
        ]
        F = KroneckerProductLazyTensor(*f_list)
        return RootLazyTensor(F)
    else:
        return super().root_decomposition(method=method)

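# A plain-torch sketch of the symeig root used above, for a single dense factor rather
# than a KroneckerProductLazyTensor of factors: with K = Q diag(lam) Q^T, the matrix
# F = Q diag(clamp(lam, 0))^{1/2} satisfies F F^T = K, which is what RootLazyTensor(F)
# represents.
import torch

torch.manual_seed(0)
m = torch.randn(6, 6)
k = m @ m.T  # symmetric PSD
lam, q = torch.linalg.eigh(k)
f = q * lam.clamp(0.0).sqrt().unsqueeze(-2)  # scale each eigenvector column, as in f_list
assert torch.allclose(f @ f.T, k, atol=1e-3)
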
def _make_predictive_covar(self, qmatrix=None, Kuu=None, Kuu_Lmat=None):
    if qmatrix is None:
        qmatrix = self.current_qmatrix
    if Kuu is None:
        Kuu = self.Kuu
    if Kuu_Lmat is None:
        Kuu_Lmat = self.current_inducing_compression_matrix.evaluate()

    if fast_pred_var.on():
        qmat_inv_root = qmatrix.root_inv_decomposition()
        # to lazify you have to evaluate the inverse root which is slow
        # otherwise, you can't backprop your way through it
        inner_cache = RootLazyTensor(Kuu_Lmat.matmul(qmat_inv_root.root.evaluate()))
    else:
        inner_cache = Kuu_Lmat.matmul(qmatrix.inv_matmul(Kuu_Lmat.transpose(-1, -2)))

    predictive_covar_cache = Kuu - inner_cache
    return predictive_covar_cache

def forward(self, input):
    r"""
    Adds the log task noises to the diagonal of the covariance matrix of the supplied
    :obj:`gpytorch.random_variables.GaussianRandomVariable` or
    :obj:`gpytorch.random_variables.MultitaskGaussianRandomVariable`, in case of
    `rank` == 0. Otherwise, adds a rank `rank` covariance matrix to it.

    To accomplish this, we form a new :obj:`gpytorch.lazy.KroneckerProductLazyTensor`
    between :math:`I_{n}`, an identity matrix with size equal to the data, and a
    (not necessarily diagonal) matrix containing the task noises :math:`D_{t}`.

    We also incorporate a shared `log_noise` parameter from the base
    :class:`gpytorch.likelihoods.GaussianLikelihood` that we extend.

    The final covariance matrix after this method is then
    :math:`K + D_{t} \otimes I_{n} + \sigma^{2}I_{nt}`.

    Args:
        input (:obj:`gpytorch.random_variables.MultitaskGaussianRandomVariable`):
            Random variable whose covariance matrix is a
            :obj:`gpytorch.lazy.LazyTensor` we intend to augment.

    Returns:
        :obj:`gpytorch.random_variables.MultitaskGaussianRandomVariable`:
            A new random variable whose covariance matrix is a
            :obj:`gpytorch.lazy.LazyTensor` with :math:`D_{t} \otimes I_{n}` and
            :math:`\sigma^{2}I_{nt}` added.
    """
    mean, covar = input.representation()
    eye_lv = DiagLazyTensor(torch.ones(covar.size(-1) // self.n_tasks, device=self.log_noise.device))
    if hasattr(self, "log_task_noises"):
        task_var_lv = DiagLazyTensor(self.log_task_noises.exp())
    else:
        task_var_lv = RootLazyTensor(self.task_noise_covar_factor)
    covar_kron_lv = KroneckerProductLazyTensor(task_var_lv, eye_lv)
    noise = covar + covar_kron_lv
    noise = add_diag(noise, self.log_noise.exp())
    return input.__class__(mean, noise)

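# A plain-torch sketch of the noise structure the docstring above describes:
# D_t \otimes I_n applies the same t x t inter-task noise covariance at every data
# point, and sigma^2 I_{nt} is the shared noise (built densely here via torch.kron).
import torch

torch.manual_seed(0)
n, t = 4, 2
factor = torch.randn(t, 1)  # like task_noise_covar_factor with rank == 1
d_t = factor @ factor.T     # RootLazyTensor(factor) represents this matrix
sigma2 = 0.1
noise = torch.kron(d_t, torch.eye(n)) + sigma2 * torch.eye(n * t)
# the leading n x n block is the first task's noise plus the shared noise
assert torch.allclose(noise[:n, :n], (d_t[0, 0] + sigma2) * torch.eye(n))
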
def test_mul_adding_constant_mul(self):
    mat1 = make_random_mat(20, rank=4, batch_size=5)
    mat2 = make_random_mat(20, rank=4, batch_size=5)
    mat3 = make_random_mat(20, rank=4, batch_size=5)
    const = torch.ones(1, requires_grad=True)
    mat1_copy = mat1.clone().detach().requires_grad_(True)
    mat2_copy = mat2.clone().detach().requires_grad_(True)
    mat3_copy = mat3.clone().detach().requires_grad_(True)
    const_copy = const.clone().detach().requires_grad_(True)

    # Forward
    res = MulLazyTensor(RootLazyTensor(mat1), RootLazyTensor(mat2), RootLazyTensor(mat3))
    res = res * const
    actual = (
        prod(
            [
                mat1_copy.matmul(mat1_copy.transpose(-1, -2)),
                mat2_copy.matmul(mat2_copy.transpose(-1, -2)),
                mat3_copy.matmul(mat3_copy.transpose(-1, -2)),
            ]
        )
        * const_copy
    )
    self.assertLess(torch.max(((res.evaluate() - actual) / actual).abs()), 0.01)

    # Forward
    res = MulLazyTensor(RootLazyTensor(mat1), RootLazyTensor(mat2), RootLazyTensor(mat3))
    res = res * 2.5
    actual = (
        prod(
            [
                mat1_copy.matmul(mat1_copy.transpose(-1, -2)),
                mat2_copy.matmul(mat2_copy.transpose(-1, -2)),
                mat3_copy.matmul(mat3_copy.transpose(-1, -2)),
            ]
        )
        * 2.5
    )
    self.assertLess(torch.max(((res.evaluate() - actual) / actual).abs()), 0.01)

def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: Union[bool, Tensor] = False,
    posterior_transform: Optional[PosteriorTransform] = None,
    **kwargs: Any,
) -> MultitaskGPPosterior:
    self.eval()

    if posterior_transform is not None:
        # this could be very costly, disallow for now
        raise NotImplementedError(
            "Posterior transforms currently not supported for "
            f"{self.__class__.__name__}"
        )

    X = self.transform_inputs(X)
    train_x = self.transform_inputs(self.train_inputs[0])

    # construct Ktt
    task_covar = self._task_covar_matrix
    task_rootlt = self._task_covar_matrix.root_decomposition(method="diagonalization")
    task_root = task_rootlt.root
    if task_covar.batch_shape != X.shape[:-2]:
        task_covar = BatchRepeatLazyTensor(task_covar, batch_repeat=X.shape[:-2])
        task_root = BatchRepeatLazyTensor(lazify(task_root), batch_repeat=X.shape[:-2])
    task_covar_rootlt = RootLazyTensor(task_root)

    # construct RR' \approx Kxx
    data_data_covar = self.train_full_covar.lazy_tensors[0]
    # populate the diagonalization caches for the root and inverse root decomposition
    data_data_evals, data_data_evecs = data_data_covar.diagonalization()
    # pad the eigenvalues and eigenvectors with zeros if we are using lanczos
    if data_data_evecs.shape[-1] < data_data_evecs.shape[-2]:
        cols_to_add = data_data_evecs.shape[-2] - data_data_evecs.shape[-1]
        zero_evecs = torch.zeros(
            *data_data_evecs.shape[:-1],
            cols_to_add,
            dtype=data_data_evals.dtype,
            device=data_data_evals.device,
        )
        zero_evals = torch.zeros(
            *data_data_evecs.shape[:-2],
            cols_to_add,
            dtype=data_data_evals.dtype,
            device=data_data_evals.device,
        )
        data_data_evecs = CatLazyTensor(
            data_data_evecs,
            lazify(zero_evecs),
            dim=-1,
            output_device=data_data_evals.device,
        )
        data_data_evals = torch.cat((data_data_evals, zero_evals), dim=-1)

    # construct K_{xt, x}
    test_data_covar = self.covar_module.data_covar_module(X, train_x)
    # construct K_{xt, xt}
    test_test_covar = self.covar_module.data_covar_module(X)

    # now update root so that \tilde{R}\tilde{R}' \approx K_{(x,xt), (x,xt)}
    # cloning preserves the gradient history
    updated_lazy_tensor = data_data_covar.cat_rows(
        cross_mat=test_data_covar.clone(),
        new_mat=test_test_covar,
        method="diagonalization",
    )
    updated_root = updated_lazy_tensor.root_decomposition().root
    # occasionally, there's device errors so enforce this comes out right
    updated_root = updated_root.to(data_data_covar.device)

    # build a root decomposition of the joint train/test covariance matrix
    # construct (\tilde{R} \otimes M)(\tilde{R} \otimes M)' \approx
    # (K_{(x,xt), (x,xt)} \otimes Ktt)
    joint_covar = RootLazyTensor(
        KroneckerProductLazyTensor(updated_root, task_covar_rootlt.root.detach())
    )

    # construct K_{xt, x} \otimes Ktt
    test_obs_kernel = KroneckerProductLazyTensor(test_data_covar, task_covar)

    # collect y - \mu(x) and \mu(X)
    train_diff = self.train_targets - self.mean_module(train_x)
    if detach_test_caches.on():
        train_diff = train_diff.detach()
    test_mean = self.mean_module(X)

    train_noise = self.likelihood._shaped_noise_covar(train_x.shape)
    diagonal_noise = isinstance(train_noise, DiagLazyTensor)
    if detach_test_caches.on():
        train_noise = train_noise.detach()
    test_noise = self.likelihood._shaped_noise_covar(X.shape) if observation_noise else None

    # predictive mean and variance for the mvn
    # first the predictive mean
    pred_mean = test_obs_kernel.matmul(self.predictive_mean_cache).reshape_as(test_mean) + test_mean
    # next the predictive variance, assume diagonal noise
    test_var_term = KroneckerProductLazyTensor(test_test_covar, task_covar).diag()

    if diagonal_noise:
        task_evals, task_evecs = self._task_covar_matrix.diagonalization()
        # TODO: make this be the default KPMatmulLT diagonal method in gpytorch
        full_data_inv_evals = (
            KroneckerProductDiagLazyTensor(DiagLazyTensor(data_data_evals), DiagLazyTensor(task_evals))
            + train_noise
        ).inverse()
        test_train_hadamard = KroneckerProductLazyTensor(
            test_data_covar.matmul(data_data_evecs).evaluate() ** 2,
            task_covar.matmul(task_evecs).evaluate() ** 2,
        )
        data_var_term = test_train_hadamard.matmul(full_data_inv_evals).sum(dim=-1)
    else:
        # if non-diagonal noise (but still kronecker structured), we have to pull
        # across the noise because the inverse is not closed form
        # should be a kronecker lt, R = \Sigma_X^{-1/2} \kron \Sigma_T^{-1/2}
        # TODO: enforce the diagonalization to return a KPLT for all shapes in
        # gpytorch or dense linear algebra for small shapes
        data_noise, task_noise = train_noise.lazy_tensors
        data_noise_root = data_noise.root_inv_decomposition(method="diagonalization")
        task_noise_root = task_noise.root_inv_decomposition(method="diagonalization")

        # ultimately we need to compute the diagonal of
        # (K_{x* X} \kron K_T)(K_{XX} \kron K_T + \Sigma_X \kron \Sigma_T)^{-1}
        #     (K_{x* X} \kron K_T)^T
        # = (K_{x* X} \Sigma_X^{-1/2} Q_R)(\Lambda_R + I)^{-1}
        #     (K_{x* X} \Sigma_X^{-1/2} Q_R)^T
        # where R = (\Sigma_X^{-1/2T} K_{XX} \Sigma_X^{-1/2} \kron
        #     \Sigma_T^{-1/2T} K_T \Sigma_T^{-1/2})

        # first we construct the components of R's eigen-decomposition
        # TODO: make this be the default KPMatmulLT diagonal method in gpytorch
        whitened_data_covar = (
            data_noise_root.transpose(-1, -2).matmul(data_data_covar).matmul(data_noise_root)
        )
        w_data_evals, w_data_evecs = whitened_data_covar.diagonalization()
        whitened_task_covar = (
            task_noise_root.transpose(-1, -2).matmul(self._task_covar_matrix).matmul(task_noise_root)
        )
        w_task_evals, w_task_evecs = whitened_task_covar.diagonalization()

        # we add one to the eigenvalues as above (not just for stability)
        full_data_inv_evals = (
            KroneckerProductDiagLazyTensor(DiagLazyTensor(w_data_evals), DiagLazyTensor(w_task_evals))
            .add_jitter(1.0)
            .inverse()
        )

        test_data_comp = test_data_covar.matmul(data_noise_root).matmul(w_data_evecs).evaluate() ** 2
        task_comp = task_covar.matmul(task_noise_root).matmul(w_task_evecs).evaluate() ** 2

        test_train_hadamard = KroneckerProductLazyTensor(test_data_comp, task_comp)
        data_var_term = test_train_hadamard.matmul(full_data_inv_evals).sum(dim=-1)

    pred_variance = test_var_term - data_var_term
    specialized_mvn = MultitaskMultivariateNormal(pred_mean, DiagLazyTensor(pred_variance))
    if observation_noise:
        specialized_mvn = self.likelihood(specialized_mvn)

    posterior = MultitaskGPPosterior(
        mvn=specialized_mvn,
        joint_covariance_matrix=joint_covar,
        test_train_covar=test_obs_kernel,
        train_diff=train_diff,
        test_mean=test_mean,
        train_train_covar=self.train_full_covar,
        train_noise=train_noise,
        test_noise=test_noise,
    )

    if hasattr(self, "outcome_transform"):
        posterior = self.outcome_transform.untransform_posterior(posterior)
    return posterior

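# A plain-torch check of the identity behind test_var_term above:
# diag(A \otimes B) = diag(A) \otimes diag(B), so a Kronecker product's diagonal never
# requires forming the full matrix.
import torch

torch.manual_seed(0)
a = torch.randn(3, 3)
b = torch.randn(4, 4)
assert torch.allclose(torch.kron(a, b).diag(), torch.kron(a.diag(), b.diag()))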