def test_unsupported_dimension(self):
    sampler = SobolQMCNormalSampler(num_samples=2)
    maxdim = torch.quasirandom.SobolEngine.MAXDIM + 1
    mean = torch.zeros(maxdim)
    cov = DiagLazyTensor(torch.ones(maxdim))
    mvn = MultivariateNormal(mean, cov)
    posterior = GPyTorchPosterior(mvn)
    with self.assertRaises(UnsupportedError) as e:
        sampler(posterior)
    self.assertIn(f"Requested: {maxdim}", str(e.exception))
def forward(self, x):
    for d in range(self.depth - 1):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        mvn = MultivariateNormal(mean_x, covar_x)
        x = mvn.sample(sample_shape=torch.Size([self.dim]))
        x = x.t()
        if self.collect:
            self.collector[d] = x.detach().numpy()
    # last layer with single output
    mean_x = self.mean_module(x)
    covar_x = self.covar_module(x)
    mvn = MultivariateNormal(mean_x, covar_x)
    x = mvn.sample(sample_shape=torch.Size([self.dim]))
    x = x.t()
    if self.collect:
        self.collector[self.depth - 1] = x.detach().numpy()
    return x
def test_transformed_posterior(self):
    for dtype in (torch.float, torch.double):
        for m in (1, 2):
            shape = torch.Size([3, m])
            mean = torch.rand(shape, dtype=dtype, device=self.device)
            variance = 1 + torch.rand(shape, dtype=dtype, device=self.device)
            if m == 1:
                covar = torch.diag_embed(variance.squeeze(-1))
                mvn = MultivariateNormal(mean.squeeze(-1), lazify(covar))
            else:
                covar = torch.diag_embed(variance.view(*variance.shape[:-2], -1))
                mvn = MultitaskMultivariateNormal(mean, lazify(covar))
            p_base = GPyTorchPosterior(mvn=mvn)
            p_tf = TransformedPosterior(  # dummy transforms
                posterior=p_base,
                sample_transform=lambda s: s + 2,
                mean_transform=lambda m, v: 2 * m + v,
                variance_transform=lambda m, v: m + 2 * v,
            )
            # mean, variance
            self.assertEqual(p_tf.device.type, self.device.type)
            self.assertTrue(p_tf.dtype == dtype)
            self.assertEqual(p_tf.event_shape, shape)
            self.assertTrue(torch.equal(p_tf.mean, 2 * mean + variance))
            self.assertTrue(torch.equal(p_tf.variance, mean + 2 * variance))
            # rsample
            samples = p_tf.rsample()
            self.assertEqual(samples.shape, torch.Size([1]) + shape)
            samples = p_tf.rsample(sample_shape=torch.Size([4]))
            self.assertEqual(samples.shape, torch.Size([4]) + shape)
            samples2 = p_tf.rsample(sample_shape=torch.Size([4, 2]))
            self.assertEqual(samples2.shape, torch.Size([4, 2]) + shape)
            # rsample w/ base samples
            base_samples = torch.randn(4, *shape, device=self.device, dtype=dtype)
            # incompatible shapes
            with self.assertRaises(RuntimeError):
                p_tf.rsample(sample_shape=torch.Size([3]), base_samples=base_samples)
            # make sure sample transform is applied correctly
            samples_base = p_base.rsample(
                sample_shape=torch.Size([4]), base_samples=base_samples
            )
            samples_tf = p_tf.rsample(
                sample_shape=torch.Size([4]), base_samples=base_samples
            )
            self.assertTrue(torch.equal(samples_tf, samples_base + 2))
            # check error handling
            p_tf_2 = TransformedPosterior(
                posterior=p_base, sample_transform=lambda s: s + 2
            )
            with self.assertRaises(NotImplementedError):
                p_tf_2.mean
            with self.assertRaises(NotImplementedError):
                p_tf_2.variance
def test_multivariate_normal_batch_correlated_samples(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    mean = torch.tensor([0, 1, 2], dtype=torch.float, device=device)
    covmat = torch.diag(torch.tensor([1, 0.75, 1.5], device=device))
    mvn = MultivariateNormal(
        mean=mean.repeat(2, 1),
        covariance_matrix=NonLazyTensor(covmat).repeat(2, 1, 1),
    )
    base_samples = mvn.get_base_samples(torch.Size((3, 4)))
    self.assertTrue(
        mvn.sample(base_samples=base_samples).shape == torch.Size([3, 4, 2, 3])
    )
    base_samples = mvn.get_base_samples()
    self.assertTrue(
        mvn.sample(base_samples=base_samples).shape == torch.Size([2, 3])
    )
def prior_distribution(self):
    """Model prior distribution.

    This method determines how to compute the GP prior distribution of the
    inducing points, e.g. p(u) ~ N(mu(X_u), K(X_u, X_u)). Most commonly,
    this is done simply by calling the user-defined GP prior on the
    inducing point data.
    """
    out = self.model.forward(self.inducing_points)
    res = MultivariateNormal(out.mean, out.lazy_covariance_matrix.add_jitter())
    return res
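# The method above relies on a model's mean/covariance modules to build the
# prior p(u). Below is a minimal, self-contained sketch of the same
# construction, assuming a stock GPyTorch mean/kernel pair; the names
# `mean_module`/`covar_module` and the 10 x 2 inducing points are
# illustrative, not taken from the surrounding code.
import torch
from gpytorch.distributions import MultivariateNormal
from gpytorch.kernels import RBFKernel
from gpytorch.means import ZeroMean

inducing_points = torch.randn(10, 2)
mean_module = ZeroMean()
covar_module = RBFKernel()
prior = MultivariateNormal(
    mean_module(inducing_points),
    covar_module(inducing_points).add_jitter(),  # jitter for numerical stability
)
print(prior.event_shape)  # torch.Size([10])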
def test_natgrad(self, D=5):
    mu = torch.randn(D)
    cov = torch.randn(D, D).tril_()
    dist = MultivariateNormal(mu, CholLazyTensor(TriangularLazyTensor(cov)))
    sample = dist.sample()

    v_dist = NaturalVariationalDistribution(D)
    v_dist.initialize_variational_distribution(dist)
    mu = v_dist().mean.detach()
    v_dist().log_prob(sample).squeeze().backward()

    eta1 = mu.clone().requires_grad_(True)
    eta2 = (mu[:, None] * mu + cov @ cov.t()).requires_grad_(True)
    L = torch.cholesky(eta2 - eta1[:, None] * eta1)
    dist2 = MultivariateNormal(eta1, CholLazyTensor(TriangularLazyTensor(L)))
    dist2.log_prob(sample).squeeze().backward()

    assert torch.allclose(v_dist.natural_vec.grad, eta1.grad)
    assert torch.allclose(v_dist.natural_mat.grad, eta2.grad)
def post_forward(self, means, variances):
    # TODO: maybe the two cases can be merged into one with torch.diag_embed
    assert means.ndim == variances.ndim
    if means.ndim == 2:
        mvn = MultivariateNormal(
            means.squeeze(), torch.diag(variances.squeeze() + 1e-6)
        )
    elif means.ndim == 3:
        assert means.size(-1) == variances.size(-1) == 1
        try:
            mvn = MultivariateNormal(
                means.squeeze(-1), torch.diag_embed(variances.squeeze(-1) + 1e-6)
            )
        except RuntimeError:
            # Surface the offending covariance, then re-raise rather than
            # falling through with `mvn` unbound.
            print("RuntimeError constructing MultivariateNormal:")
            print(torch.diag_embed(variances.squeeze(-1)) + 1e-6)
            raise
    else:
        raise NotImplementedError(
            f"post_forward expects 2- or 3-dimensional inputs, got ndim={means.ndim}."
        )
    return mvn
def posterior(self, X: Tensor, observation_noise: bool = False) -> GPyTorchPosterior:
    m_shape = X.shape[:-1]
    r_shape = list(X.shape[:-2]) + [1, 1]
    mvn = MultivariateNormal(
        mean=torch.zeros(m_shape, dtype=X.dtype),
        covariance_matrix=torch.eye(m_shape[-1], dtype=X.dtype).repeat(r_shape),
    )
    return GPyTorchPosterior(mvn)
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
            of the feature space and `q` is the number of points considered
            jointly.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add observation noise to the posterior.
        detach_test_caches: If True, detach GPyTorch test caches during
            computation of the posterior. Required for being able to compute
            derivatives with respect to training inputs at test time (used
            e.g. by qNoisyExpectedImprovement). Defaults to `True`.

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices` each. Includes observation noise if
        `observation_noise=True`.
    """
    self.eval()  # make sure model is in eval mode
    detach_test_caches = kwargs.get("detach_test_caches", True)
    with ExitStack() as es:
        es.enter_context(settings.debug(False))
        es.enter_context(settings.fast_pred_var())
        es.enter_context(settings.detach_test_caches(detach_test_caches))
        # insert a dimension for the output dimension
        if self._num_outputs > 1:
            X, output_dim_idx = add_output_dim(
                X=X, original_batch_shape=self._input_batch_shape
            )
        mvn = self(X)
        mean_x = mvn.mean
        covar_x = mvn.covariance_matrix
        if self._num_outputs > 1:
            output_indices = output_indices or range(self._num_outputs)
            mvns = [
                MultivariateNormal(
                    mean_x.select(dim=output_dim_idx, index=t),
                    lazify(covar_x.select(dim=output_dim_idx, index=t)),
                )
                for t in output_indices
            ]
            mvn = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
    return GPyTorchPosterior(mvn=mvn)
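# Hedged usage sketch for a `posterior()` method like the one above, via a
# stock BoTorch `SingleTaskGP`; the training data and test points below are
# illustrative assumptions, not taken from the surrounding code.
import torch
from botorch.models import SingleTaskGP

train_X = torch.rand(8, 3, dtype=torch.double)
train_Y = train_X.sin().sum(dim=-1, keepdim=True)
model = SingleTaskGP(train_X, train_Y)

test_X = torch.rand(4, 3, dtype=torch.double)
post = model.posterior(test_X, observation_noise=True)
print(post.mean.shape, post.variance.shape)  # torch.Size([4, 1]) for both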
def test_multivariate_normal_batch_lazy(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    mean = torch.tensor([0, 1, 2], dtype=torch.float, device=device).repeat(2, 1)
    covmat = torch.diag(torch.tensor([1, 0.75, 1.5], device=device)).repeat(2, 1, 1)
    covmat_chol = torch.cholesky(covmat)
    mvn = MultivariateNormal(mean=mean, covariance_matrix=NonLazyTensor(covmat))
    self.assertTrue(torch.is_tensor(mvn.covariance_matrix))
    self.assertIsInstance(mvn.lazy_covariance_matrix, LazyTensor)
    self.assertTrue(approx_equal(mvn.variance, torch.diagonal(covmat, dim1=-2, dim2=-1)))
    self.assertTrue(torch.equal(mvn._unbroadcasted_scale_tril, covmat_chol))
    mvn_plus1 = mvn + 1
    self.assertTrue(torch.equal(mvn_plus1.mean, mvn.mean + 1))
    self.assertTrue(torch.equal(mvn_plus1.covariance_matrix, mvn.covariance_matrix))
    self.assertTrue(torch.equal(mvn_plus1._unbroadcasted_scale_tril, covmat_chol))
    mvn_times2 = mvn * 2
    self.assertTrue(torch.equal(mvn_times2.mean, mvn.mean * 2))
    self.assertTrue(torch.equal(mvn_times2.covariance_matrix, mvn.covariance_matrix * 4))
    self.assertTrue(torch.equal(mvn_times2._unbroadcasted_scale_tril, covmat_chol * 2))
    mvn_divby2 = mvn / 2
    self.assertTrue(torch.equal(mvn_divby2.mean, mvn.mean / 2))
    self.assertTrue(torch.equal(mvn_divby2.covariance_matrix, mvn.covariance_matrix / 4))
    self.assertTrue(torch.equal(mvn_divby2._unbroadcasted_scale_tril, covmat_chol / 2))
    # TODO: Add tests for entropy, log_prob, etc. - this is an issue b/c it
    # uses root_decomposition, which is not very reliable
    # self.assertTrue(approx_equal(mvn.entropy(), 4.3157 * torch.ones(2)))
    # self.assertTrue(
    #     approx_equal(mvn.log_prob(torch.zeros(2, 3)), -4.8157 * torch.ones(2))
    # )
    # self.assertTrue(
    #     approx_equal(mvn.log_prob(torch.zeros(2, 2, 3)), -4.8157 * torch.ones(2, 2))
    # )
    conf_lower, conf_upper = mvn.confidence_region()
    self.assertTrue(approx_equal(conf_lower, mvn.mean - 2 * mvn.stddev))
    self.assertTrue(approx_equal(conf_upper, mvn.mean + 2 * mvn.stddev))
    self.assertTrue(mvn.sample().shape == torch.Size([2, 3]))
    self.assertTrue(mvn.sample(torch.Size([2])).shape == torch.Size([2, 2, 3]))
    self.assertTrue(mvn.sample(torch.Size([2, 4])).shape == torch.Size([2, 4, 2, 3]))
def test_kl_divergence(self):
    mean0 = torch.randn(4)
    mean1 = mean0 + 1
    var0 = torch.randn(4).abs_()
    var1 = var0 * math.exp(2)
    dist_a = MultivariateNormal(mean0, DiagLazyTensor(var0))
    dist_b = MultivariateNormal(mean1, DiagLazyTensor(var0))
    dist_c = MultivariateNormal(mean0, DiagLazyTensor(var1))
    res = torch.distributions.kl.kl_divergence(dist_a, dist_a)
    actual = 0.0
    self.assertLess((res - actual).abs().item(), 1e-2)
    res = torch.distributions.kl.kl_divergence(dist_b, dist_a)
    actual = var0.reciprocal().sum().div(2.0)
    self.assertLess((res - actual).div(res).abs().item(), 1e-2)
    res = torch.distributions.kl.kl_divergence(dist_a, dist_c)
    actual = 0.5 * (8 - 4 + 4 * math.exp(-2))
    self.assertLess((res - actual).div(res).abs().item(), 1e-2)
def test_invertible_init(self, D=5):
    mu = torch.randn(D)
    cov = torch.randn(D, D).tril_()
    dist = MultivariateNormal(mu, CholLazyTensor(TriangularLazyTensor(cov)))
    v_dist = TrilNaturalVariationalDistribution(D, mean_init_std=0.0)
    v_dist.initialize_variational_distribution(dist)
    out_dist = v_dist()
    assert torch.allclose(out_dist.mean, dist.mean)
    assert torch.allclose(out_dist.covariance_matrix, dist.covariance_matrix)
def compute_ll_for_block(self, vec, mean, var, cov_mat_root):
    vec = flatten(vec)
    mean = flatten(mean)
    var = flatten(var)
    cov_mat_lt = RootLazyTensor(cov_mat_root.t())
    var_lt = DiagLazyTensor(var + 1e-6)
    covar_lt = AddedDiagLazyTensor(var_lt, cov_mat_lt)
    qdist = MultivariateNormal(mean, covar_lt)
    # Note: `with A and B:` only enters one of the two context managers; both
    # settings must be passed as separate context managers.
    with gpytorch.settings.num_trace_samples(1), gpytorch.settings.max_cg_iterations(25):
        return qdist.log_prob(vec)
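# The block above models the covariance as low-rank-plus-diagonal. A small
# sketch of that structure in isolation; the sizes, rank, and jitter constant
# are illustrative assumptions.
import torch
from gpytorch.distributions import MultivariateNormal
from gpytorch.lazy import AddedDiagLazyTensor, DiagLazyTensor, RootLazyTensor

root = torch.randn(6, 2)  # rank-2 factor R, so R @ R.T has rank 2
covar = AddedDiagLazyTensor(
    DiagLazyTensor(torch.rand(6) + 1e-6),  # diagonal part keeps things positive definite
    RootLazyTensor(root),
)
mvn = MultivariateNormal(torch.zeros(6), covar)
print(mvn.log_prob(torch.randn(6)))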
def forward(self, x, xe):
    m = self.mean(x)
    if x.shape[1] > 0:
        K = self.kern(x)
        if xe.shape[1] > 0:
            x_emb = self.emb_trans(xe)
            K *= self.kern_emb(x_emb)
    else:
        K = self.kern_emb(self.emb_trans(xe))
    if self.multi_task:
        return MultitaskMultivariateNormal(m, K)
    return MultivariateNormal(m, K)
def test_expected_improvement(self):
    for dtype in (torch.float, torch.double):
        mean = torch.tensor([[-0.5]], device=self.device, dtype=dtype)
        variance = torch.ones(1, 1, device=self.device, dtype=dtype)
        mm = MockModel(MockPosterior(mean=mean, variance=variance))
        # basic test
        module = ExpectedImprovement(model=mm, best_f=0.0)
        X = torch.empty(1, 1, device=self.device, dtype=dtype)  # dummy
        ei = module(X)
        ei_expected = torch.tensor(0.19780, device=self.device, dtype=dtype)
        self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))
        # test maximize
        module = ExpectedImprovement(model=mm, best_f=0.0, maximize=False)
        X = torch.empty(1, 1, device=self.device, dtype=dtype)  # dummy
        ei = module(X)
        ei_expected = torch.tensor(0.6978, device=self.device, dtype=dtype)
        self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))
        with self.assertRaises(UnsupportedError):
            module.set_X_pending(None)
        # test objective (single-output)
        mean = torch.tensor([0.5], device=self.device, dtype=dtype)
        covar = torch.tensor([[0.16]], device=self.device, dtype=dtype)
        mvn = MultivariateNormal(mean, covar)
        p = GPyTorchPosterior(mvn)
        mm = MockModel(p)
        weights = torch.tensor([0.5], device=self.device, dtype=dtype)
        obj = ScalarizedObjective(weights)
        ei = ExpectedImprovement(model=mm, best_f=0.0, objective=obj)
        X = torch.rand(1, 2, device=self.device, dtype=dtype)
        ei_expected = torch.tensor(0.2601, device=self.device, dtype=dtype)
        # the bare `torch.allclose(...)` here did nothing; assert on it
        self.assertTrue(torch.allclose(ei(X), ei_expected, atol=1e-4))
        # test objective (multi-output)
        mean = torch.tensor([[-0.25, 0.5]], device=self.device, dtype=dtype)
        covar = torch.tensor(
            [[[0.5, 0.125], [0.125, 0.5]]], device=self.device, dtype=dtype
        )
        mvn = MultitaskMultivariateNormal(mean, covar)
        p = GPyTorchPosterior(mvn)
        mm = MockModel(p)
        weights = torch.tensor([2.0, 1.0], device=self.device, dtype=dtype)
        obj = ScalarizedObjective(weights)
        ei = ExpectedImprovement(model=mm, best_f=0.0, objective=obj)
        X = torch.rand(1, 2, device=self.device, dtype=dtype)
        ei_expected = torch.tensor(0.6910, device=self.device, dtype=dtype)
        self.assertTrue(torch.allclose(ei(X), ei_expected, atol=1e-4))
def test_multitask_from_repeat(self):
    mean = torch.randn(2, 3)
    variance = torch.randn(2, 3).clamp_min(1e-6)
    mvn = MultivariateNormal(mean, DiagLazyTensor(variance))
    mmvn = MultitaskMultivariateNormal.from_repeated_mvn(mvn, num_tasks=4)
    self.assertTrue(isinstance(mmvn, MultitaskMultivariateNormal))
    self.assertEqual(mmvn.batch_shape, torch.Size([2]))
    self.assertEqual(mmvn.event_shape, torch.Size([3, 4]))
    self.assertEqual(mmvn.covariance_matrix.shape, torch.Size([2, 12, 12]))
    for i in range(4):
        # tensor comparisons need assertAllClose, not assertEqual
        self.assertAllClose(mmvn.mean[..., i], mean)
        self.assertAllClose(mmvn.variance[..., i], variance)
def test_multitask_from_batch(self):
    mean = torch.randn(2, 3)
    variance = torch.randn(2, 3).clamp_min(1e-6)
    mvn = MultivariateNormal(mean, DiagLazyTensor(variance))
    mmvn = MultitaskMultivariateNormal.from_batch_mvn(mvn, task_dim=-1)
    self.assertTrue(isinstance(mmvn, MultitaskMultivariateNormal))
    self.assertEqual(mmvn.batch_shape, torch.Size([]))
    self.assertEqual(mmvn.event_shape, torch.Size([3, 2]))
    self.assertEqual(mmvn.covariance_matrix.shape, torch.Size([6, 6]))
    # tensor comparisons need assertAllClose, not assertEqual
    self.assertAllClose(mmvn.mean, mean.transpose(-1, -2))
    self.assertAllClose(mmvn.variance, variance.transpose(-1, -2))

    mean = torch.randn(2, 4, 3)
    variance = torch.randn(2, 4, 3).clamp_min(1e-6)
    mvn = MultivariateNormal(mean, DiagLazyTensor(variance))
    mmvn = MultitaskMultivariateNormal.from_batch_mvn(mvn, task_dim=0)
    self.assertTrue(isinstance(mmvn, MultitaskMultivariateNormal))
    self.assertEqual(mmvn.batch_shape, torch.Size([4]))
    self.assertEqual(mmvn.event_shape, torch.Size([3, 2]))
    self.assertEqual(mmvn.covariance_matrix.shape, torch.Size([4, 6, 6]))
    self.assertAllClose(mmvn.mean, mean.permute(1, 2, 0))
    self.assertAllClose(mmvn.variance, variance.permute(1, 2, 0))
def forward(self, x: torch.Tensor) -> MultivariateNormal:
    """Evaluate the model.

    Args:
        x (torch.Tensor): Points at which to evaluate.

    Returns:
        MultivariateNormal: Object containing the mean and covariance of the
            GP at these points.
    """
    mean_x = self.mean_module(x)
    covar_x = self.covar_module(x)
    return MultivariateNormal(mean_x, covar_x)
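# A self-contained ExactGP using the same forward pattern as above; the class
# name, toy data, and kernel choice are illustrative assumptions.
import torch
import gpytorch

class TinyGP(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

train_x = torch.linspace(0, 1, 20)
train_y = torch.sin(6.28 * train_x) + 0.1 * torch.randn(20)
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = TinyGP(train_x, train_y, likelihood)
model.eval()
likelihood.eval()
with torch.no_grad(), gpytorch.settings.fast_pred_var():
    pred = likelihood(model(torch.linspace(0, 1, 5)))
print(pred.mean.shape)  # torch.Size([5])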
def forward(self, indices=None):
    """
    Return the variational posterior for the latent variables,
    pertaining to provided indices
    """
    if indices is None:
        ms = self.variational_mean
        vs = self.variational_variance
    else:
        ms = self.variational_mean[indices]
        vs = self.variational_variance[indices]
    vs = vs.expand(len(vs), self.output_dims)
    if self.output_dims == 1:
        m, = ms
        v, = vs
        return MultivariateNormal(m, DiagLazyTensor(v))
    else:
        mvns = [
            MultivariateNormal(m, DiagLazyTensor(v))
            for m, v in zip(ms.T, vs.T)
        ]
        return MultitaskMultivariateNormal.from_independent_mvns(mvns)
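# Minimal sketch of the multi-output branch above: stitch independent
# per-output MVNs into one MultitaskMultivariateNormal. The sizes (5 points,
# 2 outputs) are illustrative.
import torch
from gpytorch.distributions import MultivariateNormal, MultitaskMultivariateNormal
from gpytorch.lazy import DiagLazyTensor

means = torch.randn(5, 2)   # 5 latent points, 2 outputs
variances = torch.rand(5, 2)
mvns = [
    MultivariateNormal(m, DiagLazyTensor(v))
    for m, v in zip(means.T, variances.T)
]
mmvn = MultitaskMultivariateNormal.from_independent_mvns(mvns)
print(mmvn.event_shape)  # torch.Size([5, 2])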
def test_kl_divergence(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        mean0 = torch.randn(4, device=device, dtype=dtype)
        mean1 = mean0 + 1
        var0 = torch.randn(4, device=device, dtype=dtype).abs_()
        var1 = var0 * math.exp(2)
        dist_a = MultivariateNormal(mean0, DiagLazyTensor(var0))
        dist_b = MultivariateNormal(mean1, DiagLazyTensor(var0))
        dist_c = MultivariateNormal(mean0, DiagLazyTensor(var1))
        res = torch.distributions.kl.kl_divergence(dist_a, dist_a)
        actual = 0.0
        self.assertLess((res - actual).abs().item(), 1e-2)
        res = torch.distributions.kl.kl_divergence(dist_b, dist_a)
        actual = var0.reciprocal().sum().div(2.0)
        self.assertLess((res - actual).div(res).abs().item(), 1e-2)
        res = torch.distributions.kl.kl_divergence(dist_a, dist_c)
        actual = 0.5 * (8 - 4 + 4 * math.exp(-2))
        self.assertLess((res - actual).div(res).abs().item(), 1e-2)
def _get_test_posterior(shape, device, dtype, interleaved=True, lazy=False):
    mean = torch.rand(shape, device=device, dtype=dtype)
    n_covar = shape[-2:].numel()
    diag = torch.rand(shape, device=device, dtype=dtype)
    diag = diag.view(*diag.shape[:-2], n_covar)
    a = torch.rand(*shape[:-2], n_covar, n_covar, device=device, dtype=dtype)
    covar = a @ a.transpose(-1, -2) + torch.diag_embed(diag)
    if lazy:
        covar = NonLazyTensor(covar)
    if shape[-1] == 1:
        mvn = MultivariateNormal(mean.squeeze(-1), covar)
    else:
        mvn = MultitaskMultivariateNormal(mean, covar, interleaved=interleaved)
    return GPyTorchPosterior(mvn)
def forward(self, X):
    """
    Return prior distribution
    """
    mean = self.mean(X)
    covariance_matrix = self.kernel(X)
    assert covariance_matrix.dim() == 2
    if mean.dim() == 2:
        m, k = mean.shape
        covariance_matrix = covariance_matrix.expand(m, k, k)
    return MultivariateNormal(mean, covariance_matrix)
def test_multivariate_normal_batch_non_lazy(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        mean = torch.tensor([0, 1, 2], device=device, dtype=dtype)
        covmat = torch.diag(torch.tensor([1, 0.75, 1.5], device=device, dtype=dtype))
        mvn = MultivariateNormal(
            mean=mean.repeat(2, 1),
            covariance_matrix=covmat.repeat(2, 1, 1),
            validate_args=True,
        )
        self.assertTrue(torch.is_tensor(mvn.covariance_matrix))
        self.assertIsInstance(mvn.lazy_covariance_matrix, LazyTensor)
        self.assertAllClose(mvn.variance, covmat.diag().repeat(2, 1))
        self.assertAllClose(
            mvn.scale_tril, torch.diag(covmat.diag().sqrt()).repeat(2, 1, 1)
        )
        mvn_plus1 = mvn + 1
        self.assertAllClose(mvn_plus1.mean, mvn.mean + 1)
        self.assertAllClose(mvn_plus1.covariance_matrix, mvn.covariance_matrix)
        mvn_times2 = mvn * 2
        self.assertAllClose(mvn_times2.mean, mvn.mean * 2)
        self.assertAllClose(mvn_times2.covariance_matrix, mvn.covariance_matrix * 4)
        mvn_divby2 = mvn / 2
        self.assertAllClose(mvn_divby2.mean, mvn.mean / 2)
        self.assertAllClose(mvn_divby2.covariance_matrix, mvn.covariance_matrix / 4)
        self.assertAllClose(
            mvn.entropy(), 4.3157 * torch.ones(2, device=device, dtype=dtype)
        )
        logprob = mvn.log_prob(torch.zeros(2, 3, device=device, dtype=dtype))
        logprob_expected = -4.8157 * torch.ones(2, device=device, dtype=dtype)
        self.assertAllClose(logprob, logprob_expected)
        logprob = mvn.log_prob(torch.zeros(2, 2, 3, device=device, dtype=dtype))
        logprob_expected = -4.8157 * torch.ones(2, 2, device=device, dtype=dtype)
        self.assertAllClose(logprob, logprob_expected)
        conf_lower, conf_upper = mvn.confidence_region()
        self.assertAllClose(conf_lower, mvn.mean - 2 * mvn.stddev)
        self.assertAllClose(conf_upper, mvn.mean + 2 * mvn.stddev)
        self.assertTrue(mvn.sample().shape == torch.Size([2, 3]))
        self.assertTrue(mvn.sample(torch.Size([2])).shape == torch.Size([2, 2, 3]))
        self.assertTrue(mvn.sample(torch.Size([2, 4])).shape == torch.Size([2, 4, 2, 3]))
def test_multivariate_normal_lazy(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        mean = torch.tensor([0, 1, 2], device=device, dtype=dtype)
        covmat = torch.diag(torch.tensor([1, 0.75, 1.5], device=device, dtype=dtype))
        covmat_chol = torch.cholesky(covmat)
        mvn = MultivariateNormal(mean=mean, covariance_matrix=NonLazyTensor(covmat))
        self.assertTrue(torch.is_tensor(mvn.covariance_matrix))
        self.assertIsInstance(mvn.lazy_covariance_matrix, LazyTensor)
        self.assertAllClose(mvn.variance, torch.diag(covmat))
        self.assertAllClose(mvn.covariance_matrix, covmat)
        self.assertAllClose(mvn._unbroadcasted_scale_tril, covmat_chol)
        mvn_plus1 = mvn + 1
        self.assertAllClose(mvn_plus1.mean, mvn.mean + 1)
        self.assertAllClose(mvn_plus1.covariance_matrix, mvn.covariance_matrix)
        self.assertAllClose(mvn_plus1._unbroadcasted_scale_tril, covmat_chol)
        mvn_times2 = mvn * 2
        self.assertAllClose(mvn_times2.mean, mvn.mean * 2)
        self.assertAllClose(mvn_times2.covariance_matrix, mvn.covariance_matrix * 4)
        self.assertAllClose(mvn_times2._unbroadcasted_scale_tril, covmat_chol * 2)
        mvn_divby2 = mvn / 2
        self.assertAllClose(mvn_divby2.mean, mvn.mean / 2)
        self.assertAllClose(mvn_divby2.covariance_matrix, mvn.covariance_matrix / 4)
        self.assertAllClose(mvn_divby2._unbroadcasted_scale_tril, covmat_chol / 2)
        # TODO: Add tests for entropy, log_prob, etc. - this is an issue b/c it
        # uses root_decomposition, which is not very reliable
        # self.assertAlmostEqual(mvn.entropy().item(), 4.3157, places=4)
        # self.assertAlmostEqual(mvn.log_prob(torch.zeros(3)).item(), -4.8157, places=4)
        # self.assertTrue(
        #     torch.allclose(mvn.log_prob(torch.zeros(2, 3)), -4.8157 * torch.ones(2))
        # )
        conf_lower, conf_upper = mvn.confidence_region()
        self.assertAllClose(conf_lower, mvn.mean - 2 * mvn.stddev)
        self.assertAllClose(conf_upper, mvn.mean + 2 * mvn.stddev)
        self.assertTrue(mvn.sample().shape == torch.Size([3]))
        self.assertTrue(mvn.sample(torch.Size([2])).shape == torch.Size([2, 3]))
        self.assertTrue(mvn.sample(torch.Size([2, 4])).shape == torch.Size([2, 4, 3]))
def test_base_sample_shape(self):
    a = torch.randn(5, 10)
    lazy_square_a = RootLazyTensor(lazify(a))
    dist = MultivariateNormal(torch.zeros(5), lazy_square_a)
    # check that providing the base samples is okay
    samples = dist.rsample(torch.Size((16,)), base_samples=torch.randn(16, 10))
    self.assertEqual(samples.shape, torch.Size((16, 5)))
    # check that an incorrect event shape of base samples fails
    self.assertRaises(
        RuntimeError, dist.rsample, torch.Size((16,)), base_samples=torch.randn(16, 5)
    )
    # check that the proper event shape of base samples is okay for
    # a non root lt
    nonlazy_square_a = lazify(lazy_square_a.evaluate())
    dist = MultivariateNormal(torch.zeros(5), nonlazy_square_a)
    samples = dist.rsample(torch.Size((16,)), base_samples=torch.randn(16, 5))
    self.assertEqual(samples.shape, torch.Size((16, 5)))
def test_multivariate_normal_correlated_samples(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        mean = torch.tensor([0, 1, 2], device=device, dtype=dtype)
        covmat = torch.diag(torch.tensor([1, 0.75, 1.5], device=device, dtype=dtype))
        mvn = MultivariateNormal(mean=mean, covariance_matrix=NonLazyTensor(covmat))
        base_samples = mvn.get_base_samples(torch.Size([3, 4]))
        self.assertTrue(
            mvn.sample(base_samples=base_samples).shape == torch.Size([3, 4, 3])
        )
        base_samples = mvn.get_base_samples()
        self.assertTrue(
            mvn.sample(base_samples=base_samples).shape == torch.Size([3])
        )
def _draw_gp_function(self, X, lengthscale=10.0, kernel_str="RBF"):
    if kernel_str == "RBF":
        kernel = RBFKernel()
    elif kernel_str == "Mat":
        kernel = MaternKernel(nu=0.5)
    else:
        raise Exception("Invalid kernel string: {}".format(kernel_str))
    kernel.lengthscale = lengthscale
    with torch.no_grad():
        lazy_cov = kernel(X)
        mean = torch.zeros(lazy_cov.size(0))
        mvn = MultivariateNormal(mean, lazy_cov)
        Y = mvn.rsample()[:, None]
    return Y
def test_missing_value_inference(self):
    """
    samples = mvn samples + noise samples
    In this test, we try to recover noise parameters when some elements
    in 'samples' are missing at random.
    """
    torch.manual_seed(self.seed)
    mu = torch.zeros(2, 3)
    sigma = torch.tensor(
        [[[1, 0.999, -0.999], [0.999, 1, -0.999], [-0.999, -0.999, 1]]] * 2
    ).float()
    mvn = MultivariateNormal(mu, sigma)
    samples = mvn.sample(torch.Size([10000]))  # mvn samples

    noise_sd = 0.5
    noise_dist = torch.distributions.Normal(0, noise_sd)
    samples += noise_dist.sample(samples.shape)  # noise

    missing_prop = 0.33
    missing_idx = (
        torch.distributions.Binomial(1, missing_prop).sample(samples.shape).bool()
    )
    samples[missing_idx] = float("nan")

    likelihood = GaussianLikelihoodWithMissingObs()

    # check that the missing value fill doesn't impact the likelihood
    likelihood.MISSING_VALUE_FILL = 999.0
    like_init_plus = likelihood.log_marginal(samples, mvn).sum().data
    likelihood.MISSING_VALUE_FILL = -999.0
    like_init_minus = likelihood.log_marginal(samples, mvn).sum().data
    torch.testing.assert_allclose(like_init_plus, like_init_minus)

    # check that the correct noise sd is recovered
    opt = torch.optim.Adam(likelihood.parameters(), lr=0.05)
    for _ in range(100):
        opt.zero_grad()
        loss = -likelihood.log_marginal(samples, mvn).sum()
        loss.backward()
        opt.step()
    assert abs(float(likelihood.noise.sqrt()) - 0.5) < 0.02

    # check that log_marginal works on a single sample
    likelihood.log_marginal(samples[0], mvn)
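# A compact sketch of the generative process the docstring above describes:
# correlated MVN draws plus Gaussian noise, with entries masked at random.
# The sizes and the 0.33 missingness rate here are illustrative.
import torch
from gpytorch.distributions import MultivariateNormal

mvn = MultivariateNormal(torch.zeros(3), torch.eye(3))
samples = mvn.sample(torch.Size([100]))              # latent function draws
samples = samples + 0.5 * torch.randn_like(samples)  # additive observation noise
mask = torch.rand_like(samples) < 0.33               # ~33% missing at random
samples[mask] = float("nan")                         # NaNs mark missing entries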
def forward(self, x): """Forward pass method for making predictions through the model. The mean and covariance are each computed to produce a MV distribution. Parameters: x (torch.tensor): The tensor for which we predict a mean and covariance used the BatchedGP model. Returns: mv_normal (gpytorch.distributions.MultivariateNormal): A Multivariate Normal distribution with parameters for mean and covariance computed at x. """ mean_x = self.mean_module(x) # Compute the mean at x covar_x = self.covar_module(x) # Compute the covariance at x return MultivariateNormal(mean_x, covar_x)
def forward(self, *inputs: Tensor, **kwargs) -> MultivariateNormal:
    """Forward execution of the recognition model."""
    output_sequence, input_sequence = inputs
    num_particles = kwargs.get('num_particles', 1)
    assert output_sequence.dim() == 3
    dim_outputs = output_sequence.shape[-1]
    batch_size = output_sequence.shape[0]

    loc = torch.zeros(batch_size, self.dim_states)
    loc[:, :dim_outputs] = output_sequence[:, 0]
    loc = loc.expand(num_particles, batch_size, self.dim_states).permute(1, 2, 0)
    cov = self.variance.expand(num_particles, batch_size, self.dim_states).permute(1, 2, 0)

    return MultivariateNormal(loc, covariance_matrix=torch.diag_embed(cov))
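# The diag_embed pattern above builds diagonal-covariance Gaussians from
# per-dimension variances. A minimal sketch of that pattern in isolation,
# with illustrative shapes (a batch of 4 Gaussians over a 3-dim state):
import torch
from gpytorch.distributions import MultivariateNormal

loc = torch.zeros(4, 3)  # batch of 4 means
var = torch.rand(4, 3)   # per-dimension variances
mvn = MultivariateNormal(loc, covariance_matrix=torch.diag_embed(var))
print(mvn.batch_shape, mvn.event_shape)  # torch.Size([4]) torch.Size([3])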