def __call__(self, inputs, are_samples=False, **kwargs):
    """
    Forward data through this hidden GP layer. The output is a MultitaskMultivariateNormal distribution
    (or MultivariateNormal distribution if output_dims=None).

    If the input is >=2 dimensional Tensor (e.g. `n x d`), we pass the input through each hidden GP,
    resulting in a `n x h` multitask Gaussian distribution (where all of the `h` tasks represent an
    output dimension and are independent from one another). We then draw `s` samples from these
    Gaussians, resulting in a `s x n x h` MultitaskMultivariateNormal distribution.

    If the input is a >=3 dimensional Tensor, and the `are_samples=True` kwarg is set, then we assume
    that the outermost batch dimension is a samples dimension. The output will have the same number of
    samples. For example, a `s x b x n x d` input will result in a `s x b x n x h`
    MultitaskMultivariateNormal distribution.

    The goal of these last two points is that if you have a tensor `x` that is `n x d`, then:

        >>> hidden_gp2(hidden_gp(x))

    will just work, and return a tensor of size `s x n x h2`, where `h2` is the output dimensionality
    of hidden_gp2. In this way, hidden GP layers are easily composable.

    Args:
        inputs: A Tensor, or a MultitaskMultivariateNormal (e.g. the output of a previous layer), in
            which case one reparameterized sample is drawn from its marginals and used as the input.
        are_samples: If True, the inputs are treated as already carrying a samples dimension and the
            output is not expanded by `settings.num_likelihood_samples`.
        **kwargs: Accepted for interface compatibility; not forwarded to `ApproximateGP.__call__`.

    Raises:
        ValueError: In debug mode, if `inputs` is neither a Tensor nor a MultitaskMultivariateNormal.
        RuntimeError: In debug mode, if the last input dimension differs from `self.input_dims`.
    """
    deterministic_inputs = not are_samples
    if isinstance(inputs, MultitaskMultivariateNormal):
        # Sample from the independent marginals of the incoming distribution.
        inputs = torch.distributions.Normal(loc=inputs.mean, scale=inputs.variance.sqrt()).rsample()
        deterministic_inputs = False

    if settings.debug.on():
        if not torch.is_tensor(inputs):
            # `__name__` (lower case) is the class-name attribute; the previous `__Name__`
            # raised AttributeError and hid the intended ValueError.
            raise ValueError(
                "`inputs` should either be a MultitaskMultivariateNormal or a Tensor, got "
                f"{inputs.__class__.__name__}"
            )

        if inputs.size(-1) != self.input_dims:
            raise RuntimeError(
                f"Input shape did not match self.input_dims. Got total feature dims [{inputs.size(-1)}],"
                f" expected [{self.input_dims}]"
            )

    # Repeat the input for all possible outputs
    if self.output_dims is not None:
        inputs = inputs.unsqueeze(-3)
        inputs = inputs.expand(*inputs.shape[:-3], self.output_dims, *inputs.shape[-2:])

    # Now run samples through the GP
    output = ApproximateGP.__call__(self, inputs)
    if self.output_dims is not None:
        # Fold the per-output batch dimension into a (non-interleaved) multitask distribution.
        mean = output.loc.transpose(-1, -2)
        covar = BlockDiagLazyTensor(output.lazy_covariance_matrix, block_dim=-3)
        output = MultitaskMultivariateNormal(mean, covar, interleaved=False)

    # Maybe expand inputs?
    if deterministic_inputs:
        output = output.expand(torch.Size([settings.num_likelihood_samples.value()]) + output.batch_shape)

    return output
def test_from_independent_mvns(self, cuda=False):
    """Check mean/covariance shapes of `MultitaskMultivariateNormal.from_independent_mvns`.

    Covers both non-batch and batch-mode component distributions, for float and double.
    """
    device = torch.device("cuda" if cuda else "cpu")
    n_tasks, n = 2, 4
    mean_shape = [n, n_tasks]
    covar_shape = [n * n_tasks, n * n_tasks]

    for dtype in (torch.float, torch.double):
        tkwargs = {"device": device, "dtype": dtype}

        # Non-batch mode: each component is an n-dimensional MVN with diagonal covariance.
        components = []
        for _ in range(n_tasks):
            loc = torch.randn(n, **tkwargs)
            diag = torch.randn(n, **tkwargs).abs_()
            components.append(MultivariateNormal(mean=loc, covariance_matrix=DiagLazyTensor(diag)))
        joint = MultitaskMultivariateNormal.from_independent_mvns(mvns=components)
        self.assertEqual(list(joint.mean.shape), mean_shape)
        self.assertEqual(list(joint.covariance_matrix.shape), covar_shape)

        # Batch mode: same construction with a leading batch dimension of size b.
        b = 3
        components = []
        for _ in range(n_tasks):
            loc = torch.randn(b, n, **tkwargs)
            diag = torch.randn(b, n, **tkwargs).abs_()
            components.append(MultivariateNormal(mean=loc, covariance_matrix=DiagLazyTensor(diag)))
        joint = MultitaskMultivariateNormal.from_independent_mvns(mvns=components)
        self.assertEqual(list(joint.mean.shape), [b] + mean_shape)
        self.assertEqual(list(joint.covariance_matrix.shape), [b] + covar_shape)
def test_degenerate_GPyTorchPosterior_Multitask(self):
    """Exercise `GPyTorchPosterior` on a multitask MVN whose covariance is singular.

    Verifies basic properties, that sampling emits the "not p.d" RuntimeWarning,
    sample shapes, and reproducibility when base samples are supplied.
    """
    for dtype in (torch.float, torch.double):
        tkwargs = {"dtype": dtype, "device": self.device}

        # singular covariance matrix (first two rows are identical)
        singular_cov = torch.tensor([[1, 1, 0], [1, 1, 0], [0, 0, 2]], **tkwargs)
        mean = torch.rand(3, **tkwargs)
        single = MultivariateNormal(mean, lazify(singular_cov))
        joint = MultitaskMultivariateNormal.from_independent_mvns([single, single])
        posterior = GPyTorchPosterior(mvn=joint)

        # basics
        self.assertEqual(posterior.device.type, self.device.type)
        self.assertEqual(posterior.dtype, dtype)
        self.assertEqual(posterior.event_shape, torch.Size([3, 2]))
        expected_mean = torch.stack((mean, mean), dim=-1)
        self.assertTrue(torch.equal(posterior.mean, expected_mean))
        expected_var = torch.diagonal(singular_cov).unsqueeze(-1).repeat(1, 2)
        self.assertTrue(torch.equal(posterior.variance, expected_var))

        # rsample
        with warnings.catch_warnings(record=True) as caught:
            # we check that the p.d. warning is emitted - this only
            # happens once per posterior, so we need to check only once
            samples = posterior.rsample(sample_shape=torch.Size([4]))
            self.assertTrue(any(issubclass(w.category, RuntimeWarning) for w in caught))
            self.assertTrue(any("not p.d" in str(w.message) for w in caught))
        self.assertEqual(samples.shape, torch.Size([4, 3, 2]))
        samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
        self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 2]))

        # rsample w/ base samples: identical base samples must give identical draws
        for sample_shape in (torch.Size([4]), torch.Size([4, 2])):
            base = torch.randn(*sample_shape, 3, 2, **tkwargs)
            first = posterior.rsample(sample_shape=sample_shape, base_samples=base)
            second = posterior.rsample(sample_shape=sample_shape, base_samples=base)
            self.assertTrue(torch.allclose(first, second))

        # collapse_batch_dims
        b_mean = torch.rand(2, 3, **tkwargs)
        b_singular_cov = singular_cov.expand(2, *singular_cov.shape)
        b_single = MultivariateNormal(b_mean, lazify(b_singular_cov))
        b_joint = MultitaskMultivariateNormal.from_independent_mvns([b_single, b_single])
        b_posterior = GPyTorchPosterior(mvn=b_joint)
        b_base = torch.randn(4, 1, 3, 2, **tkwargs)
        with warnings.catch_warnings(record=True) as caught:
            b_samples = b_posterior.rsample(sample_shape=torch.Size([4]), base_samples=b_base)
            self.assertTrue(any(issubclass(w.category, RuntimeWarning) for w in caught))
            self.assertTrue(any("not p.d" in str(w.message) for w in caught))
        self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 2]))
def test_GPyTorchPosterior_Multitask(self):
    """Exercise `GPyTorchPosterior` wrapping a `MultitaskMultivariateNormal`.

    Checks device/dtype/event shape, mean and variance, sample shapes, and
    reproducibility of draws when base samples are supplied (with and without
    a batch dimension on the distribution).
    """
    for dtype in (torch.float, torch.double):
        tkwargs = {"dtype": dtype, "device": self.device}
        mean = torch.rand(3, 2, **tkwargs)
        variance = 1 + torch.rand(3, 2, **tkwargs)
        covar = torch.diag(variance.view(-1))
        posterior = GPyTorchPosterior(mvn=MultitaskMultivariateNormal(mean, lazify(covar)))

        # basics
        self.assertEqual(posterior.device.type, self.device.type)
        self.assertEqual(posterior.dtype, dtype)
        self.assertEqual(posterior.event_shape, torch.Size([3, 2]))
        self.assertTrue(torch.equal(posterior.mean, mean))
        self.assertTrue(torch.equal(posterior.variance, variance))

        # rsample
        draws = posterior.rsample(sample_shape=torch.Size([4]))
        self.assertEqual(draws.shape, torch.Size([4, 3, 2]))
        draws2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
        self.assertEqual(draws2.shape, torch.Size([4, 2, 3, 2]))

        # rsample w/ base samples: identical base samples must give identical draws
        for sample_shape in (torch.Size([4]), torch.Size([4, 2])):
            base = torch.randn(*sample_shape, 3, 2, **tkwargs)
            first = posterior.rsample(sample_shape=sample_shape, base_samples=base)
            second = posterior.rsample(sample_shape=sample_shape, base_samples=base)
            self.assertTrue(torch.allclose(first, second))

        # collapse_batch_dims
        b_mean = torch.rand(2, 3, 2, **tkwargs)
        b_variance = 1 + torch.rand(2, 3, 2, **tkwargs)
        identity = torch.eye(6).type_as(b_variance)
        b_covar = b_variance.view(2, 6, 1) * identity
        b_posterior = GPyTorchPosterior(mvn=MultitaskMultivariateNormal(b_mean, lazify(b_covar)))
        b_base = torch.randn(4, 1, 3, 2, **tkwargs)
        b_draws = b_posterior.rsample(sample_shape=torch.Size([4]), base_samples=b_base)
        self.assertEqual(b_draws.shape, torch.Size([4, 2, 3, 2]))
def test_multitask_multivariate_normal_batch(self, cuda=False):
    """Check moments, arithmetic, entropy, log-prob, confidence region and sampling
    of a batched `MultitaskMultivariateNormal` with a diagonal covariance."""
    device = torch.device("cuda" if cuda else "cpu")
    tkwargs = {"dtype": torch.float, "device": device}

    mean = torch.tensor([[0, 1], [2, 3]], **tkwargs).repeat(2, 1, 1)
    variance = 1 + torch.arange(4, **tkwargs)
    covmat = torch.diag(variance).repeat(2, 1, 1)
    mtmvn = MultitaskMultivariateNormal(mean=mean, covariance_matrix=covmat)

    # moments
    self.assertTrue(torch.equal(mtmvn.mean, mean))
    expected_variance = variance.repeat(2, 1).view(2, 2, 2)
    self.assertTrue(approx_equal(mtmvn.variance, expected_variance))
    self.assertTrue(torch.equal(mtmvn.scale_tril, covmat.sqrt()))

    # shifting leaves the covariance untouched
    shifted = mtmvn + 1
    self.assertTrue(torch.equal(shifted.mean, mtmvn.mean + 1))
    self.assertTrue(torch.equal(shifted.covariance_matrix, mtmvn.covariance_matrix))

    # scaling by c scales the covariance by c ** 2
    doubled = mtmvn * 2
    self.assertTrue(torch.equal(doubled.mean, mtmvn.mean * 2))
    self.assertTrue(torch.equal(doubled.covariance_matrix, mtmvn.covariance_matrix * 4))
    halved = mtmvn / 2
    self.assertTrue(torch.equal(halved.mean, mtmvn.mean / 2))
    self.assertTrue(torch.equal(halved.covariance_matrix, mtmvn.covariance_matrix / 4))

    # entropy and log-density against precomputed reference values
    self.assertTrue(approx_equal(mtmvn.entropy(), 7.2648 * torch.ones(2, device=device)))
    for lead in ((), (3,)):
        log_density = mtmvn.log_prob(torch.zeros(*lead, 2, 2, 2, device=device))
        reference = -7.3064 * torch.ones(*lead, 2, device=device)
        self.assertTrue(approx_equal(log_density, reference))

    # confidence region is mean +/- two standard deviations
    lower, upper = mtmvn.confidence_region()
    self.assertTrue(approx_equal(lower, mtmvn.mean - 2 * mtmvn.stddev))
    self.assertTrue(approx_equal(upper, mtmvn.mean + 2 * mtmvn.stddev))

    # sample shapes
    self.assertEqual(mtmvn.sample().shape, torch.Size([2, 2, 2]))
    self.assertEqual(mtmvn.sample(torch.Size([3])).shape, torch.Size([3, 2, 2, 2]))
    self.assertEqual(mtmvn.sample(torch.Size([3, 4])).shape, torch.Size([3, 4, 2, 2, 2]))
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `q x d` or `batch_shape x q x d` (batch mode) tensor, where `d` is the
            dimension of the feature space (not including task indices) and
            `q` is the number of points considered jointly.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add observation noise to the posterior.

    Keyword Args:
        detach_test_caches: If True (the default), detach GPyTorch test caches
            during computation of the posterior. Required for being able to
            compute derivatives with respect to training inputs at test time
            (used e.g. by qNoisyExpectedImprovement).

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices`. Includes measurement noise if
        `observation_noise=True`.

    Raises:
        ValueError: If any entry of `output_indices` is not one of the model's
            output tasks.
    """
    if output_indices is None:
        output_indices = self._output_tasks
    if any(i not in self._output_tasks for i in output_indices):
        raise ValueError("Too many output indices")

    # construct evaluation X
    X_full = _make_X_full(X=X, output_indices=output_indices, tf=self._task_feature)

    self.eval()  # make sure model is in eval mode
    detach_test_caches = kwargs.get("detach_test_caches", True)
    with ExitStack() as es:
        es.enter_context(settings.debug(False))
        es.enter_context(settings.fast_pred_var())
        es.enter_context(settings.detach_test_caches(detach_test_caches))
        mvn = self(X_full)
        if observation_noise:
            # TODO: Allow passing in observation noise via kwarg
            mvn = self.likelihood(mvn, X_full)

    # If single-output, return the posterior of a single-output model
    if len(output_indices) == 1:
        return GPyTorchPosterior(mvn=mvn)

    # Otherwise, make a MultitaskMultivariateNormal out of this.
    # The covariance is handed over as `interleaved=False`, i.e. the joint is laid
    # out task-major (all q points of the first output, then the second, ...).
    # The flat mean follows the same layout, so it has to be unflattened as
    # `m x q` and transposed to the `q x m` shape the distribution expects; a
    # direct `view(..., q, m)` would mix up entries whenever q > 1 and m > 1.
    # NOTE(review): relies on `_make_X_full` stacking one copy of X per output
    # index — confirm against that helper.
    num_outputs = len(output_indices)
    mean = mvn.mean.view(*X.shape[:-2], num_outputs, X.shape[-2]).transpose(-1, -2)
    mtmvn = MultitaskMultivariateNormal(
        mean=mean,
        covariance_matrix=mvn.lazy_covariance_matrix,
        interleaved=False,
    )
    return GPyTorchPosterior(mvn=mtmvn)
def _create_marginal_input(self, batch_shape=torch.Size([])):
    """Build a random 5-point, 4-task `MultitaskMultivariateNormal` test input.

    The covariance is the Kronecker product of a 5 x 5 and a 4 x 4 root-decomposed
    lazy tensor, each built from standard-normal draws; `batch_shape` is
    prepended to every tensor.
    """
    left_root = torch.randn(*batch_shape, 5, 5)
    right_root = torch.randn(*batch_shape, 4, 4)
    kron_covar = KroneckerProductLazyTensor(
        RootLazyTensor(left_root),
        RootLazyTensor(right_root),
    )
    loc = torch.randn(*batch_shape, 5, 4)
    return MultitaskMultivariateNormal(loc, kron_covar)
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: Union[bool, Tensor] = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
            of the feature space and `q` is the number of points considered
            jointly.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add the observation noise from the
            likelihood to the posterior. If a Tensor, use it directly as the
            observation noise (must be of shape `(batch_shape) x q x m`).

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices` each. Includes observation noise if specified.
    """
    self.eval()  # make sure model is in eval mode
    with gpt_posterior_settings():
        # insert a dimension for the output dimension
        if self._num_outputs > 1:
            X, output_dim_idx = add_output_dim(
                X=X, original_batch_shape=self._input_batch_shape
            )
        mvn = self(X)
        if observation_noise is not False:
            if torch.is_tensor(observation_noise):
                # TODO: Validate noise shape
                if self._num_outputs > 1:
                    # move the output dimension in front of `q` so it lines up
                    # with the output dimension inserted into X above
                    obs_noise = observation_noise.transpose(-1, -2)
                else:
                    # single-output: no output dimension was added to X, so drop
                    # the trailing singleton instead of transposing it into a
                    # spurious batch dimension
                    obs_noise = observation_noise.squeeze(-1)
                mvn = self.likelihood(mvn, X, noise=obs_noise)
            elif isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
                # Use the mean of the previous noise values (TODO: be smarter here).
                noise = self.likelihood.noise.mean().expand(X.shape[:-1])
                mvn = self.likelihood(mvn, X, noise=noise)
            else:
                mvn = self.likelihood(mvn, X)
        if self._num_outputs > 1:
            # Split the batched output dimension into one MVN per requested output
            # and recombine them as independent tasks.
            mean_x = mvn.mean
            covar_x = mvn.covariance_matrix
            output_indices = output_indices or range(self._num_outputs)
            mvns = [
                MultivariateNormal(
                    mean_x.select(dim=output_dim_idx, index=t),
                    lazify(covar_x.select(dim=output_dim_idx, index=t)),
                )
                for t in output_indices
            ]
            mvn = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
    return GPyTorchPosterior(mvn=mvn)
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: Union[bool, Tensor] = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `q x d` or `batch_shape x q x d` (batch mode) tensor, where `d` is the
            dimension of the feature space (not including task indices) and
            `q` is the number of points considered jointly.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: Must be left at `False`; any other value currently
            raises `NotImplementedError`.

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices`.

    Raises:
        ValueError: If any entry of `output_indices` is not one of the model's
            output tasks.
        NotImplementedError: If the model has an `outcome_transform` attribute,
            or if `observation_noise` is not `False`.
    """
    if output_indices is None:
        output_indices = self._output_tasks
    if any(i not in self._output_tasks for i in output_indices):
        raise ValueError("Too many output indices")
    cls_name = self.__class__.__name__
    if hasattr(self, "outcome_transform"):
        raise NotImplementedError(
            f"Outcome transforms currently not supported by {cls_name}"
        )

    # construct evaluation X
    X_full = _make_X_full(X=X, output_indices=output_indices, tf=self._task_feature)

    self.eval()  # make sure model is in eval mode
    with gpt_posterior_settings():
        mvn = self(X_full)
        if observation_noise is not False:
            raise NotImplementedError(
                f"Specifying observation noise is not yet supported by {cls_name}"
            )

    # If single-output, return the posterior of a single-output model
    if len(output_indices) == 1:
        return GPyTorchPosterior(mvn=mvn)

    # Otherwise, make a MultitaskMultivariateNormal out of this.
    # The covariance is handed over as `interleaved=False`, i.e. the joint is laid
    # out task-major (all q points of the first output, then the second, ...).
    # The flat mean follows the same layout, so it has to be unflattened as
    # `m x q` and transposed to the `q x m` shape the distribution expects; a
    # direct `view(..., q, m)` would mix up entries whenever q > 1 and m > 1.
    # NOTE(review): relies on `_make_X_full` stacking one copy of X per output
    # index — confirm against that helper.
    num_outputs = len(output_indices)
    mean = mvn.mean.view(*X.shape[:-2], num_outputs, X.shape[-2]).transpose(-1, -2)
    mtmvn = MultitaskMultivariateNormal(
        mean=mean,
        covariance_matrix=mvn.lazy_covariance_matrix,
        interleaved=False,
    )
    return GPyTorchPosterior(mvn=mtmvn)
def _get_test_posterior(batch_shape, q=1, m=1, **tkwargs):
    """Create a random multitask `GPyTorchPosterior` for tests.

    Args:
        batch_shape: Leading batch dimensions of the distribution.
        q: Number of points.
        m: Number of outputs.
        **tkwargs: Tensor constructor keyword arguments forwarded to `torch.rand`.

    Returns:
        A `GPyTorchPosterior` wrapping a `MultitaskMultivariateNormal` with a
        `batch_shape x q x m` mean and a `batch_shape x qm x qm` covariance.
    """
    joint_dim = q * m
    loc = torch.rand(*batch_shape, q, m, **tkwargs)
    factor = torch.rand(*batch_shape, joint_dim, joint_dim, **tkwargs)
    covariance = torch.matmul(factor, factor.transpose(-1, -2))
    # Boost the diagonal of the Gram matrix in place with uniform random values.
    boost = torch.rand(*batch_shape, joint_dim, **tkwargs)
    torch.diagonal(covariance, dim1=-2, dim2=-1).add_(boost)
    return GPyTorchPosterior(MultitaskMultivariateNormal(loc, covariance))
def test_multitask_multivariate_normal_exceptions(self, cuda=False):
    """A one-dimensional mean must be rejected by `MultitaskMultivariateNormal`."""
    device = torch.device("cuda" if cuda else "cpu")
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": device, "dtype": dtype}
        flat_mean = torch.tensor([0, 1], **tkwargs)
        identity = torch.eye(2, **tkwargs)
        self.assertRaises(
            RuntimeError,
            MultitaskMultivariateNormal,
            mean=flat_mean,
            covariance_matrix=identity,
        )
def test_transformed_posterior(self):
    """Exercise `TransformedPosterior` on single- and two-output base posteriors.

    Checks metadata, transformed mean/variance, sample shapes, that the sample
    transform is applied on top of the base posterior's draws, and that mean and
    variance raise `NotImplementedError` when no transforms were given for them.
    """
    for dtype in (torch.float, torch.double):
        tkwargs = {"dtype": dtype, "device": self.device}
        for m in (1, 2):
            shape = torch.Size([3, m])
            mean = torch.rand(shape, **tkwargs)
            variance = 1 + torch.rand(shape, **tkwargs)
            if m != 1:
                flat_variance = variance.view(*variance.shape[:-2], -1)
                covar = torch.diag_embed(flat_variance)
                mvn = MultitaskMultivariateNormal(mean, lazify(covar))
            else:
                covar = torch.diag_embed(variance.squeeze(-1))
                mvn = MultivariateNormal(mean.squeeze(-1), lazify(covar))
            p_base = GPyTorchPosterior(mvn=mvn)
            # dummy transforms
            p_tf = TransformedPosterior(
                posterior=p_base,
                sample_transform=lambda s: s + 2,
                mean_transform=lambda mu, var: 2 * mu + var,
                variance_transform=lambda mu, var: mu + 2 * var,
            )

            # mean, variance
            self.assertEqual(p_tf.device.type, self.device.type)
            self.assertEqual(p_tf.dtype, dtype)
            self.assertEqual(p_tf.event_shape, shape)
            self.assertEqual(p_tf.base_sample_shape, shape)
            self.assertTrue(torch.equal(p_tf.mean, 2 * mean + variance))
            self.assertTrue(torch.equal(p_tf.variance, mean + 2 * variance))

            # rsample
            draws = p_tf.rsample()
            self.assertEqual(draws.shape, torch.Size([1]) + shape)
            draws = p_tf.rsample(sample_shape=torch.Size([4]))
            self.assertEqual(draws.shape, torch.Size([4]) + shape)
            draws2 = p_tf.rsample(sample_shape=torch.Size([4, 2]))
            self.assertEqual(draws2.shape, torch.Size([4, 2]) + shape)

            # rsample w/ base samples
            base_samples = torch.randn(4, *shape, **tkwargs)
            # incompatible shapes
            self.assertRaises(
                RuntimeError,
                p_tf.rsample,
                sample_shape=torch.Size([3]),
                base_samples=base_samples,
            )
            # make sure sample transform is applied correctly
            four = torch.Size([4])
            draws_base = p_base.rsample(sample_shape=four, base_samples=base_samples)
            draws_tf = p_tf.rsample(sample_shape=four, base_samples=base_samples)
            self.assertTrue(torch.equal(draws_tf, draws_base + 2))

            # check error handling
            p_tf_2 = TransformedPosterior(
                posterior=p_base, sample_transform=lambda s: s + 2
            )
            with self.assertRaises(NotImplementedError):
                p_tf_2.mean
            with self.assertRaises(NotImplementedError):
                p_tf_2.variance
def test_expected_improvement_batch(self, cuda=False):
    """Batch-mode checks for analytic `ExpectedImprovement`.

    Covers reference values on a mock posterior, the error raised for a
    multi-output model without an objective, scalarized objectives on single- and
    multi-output posteriors, and rejection of an unsupported objective class.
    """
    device = torch.device("cuda" if cuda else "cpu")
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": device, "dtype": dtype}

        mean = torch.tensor([-0.5, 0.0, 0.5], **tkwargs).view(3, 1, 1)
        variance = torch.ones(3, 1, 1, **tkwargs)
        mm = MockModel(MockPosterior(mean=mean, variance=variance))
        module = ExpectedImprovement(model=mm, best_f=0.0)
        X = torch.empty(3, 1, 1, **tkwargs)  # dummy
        ei = module(X)
        ei_expected = torch.tensor([0.19780, 0.39894, 0.69780], **tkwargs)
        self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))

        # check for proper error if multi-output model
        mean2 = torch.rand(3, 1, 2, **tkwargs)
        variance2 = torch.rand(3, 1, 2, **tkwargs)
        mm2 = MockModel(MockPosterior(mean=mean2, variance=variance2))
        module2 = ExpectedImprovement(model=mm2, best_f=0.0)
        self.assertRaises(UnsupportedError, module2, X)

        # test objective (single-output)
        mean = torch.tensor([[[0.5]], [[0.25]]], **tkwargs)
        covar = torch.tensor([[[[0.16]]], [[[0.125]]]], **tkwargs)
        mm = MockModel(GPyTorchPosterior(MultivariateNormal(mean, covar)))
        obj = ScalarizedObjective(torch.tensor([0.5], **tkwargs))
        ei = ExpectedImprovement(model=mm, best_f=0.0, objective=obj)
        X = torch.rand(2, 1, 2, **tkwargs)
        ei_expected = torch.tensor([[0.2601], [0.1500]], **tkwargs)
        # NOTE(review): the comparison result is discarded, so this line asserts
        # nothing — confirm the expected shape before wrapping in assertTrue.
        torch.allclose(ei(X), ei_expected, atol=1e-4)

        # test objective (multi-output)
        mean = torch.tensor([[[-0.25, 0.5]], [[0.2, -0.1]]], **tkwargs)
        covar = torch.tensor(
            [[[0.5, 0.125], [0.125, 0.5]], [[0.25, -0.1], [-0.1, 0.25]]],
            **tkwargs,
        )
        mm = MockModel(GPyTorchPosterior(MultitaskMultivariateNormal(mean, covar)))
        obj = ScalarizedObjective(torch.tensor([2.0, 1.0], **tkwargs))
        ei = ExpectedImprovement(model=mm, best_f=0.0, objective=obj)
        X = torch.rand(2, 1, 2, **tkwargs)
        ei_expected = torch.tensor([0.6910, 0.5371], **tkwargs)
        # NOTE(review): result discarded here as well — no assertion is made.
        torch.allclose(ei(X), ei_expected, atol=1e-4)

        # test bad objective class
        with self.assertRaises(UnsupportedError):
            ExpectedImprovement(model=mm, best_f=0.0, objective=IdentityMCObjective())
def test_expected_improvement(self):
    """Non-batch checks for analytic `ExpectedImprovement`.

    Covers reference values for maximization and minimization on a mock
    posterior, the error raised by `set_X_pending`, and scalarized posterior
    transforms on single- and multi-output posteriors.
    """
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}

        mean = torch.tensor([[-0.5]], **tkwargs)
        variance = torch.ones(1, 1, **tkwargs)
        mm = MockModel(MockPosterior(mean=mean, variance=variance))

        # basic test
        module = ExpectedImprovement(model=mm, best_f=0.0)
        X = torch.empty(1, 1, **tkwargs)  # dummy
        ei = module(X)
        ei_expected = torch.tensor(0.19780, **tkwargs)
        self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))

        # test maximize
        module = ExpectedImprovement(model=mm, best_f=0.0, maximize=False)
        X = torch.empty(1, 1, **tkwargs)  # dummy
        ei = module(X)
        ei_expected = torch.tensor(0.6978, **tkwargs)
        self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))
        self.assertRaises(UnsupportedError, module.set_X_pending, None)

        # test posterior transform (single-output)
        mean = torch.tensor([0.5], **tkwargs)
        covar = torch.tensor([[0.16]], **tkwargs)
        mm = MockModel(GPyTorchPosterior(MultivariateNormal(mean, covar)))
        transform = ScalarizedPosteriorTransform(torch.tensor([0.5], **tkwargs))
        ei = ExpectedImprovement(model=mm, best_f=0.0, posterior_transform=transform)
        X = torch.rand(1, 2, **tkwargs)
        ei_expected = torch.tensor(0.2601, **tkwargs)
        # NOTE(review): the comparison result is discarded, so this line asserts
        # nothing — consider wrapping in assertTrue once confirmed to pass.
        torch.allclose(ei(X), ei_expected, atol=1e-4)

        # test posterior transform (multi-output)
        mean = torch.tensor([[-0.25, 0.5]], **tkwargs)
        covar = torch.tensor([[[0.5, 0.125], [0.125, 0.5]]], **tkwargs)
        mm = MockModel(GPyTorchPosterior(MultitaskMultivariateNormal(mean, covar)))
        transform = ScalarizedPosteriorTransform(torch.tensor([2.0, 1.0], **tkwargs))
        ei = ExpectedImprovement(model=mm, best_f=0.0, posterior_transform=transform)
        X = torch.rand(1, 2, **tkwargs)
        ei_expected = torch.tensor(0.6910, **tkwargs)
        # NOTE(review): result discarded here as well — no assertion is made.
        torch.allclose(ei(X), ei_expected, atol=1e-4)
def test_log_prob(self):
    """Compare `MultitaskMultivariateNormal.log_prob` with a closed-form value
    for a diagonal covariance, in non-batch and batch mode (1% relative tolerance)."""
    log_norm = math.log(math.pi * 2) * 12

    # non-batch: 4 points x 3 tasks, 12 independent coordinates
    mean = torch.randn(4, 3)
    var = torch.randn(12).abs_()
    values = mean + 0.5
    diffs = (values - mean).view(-1)
    dist = MultitaskMultivariateNormal(mean, DiagLazyTensor(var))
    res = dist.log_prob(values)
    log_det = var.log().sum()
    quad_form = (diffs / var * diffs).sum()
    actual = -0.5 * (log_norm + log_det + quad_form)
    rel_err = (res - actual).div(res).abs()
    self.assertLess(rel_err.item(), 1e-2)

    # batch of 3 such distributions
    mean = torch.randn(3, 4, 3)
    var = torch.randn(3, 12).abs_()
    values = mean + 0.5
    diffs = (values - mean).view(3, -1)
    dist = MultitaskMultivariateNormal(mean, DiagLazyTensor(var))
    res = dist.log_prob(values)
    log_det = var.log().sum(-1)
    quad_form = (diffs / var * diffs).sum(-1)
    actual = -0.5 * (log_norm + log_det + quad_form)
    rel_err = (res - actual).div(res).abs()
    self.assertLess(rel_err.norm(), 1e-2)
def _get_test_posterior(batch_shape, device, dtype, q=1, o=1):
    """Create a random multitask `GPyTorchPosterior` for tests.

    Args:
        batch_shape: Leading batch dimensions of the distribution.
        device: Device on which the tensors are created.
        dtype: Data type of the tensors.
        q: Number of points.
        o: Number of outputs.

    Returns:
        A `GPyTorchPosterior` wrapping a `MultitaskMultivariateNormal` with a
        `batch_shape x q x o` mean and a `batch_shape x qo x qo` covariance.
    """
    tkwargs = {"device": device, "dtype": dtype}
    size = q * o
    mu = torch.rand(*batch_shape, q, o, **tkwargs)
    root = torch.rand(*batch_shape, size, size, **tkwargs)
    sigma = root @ root.transpose(-1, -2)
    # Add uniform random values to the diagonal through an in-place view update.
    sigma_diag = torch.diagonal(sigma, dim1=-2, dim2=-1)
    sigma_diag.add_(torch.rand(*batch_shape, size, **tkwargs))
    mtmvn = MultitaskMultivariateNormal(mu, sigma)
    return GPyTorchPosterior(mtmvn)
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
            of the feature space and `q` is the number of points considered
            jointly.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add observation noise to the posterior.

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices` each. Includes observation noise if
        `observation_noise=True`.
    """
    self.eval()  # make sure model is in eval mode
    with gpt_settings.debug(False), gpt_settings.fast_pred_var(), gpt_settings.detach_test_caches(
        settings.propagate_grads.off()
    ):
        multi_output = self._num_outputs > 1
        if multi_output:
            # insert a dimension for the output dimension
            X, output_dim_idx = add_output_dim(
                X=X, original_batch_shape=self._input_batch_shape
            )
        mvn = self(X)
        if observation_noise:
            noise_kwargs = {}
            if isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
                # Use the mean of the previous noise values (TODO: be smarter here).
                noise_kwargs["noise"] = self.likelihood.noise.mean().expand(X.shape[:-1])
            mvn = self.likelihood(mvn, X, **noise_kwargs)
        if multi_output:
            # Slice one MVN per requested output out of the output dimension and
            # recombine them as independent tasks.
            mean_x = mvn.mean
            covar_x = mvn.covariance_matrix
            output_indices = output_indices or range(self._num_outputs)
            per_output = []
            for t in output_indices:
                mean_t = mean_x.select(dim=output_dim_idx, index=t)
                covar_t = covar_x.select(dim=output_dim_idx, index=t)
                per_output.append(MultivariateNormal(mean_t, lazify(covar_t)))
            mvn = MultitaskMultivariateNormal.from_independent_mvns(mvns=per_output)
    return GPyTorchPosterior(mvn=mvn)
def _get_test_posterior(
    batch_shape: torch.Size,
    q: int = 1,
    m: int = 1,
    interleaved: bool = True,
    lazy: bool = False,
    independent: bool = False,
    **tkwargs
) -> GPyTorchPosterior:
    r"""Generate a Posterior for testing purposes.

    Args:
        batch_shape: The batch shape of the data.
        q: The number of candidates.
        m: The number of outputs.
        interleaved: A boolean indicating the format of the
            MultitaskMultivariateNormal (only used when `independent=False`).
        lazy: A boolean indicating if the posterior should be lazy (only used
            when `independent=False`).
        independent: A boolean indicating whether the outputs are independent.
        tkwargs: `device` and `dtype` tensor constructor kwargs.

    Returns:
        A `GPyTorchPosterior` wrapping a random `MultitaskMultivariateNormal`.
    """

    def _random_mean_covar_diag(mean_shape, dim):
        # Draw, in this order: the mean, a Gram-matrix covariance, and the
        # diagonal boost that is added to it afterwards.
        loc = torch.rand(*batch_shape, *mean_shape, **tkwargs)
        root = torch.rand(*batch_shape, dim, dim, **tkwargs)
        gram = root @ root.transpose(-1, -2)
        boost = torch.rand(*batch_shape, dim, **tkwargs)
        return loc, gram, boost

    if not independent:
        mean, covar, flat_diag = _random_mean_covar_diag((q, m), q * m)
        if lazy:
            covar = AddedDiagLazyTensor(covar, DiagLazyTensor(flat_diag))
        else:
            covar = covar + torch.diag_embed(flat_diag)
        mtmvn = MultitaskMultivariateNormal(mean, covar, interleaved=interleaved)
        return GPyTorchPosterior(mtmvn)

    # Independent outputs: one q-dimensional MVN per output, combined afterwards.
    mvns = []
    for _ in range(m):
        mean, covar, flat_diag = _random_mean_covar_diag((q,), q)
        mvns.append(MultivariateNormal(mean, covar + torch.diag_embed(flat_diag)))
    mtmvn = MultitaskMultivariateNormal.from_independent_mvns(mvns)
    return GPyTorchPosterior(mtmvn)
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `b x q x d`-dim Tensor, where `d` is the dimension of the
            feature space, `q` is the number of points considered jointly,
            and `b` is the batch dimension.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add observation noise to the posterior.

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices` each. Includes measurement noise if
        `observation_noise=True`.
    """
    self.eval()  # make sure model is in eval mode
    with ExitStack() as es:
        es.enter_context(gpt_settings.debug(False))
        es.enter_context(gpt_settings.fast_pred_var())
        es.enter_context(
            gpt_settings.detach_test_caches(settings.propagate_grads.off())
        )
        if output_indices is not None:
            mvns = [self.forward_i(i, X) for i in output_indices]
            if observation_noise:
                likelihoods = self.likelihood.likelihoods
                noisy_mvns = []
                for i, mvn in zip(output_indices, mvns):
                    # Build the kwargs from the likelihood of output `i` itself.
                    # Zipping against the full likelihood list (as before) paired
                    # output `i` with the k-th likelihood's settings whenever
                    # `output_indices` was not `[0, 1, ...]`.
                    lh = likelihoods[i]
                    lh_kwargs = {}
                    if isinstance(lh, FixedNoiseGaussianLikelihood):
                        # use the mean of the fixed noise levels as test-time noise
                        lh_kwargs["noise"] = lh.noise.mean().expand(X.shape[:-1])
                    noisy_mvns.append(self.likelihood_i(i, mvn, X, **lh_kwargs))
                mvns = noisy_mvns
        else:
            mvns = self(*[X for _ in range(self.num_outputs)])
            if observation_noise:
                # TODO: Allow passing in observation noise via kwarg
                mvns = self.likelihood(*[(mvn, X) for mvn in mvns])
    if len(mvns) == 1:
        return GPyTorchPosterior(mvn=mvns[0])
    else:
        return GPyTorchPosterior(
            mvn=MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
        )
def __call__(self, inputs, are_samples=False, expand_for_quadgrid=True, **kwargs):
    """
    Forward data through this layer.

    If `inputs` is a MultitaskMultivariateNormal (the output of a previous layer), it is replaced
    by `mean + stddev * quad_sites`, i.e. the input locations evaluated at every quadrature site.
    Tensor inputs are used as they are.

    Args:
        inputs: A Tensor or a MultitaskMultivariateNormal.
        are_samples: Unused by this method; kept for interface compatibility.
        expand_for_quadgrid: If True, a leading singleton dimension is added to the incoming mean
            and standard deviation before they are combined with the quadrature sites.
        **kwargs: Forwarded to `ApproximateGP.__call__`.

    Returns:
        If `self.num_quad_sites > 0`: the GP output, wrapped in a non-interleaved
        MultitaskMultivariateNormal when `self.output_dims` is set and the output is not one
        already. Otherwise: the transposed mean (`loc`) of the GP output as a Tensor.

    Raises:
        ValueError: In debug mode, if `inputs` is neither a Tensor nor a MultitaskMultivariateNormal.
        RuntimeError: In debug mode, if the last input dimension differs from `self.input_dims`.
    """
    if isinstance(inputs, MultitaskMultivariateNormal):
        # inputs is definitely in the second layer, and mean is n x t
        mus, sigmas = inputs.mean, inputs.variance.sqrt()

        if expand_for_quadgrid:
            xi_mus = mus.unsqueeze(0)  # 1 x n x t
            xi_sigmas = sigmas.unsqueeze(0)  # 1 x n x t
        else:
            xi_mus = mus
            xi_sigmas = sigmas

        # unsqueeze sigmas to 1 x n x t, locations from [q] to Q^T x 1 x T.
        # Broadcasted result will be Q^T x N x T
        qg = self.quad_sites.view([self.num_quad_sites] + [1] * (xi_mus.dim() - 2) + [self.input_dims])
        xi_sigmas = xi_sigmas * qg

        inputs = xi_mus + xi_sigmas  # q^t x n x t

    if settings.debug.on():
        if not torch.is_tensor(inputs):
            # `__name__` (lower case) is the class-name attribute; the previous `__Name__`
            # raised AttributeError and hid the intended ValueError.
            raise ValueError(
                "`inputs` should either be a MultitaskMultivariateNormal or a Tensor, got "
                f"{inputs.__class__.__name__}"
            )

        if inputs.size(-1) != self.input_dims:
            raise RuntimeError(
                f"Input shape did not match self.input_dims. Got total feature dims [{inputs.size(-1)}],"
                f" expected [{self.input_dims}]"
            )

    # Repeat the input for all possible outputs
    if self.output_dims is not None:
        inputs = inputs.unsqueeze(-3)
        inputs = inputs.expand(*inputs.shape[:-3], self.output_dims, *inputs.shape[-2:])

    # Now run samples through the GP
    output = ApproximateGP.__call__(self, inputs, **kwargs)

    if self.num_quad_sites > 0:
        if self.output_dims is not None and not isinstance(output, MultitaskMultivariateNormal):
            # Fold the per-output batch dimension into a (non-interleaved) multitask distribution.
            mean = output.loc.transpose(-1, -2)
            covar = BlockDiagLazyTensor(output.lazy_covariance_matrix, block_dim=-3)
            output = MultitaskMultivariateNormal(mean, covar, interleaved=False)
    else:
        output = output.loc.transpose(-1, -2)  # this layer provides noiseless kernel interpolation

    return output
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Compute the posterior over model outputs at the given points.

    Args:
        X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
            of the feature space and `q` is the number of points considered
            jointly.
        output_indices: Indices of the outputs to include in the posterior
            (multi-output models only). When empty or omitted, all outputs
            are used.
        observation_noise: If True, the likelihood is applied to the latent
            distribution so that the posterior includes observation noise.
        propagate_grads: Keyword-only option read from `kwargs`. If True,
            GPyTorch's test caches are not detached while the posterior is
            computed. Defaults to `False`.

    Returns:
        A `GPyTorchPosterior` wrapping the resulting distribution; for
        multi-output models this is a `MultitaskMultivariateNormal` built
        from one independent `MultivariateNormal` per selected output.
    """
    self.eval()  # make sure model is in eval mode
    keep_caches_attached = kwargs.get("propagate_grads", False)
    with settings.debug(False), settings.fast_pred_var(), settings.detach_test_caches(
        not keep_caches_attached
    ):
        if self._num_outputs > 1:
            # insert a dimension for the output dimension
            X, output_dim_idx = add_output_dim(
                X=X, original_batch_shape=self._input_batch_shape
            )
        mvn = self(X)
        if observation_noise:
            mvn = self.likelihood(mvn, X)
        if self._num_outputs > 1:
            batched_mean = mvn.mean
            batched_covar = mvn.covariance_matrix
            if not output_indices:
                output_indices = range(self._num_outputs)
            # Slice the batched distribution into one MVN per requested output.
            per_output_mvns = []
            for t in output_indices:
                mean_t = batched_mean.select(dim=output_dim_idx, index=t)
                covar_t = batched_covar.select(dim=output_dim_idx, index=t)
                per_output_mvns.append(MultivariateNormal(mean_t, lazify(covar_t)))
            mvn = MultitaskMultivariateNormal.from_independent_mvns(
                mvns=per_output_mvns
            )
    return GPyTorchPosterior(mvn=mvn)
def __call__(self, function, *params, **kwargs):
    """Invoke the parent likelihood, converting batch-MVN inputs first.

    Args:
        function: The input to the likelihood. A `Distribution` that is not
            a `MultitaskMultivariateNormal` is converted with
            `MultitaskMultivariateNormal.from_batch_mvn` and a
            `DeprecationWarning` is emitted.
        *params: Forwarded unchanged to the parent `__call__`.
        **kwargs: Forwarded unchanged to the parent `__call__`.

    Returns:
        Whatever the parent class's `__call__` returns.
    """
    if isinstance(function, Distribution) and not isinstance(
        function, MultitaskMultivariateNormal
    ):
        warnings.warn(
            "The input to DeepGaussianLikelihood should be a MultitaskMultivariateNormal (num_data x num_tasks). "
            "Batch MultivariateNormal inputs (num_tasks x num_data) will be deprecated.",
            DeprecationWarning,
            # Attribute the warning to the caller that passed the batch MVN,
            # not to this wrapper.
            stacklevel=2,
        )
        function = MultitaskMultivariateNormal.from_batch_mvn(function)
    return super().__call__(function, *params, **kwargs)
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Compute the posterior over model outputs at the given points.

    Args:
        X: A `b x q x d`-dim Tensor, where `d` is the dimension of the
            feature space, `q` is the number of points considered jointly,
            and `b` is the batch dimension.
        output_indices: Indices of the outputs to compute the posterior for.
            If omitted, every sub-model is evaluated.
        observation_noise: If True, the corresponding likelihood(s) are
            applied so that the posterior includes observation noise.
        detach_test_caches: Keyword-only option read from `kwargs` and passed
            to `settings.detach_test_caches`. Defaults to `True`.

    Returns:
        A `GPyTorchPosterior`. With a single resulting distribution it wraps
        that distribution directly; otherwise it wraps a
        `MultitaskMultivariateNormal` built from the independent outputs.
    """
    should_detach = kwargs.get("detach_test_caches", True)
    self.eval()  # make sure model is in eval mode
    with settings.debug(False), settings.fast_pred_var(), settings.detach_test_caches(
        should_detach
    ):
        if output_indices is None:
            # Evaluate every sub-model on the same inputs.
            mvns = self(*[X] * self.num_outputs)
            if observation_noise:
                # TODO: Allow passing in observation noise via kwarg
                mvns = self.likelihood(*((latent, X) for latent in mvns))
        else:
            mvns = []
            for idx in output_indices:
                mvns.append(self.forward_i(idx, X))
            if observation_noise:
                noisy_mvns = []
                for idx, latent in zip(output_indices, mvns):
                    noisy_mvns.append(self.likelihood_i(idx, latent, X))
                mvns = noisy_mvns
    if len(mvns) != 1:
        joint = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
    else:
        joint = mvns[0]
    return GPyTorchPosterior(mvn=joint)
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Compute the posterior over model outputs at the given points.

    Args:
        X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
            of the feature space and `q` is the number of points considered
            jointly.
        output_indices: Indices of the outputs to include in the posterior
            (multi-output models only). When empty or omitted, all outputs
            are used.
        observation_noise: Not read by this implementation; no likelihood is
            applied regardless of its value.
        detach_test_caches: Keyword-only option read from `kwargs` and passed
            to `settings.detach_test_caches`. Defaults to `True`.

    Returns:
        A `GPyTorchPosterior` wrapping the model's output distribution; for
        multi-output models this is a `MultitaskMultivariateNormal` built
        from one independent `MultivariateNormal` per selected output.
    """
    self.eval()  # make sure model is in eval mode
    should_detach = kwargs.get("detach_test_caches", True)
    with settings.debug(False), settings.fast_pred_var(), settings.detach_test_caches(
        should_detach
    ):
        if self._num_outputs > 1:
            # insert a dimension for the output dimension
            X, output_dim_idx = add_output_dim(
                X=X, original_batch_shape=self._input_batch_shape
            )
        mvn = self(X)
        # Evaluated for every model, including single-output ones.
        batched_mean = mvn.mean
        batched_covar = mvn.covariance_matrix
        if self._num_outputs > 1:
            if not output_indices:
                output_indices = range(self._num_outputs)
            # Slice the batched distribution into one MVN per requested output.
            per_output_mvns = []
            for t in output_indices:
                mean_t = batched_mean.select(dim=output_dim_idx, index=t)
                covar_t = batched_covar.select(dim=output_dim_idx, index=t)
                per_output_mvns.append(MultivariateNormal(mean_t, lazify(covar_t)))
            mvn = MultitaskMultivariateNormal.from_independent_mvns(
                mvns=per_output_mvns
            )
    return GPyTorchPosterior(mvn=mvn)
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Compute the posterior over model outputs at the given points.

    Args:
        X: A `b x q x d`-dim Tensor, where `d` is the dimension of the
            feature space, `q` is the number of points considered jointly,
            and `b` is the batch dimension.
        output_indices: Indices of the outputs to compute the posterior for.
            If omitted, every sub-model is evaluated.
        observation_noise: If True, the corresponding likelihood(s) are
            applied so that the posterior includes observation noise.
        detach_test_caches: Keyword-only option read from `kwargs` and passed
            to `settings.detach_test_caches`. Defaults to `True`.

    Returns:
        A `GPyTorchPosterior`. With a single resulting distribution it wraps
        that distribution directly; otherwise it wraps a
        `MultitaskMultivariateNormal` built from the independent outputs.
    """
    cache_detach_flag = kwargs.get("detach_test_caches", True)
    self.eval()  # make sure model is in eval mode
    with settings.debug(False), settings.fast_pred_var(), settings.detach_test_caches(
        cache_detach_flag
    ):
        if output_indices is None:
            # No subset requested: run all sub-models on the same inputs.
            mvns = self(*[X] * self.num_outputs)
            if observation_noise:
                # TODO: Allow passing in observation noise via kwarg
                mvns = self.likelihood(*((latent, X) for latent in mvns))
        else:
            mvns = []
            for out_idx in output_indices:
                mvns.append(self.forward_i(out_idx, X))
            if observation_noise:
                with_noise = []
                for out_idx, latent in zip(output_indices, mvns):
                    with_noise.append(self.likelihood_i(out_idx, latent, X))
                mvns = with_noise
    if len(mvns) != 1:
        combined = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
    else:
        combined = mvns[0]
    return GPyTorchPosterior(mvn=combined)
def __call__(self, inputs, are_samples=False, **kwargs):
    """Forward data through this hidden GP layer.

    A `MultitaskMultivariateNormal` input is replaced by one reparameterized
    sample drawn from independent Normals with its mean and standard
    deviation. Tensor inputs are used as-is.

    Args:
        inputs: A `MultitaskMultivariateNormal` or a Tensor whose last
            dimension equals `self.input_dims`.
        are_samples: If True, a Tensor input is treated as already carrying
            a samples dimension, so the output is not expanded.
        **kwargs: Accepted but not forwarded to `ApproximateGP.__call__`.

    Returns:
        The GP output. If `self.output_dims` is set it is a
        `MultitaskMultivariateNormal`. For deterministic inputs the output
        is expanded with a leading dimension of size
        `settings.num_likelihood_samples.value()`.

    Raises:
        ValueError: In debug mode, if `inputs` is neither a
            `MultitaskMultivariateNormal` nor a Tensor.
        RuntimeError: In debug mode, if the last dimension of `inputs` does
            not match `self.input_dims`.
    """
    deterministic_inputs = not are_samples
    if isinstance(inputs, MultitaskMultivariateNormal):
        inputs = torch.distributions.Normal(
            loc=inputs.mean, scale=inputs.variance.sqrt()
        ).rsample()
        deterministic_inputs = False

    if settings.debug.on():
        if not torch.is_tensor(inputs):
            # `__name__` (lower case) is the real attribute; the previous
            # `__Name__` raised AttributeError and masked this ValueError.
            raise ValueError(
                "`inputs` should either be a MultitaskMultivariateNormal or a Tensor, got "
                f"{inputs.__class__.__name__}"
            )

        if inputs.size(-1) != self.input_dims:
            raise RuntimeError(
                f"Input shape did not match self.input_dims. Got total feature dims [{inputs.size(-1)}],"
                f" expected [{self.input_dims}]"
            )

    # Repeat the input for all possible outputs
    if self.output_dims is not None:
        inputs = inputs.unsqueeze(-3)
        inputs = inputs.expand(*inputs.shape[:-3], self.output_dims, *inputs.shape[-2:])

    # Now run samples through the GP
    output = ApproximateGP.__call__(self, inputs)
    if self.output_dims is not None:
        # Move the output dimension last and treat outputs as independent
        # blocks of one multitask covariance.
        mean = output.loc.transpose(-1, -2)
        covar = BlockDiagLazyTensor(output.lazy_covariance_matrix, block_dim=-3)
        output = MultitaskMultivariateNormal(mean, covar, interleaved=False)

    # Maybe expand inputs?
    if deterministic_inputs:
        output = output.expand(
            torch.Size([settings.num_likelihood_samples.value()]) + output.batch_shape
        )

    return output
def __call__(self, inputs, **kwargs):
    """Forward `inputs` through this layer, applying quadrature to
    distributional inputs.

    Args:
        inputs: Either a `MultitaskMultivariateNormal` (its mean and standard
            deviation are combined with `self.quad_sites`) or a Tensor whose
            last dimension equals `self.input_dims`.
        **kwargs: Forwarded to `ApproximateGP.__call__`.

    Returns:
        When `self.num_quad_sites > 0`: a `MultitaskMultivariateNormal` if
        `self.output_dims` is set (otherwise the GP output as-is). When
        `self.num_quad_sites` is not positive: the transposed mean tensor
        of the GP output. For Tensor inputs the GP output is first expanded
        with a leading dimension of size `self.num_quad_sites`.

    Raises:
        ValueError: In debug mode, if `inputs` is neither a
            `MultitaskMultivariateNormal` nor a Tensor.
        RuntimeError: In debug mode, if the last dimension of `inputs` does
            not match `self.input_dims`.
    """
    if isinstance(inputs, MultitaskMultivariateNormal):
        # This is for subsequent layers. We apply quadrature here
        # Mean, stdv are q x ... x n x t
        mus, sigmas = inputs.mean, inputs.variance.sqrt()
        qg = self.quad_sites.view(
            [self.num_quad_sites] + [1] * (mus.dim() - 2) + [self.input_dims]
        )
        sigmas = sigmas * qg
        inputs = mus + sigmas  # q^t x n x t
        deterministic_inputs = False
    else:
        deterministic_inputs = True

    if settings.debug.on():
        if not torch.is_tensor(inputs):
            # `__name__` (lower case) is the real attribute; the previous
            # `__Name__` raised AttributeError and masked this ValueError.
            raise ValueError(
                "`inputs` should either be a MultitaskMultivariateNormal or a Tensor, got "
                f"{inputs.__class__.__name__}"
            )

        if inputs.size(-1) != self.input_dims:
            raise RuntimeError(
                f"Input shape did not match self.input_dims. Got total feature dims [{inputs.size(-1)}],"
                f" expected [{self.input_dims}]"
            )

    # Repeat the input for all possible outputs
    if self.output_dims is not None:
        inputs = inputs.unsqueeze(-3)
        inputs = inputs.expand(*inputs.shape[:-3], self.output_dims, *inputs.shape[-2:])

    # Now run samples through the GP
    output = ApproximateGP.__call__(self, inputs, **kwargs)

    # If this is the first layer (deterministic inputs), expand the output
    # This allows quadrature to be applied to future layers
    if deterministic_inputs:
        output = output.expand(torch.Size([self.num_quad_sites]) + output.batch_shape)

    if self.num_quad_sites > 0:
        if self.output_dims is not None and not isinstance(output, MultitaskMultivariateNormal):
            # Move the output dimension last and treat outputs as independent
            # blocks of one multitask covariance.
            mean = output.loc.transpose(-1, -2)
            covar = BlockDiagLazyTensor(output.lazy_covariance_matrix, block_dim=-3)
            output = MultitaskMultivariateNormal(mean, covar, interleaved=False)
    else:
        output = output.loc.transpose(-1, -2)  # this layer provides noiseless kernel interpolation

    return output
def test_multitask_from_repeat(self):
    """`from_repeated_mvn` should copy a batch MVN across `num_tasks` tasks.

    Checks the resulting type and shapes, and that each task's mean and
    variance equal those of the MVN that was repeated.
    """
    base_mean = torch.randn(2, 3)
    base_variance = torch.randn(2, 3).clamp_min(1e-6)
    base_mvn = MultivariateNormal(base_mean, DiagLazyTensor(base_variance))

    repeated = MultitaskMultivariateNormal.from_repeated_mvn(base_mvn, num_tasks=4)

    self.assertIsInstance(repeated, MultitaskMultivariateNormal)
    self.assertEqual(repeated.batch_shape, torch.Size([2]))
    self.assertEqual(repeated.event_shape, torch.Size([3, 4]))
    self.assertEqual(repeated.covariance_matrix.shape, torch.Size([2, 12, 12]))

    # Every task slice must reproduce the base distribution's moments.
    for task in range(4):
        self.assertEqual(repeated.mean[..., task], base_mean)
        self.assertEqual(repeated.variance[..., task], base_variance)
def forward(self, x, xe):
    """Build the output distribution from features `x` and `xe`.

    The mean comes from `self.mean(x)`. The covariance is `self.kern(x)`,
    multiplied by `self.kern_emb` of the transformed `xe` when both inputs
    have a non-empty second dimension. When `x` has an empty second
    dimension, only the `self.kern_emb` term is used.

    Returns:
        A `MultitaskMultivariateNormal` if `self.multi_task` is truthy,
        otherwise a `MultivariateNormal`.
    """
    mean_x = self.mean(x)

    if not x.shape[1] > 0:
        # No columns in `x`: covariance comes from `xe` alone.
        covar = self.kern_emb(self.emb_trans(xe))
    else:
        covar = self.kern(x)
        if xe.shape[1] > 0:
            embedded = self.emb_trans(xe)
            covar *= self.kern_emb(embedded)

    if self.multi_task:
        return MultitaskMultivariateNormal(mean_x, covar)
    return MultivariateNormal(mean_x, covar)
def test_multitask_from_batch(self):
    """`from_batch_mvn` should turn one batch dimension into the task dimension.

    Covers a 2-d mean with `task_dim=-1` and a 3-d mean with `task_dim=0`.
    In both cases the task dimension is expected to end up last in the mean
    and variance.
    """
    # (mean shape, task_dim, expected batch shape, expected covar shape,
    #  reordering that moves the task dimension last)
    cases = (
        (
            (2, 3),
            -1,
            torch.Size([]),
            torch.Size([6, 6]),
            lambda t: t.transpose(-1, -2),
        ),
        (
            (2, 4, 3),
            0,
            torch.Size([4]),
            torch.Size([4, 6, 6]),
            lambda t: t.permute(1, 2, 0),
        ),
    )
    for shape, task_dim, batch_shape, covar_shape, task_last in cases:
        batch_mean = torch.randn(*shape)
        batch_variance = torch.randn(*shape).clamp_min(1e-6)
        batch_mvn = MultivariateNormal(batch_mean, DiagLazyTensor(batch_variance))

        converted = MultitaskMultivariateNormal.from_batch_mvn(
            batch_mvn, task_dim=task_dim
        )

        self.assertIsInstance(converted, MultitaskMultivariateNormal)
        self.assertEqual(converted.batch_shape, batch_shape)
        self.assertEqual(converted.event_shape, torch.Size([3, 2]))
        self.assertEqual(converted.covariance_matrix.shape, covar_shape)
        self.assertEqual(converted.mean, task_last(batch_mean))
        self.assertEqual(converted.variance, task_last(batch_variance))
def _get_test_posterior(shape, device, dtype, interleaved=True, lazy=False):
    """Create a random `GPyTorchPosterior` whose mean has the given shape.

    Args:
        shape: Shape of the mean. Its last two entries are flattened into the
            covariance dimension (`shape[-2:].numel()` is used, so a
            `torch.Size` is expected).
        device: Device for all created tensors.
        dtype: Dtype for all created tensors.
        interleaved: Passed to `MultitaskMultivariateNormal` when the last
            dimension of `shape` is not 1.
        lazy: If True, the covariance is wrapped in a `NonLazyTensor`.

    Returns:
        A `GPyTorchPosterior` over a `MultivariateNormal` (last dim of
        `shape` equal to 1) or a `MultitaskMultivariateNormal` (otherwise).
    """
    tensor_opts = {"device": device, "dtype": dtype}
    batch_dims = shape[:-2]
    flat_dim = shape[-2:].numel()

    mean = torch.rand(shape, **tensor_opts)
    jitter = torch.rand(shape, **tensor_opts).view(*batch_dims, flat_dim)
    factor = torch.rand(*batch_dims, flat_dim, flat_dim, **tensor_opts)
    # A Gram matrix plus a random non-negative diagonal.
    covar = factor @ factor.transpose(-1, -2) + torch.diag_embed(jitter)
    if lazy:
        covar = NonLazyTensor(covar)

    if shape[-1] != 1:
        distribution = MultitaskMultivariateNormal(
            mean, covar, interleaved=interleaved
        )
    else:
        distribution = MultivariateNormal(mean.squeeze(-1), covar)
    return GPyTorchPosterior(distribution)
def test_log_prob(self, cuda=False):
    """`log_prob` with a diagonal covariance should match the closed form.

    For float and double, compares `MultitaskMultivariateNormal.log_prob`
    against a hand-computed diagonal-Gaussian log density, for a non-batched
    and a batched (leading dimension 3) case, to within 1e-2 relative error.
    """
    device = torch.device("cuda" if cuda else "cpu")
    # Normalization term of a 12-dimensional Gaussian (4 points x 3 tasks).
    log_norm = math.log(math.pi * 2) * 12

    for dtype in (torch.float, torch.double):
        # Non-batched case.
        loc = torch.randn(4, 3, device=device, dtype=dtype)
        diag_var = torch.randn(12, device=device, dtype=dtype).abs_()
        observed = loc + 0.5
        resid = (observed - loc).view(-1)

        dist = MultitaskMultivariateNormal(loc, DiagLazyTensor(diag_var))
        log_p = dist.log_prob(observed)
        expected = -0.5 * (
            log_norm + diag_var.log().sum() + (resid / diag_var * resid).sum()
        )
        rel_err = (log_p - expected).div(log_p).abs()
        self.assertLess(rel_err.item(), 1e-2)

        # Batched case.
        loc = torch.randn(3, 4, 3, device=device, dtype=dtype)
        diag_var = torch.randn(3, 12, device=device, dtype=dtype).abs_()
        observed = loc + 0.5
        resid = (observed - loc).view(3, -1)

        dist = MultitaskMultivariateNormal(loc, DiagLazyTensor(diag_var))
        log_p = dist.log_prob(observed)
        expected = -0.5 * (
            log_norm + diag_var.log().sum(-1) + (resid / diag_var * resid).sum(-1)
        )
        rel_err = (log_p - expected).div(log_p).abs()
        self.assertLess(rel_err.norm(), 1e-2)
def test_degenerate_GPyTorchPosterior_Multitask(self, cuda=False):
    """Exercise a multitask `GPyTorchPosterior` with a singular covariance.

    For float and double, builds a two-task posterior from two copies of an
    MVN whose covariance has two identical rows, then checks basic
    properties (device, dtype, event shape, mean, variance), that `rsample`
    emits exactly one "not p.d." `RuntimeWarning`, sample shapes, that
    sampling is reproducible with fixed base samples, and the same for a
    batched posterior with base samples that have a singleton batch dim.

    Args:
        cuda: If True, run on the "cuda" device, otherwise on "cpu".
    """
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        # singular covariance matrix
        degenerate_covar = torch.tensor(
            [[1, 1, 0], [1, 1, 0], [0, 0, 2]], dtype=dtype, device=device
        )
        mean = torch.rand(3, dtype=dtype, device=device)
        mvn = MultivariateNormal(mean, lazify(degenerate_covar))
        # Two tasks, both given the same degenerate MVN.
        mvn = MultitaskMultivariateNormal.from_independent_mvns([mvn, mvn])
        posterior = GPyTorchPosterior(mvn=mvn)
        # basics
        self.assertEqual(posterior.device.type, device.type)
        self.assertTrue(posterior.dtype == dtype)
        self.assertEqual(posterior.event_shape, torch.Size([3, 2]))
        # Expected moments: the single-task values repeated along a new
        # trailing task dimension of size 2.
        mean_exp = mean.unsqueeze(-1).repeat(1, 2)
        self.assertTrue(torch.equal(posterior.mean, mean_exp))
        variance_exp = degenerate_covar.diag().unsqueeze(-1).repeat(1, 2)
        self.assertTrue(torch.equal(posterior.variance, variance_exp))
        # rsample
        with warnings.catch_warnings(record=True) as w:
            # we check that the p.d. warning is emitted - this only
            # happens once per posterior, so we need to check only once
            samples = posterior.rsample(sample_shape=torch.Size([4]))
            self.assertEqual(len(w), 1)
            self.assertTrue(issubclass(w[-1].category, RuntimeWarning))
            self.assertTrue("not p.d." in str(w[-1].message))
        self.assertEqual(samples.shape, torch.Size([4, 3, 2]))
        samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
        self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 2]))
        # rsample w/ base samples
        # Drawing twice with identical base samples must give matching draws.
        base_samples = torch.randn(4, 3, 2, device=device, dtype=dtype)
        samples_b1 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        samples_b2 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        self.assertTrue(torch.allclose(samples_b1, samples_b2))
        base_samples2 = torch.randn(4, 2, 3, 2, device=device, dtype=dtype)
        samples2_b1 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        samples2_b2 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        self.assertTrue(torch.allclose(samples2_b1, samples2_b2))
        # collapse_batch_dims
        # Batched posterior (batch size 2) sampled with base samples whose
        # batch dimension is 1; the output is expected to have batch size 2.
        b_mean = torch.rand(2, 3, dtype=dtype, device=device)
        b_degenerate_covar = degenerate_covar.expand(2, *degenerate_covar.shape)
        b_mvn = MultivariateNormal(b_mean, lazify(b_degenerate_covar))
        b_mvn = MultitaskMultivariateNormal.from_independent_mvns([b_mvn, b_mvn])
        b_posterior = GPyTorchPosterior(mvn=b_mvn)
        b_base_samples = torch.randn(4, 1, 3, 2, device=device, dtype=dtype)
        with warnings.catch_warnings(record=True) as w:
            b_samples = b_posterior.rsample(
                sample_shape=torch.Size([4]), base_samples=b_base_samples
            )
            # A new posterior object, so the p.d. warning is expected again.
            self.assertEqual(len(w), 1)
            self.assertTrue(issubclass(w[-1].category, RuntimeWarning))
            self.assertTrue("not p.d." in str(w[-1].message))
        self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 2]))