    def test_matmul(self):
        # Forward
        res = CholLazyTensor(self.chol).matmul(self.vecs)
        actual = self.actual_mat.matmul(self.vecs_copy)
        self.assertTrue(approx_equal(res, actual))

        # Backward
        grad_output = torch.randn(*self.vecs.size())
        res.backward(gradient=grad_output)
        actual.backward(gradient=grad_output)
        self.assertTrue(approx_equal(self.chol.grad, self.chol_copy.grad))
        self.assertTrue(approx_equal(self.vecs.grad, self.vecs_copy.grad))
    def test_diag(self):
        res = CholLazyTensor(self.chol).diag()
        actual = torch.cat([
            self.actual_mat[0].diag().unsqueeze(0),
            self.actual_mat[1].diag().unsqueeze(0)
        ], 0)
        self.assertTrue(approx_equal(res, actual))
Example #3
    def untransform_posterior(self, posterior: Posterior) -> Posterior:
        r"""Un-standardize the posterior.

        Args:
            posterior: A posterior in the standardized space.

        Returns:
            The un-standardized posterior. If the input posterior is an MVN,
            the transformed posterior is again an MVN.
        """
        if self._outputs is not None:
            raise NotImplementedError(
                "Standardize does not yet support output selection for "
                "untransform_posterior"
            )
        if not self._m == posterior.event_shape[-1]:
            raise RuntimeError(
                "Incompatible output dimensions encountered for transform "
                f"{self._m} and posterior {posterior.event_shape[-1]}"
            )
        if not isinstance(posterior, GPyTorchPosterior):
            # fall back to TransformedPosterior
            return TransformedPosterior(
                posterior=posterior,
                sample_transform=lambda s: self.means + self.stdvs * s,
                mean_transform=lambda m, v: self.means + self.stdvs * m,
                variance_transform=lambda m, v: self._stdvs_sq * v,
            )
        # GPyTorchPosterior (TODO: Should we Lazy-evaluate the mean here as well?)
        mvn = posterior.mvn
        offset = self.means
        scale_fac = self.stdvs
        if not posterior._is_mt:
            mean_tf = offset.squeeze(-1) + scale_fac.squeeze(-1) * mvn.mean
            scale_fac = scale_fac.squeeze(-1).expand_as(mean_tf)
        else:
            mean_tf = offset + scale_fac * mvn.mean
            reps = mean_tf.shape[-2:].numel() // scale_fac.size(-1)
            scale_fac = scale_fac.squeeze(-2)
            if mvn._interleaved:
                scale_fac = scale_fac.repeat(*[1 for _ in scale_fac.shape[:-1]], reps)
            else:
                scale_fac = torch.repeat_interleave(scale_fac, reps, dim=-1)

        if (
            not mvn.islazy
            # TODO: Figure out attribute naming weirdness here
            or mvn._MultivariateNormal__unbroadcasted_scale_tril is not None
        ):
            # if already computed, we can save a lot of time using scale_tril
            covar_tf = CholLazyTensor(mvn.scale_tril * scale_fac.unsqueeze(-1))
        else:
            lcv = mvn.lazy_covariance_matrix
            # allow batch-evaluation of the model
            scale_mat = DiagLazyTensor(scale_fac.expand(lcv.shape[:-1]))
            covar_tf = scale_mat @ lcv @ scale_mat

        kwargs = {"interleaved": mvn._interleaved} if posterior._is_mt else {}
        mvn_tf = mvn.__class__(mean=mean_tf, covariance_matrix=covar_tf, **kwargs)
        return GPyTorchPosterior(mvn_tf)
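A plain-torch sketch of the arithmetic untransform_posterior performs above in the simplest case (single output, no batching, non-lazy covariance). The names means and stdvs mirror the transform's buffers; everything else is illustrative.

import torch

means, stdvs = torch.tensor([2.0]), torch.tensor([0.5])
mean_std = torch.tensor([0.1, -0.3])      # posterior mean in standardized space
covar_std = 0.04 * torch.eye(2)           # posterior covariance in standardized space

mean_tf = means + stdvs * mean_std        # cf. the mean_transform / mean_tf lines above
covar_tf = stdvs * covar_std * stdvs      # cf. scale_mat @ lcv @ scale_mat above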
Example #4
    def create_lazy_tensor(self):
        chol = torch.tensor(
            [[3, 0, 0, 0, 0], [-1, 2, 0, 0, 0], [1, 4, 1, 0, 0],
             [0, 2, 3, 2, 0], [-4, -2, 1, 3, 4]],
            dtype=torch.float,
            requires_grad=True,
        )
        return CholLazyTensor(TriangularLazyTensor(chol))
    def test_inv_quad_log_det(self):
        # Forward
        res_inv_quad, res_log_det = CholLazyTensor(self.chol).inv_quad_log_det(
            inv_quad_rhs=self.vecs, log_det=True)
        res = res_inv_quad + res_log_det
        actual_inv_quad = self.actual_mat.inverse().matmul(self.vecs_copy).mul(
            self.vecs_copy).sum()
        actual = actual_inv_quad + torch.log(torch.det(self.actual_mat))
        self.assertLess(((res - actual) / actual).abs().item(), 1e-2)
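A plain-torch sketch of the two quantities inv_quad_log_det combines in this test: the inverse quadratic form summed over the columns of the right-hand side, and the log-determinant. The tensors here are illustrative stand-ins for the fixture attributes.

import torch

chol = torch.tensor([[3.0, 0.0], [-1.0, 2.0]])
K = chol @ chol.t()
vecs = torch.randn(2, 3)

inv_quad = (torch.linalg.solve(K, vecs) * vecs).sum()   # sum_i v_i^T K^{-1} v_i
log_det = torch.logdet(K)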
Example #6
    def test_natgrad(self, D=5):
        mu = torch.randn(D)
        cov = torch.randn(D, D).tril_()
        dist = MultivariateNormal(mu, CholLazyTensor(TriangularLazyTensor(cov)))
        sample = dist.sample()

        v_dist = NaturalVariationalDistribution(D)
        v_dist.initialize_variational_distribution(dist)
        mu = v_dist().mean.detach()

        v_dist().log_prob(sample).squeeze().backward()

        eta1 = mu.clone().requires_grad_(True)
        eta2 = (mu[:, None] * mu + cov @ cov.t()).requires_grad_(True)
        L = torch.linalg.cholesky(eta2 - eta1[:, None] * eta1)
        dist2 = MultivariateNormal(eta1, CholLazyTensor(TriangularLazyTensor(L)))
        dist2.log_prob(sample).squeeze().backward()

        assert torch.allclose(v_dist.natural_vec.grad, eta1.grad)
        assert torch.allclose(v_dist.natural_mat.grad, eta2.grad)
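For reference, eta1 and eta2 above are the Gaussian's expectation parameters, mu and Sigma + mu mu^T; the asserts check that the gradients GPyTorch stores on the natural parameters coincide with gradients taken with respect to these expectation parameters, which (roughly) is what lets plain gradient steps on a NaturalVariationalDistribution act like natural-gradient steps. A small standalone sketch of that construction, with illustrative tensors:

import torch

D = 5
mu = torch.randn(D)
L_true = torch.randn(D, D).tril() + 2 * torch.eye(D)
Sigma = L_true @ L_true.t()

eta1 = mu                                  # first expectation parameter
eta2 = Sigma + torch.outer(mu, mu)         # second expectation parameter
assert torch.allclose(eta2 - torch.outer(eta1, eta1), Sigma, atol=1e-4)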
Example #7
    def test_invertible_init(self, D=5):
        mu = torch.randn(D)
        cov = torch.randn(D, D).tril_()
        dist = MultivariateNormal(mu, CholLazyTensor(TriangularLazyTensor(cov)))

        v_dist = TrilNaturalVariationalDistribution(D, mean_init_std=0.0)
        v_dist.initialize_variational_distribution(dist)

        out_dist = v_dist()

        assert torch.allclose(out_dist.mean, dist.mean)
        assert torch.allclose(out_dist.covariance_matrix, dist.covariance_matrix)
Example #8
    def _get_bound_for_point_samples(self, X, Y, samples):
        """
        Helper function for getting the ELBO when only a subset of the inducing
        points is present in each layer, as given by the argument samples.
        """
        def _update_inducing_points(gp, Z, m, S=None, L=None):
            # Setting the inducing points for a given layer
            assert (S is None) ^ (L is None)

            strat = gp.variational_strategy
            dist = strat._variational_distribution

            strat.inducing_points.data = Z
            dist.variational_mean.data = m
            if S is not None:
                L = psd_safe_cholesky(S)
            dist.chol_variational_covar.data = L
            gp.clear_caches()

        # Save current parameters
        initial_params = {}
        for n, gp in self.named_gps:
            var_dist = gp.variational_strategy.variational_distribution
            initial_params[n] = {
                "Z": gp.inducing_inputs.clone(),
                "m": var_dist.mean.clone(),
                "L": var_dist.scale_tril
            }

        # Remove points according to provided point samples
        for (n, gp), sample in zip(self.named_gps, samples):
            # Include only the selected inducing points for the given sample
            Z_ = initial_params[n]["Z"][sample]
            m_ = initial_params[n]["m"][..., sample]
            L = initial_params[n]["L"]
            S = CholLazyTensor(L).add_jitter().evaluate()
            S = S[..., sample, :][..., :, sample]
            _update_inducing_points(gp, Z_, m_, S=S)

        # Evaluate bound
        X_scaled = self.X_scaler.transform(X)
        Y_scaled = self.Y_scaler.transform(Y)
        bound = self._log_lik(X=X_scaled, Y=Y_scaled) - self._KL()

        # Restore parameters
        for n, gp in self.named_gps:
            Z = initial_params[n]["Z"]
            m = initial_params[n]["m"]
            L = initial_params[n]["L"]
            _update_inducing_points(gp, Z, m, L=L)

        return bound
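A standalone sketch of the sub-selection step used in the loop above: materialize the covariance from its Cholesky factor, then keep only the rows and columns of the sampled inducing points. It assumes a GPyTorch version exposing CholLazyTensor under gpytorch.lazy; the tensors are illustrative.

import torch
from gpytorch.lazy import CholLazyTensor, TriangularLazyTensor

L = torch.randn(6, 6).tril() + 3 * torch.eye(6)
sample = torch.tensor([0, 2, 5])

S = CholLazyTensor(TriangularLazyTensor(L)).add_jitter().evaluate()
S_sub = S[..., sample, :][..., :, sample]   # covariance over the kept inducing points only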
Example #9
    def create_lazy_tensor(self):
        chol = torch.tensor(
            [
                [[3, 0, 0, 0, 0], [-1, 2, 0, 0, 0], [1, 4, 1, 0, 0],
                 [0, 2, 3, 2, 0], [-4, -2, 1, 3, 4]],
                [[2, 0, 0, 0, 0], [3, 1, 0, 0, 0], [-2, 3, 2, 0, 0],
                 [-2, 1, -1, 3, 0], [-4, -4, 5, 2, 3]],
            ],
            dtype=torch.float,
        )
        chol.add_(torch.eye(5).unsqueeze(0))
        chol.requires_grad_(True)
        return CholLazyTensor(TriangularLazyTensor(chol))
    def test_inv_quad_log_det(self):
        # Forward
        res_inv_quad, res_log_det = CholLazyTensor(self.chol).inv_quad_log_det(
            inv_quad_rhs=self.vecs, log_det=True)
        res = res_inv_quad + res_log_det
        actual_inv_quad = self.actual_mat_inv.matmul(self.vecs_copy).mul(
            self.vecs_copy).sum(-1).sum(-1)
        actual_log_det = torch.tensor([
            torch.log(torch.det(self.actual_mat[0])),
            torch.log(torch.det(self.actual_mat[1]))
        ])

        actual = actual_inv_quad + actual_log_det
        self.assertLess(torch.max((res - actual).abs() / actual.norm()), 1e-2)
    def variational_output(self):
        chol_variational_covar = self.chol_variational_covar

        # Negate each row whose diagonal entry is negative (the Cholesky factor
        # of a matrix must have positive diagonal elements).
        if chol_variational_covar.ndimension() == 2:
            chol_variational_covar = chol_variational_covar.triu()
            inside = chol_variational_covar.diag().sign().unsqueeze(
                1).expand_as(chol_variational_covar).triu()
        elif chol_variational_covar.ndimension() == 3:
            batch_size, diag_size, _ = chol_variational_covar.size()

            # Batch mode
            chol_variational_covar_size = list(
                chol_variational_covar.size())[-2:]
            mask = torch.ones(*chol_variational_covar_size,
                              dtype=chol_variational_covar.dtype,
                              device=chol_variational_covar.device).triu_()
            mask = mask.unsqueeze(0).expand(
                *([chol_variational_covar.size(0)] +
                  chol_variational_covar_size))

            batch_index = torch.arange(0,
                                       batch_size,
                                       dtype=torch.long,
                                       device=mask.device)
            batch_index = batch_index.unsqueeze(1).repeat(1,
                                                          diag_size).view(-1)
            diag_index = torch.arange(0,
                                      diag_size,
                                      dtype=torch.long,
                                      device=mask.device)
            diag_index = diag_index.unsqueeze(1).repeat(batch_size, 1).view(-1)
            diag = chol_variational_covar[batch_index, diag_index,
                                          diag_index].view(
                                              batch_size, diag_size)

            chol_variational_covar = chol_variational_covar.mul(mask)
            inside = diag.sign().unsqueeze(-1).expand_as(
                chol_variational_covar).mul(mask)
        else:
            raise RuntimeError(
                "Invalid number of variational covar dimensions")

        chol_variational_covar = inside.mul(chol_variational_covar)
        variational_covar = CholLazyTensor(
            chol_variational_covar.transpose(-1, -2))
        return GaussianRandomVariable(self.variational_mean, variational_covar)
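The sign fix above flips each row of the upper-triangular factor whose diagonal entry is negative; because the sign matrix squares to the identity, the covariance the factor represents is unchanged. A standalone sketch of the equivalent operation on a lower-triangular factor, where the flip acts on columns:

import torch

L = torch.tensor([[-2.0, 0.0], [1.0, 3.0]])
signs = L.diag().sign()
L_fixed = L * signs                          # scale column j by the sign of L[j, j]

assert (L_fixed.diag() > 0).all()
assert torch.allclose(L_fixed @ L_fixed.t(), L @ L.t())   # L L^T is unchanged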
Example #12
    def create_lazy_tensor(self):
        chol = torch.tensor(
            [
                [[3, 0, 0, 0, 0], [-1, 2, 0, 0, 0], [1, 4, 1, 0, 0],
                 [0, 2, 3, 2, 0], [-4, -2, 1, 3, 4]],
                [[2, 0, 0, 0, 0], [3, 1, 0, 0, 0], [-2, 3, 2, 0, 0],
                 [-2, 1, -1, 3, 0], [-4, -4, 5, 2, 3]],
            ],
            dtype=torch.float,
        )
        chol = chol.repeat(3, 1, 1, 1)
        chol[1].mul_(2)
        chol[2].mul_(0.5)
        chol.add_(torch.eye(5).unsqueeze_(0).unsqueeze_(0))
        chol.requires_grad_(True)
        return CholLazyTensor(chol)
Example #13
    def test_natgrad(self, D=5):
        mu = torch.randn(D)
        cov = torch.randn(D, D)
        cov = cov @ cov.t()
        dist = MultivariateNormal(
            mu,
            CholLazyTensor(TriangularLazyTensor(torch.linalg.cholesky(cov))))
        sample = dist.sample()

        v_dist = TrilNaturalVariationalDistribution(D, mean_init_std=0.0)
        v_dist.initialize_variational_distribution(dist)
        v_dist().log_prob(sample).squeeze().backward()
        dout_dnat1 = v_dist.natural_vec.grad
        dout_dnat2 = v_dist.natural_tril_mat.grad

        # mean_init_std=0. because we need to ensure both have the same distribution
        v_dist_ref = NaturalVariationalDistribution(D, mean_init_std=0.0)
        v_dist_ref.initialize_variational_distribution(dist)
        v_dist_ref().log_prob(sample).squeeze().backward()
        dout_dnat1_noforward_ref = v_dist_ref.natural_vec.grad
        dout_dnat2_noforward_ref = v_dist_ref.natural_mat.grad

        def f(natural_vec, natural_tril_mat):
            "Transform natural_tril_mat to L"
            Sigma = torch.inverse(-2 * natural_tril_mat)
            mu = natural_vec
            return mu, torch.linalg.cholesky(Sigma).inverse().tril()

        (mu_ref, natural_tril_mat_ref), (dout_dmu_ref, dout_dnat2_ref) = jvp(
            f,
            (v_dist_ref.natural_vec.detach(), v_dist_ref.natural_mat.detach()),
            (dout_dnat1_noforward_ref, dout_dnat2_noforward_ref),
        )

        assert torch.allclose(natural_tril_mat_ref,
                              v_dist.natural_tril_mat), "Sigma transformation"
        assert torch.allclose(dout_dnat2_ref, dout_dnat2), "Sigma gradient"

        assert torch.allclose(mu_ref, v_dist.natural_vec), "mu transformation"
        assert torch.allclose(dout_dmu_ref, dout_dnat1), "mu gradient"
Example #14
    def lazy_covariance_matrix(self):
        """Get lazy covariance matrix."""
        return CholLazyTensor(torch.diag_embed(self.variance))
    def test_inv_matmul(self):
        # Forward
        res = CholLazyTensor(self.chol).inv_matmul(self.vecs)
        actual = self.actual_mat.inverse().matmul(self.vecs_copy)
        self.assertLess(torch.max((res - actual).abs() / actual.norm()), 1e-2)
Example #16
    def forward(self):
        m = self.variational_mean
        L = self.chol_variational_covar
        return MultivariateNormal(m, CholLazyTensor(L))
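A minimal standalone sketch of the pattern in this forward: build a GPyTorch MultivariateNormal whose covariance is supplied lazily through its Cholesky factor, then sample and score under it. The import locations assume an older GPyTorch layout with gpytorch.lazy; all tensors are illustrative.

import torch
from gpytorch.distributions import MultivariateNormal
from gpytorch.lazy import CholLazyTensor, TriangularLazyTensor

m = torch.zeros(3)
L = torch.eye(3) + 0.1 * torch.randn(3, 3).tril()
q_u = MultivariateNormal(m, CholLazyTensor(TriangularLazyTensor(L)))

sample = q_u.rsample()        # reparameterized sample
logp = q_u.log_prob(sample)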
Example #17
    def forward(self, x):
        """Forward propagate the module.

        This method determines how to marginalize out the inducing function values.
        Specifically, forward defines how to transform a variational distribution over
        the inducing point values, q(u), into a variational distribution over
        the function values at specified locations x, q(f|x), by integrating
        p(f|x, u) q(u) du.

        Parameters
        ----------
        x (torch.tensor):
            Locations x to get the variational posterior of the function values at.

        Returns
        -------
            The distribution q(f|x)
        """
        variational_dist = self.variational_distribution.approx_variational_distribution
        inducing_points = self.inducing_points
        inducing_batch_shape = inducing_points.shape[:-2]
        if inducing_batch_shape < x.shape[:-2] or len(
                inducing_batch_shape) < len(x.shape[:-2]):
            batch_shape = _mul_broadcast_shape(inducing_points.shape[:-2],
                                               x.shape[:-2])
            inducing_points = inducing_points.expand(
                *batch_shape, *inducing_points.shape[-2:])
            x = x.expand(*batch_shape, *x.shape[-2:])
            variational_dist = variational_dist.expand(batch_shape)

        # If our points equal the inducing points, we're done
        if torch.equal(x, inducing_points):
            return variational_dist

        # Otherwise, we have to marginalize
        else:
            num_induc = inducing_points.size(-2)
            full_inputs = torch.cat([inducing_points, x], dim=-2)
            full_output = self.model.forward(full_inputs)
            full_mean, full_covar = full_output.mean, full_output.lazy_covariance_matrix

            # Mean terms
            test_mean = full_mean[..., num_induc:]
            induc_mean = full_mean[..., :num_induc]
            mean_diff = (variational_dist.mean - induc_mean).unsqueeze(-1)

            # Covariance terms
            induc_induc_covar = full_covar[
                ..., :num_induc, :num_induc].add_jitter()
            induc_data_covar = full_covar[..., :num_induc,
                                          num_induc:].evaluate()
            data_data_covar = full_covar[..., num_induc:, num_induc:]
            aux = variational_dist.lazy_covariance_matrix.root_decomposition()
            root_variational_covar = aux.root.evaluate()

            # If we had to expand the inducing points,
            # shrink the inducing mean and induc_induc_covar dimension
            # This makes everything more computationally efficient
            if len(inducing_batch_shape) < len(induc_induc_covar.batch_shape):
                index = tuple(0 for _ in range(
                    len(induc_induc_covar.batch_shape) -
                    len(inducing_batch_shape)))
                repeat_size = torch.Size(
                    (tuple(induc_induc_covar.batch_shape[:len(index)]) + tuple(
                        1
                        for _ in induc_induc_covar.batch_shape[len(index):])))
                induc_induc_covar = BatchRepeatLazyTensor(
                    induc_induc_covar.__getitem__(index), repeat_size)

            # If we're less than a certain size, we'll compute the Cholesky
            # decomposition of induc_induc_covar
            cholesky = False
            if settings.fast_computations.log_prob.off() or (
                    num_induc <= settings.max_cholesky_size.value()):
                induc_induc_covar = CholLazyTensor(
                    induc_induc_covar.cholesky())
                cholesky = True

            # If we are making predictions and don't need variances, we can do things
            # very quickly.
            if not self.training and settings.skip_posterior_variances.on():
                if not hasattr(self, "_mean_cache"):
                    self._mean_cache = induc_induc_covar.inv_matmul(
                        mean_diff).detach()

                predictive_mean = torch.add(
                    test_mean,
                    induc_data_covar.transpose(-2, -1).matmul(
                        self._mean_cache).squeeze(-1))

                predictive_covar = ZeroLazyTensor(test_mean.size(-1),
                                                  test_mean.size(-1))

                return MultivariateNormal(predictive_mean, predictive_covar)

            # Cache the CG results
            # For now: run variational inference without a preconditioner
            # The preconditioner screws things up for some reason
            with settings.max_preconditioner_size(0):
                # Cache the CG results
                left_tensors = torch.cat([mean_diff, root_variational_covar],
                                         -1)
                with torch.no_grad():
                    eager_rhs = torch.cat([left_tensors, induc_data_covar], -1)
                    solve, probe_vecs, probe_vec_norms, probe_vec_solves, tmats = \
                        CachedCGLazyTensor.precompute_terms(
                            induc_induc_covar, eager_rhs.detach(),
                            logdet_terms=(not cholesky),
                            include_tmats=(not settings.skip_logdet_forward.on() and
                                           not cholesky)
                        )
                    eager_rhss = [
                        eager_rhs.detach(),
                        eager_rhs[..., left_tensors.size(-1):].detach(),
                        eager_rhs[..., :left_tensors.size(-1)].detach()
                    ]
                    solves = [
                        solve.detach(), solve[...,
                                              left_tensors.size(-1):].detach(),
                        solve[..., :left_tensors.size(-1)].detach()
                    ]
                    if settings.skip_logdet_forward.on():
                        eager_rhss.append(
                            torch.cat([probe_vecs, left_tensors], -1))
                        solves.append(
                            torch.cat([
                                probe_vec_solves,
                                solve[..., :left_tensors.size(-1)]
                            ], -1))
                induc_induc_covar = CachedCGLazyTensor(
                    induc_induc_covar,
                    eager_rhss=eager_rhss,
                    solves=solves,
                    probe_vectors=probe_vecs,
                    probe_vector_norms=probe_vec_norms,
                    probe_vector_solves=probe_vec_solves,
                    probe_vector_tmats=tmats,
                )

            if self.training:
                self._memoize_cache[
                    "prior_distribution_memo"] = MultivariateNormal(
                        induc_mean, induc_induc_covar)

            # Compute predictive mean/covariance
            inv_products = induc_induc_covar.inv_matmul(
                induc_data_covar, left_tensors.transpose(-1, -2))
            predictive_mean = torch.add(test_mean, inv_products[..., 0, :])
            predictive_covar = RootLazyTensor(inv_products[...,
                                                           1:, :].transpose(
                                                               -1, -2))
            if self.training:
                interp_data_data_var, _ = induc_induc_covar.inv_quad_logdet(
                    induc_data_covar, logdet=False, reduce_inv_quad=False)
                data_covariance = DiagLazyTensor(
                    (data_data_covar.diag() - interp_data_data_var).clamp(
                        0, math.inf))
            else:
                neg_induc_data_data_covar = torch.matmul(
                    induc_data_covar.transpose(-1, -2).mul(-1),
                    induc_induc_covar.inv_matmul(induc_data_covar))
                data_covariance = data_data_covar + neg_induc_data_data_covar
            predictive_covar = PsdSumLazyTensor(predictive_covar,
                                                data_covariance)

            return MultivariateNormal(predictive_mean, predictive_covar)
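A dense-tensor sketch of the marginalization the docstring of this method describes. In what appears to be the unwhitened parameterization used here, with q(u) = N(m, S), the predictive moments the lazy code above assembles are mean = mu_x + K_xu K_uu^{-1} (m - mu_u) and covariance = K_xx - K_xu K_uu^{-1} K_ux + K_xu K_uu^{-1} S K_uu^{-1} K_ux. All tensors below are illustrative stand-ins.

import torch

M, N = 4, 3
K_uu = 2.0 * torch.eye(M)                       # inducing/inducing covariance
K_ux = 0.1 * torch.randn(M, N)                  # inducing/data covariance
K_xx = torch.eye(N)                             # data/data covariance
mu_u, mu_x = torch.zeros(M), torch.zeros(N)     # prior means
m, S = torch.randn(M), 0.5 * torch.eye(M)       # q(u) = N(m, S)

A = torch.linalg.solve(K_uu, K_ux)              # K_uu^{-1} K_ux
pred_mean = mu_x + A.t() @ (m - mu_u)
pred_covar = K_xx - K_ux.t() @ A + A.t() @ S @ A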
    def test_diag(self):
        res = CholLazyTensor(self.chol).diag()
        actual = self.actual_mat.diag()
        self.assertTrue(approx_equal(res, actual))

    def test_evaluate(self):
        res = CholLazyTensor(self.chol).evaluate()
        actual = self.actual_mat
        self.assertTrue(approx_equal(res, actual))

    def test_getitem(self):
        res = CholLazyTensor(self.chol)[2:4, -2]
        actual = self.actual_mat[2:4, -2]
        self.assertTrue(approx_equal(res, actual))