Example #1
    def test_posterior(self):
        for dtype in [torch.float, torch.double]:
            for mcs in [800, 10]:
                torch.random.manual_seed(0)
                with max_cholesky_size(mcs):
                    test_x = torch.rand(2, 12, 1).to(device=self.device,
                                                     dtype=dtype)

                    self.model.to(dtype)
                    # clear caches
                    self.model.train()
                    self.model.eval()
                    # test the posterior works
                    posterior = self.model.posterior(test_x)
                    self.assertIsInstance(posterior, GPyTorchPosterior)

                    # test the posterior works with observation noise
                    posterior = self.model.posterior(test_x,
                                                     observation_noise=True)
                    self.assertIsInstance(posterior, GPyTorchPosterior)

                    # test the posterior works with no variances
                    # a quirk in the MVN registration leaves the variance small but non-zero
                    with skip_posterior_variances():
                        posterior = self.model.posterior(test_x)
                        self.assertIsInstance(posterior, GPyTorchPosterior)
                        self.assertLessEqual(posterior.variance.max(), 1e-6)
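
For context, `max_cholesky_size` and `skip_posterior_variances` are standard `gpytorch.settings` context managers. Below is a minimal standalone sketch of the same checks; the `SingleTaskGP` here is a hypothetical stand-in for the `self.model` fixture used above.

import torch
from botorch.models import SingleTaskGP
from gpytorch.settings import max_cholesky_size, skip_posterior_variances

torch.random.manual_seed(0)
train_x = torch.rand(20, 1, dtype=torch.double)
train_y = torch.sin(6 * train_x)
model = SingleTaskGP(train_x, train_y)
model.eval()

test_x = torch.rand(2, 12, 1, dtype=torch.double)
with max_cholesky_size(10):  # a small limit forces the iterative (non-Cholesky) path
    posterior = model.posterior(test_x)
    print(posterior.mean.shape)  # torch.Size([2, 12, 1])

with skip_posterior_variances():
    posterior = model.posterior(test_x)
    print(posterior.variance.max())  # (near-)zero, as the tests above assert
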
Example #2
def sm_partial_mll(ski_gp, new_x, new_y, num_seen):
    # M := (K_{uu}^{-1} + W'W)^{-1} = K_{uu} - K_{uu}LQ^{-1}L'K_{uu}
    with skip_posterior_variances(False):
        M = ski_gp.prediction_cache['pred_cov'].detach()
    W_y = ski_gp._kernel_cache["interpolation_cache"].detach()

    # Q = ski_gp.current_qmatrix.detach()
    # Kuu_L = ski_gp.current_inducing_compression_matrix.detach()
    # Kuu_L_t = Kuu_L.transpose(-1, -2)
    # Kuu = ski_gp.kxx_cache.base_lazy_tensor.detach()
    # if ski_gp.has_learnable_noise:
    #     Kuu = Kuu / ski_gp.likelihood.second_noise_covar.noise.detach()

    # w := w(x')
    lazy_kernel = ski_gp.covar_module(new_x).evaluate_kernel()
    w = _get_wmat_from_kernel(lazy_kernel)
    if w.ndim < 3:
        w = w.unsqueeze(0)

    new_W_y = W_y + w * new_y
    new_W_y_t = new_W_y.transpose(-1, -2)

    rhs = torch.cat([w, new_W_y], dim=-1)
    solves = M.matmul(rhs)

    # v := Mw
    v = solves[..., :1]
    # v_rhs = Kuu_L_t.matmul(w)
    # v = Kuu.matmul(w) - Kuu_L.matmul(Q.inv_matmul(v_rhs))
    v_t = v.transpose(-1, -2)
    sm_divisor = 1 + v_t.bmm(w)

    # quad_term_1 := y'WK_{uu}W'y
    # quad_term_1 = new_W_y_t.matmul(Kuu.matmul(new_W_y))
    # # quad_term_2 := y'WK_{uu}LQ^{-1}L'K_{uu}W'y
    # term_2_rhs = Kuu_L_t.matmul(new_W_y)
    # term_2_rhs_t = term_2_rhs.transpose(-1, -2)
    # quad_term_2 = term_2_rhs_t.matmul(Q.inv_matmul(term_2_rhs))
    # quad_term_3 := y'Wvv'W'y / (1 + v'w)

    M_W_y = solves[..., 1:]
    quad_term_1 = new_W_y_t.matmul(M_W_y)

    quad_term_3 = (v_t.bmm(new_W_y)**2) / sm_divisor

    # quad_term := y'WAW'y - (y'Wvv'W'y) / (1 + v'w)
    # quad_term = (quad_term_1 - quad_term_2 - quad_term_3)
    quad_term = quad_term_1 - quad_term_3
    if ski_gp.has_learnable_noise:
        noise = ski_gp.likelihood.second_noise_covar.noise.detach()
        quad_term = quad_term / noise

    # \log|WKW' + \sigma^2 I| = n\log(\sigma^2) + \log|K_{uu}| - \log|A_t|
    # \log|A_t| = \log|A_{t-1}| - \log(1 + v'w)
    logdet_term = torch.log(sm_divisor)

    partial_mll = (quad_term - logdet_term) / 2
    return partial_mll / (num_seen + 1)
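
The rank-one update above is the Sherman-Morrison identity,
(A + ww')^{-1} = A^{-1} - A^{-1}ww'A^{-1} / (1 + w'A^{-1}w),
paired with the matrix determinant lemma,
log|A + ww'| = log|A| + log(1 + w'A^{-1}w),
which is where `sm_divisor` and `logdet_term` come from. A dense-tensor sketch verifying both identities with plain `torch` (none of the lazy caches above):

import torch

torch.manual_seed(0)
n = 5
L = torch.randn(n, n, dtype=torch.double)
A = L @ L.T + n * torch.eye(n, dtype=torch.double)  # symmetric positive-definite
w = torch.randn(n, 1, dtype=torch.double)

A_inv = torch.linalg.inv(A)
v = A_inv @ w            # v := A^{-1} w, the analogue of v := Mw above
divisor = 1 + w.T @ v    # the Sherman-Morrison divisor (1 + v'w)

# Sherman-Morrison: inverse of the rank-one-updated matrix
updated_inv = A_inv - (v @ v.T) / divisor
assert torch.allclose(updated_inv, torch.linalg.inv(A + w @ w.T))

# matrix determinant lemma: the incremental log-det used in `logdet_term`
assert torch.allclose(torch.logdet(A + w @ w.T) - torch.logdet(A),
                      torch.log(divisor).squeeze())
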
Example #3
    def test_posterior(self):
        torch.random.manual_seed(0)
        test_x = torch.rand(2, 30, 1).to(device=self.device)

        # test the posterior works
        posterior = self.model.posterior(test_x)
        self.assertIsInstance(posterior, GPyTorchPosterior)

        # test the posterior works with observation noise
        posterior = self.model.posterior(test_x, observation_noise=True)
        self.assertIsInstance(posterior, GPyTorchPosterior)

        # test the posterior works with no variances
        # a quirk in the MVN registration leaves the variance small but non-zero
        with skip_posterior_variances():
            posterior = self.model.posterior(test_x)
            self.assertIsInstance(posterior, GPyTorchPosterior)
            self.assertLessEqual(posterior.variance.max(), 1e-6)
Example #4
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: Union[bool, Tensor] = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        self.eval()  # ensure eval mode so calling the model yields a posterior

        no_pred_variance = skip_posterior_variances._state

        with ExitStack() as es:
            es.enter_context(gpt_posterior_settings())
            es.enter_context(fast_pred_var(True))

            # we need to skip posterior variances here
            es.enter_context(skip_posterior_variances(True))
            mvn = self(X)
            if observation_noise is not False:
                # TODO: implement Kronecker + diagonal solves so that this is possible.
                # if torch.is_tensor(observation_noise):
                #     # TODO: Validate noise shape
                #     # make observation_noise `batch_shape x q x n`
                #     obs_noise = observation_noise.transpose(-1, -2)
                #     mvn = self.likelihood(mvn, X, noise=obs_noise)
                # elif isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
                #     noise = self.likelihood.noise.mean().expand(X.shape[:-1])
                #     mvn = self.likelihood(mvn, X, noise=noise)
                # else:
                mvn = self.likelihood(mvn, X)

            # the lazy covariance matrix contains the interpolated version of the
            # full covariance matrix, so we can grab that directly instead.
            if X.ndimension() > self.train_inputs[0].ndimension():
                X_batch_shape = X.shape[:-2]
                train_inputs = self.train_inputs[0].reshape(
                    *[1] * len(X_batch_shape), *self.train_inputs[0].shape
                )
                train_inputs = train_inputs.repeat(
                    *X_batch_shape, *[1] * self.train_inputs[0].ndimension()
                )
            else:
                train_inputs = self.train_inputs[0]
            full_covar = self.covar_modules[0](torch.cat((train_inputs, X), dim=-2))

            if no_pred_variance:
                pred_variance = mvn.variance
            else:
                joint_covar = self._get_joint_covariance([X])
                pred_variance = self.make_posterior_variances(joint_covar)

                full_covar = KroneckerProductLazyTensor(
                    full_covar, *joint_covar.lazy_tensors[1:]
                )

            joint_covar_list = [self.covar_modules[0](X, train_inputs)]
            batch_shape = joint_covar_list[0].batch_shape
            for cm, param in zip(self.covar_modules[1:], self.latent_parameters):
                covar = cm(param)
                if covar.batch_shape != batch_shape:
                    covar = BatchRepeatLazyTensor(covar, batch_shape)
                joint_covar_list.append(covar)

            test_train_covar = KroneckerProductLazyTensor(*joint_covar_list)

            # mean and variance get reshaped into the target shape
            new_mean = mvn.mean.reshape(*X.shape[:-1], *self.target_shape)
            if not no_pred_variance:
                new_variance = pred_variance.reshape(*X.shape[:-1], *self.target_shape)
                new_variance = DiagLazyTensor(new_variance)
            else:
                new_variance = ZeroLazyTensor(
                    *X.shape[:-1], *self.target_shape, self.target_shape[-1]
                )

            mvn = MultivariateNormal(new_mean, new_variance)

            # return a specialized Posterior to allow for sampling
            posterior = HigherOrderGPPosterior(
                mvn=mvn,
                train_targets=self.train_targets.unsqueeze(-1),
                train_train_covar=self.prediction_strategy.lik_train_train_covar,
                test_train_covar=test_train_covar,
                joint_covariance_matrix=full_covar,
                output_shape=Size(
                    (
                        *X.shape[:-1],
                        *self.target_shape,
                    )
                ),
                num_outputs=self._num_outputs,
            )
            if hasattr(self, "outcome_transform"):
                posterior = self.outcome_transform.untransform_posterior(posterior)

            return posterior
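
A hedged usage sketch for this method, assuming the surrounding class is (or closely resembles) BoTorch's `HigherOrderGP`, whose training targets are tensor-valued; the shapes in the comments follow the `output_shape` logic above rather than any verified run.

import torch
from botorch.models import HigherOrderGP

torch.random.manual_seed(0)
# 10 training inputs, each mapped to a 3 x 2 tensor-valued target
train_x = torch.rand(10, 1)
train_y = torch.rand(10, 3, 2)
model = HigherOrderGP(train_x, train_y)

test_x = torch.rand(2, 5, 1)  # batched test inputs
posterior = model.posterior(test_x)
# output_shape = (*X.shape[:-1], *target_shape)
print(posterior.mean.shape)  # torch.Size([2, 5, 3, 2])

# the specialized HigherOrderGPPosterior exists to support sampling
samples = posterior.rsample(torch.Size([4]))
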
Example #5
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: Union[bool, Tensor] = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        self.eval()  # ensure eval mode so calling the model yields a posterior
        # input transforms are applied at `posterior` in `eval` mode, and in
        # `model.forward()` at training time
        X = self.transform_inputs(X)
        no_pred_variance = skip_posterior_variances._state

        with ExitStack() as es:
            es.enter_context(gpt_posterior_settings())
            es.enter_context(fast_pred_var(True))

            # we need to skip posterior variances here
            es.enter_context(skip_posterior_variances(True))
            mvn = self(X)
            if observation_noise is not False:
                # TODO: ensure that this still works for structured noise solves.
                mvn = self.likelihood(mvn, X)

            # the lazy covariance matrix contains the interpolated version of the
            # full covariance matrix, so we can grab that directly instead.
            if X.ndimension() > self.train_inputs[0].ndimension():
                X_batch_shape = X.shape[:-2]
                train_inputs = self.train_inputs[0].reshape(
                    *[1] * len(X_batch_shape), *self.train_inputs[0].shape
                )
                train_inputs = train_inputs.repeat(
                    *X_batch_shape, *[1] * self.train_inputs[0].ndimension()
                )
            else:
                train_inputs = self.train_inputs[0]

            # we now compute the data covariances for the training data, the test
            # data, the joint covariance, and the test-train cross-covariance
            train_train_covar = self.prediction_strategy.lik_train_train_covar.detach()
            base_train_train_covar = train_train_covar.lazy_tensor

            data_train_covar = base_train_train_covar.lazy_tensors[0]
            data_covar = self.covar_modules[0]
            data_train_test_covar = data_covar(X, train_inputs)
            data_test_test_covar = data_covar(X)
            data_joint_covar = data_train_covar.cat_rows(
                cross_mat=data_train_test_covar,
                new_mat=data_test_test_covar,
            )

            # we detach the latents so that they don't cause gradient errors
            # TODO: Can we enable backprop through the latent covariances?
            batch_shape = data_train_test_covar.batch_shape
            latent_covar_list = []
            for latent_covar in base_train_train_covar.lazy_tensors[1:]:
                if latent_covar.batch_shape != batch_shape:
                    latent_covar = BatchRepeatLazyTensor(latent_covar, batch_shape)
                latent_covar_list.append(latent_covar.detach())

            joint_covar = KroneckerProductLazyTensor(
                data_joint_covar, *latent_covar_list
            )
            test_train_covar = KroneckerProductLazyTensor(
                data_train_test_covar, *latent_covar_list
            )

            # compute the posterior variance if necessary
            if no_pred_variance:
                pred_variance = mvn.variance
            else:
                pred_variance = self.make_posterior_variances(joint_covar)

            # mean and variance get reshaped into the target shape
            new_mean = mvn.mean.reshape(*X.shape[:-1], *self.target_shape)
            if not no_pred_variance:
                new_variance = pred_variance.reshape(*X.shape[:-1], *self.target_shape)
                new_variance = DiagLazyTensor(new_variance)
            else:
                new_variance = ZeroLazyTensor(
                    *X.shape[:-1], *self.target_shape, self.target_shape[-1]
                )

            mvn = MultivariateNormal(new_mean, new_variance)

            # return a specialized Posterior to allow for sampling
            # cloning the full covar allows backpropagation through it
            posterior = HigherOrderGPPosterior(
                mvn=mvn,
                train_targets=self.train_targets.unsqueeze(-1),
                train_train_covar=train_train_covar,
                test_train_covar=test_train_covar,
                joint_covariance_matrix=joint_covar.clone(),
                output_shape=X.shape[:-1] + self.target_shape,
                num_outputs=self._num_outputs,
            )
            if hasattr(self, "outcome_transform"):
                posterior = self.outcome_transform.untransform_posterior(posterior)

            return posterior
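
One behavioral detail shared by both versions: if `skip_posterior_variances` is active when `posterior` is called, the `ZeroLazyTensor` branch is taken and the returned variance is only a placeholder, which is exactly what the `assertLessEqual(posterior.variance.max(), 1e-6)` checks in Examples #1 and #3 exercise. A brief sketch, reusing the hypothetical `HigherOrderGP` setup from the previous example:

import torch
from botorch.models import HigherOrderGP
from gpytorch.settings import skip_posterior_variances

torch.random.manual_seed(0)
model = HigherOrderGP(torch.rand(10, 1), torch.rand(10, 3, 2))
test_x = torch.rand(5, 1)

with skip_posterior_variances():
    fast_posterior = model.posterior(test_x)
# placeholder variance from the ZeroLazyTensor branch: effectively zero
print(fast_posterior.variance.max())

# without the setting, `make_posterior_variances` computes real variances
full_posterior = model.posterior(test_x)
print(full_posterior.variance.min() > 0)
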