Example No. 1
    @property
    def predictive_mean_cache(self):
        # cache the solve (K_xx \otimes K_tt + \Sigma)^{-1}(y - \mu(x)); it is
        # accessed as an attribute in `posterior`, so it must be a property
        train_x = self.transform_inputs(self.train_inputs[0])
        train_noise = self.likelihood._shaped_noise_covar(train_x.shape)
        if detach_test_caches.on():
            train_noise = train_noise.detach()

        train_diff = self.train_targets - self.mean_module(train_x)
        train_solve = (self.train_full_covar + train_noise).inv_matmul(
            train_diff.reshape(*train_diff.shape[:-2], -1))
        if detach_test_caches.on():
            train_solve = train_solve.detach()

        return train_solve
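
The property above caches the solve (K_xx \otimes K_tt + \Sigma)^{-1}(y - \mu(x)); once it exists, every predictive mean is a single matrix-vector product against it. Below is a minimal, self-contained sketch of the same caching idea in plain PyTorch; the RBF kernel, noise level, and toy data are made up for illustration and are not part of the model above.

    import torch

    torch.manual_seed(0)
    n, m = 20, 5
    train_x = torch.randn(n, 1)
    test_x = torch.randn(m, 1)
    train_y = torch.sin(3 * train_x).squeeze(-1)

    def rbf(a, b, lengthscale=1.0):
        # squared-exponential kernel with an illustrative lengthscale
        return torch.exp(-0.5 * torch.cdist(a, b).pow(2) / lengthscale ** 2)

    K_xx = rbf(train_x, train_x)
    noise = 0.1 * torch.eye(n)

    # the "cache": (K_xx + Sigma)^{-1} (y - mu), with a zero prior mean here
    mean_cache = torch.linalg.solve(K_xx + noise, train_y)

    # any number of test sets can now reuse the cached solve
    pred_mean = rbf(test_x, train_x) @ mean_cache   # shape: (m,)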
Example No. 2
    @property
    def train_full_covar(self):
        train_x = self.transform_inputs(self.train_inputs[0])

        # construct Kxx \otimes Ktt
        train_full_covar = self.covar_module(train_x).evaluate_kernel()
        if detach_test_caches.on():
            train_full_covar = train_full_covar.detach()
        return train_full_covar
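
For reference, the Kronecker structure cached here means the full training covariance over n points and t tasks is the (n*t) x (n*t) matrix K_xx \otimes K_tt, which the lazy tensors above never materialize densely. A toy dense illustration with torch.kron (the small example matrices are made up):

    import torch

    K_data = torch.tensor([[1.0, 0.5],
                           [0.5, 1.0]])     # K_xx over 2 training points
    K_task = torch.tensor([[1.0, 0.3],
                           [0.3, 1.0]])     # K_tt over 2 tasks
    K_full = torch.kron(K_data, K_task)     # dense 4 x 4 joint covariance
    print(K_full.shape)                     # torch.Size([4, 4])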
Example No. 3
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: Union[bool, Tensor] = False,
        posterior_transform: Optional[PosteriorTransform] = None,
        **kwargs: Any,
    ) -> MultitaskGPPosterior:
        self.eval()

        if posterior_transform is not None:
            # this could be very costly, disallow for now
            raise NotImplementedError(
                "Posterior transforms currently not supported for "
                f"{self.__class__.__name__}")

        X = self.transform_inputs(X)
        train_x = self.transform_inputs(self.train_inputs[0])

        # construct Ktt
        task_covar = self._task_covar_matrix
        task_rootlt = self._task_covar_matrix.root_decomposition(
            method="diagonalization")
        task_root = task_rootlt.root
        if task_covar.batch_shape != X.shape[:-2]:
            task_covar = BatchRepeatLazyTensor(task_covar,
                                               batch_repeat=X.shape[:-2])
            task_root = BatchRepeatLazyTensor(lazify(task_root),
                                              batch_repeat=X.shape[:-2])

        task_covar_rootlt = RootLazyTensor(task_root)

        # construct RR' \approx Kxx
        data_data_covar = self.train_full_covar.lazy_tensors[0]
        # populate the diagonalization caches for the root and inverse root
        # decomposition
        data_data_evals, data_data_evecs = data_data_covar.diagonalization()

        # pad the eigenvalues and eigenvectors with zeros if we are using Lanczos
        if data_data_evecs.shape[-1] < data_data_evecs.shape[-2]:
            cols_to_add = data_data_evecs.shape[-2] - data_data_evecs.shape[-1]
            zero_evecs = torch.zeros(
                *data_data_evecs.shape[:-1],
                cols_to_add,
                dtype=data_data_evals.dtype,
                device=data_data_evals.device,
            )
            zero_evals = torch.zeros(
                *data_data_evecs.shape[:-2],
                cols_to_add,
                dtype=data_data_evals.dtype,
                device=data_data_evals.device,
            )
            data_data_evecs = CatLazyTensor(
                data_data_evecs,
                lazify(zero_evecs),
                dim=-1,
                output_device=data_data_evals.device,
            )
            data_data_evals = torch.cat((data_data_evals, zero_evals), dim=-1)

        # construct K_{xt, x}
        test_data_covar = self.covar_module.data_covar_module(X, train_x)
        # construct K_{xt, xt}
        test_test_covar = self.covar_module.data_covar_module(X)

        # now update root so that \tilde{R}\tilde{R}' \approx K_{(x,xt), (x,xt)}
        # cloning preserves the gradient history
        updated_lazy_tensor = data_data_covar.cat_rows(
            cross_mat=test_data_covar.clone(),
            new_mat=test_test_covar,
            method="diagonalization",
        )
        updated_root = updated_lazy_tensor.root_decomposition().root
        # occasionally there are device errors, so enforce the correct device here
        updated_root = updated_root.to(data_data_covar.device)

        # build a root decomposition of the joint train/test covariance matrix
        # construct (\tilde{R} \otimes M)(\tilde{R} \otimes M)' \approx
        # (K_{(x,xt), (x,xt)} \otimes Ktt)
        joint_covar = RootLazyTensor(
            KroneckerProductLazyTensor(updated_root,
                                       task_covar_rootlt.root.detach()))

        # construct K_{xt, x} \otimes Ktt
        test_obs_kernel = KroneckerProductLazyTensor(test_data_covar,
                                                     task_covar)

        # collect y - \mu(x) and \mu(X)
        train_diff = self.train_targets - self.mean_module(train_x)
        if detach_test_caches.on():
            train_diff = train_diff.detach()
        test_mean = self.mean_module(X)

        train_noise = self.likelihood._shaped_noise_covar(train_x.shape)
        diagonal_noise = isinstance(train_noise, DiagLazyTensor)
        if detach_test_caches.on():
            train_noise = train_noise.detach()
        test_noise = (self.likelihood._shaped_noise_covar(X.shape)
                      if observation_noise else None)

        # predictive mean and variance for the mvn
        # first the predictive mean
        pred_mean = (test_obs_kernel.matmul(
            self.predictive_mean_cache).reshape_as(test_mean) + test_mean)
        # next the predictive variance, assume diagonal noise
        test_var_term = KroneckerProductLazyTensor(test_test_covar,
                                                   task_covar).diag()

        if diagonal_noise:
            task_evals, task_evecs = self._task_covar_matrix.diagonalization()
            # TODO: make this be the default KPMatmulLT diagonal method in gpytorch
            full_data_inv_evals = (KroneckerProductDiagLazyTensor(
                DiagLazyTensor(data_data_evals), DiagLazyTensor(task_evals)) +
                                   train_noise).inverse()
            test_train_hadamard = KroneckerProductLazyTensor(
                test_data_covar.matmul(data_data_evecs).evaluate()**2,
                task_covar.matmul(task_evecs).evaluate()**2,
            )
            data_var_term = test_train_hadamard.matmul(
                full_data_inv_evals).sum(dim=-1)
        else:
            # if the noise is non-diagonal (but still Kronecker structured), we
            # have to pull the noise across because the inverse is not available
            # in closed form
            # should be a kronecker lt, R = \Sigma_X^{-1/2} \kron \Sigma_T^{-1/2}
            # TODO: enforce the diagonalization to return a KPLT for all shapes in
            # gpytorch or dense linear algebra for small shapes
            data_noise, task_noise = train_noise.lazy_tensors
            data_noise_root = data_noise.root_inv_decomposition(
                method="diagonalization")
            task_noise_root = task_noise.root_inv_decomposition(
                method="diagonalization")

            # ultimately we need to compute the diagonal of
            # (K_{x* X} \kron K_T)(K_{XX} \kron K_T + \Sigma_X \kron \Sigma_T)^{-1}
            #                           (K_{x* X} \kron K_T)^T
            # = (K_{x* X} \Sigma_X^{-1/2} Q_R)(\Lambda_R + I)^{-1}
            #                       (K_{x* X} \Sigma_X^{-1/2} Q_R)^T
            # where R = (\Sigma_X^{-1/2T}K_{XX}\Sigma_X^{-1/2} \kron
            #                   \Sigma_T^{-1/2T}K_{T}\Sigma_T^{-1/2})
            # first we construct the components of R's eigen-decomposition
            # TODO: make this be the default KPMatmulLT diagonal method in gpytorch
            whitened_data_covar = (data_noise_root.transpose(
                -1, -2).matmul(data_data_covar).matmul(data_noise_root))
            w_data_evals, w_data_evecs = whitened_data_covar.diagonalization()
            whitened_task_covar = (task_noise_root.transpose(-1, -2).matmul(
                self._task_covar_matrix).matmul(task_noise_root))
            w_task_evals, w_task_evecs = whitened_task_covar.diagonalization()

            # we add one to the eigenvalues as above (not just for stability)
            full_data_inv_evals = (KroneckerProductDiagLazyTensor(
                DiagLazyTensor(w_data_evals),
                DiagLazyTensor(w_task_evals)).add_jitter(1.0).inverse())

            test_data_comp = (test_data_covar.matmul(data_noise_root).matmul(
                w_data_evecs).evaluate()**2)
            task_comp = (task_covar.matmul(task_noise_root).matmul(
                w_task_evecs).evaluate()**2)

            test_train_hadamard = KroneckerProductLazyTensor(
                test_data_comp, task_comp)
            data_var_term = test_train_hadamard.matmul(
                full_data_inv_evals).sum(dim=-1)

        pred_variance = test_var_term - data_var_term
        specialized_mvn = MultitaskMultivariateNormal(
            pred_mean, DiagLazyTensor(pred_variance))
        if observation_noise:
            specialized_mvn = self.likelihood(specialized_mvn)

        posterior = MultitaskGPPosterior(
            mvn=specialized_mvn,
            joint_covariance_matrix=joint_covar,
            test_train_covar=test_obs_kernel,
            train_diff=train_diff,
            test_mean=test_mean,
            train_train_covar=self.train_full_covar,
            train_noise=train_noise,
            test_noise=test_noise,
        )

        if hasattr(self, "outcome_transform"):
            posterior = self.outcome_transform.untransform_posterior(posterior)
        return posterior
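
The diagonal-noise branch of the variance computation above relies on the identity diag(M D M^T) = (M \circ M) d for a diagonal D with diagonal entries d, applied with M = (K_{x* X} Q_x) \otimes (K_T Q_t), so the per-point predictive variances never require forming the joint covariance. A small numerical check of that identity in plain PyTorch; all matrices below are random stand-ins, not quantities from the model:

    import torch

    torch.manual_seed(0)
    A = torch.randn(3, 4)          # stand-in for K_{x* X} Q_x
    B = torch.randn(2, 5)          # stand-in for K_T Q_t
    d = torch.rand(4 * 5) + 0.1    # stand-in for the inverse eigenvalues

    # brute force: materialize the full matrix and take its diagonal
    M = torch.kron(A, B)
    brute = torch.diag(M @ torch.diag(d) @ M.T)

    # Hadamard / Kronecker trick used in the posterior above
    trick = torch.kron(A ** 2, B ** 2) @ d

    print(torch.allclose(brute, trick, atol=1e-5))   # True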
Example No. 4
    @property
    def _task_covar_matrix(self):
        # K_tt, the inter-task covariance; detached when test caches are detached
        res = self.covar_module.task_covar_module.covar_matrix
        if detach_test_caches.on():
            res = res.detach()
        return res
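
Taken together, the snippets above look like the cached properties and posterior method of BoTorch's KroneckerMultiTaskGP. Assuming that, a minimal usage sketch might look like the following; the data shapes and test points are made up, and the hyperparameters are left at their defaults rather than fitted:

    import torch
    from botorch.models import KroneckerMultiTaskGP

    train_X = torch.rand(20, 2)     # 20 points, 2 input dimensions
    train_Y = torch.randn(20, 3)    # 3 correlated tasks
    model = KroneckerMultiTaskGP(train_X, train_Y)

    test_X = torch.rand(5, 2)
    posterior = model.posterior(test_X)   # the method shown in Example No. 3
    print(posterior.mean.shape)           # torch.Size([5, 3])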