def forward(self, x1, x2, diag=False, **params):
    # Sum the component kernels; start from a lazy zero (or a scalar 0 in the diag case)
    res = ZeroLazyTensor() if not diag else 0
    for kern in self.kernels:
        next_term = kern(x1, x2, diag=diag, **params)
        if not diag:
            res = res + lazify(next_term)
        else:
            res = res + next_term
    return res
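# --- Usage sketch (added for illustration; assumes standard GPyTorch kernels) ---
# The `+` operator on two kernels builds an AdditiveKernel, whose forward (above)
# sums the component covariances, staying lazy unless only the diagonal is requested.
import torch
from gpytorch.kernels import PeriodicKernel, RBFKernel

kernel = RBFKernel() + PeriodicKernel()     # AdditiveKernel(RBFKernel(), PeriodicKernel())
x = torch.randn(8, 2)
covar = kernel(x, x)                        # lazy 8 x 8 sum of the two covariances
diag = kernel(x, x, diag=True)              # plain shape-(8,) tensor of diagonal entries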
def forward(self, x1, x2, diag=False, last_dim_is_batch=False, **params):
    if last_dim_is_batch:
        raise RuntimeError("MultitaskKernel does not accept the last_dim_is_batch argument.")
    covar_i = self.task_covar_module.covar_matrix
    # Broadcast the task covariance across any batch dimensions of the inputs
    if len(x1.shape[:-2]):
        covar_i = covar_i.repeat(*x1.shape[:-2], 1, 1)
    covar_x = lazify(self.data_covar_module.forward(x1, x2, **params))
    # Full multitask covariance: Kronecker product of the data covar and the task covar
    res = KroneckerProductLazyTensor(covar_x, covar_i)
    return res.diag() if diag else res
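# --- Usage sketch (added for illustration; assumes standard GPyTorch modules) ---
# With n data points and t tasks, the forward above yields the (n*t) x (n*t)
# Kronecker product of the n x n data covariance and the t x t task covariance.
import torch
from gpytorch.kernels import MultitaskKernel, RBFKernel

kernel = MultitaskKernel(RBFKernel(), num_tasks=3, rank=1)
x = torch.randn(5, 2)
covar = kernel(x, x)   # lazy 15 x 15 Kronecker product (5 x 5 data covar, 3 x 3 task covar)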
def _exact_predictive_covar_inv_quad_form_cache(self, train_train_covar_inv_root, test_train_covar):
    test_train_covar = lazify(test_train_covar).evaluate_kernel()
    if not isinstance(test_train_covar, SumLazyTensor):
        return super(SumPredictionStrategy, self)._exact_predictive_covar_inv_quad_form_cache(
            train_train_covar_inv_root, test_train_covar
        )
    else:
        # Cache one entry per summand, each handled by its matching sub-strategy
        return tuple(
            sub_strat._exact_predictive_covar_inv_quad_form_cache(train_train_covar_inv_root, test_train_covar_comp)
            for sub_strat, test_train_covar_comp in zip(self._sub_strategies, test_train_covar.lazy_tensors)
        )
def _shaped_noise_covar(self, base_shape, *params):
    if len(base_shape) >= 2:
        *batch_shape, n, _ = base_shape
    else:
        *batch_shape, n = base_shape

    # compute the noise covariance
    if len(params) > 0:
        shape = None
    else:
        shape = base_shape if len(base_shape) == 1 else base_shape[:-1]
    noise_covar = self.noise_covar(*params, shape=shape)

    if self.rank > 0:
        # if rank > 0, compute the task correlation matrix
        # TODO: This is inefficient, change repeat so it can repeat LazyTensors w/ multiple batch dimensions
        task_corr = self._eval_corr_matrix()
        exp_shape = torch.Size([*batch_shape, n]) + task_corr.shape[-2:]
        task_corr_exp = lazify(task_corr.unsqueeze(-3).expand(exp_shape))
        noise_sem = noise_covar.sqrt()
        task_covar_blocks = MatmulLazyTensor(MatmulLazyTensor(noise_sem, task_corr_exp), noise_sem)
    else:
        # otherwise tasks are uncorrelated
        if isinstance(noise_covar, DiagLazyTensor):
            flattened_diag = noise_covar._diag.view(*noise_covar._diag.shape[:-2], -1)
            return DiagLazyTensor(flattened_diag)
        task_covar_blocks = noise_covar

    if len(batch_shape) == 1:
        # TODO: Properly support general batch shapes in BlockDiagLazyTensor (no shape arithmetic)
        tcb_eval = task_covar_blocks.evaluate()
        task_covar = BlockDiagLazyTensor(lazify(tcb_eval), block_dim=-3)
    else:
        task_covar = BlockDiagLazyTensor(task_covar_blocks)

    return task_covar
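# --- Usage sketch (added for illustration; assumes the standard GPyTorch API) ---
# With rank=0 the per-task noises are independent and the covariance above
# collapses to a pure diagonal; with rank>0 each data point instead gets a dense
# t x t noise block of the form D^{1/2} C D^{1/2}, where C is the learned task
# correlation matrix and D the diagonal of per-task noises.
from gpytorch.likelihoods import MultitaskGaussianLikelihood

likelihood = MultitaskGaussianLikelihood(num_tasks=3, rank=1)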
def forward(self, x1, x2, diag=False, **params):
    x1_eq_x2 = torch.equal(x1, x2)

    if not x1_eq_x2:
        # If x1 != x2, then we can't make a MulLazyTensor because the kernel won't necessarily be square/symmetric
        res = delazify(self.kernels[0](x1, x2, diag=diag, **params))
    else:
        res = self.kernels[0](x1, x2, diag=diag, **params)
        if not diag:
            res = lazify(res)

    for kern in self.kernels[1:]:
        next_term = kern(x1, x2, diag=diag, **params)
        if not x1_eq_x2:
            # Again delazify if x1 != x2
            res = res * delazify(next_term)
        else:
            if not diag:
                res = res * lazify(next_term)
            else:
                res = res * next_term
    return res
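# --- Usage sketch (added for illustration; assumes standard GPyTorch kernels) ---
# The `*` operator builds a ProductKernel, whose forward (above) multiplies the
# component covariances elementwise; the result can only stay lazy when x1 == x2,
# since MulLazyTensor requires a square symmetric matrix to root-decompose.
import torch
from gpytorch.kernels import LinearKernel, RBFKernel

kernel = RBFKernel() * LinearKernel()
x = torch.randn(8, 2)
covar = kernel(x, x)   # elementwise product of the two 8 x 8 covariances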
def _exact_predictive_covar_inv_quad_form_root(self, precomputed_cache, test_train_covar):
    # Here the precomputed cache is a tuple, where each entry is the precomputed
    # cache for the corresponding component lazy tensor
    test_train_covar = lazify(test_train_covar).evaluate_kernel()
    if not isinstance(test_train_covar, SumLazyTensor):
        return super(SumPredictionStrategy, self)._exact_predictive_covar_inv_quad_form_root(
            precomputed_cache, test_train_covar
        )
    else:
        return sum(
            sub_strat._exact_predictive_covar_inv_quad_form_root(cache_comp, test_train_covar_comp)
            for sub_strat, cache_comp, test_train_covar_comp in zip(
                self._sub_strategies, precomputed_cache, test_train_covar.lazy_tensors
            )
        )
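# For reference: if the kernel is a sum K = K_1 + ... + K_m, the test-train
# covariance splits as K_*y = sum_i K_*y^(i), so each sub-strategy applies its
# own cached root to its own component and the results are summed, as above.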
def to_data_independent_dist(self):
    """
    Convert a multitask MVN into a batch of (non-multitask) MVNs, one per data point.
    The result retains the inter-task covariances but discards the inter-data
    covariances, so the data points are treated as independent.

    :returns: the batched data-independent MVN, with batch shape `(num_data,)` and
        event shape `(num_tasks,)`
    :rtype: gpytorch.distributions.MultivariateNormal
    """
    # Create batch distribution where all data are independent, but the tasks are dependent
    full_covar = self.lazy_covariance_matrix
    num_data, num_tasks = self.mean.shape[-2:]
    data_indices = torch.arange(0, num_data * num_tasks, num_tasks, device=full_covar.device).view(-1, 1, 1)
    task_indices = torch.arange(num_tasks, device=full_covar.device)
    # Gather the t x t cross-task block for each data point from the full (n*t) x (n*t) covariance
    task_covars = full_covar[
        ..., data_indices + task_indices.unsqueeze(-2), data_indices + task_indices.unsqueeze(-1)
    ]
    return MultivariateNormal(self.mean, lazify(task_covars).add_jitter())
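# --- Usage sketch (added for illustration; `mt_mvn` is a hypothetical posterior) ---
# Extract the per-point cross-task covariances from a multitask posterior, e.g.
# to sample a task vector independently at each test location.
# mt_mvn: a MultitaskMultivariateNormal whose mean has shape (num_data, num_tasks)
point_dists = mt_mvn.to_data_independent_dist()
# point_dists is a batched MultivariateNormal: batch shape (num_data,), event
# shape (num_tasks,); all inter-data covariances have been dropped.
task_samples = point_dists.sample()   # shape (num_data, num_tasks)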
def __call__(self, x1_, x2_=None, diag=False, last_dim_is_batch=False, **params):
    """
    We cannot lazily evaluate actual kernel calls when using SKIP, because we cannot
    root-decompose rectangular matrices.

    Because we slice into the kernel during prediction to get the test x train covariance
    before calling evaluate_kernel, the order of operations would otherwise produce a
    MulLazyTensor representing a rectangular matrix, which we cannot matmul with because
    we cannot root-decompose it.

    Thus, SKIP actually *requires* that we work with the full
    (train + test) x (train + test) kernel matrix.
    """
    res = super().__call__(x1_, x2_, diag=diag, last_dim_is_batch=last_dim_is_batch, **params)
    res = lazify(res).evaluate_kernel()
    return res
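# --- Usage sketch (added for illustration; assumes the standard GPyTorch API) ---
# A typical SKIP construction: one grid-interpolated 1D kernel shared across
# input dimensions, multiplied over dimensions. Per the docstring above, calls
# to this kernel are evaluated eagerly rather than left lazy.
from gpytorch.kernels import GridInterpolationKernel, ProductStructureKernel, RBFKernel

kernel = ProductStructureKernel(
    GridInterpolationKernel(RBFKernel(), grid_size=100, num_dims=1),
    num_dims=4,
)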
def gather(self, outputs, output_device):
    # Lazily concatenate the per-device covariance slices on the configured output device
    return CatLazyTensor(*[lazify(o) for o in outputs], dim=self.dim, output_device=self.output_device)
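# --- Usage sketch (added for illustration; assumes the standard GPyTorch API) ---
# This gather backs multi-device kernel evaluation: each GPU computes a slice of
# the covariance matrix, and the slices are concatenated lazily on the output device.
import torch
from gpytorch.kernels import MultiDeviceKernel, RBFKernel

kernel = MultiDeviceKernel(RBFKernel(), device_ids=[0, 1], output_device=torch.device("cuda:0"))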
def __call__(self, x1, x2=None, diag=False, last_dim_is_batch=False, **params):
    x1_, x2_ = x1, x2

    # Select the active dimensions
    if self.active_dims is not None:
        x1_ = x1_.index_select(-1, self.active_dims)
        if x2_ is not None:
            x2_ = x2_.index_select(-1, self.active_dims)

    # Give x1_ and x2_ a last dimension, if necessary
    if x1_.ndimension() == 1:
        x1_ = x1_.unsqueeze(1)
    if x2_ is not None:
        if x2_.ndimension() == 1:
            x2_ = x2_.unsqueeze(1)
        if not x1_.size(-1) == x2_.size(-1):
            raise RuntimeError("x1_ and x2_ must have the same number of dimensions!")

    if x2_ is None:
        x2_ = x1_

    # Check that ard_num_dims matches the supplied number of dimensions
    if settings.debug.on():
        if self.ard_num_dims is not None and self.ard_num_dims != x1_.size(-1):
            raise RuntimeError(
                "Expected the input to have {} dimensionality "
                "(based on the ard_num_dims argument). Got {}.".format(self.ard_num_dims, x1_.size(-1))
            )

    if diag:
        res = super(Kernel, self).__call__(x1_, x2_, diag=True, last_dim_is_batch=last_dim_is_batch, **params)
        # Did this Kernel eat the diag option?
        # If it does not return a LazyEvaluatedKernelTensor, we can call diag on the output
        if not isinstance(res, LazyEvaluatedKernelTensor):
            if res.dim() == x1_.dim() and res.shape[-2:] == torch.Size((x1_.size(-2), x2_.size(-2))):
                res = res.diag()
        return res

    else:
        if settings.lazily_evaluate_kernels.on():
            res = LazyEvaluatedKernelTensor(x1_, x2_, kernel=self, last_dim_is_batch=last_dim_is_batch, **params)
        else:
            res = lazify(super(Kernel, self).__call__(x1_, x2_, last_dim_is_batch=last_dim_is_batch, **params))
        return res
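# --- Usage sketch (added for illustration; assumes standard GPyTorch kernels) ---
# active_dims selects input columns before evaluation, and diag=True returns just
# the covariance diagonal as a plain tensor instead of a lazy matrix.
import torch
from gpytorch.kernels import RBFKernel

kernel = RBFKernel(active_dims=torch.tensor([0, 2]))   # use columns 0 and 2 only
x = torch.randn(6, 4)
full = kernel(x, x)              # LazyEvaluatedKernelTensor, 6 x 6 (lazy by default)
diag = kernel(x, x, diag=True)   # tensor of shape (6,)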
def forward(self, x1, x2, diag=False, last_dim_is_batch=False, **params):
    # See if we need to update the grid or not
    if self.grid_is_dynamic:  # This is true if a grid_bounds wasn't passed in
        if torch.equal(x1, x2):
            x = x1.reshape(-1, self.num_dims)
        else:
            x = torch.cat([x1.reshape(-1, self.num_dims), x2.reshape(-1, self.num_dims)])
        x_maxs = x.max(0)[0].tolist()
        x_mins = x.min(0)[0].tolist()

        # We need to update the grid if
        # 1) it hasn't ever been initialized, or
        # 2) if any of the grid points are "out of bounds"
        update_grid = (not self.has_initialized_grid.item()) or any(
            x_min < bound[0] or x_max > bound[1]
            for x_min, x_max, bound in zip(x_mins, x_maxs, self._tight_grid_bounds)
        )

        # Update the grid if needed
        if update_grid:
            grid_spacings = tuple(
                (x_max - x_min) / (gs - 4.02) for gs, x_min, x_max in zip(self.grid_sizes, x_mins, x_maxs)
            )
            self.grid_bounds = tuple(
                (x_min - 2.01 * spacing, x_max + 2.01 * spacing)
                for x_min, x_max, spacing in zip(x_mins, x_maxs, grid_spacings)
            )
            grid = create_grid(
                self.grid_sizes,
                self.grid_bounds,
                dtype=self.grid[0].dtype,
                device=self.grid[0].device,
            )
            self.update_grid(grid)

    base_lazy_tsr = lazify(self._inducing_forward(last_dim_is_batch=last_dim_is_batch, **params))
    if last_dim_is_batch and base_lazy_tsr.size(-3) == 1:
        base_lazy_tsr = base_lazy_tsr.repeat(*x1.shape[:-2], x1.size(-1), 1, 1)

    left_interp_indices, left_interp_values = self._compute_grid(x1, last_dim_is_batch)
    if torch.equal(x1, x2):
        right_interp_indices = left_interp_indices
        right_interp_values = left_interp_values
    else:
        right_interp_indices, right_interp_values = self._compute_grid(x2, last_dim_is_batch)

    batch_shape = _mul_broadcast_shape(
        base_lazy_tsr.batch_shape,
        left_interp_indices.shape[:-2],
        right_interp_indices.shape[:-2],
    )
    res = InterpolatedLazyTensor(
        base_lazy_tsr.expand(*batch_shape, *base_lazy_tsr.matrix_shape),
        left_interp_indices.detach().expand(*batch_shape, *left_interp_indices.shape[-2:]),
        left_interp_values.expand(*batch_shape, *left_interp_values.shape[-2:]),
        right_interp_indices.detach().expand(*batch_shape, *right_interp_indices.shape[-2:]),
        right_interp_values.expand(*batch_shape, *right_interp_values.shape[-2:]),
    )

    if diag:
        return res.diag()
    else:
        return res
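# --- Usage sketch (added for illustration; assumes the standard GPyTorch API) ---
# With no grid_bounds supplied, the kernel starts in dynamic mode: the forward
# above re-fits the grid (with roughly two grid-spacings of margin on each side)
# whenever new inputs fall outside the current tight bounds.
from gpytorch.kernels import GridInterpolationKernel, RBFKernel

kernel = GridInterpolationKernel(RBFKernel(), grid_size=64, num_dims=2)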
def exact_predictive_covar(self, test_test_covar, test_train_covar):
    """
    Computes the posterior predictive covariance of a GP

    Args:
        test_test_covar (:obj:`gpytorch.lazy.LazyTensor`): Covariance matrix between test inputs
        test_train_covar (:obj:`gpytorch.lazy.LazyTensor`): Covariance matrix between test and train inputs

    Returns:
        :obj:`gpytorch.lazy.LazyTensor`: A LazyTensor representing the predictive posterior covariance
        of the test points
    """
    if settings.fast_pred_var.on():
        self._last_test_train_covar = test_train_covar

    if settings.skip_posterior_variances.on():
        return ZeroLazyTensor(*test_test_covar.size())

    if settings.fast_pred_var.off():
        dist = self.train_prior_dist.__class__(
            torch.zeros_like(self.train_prior_dist.mean), self.train_prior_dist.lazy_covariance_matrix
        )
        if settings.detach_test_caches.on():
            train_train_covar = self.likelihood(dist, self.train_inputs).lazy_covariance_matrix.detach()
        else:
            train_train_covar = self.likelihood(dist, self.train_inputs).lazy_covariance_matrix

        test_train_covar = delazify(test_train_covar)
        train_test_covar = test_train_covar.transpose(-1, -2)
        covar_correction_rhs = train_train_covar.inv_matmul(train_test_covar)
        # For efficiency
        if torch.is_tensor(test_test_covar):
            # We can use addmm in the 2d case
            if test_test_covar.dim() == 2:
                return lazify(
                    torch.addmm(test_test_covar, test_train_covar, covar_correction_rhs, beta=1, alpha=-1)
                )
            else:
                return lazify(test_test_covar + test_train_covar @ covar_correction_rhs.mul(-1))
        # In other cases - we'll use the standard infrastructure
        else:
            return test_test_covar + MatmulLazyTensor(test_train_covar, covar_correction_rhs.mul(-1))

    precomputed_cache = self.covar_cache
    covar_inv_quad_form_root = self._exact_predictive_covar_inv_quad_form_root(precomputed_cache, test_train_covar)
    if torch.is_tensor(test_test_covar):
        return lazify(
            torch.add(
                test_test_covar, covar_inv_quad_form_root @ covar_inv_quad_form_root.transpose(-1, -2), alpha=-1
            )
        )
    else:
        return test_test_covar + MatmulLazyTensor(
            covar_inv_quad_form_root, covar_inv_quad_form_root.transpose(-1, -2).mul(-1)
        )
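# For reference, the quantity computed above is the standard GP posterior
# predictive covariance (with K_yy the likelihood-augmented train covariance):
#
#     Sigma_* = K_** - K_*y K_yy^{-1} K_y*
#
# The fast_pred_var path instead caches a root R with R R^T ~= K_*y K_yy^{-1} K_y*
# and subtracts R R^T, trading a small approximation for much cheaper repeated
# queries. A hedged sketch of toggling that path (`model` and `test_x` are
# illustrative):
import gpytorch

with gpytorch.settings.fast_pred_var():
    posterior = model(test_x)
    variances = posterior.variance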