def test_broadcast_rhs(self):
    i = torch.tensor([[0, 1, 1, 0, 1, 1], [2, 0, 2, 2, 0, 2]], dtype=torch.long)
    v = torch.tensor([3, 4, 5, 6, 7, 8], dtype=torch.float)
    sparse = torch.sparse.FloatTensor(i, v, torch.Size([2, 3]))
    dense = torch.randn(4, 2, 3, 4, requires_grad=True)
    dense_copy = dense.clone().detach().requires_grad_(True)
    res = gpytorch.dsmm(sparse, dense)
    actual = torch.matmul(sparse.to_dense(), dense_copy)
    self.assertLess(torch.norm(res - actual), 1e-5)

    grad_output = torch.randn(4, 2, 2, 4)
    res.backward(grad_output)
    actual.backward(grad_output)
    self.assertLess(torch.norm(dense.grad - dense_copy.grad).item(), 1e-5)

    i = torch.tensor([[0, 0, 0, 1, 1, 1], [0, 1, 1, 0, 1, 1], [2, 0, 2, 2, 0, 2]], dtype=torch.long)
    v = torch.tensor([3, 4, 5, 6, 7, 8], dtype=torch.float)
    sparse = torch.sparse.FloatTensor(i, v, torch.Size([2, 2, 3]))
    dense = torch.randn(4, 2, 3, 4, requires_grad=True)
    dense_copy = dense.clone().detach().requires_grad_(True)
    res = gpytorch.dsmm(sparse, dense)
    actual = torch.matmul(sparse.to_dense(), dense_copy)
    self.assertLess(torch.norm(res - actual), 1e-5)

    grad_output = torch.randn(4, 2, 2, 4)
    res.backward(grad_output)
    actual.backward(grad_output)
    self.assertLess(torch.norm(dense.grad - dense_copy.grad).item(), 1e-5)
def evaluate(self):
    """
    Explicitly evaluate and return the Toeplitz matrix this object wraps as a float Tensor.

    To do this, we explicitly compute W_{left} T W_{right}^{T} and return it.

    Warning: as implicitly stored by this LazyVariable, W is very sparse and T requires O(n) storage,
    whereas the full matrix requires O(n^2) storage. Calling evaluate can very easily lead to memory
    issues. As a result, using it should be a last resort.
    """
    if self.J_left is not None:
        n_left = len(self.J_left)
        n_right = len(self.J_right)
        W_left = toeplitz.index_coef_to_sparse(self.J_left, self.C_left, len(self.c))
        W_right = toeplitz.index_coef_to_sparse(self.J_right, self.C_right, len(self.c))
        if n_left <= n_right:
            W_left_T = self.explicit_interpolate_T(self.J_left, self.C_left)
            WTW = gpytorch.dsmm(Variable(W_right), W_left_T.t()).t()
        else:
            W_right_T = self.explicit_interpolate_T(self.J_right, self.C_right)
            WTW = gpytorch.dsmm(Variable(W_left), W_right_T.t())
    else:
        WTW = ToeplitzLazyVariable(self.c).mm(Variable(torch.eye(len(self.c))))

    if self.added_diag is not None:
        WTW = WTW + torch.diag(self.added_diag)

    return WTW
def variational_posterior_covar(self, induc_test_covar, chol_variational_covar,
                                test_test_covar, induc_induc_covar):
    covar_right = gpytorch.dsmm(self.interp_left, chol_variational_covar.t()).t()
    covar_left = gpytorch.dsmm(self.interp_left, chol_variational_covar.t())
    return covar_left.matmul(covar_right)
def variational_samples(self, output, n_samples=None):
    if n_samples is None:
        n_samples = gpytorch.functions.num_trace_samples

    # Draw samples from variational distribution
    base_samples = Variable(
        self.variational_mean.data.new(self.variational_mean.size(-1), n_samples).normal_())
    if self.variational_mean.ndimension() > 1:
        # Batch mode
        base_samples = base_samples.unsqueeze(0)
    samples = self.chol_variational_covar.transpose(-1, -2).matmul(base_samples)
    samples = samples + self.variational_mean.unsqueeze(-1)

    # Hacky code for now for KroneckerProductLazyVariable. Let's change it soon.
    if isinstance(output.covar(), KroneckerProductLazyVariable):
        interp_matrix = output.covar().representation()[1]
        samples = gpytorch.dsmm(interp_matrix, samples)
        return samples

    if not isinstance(output.covar(), InterpolatedLazyVariable):
        raise RuntimeError('Output should be an interpolated lazy variable')

    # Left multiply samples by interpolation matrix
    interp_indices = output.covar().left_interp_indices
    interp_values = output.covar().left_interp_values
    samples = left_interp(interp_indices, interp_values, samples)

    if isinstance(output.covar(), SumInterpolatedLazyVariable):
        samples = samples.sum(0)
    return samples
def matmul(self, tensor):
    # We're using a custom matmul here, because it is significantly faster than
    # what we get from the function factory.
    # The _matmul_closure is optimized for repeated calls, such as for inv_matmul
    if tensor.ndimension() == 1:
        is_vector = True
        tensor = tensor.unsqueeze(-1)
    else:
        is_vector = False

    # right_interp^T * tensor
    right_interp_t = _make_sparse_from_indices_and_values(
        self.right_interp_indices, self.right_interp_values, self.base_lazy_variable.size()[-1])
    right_interp_res = gpytorch.dsmm(right_interp_t, tensor)

    # base_lazy_var * right_interp^T * tensor
    base_res = self.base_lazy_variable.matmul(right_interp_res)

    # left_interp * base_lazy_var * right_interp^T * tensor
    res = left_interp(self.left_interp_indices, self.left_interp_values, base_res)

    # Squeeze if necessary
    if is_vector:
        res = res.squeeze(-1)
    return res
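# The helper below is a minimal, hedged sketch (not the gpytorch implementation) of what a
# `left_interp`-style weighted gather computes in the non-batch case: each output row is a
# weighted combination of rows of `rhs`, with rows selected by `interp_indices` and weighted
# by `interp_values`. The function name and shapes are assumptions for illustration only.
import torch


def left_interp_sketch(interp_indices, interp_values, rhs):
    # interp_indices: (n, k) long, interp_values: (n, k), rhs: (m, t)
    gathered = rhs[interp_indices]             # (n, k, t) -- gather the interpolated rows
    weights = interp_values.unsqueeze(-1)      # (n, k, 1)
    return (weights * gathered).sum(dim=-2)    # (n, t) -- weighted sum over the k neighbors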
def monte_carlo_log_likelihood(self, log_probability_func, train_y, variational_mean, chol_var_covar):
    epsilon = Variable(torch.randn(self.kronecker_product_size, gpytorch.functions.num_trace_samples))
    samples = chol_var_covar.mm(epsilon)
    samples = samples + variational_mean.unsqueeze(1).expand_as(samples)
    W_left = Variable(list_of_indices_and_values_to_sparse(self.J_lefts, self.C_lefts, self.columns))
    samples = gpytorch.dsmm(W_left, samples)
    log_likelihood = log_probability_func(samples, train_y)
    return log_likelihood
def exact_posterior_alpha(self, train_mean, train_y):
    train_residual = (train_y - train_mean).unsqueeze(1)
    gpytorch.functions.max_cg_iterations *= 10
    alpha = self.var.inv_matmul(train_residual)
    gpytorch.functions.max_cg_iterations /= 10
    alpha = gpytorch.dsmm(Variable(self.interp_right.data.t()), alpha)
    alpha = self.grid.matmul(alpha)
    return alpha.squeeze()
def exact_posterior_alpha(self, train_mean, train_y):
    train_residual = (train_y - train_mean).unsqueeze(1)
    alpha = self.invmm(train_residual)
    W_train_right = Variable(index_coef_to_sparse(self.J_right, self.C_right, len(self.c)))
    alpha = gpytorch.dsmm(W_train_right.t(), alpha)
    alpha = ToeplitzLazyVariable(self.c).mm(alpha)
    return alpha.squeeze()
def variational_posterior_covar(self, chol_variational_covar):
    """
    Assumes self is the covariance matrix between test and inducing points

    Returns the covar of the posterior GP on test points, given prior covars

    Args:
        - chol_variational_covar (Variable nxn) - Cholesky decomposition of variational covar
    """
    W_left = index_coef_to_sparse(self.J_left, self.C_left, len(self.c))
    W_right = W_left.t()
    covar_right = gpytorch.dsmm(W_right.t(), chol_variational_covar.t()).t()
    covar_left = gpytorch.dsmm(W_left, chol_variational_covar.t())
    return covar_left.mm(covar_right)
def test_forward(self):
    i = torch.tensor([[0, 1, 1], [2, 0, 2]], dtype=torch.long)
    v = torch.tensor([3, 4, 5], dtype=torch.float)
    sparse = torch.sparse.FloatTensor(i, v, torch.Size([2, 3]))
    dense = torch.randn(3, 3)
    res = gpytorch.dsmm(sparse, dense)
    actual = torch.mm(sparse.to_dense(), dense)
    self.assertLess(torch.norm(res - actual), 1e-5)
def test_forward():
    i = torch.LongTensor([[0, 1, 1], [2, 0, 2]])
    v = torch.FloatTensor([3, 4, 5])
    sparse = torch.sparse.FloatTensor(i, v, torch.Size([2, 3]))
    dense = Variable(torch.randn(3, 3))
    res = gpytorch.dsmm(Variable(sparse), dense)
    actual = torch.mm(Variable(sparse.to_dense()), dense)
    assert torch.norm(res.data - actual.data) < 1e-5
def test_forward_batch(self):
    i = torch.LongTensor([[0, 0, 0, 1, 1, 1], [0, 1, 1, 0, 1, 1], [2, 0, 2, 2, 0, 2]])
    v = torch.FloatTensor([3, 4, 5, 6, 7, 8])
    sparse = torch.sparse.FloatTensor(i, v, torch.Size([2, 2, 3]))
    dense = Variable(torch.randn(2, 3, 3))
    res = gpytorch.dsmm(Variable(sparse), dense)
    actual = torch.matmul(Variable(sparse.to_dense()), dense)
    self.assertLess(torch.norm(res.data - actual.data), 1e-5)
def variational_posterior_mean(self, alpha):
    """
    Assumes self is the covariance matrix between test and inducing points

    Returns the mean of the posterior GP on test points, given prior means/covars

    Args:
        - alpha (Variable m) - alpha vector, computed from exact_posterior_alpha
    """
    W_left = index_coef_to_sparse(self.J_left, self.C_left, len(self.c))
    return gpytorch.dsmm(W_left, alpha.unsqueeze(1)).squeeze()
def monte_carlo_log_likelihood(self, log_probability_func, train_y, variational_mean,
                               chol_var_covar, num_samples):
    epsilon = Variable(torch.randn(len(self.c), num_samples))
    samples = chol_var_covar.mm(epsilon)
    samples = samples + variational_mean.unsqueeze(1).expand_as(samples)
    W_left = Variable(toeplitz.index_coef_to_sparse(self.J_left, self.C_left, len(self.c)))
    samples = gpytorch.dsmm(W_left, samples)
    log_likelihood = log_probability_func(samples, train_y)
    return log_likelihood
def test_backward():
    i = torch.LongTensor([[0, 1, 1], [2, 0, 2]])
    v = torch.FloatTensor([3, 4, 5])
    sparse = torch.sparse.FloatTensor(i, v, torch.Size([2, 3]))
    dense = Variable(torch.randn(3, 4), requires_grad=True)
    dense_copy = Variable(dense.data.clone(), requires_grad=True)
    grad_output = torch.randn(2, 4)

    res = gpytorch.dsmm(Variable(sparse), dense)
    res.backward(grad_output)
    actual = torch.mm(Variable(sparse.to_dense()), dense_copy)
    actual.backward(grad_output)
    assert torch.norm(dense.grad.data - dense_copy.grad.data) < 1e-5
def variational_posterior_mean(self, alpha):
    return gpytorch.dsmm(self.interp_left, alpha.unsqueeze(1)).squeeze()
def __call__(self, inputs, **kwargs):
    if self.exact_inference:
        if self.conditioning:
            interp_indices, interp_values = self._compute_grid(inputs)
            self.train_interp_indices = interp_indices
            self.train_interp_values = interp_values
        else:
            train_data = self.train_inputs[0].data if hasattr(self, 'train_inputs') else None
            if train_data is not None and torch.equal(inputs.data, train_data):
                interp_indices = self.train_interp_indices
                interp_values = self.train_interp_values
            else:
                interp_indices, interp_values = self._compute_grid(inputs)

        induc_output = gpytorch.Module.__call__(self, Variable(self._inducing_points))
        if not isinstance(induc_output, GaussianRandomVariable):
            raise RuntimeError('Output should be a GaussianRandomVariable')

        if isinstance(induc_output.covar(), KroneckerProductLazyVariable):
            covar = KroneckerProductLazyVariable(
                induc_output.covar().columns, interp_indices, interp_values,
                interp_indices, interp_values)
            interp_matrix = covar.representation()[1]
            mean = gpytorch.dsmm(interp_matrix, induc_output.mean().unsqueeze(-1)).squeeze(-1)
        else:
            # Compute test mean
            # Left multiply samples by interpolation matrix
            interp_indices = Variable(interp_indices)
            interp_values = Variable(interp_values)
            mean = left_interp(interp_indices, interp_values, induc_output.mean())

            # Compute test covar
            base_lv = induc_output.covar()
            covar = InterpolatedLazyVariable(base_lv, interp_indices, interp_values,
                                             interp_indices, interp_values)

        return GaussianRandomVariable(mean, covar)

    else:
        variational_mean = self.variational_mean
        chol_variational_covar = self.chol_variational_covar
        induc_output = gpytorch.Module.__call__(self, Variable(self._inducing_points))
        interp_indices, interp_values = self._compute_grid(inputs)

        # Initialize variational parameters, if necessary
        if not self.variational_params_initialized[0]:
            mean_init = induc_output.mean().data
            chol_covar_init = torch.eye(len(mean_init)).type_as(mean_init)
            variational_mean.data.copy_(mean_init)
            chol_variational_covar.data.copy_(chol_covar_init)
            self.variational_params_initialized.fill_(1)

        # Calculate alpha vector
        if self.training:
            alpha = induc_output.mean()
        else:
            if not self.has_computed_alpha[0]:
                alpha = variational_mean.sub(induc_output.mean())
                self.alpha.copy_(alpha.data)
                self.has_computed_alpha.fill_(1)
            else:
                alpha = Variable(self.alpha)

        if isinstance(induc_output.covar(), KroneckerProductLazyVariable):
            test_covar = KroneckerProductLazyVariable(
                induc_output.covar().columns, interp_indices, interp_values,
                interp_indices, interp_values)
            interp_matrix = test_covar.representation()[1]
            test_mean = gpytorch.dsmm(interp_matrix, alpha.unsqueeze(-1)).squeeze(-1)
            if not self.training:
                test_chol_covar = gpytorch.dsmm(interp_matrix, chol_variational_covar)
                test_covar = MatmulLazyVariable(test_chol_covar, test_chol_covar.transpose(-2, -1))
        else:
            # Compute test mean
            # Left multiply samples by interpolation matrix
            interp_indices = Variable(interp_indices)
            interp_values = Variable(interp_values)
            test_mean = left_interp(interp_indices, interp_values, alpha)

            # Compute test covar
            if self.training:
                base_lv = induc_output.covar()
            else:
                base_lv = NonLazyVariable(self.variational_covar)
            test_covar = InterpolatedLazyVariable(base_lv, interp_indices, interp_values,
                                                  interp_indices, interp_values)

        output = GaussianRandomVariable(test_mean, test_covar)

        # Add variational strategy
        if self.training:
            output._variational_strategy = GridInducingPointStrategy(
                variational_mean, chol_variational_covar, induc_output)

        if not isinstance(output, GaussianRandomVariable):
            raise RuntimeError('Output should be a GaussianRandomVariable')

        return output
def exact_posterior_mean(self, test_mean, alpha):
    alpha = alpha.unsqueeze(1)
    W_test_left = index_coef_to_sparse(self.J_left, self.C_left, len(self.c))
    return test_mean.add(gpytorch.dsmm(W_test_left, alpha).squeeze())
def exact_posterior_mean(self, test_mean, alpha):
    alpha = alpha.unsqueeze(1)
    return test_mean.add(gpytorch.dsmm(self.interp_left, alpha).squeeze())