def test_inv_matmul(self):
    c_1 = Variable(torch.Tensor([4, 1, 1]), requires_grad=True)
    c_2 = Variable(torch.Tensor([4, 1, 1]), requires_grad=True)
    T_1 = Variable(torch.zeros(3, 3))
    for i in range(3):
        for j in range(3):
            T_1[i, j] = c_1[abs(i - j)]
    T_2 = gpytorch.lazy.ToeplitzLazyVariable(c_2)

    B = Variable(torch.randn(3, 4))

    res_1 = gpytorch.inv_matmul(T_1, B).sum()
    res_2 = gpytorch.inv_matmul(T_2, B).sum()

    res_1.backward()
    res_2.backward()

    self.assertLess(torch.norm(res_1.data - res_2.data), 1e-4)
    self.assertLess(torch.norm(c_1.grad.data - c_2.grad.data), 1e-4)
def variational_posterior_covar(self, induc_test_covar, chol_variational_covar,
                                test_test_covar, induc_induc_covar):
    # S = U'U, where U is the upper-triangular Cholesky factor of the
    # variational covariance
    variational_covar = chol_variational_covar.t().matmul(chol_variational_covar)

    # left_factor = K_{mn}K_{nn}^{-1}(S - K_{nn})
    left_factor = torch.mm(
        self.var,
        gpytorch.inv_matmul(induc_induc_covar, variational_covar - induc_induc_covar),
    )

    # right_factor = K_{nn}^{-1}K_{nm}
    right_factor = gpytorch.inv_matmul(induc_induc_covar, induc_test_covar)

    # K_{mm} + K_{mn}K_{nn}^{-1}(S - K_{nn})K_{nn}^{-1}K_{nm}
    return test_test_covar + left_factor.mm(right_factor)
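# Standalone sanity check of the identity implemented above, written against
# dense tensors with torch.inverse standing in for gpytorch.inv_matmul. All
# names and sizes are hypothetical, and it assumes self.var holds K_{mn} --
# a minimal sketch, not part of the strategy class itself.
import torch

m, n = 3, 5                                       # test points, inducing points
A = torch.randn(n, n)
K_nn = A @ A.t() + n * torch.eye(n)               # SPD inducing covariance
K_nm = torch.randn(n, m)                          # inducing-test cross covariance
K_mm = torch.eye(m)                               # placeholder test-test covariance
U = torch.triu(torch.randn(n, n)) + n * torch.eye(n)
S = U.t() @ U                                     # variational covariance S = U'U

K_nn_inv = torch.inverse(K_nn)
posterior = K_mm + K_nm.t() @ K_nn_inv @ (S - K_nn) @ K_nn_inv @ K_nm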
def test_backward_inv_mm(self):
    for n_cols in [2, 3, 4]:
        a = torch.Tensor([
            [5, -3, 0],
            [-3, 5, 0],
            [0, 0, 2],
        ])
        b = torch.ones(3, 3).fill_(2)
        c = torch.randn(3, n_cols)
        actual_a_grad = -torch.mm(
            a.inverse().mul_(0.5).mm(torch.eye(3, n_cols)),
            a.inverse().mul_(0.5).mm(c).t(),
        ) * 2 * 2
        actual_c_grad = (a.inverse() / 2).t().mm(torch.eye(3, n_cols)) * 2

        a_var = Variable(a, requires_grad=True)
        c_var = Variable(c, requires_grad=True)
        out_var = a_var.mul(Variable(b))
        out_var = gpytorch.inv_matmul(out_var, c_var)
        out_var = out_var.mul(Variable(torch.eye(3, n_cols))).sum() * 2
        out_var.backward()
        a_res = a_var.grad.data
        c_res = c_var.grad.data

        self.assertLess(torch.norm(actual_a_grad - a_res), 1e-4)
        self.assertLess(torch.norm(actual_c_grad - c_res), 1e-4)
def test_inv_matmul(self):
    labels_var = Variable(torch.randn(4))
    grad_output = torch.randn(4)

    # Test case
    c1_var = Variable(torch.Tensor([5, 1, 2, 0]), requires_grad=True)
    c2_var = Variable(torch.Tensor([12.5, 2.5, 5, 0]), requires_grad=True)
    toeplitz_lazy_var = ToeplitzLazyVariable(c1_var) * 2.5
    actual = ToeplitzLazyVariable(c2_var)

    # Test forward
    with gpytorch.settings.max_cg_iterations(1000):
        res = toeplitz_lazy_var.inv_matmul(labels_var)
        actual = gpytorch.inv_matmul(actual, labels_var)

    # Test backwards
    res.backward(grad_output)
    actual.backward(grad_output)

    self.assertLess(
        math.fabs(res.data.squeeze()[0] - actual.data.squeeze()[0]),
        6e-1,
    )
    self.assertLess(math.fabs(c1_var.grad.data[0] - c2_var.grad.data[0]), 1)
def test_batch_inv_matmul(self):
    labels_var = torch.randn(2, 4, 1, requires_grad=True)
    labels_var_copy = labels_var.clone().detach().requires_grad_(True)
    grad_output = torch.randn(2, 4, 1)

    # Test case
    c1_var = torch.tensor([[5, 1, 2, 0]], dtype=torch.float).repeat(2, 1)
    c2_var = torch.tensor([[5, 1, 2, 0]], dtype=torch.float).repeat(2, 1)
    c1_var.requires_grad = True
    c2_var.requires_grad = True
    toeplitz_lazy_var = ToeplitzLazyTensor(c1_var) * torch.tensor([2.5, 1.])
    actual = ToeplitzLazyTensor(c2_var).evaluate() * torch.tensor([2.5, 1.]).view(2, 1, 1)

    # Test forward
    with gpytorch.settings.max_cg_iterations(1000):
        res = toeplitz_lazy_var.inv_matmul(labels_var)
        actual = gpytorch.inv_matmul(actual, labels_var_copy)

    # Test backwards
    res.backward(grad_output)
    actual.backward(grad_output)

    for i in range(c1_var.size(0)):
        for j in range(c1_var.size(1)):
            self.assertLess(math.fabs(res[i, j].item() - actual[i, j].item()), 1e-2)
            self.assertLess(
                math.fabs(c1_var.grad[i, j].item() - c2_var.grad[i, j].item()), 1e-2)
def pending_test_inv_matmul():
    left_interp_indices = Variable(torch.LongTensor([[2, 3], [3, 4], [4, 5]]))
    left_interp_values = Variable(torch.Tensor([[1, 2], [0.5, 1], [1, 3]]))
    right_interp_indices = Variable(torch.LongTensor([[2, 3], [3, 4], [4, 5]]))
    right_interp_values = Variable(torch.Tensor([[1, 2], [0.5, 1], [1, 3]]))

    base_lazy_variable_mat = torch.randn(6, 6)
    base_lazy_variable_mat = base_lazy_variable_mat.t().matmul(base_lazy_variable_mat)
    base_lazy_variable = NonLazyVariable(Variable(base_lazy_variable_mat))
    test_matrix = torch.randn(3, 4)

    interp_lazy_var = InterpolatedLazyVariable(
        base_lazy_variable,
        left_interp_indices,
        left_interp_values,
        right_interp_indices,
        right_interp_values,
    )
    res = interp_lazy_var.inv_matmul(Variable(test_matrix)).data

    left_matrix = torch.Tensor([
        [0, 0, 1, 2, 0, 0],
        [0, 0, 0, 0.5, 1, 0],
        [0, 0, 0, 0, 1, 3],
    ])
    right_matrix = torch.Tensor([
        [0, 0, 1, 2, 0, 0],
        [0, 0, 0, 0.5, 1, 0],
        [0, 0, 0, 0, 1, 3],
    ])
    actual_mat = Variable(left_matrix.matmul(base_lazy_variable_mat).matmul(right_matrix.t()))
    actual = gpytorch.inv_matmul(actual_mat, Variable(test_matrix)).data
    assert approx_equal(res, actual)
def exact_posterior_covar(self, test_train_covar, train_test_covar, test_test_covar): """ Returns the covar of the posterior GP on test points, given prior means/covars Assumes self.var is train_train_covar (prior covariance matrix between train points) ((Lazy)Variable nxn) Args: - test_train_covar ((Lazy)Variable nxm) - prior covariance matrix between test and training points. Usually, this is simply the transpose of train_test_covar. - train_test_covar ((Lazy)Variable nxm) - prior covariance matrix between training and test points. - test_test_covar ((Lazy)Variable mxm) - prior covariance matrix between test points """ from ..lazy import NonLazyVariable, MatmulLazyVariable if isinstance(train_test_covar, LazyVariable): train_test_covar = train_test_covar.evaluate() if isinstance(test_train_covar, LazyVariable): test_train_covar = train_test_covar.t() if not isinstance(test_test_covar, LazyVariable): test_test_covar = NonLazyVariable(test_test_covar) covar_correction_rhs = gpytorch.inv_matmul(self.var, train_test_covar).mul_(-1) return test_test_covar + MatmulLazyVariable(test_train_covar, covar_correction_rhs)
def test_inv_matmul(self):
    base_lazy_variable_mat = torch.randn(6, 6)
    base_lazy_variable_mat = base_lazy_variable_mat.t().matmul(base_lazy_variable_mat)
    test_matrix = torch.randn(3, 4)

    # Index tensors are integer-typed and cannot require gradients
    left_interp_indices = Variable(torch.LongTensor([[2, 3], [3, 4], [4, 5]]))
    left_interp_values = Variable(torch.Tensor([[1, 2], [0.5, 1], [1, 3]]),
                                  requires_grad=True)
    right_interp_indices = Variable(torch.LongTensor([[2, 3], [3, 4], [4, 5]]))
    right_interp_values = Variable(torch.Tensor([[1, 2], [0.5, 1], [1, 3]]),
                                   requires_grad=True)
    left_interp_values_copy = Variable(left_interp_values.data, requires_grad=True)
    right_interp_values_copy = Variable(right_interp_values.data, requires_grad=True)

    base_lazy_variable = Variable(base_lazy_variable_mat, requires_grad=True)
    base_lazy_variable_copy = Variable(base_lazy_variable_mat, requires_grad=True)
    test_matrix_var = Variable(test_matrix, requires_grad=True)
    test_matrix_var_copy = Variable(test_matrix, requires_grad=True)

    interp_lazy_var = InterpolatedLazyVariable(
        NonLazyVariable(base_lazy_variable),
        left_interp_indices,
        left_interp_values,
        right_interp_indices,
        right_interp_values,
    )
    res = interp_lazy_var.inv_matmul(test_matrix_var)

    left_matrix = Variable(torch.zeros(3, 6))
    right_matrix = Variable(torch.zeros(3, 6))
    left_matrix.scatter_(1, left_interp_indices, left_interp_values_copy)
    right_matrix.scatter_(1, right_interp_indices, right_interp_values_copy)
    actual_mat = left_matrix.matmul(base_lazy_variable_copy).matmul(
        right_matrix.transpose(-1, -2))
    actual = gpytorch.inv_matmul(actual_mat, test_matrix_var_copy)
    self.assertTrue(approx_equal(res.data, actual.data))

    # Backward pass
    res.sum().backward()
    actual.sum().backward()
    self.assertTrue(approx_equal(base_lazy_variable.grad.data,
                                 base_lazy_variable_copy.grad.data))
    self.assertTrue(approx_equal(left_interp_values.grad.data,
                                 left_interp_values_copy.grad.data))
def test_inv_matmul(self):
    base_lazy_tensor_mat = torch.randn(6, 6)
    base_lazy_tensor_mat = base_lazy_tensor_mat.t().matmul(base_lazy_tensor_mat)
    test_matrix = torch.randn(3, 4)

    left_interp_indices = torch.LongTensor([[2, 3], [3, 4], [4, 5]])
    left_interp_values = torch.tensor([[1, 2], [0.5, 1], [1, 3]], dtype=torch.float)
    left_interp_values_copy = left_interp_values.clone()
    left_interp_values.requires_grad = True
    left_interp_values_copy.requires_grad = True
    right_interp_indices = torch.LongTensor([[2, 3], [3, 4], [4, 5]])
    right_interp_values = torch.tensor([[1, 2], [0.5, 1], [1, 3]], dtype=torch.float)
    right_interp_values_copy = right_interp_values.clone()
    right_interp_values.requires_grad = True
    right_interp_values_copy.requires_grad = True

    # Clone before setting requires_grad: binding the same tensor to both
    # names would make the two backward passes accumulate into a single .grad
    base_lazy_tensor = base_lazy_tensor_mat.clone()
    base_lazy_tensor_copy = base_lazy_tensor_mat.clone()
    base_lazy_tensor.requires_grad = True
    base_lazy_tensor_copy.requires_grad = True
    test_matrix_tensor = test_matrix.clone()
    test_matrix_tensor_copy = test_matrix.clone()
    test_matrix_tensor.requires_grad = True
    test_matrix_tensor_copy.requires_grad = True

    interp_lazy_tensor = InterpolatedLazyTensor(
        NonLazyTensor(base_lazy_tensor),
        left_interp_indices,
        left_interp_values,
        right_interp_indices,
        right_interp_values,
    )
    res = interp_lazy_tensor.inv_matmul(test_matrix_tensor)

    left_matrix = torch.zeros(3, 6)
    right_matrix = torch.zeros(3, 6)
    left_matrix.scatter_(1, left_interp_indices, left_interp_values_copy)
    right_matrix.scatter_(1, right_interp_indices, right_interp_values_copy)
    actual_mat = left_matrix.matmul(base_lazy_tensor_copy).matmul(
        right_matrix.transpose(-1, -2))
    actual = gpytorch.inv_matmul(actual_mat, test_matrix_tensor_copy)
    self.assertTrue(approx_equal(res, actual))

    # Backward pass
    res.sum().backward()
    actual.sum().backward()
    self.assertTrue(approx_equal(base_lazy_tensor.grad, base_lazy_tensor_copy.grad))
    self.assertTrue(approx_equal(left_interp_values.grad, left_interp_values_copy.grad))
def test_inv_matmul(self):
    c_1 = torch.tensor([4, 1, 1], dtype=torch.float, requires_grad=True)
    c_2 = torch.tensor([4, 1, 1], dtype=torch.float, requires_grad=True)
    T_1 = torch.zeros(3, 3)
    for i in range(3):
        for j in range(3):
            T_1[i, j] = c_1[abs(i - j)]
    T_2 = gpytorch.lazy.ToeplitzLazyTensor(c_2)

    B = torch.randn(3, 4)

    res_1 = gpytorch.inv_matmul(T_1, B).sum()
    res_2 = gpytorch.inv_matmul(T_2, B).sum()

    res_1.backward()
    res_2.backward()

    self.assertLess(torch.norm(res_1 - res_2), 1e-4)
    self.assertLess(torch.norm(c_1.grad - c_2.grad), 1e-4)
def interpolate(idx_train, idx_test, res_pred_train, Gamma):
    idx_train = idx_train.cpu().detach().numpy()
    idx_test = idx_test.cpu().detach().numpy()
    idx = np.arange(Gamma.shape[0])
    idx_val = np.setdiff1d(idx, np.concatenate((idx_train, idx_test)))
    idx_test_val = np.concatenate((idx_test, idx_val))
    test_val_Gamma = Gamma[idx_test_val, :][:, idx_test_val]
    # Conditional mean of the unlabelled nodes given the training residuals:
    # -Gamma_{uu}^{-1} Gamma_{ut} r_t, where u = test + validation nodes
    res_pred_test = inv_matmul(
        test_val_Gamma,
        -matmul(Gamma[idx_test_val, :][:, idx_train], res_pred_train))
    return res_pred_test[:len(idx_test)]
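# The solve above is the standard GMRF conditional-mean identity: for a joint
# precision matrix Gamma over (train, rest), E[x_rest | x_train] =
# -Gamma_rr^{-1} Gamma_rt x_train, which matches the covariance-form
# conditional mean. A minimal self-contained check with a hypothetical dense
# SPD Gamma:
import torch

torch.manual_seed(0)
n = 5
A = torch.randn(n, n, dtype=torch.double)
Gamma = A @ A.t() + n * torch.eye(n, dtype=torch.double)   # precision matrix
Sigma = torch.inverse(Gamma)                               # covariance matrix
t = torch.tensor([0, 1])                                   # train nodes
r = torch.tensor([2, 3, 4])                                # remaining nodes
x_t = torch.randn(2, dtype=torch.double)

# Precision form vs. covariance form of the conditional mean
mean_prec = -torch.linalg.solve(Gamma[r][:, r], Gamma[r][:, t] @ x_t)
mean_cov = Sigma[r][:, t] @ torch.linalg.solve(Sigma[t][:, t], x_t)
assert torch.allclose(mean_prec, mean_cov)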
def test_function_factory(self):
    # 1d
    diag_var1 = Variable(diag, requires_grad=True)
    diag_var2 = Variable(diag, requires_grad=True)
    test_mat = torch.Tensor([3, 4, 5])

    diag_lv = DiagLazyVariable(diag_var1)
    diag_ev = DiagLazyVariable(diag_var2).evaluate()

    # Forward
    res = diag_lv.inv_matmul(Variable(test_mat))
    actual = gpytorch.inv_matmul(diag_ev, Variable(test_mat))
    self.assertLess(torch.norm(res.data - actual.data), 1e-4)

    # Backward
    res.sum().backward()
    actual.sum().backward()
    self.assertLess(torch.norm(diag_var1.grad.data - diag_var2.grad.data), 1e-3)

    # 2d
    diag_var1 = Variable(diag, requires_grad=True)
    diag_var2 = Variable(diag, requires_grad=True)
    test_mat = torch.eye(3)

    diag_lv = DiagLazyVariable(diag_var1)
    diag_ev = DiagLazyVariable(diag_var2).evaluate()

    # Forward
    res = diag_lv.inv_matmul(Variable(test_mat))
    actual = gpytorch.inv_matmul(diag_ev, Variable(test_mat))
    self.assertLess(torch.norm(res.data - actual.data), 1e-4)

    # Backward
    res.sum().backward()
    actual.sum().backward()
    self.assertLess(torch.norm(diag_var1.grad.data - diag_var2.grad.data), 1e-3)
def exact_posterior_covar(self, test_train_covar, train_test_covar, test_test_covar):
    # TODO: Add a diagonal only mode / use implicit math
    train_test_covar = train_test_covar.evaluate()
    test_train_covar = train_test_covar.t()
    test_test_covar = test_test_covar.evaluate()

    # Temporarily raise the CG iteration budget for this solve; save and
    # restore the original value rather than dividing (which leaves a float)
    orig_max_cg_iterations = gpytorch.functions.max_cg_iterations
    gpytorch.functions.max_cg_iterations = orig_max_cg_iterations * 10
    test_test_covar_correction = torch.matmul(
        test_train_covar, gpytorch.inv_matmul(self.var, train_test_covar))
    gpytorch.functions.max_cg_iterations = orig_max_cg_iterations
    return test_test_covar.sub(test_test_covar_correction)
def exact_posterior_covar(self, test_train_covar, train_test_covar, test_test_covar):
    # TODO: Add a diagonal only mode / use implicit math
    if isinstance(train_test_covar, LazyVariable):
        train_test_covar = train_test_covar.evaluate()
    if isinstance(test_train_covar, LazyVariable):
        test_train_covar = train_test_covar.t()
    if isinstance(test_test_covar, LazyVariable):
        test_test_covar = test_test_covar.evaluate()

    test_test_covar_correction = torch.mm(
        test_train_covar, gpytorch.inv_matmul(self.var, train_test_covar))
    return test_test_covar.sub(test_test_covar_correction)
def test_forward_inv_mv(self):
    a = torch.Tensor([
        [5, -3, 0],
        [-3, 5, 0],
        [0, 0, 2],
    ])
    b = torch.randn(3)
    actual = a.inverse().mv(b)

    a_var = Variable(a)
    b_var = Variable(b)
    out_var = gpytorch.inv_matmul(a_var, b_var)
    res = out_var.data

    self.assertLess(torch.norm(actual - res), 1e-4)
def test_forward_inv_mm():
    for n_cols in [2, 3, 4]:
        a = torch.Tensor([
            [5, -3, 0],
            [-3, 5, 0],
            [0, 0, 2],
        ])
        b = torch.randn(3, n_cols)
        actual = a.inverse().mm(b)

        a_var = Variable(a)
        b_var = Variable(b)
        out_var = gpytorch.inv_matmul(a_var, b_var)
        res = out_var.data

        assert torch.norm(actual - res) < 1e-4
def loss_fcn(output, labels, idx, S, coeffs, device, add_logdet):
    rL = labels - output
    S = S.to_dense()
    Gamma = (torch.eye(S.size(0)).to(device) -
             torch.tanh(coeffs[0]) * S.to(device)) * torch.exp(coeffs[1])
    cp_idx = setdiff(len(S), idx)
    # Quadratic form with the Schur complement of Gamma over the complement
    # indices: Gamma_ii - Gamma_ic Gamma_cc^{-1} Gamma_ci
    loss1 = rL.dot(
        matmul(Gamma[idx, :][:, idx], rL) - matmul(
            Gamma[idx, :][:, cp_idx],
            inv_matmul(Gamma[cp_idx, :][:, cp_idx],
                       matmul(Gamma[cp_idx, :][:, idx], rL))))
    loss2 = 0.
    if add_logdet:
        loss2 = logdet(Gamma) - logdet(Gamma[cp_idx, :][:, cp_idx])
    loss = loss1 - loss2
    return loss / len(idx)
def loss(output, labels, idx, S, coeffs, add_logdet):
    output = output.view(-1)
    rL = labels[idx] - output[idx]
    S = S.to_dense()
    # I: identity matrix matching S, assumed to be defined at module scope
    Gamma = (I - torch.tanh(coeffs[0]) * S) * torch.exp(coeffs[1])
    cp_idx = setdiff(len(S), idx)
    loss1 = rL.dot(
        matmul(Gamma[idx, :][:, idx], rL) - matmul(
            Gamma[idx, :][:, cp_idx],
            inv_matmul(Gamma[cp_idx, :][:, cp_idx],
                       matmul(Gamma[cp_idx, :][:, idx], rL))))
    loss2 = torch.Tensor([0.]).cuda() if args.cuda else torch.Tensor([0.])
    if add_logdet:
        loss2 = logdet(Gamma) - logdet(Gamma[cp_idx, :][:, cp_idx])
    total = loss1 - loss2
    return total / len(idx)
def loss_fcn(output, labels, idx, S, coeffs, add_logdet):
    output, labels = output.squeeze(), labels.squeeze()
    rL = labels - output
    S = S.to_dense()
    Gamma = (torch.eye(S.size(0)).cuda() -
             torch.tanh(coeffs[0]) * S.cuda()) * torch.exp(coeffs[1])
    cp_idx = setdiff(len(S), idx)
    loss1 = rL.dot(
        matmul(Gamma[idx, :][:, idx], rL) - matmul(
            Gamma[idx, :][:, cp_idx],
            inv_matmul(Gamma[cp_idx, :][:, cp_idx],
                       matmul(Gamma[cp_idx, :][:, idx], rL))))
    loss2 = torch.Tensor([0.]).cuda()
    if add_logdet:
        loss2 = logdet(Gamma) - logdet(Gamma[cp_idx, :][:, cp_idx])
    total = loss1 - loss2
    return total / len(idx)
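# The three loss variants above rely on the same two linear-algebra facts
# about the precision matrix Gamma: the marginal precision of the labelled
# block is the Schur complement over the complement indices, and the paired
# logdet difference equals the logdet of that Schur complement. A minimal
# self-contained check with a hypothetical dense SPD Gamma:
import torch

torch.manual_seed(0)
n = 6
A = torch.randn(n, n, dtype=torch.double)
Gamma = A @ A.t() + n * torch.eye(n, dtype=torch.double)   # SPD precision matrix
idx = torch.tensor([0, 2])                                 # labelled nodes
cp_idx = torch.tensor([1, 3, 4, 5])                        # complement nodes

G_ii = Gamma[idx][:, idx]
G_ic = Gamma[idx][:, cp_idx]
G_cc = Gamma[cp_idx][:, cp_idx]

# Schur complement: the marginal precision of the labelled block
schur = G_ii - G_ic @ torch.inverse(G_cc) @ G_ic.t()

# logdet(Gamma) - logdet(G_cc) == logdet(schur)
assert torch.allclose(torch.logdet(Gamma) - torch.logdet(G_cc),
                      torch.logdet(schur))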
def test_backward_inv_mv():
    a = torch.Tensor([
        [5, -3, 0],
        [-3, 5, 0],
        [0, 0, 2],
    ])
    b = torch.ones(3, 3).fill_(2)
    c = torch.randn(3)
    actual_a_grad = -torch.ger(
        a.inverse().mul_(0.5).mv(torch.ones(3)),
        a.inverse().mul_(0.5).mv(c),
    ) * 2 * 2
    actual_c_grad = (a.inverse() / 2).t().mv(torch.ones(3)) * 2

    a_var = Variable(a, requires_grad=True)
    c_var = Variable(c, requires_grad=True)
    out_var = a_var.mul(Variable(b))
    out_var = gpytorch.inv_matmul(out_var, c_var)
    out_var = out_var.sum() * 2
    out_var.backward()
    a_res = a_var.grad.data
    c_res = c_var.grad.data

    assert torch.norm(actual_a_grad - a_res) < 1e-4
    assert torch.norm(actual_c_grad - c_res) < 1e-4
def exact_posterior_alpha(self, train_mean, train_y):
    # alpha = K_{nn}^{-1} (y - mu), cached and reused for every test prediction
    return gpytorch.inv_matmul(self.var, train_y - train_mean)
def variational_posterior_alpha(self, variational_mean):
    # alpha = K_{nn}^{-1} m, where m is the variational mean
    return gpytorch.inv_matmul(self.var, variational_mean)
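# Both alpha caches follow the same pattern: solve once against the train or
# inducing covariance, then reuse the solution for every batch of test points.
# A minimal dense sketch with hypothetical data, torch.linalg.solve standing
# in for gpytorch.inv_matmul:
import torch

n, m = 5, 3                                       # train points, test points
A = torch.randn(n, n)
train_train = A @ A.t() + n * torch.eye(n)        # prior covariance (self.var)
train_y = torch.randn(n)
train_mean = torch.zeros(n)
test_train = torch.randn(m, n)                    # test-train cross covariance

alpha = torch.linalg.solve(train_train, train_y - train_mean)
predictive_mean = test_train @ alpha              # reused for any test batch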
def test_inv_matmul_batch(self):
    base_lazy_variable_mat = torch.randn(6, 6)
    base_lazy_variable_mat = (base_lazy_variable_mat.t().matmul(
        base_lazy_variable_mat)).unsqueeze(0).repeat(5, 1, 1)
    test_matrix = torch.randn(5, 3, 4)

    # Index tensors are integer-typed and cannot require gradients
    left_interp_indices = Variable(
        torch.LongTensor([[2, 3], [3, 4], [4, 5]]).unsqueeze(0).repeat(5, 1, 1))
    left_interp_values = Variable(
        torch.Tensor([[1, 2], [0.5, 1], [1, 3]]).unsqueeze(0).repeat(5, 1, 1),
        requires_grad=True)
    right_interp_indices = Variable(
        torch.LongTensor([[2, 3], [3, 4], [4, 5]]).unsqueeze(0).repeat(5, 1, 1))
    right_interp_values = Variable(
        torch.Tensor([[1, 2], [0.5, 1], [1, 3]]).unsqueeze(0).repeat(5, 1, 1),
        requires_grad=True)
    left_interp_values_copy = Variable(left_interp_values.data, requires_grad=True)
    right_interp_values_copy = Variable(right_interp_values.data, requires_grad=True)

    base_lazy_variable = Variable(base_lazy_variable_mat, requires_grad=True)
    base_lazy_variable_copy = Variable(base_lazy_variable_mat, requires_grad=True)
    test_matrix_var = Variable(test_matrix, requires_grad=True)
    test_matrix_var_copy = Variable(test_matrix, requires_grad=True)

    interp_lazy_var = InterpolatedLazyVariable(
        NonLazyVariable(base_lazy_variable),
        left_interp_indices,
        left_interp_values,
        right_interp_indices,
        right_interp_values,
    )
    res = interp_lazy_var.inv_matmul(test_matrix_var)

    left_matrix_comps = []
    right_matrix_comps = []
    for i in range(5):
        left_matrix_comp = Variable(torch.zeros(3, 6))
        right_matrix_comp = Variable(torch.zeros(3, 6))
        left_matrix_comp.scatter_(1, left_interp_indices[i], left_interp_values_copy[i])
        right_matrix_comp.scatter_(1, right_interp_indices[i], right_interp_values_copy[i])
        left_matrix_comps.append(left_matrix_comp.unsqueeze(0))
        right_matrix_comps.append(right_matrix_comp.unsqueeze(0))
    left_matrix = torch.cat(left_matrix_comps)
    right_matrix = torch.cat(right_matrix_comps)

    actual_mat = left_matrix.matmul(base_lazy_variable_copy).matmul(
        right_matrix.transpose(-1, -2))
    actual = gpytorch.inv_matmul(actual_mat, test_matrix_var_copy)
    self.assertTrue(approx_equal(res.data, actual.data))

    # Backward pass
    res.sum().backward()
    actual.sum().backward()
    self.assertTrue(approx_equal(base_lazy_variable.grad.data,
                                 base_lazy_variable_copy.grad.data))
    self.assertTrue(approx_equal(left_interp_values.grad.data,
                                 left_interp_values_copy.grad.data))
def test_inv_matmul_batch(self):
    base_lazy_tensor = torch.randn(6, 6)
    base_lazy_tensor = (base_lazy_tensor.t().matmul(
        base_lazy_tensor)).unsqueeze(0).repeat(5, 1, 1)
    base_lazy_tensor_copy = base_lazy_tensor.clone()
    base_lazy_tensor.requires_grad = True
    base_lazy_tensor_copy.requires_grad = True
    test_matrix_tensor = torch.randn(5, 3, 4)
    test_matrix_tensor_copy = test_matrix_tensor.clone()
    test_matrix_tensor.requires_grad = True
    test_matrix_tensor_copy.requires_grad = True

    left_interp_indices = torch.LongTensor(
        [[2, 3], [3, 4], [4, 5]]).unsqueeze(0).repeat(5, 1, 1)
    left_interp_values = torch.tensor(
        [[1, 2], [0.5, 1], [1, 3]], dtype=torch.float).unsqueeze(0).repeat(5, 1, 1)
    left_interp_values_copy = left_interp_values.clone()
    left_interp_values.requires_grad = True
    left_interp_values_copy.requires_grad = True
    right_interp_indices = torch.LongTensor(
        [[2, 3], [3, 4], [4, 5]]).unsqueeze(0).repeat(5, 1, 1)
    right_interp_values = torch.tensor(
        [[1, 2], [0.5, 1], [1, 3]], dtype=torch.float).unsqueeze(0).repeat(5, 1, 1)
    right_interp_values_copy = right_interp_values.clone()
    right_interp_values.requires_grad = True
    right_interp_values_copy.requires_grad = True

    interp_lazy_tensor = InterpolatedLazyTensor(
        NonLazyTensor(base_lazy_tensor),
        left_interp_indices,
        left_interp_values,
        right_interp_indices,
        right_interp_values,
    )
    res = interp_lazy_tensor.inv_matmul(test_matrix_tensor)

    left_matrix_comps = []
    right_matrix_comps = []
    for i in range(5):
        left_matrix_comp = torch.zeros(3, 6)
        right_matrix_comp = torch.zeros(3, 6)
        left_matrix_comp.scatter_(1, left_interp_indices[i], left_interp_values_copy[i])
        right_matrix_comp.scatter_(1, right_interp_indices[i], right_interp_values_copy[i])
        left_matrix_comps.append(left_matrix_comp.unsqueeze(0))
        right_matrix_comps.append(right_matrix_comp.unsqueeze(0))
    left_matrix = torch.cat(left_matrix_comps)
    right_matrix = torch.cat(right_matrix_comps)

    actual_mat = left_matrix.matmul(base_lazy_tensor_copy).matmul(
        right_matrix.transpose(-1, -2))
    actual = gpytorch.inv_matmul(actual_mat, test_matrix_tensor_copy)
    self.assertTrue(approx_equal(res, actual))

    # Backward pass
    res.sum().backward()
    actual.sum().backward()
    self.assertTrue(approx_equal(base_lazy_tensor.grad, base_lazy_tensor_copy.grad))
    self.assertTrue(approx_equal(left_interp_values.grad, left_interp_values_copy.grad))
def __call__(self, inputs, **kwargs):
    if self.exact_inference:
        raise RuntimeError('At the moment, the InducingPointModule only works '
                           'for variational inference')

    # Training mode: optimizing
    if self.training:
        if not torch.equal(inputs.data, self._inducing_points):
            raise RuntimeError('At the moment, we assume that the inducing_points '
                               'are the training inputs.')

        prior_output = self.prior_output()
        # Initialize variational parameters, if necessary
        if not self.variational_params_initialized[0]:
            mean_init = prior_output.mean().data
            chol_covar_init = torch.eye(len(mean_init)).type_as(mean_init)
            self.variational_mean.data.copy_(mean_init)
            self.chol_variational_covar.data.copy_(chol_covar_init)
            self.variational_params_initialized.fill_(1)

        variational_output = self.variational_output()
        new_variational_strategy = MVNVariationalStrategy(variational_output, prior_output)
        self.update_variational_strategy('inducing_point_strategy', new_variational_strategy)
        return variational_output

    # Posterior mode
    elif self.posterior:
        variational_output = self.variational_output()

        n_induc = len(self._inducing_points)
        full_inputs = torch.cat([Variable(self._inducing_points), inputs])
        full_output = super(InducingPointModule, self).__call__(full_inputs)
        full_mean, full_covar = full_output.representation()

        induc_mean = full_mean[:n_induc]
        test_mean = full_mean[n_induc:]
        induc_induc_covar = full_covar[:n_induc, :n_induc]
        induc_test_covar = full_covar[:n_induc, n_induc:]
        test_induc_covar = full_covar[n_induc:, :n_induc]
        test_test_covar = full_covar[n_induc:, n_induc:]

        # Calculate posterior components
        if not self.has_computed_alpha[0]:
            alpha = gpytorch.inv_matmul(induc_induc_covar,
                                        variational_output.mean() - induc_mean)
            self.alpha.copy_(alpha.data)
            self.has_computed_alpha.fill_(1)
        else:
            alpha = Variable(self.alpha)
        test_mean = torch.add(test_mean, test_induc_covar.matmul(alpha))

        # Test covariance
        if isinstance(induc_test_covar, LazyVariable):
            induc_test_covar = induc_test_covar.evaluate()
        inv_product = gpytorch.inv_matmul(induc_induc_covar, induc_test_covar)
        factor = variational_output.covar().chol_matmul(inv_product)
        right_factor = factor - inv_product
        left_factor = (factor - induc_test_covar).transpose(-1, -2)

        if not isinstance(test_test_covar, LazyVariable):
            test_test_covar = NonLazyVariable(test_test_covar)
        test_covar = test_test_covar + MatmulLazyVariable(left_factor, right_factor)

        output = GaussianRandomVariable(test_mean, test_covar)
        return output

    # Prior mode
    else:
        return super(InducingPointModule, self).__call__(inputs)
def __call__(self, inputs, **kwargs):
    # Training mode: optimizing
    if self.training:
        if not torch.equal(inputs.data, self.inducing_points):
            raise RuntimeError('You must train on the training inputs!')

        prior_output = self.prior_output()
        # Initialize variational parameters, if necessary
        if not self.variational_params_initialized[0]:
            mean_init = prior_output.mean().data
            chol_covar_init = torch.eye(len(mean_init)).type_as(mean_init)
            self.variational_mean.data.copy_(mean_init)
            self.chol_variational_covar.data.copy_(chol_covar_init)
            self.variational_params_initialized.fill_(1)

        variational_output = self.variational_output()
        new_variational_strategy = MVNVariationalStrategy(variational_output, prior_output)
        self.update_variational_strategy('inducing_point_strategy', new_variational_strategy)
        return variational_output

    # Posterior mode
    else:
        variational_output = self.variational_output()

        n_induc = len(self.inducing_points)
        full_inputs = torch.cat([Variable(self.inducing_points), inputs])
        full_output = super(VariationalGP, self).__call__(full_inputs)
        full_mean, full_covar = full_output.representation()

        induc_mean = full_mean[:n_induc]
        test_mean = full_mean[n_induc:]
        induc_induc_covar = full_covar[:n_induc, :n_induc]
        induc_test_covar = full_covar[:n_induc, n_induc:]
        test_induc_covar = full_covar[n_induc:, :n_induc]
        test_test_covar = full_covar[n_induc:, n_induc:]

        # Compute alpha cache
        if not self.has_computed_alpha:
            self.alpha = gpytorch.inv_matmul(induc_induc_covar,
                                             variational_output.mean() - induc_mean)
            self.has_computed_alpha = True

        # Compute root caches, if necessary
        if not self.has_computed_root and beta_features.fast_pred_var.on():
            if not isinstance(induc_induc_covar, LazyVariable):
                induc_induc_covar = NonLazyVariable(induc_induc_covar)
            self.prior_root_inv = induc_induc_covar.root_inv_decomposition()
            chol_variational_output = variational_output.covar().root.evaluate()
            self.variational_root = gpytorch.inv_matmul(induc_induc_covar,
                                                        chol_variational_output)
            self.has_computed_root = True

        # Test mean
        predictive_mean = torch.add(test_mean, test_induc_covar.matmul(self.alpha))

        # Test covariance
        if not isinstance(test_test_covar, LazyVariable):
            predictive_covar = NonLazyVariable(test_test_covar)
        else:
            predictive_covar = test_test_covar
        if beta_features.fast_pred_var.on():
            correction = RootLazyVariable(
                test_induc_covar.matmul(self.prior_root_inv)).mul(-1)
            correction = correction + RootLazyVariable(
                test_induc_covar.matmul(self.variational_root))
            predictive_covar = predictive_covar + correction
        else:
            if isinstance(induc_test_covar, LazyVariable):
                induc_test_covar = induc_test_covar.evaluate()
            inv_product = gpytorch.inv_matmul(induc_induc_covar, induc_test_covar)
            factor = variational_output.covar().root_decomposition().matmul(inv_product)
            right_factor = factor - inv_product
            left_factor = (factor - induc_test_covar).transpose(-1, -2)
            predictive_covar = predictive_covar + MatmulLazyVariable(left_factor, right_factor)

        output = GaussianRandomVariable(predictive_mean, predictive_covar)
        return output
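# In the fast_pred_var branch above, the covariance correction is expressed
# as a difference of two outer products of "roots" (RootLazyVariable(X) plays
# the role of X X^T). A dense sketch of its assumed equivalence with the slow
# branch's formula K_tn K_nn^{-1} (S - K_nn) K_nn^{-1} K_nt, using
# hypothetical matrices with R R^T = K_nn^{-1} and L L^T = S:
import torch

torch.manual_seed(0)
n, t = 6, 3                                       # inducing points, test points
A = torch.randn(n, n, dtype=torch.double)
K_nn = A @ A.t() + n * torch.eye(n, dtype=torch.double)   # SPD inducing covariance
K_tn = torch.randn(t, n, dtype=torch.double)              # test-inducing covariance
L = torch.tril(torch.randn(n, n, dtype=torch.double)) + n * torch.eye(n, dtype=torch.double)
S = L @ L.t()                                             # variational covariance

K_nn_inv = torch.inverse(K_nn)
direct = K_tn @ K_nn_inv @ (S - K_nn) @ K_nn_inv @ K_tn.t()

R = torch.linalg.cholesky(K_nn_inv)               # root of K_nn^{-1} (prior_root_inv)
V = K_nn_inv @ L                                  # variational_root
root_form = (K_tn @ V) @ (K_tn @ V).t() - (K_tn @ R) @ (K_tn @ R).t()
assert torch.allclose(direct, root_form)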