Example 1
def test_interpolated_toeplitz_gp_marginal_log_likelihood_forward():
    x = Variable(torch.linspace(0, 1, 5))
    y = torch.randn(5)
    noise = torch.Tensor([1e-4])
    rbf_covar = RBFKernel()
    rbf_covar.initialize(log_lengthscale=-4)
    covar_module = GridInterpolationKernel(rbf_covar)
    covar_module.initialize_interpolation_grid(10, grid_bounds=(0, 1))
    covar_x = covar_module.forward(x.unsqueeze(1), x.unsqueeze(1))
    c = covar_x.c.data
    T = utils.toeplitz.sym_toeplitz(c)

    W_left = index_coef_to_sparse(covar_x.J_left, covar_x.C_left, len(c))
    W_right = index_coef_to_sparse(covar_x.J_right, covar_x.C_right, len(c))

    W_left_dense = W_left.to_dense()
    W_right_dense = W_right.to_dense()

    WTW = W_left_dense.matmul(T.matmul(W_right_dense.t())) + torch.eye(len(x)) * 1e-4

    quad_form_actual = y.dot(WTW.inverse().matmul(y))
    chol_T = torch.potrf(WTW)
    log_det_actual = chol_T.diag().log().sum() * 2

    actual = -0.5 * (log_det_actual + quad_form_actual + math.log(2 * math.pi) * len(y))

    mll = InterpolatedToeplitzGPMarginalLogLikelihood(W_left, W_right, num_samples=1000)
    res = mll(Variable(c), Variable(y), Variable(noise)).data
    assert all(torch.abs((res - actual) / actual) < 0.05)
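
Note: the quantity this test reconstructs by hand is the ordinary zero-mean Gaussian log marginal likelihood, -0.5 * (y' K^-1 y + log det K + n log 2pi). A minimal sketch of the same computation in current PyTorch (the helper name is ours, and torch.linalg is assumed available):

import math
import torch

def exact_gp_mll(K, y, noise):
    """log N(y; 0, K + noise * I), evaluated through a Cholesky factor."""
    n = y.numel()
    L = torch.linalg.cholesky(K + noise * torch.eye(n))
    alpha = torch.cholesky_solve(y.unsqueeze(-1), L).squeeze(-1)
    log_det = 2 * L.diagonal().log().sum()
    return -0.5 * (y.dot(alpha) + log_det + n * math.log(2 * math.pi))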
Example 2
    def test_ard_batch(self):
        a = torch.tensor([[[1, 2, 3], [2, 4, 0]], [[-1, 1, 2], [2, 1, 4]]],
                         dtype=torch.float)
        b = torch.tensor([[[1, 3, 1]], [[2, -1, 0]]],
                         dtype=torch.float).repeat(1, 2, 1)
        lengthscales = torch.tensor([[[1, 2, 1]]], dtype=torch.float)

        kernel = RBFKernel(batch_shape=torch.Size([2]), ard_num_dims=3)
        kernel.initialize(lengthscale=lengthscales)
        kernel.eval()

        scaled_a = a.div(lengthscales)
        scaled_b = b.div(lengthscales)
        actual = (scaled_a.unsqueeze(-2) -
                  scaled_b.unsqueeze(-3)).pow(2).sum(dim=-1).mul_(-0.5).exp()
        res = kernel(a, b).evaluate()
        self.assertLess(torch.norm(res - actual), 1e-5)

        # diag
        res = kernel(a, b).diag()
        actual = actual.diagonal(dim1=-1, dim2=-2)
        self.assertLess(torch.norm(res - actual), 1e-5)

        # batch_dims
        double_batch_a = scaled_a.transpose(-1, -2).unsqueeze(-1)
        double_batch_b = scaled_b.transpose(-1, -2).unsqueeze(-2)
        actual = double_batch_a - double_batch_b
        actual = actual.pow(2).mul_(-0.5).exp()
        res = kernel(a, b, last_dim_is_batch=True).evaluate()
        self.assertLess(torch.norm(res - actual), 1e-5)

        # batch_dims and diag
        res = kernel(a, b, last_dim_is_batch=True).diag()
        actual = actual.diagonal(dim1=-2, dim2=-1)
        self.assertLess(torch.norm(res - actual), 1e-5)
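
Note: every `actual` above instantiates the same ARD RBF formula, k(x, x') = exp(-0.5 * sum_d ((x_d - x'_d) / l_d)^2), with one lengthscale l_d per input dimension. A self-contained sketch (our helper, not part of gpytorch):

import torch

def ard_rbf(x1, x2, lengthscale):
    """ARD RBF kernel.

    x1: (..., n, d), x2: (..., m, d), lengthscale broadcastable to (..., 1, d).
    Returns (..., n, m).
    """
    diff = x1.div(lengthscale).unsqueeze(-2) - x2.div(lengthscale).unsqueeze(-3)
    return diff.pow(2).sum(dim=-1).mul(-0.5).exp()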
Example 3
    def test_ard_batch(self):
        a = torch.tensor([[[1, 2, 3], [2, 4, 0]], [[-1, 1, 2], [2, 1, 4]]], dtype=torch.float)
        b = torch.tensor([[[1, 3, 1]], [[2, -1, 0]]], dtype=torch.float).repeat(1, 2, 1)
        lengthscales = torch.tensor([[[1, 2, 1]]], dtype=torch.float)

        kernel = RBFKernel(batch_size=2, ard_num_dims=3)
        kernel.initialize(log_lengthscale=lengthscales.log())
        kernel.eval()

        scaled_a = a.div(lengthscales)
        scaled_b = b.div(lengthscales)
        actual = (scaled_a.unsqueeze(-2) - scaled_b.unsqueeze(-3)).pow(2).sum(dim=-1).mul_(-0.5).exp()
        res = kernel(a, b).evaluate()
        self.assertLess(torch.norm(res - actual), 1e-5)

        # diag
        res = kernel(a, b).diag()
        actual = torch.cat([actual[i].diag().unsqueeze(0) for i in range(actual.size(0))])
        self.assertLess(torch.norm(res - actual), 1e-5)

        # batch_dims
        actual = scaled_a.transpose(-1, -2).unsqueeze(-1) - scaled_b.transpose(-1, -2).unsqueeze(-2)
        actual = actual.pow(2).mul_(-0.5).exp().view(6, 2, 2)
        res = kernel(a, b, batch_dims=(0, 2)).evaluate()
        self.assertLess(torch.norm(res - actual), 1e-5)

        # batch_dims and diag
        res = kernel(a, b, batch_dims=(0, 2)).diag()
        actual = torch.cat([actual[i].diag().unsqueeze(0) for i in range(actual.size(0))])
        self.assertLess(torch.norm(res - actual), 1e-5)
Example 4
    def test_ard(self):
        a = torch.tensor([[[1, 2], [2, 4]]], dtype=torch.float).repeat(2, 1, 1)
        b = torch.tensor([[[1, 3], [0, 4]]], dtype=torch.float).repeat(2, 1, 1)
        lengthscales = torch.tensor([1, 2], dtype=torch.float).view(1, 1, 2)

        base_kernel = RBFKernel(ard_num_dims=2)
        base_kernel.initialize(lengthscale=lengthscales)
        kernel = ScaleKernel(base_kernel)
        kernel.initialize(outputscale=torch.tensor([3], dtype=torch.float))
        kernel.eval()

        scaled_a = a.div(lengthscales)
        scaled_b = b.div(lengthscales)
        actual = (scaled_a.unsqueeze(-2) - scaled_b.unsqueeze(-3)).pow(2).sum(dim=-1).mul_(-0.5).exp()
        actual.mul_(3)
        res = kernel(a, b).evaluate()
        self.assertLess(torch.norm(res - actual), 1e-5)

        # Diag
        res = kernel(a, b).diag()
        actual = torch.cat([actual[i].diag().unsqueeze(0) for i in range(actual.size(0))])
        self.assertLess(torch.norm(res - actual), 1e-5)

        # batch_dims
        actual = scaled_a.transpose(-1, -2).unsqueeze(-1) - scaled_b.transpose(-1, -2).unsqueeze(-2)
        actual = actual.pow(2).mul_(-0.5).exp().view(4, 2, 2)
        actual.mul_(3)
        res = kernel(a, b, batch_dims=(0, 2)).evaluate()
        self.assertLess(torch.norm(res - actual), 1e-5)

        # batch_dims and diag
        res = kernel(a, b, batch_dims=(0, 2)).diag()
        actual = torch.cat([actual[i].diag().unsqueeze(0) for i in range(actual.size(0))])
        self.assertLess(torch.norm(res - actual), 1e-5)
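
Note: ScaleKernel only multiplies the base kernel's output by its `outputscale`, which is why `actual.mul_(3)` appears in both branches above. A quick hedged check (assuming a recent gpytorch; `eval()` puts the base kernel in eval mode as well):

import torch
from gpytorch.kernels import RBFKernel, ScaleKernel

base = RBFKernel()
kernel = ScaleKernel(base)
kernel.initialize(outputscale=3.0)
kernel.eval()

x = torch.randn(5, 2)
# ScaleKernel simply rescales the base kernel's Gram matrix.
assert torch.allclose(kernel(x, x).evaluate(),
                      3.0 * base(x, x).evaluate(), atol=1e-5)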
Example 5
def test_toeplitz_mvn_kl_divergence_forward():
    x = Variable(torch.linspace(0, 1, 5))
    rbf_covar = RBFKernel()
    rbf_covar.initialize(log_lengthscale=-4)
    covar_module = GridInterpolationKernel(rbf_covar)
    covar_module.initialize_interpolation_grid(10, grid_bounds=(0, 1))
    covar_x = covar_module.forward(x.unsqueeze(1), x.unsqueeze(1))

    c = Variable(covar_x.c.data, requires_grad=True)
    mu1 = Variable(torch.randn(10), requires_grad=True)
    mu2 = Variable(torch.randn(10), requires_grad=True)

    T = Variable(torch.zeros(len(c), len(c)))
    for i in range(len(c)):
        for j in range(len(c)):
            T[i, j] = utils.toeplitz.toeplitz_getitem(c, c, i, j)

    # Random upper-triangular factor; flipping row signs makes the diagonal
    # positive, so U acts like a valid Cholesky-style factor of U^T U.
    U = torch.randn(10, 10).triu()
    U = Variable(U.mul(U.diag().sign().unsqueeze(1).expand_as(U).triu()),
                 requires_grad=True)

    actual = gpytorch.mvn_kl_divergence(mu1, U, mu2, T, num_samples=1000)

    res = gpytorch.mvn_kl_divergence(mu1, U, mu2, covar_x, num_samples=1000)

    assert all(torch.abs((res.data - actual.data) / actual.data) < 0.15)
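
Note: the Monte Carlo estimate is checked against the closed form KL(N(mu1, U'U) || N(mu2, T)) = 0.5 * (log det T - log det(U'U) + tr(T^-1 U'U) + (mu2 - mu1)' T^-1 (mu2 - mu1) - n), where log det(U'U) = 2 * sum_i log U_ii. A dense sketch of that reference value (our helper, modern torch):

import torch

def gaussian_kl(mu1, U, mu2, T):
    """KL( N(mu1, U^T U) || N(mu2, T) ) for an upper-triangular factor U."""
    n = mu1.numel()
    mu_diff = mu2 - mu1
    T_inv = torch.linalg.inv(T)
    return 0.5 * (torch.logdet(T)
                  + mu_diff @ T_inv @ mu_diff
                  + (T_inv @ (U.t() @ U)).trace()
                  - 2 * U.diagonal().log().sum()
                  - n)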
Example 6
    def test_ard(self):
        a = torch.tensor([[1, 2], [2, 4]], dtype=torch.float)
        b = torch.tensor([[1, 3], [0, 4]], dtype=torch.float)
        lengthscales = torch.tensor([1, 2], dtype=torch.float).view(1, 2)

        kernel = RBFKernel(ard_num_dims=2)
        kernel.initialize(lengthscale=lengthscales)
        kernel.eval()

        scaled_a = a.div(lengthscales)
        scaled_b = b.div(lengthscales)
        actual = (scaled_a.unsqueeze(-2) -
                  scaled_b.unsqueeze(-3)).pow(2).sum(dim=-1).mul_(-0.5).exp()
        res = kernel(a, b).evaluate()
        self.assertLess(torch.norm(res - actual), 1e-5)

        # Diag
        res = kernel(a, b).diag()
        actual = actual.diag()
        self.assertLess(torch.norm(res - actual), 1e-5)

        # batch_dims
        actual = (scaled_a.transpose(-1, -2).unsqueeze(-1) -
                  scaled_b.transpose(-1, -2).unsqueeze(-2))
        actual = actual.pow(2).mul_(-0.5).exp()
        res = kernel(a, b, last_dim_is_batch=True).evaluate()
        self.assertLess(torch.norm(res - actual), 1e-5)

        # batch_dims and diag
        res = kernel(a, b, last_dim_is_batch=True).diag()
        actual = actual.diagonal(dim1=-1, dim2=-2)
        self.assertLess(torch.norm(res - actual), 1e-5)
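
Note: with last_dim_is_batch=True the kernel treats each input dimension as a batch entry, so for x1 of shape (n, d) and x2 of shape (m, d) it returns a (d, n, m) stack of one-dimensional kernels; that is what the transpose-and-unsqueeze steps in `actual` build. Equivalently (our helper):

import torch

def rbf_per_dimension(x1, x2, lengthscale):
    """One 1-D RBF kernel per input dimension: (n, d) x (m, d) -> (d, n, m)."""
    s1 = x1.div(lengthscale).t().unsqueeze(-1)  # (d, n, 1)
    s2 = x2.div(lengthscale).t().unsqueeze(-2)  # (d, 1, m)
    return (s1 - s2).pow(2).mul(-0.5).exp()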
Example 7
def test_interpolated_toeplitz_gp_marginal_log_likelihood_backward():
    x = Variable(torch.linspace(0, 1, 5))
    y = Variable(torch.randn(5), requires_grad=True)
    noise = Variable(torch.Tensor([1e-4]), requires_grad=True)

    rbf_covar = RBFKernel()
    rbf_covar.initialize(log_lengthscale=-4)
    covar_module = GridInterpolationKernel(rbf_covar)
    covar_module.eval()
    covar_module.initialize_interpolation_grid(10, [(0, 1)])
    covar_x = covar_module.forward(x.unsqueeze(1), x.unsqueeze(1))

    c = Variable(covar_x.c.data, requires_grad=True)

    W_left = index_coef_to_sparse(covar_x.J_left, covar_x.C_left, len(c))
    W_right = index_coef_to_sparse(covar_x.J_right, covar_x.C_right, len(c))

    W_left_dense = Variable(W_left.to_dense())
    W_right_dense = Variable(W_right.to_dense())

    T = Variable(torch.zeros(len(c), len(c)))
    for i in range(len(c)):
        for j in range(len(c)):
            T[i, j] = utils.toeplitz.sym_toeplitz_getitem(c, i, j)

    WTW = W_left_dense.matmul(T.matmul(
        W_right_dense.t())) + Variable(torch.eye(len(x))) * noise

    quad_form_actual = y.dot(WTW.inverse().matmul(y))
    log_det_actual = _det(WTW).log()

    actual_nll = -0.5 * (log_det_actual + quad_form_actual +
                         math.log(2 * math.pi) * len(y))
    actual_nll.backward()

    actual_c_grad = c.grad.data.clone()
    actual_y_grad = y.grad.data.clone()
    actual_noise_grad = noise.grad.data.clone()

    c.grad.data.fill_(0)
    y.grad.data.fill_(0)
    noise.grad.data.fill_(0)

    covar_x = gpytorch.lazy.ToeplitzLazyVariable(c, covar_x.J_left,
                                                 covar_x.C_left,
                                                 covar_x.J_right,
                                                 covar_x.C_right, noise)
    res = covar_x.exact_gp_marginal_log_likelihood(y)
    res.backward()

    res_c_grad = covar_x.c.grad.data
    res_y_grad = y.grad.data
    res_noise_grad = noise.grad.data

    assert (actual_c_grad - res_c_grad).norm() / res_c_grad.norm() < 0.05
    assert (actual_y_grad - res_y_grad).norm() / res_y_grad.norm() < 1e-3
    assert (actual_noise_grad -
            res_noise_grad).norm() / res_noise_grad.norm() < 1e-3
Example 8
def test_toeplitz_mvn_kl_divergence_backward():
    x = Variable(torch.linspace(0, 1, 5))
    rbf_covar = RBFKernel()
    rbf_covar.initialize(log_lengthscale=-4)
    covar_module = GridInterpolationKernel(rbf_covar)
    covar_module.initialize_interpolation_grid(4, grid_bounds=(0, 1))
    covar_x = covar_module.forward(x.unsqueeze(1), x.unsqueeze(1))
    covar_x.c = Variable(covar_x.c.data, requires_grad=True)

    c = covar_x.c
    mu1 = Variable(torch.randn(4), requires_grad=True)
    mu2 = Variable(torch.randn(4), requires_grad=True)

    mu_diff = mu2 - mu1

    T = Variable(torch.zeros(len(c), len(c)))
    for i in range(len(c)):
        for j in range(len(c)):
            T[i, j] = utils.toeplitz.toeplitz_getitem(c, c, i, j)

    U = torch.randn(4, 4).triu()
    U = Variable(U.mul(U.diag().sign().unsqueeze(1).expand_as(U).triu()),
                 requires_grad=True)

    actual = 0.5 * (_det(T).log() + mu_diff.dot(T.inverse().mv(mu_diff)) +
                    T.inverse().mm(U.t().mm(U)).trace() -
                    U.diag().log().sum(0) * 2 - len(mu_diff))
    actual.backward()

    actual_c_grad = c.grad.data.clone()
    actual_mu1_grad = mu1.grad.data.clone()
    actual_mu2_grad = mu2.grad.data.clone()
    actual_U_grad = U.grad.data.clone()

    c.grad.data.fill_(0)
    mu1.grad.data.fill_(0)
    mu2.grad.data.fill_(0)
    U.grad.data.fill_(0)

    res = gpytorch.mvn_kl_divergence(mu1, U, mu2, covar_x, num_samples=1000)
    res.backward()

    res_c_grad = c.grad.data
    res_mu1_grad = mu1.grad.data
    res_mu2_grad = mu2.grad.data
    res_U_grad = U.grad.data

    assert (res_c_grad - actual_c_grad).abs().sum() / actual_c_grad.abs().sum() < 1e-1
    assert (res_mu1_grad - actual_mu1_grad).abs().sum() / actual_mu1_grad.abs().sum() < 1e-5
    assert (res_mu2_grad - actual_mu2_grad).abs().sum() / actual_mu2_grad.abs().sum() < 1e-5
    assert (res_U_grad - actual_U_grad).abs().sum() / actual_U_grad.abs().sum() < 1e-2
Example 9
    def test_inherit_active_dims(self):
        lengthscales = torch.tensor([1, 1], dtype=torch.float)
        base_kernel = RBFKernel(active_dims=(1, 2), ard_num_dims=2)
        base_kernel.initialize(lengthscale=lengthscales)
        kernel = ScaleKernel(base_kernel)
        kernel.initialize(outputscale=torch.tensor([3], dtype=torch.float))
        kernel.eval()
        self.assertTrue(torch.all(kernel.active_dims == base_kernel.active_dims))
Example 10
def test_trace_logdet_quad_form_factory():
    x = Variable(torch.linspace(0, 1, 10))
    rbf_covar = RBFKernel()
    rbf_covar.initialize(log_lengthscale=-4)
    covar_module = GridInterpolationKernel(rbf_covar)
    covar_module.initialize_interpolation_grid(4, grid_bounds=(0, 1))
    c = Variable(covar_module.forward(x.unsqueeze(1), x.unsqueeze(1)).c.data,
                 requires_grad=True)

    T = Variable(torch.zeros(4, 4))
    for i in range(4):
        for j in range(4):
            T[i, j] = utils.toeplitz.toeplitz_getitem(c, c, i, j)

    U = torch.randn(4, 4).triu()
    U = Variable(U.mul(U.diag().sign().unsqueeze(1).expand_as(U).triu()),
                 requires_grad=True)

    mu_diff = Variable(torch.randn(4), requires_grad=True)

    actual = (_det(T).log() + mu_diff.dot(T.inverse().mv(mu_diff)) +
              T.inverse().mm(U.t().mm(U)).trace())
    actual.backward()

    actual_c_grad = c.grad.data
    actual_mu_diff_grad = mu_diff.grad.data
    actual_U_grad = U.grad.data

    c.grad.data.fill_(0)
    mu_diff.grad.data.fill_(0)
    U.grad.data.fill_(0)

    def _mm_closure_factory(*args):
        c, = args
        return lambda mat2: utils.toeplitz.sym_toeplitz_mm(c, mat2)

    def _derivative_quadratic_form_factory(*args):
        return lambda left_vector, right_vector: (
            sym_toeplitz_derivative_quadratic_form(left_vector, right_vector),
        )

    covar_args = (c, )

    factory = trace_logdet_quad_form_factory(_mm_closure_factory,
                                             _derivative_quadratic_form_factory)
    res = factory(num_samples=1000)(mu_diff, U, *covar_args)
    res.backward()

    res_c_grad = c.grad.data
    res_mu_diff_grad = mu_diff.grad.data
    res_U_grad = U.grad.data

    assert all(torch.abs((res.data - actual.data) / actual.data) < 0.15)
    assert utils.approx_equal(res_c_grad, actual_c_grad)
    assert utils.approx_equal(res_mu_diff_grad, actual_mu_diff_grad)
    assert utils.approx_equal(res_U_grad, actual_U_grad)
Example 11
    def test_ard(self):
        a = torch.Tensor([[1, 2], [2, 4]])
        b = torch.Tensor([1, 3]).view(1, 1, 2)
        lengthscales = torch.Tensor([1, 2]).view(1, 1, 2)

        kernel = RBFKernel(ard_num_dims=2)
        kernel.initialize(log_lengthscale=lengthscales.log())
        kernel.eval()

        actual = (a - b).div_(lengthscales).pow(2).sum(dim=-1).mul_(-0.5).exp()
        res = kernel(a, b).evaluate()
        self.assertLess(torch.norm(res - actual.unsqueeze(-1)), 1e-5)
Example 12
    def test_ard_batch(self):
        a = torch.tensor([[[1, 2, 3], [2, 4, 0]], [[-1, 1, 2], [2, 1, 4]]],
                         dtype=torch.float)
        b = torch.tensor([[[1, 3, 1]], [[2, -1, 0]]], dtype=torch.float)
        lengthscales = torch.tensor([[[1, 2, 1]]], dtype=torch.float)

        kernel = RBFKernel(batch_size=2, ard_num_dims=3)
        kernel.initialize(log_lengthscale=lengthscales.log())
        kernel.eval()

        actual = (a - b).div_(lengthscales).pow(2).sum(dim=-1).mul_(-0.5).exp()
        res = kernel(a, b).evaluate()
        self.assertLess(torch.norm(res - actual.unsqueeze(-1)), 1e-5)
Example 13
    def test_subset_active_compute_radial_basis_function(self):
        a = torch.Tensor([4, 2, 8]).view(3, 1)
        a_p = torch.Tensor([1, 2, 3]).view(3, 1)
        a = torch.cat((a, a_p), 1)
        b = torch.Tensor([0, 2]).view(2, 1)
        lengthscale = 2

        kernel = RBFKernel(active_dims=[0])
        kernel.initialize(log_lengthscale=math.log(lengthscale))
        kernel.eval()

        actual = torch.Tensor([[16, 4], [4, 0], [64, 36]]).mul_(-0.5).div_(lengthscale ** 2).exp()
        res = kernel(a, b).evaluate()
        self.assertLess(torch.norm(res - actual), 1e-5)
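
Note: active_dims=[0] makes the kernel ignore every column except the first, so the expected matrix is built from squared distances between a[:, 0] and b alone. A quick hedged check (recent gpytorch assumed):

import torch
from gpytorch.kernels import RBFKernel

a = torch.tensor([[4., 1.], [2., 2.], [8., 3.]])
b = torch.tensor([[0.], [2.]])
kernel = RBFKernel(active_dims=[0])
kernel.eval()
# Only column 0 of `a` participates; the second column is dropped.
assert torch.allclose(kernel(a, b).evaluate(),
                      kernel(a[:, :1], b).evaluate())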
Example 14
def test_kp_toeplitz_gp_marginal_log_likelihood_forward():
    x = torch.cat([Variable(torch.linspace(0, 1, 2)).unsqueeze(1)] * 3, 1)
    y = torch.randn(2)
    rbf_module = RBFKernel()
    rbf_module.initialize(log_lengthscale=-2)
    covar_module = GridInterpolationKernel(rbf_module)
    covar_module.eval()
    covar_module.initialize_interpolation_grid(5, [(0, 1), (0, 1), (0, 1)])

    kronecker_var = covar_module.forward(x, x)
    kronecker_var_eval = kronecker_var.evaluate()
    res = kronecker_var.exact_gp_marginal_log_likelihood(Variable(y)).data
    actual = gpytorch.exact_gp_marginal_log_likelihood(kronecker_var_eval,
                                                       Variable(y)).data
    assert all(torch.abs((res - actual) / actual) < 0.05)
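
Note: the reason the Kronecker-Toeplitz structure is worth exploiting is that a matrix-vector product with K = T1 (x) T2 (x) T3 never has to form K densely, because (A (x) B) vec(X) = vec(B X A'). A small verification with torch.kron (available in torch >= 1.8); vec() is column-major stacking, i.e. X.t().reshape(-1) in row-major torch:

import torch

A, B = torch.randn(3, 3), torch.randn(4, 4)
X = torch.randn(4, 3)
lhs = torch.kron(A, B) @ X.t().reshape(-1)
rhs = (B @ X @ A.t()).t().reshape(-1)
assert torch.allclose(lhs, rhs, atol=1e-4)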
Example 15
def test_mvn_kl_divergence_backward():
    x = Variable(torch.linspace(0, 1, 4))
    rbf_covar = RBFKernel()
    rbf_covar.initialize(log_lengthscale=-4)
    K = Variable(rbf_covar.forward(x.unsqueeze(1), x.unsqueeze(1)).data,
                 requires_grad=True)

    mu1 = Variable(torch.randn(4), requires_grad=True)
    mu2 = Variable(torch.randn(4), requires_grad=True)

    U = torch.randn(4, 4).triu()
    U = Variable(U.mul(U.diag().sign().unsqueeze(1).expand_as(U).triu()),
                 requires_grad=True)

    mu_diff = mu2 - mu1
    actual = 0.5 * (_det(K).log() + mu_diff.dot(K.inverse().mv(mu_diff)) +
                    K.inverse().mm(U.t().mm(U)).trace() -
                    U.diag().log().sum(0) * 2 - len(mu_diff))
    actual.backward()

    actual_K_grad = K.grad.data.clone()
    actual_mu1_grad = mu1.grad.data.clone()
    actual_mu2_grad = mu2.grad.data.clone()
    actual_U_grad = U.grad.data.clone()

    K.grad.data.fill_(0)
    mu1.grad.data.fill_(0)
    mu2.grad.data.fill_(0)
    U.grad.data.fill_(0)

    res = gpytorch.mvn_kl_divergence(mu1, U, mu2, K, num_samples=10000)
    res.backward()

    res_K_grad = K.grad.data
    res_mu1_grad = mu1.grad.data
    res_mu2_grad = mu2.grad.data
    res_U_grad = U.grad.data

    assert (res_K_grad - actual_K_grad).abs().sum() / actual_K_grad.abs().sum() < 1e-1
    assert (res_mu1_grad - actual_mu1_grad).abs().sum() / actual_mu1_grad.abs().sum() < 1e-5
    assert (res_mu2_grad - actual_mu2_grad).abs().sum() / actual_mu2_grad.abs().sum() < 1e-5
    assert (res_U_grad - actual_U_grad).abs().sum() / actual_U_grad.abs().sum() < 1e-2
Example 16
    def test_ard(self):
        base_k = RBFKernel(ard_num_dims=3)
        base_k.initialize(lengthscale=[1., 2., 3.])
        AddK = NewtonGirardAdditiveKernel(base_k, 3, max_degree=1)

        testvals = torch.tensor([[1, 2, 3], [7, 5, 2]], dtype=torch.float)
        add_k_val = AddK(testvals, testvals).evaluate()

        ks = []
        for i in range(3):
            k = RBFKernel(active_dims=i)
            k.initialize(lengthscale=i + 1)
            ks.append(k)
        manual_k = ScaleKernel(AdditiveKernel(*ks))
        manual_k.initialize(outputscale=1.)
        manual_add_k_val = manual_k(testvals, testvals).evaluate()

        # np.testing.assert_allclose(add_k_val.detach().numpy(), manual_add_k_val.detach().numpy(), atol=1e-5)
        self.assertTrue(torch.allclose(add_k_val, manual_add_k_val, atol=1e-5))
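
Note: with max_degree=1 the Newton-Girard construction reduces to the first elementary symmetric polynomial of the per-dimension kernels, e_1 = k_1 + ... + k_d, which is exactly what the manual ScaleKernel(AdditiveKernel(...)) builds. For higher degrees the recursion k * e_k = sum_{j=1..k} (-1)^(j-1) e_{k-j} p_j over power sums p_j applies elementwise to the Gram matrices. A sketch (our helper):

import torch

def elementary_symmetric(Ks: torch.Tensor, max_degree: int) -> torch.Tensor:
    """e_1..e_max_degree of the per-dimension Gram matrices Ks ((d, n, n)),
    computed via Newton-Girard from power sums p_j = sum_i Ks[i]**j.
    Returns a (max_degree, n, n) stack; max_degree=1 gives Ks.sum(dim=0)."""
    _, n, _ = Ks.shape
    p = [None] + [Ks.pow(j).sum(dim=0) for j in range(1, max_degree + 1)]
    e = [torch.ones(n, n)]  # e_0 = 1
    for k in range(1, max_degree + 1):
        acc = torch.zeros(n, n)
        for j in range(1, k + 1):
            acc = acc + (-1) ** (j - 1) * e[k - j] * p[j]
        e.append(acc / k)
    return torch.stack(e[1:])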
Example 17
def test_mvn_kl_divergence_forward():
    x = Variable(torch.linspace(0, 1, 4))
    rbf_covar = RBFKernel()
    rbf_covar.initialize(log_lengthscale=-4)
    K = rbf_covar.forward(x.unsqueeze(1), x.unsqueeze(1))

    mu1 = Variable(torch.randn(4), requires_grad=True)
    mu2 = Variable(torch.randn(4), requires_grad=True)

    U = torch.randn(4, 4).triu()
    U = Variable(U.mul(U.diag().sign().unsqueeze(1).expand_as(U).triu()),
                 requires_grad=True)

    mu_diff = mu2 - mu1
    actual = 0.5 * (_det(K).log() + mu_diff.dot(K.inverse().mv(mu_diff)) +
                    K.inverse().mm(U.t().mm(U)).trace() -
                    U.diag().log().sum(0) * 2 - len(mu_diff))

    res = gpytorch.mvn_kl_divergence(mu1, U, mu2, K, num_samples=1000)
    assert all(torch.abs((res.data - actual.data) / actual.data) < 0.15)
Example 18
    def test_subset_active_compute_radial_basis_function(self):
        a = torch.tensor([4, 2, 8], dtype=torch.float).view(3, 1)
        a_p = torch.tensor([1, 2, 3], dtype=torch.float).view(3, 1)
        a = torch.cat((a, a_p), 1)
        b = torch.tensor([0, 2, 4], dtype=torch.float).view(3, 1)
        lengthscale = 2

        kernel = RBFKernel(active_dims=[0])
        kernel.initialize(lengthscale=lengthscale)
        kernel.eval()

        actual = torch.tensor([[16, 4, 0], [4, 0, 4], [64, 36, 16]], dtype=torch.float)
        actual.mul_(-0.5).div_(lengthscale ** 2).exp_()
        res = kernel(a, b).evaluate()
        self.assertLess(torch.norm(res - actual), 1e-5)

        # diag
        res = kernel(a, b).diag()
        actual = actual.diag()
        self.assertLess(torch.norm(res - actual), 1e-5)
Example 19
    def test_subset_active_computes_radial_basis_function_gradient(self):
        a_1 = torch.Tensor([4, 2, 8]).view(3, 1)
        a_p = torch.Tensor([1, 2, 3]).view(3, 1)
        a = torch.cat((a_1, a_p), 1)
        b = torch.Tensor([0, 2, 2]).view(3, 1)
        lengthscale = 2

        param = math.log(lengthscale) * torch.ones(3, 3)
        param.requires_grad_()
        diffs = a_1.expand(3, 3) - b.expand(3, 3).transpose(0, 1)
        actual_output = (-0.5 * (diffs / param.exp()) ** 2).exp()
        actual_output.backward(torch.eye(3))
        actual_param_grad = param.grad.sum()

        kernel = RBFKernel(active_dims=[0])
        kernel.initialize(log_lengthscale=math.log(lengthscale))
        kernel.eval()
        output = kernel(a, b).evaluate()
        output.backward(gradient=torch.eye(3))
        res = kernel.log_lengthscale.grad

        self.assertLess(torch.norm(res - actual_param_grad), 1e-5)
Example 20
    def test_postscale(self):
        x = torch.tensor([[1., 2., 3.], [1.1, 2.2, 3.3]])
        kbase = RBFKernel()
        kbase.initialize(lengthscale=torch.tensor([1.]))
        base_kernel = AdditiveStructureKernel(kbase, 3)
        proj_module = torch.nn.Linear(3, 3, bias=False)
        proj_module.weight.data = torch.eye(3, dtype=torch.float)
        proj_kernel = ScaledProjectionKernel(proj_module,
                                             base_kernel,
                                             prescale=False,
                                             ard_num_dims=3)
        proj_kernel.initialize(lengthscale=torch.tensor([1., 2., 3.]))

        with torch.no_grad():
            K = proj_kernel(x, x).evaluate()

        k = RBFKernel()
        k.initialize(lengthscale=torch.tensor([1.]))

        with torch.no_grad():
            K2 = 3 * k(x[:, 0:1], x[:, 0:1]).evaluate()

        np.testing.assert_allclose(K.numpy(), K2.numpy())
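
Note: the comparison passes because, once each column of x is divided by its lengthscale, all three scaled columns equal [1, 1.1]; the three additive RBF components therefore coincide, and their sum is three times a single RBF on the first column. A quick check of that scaling fact:

import torch

x = torch.tensor([[1., 2., 3.], [1.1, 2.2, 3.3]])
lengthscales = torch.tensor([1., 2., 3.])
scaled = x / lengthscales
# Every scaled column equals the first one, so the d additive terms agree.
assert torch.allclose(scaled, scaled[:, :1].expand_as(scaled))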
Example 21
    def test_subset_active_computes_radial_basis_function_gradient(self):
        a_1 = torch.Tensor([4, 2, 8]).view(3, 1)
        a_p = torch.Tensor([1, 2, 3]).view(3, 1)
        a = torch.cat((a_1, a_p), 1)
        b = torch.Tensor([0, 2, 2]).view(3, 1)
        lengthscale = 2

        kernel = RBFKernel(active_dims=[0])
        kernel.initialize(log_lengthscale=math.log(lengthscale))
        kernel.eval()
        param = Variable(
            torch.Tensor(3, 3).fill_(math.log(lengthscale)),
            requires_grad=True,
        )
        output = kernel(Variable(a), Variable(b))
        output.backward(gradient=torch.eye(3))
        res = kernel.log_lengthscale.grad.data

        diffs = Variable(a_1.expand(3, 3) - b.expand(3, 3).transpose(0, 1))
        actual_output = (-(diffs ** 2) / (param.exp())).exp()
        actual_output.backward(torch.eye(3))
        actual_param_grad = param.grad.data.sum()

        self.assertLess(torch.norm(res - actual_param_grad), 1e-5)
Example 22
    def test_gradients(self):
        x = torch.tensor([[1., 2., 3.], [1.1, 2.2, 3.3]])
        y = torch.sin(x).sum(dim=1)
        kbase = RBFKernel()
        kbase.initialize(lengthscale=torch.tensor([1.]))
        base_kernel = AdditiveStructureKernel(kbase, 3)
        proj_module = torch.nn.Linear(3, 3, bias=False)
        proj_module.weight.data = torch.eye(3, dtype=torch.float)
        proj_kernel = ScaledProjectionKernel(proj_module,
                                             base_kernel,
                                             prescale=True,
                                             ard_num_dims=3)
        proj_kernel.initialize(lengthscale=torch.tensor([1., 2., 3.]))

        model = ExactGPModel(x, y, gpytorch.likelihoods.GaussianLikelihood(),
                             proj_kernel)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(model.likelihood, model)
        optimizer_ = torch.optim.Adam(model.parameters(), lr=0.1)
        optimizer_.zero_grad()

        pred = model(x)
        loss = -mll(pred, y)
        loss.backward()

        optimizer_.step()

        np.testing.assert_allclose(
            proj_kernel.base_kernel.base_kernel.lengthscale.numpy(),
            torch.tensor([[1.]]).numpy())
        np.testing.assert_allclose(
            proj_kernel.projection_module.weight.numpy(),
            torch.eye(3, dtype=torch.float).numpy())
        self.assertFalse(
            np.allclose(proj_kernel.lengthscale.detach().numpy(),
                        torch.tensor([1., 2., 3.]).numpy()))

        proj_module = torch.nn.Linear(3, 3, bias=False)
        proj_module.weight.data = torch.eye(3, dtype=torch.float)
        proj_kernel2 = ScaledProjectionKernel(proj_module,
                                              base_kernel,
                                              prescale=True,
                                              ard_num_dims=3,
                                              learn_proj=True)

        proj_kernel2.initialize(lengthscale=torch.tensor([1., 2., 3.]))

        model = ExactGPModel(x, y, gpytorch.likelihoods.GaussianLikelihood(),
                             proj_kernel2)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(model.likelihood, model)
        optimizer_ = torch.optim.Adam(model.parameters(), lr=0.1)
        optimizer_.zero_grad()

        pred = model(x)
        loss = -mll(pred, y)
        loss.backward()

        optimizer_.step()

        np.testing.assert_allclose(
            proj_kernel2.base_kernel.base_kernel.lengthscale.numpy(),
            torch.tensor([[1.]]).numpy())
        self.assertFalse(
            np.allclose(proj_kernel2.projection_module.weight.detach().numpy(),
                        torch.eye(3, dtype=torch.float).numpy()))
        self.assertFalse(
            np.allclose(proj_kernel2.lengthscale.detach().numpy(),
                        torch.tensor([1., 2., 3.]).numpy()))
Example 23
def foo_kp_toeplitz_gp_marginal_log_likelihood_backward():
    x = torch.cat([Variable(torch.linspace(0, 1, 2)).unsqueeze(1)] * 3, 1)
    y = Variable(torch.randn(2), requires_grad=True)
    rbf_module = RBFKernel()
    rbf_module.initialize(log_lengthscale=-2)
    covar_module = GridInterpolationKernel(rbf_module)
    covar_module.eval()
    covar_module.initialize_interpolation_grid(5, [(0, 1), (0, 1), (0, 1)])

    kronecker_var = covar_module.forward(x, x)

    cs = Variable(torch.zeros(3, 5), requires_grad=True)
    J_lefts = []
    C_lefts = []
    J_rights = []
    C_rights = []
    Ts = []
    for i in range(3):
        covar_x = covar_module.forward(x[:, i].unsqueeze(1), x[:, i].unsqueeze(1))
        cs.data[i] = covar_x.c.data
        J_lefts.append(covar_x.J_left)
        C_lefts.append(covar_x.C_left)
        J_rights.append(covar_x.J_right)
        C_rights.append(covar_x.C_right)
        T = Variable(torch.zeros(len(cs[i].data), len(cs[i].data)))
        for k in range(len(cs[i].data)):
            for j in range(len(cs[i].data)):
                T[k, j] = utils.toeplitz.toeplitz_getitem(cs[i], cs[i], k, j)
        Ts.append(T)

    W_left = list_of_indices_and_values_to_sparse(J_lefts, C_lefts, cs)
    W_right = list_of_indices_and_values_to_sparse(J_rights, C_rights, cs)
    W_left_dense = Variable(W_left.to_dense())
    W_right_dense = Variable(W_right.to_dense())
    K = kronecker_product(Ts)
    WKW = W_left_dense.matmul(K.matmul(W_right_dense.t()))
    quad_form_actual = y.dot(WKW.inverse().matmul(y))
    log_det_actual = _det(WKW).log()

    actual_nll = -0.5 * (log_det_actual + quad_form_actual +
                         math.log(2 * math.pi) * len(y))
    actual_nll.backward()
    actual_cs_grad = cs.grad.data.clone()
    actual_y_grad = y.grad.data.clone()

    y.grad.data.fill_(0)
    cs.grad.data.fill_(0)

    kronecker_var = gpytorch.lazy.KroneckerProductLazyVariable(
        cs, kronecker_var.J_lefts, kronecker_var.C_lefts,
        kronecker_var.J_rights, kronecker_var.C_rights)
    gpytorch.functions.num_trace_samples = 100
    res = kronecker_var.exact_gp_marginal_log_likelihood(y)
    res.backward()

    res_cs_grad = cs.grad.data
    res_y_grad = y.grad.data

    assert (actual_cs_grad - res_cs_grad).norm() / res_cs_grad.norm() < 0.05
    assert (actual_y_grad - res_y_grad).norm() / res_y_grad.norm() < 1e-3

    y.grad.data.fill_(0)
    cs.grad.data.fill_(0)

    gpytorch.functions.fastest = False
    res = kronecker_var.exact_gp_marginal_log_likelihood(y)
    res.backward()

    res_cs_grad = cs.grad.data
    res_y_grad = y.grad.data

    assert (actual_cs_grad - res_cs_grad).norm() / res_cs_grad.norm() < 1e-3
    assert (actual_y_grad - res_y_grad).norm() / res_y_grad.norm() < 1e-3
Example 24
    def test_initialize_lengthscale_batch(self):
        kernel = RBFKernel(batch_size=2)
        ls_init = torch.tensor([3.14, 4.13])
        kernel.initialize(lengthscale=ls_init)
        actual_value = ls_init.view_as(kernel.lengthscale)
        self.assertLess(torch.norm(kernel.lengthscale - actual_value), 1e-5)
Example 25
    def test_initialize_lengthscale(self):
        kernel = RBFKernel()
        kernel.initialize(lengthscale=3.14)
        actual_value = torch.tensor(3.14).view_as(kernel.lengthscale)
        self.assertLess(torch.norm(kernel.lengthscale - actual_value), 1e-5)
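
Note: in recent gpytorch versions, initialize(lengthscale=...) routes the value through the kernel's registered positivity constraint and stores the inverse-transformed raw parameter. A hedged sketch of the round trip (assumes a gpytorch version where raw_lengthscale and raw_lengthscale_constraint exist, roughly gpytorch >= 1.0):

import torch
from gpytorch.kernels import RBFKernel

kernel = RBFKernel()
kernel.initialize(lengthscale=3.14)

# The stored parameter is the raw (unconstrained) value; the public
# `lengthscale` property applies the constraint's transform (softplus by default).
raw = kernel.raw_lengthscale
constraint = kernel.raw_lengthscale_constraint
assert torch.allclose(constraint.transform(raw), torch.tensor(3.14))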