def __init__(self, train_x, train_y, likelihood):
     super(MultitaskGPModel, self).__init__(train_x, train_y, likelihood)
     self.mean_module = MultitaskMean(ConstantMean(), num_tasks=2)
     self.data_covar_module = RBFKernel()
     self.covar_module = MultitaskKernel(self.data_covar_module, num_tasks=2, rank=2)
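The snippet above defines only the constructor; a forward method is needed before the model can be used. A minimal sketch under the usual GPyTorch multitask conventions (not part of the original snippet; assumes import gpytorch):

 def forward(self, x):
     # Joint Gaussian over both tasks at the inputs x.
     mean_x = self.mean_module(x)
     covar_x = self.covar_module(x)
     return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)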
 def create_kernel_ard(self, num_dims, **kwargs):
     return NewtonGirardAdditiveKernel(RBFKernel(ard_num_dims=num_dims), num_dims, 2, **kwargs)
Example #3
 def create_kernel_ard(self, num_dims, **kwargs):
     return RBFKernel(ard_num_dims=num_dims, **kwargs)
    def test_solve(self):
        size = 100
        train_x = torch.cat(
            [
                torch.linspace(0, 1, size).unsqueeze(0),
                torch.linspace(0, 0.5, size).unsqueeze(0),
                torch.linspace(0, 0.25, size).unsqueeze(0),
                torch.linspace(0, 1.25, size).unsqueeze(0),
                torch.linspace(0, 1.5, size).unsqueeze(0),
                torch.linspace(0, 1, size).unsqueeze(0),
                torch.linspace(0, 0.5, size).unsqueeze(0),
                torch.linspace(0, 0.25, size).unsqueeze(0),
                torch.linspace(0, 1.25, size).unsqueeze(0),
                torch.linspace(0, 1.25, size).unsqueeze(0),
                torch.linspace(0, 1.5, size).unsqueeze(0),
                torch.linspace(0, 1, size).unsqueeze(0),
            ],
            0,
        ).unsqueeze(-1)
        covar_matrix = RBFKernel()(train_x, train_x).evaluate().view(
            2, 2, 3, size, size)
        piv_chol = pivoted_cholesky.pivoted_cholesky(covar_matrix, 10)
        woodbury_factor = pivoted_cholesky.woodbury_factor(
            piv_chol, torch.ones(2, 2, 3, 100))

        rhs_vector = torch.randn(2, 2, 3, 100, 5)
        shifted_covar_matrix = covar_matrix + torch.eye(size)
        real_solve = torch.cat(
            [
                shifted_covar_matrix[0, 0, 0].inverse().matmul(
                    rhs_vector[0, 0, 0]).unsqueeze(0),
                shifted_covar_matrix[0, 0, 1].inverse().matmul(
                    rhs_vector[0, 0, 1]).unsqueeze(0),
                shifted_covar_matrix[0, 0, 2].inverse().matmul(
                    rhs_vector[0, 0, 2]).unsqueeze(0),
                shifted_covar_matrix[0, 1, 0].inverse().matmul(
                    rhs_vector[0, 1, 0]).unsqueeze(0),
                shifted_covar_matrix[0, 1, 1].inverse().matmul(
                    rhs_vector[0, 1, 1]).unsqueeze(0),
                shifted_covar_matrix[0, 1, 2].inverse().matmul(
                    rhs_vector[0, 1, 2]).unsqueeze(0),
                shifted_covar_matrix[1, 0, 0].inverse().matmul(
                    rhs_vector[1, 0, 0]).unsqueeze(0),
                shifted_covar_matrix[1, 0, 1].inverse().matmul(
                    rhs_vector[1, 0, 1]).unsqueeze(0),
                shifted_covar_matrix[1, 0, 2].inverse().matmul(
                    rhs_vector[1, 0, 2]).unsqueeze(0),
                shifted_covar_matrix[1, 1, 0].inverse().matmul(
                    rhs_vector[1, 1, 0]).unsqueeze(0),
                shifted_covar_matrix[1, 1, 1].inverse().matmul(
                    rhs_vector[1, 1, 1]).unsqueeze(0),
                shifted_covar_matrix[1, 1, 2].inverse().matmul(
                    rhs_vector[1, 1, 2]).unsqueeze(0),
            ],
            0,
        ).view_as(rhs_vector)
        approx_solve = pivoted_cholesky.woodbury_solve(rhs_vector, piv_chol,
                                                       woodbury_factor,
                                                       torch.ones(2, 3, 100))

        self.assertTrue(approx_equal(approx_solve, real_solve, 2e-4))
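For reference, a small self-contained check of the Woodbury identity that the approximate solve above relies on; the names and shapes here are illustrative and not taken from the test:

import torch

n, k = 50, 5
L = torch.randn(n, k)            # stand-in for a pivoted-Cholesky factor
b = torch.randn(n, 3)            # right-hand sides
A = torch.eye(n) + L @ L.t()     # shifted matrix I + L L^T

direct = torch.linalg.solve(A, b)
# Woodbury: (I + L L^T)^{-1} b = b - L (I_k + L^T L)^{-1} (L^T b)
inner = torch.eye(k) + L.t() @ L
woodbury = b - L @ torch.linalg.solve(inner, L.t() @ b)
assert torch.allclose(direct, woodbury, atol=1e-4)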
Example #5
def gpnet_nonconj(args, dataloader, test_x, prior_gp):
    N = len(dataloader.dataset)
    x_dim = 1
    prior_gp.train()

    if args.net == 'tangent':
        kernel = prior_gp.covar_module
        bnn_prev = FirstOrder([x_dim] + [args.n_hidden] * args.n_layer,
                              mvn=False)
        bnn = FirstOrder([x_dim] + [args.n_hidden] * args.n_layer, mvn=True)
    elif args.net == 'deep':
        kernel = prior_gp.covar_module
        bnn_prev = DeepKernel([x_dim] + [args.n_hidden] * args.n_layer,
                              mvn=False)
        bnn = DeepKernel([x_dim] + [args.n_hidden] * args.n_layer, mvn=True)
    elif args.net == 'rf':
        kernel = ScaleKernel(RBFKernel())
        kernel_prev = ScaleKernel(RBFKernel())
        bnn_prev = RFExpansion(x_dim,
                               args.n_hidden,
                               kernel_prev,
                               mvn=False,
                               fix_ls=args.fix_rf_ls,
                               residual=args.residual)
        bnn = RFExpansion(x_dim,
                          args.n_hidden,
                          kernel,
                          fix_ls=args.fix_rf_ls,
                          residual=args.residual)
        bnn_prev.load_state_dict(bnn.state_dict())
    else:
        raise NotImplementedError('Unknown inference net')

    infer_gpnet_optimizer = optim.Adam(bnn.parameters(), lr=args.learning_rate)
    hyper_opt_optimizer = optim.Adam(prior_gp.parameters(), lr=args.hyper_rate)

    x_min, x_max = dataloader.dataset.range
    n = dataloader.batch_size

    bnn.train()
    bnn_prev.train()
    prior_gp.train()

    mb = master_bar(range(1, args.n_iters + 1))

    for t in mb:
        beta = args.beta0 * 1. / (1. + args.gamma * math.sqrt(t - 1))
        dl_bar = progress_bar(dataloader, parent=mb)
        for x, y in dl_bar:
            n = x.size(0)
            x_star = torch.Tensor(args.measurement_size,
                                  x_dim).uniform_(x_min, x_max)
            xx = torch.cat([x, x_star], 0)

            # inference net
            infer_gpnet_optimizer.zero_grad()
            hyper_opt_optimizer.zero_grad()

            qff = bnn(xx)
            qff_mean_prev, K_prox = bnn_prev(xx)
            qf_mean, qf_var = bnn(x, full_cov=False)

            # Eq.(8)
            K_prior = kernel(xx, xx).add_jitter(1e-6)
            pff = MultivariateNormal(torch.zeros(xx.size(0)), K_prior)

            f_term = torch.sum(
                expected_log_prob(prior_gp.likelihood, qf_mean, qf_var,
                                  y.squeeze(-1)))
            f_term *= N / x.size(0) * beta

            prior_term = -beta * cross_entropy(qff, pff)

            qff_prev = MultivariateNormal(qff_mean_prev, K_prox)
            prox_term = -(1 - beta) * cross_entropy(qff, qff_prev)

            entropy_term = entropy(qff)

            lower_bound = f_term + prior_term + prox_term + entropy_term
            loss = -lower_bound / n

            loss.backward(retain_graph=True)

            infer_gpnet_optimizer.step()

            # Hyper-parameter update
            Kn_prior = K_prior[:n, :n]
            pf = MultivariateNormal(torch.zeros(n), Kn_prior)
            Kn_prox = K_prox[:n, :n]
            qf_prev_mean = qff_mean_prev[:n]
            qf_prev_var = torch.diagonal(Kn_prox)
            qf_prev = MultivariateNormal(qf_prev_mean, Kn_prox)
            hyper_obj = expected_log_prob(
                prior_gp.likelihood, qf_prev_mean, qf_prev_var,
                y.squeeze(-1)).sum() - kl_div(qf_prev, pf)
            hyper_obj = -hyper_obj
            hyper_obj.backward()
            hyper_opt_optimizer.step()

        bnn_prev.load_state_dict(bnn.state_dict())
        if args.net == 'rf':
            kernel_prev.load_state_dict(kernel.state_dict())
        if t % 50 == 0:
            mb.write("Iter {}/{}, kl_obj = {:.4f}, noise = {:.4f}".format(
                t, args.n_iters, lower_bound.item(),
                prior_gp.likelihood.noise.item()))
    test_x = test_x.to(args.device)
    test_stats = evaluate(bnn, prior_gp.likelihood, test_x,
                          args.net == 'tangent')

    return test_stats
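A short gloss on the bound assembled above, assuming cross_entropy(q, p) returns E_q[-log p] (so cross_entropy(q, p) = KL(q, p) + H(q)): splitting the entropy term as beta * H(q) + (1 - beta) * H(q) gives lower_bound = beta * ((N/n) * sum E_q[log p(y | f)] - KL(q, p)) - (1 - beta) * KL(q, q_prev), i.e. a tempered ELBO with a proximal penalty that keeps q close to the previous iterate q_prev.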
Example #6
 def create_kernel_ard(self, num_dims, **kwargs):
     base_kernel = RBFKernel(ard_num_dims=num_dims)
     kernel = ScaleKernel(base_kernel, **kwargs)
     return kernel
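A quick usage sketch (not from the original source) of what ard_num_dims changes: the kernel keeps one lengthscale per input dimension instead of a single shared one.

from gpytorch.kernels import RBFKernel, ScaleKernel

k_ard = ScaleKernel(RBFKernel(ard_num_dims=3))
print(k_ard.base_kernel.lengthscale.shape)  # torch.Size([1, 3])

k_iso = ScaleKernel(RBFKernel())
print(k_iso.base_kernel.lengthscale.shape)  # torch.Size([1, 1])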
def foo_kp_toeplitz_gp_marginal_log_likelihood_backward():
    x = torch.cat([Variable(torch.linspace(0, 1, 2)).unsqueeze(1)] * 3, 1)
    y = Variable(torch.randn(2), requires_grad=True)
    rbf_module = RBFKernel()
    rbf_module.initialize(log_lengthscale=-2)
    covar_module = GridInterpolationKernel(rbf_module)
    covar_module.eval()
    covar_module.initialize_interpolation_grid(5, [(0, 1), (0, 1), (0, 1)])

    kronecker_var = covar_module.forward(x, x)

    cs = Variable(torch.zeros(3, 5), requires_grad=True)
    J_lefts = []
    C_lefts = []
    J_rights = []
    C_rights = []
    Ts = []
    for i in range(3):
        covar_x = covar_module.forward(x[:, i].unsqueeze(1),
                                       x[:, i].unsqueeze(1))
        cs.data[i] = covar_x.c.data
        J_lefts.append(covar_x.J_left)
        C_lefts.append(covar_x.C_left)
        J_rights.append(covar_x.J_right)
        C_rights.append(covar_x.C_right)
        T = Variable(torch.zeros(len(cs[i].data), len(cs[i].data)))
        for k in range(len(cs[i].data)):
            for j in range(len(cs[i].data)):
                T[k, j] = utils.toeplitz.toeplitz_getitem(cs[i], cs[i], k, j)
        Ts.append(T)

    W_left = list_of_indices_and_values_to_sparse(J_lefts, C_lefts, cs)
    W_right = list_of_indices_and_values_to_sparse(J_rights, C_rights, cs)
    W_left_dense = Variable(W_left.to_dense())
    W_right_dense = Variable(W_right.to_dense())
    K = kronecker_product(Ts)
    WKW = W_left_dense.matmul(K.matmul(W_right_dense.t()))
    quad_form_actual = y.dot(WKW.inverse().matmul(y))
    log_det_actual = _det(WKW).log()

    actual_nll = -0.5 * (log_det_actual + quad_form_actual +
                         math.log(2 * math.pi) * len(y))
    actual_nll.backward()
    actual_cs_grad = cs.grad.data.clone()
    actual_y_grad = y.grad.data.clone()

    y.grad.data.fill_(0)
    cs.grad.data.fill_(0)

    kronecker_var = gpytorch.lazy.KroneckerProductLazyVariable(
        cs, kronecker_var.J_lefts, kronecker_var.C_lefts,
        kronecker_var.J_rights, kronecker_var.C_rights)
    gpytorch.functions.num_trace_samples = 100
    res = kronecker_var.exact_gp_marginal_log_likelihood(y)
    res.backward()

    res_cs_grad = cs.grad.data
    res_y_grad = y.grad.data

    assert (actual_cs_grad - res_cs_grad).norm() / res_cs_grad.norm() < 0.05
    assert (actual_y_grad - res_y_grad).norm() / res_y_grad.norm() < 1e-3

    y.grad.data.fill_(0)
    cs.grad.data.fill_(0)

    gpytorch.functions.fastest = False
    res = kronecker_var.exact_gp_marginal_log_likelihood(y)
    res.backward()

    res_cs_grad = cs.grad.data
    res_y_grad = y.grad.data

    assert (actual_cs_grad - res_cs_grad).norm() / res_cs_grad.norm() < 1e-3
    assert (actual_y_grad - res_y_grad).norm() / res_y_grad.norm() < 1e-3
    def __init__(self,
                 input_dims,
                 output_dims,
                 num_inducing=128,
                 mean_type='constant'):
        # FOR VARIATIONAL INFERENCE: CREATE INDUCING POINTS DRAWN FROM N(0,1)
        if output_dims is None:
            print("num_inducing:", num_inducing)
            print("input_dims:", input_dims)
            inducing_points = torch.randn(num_inducing, input_dims)
            batch_shape = torch.Size([])
        else:
            inducing_points = torch.randn(output_dims, num_inducing,
                                          input_dims)
            batch_shape = torch.Size([output_dims])

        # INITIALIZE VARIATIONAL DISTRIBUTION
        # The distribution used to approximate the true posterior distribution.
        # Cholesky uses a full mean vector of size num_inducing and a full covariance
        # matrix of size num_inducing * num_inducing. These are learned during training.
        variational_distribution = CholeskyVariationalDistribution(
            num_inducing_points=num_inducing, batch_shape=batch_shape)

        # INITIALIZE VARIATIONAL STRATEGY
        # Variational strategy wrapper for the variational distribution above.
        variational_strategy = VariationalStrategy(
            self,
            inducing_points,
            variational_distribution,
            learn_inducing_locations=True)

        # Call the DeepGPLayer of GPyTorch to initialize the real class for DGPs.
        super(DGPHiddenLayer, self).__init__(variational_strategy, input_dims,
                                             output_dims)

        # INITIALIZE MEAN
        # The mean module to be used. A GP prior mean is often taken to be constant.
        if mean_type == 'constant':
            self.mean_module = ConstantMean(
                batch_shape=batch_shape
            )  # batch_shape so it knows the dimensions
        else:  # (if 'linear')
            self.mean_module = LinearMean(input_dims)

        # INITIALIZE KERNEL
        # RBF has no scaling, so wrap it in a ScaleKernel with constant k, that is
        # kernel = k * kernel_rbf. Constraints and priors can be set on the parameters as well.
        # It's probably a good idea to set a prior, since we normalize the data and have a
        # prior belief about it from the appearance of the training data.
        # The question is what to set them to. One might leave them free to begin with, note
        # which lengthscales turn out well, and then constrain to those to get faster
        # convergence in future training.

        #lengthscale_constraint = gpytorch.constraints.Interval(0.0001, 10.0) # needs to be floats
        lengthscale_prior = gpytorch.priors.NormalPrior(0.5, 3.0)
        lengthscale_constraint = None
        #lengthscale_prior = None

        self.covar_module = ScaleKernel(
            RBFKernel(
                batch_shape=batch_shape,  # separate lengthscale for each eventual batch
                ard_num_dims=input_dims,
                # active_dims=(0,),  # indices of the input dims to compute covariance over
                lengthscale_constraint=lengthscale_constraint,
                lengthscale_prior=lengthscale_prior),
            batch_shape=batch_shape,  # for ScaleKernel
            ard_num_dims=None)  # for ScaleKernel
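For this layer to be usable inside a deep GP, DeepGPLayer also expects a forward method; a minimal sketch under standard GPyTorch conventions (not part of the original snippet; assumes import gpytorch):

    def forward(self, x):
        # Prior over this layer's outputs at x.
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)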
    def test_solve(self):
        size = 100
        train_x = torch.cat(
            [
                torch.linspace(0, 1, size).unsqueeze(0),
                torch.linspace(0, 0.5, size).unsqueeze(0),
                torch.linspace(0, 0.25, size).unsqueeze(0),
                torch.linspace(0, 1.25, size).unsqueeze(0),
                torch.linspace(0, 1.5, size).unsqueeze(0),
                torch.linspace(0, 1, size).unsqueeze(0),
                torch.linspace(0, 0.5, size).unsqueeze(0),
                torch.linspace(0, 0.25, size).unsqueeze(0),
                torch.linspace(0, 1.25, size).unsqueeze(0),
                torch.linspace(0, 1.25, size).unsqueeze(0),
                torch.linspace(0, 1.5, size).unsqueeze(0),
                torch.linspace(0, 1, size).unsqueeze(0),
            ],
            0,
        ).unsqueeze(-1)
        covar_matrix = RBFKernel()(train_x, train_x).evaluate().view(
            2, 2, 3, size, size)
        piv_chol = pivoted_cholesky.pivoted_cholesky(covar_matrix, 10)
        woodbury_factor, inv_scale, logdet = woodbury.woodbury_factor(
            piv_chol, piv_chol, torch.ones(2, 2, 3, 100), logdet=True)
        actual_logdet = torch.stack([
            mat.logdet() for mat in (piv_chol @ piv_chol.transpose(-1, -2) +
                                     torch.eye(100)).view(-1, 100, 100)
        ], 0).view(2, 2, 3)
        self.assertTrue(approx_equal(logdet, actual_logdet, 2e-4))

        rhs_vector = torch.randn(2, 2, 3, 100, 5)
        shifted_covar_matrix = covar_matrix + torch.eye(size)
        real_solve = torch.cat(
            [
                shifted_covar_matrix[0, 0, 0].inverse().matmul(
                    rhs_vector[0, 0, 0]).unsqueeze(0),
                shifted_covar_matrix[0, 0, 1].inverse().matmul(
                    rhs_vector[0, 0, 1]).unsqueeze(0),
                shifted_covar_matrix[0, 0, 2].inverse().matmul(
                    rhs_vector[0, 0, 2]).unsqueeze(0),
                shifted_covar_matrix[0, 1, 0].inverse().matmul(
                    rhs_vector[0, 1, 0]).unsqueeze(0),
                shifted_covar_matrix[0, 1, 1].inverse().matmul(
                    rhs_vector[0, 1, 1]).unsqueeze(0),
                shifted_covar_matrix[0, 1, 2].inverse().matmul(
                    rhs_vector[0, 1, 2]).unsqueeze(0),
                shifted_covar_matrix[1, 0, 0].inverse().matmul(
                    rhs_vector[1, 0, 0]).unsqueeze(0),
                shifted_covar_matrix[1, 0, 1].inverse().matmul(
                    rhs_vector[1, 0, 1]).unsqueeze(0),
                shifted_covar_matrix[1, 0, 2].inverse().matmul(
                    rhs_vector[1, 0, 2]).unsqueeze(0),
                shifted_covar_matrix[1, 1, 0].inverse().matmul(
                    rhs_vector[1, 1, 0]).unsqueeze(0),
                shifted_covar_matrix[1, 1, 1].inverse().matmul(
                    rhs_vector[1, 1, 1]).unsqueeze(0),
                shifted_covar_matrix[1, 1, 2].inverse().matmul(
                    rhs_vector[1, 1, 2]).unsqueeze(0),
            ],
            0,
        ).view_as(rhs_vector)
        scaled_inv_diag = (inv_scale / torch.ones(2, 3, 100)).unsqueeze(-1)
        approx_solve = woodbury.woodbury_solve(rhs_vector,
                                               piv_chol * scaled_inv_diag,
                                               woodbury_factor,
                                               scaled_inv_diag, inv_scale)

        self.assertTrue(approx_equal(approx_solve, real_solve, 2e-4))
Example #10
    def test_online_train_mll_backprop(self):
        """This test is intended to test consecutive observe-train-observe-train patterns"""
        r_lik = GaussianLikelihood()
        r_kernel = GridInterpolationKernelWithFantasy(
            RBFKernel(), grid_size=self.grid_size,
            grid_bounds=[(-4.0, 14.0)]).double()
        r_model = RegularExactGP(self.xs, self.labels, r_lik, r_kernel,
                                 ZeroMean())

        lik = GaussianLikelihood()
        kernel = GridInterpolationKernelWithFantasy(
            RBFKernel(), grid_size=self.grid_size,
            grid_bounds=[(-4.0, 14.0)]).double()
        model = OnlineWoodburyGP(self.xs, self.labels, lik, kernel, ZeroMean())

        def observe_and_update(r_model,
                               model,
                               lengthscale,
                               noise_var,
                               xs,
                               ys,
                               set_online=False):
            r_model.covar_module.base_kernel.lengthscale = lengthscale
            if set_online:
                model.covar_module.base_kernel.lengthscale = lengthscale

            r_model.likelihood.noise = noise_var
            if set_online:
                model.likelihood.noise = noise_var

            r_model.eval()
            r_model(self.new_points)
            r_model = r_model.get_fantasy_model(xs, ys)
            r_model.train()
            r_optim = torch.optim.SGD(r_model.parameters(), self.lr)

            model.eval()
            model(self.new_points)
            model = model.get_online_model(xs, ys)
            model.train()
            optim = torch.optim.SGD(model.parameters(), self.lr)

            with gpytorch.settings.fast_computations(), \
                    gpytorch.settings.max_cholesky_size(1), \
                    gpytorch.settings.skip_logdet_forward():
                r_mll = ExactMarginalLogLikelihood(r_model.likelihood, r_model)
                r_train_output = r_model(r_model.train_inputs[0])
                r_mll_val = r_mll(r_train_output, r_model.train_targets)

                mll = WoodburyExactMarginalLogLikelihood(lik, model)
                train_output = model(model.train_inputs[0])
                mll_val = mll(train_output, model.train_targets)

                np.testing.assert_allclose(r_mll_val.item(),
                                           mll_val.item(),
                                           rtol=1e-4)

                loss = -mll_val
                loss.backward()
                r_loss = -r_mll_val
                r_loss.backward()

            print(
                "online ls grad",
                model.covar_module.base_kernel.raw_lengthscale.grad.item(),
            )
            print(
                "ski ls grad",
                r_model.covar_module.base_kernel.raw_lengthscale.grad.item(),
            )

            print("online ls",
                  model.covar_module.base_kernel.lengthscale.item())
            print("ski ls",
                  r_model.covar_module.base_kernel.lengthscale.item())

            print("online noise grad", model.likelihood.raw_noise.grad.item())
            print("ski noise grad", r_model.likelihood.raw_noise.grad.item())

            print("online noise", model.likelihood.noise.item())
            print("ski noise", r_model.likelihood.noise.item())

            # Make sure the gradients are the same
            np.testing.assert_allclose(
                model.covar_module.base_kernel.raw_lengthscale.grad.item(),
                r_model.covar_module.base_kernel.raw_lengthscale.grad.item(),
                rtol=0.01,
                atol=0.01,
            )
            np.testing.assert_allclose(
                model.likelihood.raw_noise.grad.item(),
                r_model.likelihood.raw_noise.grad.item(),
                rtol=0.01,
                atol=0.01,
            )

            r_optim.step()
            r_optim.zero_grad()

            optim.step()
            optim.zero_grad()
            model.get_updated_hyper_strategy()

            # Make sure the values are the same
            np.testing.assert_allclose(
                model.covar_module.base_kernel.lengthscale.item(),
                r_model.covar_module.base_kernel.lengthscale.item(),
                rtol=0.01,
                atol=0.01,
            )
            np.testing.assert_allclose(
                model.likelihood.noise.item(),
                r_model.likelihood.noise.item(),
                rtol=0.01,
                atol=0.01,
            )

            # Verify the gradients are the same (both zero after zero_grad)
            np.testing.assert_allclose(
                model.covar_module.base_kernel.raw_lengthscale.grad.item(),
                r_model.covar_module.base_kernel.raw_lengthscale.grad.item(),
                rtol=0.01,
                atol=0.01,
            )
            np.testing.assert_allclose(
                model.likelihood.raw_noise.grad.item(),
                r_model.likelihood.raw_noise.grad.item(),
            )
            return r_model, model

        # dot = make_dot(mll_val, dict(model.named_parameters()))
        # dot.render('test-mll_graph.gv', view=True)
        # r_dot = make_dot(r_mll_val, dict(r_model.named_parameters()))
        # r_dot.render('test-r_mll_graph.gv', view=True)
        # # self.assertAlmostEqual(mll_val.item(), r_mll_val.item(), places=4)

        r_model, model = observe_and_update(
            r_model,
            model,
            self.lengthscale,
            self.noise_var,
            self.points_sequence[1],
            self.targets_sequence[1],
            set_online=True,
        )
        ls = deepcopy(model.covar_module.base_kernel.lengthscale.item())
        nv = deepcopy(model.likelihood.noise.item())

        r_model, model = observe_and_update(r_model, model, ls, nv,
                                            self.points_sequence[2],
                                            self.targets_sequence[2])
        ls = deepcopy(model.covar_module.base_kernel.lengthscale.item())
        nv = deepcopy(model.likelihood.noise.item())

        r_model, model = observe_and_update(r_model, model, ls, nv,
                                            self.points_sequence[3],
                                            self.targets_sequence[3])
        ls = deepcopy(model.covar_module.base_kernel.lengthscale.item())
        nv = deepcopy(model.likelihood.noise.item())

        observe_and_update(r_model, model, ls, nv, self.points_sequence[4],
                           self.targets_sequence[4])
 def __init__(self, train_x, train_y, likelihood):
     super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
     self.mean_module = ConstantMean(prior=SmoothedBoxPrior(-1e-5, 1e-5))
     self.base_covar_module = ScaleKernel(RBFKernel(lengthscale_prior=SmoothedBoxPrior(exp(-5), exp(6), sigma=0.1)))
     self.covar_module = GridInterpolationKernel(self.base_covar_module, grid_size=50, num_dims=1)
Example #12
        def update(lengthscale, noise_var, xs, ys):
            r_lik = GaussianLikelihood()
            r_kernel = GridInterpolationKernelWithFantasy(
                RBFKernel(),
                grid_size=self.grid_size,
                grid_bounds=[(-4.0, 14.0)]).double()
            r_model = RegularExactGP(xs, ys, r_lik, r_kernel, ZeroMean())

            lik = GaussianLikelihood()
            kernel = GridInterpolationKernelWithFantasy(
                RBFKernel(),
                grid_size=self.grid_size,
                grid_bounds=[(-4.0, 14.0)]).double()
            model = OnlineWoodburyGP(xs, ys, lik, kernel, ZeroMean())

            r_model.covar_module.base_kernel.lengthscale = lengthscale
            model.covar_module.base_kernel.lengthscale = lengthscale

            r_model.likelihood.noise = noise_var
            model.likelihood.noise = noise_var

            r_model.train()
            r_optim = torch.optim.SGD(r_model.parameters(), self.lr)

            model.train()
            optim = torch.optim.SGD(model.parameters(), self.lr)

            with gpytorch.settings.fast_computations(), \
                    gpytorch.settings.max_cholesky_size(1), \
                    gpytorch.settings.skip_logdet_forward():
                r_mll = ExactMarginalLogLikelihood(r_model.likelihood, r_model)
                r_train_output = r_model(r_model.train_inputs[0])
                r_mll_val = r_mll(r_train_output, r_model.train_targets)

                mll = WoodburyExactMarginalLogLikelihood(
                    model.likelihood, model)
                train_output = model(model.train_inputs[0])
                mll_val = mll(train_output, model.train_targets)

                loss = -mll_val
                loss.backward()
                r_loss = -r_mll_val
                r_loss.backward()

            print(
                "online ls grad",
                model.covar_module.base_kernel.raw_lengthscale.grad.item(),
            )
            print(
                "ski ls grad",
                r_model.covar_module.base_kernel.raw_lengthscale.grad.item(),
            )

            print("online ls",
                  model.covar_module.base_kernel.lengthscale.item())
            print("ski ls",
                  r_model.covar_module.base_kernel.lengthscale.item())

            print("online noise grad", model.likelihood.raw_noise.grad.item())
            print("ski noise grad", r_model.likelihood.raw_noise.grad.item())

            print("online noise", model.likelihood.noise.item())
            print("ski noise", r_model.likelihood.noise.item())

            # Make sure the gradients are the same
            np.testing.assert_allclose(
                model.covar_module.base_kernel.raw_lengthscale.grad.item(),
                r_model.covar_module.base_kernel.raw_lengthscale.grad.item(),
                rtol=0.01,
                atol=0.01,
            )
            np.testing.assert_allclose(
                model.likelihood.raw_noise.grad.item(),
                r_model.likelihood.raw_noise.grad.item(),
                rtol=0.01,
                atol=0.01,
            )

            r_optim.step()
            r_optim.zero_grad()

            optim.step()
            optim.zero_grad()
            model.get_updated_hyper_strategy()

            # Make sure the values are the same
            np.testing.assert_allclose(
                model.covar_module.base_kernel.lengthscale.item(),
                r_model.covar_module.base_kernel.lengthscale.item(),
                rtol=0.01,
                atol=0.01,
            )
            np.testing.assert_allclose(
                model.likelihood.noise.item(),
                r_model.likelihood.noise.item(),
                rtol=0.01,
                atol=0.01,
            )

            # Verify the gradients are the same (both zero after zero_grad)
            np.testing.assert_allclose(
                model.covar_module.base_kernel.raw_lengthscale.grad.item(),
                r_model.covar_module.base_kernel.raw_lengthscale.grad.item(),
                rtol=0.01,
                atol=0.01,
            )
            np.testing.assert_allclose(
                model.likelihood.raw_noise.grad.item(),
                r_model.likelihood.raw_noise.grad.item(),
            )
            return r_model, model
Example #13
    def __init__(
        self,
        num_outputs,
        initial_lengthscale,
        initial_inducing_points,
        separate_inducing_points=False,
        kernel="RBF",
        ard=None,
        lengthscale_prior=False,
    ):
        n_inducing_points = initial_inducing_points.shape[0]
        if separate_inducing_points:
            # Use independent inducing points per output GP
            initial_inducing_points = initial_inducing_points.repeat(num_outputs, 1, 1)

        if num_outputs > 1:
            batch_shape = torch.Size([num_outputs])
        else:
            batch_shape = torch.Size([])

        variational_distribution = CholeskyVariationalDistribution(
            n_inducing_points, batch_shape=batch_shape
        )

        variational_strategy = VariationalStrategy(
            self, initial_inducing_points, variational_distribution
        )

        if num_outputs > 1:
            variational_strategy = IndependentMultitaskVariationalStrategy(
                variational_strategy, num_tasks=num_outputs
            )

        super().__init__(variational_strategy)

        if lengthscale_prior:
            lengthscale_prior = SmoothedBoxPrior(math.exp(-1), math.exp(1), sigma=0.1)
        else:
            lengthscale_prior = None

        kwargs = {
            "ard_num_dims": ard,
            "batch_shape": batch_shape,
            "lengthscale_prior": lengthscale_prior,
        }

        if kernel == "RBF":
            kernel = RBFKernel(**kwargs)
        elif kernel == "Matern12":
            kernel = MaternKernel(nu=1 / 2, **kwargs)
        elif kernel == "Matern32":
            kernel = MaternKernel(nu=3 / 2, **kwargs)
        elif kernel == "Matern52":
            kernel = MaternKernel(nu=5 / 2, **kwargs)
        elif kernel == "RQ":
            kernel = RQKernel(**kwargs)
        else:
            raise ValueError("Specified kernel not known.")

        kernel.lengthscale = initial_lengthscale * torch.ones_like(kernel.lengthscale)

        self.mean_module = ConstantMean(batch_shape=batch_shape)
        self.covar_module = ScaleKernel(kernel, batch_shape=batch_shape)
 def __init__(self, train_x, train_y, likelihood):
     super(MultitaskGPModel, self).__init__(train_x, train_y, likelihood)
     self.mean_module = MultitaskMean(ConstantMean(), n_tasks=2)
     self.data_covar_module = GridInterpolationKernel(RBFKernel(), grid_size=100, grid_bounds=[(0, 1)])
     self.covar_module = MultitaskKernel(self.data_covar_module, n_tasks=2, rank=1)
Example #15
 def test_initialize_outputscale_batch(self):
     kernel = ScaleKernel(RBFKernel(), batch_shape=torch.Size([2]))
     ls_init = torch.tensor([3.14, 4.13])
     kernel.initialize(outputscale=ls_init)
     actual_value = ls_init.view_as(kernel.outputscale)
     self.assertLess(torch.norm(kernel.outputscale - actual_value), 1e-5)
Example #16
    def _test_inv_quad_logdet(self,
                              inv_quad_rhs=None,
                              logdet=False,
                              improper_logdet=False,
                              add_diag=False):
        # Set up
        x = torch.randn(*self.__class__.matrix_shape[:-1], 3)
        kern = RBFKernel()
        kern_copy = RBFKernel()
        mat = kern(x).evaluate()
        mat_clone = kern_copy(x).evaluate()

        if inv_quad_rhs is not None:
            inv_quad_rhs.requires_grad_(True)
            inv_quad_rhs_clone = inv_quad_rhs.detach().clone().requires_grad_(
                True)

        mat_clone_with_diag = mat_clone
        if add_diag:
            mat_clone_with_diag = mat_clone_with_diag + torch.eye(
                mat_clone.size(-1))

        if inv_quad_rhs is not None:
            actual_inv_quad = mat_clone_with_diag.inverse().matmul(
                inv_quad_rhs_clone).mul(inv_quad_rhs_clone)
            actual_inv_quad = actual_inv_quad.sum([
                -1, -2
            ]) if inv_quad_rhs.dim() >= 2 else actual_inv_quad.sum()
        if logdet:
            flattened_tensor = mat_clone_with_diag.view(
                -1, *mat_clone.shape[-2:])
            logdets = torch.cat(
                [mat.logdet().unsqueeze(0) for mat in flattened_tensor])
            if mat_clone.dim() > 2:
                actual_logdet = logdets.view(*mat_clone.shape[:-2])
            else:
                actual_logdet = logdets.squeeze()

        # Compute values with LazyTensor
        _wrapped_cg = MagicMock(wraps=gpytorch.utils.linear_cg)
        with gpytorch.settings.num_trace_samples(2000), \
                gpytorch.settings.max_cholesky_size(0), \
                gpytorch.settings.cg_tolerance(1e-5), \
                gpytorch.settings.skip_logdet_forward(improper_logdet), \
                patch("gpytorch.utils.linear_cg", new=_wrapped_cg) as linear_cg_mock, \
                gpytorch.settings.min_preconditioning_size(0), \
                gpytorch.settings.max_preconditioner_size(30):
            lazy_tensor = NonLazyTensor(mat)

            if add_diag:
                lazy_tensor = lazy_tensor.add_jitter(1.0)

            res_inv_quad, res_logdet = lazy_tensor.inv_quad_logdet(
                inv_quad_rhs=inv_quad_rhs, logdet=logdet)

        # Compare forward pass
        if inv_quad_rhs is not None:
            self.assertAllClose(res_inv_quad, actual_inv_quad, rtol=1e-2)
        if logdet and not improper_logdet:
            self.assertAllClose(res_logdet,
                                actual_logdet,
                                rtol=1e-1,
                                atol=2e-1)

        # Backward
        if inv_quad_rhs is not None:
            actual_inv_quad.sum().backward(retain_graph=True)
            res_inv_quad.sum().backward(retain_graph=True)
        if logdet:
            actual_logdet.sum().backward()
            res_logdet.sum().backward()

        self.assertAllClose(kern.raw_lengthscale.grad,
                            kern_copy.raw_lengthscale.grad,
                            rtol=1e-2,
                            atol=1e-2)
        if inv_quad_rhs is not None:
            self.assertAllClose(inv_quad_rhs.grad,
                                inv_quad_rhs_clone.grad,
                                rtol=2e-2,
                                atol=1e-2)

        # Make sure CG was called
        self.assertTrue(linear_cg_mock.called)
Example #17
 def create_kernel_no_ard(self, **kwargs):
     base_kernel = RBFKernel()
     kernel = ScaleKernel(base_kernel, **kwargs)
     return kernel
Example #18
    def build(self):
        """
        Right now this isn't needed by this method
        """
        def prod(iterable):
            return reduce(operator.mul, iterable)

        mass_kernel = RBFKernel(active_dims=1,
                                lengthscale_constraint=GreaterThan(10.))
        time_kernel = RBFKernel(active_dims=0,
                                lengthscale_constraint=GreaterThan(0.1))
        spin_kernels = [
            RBFKernel(active_dims=dimension,
                      lengthscale_constraint=GreaterThan(7))
            for dimension in range(2, 8)
        ]

        class ExactGPModel(gpytorch.models.ExactGP):
            """
            Use the GPyTorch Exact GP
            """
            def __init__(self, train_x, train_y, likelihood):
                """Initialise the model"""
                super(ExactGPModel, self).__init__(train_x, train_y,
                                                   likelihood)
                self.mean_module = gpytorch.means.ZeroMean()
                self.covar_module = gpytorch.kernels.ScaleKernel(
                    time_kernel * mass_kernel * prod(spin_kernels),
                    lengthscale_constraint=gpytorch.constraints.LessThan(0.01))

            def forward(self, x):
                """Run the forward method of the model"""
                mean_x = self.mean_module(x)
                covar_x = self.covar_module(x)
                return gpytorch.distributions.MultivariateNormal(
                    mean_x, covar_x)

        data = np.genfromtxt(
            pkg_resources.resource_filename('heron',
                                            'models/data/gt-M60-F1024.dat'))

        training_x = self.training_x = torch.tensor(data[:, 0:-2] *
                                                    100).float().cuda()
        training_y = self.training_y = torch.tensor(data[:, -2] *
                                                    1e21).float().cuda()
        training_yx = torch.tensor(data[:, -1] * 1e21).float().cuda()

        likelihood = gpytorch.likelihoods.GaussianLikelihood(
            noise_constraint=LessThan(10))
        model = ExactGPModel(training_x, training_y, likelihood)
        model2 = ExactGPModel(training_x, training_yx, likelihood)
        state_vector = pkg_resources.resource_filename(
            'heron', 'models/data/gt-gpytorch.pth')

        model = model.cuda()
        model2 = model2.cuda()
        likelihood = likelihood.cuda()

        model.load_state_dict(torch.load(state_vector))
        model2.load_state_dict(torch.load(state_vector))

        return [model, model2], likelihood
Example #19
def test_trace_logdet_quad_form_factory():
    x = Variable(torch.linspace(0, 1, 10))
    rbf_covar = RBFKernel()
    rbf_covar.initialize(log_lengthscale=-4)
    covar_module = GridInterpolationKernel(rbf_covar)
    covar_module.eval()
    covar_module.initialize_interpolation_grid(4, [(0, 1)])
    c = Variable(covar_module.forward(x.unsqueeze(1), x.unsqueeze(1)).c.data,
                 requires_grad=True)

    T = Variable(torch.zeros(4, 4))
    for i in range(4):
        for j in range(4):
            T[i, j] = utils.toeplitz.toeplitz_getitem(c, c, i, j)

    U = torch.randn(4, 4).triu()
    U = Variable(U.mul(U.diag().sign().unsqueeze(1).expand_as(U).triu()),
                 requires_grad=True)

    mu_diff = Variable(torch.randn(4), requires_grad=True)

    actual = _det(T).log() + mu_diff.dot(
        T.inverse().mv(mu_diff)) + T.inverse().mm(U.t().mm(U)).trace()
    actual.backward()

    actual_c_grad = c.grad.data.clone()
    actual_mu_diff_grad = mu_diff.grad.data.clone()
    actual_U_grad = U.grad.data.clone()

    c.grad.data.fill_(0)
    mu_diff.grad.data.fill_(0)
    U.grad.data.fill_(0)

    def _matmul_closure_factory(*args):
        c, = args
        return lambda mat2: sym_toeplitz_matmul(c, mat2)

    def _derivative_quadratic_form_factory(*args):
        return lambda left_vector, right_vector: (
            sym_toeplitz_derivative_quadratic_form(left_vector, right_vector
                                                   ), )

    covar_args = (c, )

    gpytorch.functions.num_trace_samples = 1000
    res = trace_logdet_quad_form_factory(_matmul_closure_factory,
                                         _derivative_quadratic_form_factory)()(
                                             mu_diff, U, *covar_args)
    res.backward()

    res_c_grad = c.grad.data
    res_mu_diff_grad = mu_diff.grad.data
    res_U_grad = U.grad.data

    assert (res.data - actual.data).norm() / actual.data.norm() < 0.15
    assert (res_c_grad - actual_c_grad).norm() / actual_c_grad.norm() < 0.15
    assert (res_mu_diff_grad -
            actual_mu_diff_grad).norm() / actual_mu_diff_grad.norm() < 1e-3
    assert (res_U_grad - actual_U_grad).norm() / actual_U_grad.norm() < 1e-3

    c.grad.data.fill_(0)
    mu_diff.grad.data.fill_(0)
    U.grad.data.fill_(0)

    covar_args = (c, )

    gpytorch.functions.fastest = False
    res = trace_logdet_quad_form_factory(_matmul_closure_factory,
                                         _derivative_quadratic_form_factory)()(
                                             mu_diff, U, *covar_args)
    res.backward()

    res_c_grad = c.grad.data
    res_mu_diff_grad = mu_diff.grad.data
    res_U_grad = U.grad.data

    assert (res.data - actual.data).norm() / actual.data.norm() < 1e-3
    assert (res_c_grad - actual_c_grad).norm() / actual_c_grad.norm() < 1e-3
    assert (res_mu_diff_grad -
            actual_mu_diff_grad).norm() / actual_mu_diff_grad.norm() < 1e-3
    assert (res_U_grad - actual_U_grad).norm() / actual_U_grad.norm() < 1e-3
 def __init__(self):
     super(GPRegressionModel, self).__init__(grid_size=20, grid_bounds=[(-0.05, 1.05)])
     self.mean_module = ConstantMean(prior=SmoothedBoxPrior(-10, 10))
     self.covar_module = ScaleKernel(
         RBFKernel(log_lengthscale_prior=SmoothedBoxPrior(exp(-3), exp(6), sigma=0.1, log_transform=True))
     )
def kernel_fun(rbf_var, rbf_lengthscale, lin_var):
    return (gpytorch.kernels.ScaleKernel(
        RBFKernel(lengthscale=torch.tensor(rbf_lengthscale)),
        outputscale=torch.tensor(rbf_var)) +
            ScaleKernel(LinearKernel(), outputscale=torch.tensor(lin_var)))
 def __init__(self, train_inputs, train_targets, likelihood):
     super(ExactGPModel, self).__init__(train_inputs, train_targets,
                                        likelihood)
     self.mean_module = ConstantMean(constant_bounds=(-1, 1))
     self.covar_module = RBFKernel(log_lengthscale_bounds=(-3, 3))
Example #23
def gpnet(args, dataloader, test_x, prior_gp):
    N = len(dataloader.dataset)
    x_dim = 1
    prior_gp.train()

    if args.net == 'tangent':
        kernel = prior_gp.covar_module
        bnn_prev = FirstOrder([x_dim] + [args.n_hidden] * args.n_layer,
                              mvn=False)
        bnn = FirstOrder([x_dim] + [args.n_hidden] * args.n_layer, mvn=True)
    elif args.net == 'deep':
        kernel = prior_gp.covar_module
        bnn_prev = DeepKernel([x_dim] + [args.n_hidden] * args.n_layer,
                              mvn=False)
        bnn = DeepKernel([x_dim] + [args.n_hidden] * args.n_layer, mvn=True)
    elif args.net == 'rf':
        kernel = ScaleKernel(RBFKernel())
        kernel_prev = ScaleKernel(RBFKernel())
        bnn_prev = RFExpansion(x_dim,
                               args.n_hidden,
                               kernel_prev,
                               mvn=False,
                               fix_ls=args.fix_rf_ls,
                               residual=args.residual)
        bnn = RFExpansion(x_dim,
                          args.n_hidden,
                          kernel,
                          fix_ls=args.fix_rf_ls,
                          residual=args.residual)
        bnn_prev.load_state_dict(bnn.state_dict())
    else:
        raise NotImplementedError('Unknown inference net')
    bnn = bnn.to(args.device)
    bnn_prev = bnn_prev.to(args.device)
    prior_gp = prior_gp.to(args.device)

    infer_gpnet_optimizer = optim.Adam(bnn.parameters(), lr=args.learning_rate)
    hyper_opt_optimizer = optim.Adam(prior_gp.parameters(), lr=args.hyper_rate)

    x_min, x_max = dataloader.dataset.range

    bnn.train()
    bnn_prev.train()
    prior_gp.train()

    mb = master_bar(range(1, args.n_iters + 1))

    for t in mb:
        # Hyperparameter selection
        beta = args.beta0 * 1. / (1. + args.gamma * math.sqrt(t - 1))
        dl_bar = progress_bar(dataloader, parent=mb)
        for x, y in dl_bar:
            observed_size = x.size(0)
            x, y = x.to(args.device), y.to(args.device)
            x_star = torch.Tensor(args.measurement_size,
                                  x_dim).uniform_(x_min, x_max).to(args.device)
            # [Batch + Measurement Points x x_dims]
            xx = torch.cat([x, x_star], 0)

            infer_gpnet_optimizer.zero_grad()
            hyper_opt_optimizer.zero_grad()

            # inference net
            # Eq.(6) Prior p(f)
            # \mu_1=0, \Sigma_1
            mean_prior = torch.zeros(observed_size).to(args.device)
            K_prior = kernel(xx, xx).add_jitter(1e-6)

            # q_{\gamma_t}(f_M, f_n) = Normal(mu_2, sigma_2|x_n, x_m)
            # \mu_2, \Sigma_2
            qff_mean_prev, K_prox = bnn_prev(xx)

            # Eq.(8) adapt prior; p(f)^\beta x q(f)^{1 - \beta}
            mean_adapt, K_adapt = product_gaussians(mu1=mean_prior,
                                                    sigma1=K_prior,
                                                    mu2=qff_mean_prev,
                                                    sigma2=K_prox,
                                                    beta=beta)

            # Eq.(8)
            (mean_n, mean_m), (Knn, Knm,
                               Kmm) = split_gaussian(mean_adapt, K_adapt,
                                                     observed_size)

            # Eq.(2) K_{D,D} + noise / (N\beta_t)
            Ky = Knn + torch.eye(observed_size).to(
                args.device) * prior_gp.likelihood.noise / (N / observed_size *
                                                            beta)
            Ky_tril = torch.cholesky(Ky)

            # Eq.(2)
            mean_target = Knm.t().mm(cholesky_solve(y - mean_n,
                                                    Ky_tril)) + mean_m
            mean_target = mean_target.squeeze(-1)
            K_target = gpytorch.add_jitter(
                Kmm - Knm.t().mm(cholesky_solve(Knm, Ky_tril)), 1e-6)
            # \hat{q}_{t+1} (f_M)
            target_pf_star = MultivariateNormal(mean_target, K_target)

            # q_\gamma (f_M)
            qf_star = bnn(x_star)

            # Eq. (11)
            kl_obj = kl_div(qf_star, target_pf_star).sum()

            kl_obj.backward(retain_graph=True)
            infer_gpnet_optimizer.step()

            # Hyperparameter update
            (mean_n_prior, _), (Kn_prior, _,
                                _) = split_gaussian(mean_prior, K_prior,
                                                    observed_size)
            pf = MultivariateNormal(mean_n_prior, Kn_prior)

            (qf_prev_mean, _), (Kn_prox, _,
                                _) = split_gaussian(qff_mean_prev, K_prox,
                                                    observed_size)
            qf_prev = MultivariateNormal(qf_prev_mean, Kn_prox)

            hyper_obj = -(prior_gp.likelihood.expected_log_prob(
                y.squeeze(-1), qf_prev) - kl_div(qf_prev, pf))
            hyper_obj.backward(retain_graph=True)
            hyper_opt_optimizer.step()

            mb.child.comment = "kl_obj = {:.3f}, obs_var={:.3f}".format(
                kl_obj.item(), prior_gp.likelihood.noise.item())

        # update q_{\gamma_t} to q_{\gamma_{t+1}}
        bnn_prev.load_state_dict(bnn.state_dict())
        if args.net == 'rf':
            kernel_prev.load_state_dict(kernel.state_dict())
        if t % 50 == 0:
            mb.write("Iter {}/{}, kl_obj = {:.4f}, noise = {:.4f}".format(
                t, args.n_iters, kl_obj.item(),
                prior_gp.likelihood.noise.item()))

    test_x = test_x.to(args.device)
    test_stats = evaluate(bnn, prior_gp.likelihood, test_x,
                          args.net == 'tangent')
    return test_stats
Example #24
    def test_random_fourier_features(self):
        # test kernel that is not Scale, RBF, or Matern
        with self.assertRaises(NotImplementedError):
            RandomFourierFeatures(
                kernel=PeriodicKernel(),
                input_dim=2,
                num_rff_features=3,
            )

        # test batched kernel
        with self.assertRaises(NotImplementedError):
            RandomFourierFeatures(
                kernel=RBFKernel(batch_shape=torch.Size([2])),
                input_dim=2,
                num_rff_features=3,
            )
        tkwargs = {"device": self.device}
        for dtype in (torch.float, torch.double):
            tkwargs["dtype"] = dtype
            # test init
            # test ScaleKernel
            base_kernel = RBFKernel(ard_num_dims=2)
            kernel = ScaleKernel(base_kernel).to(**tkwargs)
            rff = RandomFourierFeatures(
                kernel=kernel,
                input_dim=2,
                num_rff_features=3,
            )
            self.assertTrue(torch.equal(rff.outputscale, kernel.outputscale))
            # check that rff makes a copy
            self.assertFalse(rff.outputscale is kernel.outputscale)
            self.assertTrue(
                torch.equal(rff.lengthscale, base_kernel.lengthscale))
            # check that rff makes a copy
            self.assertFalse(rff.lengthscale is kernel.lengthscale)

            # test not ScaleKernel
            rff = RandomFourierFeatures(
                kernel=base_kernel,
                input_dim=2,
                num_rff_features=3,
            )
            self.assertTrue(
                torch.equal(rff.outputscale, torch.tensor(1, **tkwargs)))
            self.assertTrue(
                torch.equal(rff.lengthscale, base_kernel.lengthscale))
            # check that rff makes a copy
            self.assertFalse(rff.lengthscale is kernel.lengthscale)
            self.assertEqual(rff.weights.shape, torch.Size([2, 3]))
            self.assertEqual(rff.bias.shape, torch.Size([3]))
            self.assertTrue(((rff.bias <= 2 * pi) & (rff.bias >= 0.0)).all())

            # test forward
            rff = RandomFourierFeatures(
                kernel=kernel,
                input_dim=2,
                num_rff_features=3,
            )
            for batch_shape in (torch.Size([]), torch.Size([3])):
                X = torch.rand(*batch_shape, 1, 2, **tkwargs)
                Y = rff(X)
                self.assertEqual(Y.shape, torch.Size([*batch_shape, 1, 1]))
                expected_Y = torch.sqrt(
                    2 * rff.outputscale / rff.weights.shape[-1]) * (torch.cos(
                        X / base_kernel.lengthscale @ rff.weights + rff.bias))
                self.assertTrue(torch.equal(Y, expected_Y))

            # test get_weights
            with mock.patch("torch.randn", wraps=torch.randn) as mock_randn:
                rff._get_weights(base_kernel=base_kernel,
                                 input_dim=2,
                                 num_rff_features=3)
                mock_randn.assert_called_once_with(
                    2,
                    3,
                    dtype=base_kernel.lengthscale.dtype,
                    device=base_kernel.lengthscale.device,
                )
            # test get_weights with Matern kernel
            with mock.patch("torch.randn",
                            wraps=torch.randn) as mock_randn, mock.patch(
                                "torch.distributions.Gamma",
                                wraps=torch.distributions.Gamma) as mock_gamma:
                base_kernel = MaternKernel(ard_num_dims=2).to(**tkwargs)
                rff._get_weights(base_kernel=base_kernel,
                                 input_dim=2,
                                 num_rff_features=3)
                mock_randn.assert_called_once_with(
                    2,
                    3,
                    dtype=base_kernel.lengthscale.dtype,
                    device=base_kernel.lengthscale.device,
                )
                mock_gamma.assert_called_once_with(
                    base_kernel.nu,
                    base_kernel.nu,
                )
 def create_kernel_no_ard(self, **kwargs):
     return NewtonGirardAdditiveKernel(RBFKernel(), 4, 2, **kwargs)
Example #26
	def __init__(self, train_x, train_y, likelihood):
		super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
		self.mean_module = ConstantMean()
		self.base_covar_module = ScaleKernel(RBFKernel())
		self.covar_module = InducingPointKernel(self.base_covar_module, inducing_points=train_x[:500, :], likelihood=likelihood)
Example #27
 def create_kernel_no_ard(self, **kwargs):
     return RBFKernel(**kwargs)
Example #28
 def test_initialize_outputscale(self):
     kernel = ScaleKernel(RBFKernel())
     kernel.initialize(outputscale=3.14)
     actual_value = torch.tensor(3.14).view_as(kernel.outputscale)
     self.assertLess(torch.norm(kernel.outputscale - actual_value), 1e-5)
Example #29
 def test_initialize_lengthscale(self):
     kernel = RBFKernel()
     kernel.initialize(lengthscale=3.14)
     actual_value = torch.tensor(3.14).view_as(kernel.lengthscale)
     self.assertLess(torch.norm(kernel.lengthscale - actual_value), 1e-5)
Example #30
 def __init__(self, train_x, train_y, likelihood):
     super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
     self.mean_module = ConstantMean(constant_bounds=(-1, 1))
     self.base_covar_module = RBFKernel(log_lengthscale_bounds=(-3, 3))
     self.covar_module = GridInterpolationKernel(self.base_covar_module, grid_size=64, grid_bounds=[(0, 1), (0, 1)])
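As with the other exact-GP snippets above, a forward method is needed to use this model; a minimal sketch under current GPyTorch conventions (not part of the original snippet, which uses an older parameter-bounds API; assumes import gpytorch):

 def forward(self, x):
     # The grid-interpolation (SKI) kernel is evaluated like any other covariance module.
     mean_x = self.mean_module(x)
     covar_x = self.covar_module(x)
     return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)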