def test_fantasy_updates_batch(self, cuda=False): train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) # We're manually going to set the hyperparameters to something they shouldn't be likelihood = GaussianLikelihood() gp_model = ExactGPModel(train_x, train_y, likelihood) mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model) gp_model.covar_module.base_kernel.initialize(lengthscale=exp(1)) gp_model.mean_module.initialize(constant=0) likelihood.initialize(noise=exp(1)) if cuda: gp_model.cuda() likelihood.cuda() # Find optimal model hyperparameters gp_model.train() likelihood.train() optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.15) optimizer.n_iter = 0 for _ in range(50): optimizer.zero_grad() with gpytorch.settings.debug(False): output = gp_model(train_x) loss = -mll(output, train_y) loss.backward() optimizer.n_iter += 1 optimizer.step() for param in gp_model.parameters(): self.assertTrue(param.grad is not None) self.assertGreater(param.grad.norm().item(), 0) for param in likelihood.parameters(): self.assertTrue(param.grad is not None) self.assertGreater(param.grad.norm().item(), 0) optimizer.step() with gpytorch.settings.fast_pred_var(): # Test the model gp_model.eval() likelihood.eval() test_function_predictions = likelihood(gp_model(test_x)) # Cut data down, and then add back via the fantasy interface gp_model.set_train_data(train_x[:5], train_y[:5], strict=False) likelihood(gp_model(test_x)) fantasy_x = train_x[5:].clone().unsqueeze(0).unsqueeze(-1).repeat( 3, 1, 1).requires_grad_(True) fantasy_y = train_y[5:].unsqueeze(0).repeat(3, 1) fant_model = gp_model.get_fantasy_model(fantasy_x, fantasy_y) fant_function_predictions = likelihood(fant_model(test_x)) self.assertTrue( approx_equal(test_function_predictions.mean, fant_function_predictions.mean[0])) fant_function_predictions.mean.sum().backward() self.assertTrue(fantasy_x.grad is not None)
def test_train_on_batch_test_on_batch(self):
    """Train a batch-shaped model (batch of 2 datasets), then evaluate on
    non-batch and batch test inputs, checking MAE and gradient smoke tests."""
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood(
        noise_prior=gpytorch.priors.NormalPrior(loc=torch.zeros(2), scale=torch.ones(2)),
        batch_shape=torch.Size([2]))
    gp_model = ExactGPModel(train_x12, train_y12, likelihood, batch_shape=torch.Size([2]))
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(gp_model.parameters(), lr=0.1)
    for _ in range(50):
        optimizer.zero_grad()
        output = gp_model(train_x12)
        # .sum() collapses the per-batch MLL values to a scalar loss.
        loss = -mll(output, train_y12, train_x12).sum()
        loss.backward()
        optimizer.step()

    # Gradients reach likelihood params via backward() even though the
    # optimizer only updates the model parameters.
    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    gp_model.eval()
    likelihood.eval()

    # First test on non-batch
    non_batch_predictions = likelihood(gp_model(test_x1))
    preds1 = non_batch_predictions.mean
    mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1[0]))
    self.assertLess(mean_abs_error1.squeeze().item(), 0.1)

    # Make predictions for both sets of test points, and check MAEs.
    batch_predictions = likelihood(gp_model(test_x12))
    preds1 = batch_predictions.mean[0]
    preds2 = batch_predictions.mean[1]
    mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
    mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
    self.assertLess(mean_abs_error1.squeeze().item(), 0.1)
    self.assertLess(mean_abs_error2.squeeze().item(), 0.1)

    # Smoke test for batch mode derivatives failing
    test_x_param = torch.nn.Parameter(test_x12.data)
    batch_predictions = likelihood(gp_model(test_x_param))
    batch_predictions.mean.sum().backward()
    self.assertTrue(test_x_param.grad is not None)

    # Smoke test for non-batch mode derivatives failing
    test_x_param = torch.nn.Parameter(test_x1.data)
    batch_predictions = likelihood(gp_model(test_x_param))
    batch_predictions.mean.sum().backward()
    self.assertTrue(test_x_param.grad is not None)
def test_posterior_latent_gp_and_likelihood_with_optimization(self):
    """Optimize hyperparameters from a deliberately bad initialization and
    check that posterior-mean test error becomes small (old log-* API)."""
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood(log_noise_bounds=(-3, 3))
    gp_model = ExactGPModel(train_x.data, train_y.data, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.covar_module.initialize(log_lengthscale=1)
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(log_noise=1)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(
        list(gp_model.parameters()) + list(likelihood.parameters()),
        lr=0.1,
    )
    optimizer.n_iter = 0
    for _ in range(50):
        optimizer.zero_grad()
        output = gp_model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # Test the model
    gp_model.eval()
    likelihood.eval()
    test_function_predictions = likelihood(gp_model(test_x))
    # NOTE(review): `.mean()` as a method and `.data.squeeze()[0]` are
    # old PyTorch/GPyTorch idioms — this block targets an older API.
    mean_abs_error = torch.mean(torch.abs(test_y - test_function_predictions.mean()))
    self.assertLess(mean_abs_error.data.squeeze()[0], 0.05)
def test_posterior_latent_gp_and_likelihood_with_optimization(self):
    """Train on one dataset, then hot-swap a batched train set
    (sine + cosine stacks) via ``set_train_data`` and check per-batch MAE
    (old log-* / `.mean()` API)."""
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood(log_noise_bounds=(-3, 3))
    gp_model = ExactGPModel(train_x1.data, train_y1.data, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.covar_module.initialize(log_lengthscale=1)
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(log_noise=1)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
    optimizer.n_iter = 0
    for _ in range(50):
        optimizer.zero_grad()
        output = gp_model(train_x1)
        loss = -mll(output, train_y1)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # Every parameter must have received a non-trivial gradient.
    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    optimizer.step()

    # Test the model
    gp_model.eval()
    likelihood.eval()

    # Create data batches
    train_x12 = torch.cat((train_x1.unsqueeze(0), train_x2.unsqueeze(0)), dim=0).contiguous()
    train_y12 = torch.cat((train_y1.unsqueeze(0), train_y2.unsqueeze(0)), dim=0).contiguous()
    test_x12 = torch.cat((test_x1.unsqueeze(0), test_x2.unsqueeze(0)), dim=0).contiguous()

    # Update gp model to use both sine and cosine training data as train data
    gp_model.set_train_data(train_x12, train_y12, strict=False)

    # Make predictions for both sets of test points, and check MAEs.
    batch_predictions = likelihood(gp_model(test_x12))
    preds1 = batch_predictions.mean()[0]
    preds2 = batch_predictions.mean()[1]
    mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
    mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
    self.assertLess(mean_abs_error1.data.squeeze().item(), 0.05)
    self.assertLess(mean_abs_error2.data.squeeze().item(), 0.05)
def test_posterior_latent_gp_and_likelihood_with_optimization( self, cuda=False, checkpoint=0): train_x, test_x, train_y, test_y = self._get_data( cuda=cuda, num_data=(1000 if checkpoint else 11), add_noise=bool(checkpoint), ) # We're manually going to set the hyperparameters to something they shouldn't be likelihood = GaussianLikelihood( noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1)) gp_model = ExactGPModel(train_x, train_y, likelihood) mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model) gp_model.covar_module.base_kernel.initialize(lengthscale=exp(1)) gp_model.mean_module.initialize(constant=0) likelihood.initialize(noise=exp(1)) if cuda: gp_model.cuda() likelihood.cuda() # Find optimal model hyperparameters gp_model.train() likelihood.train() optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.15) optimizer.n_iter = 0 with gpytorch.beta_features.checkpoint_kernel( checkpoint), gpytorch.settings.fast_pred_var(): for _ in range(20 if checkpoint else 50): optimizer.zero_grad() output = gp_model(train_x) loss = -mll(output, train_y) loss.backward() optimizer.n_iter += 1 optimizer.step() for param in gp_model.parameters(): self.assertTrue(param.grad is not None) self.assertGreater(param.grad.norm().item(), 0) for param in likelihood.parameters(): self.assertTrue(param.grad is not None) self.assertGreater(param.grad.norm().item(), 0) optimizer.step() # Test the model gp_model.eval() likelihood.eval() with gpytorch.settings.skip_posterior_variances(True): test_function_predictions = likelihood(gp_model(test_x)) mean_abs_error = torch.mean( torch.abs(test_y - test_function_predictions.mean)) self.assertLess(mean_abs_error.item(), 0.05)
def test_fixed_noise_fantasy_updates_batch(self, cuda=False):
    """Batched fantasy updates with a fixed-noise likelihood: fantasy
    observations carry their own per-point noise, and fantasy predictions
    must match the full-data model.

    :param cuda: if True, run on GPU.
    """
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
    # Per-point observation noise for train and test sets.
    noise = torch.full_like(train_y, 2e-4)
    test_noise = torch.full_like(test_y, 3e-4)

    likelihood = FixedNoiseGaussianLikelihood(noise)
    gp_model = ExactGPModel(train_x, train_y, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.covar_module.base_kernel.initialize(lengthscale=exp(1))
    gp_model.mean_module.initialize(constant=0)

    if cuda:
        gp_model.cuda()
        likelihood.cuda()

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(gp_model.parameters(), lr=0.15)
    for _ in range(50):
        optimizer.zero_grad()
        with gpytorch.settings.debug(False):
            output = gp_model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    # Every model parameter must have received a non-trivial gradient.
    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    optimizer.step()

    with gpytorch.settings.fast_pred_var():
        # Test the model
        gp_model.eval()
        likelihood.eval()
        test_function_predictions = likelihood(gp_model(test_x), noise=test_noise)

        # Cut data down, and then add back via the fantasy interface
        gp_model.set_train_data(train_x[:5], train_y[:5], strict=False)
        # Shrink the likelihood's fixed noise to match the reduced train set.
        gp_model.likelihood.noise_covar = FixedGaussianNoise(noise=noise[:5])
        # Result unused; presumably warms the prediction caches — confirm
        likelihood(gp_model(test_x), noise=test_noise)

        # Fantasy inputs: batch of 3 copies with a trailing feature dim.
        fantasy_x = train_x[5:].clone().unsqueeze(0).unsqueeze(-1).repeat(3, 1, 1).requires_grad_(True)
        fantasy_y = train_y[5:].unsqueeze(0).repeat(3, 1)
        fant_model = gp_model.get_fantasy_model(fantasy_x, fantasy_y, noise=noise[5:].unsqueeze(0).repeat(3, 1))
        fant_function_predictions = likelihood(fant_model(test_x), noise=test_noise)

        self.assertAllClose(test_function_predictions.mean, fant_function_predictions.mean[0], atol=1e-4)

        # Gradients must propagate back to the fantasy inputs.
        fant_function_predictions.mean.sum().backward()
        self.assertTrue(fantasy_x.grad is not None)
def test_posterior_latent_gp_and_likelihood_with_optimization(self, cuda=False):
    """Hyperparameter-optimization test with a fixed-noise likelihood.

    :param cuda: if True, run on GPU.
    """
    # This test throws a warning because the fixed noise likelihood gets the wrong input
    warnings.simplefilter("ignore", GPInputWarning)
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = FixedNoiseGaussianLikelihood(torch.ones(11) * 0.001)
    gp_model = ExactGPModel(train_x, train_y, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.rbf_covar_module.initialize(lengthscale=exp(1))
    gp_model.mean_module.initialize(constant=0)

    if cuda:
        gp_model.cuda()
        likelihood.cuda()

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
    optimizer.n_iter = 0
    with gpytorch.settings.debug(False):
        for _ in range(75):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        # Every parameter must have received a non-trivial gradient.
        for param in gp_model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        optimizer.step()

        # Test the model
        gp_model.eval()
        likelihood.eval()
        test_function_predictions = likelihood(gp_model(test_x))
        mean_abs_error = torch.mean(torch.abs(test_y - test_function_predictions.mean))
        self.assertLess(mean_abs_error.squeeze().item(), 0.05)
def test_train_on_single_set_test_on_batch(self):
    """Train a multitask model on a single (non-batch) dataset, then make
    batched predictions and check per-task/per-batch MAE.

    Fix: the original body set up training twice in a row — it called
    ``gp_model.train()``/``likelihood.train()`` and built an Adam optimizer
    (over model + likelihood parameters, with a stray ``n_iter`` attribute),
    then immediately repeated the ``train()`` calls and rebound ``optimizer``
    to a new Adam over ``gp_model.parameters()`` only. The first optimizer was
    never stepped; that dead setup is removed. The optimizer actually used is
    kept unchanged, so training behavior is identical.
    """
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = MultitaskGaussianLikelihood(
        log_noise_prior=gpytorch.priors.NormalPrior(loc=torch.zeros(1), scale=torch.ones(1), log_transform=True),
        num_tasks=2,
    )
    gp_model = ExactGPModel(train_x1, train_y1, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(gp_model.parameters(), lr=0.1)
    for _ in range(50):
        optimizer.zero_grad()
        output = gp_model(train_x1)
        # .sum() collapses the MLL output to a scalar loss.
        loss = -mll(output, train_y1).sum()
        loss.backward()
        optimizer.step()

    # Gradients reach likelihood parameters via backward() even though the
    # optimizer only updates the model parameters.
    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    gp_model.eval()
    likelihood.eval()

    # Make predictions for both sets of test points, and check MAEs.
    batch_predictions = likelihood(gp_model(test_x12))
    preds1 = batch_predictions.mean[0]
    preds2 = batch_predictions.mean[1]
    mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
    mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
    self.assertLess(mean_abs_error1.squeeze().item(), 0.05)
    self.assertLess(mean_abs_error2.squeeze().item(), 0.05)
def ss_multmodel_factory(nsamples, data_mods, data_lhs, idx=None):
    """Build an SGD sampler over all data-model parameters whose energy is the
    summed exact marginal log-likelihood of the data models.

    Fix: inside ``ss_ell_builder`` the accumulator initialization
    ``loss = 0.`` had been commented out, so the first
    ``loss = loss + ...`` raised ``UnboundLocalError``. Re-initialized.

    :param nsamples: number of SGD samples (forwarded to ``SGD``).
    :param data_mods: list of GP data models sharing a latent covar module.
    :param data_lhs: matching list of likelihoods.
    :param idx: unused here; kept for signature parity with ``ss_factory``.
    :return: a project ``SGD`` object over the concatenated model parameters.
    """
    for dm in data_mods:
        dm.train()
    for dlh in data_lhs:
        dlh.train()

    mll_list = [
        gpytorch.ExactMarginalLogLikelihood(dlh, dm)
        for dlh, dm in zip(data_lhs, data_mods)
    ]

    # Latent likelihood/model are taken from the first data model; presumably
    # all data models share them — confirm.
    latent_lh = data_mods[0].covar_module.latent_lh
    latent_mod = data_mods[0].covar_module.latent_mod

    def ss_ell_builder(latent_mod, latent_lh, data_mods, data_lhs):
        latent_lh.train()
        latent_mod.train()
        # compute prob
        loss = 0.  # was commented out, causing UnboundLocalError below
        for i in range(len(data_mods)):
            # pull out latent GP and omega
            demeaned_logdens = data_mods[i].covar_module.latent_params
            omega = data_mods[i].covar_module.omega
            # update latent model
            latent_mod.set_train_data(inputs=omega, targets=demeaned_logdens.detach(), strict=False)
            # compute loss
            loss = loss + mll_list[i](
                data_mods[i](*mll_list[i].model.train_inputs),
                mll_list[i].model.train_targets)
        return loss

    ell_func = lambda h: ss_ell_builder(latent_mod, latent_lh, data_mods, data_lhs)

    # Flatten [params-per-model] into one parameter list for the optimizer.
    data_par_list = [list(dm.parameters()) for dm in data_mods]
    optim_pars = [par for sublist in data_par_list for par in sublist]
    return SGD(optim_pars, ell_func, n_samples=nsamples, lr=1e-1)
def ss_factory(nsamples, data_mod, data_lh, idx=None):
    """Build an SGD sampler over the data-model parameters whose energy is
    the exact marginal log-likelihood of ``data_mod``.

    :param nsamples: number of SGD samples (forwarded to ``SGD``).
    :param data_mod: GP data model (or singleton list of one).
    :param data_lh: its likelihood (or singleton list of one).
    :param idx: selects which latent spectrum inside the covar module to sync.
    :return: a project ``SGD`` object.
    """
    if isinstance(data_mod, list):
        # Allow the caller to pass singleton lists.
        data_mod = data_mod[0]
        data_lh = data_lh[0]

    # defining log-likelihood function
    data_mod.train()
    data_lh.train()

    # pull out latent model and spectrum from the data model
    latent_lh = data_mod.covar_module.get_latent_lh(idx)
    latent_mod = data_mod.covar_module.get_latent_mod(idx)
    omega = data_mod.covar_module.get_omega(idx)
    demeaned_logdens = data_mod.covar_module.get_latent_params(idx)

    # update the training inputs
    latent_mod.set_train_data(inputs=omega, targets=demeaned_logdens.detach(), strict=False)

    data_mll = gpytorch.ExactMarginalLogLikelihood(data_lh, data_mod)

    def ss_ell_builder(latent_mod, latent_lh, data_mod, data_lh):
        # Evaluate the MLL of the data model under bounded CG solver settings.
        latent_lh.train()
        latent_mod.train()
        with gpytorch.settings.max_preconditioner_size(15), gpytorch.settings.cg_tolerance(1e-3), gpytorch.settings.max_cg_iterations(1000):
            loss = data_mll(data_mod(*data_mod.train_inputs), data_mod.train_targets)
            print('Loss is: ', loss)
            #num_y = len(data_mod.train_targets)
            #print('P_y is: ', data_lh(data_mod(*data_mod.train_inputs)).log_prob(data_mod.train_targets)/num_y)
            #print('p_nu is: ', data_mod.covar_module.latent_prior.log_prob(data_mod.covar_module.latent_params)/num_y)
            return loss

    ell_func = lambda h: ss_ell_builder(latent_mod, latent_lh, data_mod, data_lh)

    pars_for_optimizer = list(data_mod.parameters())
    return SGD(pars_for_optimizer, ell_func, n_samples=nsamples, lr=1e-2)
def test_train_and_eval(self):
    """Multitask regression: train on one set of inputs with 2-task targets,
    then check per-task MAE and the shape of the predictive interval."""
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = MultitaskGaussianLikelihood(num_tasks=2)
    gp_model = ExactGPModel(train_x, train_y12, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(gp_model.parameters(), lr=0.1)
    optimizer.n_iter = 0
    for _ in range(75):
        optimizer.zero_grad()
        output = gp_model(train_x)
        # .sum() collapses the MLL output to a scalar loss.
        loss = -mll(output, train_y12).sum()
        loss.backward()
        optimizer.step()

    # Gradients reach likelihood parameters via backward() even though the
    # optimizer only updates the model parameters.
    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    gp_model.eval()
    likelihood.eval()

    # Make predictions for both sets of test points, and check MAEs.
    with torch.no_grad(), gpytorch.settings.max_eager_kernel_size(1):
        batch_predictions = likelihood(gp_model(test_x))
        # Tasks live in the trailing dimension of the multitask mean.
        preds1 = batch_predictions.mean[:, 0]
        preds2 = batch_predictions.mean[:, 1]
        mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
        mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
        self.assertLess(mean_abs_error1.squeeze().item(), 0.01)
        self.assertLess(mean_abs_error2.squeeze().item(), 0.01)

        # Smoke test for getting predictive uncertainties
        lower, upper = batch_predictions.confidence_region()
        self.assertEqual(lower.shape, test_y12.shape)
        self.assertEqual(upper.shape, test_y12.shape)
def test_posterior_latent_gp_and_likelihood_with_optimization(self):
    """Optimize hyperparameters under a smoothed-box log-noise prior, then
    check posterior-mean test error (old log-* / `.mean()` API)."""
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood(log_noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1, log_transform=True))
    gp_model = ExactGPModel(train_x, train_y, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.covar_module.initialize(log_lengthscale=1)
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(log_noise=1)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
    optimizer.n_iter = 0
    for _ in range(50):
        optimizer.zero_grad()
        with gpytorch.settings.debug(False):
            output = gp_model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # Every parameter must have received a non-trivial gradient.
    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    optimizer.step()

    # Test the model
    gp_model.eval()
    likelihood.eval()
    test_function_predictions = likelihood(gp_model(test_x))
    # NOTE(review): `.mean()` as a method is the older GPyTorch API.
    mean_abs_error = torch.mean(torch.abs(test_y - test_function_predictions.mean()))
    self.assertLess(mean_abs_error.item(), 0.05)
def test_train_on_batch_test_on_batch(self):
    """Batch-mode training and evaluation using the old ``batch_size`` /
    log-* API: train on two stacked datasets, check each element's MAE."""
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood()
    gp_model = ExactGPModel(train_x12, train_y12, likelihood, batch_size=2)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.covar_module.base_kernel.initialize(log_lengthscale=-1)
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(log_noise=0)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
    optimizer.n_iter = 0
    for _ in range(50):
        optimizer.zero_grad()
        output = gp_model(train_x12)
        # .sum() collapses the per-batch MLL values to a scalar loss.
        loss = -mll(output, train_y12).sum()
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # Every parameter must have received a non-trivial gradient.
    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    optimizer.step()

    # Test the model
    gp_model.eval()
    likelihood.eval()

    # Make predictions for both sets of test points, and check MAEs.
    batch_predictions = likelihood(gp_model(test_x12))
    # NOTE(review): `.mean()` as a method is the older GPyTorch API.
    preds1 = batch_predictions.mean()[0]
    preds2 = batch_predictions.mean()[1]
    mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
    mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
    self.assertLess(mean_abs_error1.squeeze().item(), 0.05)
    self.assertLess(mean_abs_error2.squeeze().item(), 0.05)
def test_fantasy_updates(self, cuda=False):
    """Non-batch fantasy updates: predictions of the fantasy model must match
    full-data predictions, and the gradient w.r.t. the fantasy points must
    approximately match the gradient w.r.t. the corresponding train inputs.

    :param cuda: if True, run on GPU.
    """
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood()
    gp_model = ExactGPModel(train_x, train_y, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.covar_module.base_kernel.initialize(lengthscale=exp(1))
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(noise=exp(1))

    if cuda:
        gp_model.cuda()
        likelihood.cuda()

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.15)
    for _ in range(50):
        optimizer.zero_grad()
        with gpytorch.settings.debug(False):
            output = gp_model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    # Every parameter must have received a non-trivial gradient.
    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    optimizer.step()

    # Reference gradient: differentiate predictions w.r.t. the train inputs.
    train_x.requires_grad = True
    gp_model.set_train_data(train_x, train_y)
    with gpytorch.settings.fast_pred_var(), gpytorch.settings.detach_test_caches(False):
        # Test the model
        gp_model.eval()
        likelihood.eval()
        test_function_predictions = likelihood(gp_model(test_x))
        test_function_predictions.mean.sum().backward()

        real_fant_x_grad = train_x.grad[5:].clone()
        train_x.grad = None
        train_x.requires_grad = False
        gp_model.set_train_data(train_x, train_y)

        # Cut data down, and then add back via the fantasy interface
        gp_model.set_train_data(train_x[:5], train_y[:5], strict=False)
        # Result unused; presumably warms the prediction caches — confirm
        likelihood(gp_model(test_x))

        fantasy_x = train_x[5:].clone().detach().requires_grad_(True)
        fant_model = gp_model.get_fantasy_model(fantasy_x, train_y[5:])
        fant_function_predictions = likelihood(fant_model(test_x))

        self.assertAllClose(test_function_predictions.mean, fant_function_predictions.mean, atol=1e-4)

        fant_function_predictions.mean.sum().backward()
        self.assertTrue(fantasy_x.grad is not None)

        # Fantasy gradient should roughly agree with the reference gradient.
        relative_error = torch.norm(real_fant_x_grad - fantasy_x.grad) / fantasy_x.grad.norm()
        self.assertLess(relative_error, 15e-1)  # This was only passing by a hair before