def test_posterior_latent_gp_and_likelihood_without_optimization( self, cuda=False): train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) # We're manually going to set the hyperparameters to be ridiculous likelihood = GaussianLikelihood() gp_model = ExactGPModel(train_x, train_y, likelihood) gp_model.covar_module.base_kernel.initialize(lengthscale=exp(-15)) likelihood.initialize(noise=exp(-15)) if cuda: gp_model.cuda() likelihood.cuda() # Compute posterior distribution gp_model.eval() likelihood.eval() # Let's see how our model does, conditioned with weird hyperparams # The posterior should fit all the data with gpytorch.settings.debug(False): function_predictions = likelihood(gp_model(train_x)) self.assertLess(torch.norm(function_predictions.mean - train_y), 1e-3) self.assertLess(torch.norm(function_predictions.variance), 1e-3) # It shouldn't fit much else though test_function_predictions = gp_model( torch.tensor([1.1]).type_as(test_x)) self.assertLess(torch.norm(test_function_predictions.mean - 0), 1e-4) self.assertLess( torch.norm(test_function_predictions.variance - gp_model.covar_module.outputscale), 1e-4)
def test_prior(self, cuda=False): train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) # We're manually going to set the hyperparameters to be ridiculous likelihood = GaussianLikelihood( noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1)) gp_model = ExactGPModel(None, None, likelihood) # Update lengthscale prior to accommodate extreme parameters gp_model.covar_module.base_kernel.register_prior( "lengthscale_prior", SmoothedBoxPrior(exp(-10), exp(10), sigma=0.5), "raw_lengthscale") gp_model.mean_module.initialize(constant=1.5) gp_model.covar_module.base_kernel.initialize(lengthscale=1) likelihood.initialize(noise=0) if cuda: gp_model.cuda() likelihood.cuda() # Compute posterior distribution gp_model.eval() likelihood.eval() # The model should predict in prior mode function_predictions = likelihood(gp_model(train_x)) correct_variance = gp_model.covar_module.outputscale + likelihood.noise self.assertLess(torch.norm(function_predictions.mean - 1.5), 1e-3) self.assertLess( torch.norm(function_predictions.variance - correct_variance), 1e-3)
def test_fantasy_updates_batch(self, cuda=False): train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) # We're manually going to set the hyperparameters to something they shouldn't be likelihood = GaussianLikelihood() gp_model = ExactGPModel(train_x, train_y, likelihood) mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model) gp_model.covar_module.base_kernel.initialize(lengthscale=exp(1)) gp_model.mean_module.initialize(constant=0) likelihood.initialize(noise=exp(1)) if cuda: gp_model.cuda() likelihood.cuda() # Find optimal model hyperparameters gp_model.train() likelihood.train() optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.15) optimizer.n_iter = 0 for _ in range(50): optimizer.zero_grad() with gpytorch.settings.debug(False): output = gp_model(train_x) loss = -mll(output, train_y) loss.backward() optimizer.n_iter += 1 optimizer.step() for param in gp_model.parameters(): self.assertTrue(param.grad is not None) self.assertGreater(param.grad.norm().item(), 0) for param in likelihood.parameters(): self.assertTrue(param.grad is not None) self.assertGreater(param.grad.norm().item(), 0) optimizer.step() with gpytorch.settings.fast_pred_var(): # Test the model gp_model.eval() likelihood.eval() test_function_predictions = likelihood(gp_model(test_x)) # Cut data down, and then add back via the fantasy interface gp_model.set_train_data(train_x[:5], train_y[:5], strict=False) likelihood(gp_model(test_x)) fantasy_x = train_x[5:].clone().unsqueeze(0).unsqueeze(-1).repeat( 3, 1, 1).requires_grad_(True) fantasy_y = train_y[5:].unsqueeze(0).repeat(3, 1) fant_model = gp_model.get_fantasy_model(fantasy_x, fantasy_y) fant_function_predictions = likelihood(fant_model(test_x)) self.assertTrue( approx_equal(test_function_predictions.mean, fant_function_predictions.mean[0])) fant_function_predictions.mean.sum().backward() self.assertTrue(fantasy_x.grad is not None)
def test_prior(self, cuda=False): train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) # We're manually going to set the hyperparameters to be ridiculous likelihood = GaussianLikelihood( noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1), noise_constraint=Positive(), # Prior for this test is looser than default bound ) gp_model = ExactGPModel(None, None, likelihood) # Update lengthscale prior to accommodate extreme parameters gp_model.covar_module.base_kernel.register_prior( "lengthscale_prior", SmoothedBoxPrior(exp(-10), exp(10), sigma=0.5), "raw_lengthscale" ) gp_model.mean_module.initialize(constant=1.5) gp_model.covar_module.base_kernel.initialize(lengthscale=1) likelihood.initialize(noise=0) if cuda: gp_model.cuda() likelihood.cuda() # Compute posterior distribution gp_model.eval() likelihood.eval() # The model should predict in prior mode function_predictions = likelihood(gp_model(train_x)) correct_variance = gp_model.covar_module.outputscale + likelihood.noise self.assertAllClose(function_predictions.mean, torch.full_like(function_predictions.mean, fill_value=1.5)) self.assertAllClose( function_predictions.variance, correct_variance.squeeze().expand_as(function_predictions.variance) )
def test_regression_error(self, cuda=False, skip_logdet_forward=False, cholesky=False):
    """Train an SVGP model and verify training MAE, gradient flow, absence of
    warnings, and that CG is used iff Cholesky is disabled.
    """
    train_x, train_y = train_data(cuda=cuda)
    likelihood = GaussianLikelihood()
    inducing_points = torch.linspace(0, 1, 25)
    model = SVGPRegressionModel(inducing_points=inducing_points, learn_locs=False)
    if cuda:
        likelihood.cuda()
        model.cuda()
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_y))

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = optim.Adam([{"params": model.parameters()}, {"params": likelihood.parameters()}], lr=0.01)

    # Wrap linear_cg so we can assert whether it was invoked
    _wrapped_cg = MagicMock(wraps=gpytorch.utils.linear_cg)
    # max_cholesky_size=inf forces Cholesky everywhere; 0 forces iterative (CG) solves
    with gpytorch.settings.max_cholesky_size(math.inf if cholesky else 0), \
            gpytorch.settings.skip_logdet_forward(skip_logdet_forward), \
            warnings.catch_warnings(record=True) as w, \
            patch("gpytorch.utils.linear_cg", new=_wrapped_cg) as linear_cg_mock:
        for _ in range(200):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        # Make sure CG was called (or not), and no warnings were thrown
        self.assertEqual(len(w), 0)
        if cholesky:
            self.assertFalse(linear_cg_mock.called)
        else:
            self.assertTrue(linear_cg_mock.called)

    # Every parameter should have received a nonzero gradient during training
    for param in model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Set back to eval mode
    model.eval()
    likelihood.eval()
    test_preds = likelihood(model(train_x)).mean.squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
    self.assertLess(mean_abs_error.item(), 1e-1)
def test_posterior_latent_gp_and_likelihood_fast_pred_var(self, cuda=False):
    """With fast predictive variances enabled, after inflating the observation
    noise the predictive variance should be dominated by (and close to) that noise.
    """
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
    with gpytorch.settings.fast_pred_var(), gpytorch.settings.debug(False):
        # We're manually going to set the hyperparameters to
        # something they shouldn't be
        likelihood = GaussianLikelihood(noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1))
        gp_model = ExactGPModel(train_x, train_y, likelihood)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)
        gp_model.covar_module.base_kernel.initialize(lengthscale=exp(1))
        gp_model.mean_module.initialize(constant=0)
        likelihood.initialize(noise=exp(1))
        if cuda:
            gp_model.cuda()
            likelihood.cuda()

        # Find optimal model hyperparameters
        gp_model.train()
        likelihood.train()
        optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
        optimizer.n_iter = 0  # ad-hoc iteration counter hung on the optimizer
        for _ in range(50):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        # Every parameter should have received a nonzero gradient during training
        for param in gp_model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        optimizer.step()

        # Test the model
        gp_model.eval()
        likelihood.eval()
        # Set the cache
        test_function_predictions = likelihood(gp_model(train_x))
        # Now bump up the likelihood to something huge
        # This will make it easy to calculate the variance
        likelihood.noise_covar.raw_noise.data.fill_(3)
        test_function_predictions = likelihood(gp_model(train_x))
        noise = likelihood.noise_covar.noise
        var_diff = (test_function_predictions.variance - noise).abs()
        # Relative error of predictive variance vs. pure noise should be < 5%
        self.assertLess(torch.max(var_diff / noise), 0.05)
def test_posterior_latent_gp_and_likelihood_with_optimization( self, cuda=False, checkpoint=0): train_x, test_x, train_y, test_y = self._get_data( cuda=cuda, num_data=(1000 if checkpoint else 11), add_noise=bool(checkpoint), ) # We're manually going to set the hyperparameters to something they shouldn't be likelihood = GaussianLikelihood( noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1)) gp_model = ExactGPModel(train_x, train_y, likelihood) mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model) gp_model.covar_module.base_kernel.initialize(lengthscale=exp(1)) gp_model.mean_module.initialize(constant=0) likelihood.initialize(noise=exp(1)) if cuda: gp_model.cuda() likelihood.cuda() # Find optimal model hyperparameters gp_model.train() likelihood.train() optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.15) optimizer.n_iter = 0 with gpytorch.beta_features.checkpoint_kernel( checkpoint), gpytorch.settings.fast_pred_var(): for _ in range(20 if checkpoint else 50): optimizer.zero_grad() output = gp_model(train_x) loss = -mll(output, train_y) loss.backward() optimizer.n_iter += 1 optimizer.step() for param in gp_model.parameters(): self.assertTrue(param.grad is not None) self.assertGreater(param.grad.norm().item(), 0) for param in likelihood.parameters(): self.assertTrue(param.grad is not None) self.assertGreater(param.grad.norm().item(), 0) optimizer.step() # Test the model gp_model.eval() likelihood.eval() with gpytorch.settings.skip_posterior_variances(True): test_function_predictions = likelihood(gp_model(test_x)) mean_abs_error = torch.mean( torch.abs(test_y - test_function_predictions.mean)) self.assertLess(mean_abs_error.item(), 0.05)
def test_posterior_latent_gp_and_likelihood_with_optimization( self, cuda=False): train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) # We're manually going to set the hyperparameters to something they shouldn't be likelihood = GaussianLikelihood( noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1)) gp_model = ExactGPModel(train_x, train_y, likelihood) mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model) gp_model.rbf_covar_module.initialize(log_lengthscale=1) gp_model.mean_module.initialize(constant=0) likelihood.initialize(log_noise=1) if cuda: gp_model.cuda() likelihood.cuda() # Find optimal model hyperparameters gp_model.train() likelihood.train() optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1) optimizer.n_iter = 0 with gpytorch.settings.debug(False): for _ in range(75): optimizer.zero_grad() output = gp_model(train_x) loss = -mll(output, train_y) loss.backward() optimizer.n_iter += 1 optimizer.step() for param in gp_model.parameters(): self.assertTrue(param.grad is not None) self.assertGreater(param.grad.norm().item(), 0) for param in likelihood.parameters(): self.assertTrue(param.grad is not None) self.assertGreater(param.grad.norm().item(), 0) optimizer.step() # Test the model gp_model.eval() likelihood.eval() test_function_predictions = likelihood(gp_model(test_x)) mean_abs_error = torch.mean( torch.abs(test_y - test_function_predictions.mean)) self.assertLess(mean_abs_error.squeeze().item(), 0.05)
def test_regression_error(self, cuda=False):
    """Train a batched (2-task) SVGP model and check gradient flow plus a small
    train-set MAE for each batch element independently.
    """
    train_x, train_y = train_data(cuda=cuda)
    likelihood = GaussianLikelihood()
    # Batch of 2 identical inducing point sets: shape (2, 25, 1)
    inducing_points = torch.linspace(0, 1, 25).unsqueeze(-1).repeat(2, 1, 1)
    model = SVGPRegressionModel(inducing_points)
    if cuda:
        likelihood.cuda()
        model.cuda()
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(-1))

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = optim.Adam([{"params": model.parameters()}, {"params": likelihood.parameters()}], lr=0.01)
    for _ in range(200):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        # The batched ELBO returns one value per batch element; reduce before backward
        loss = loss.sum()
        loss.backward()
        optimizer.step()

    # Every parameter should have received a nonzero gradient during training
    for param in model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Set back to eval mode
    model.eval()
    likelihood.eval()
    test_preds = likelihood(model(train_x)).mean.squeeze()
    # Check each batch element separately
    mean_abs_error = torch.mean(torch.abs(train_y[0, :] - test_preds[0, :]) / 2)
    mean_abs_error2 = torch.mean(torch.abs(train_y[1, :] - test_preds[1, :]) / 2)
    self.assertLess(mean_abs_error.item(), 1e-1)
    self.assertLess(mean_abs_error2.item(), 1e-1)
def test_posterior_latent_gp_and_likelihood_without_optimization(self, cuda=False):
    """With a near-zero lengthscale and noise, the posterior should interpolate
    the training data and revert to the prior away from it. (Variant for a model
    exposing `rbf_covar_module` with explicit priors/constraints.)
    """
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
    # Debug off: these extreme hyperparameters would trip numerical checks
    with gpytorch.settings.debug(False):
        # We're manually going to set the hyperparameters to be ridiculous
        likelihood = GaussianLikelihood(
            noise_prior=SmoothedBoxPrior(exp(-10), exp(10), sigma=0.25),
            noise_constraint=Positive(),
        )
        gp_model = ExactGPModel(train_x, train_y, likelihood)
        # Update lengthscale prior to accommodate extreme parameters
        gp_model.rbf_covar_module.register_prior(
            "lengthscale_prior", SmoothedBoxPrior(exp(-10), exp(10), sigma=0.5), "raw_lengthscale"
        )
        gp_model.rbf_covar_module.initialize(lengthscale=exp(-10))
        gp_model.mean_module.initialize(constant=0)
        likelihood.initialize(noise=exp(-10))
        if cuda:
            gp_model.cuda()
            likelihood.cuda()
        # Compute posterior distribution
        gp_model.eval()
        likelihood.eval()

        # Let's see how our model does, conditioned with weird hyperparams
        # The posterior should fit all the data
        function_predictions = likelihood(gp_model(train_x))
        self.assertLess(torch.norm(function_predictions.mean - train_y), 1e-3)
        self.assertLess(torch.norm(function_predictions.variance), 5e-3)

        # It shouldn't fit much else though
        test_function_predictions = gp_model(torch.tensor([1.1]).type_as(test_x))
        self.assertLess(torch.norm(test_function_predictions.mean - 0), 1e-4)
        self.assertLess(torch.norm(test_function_predictions.variance - gp_model.covar_module.outputscale), 1e-4)
def test_regression_error(
    self,
    cuda=False,
    mll_cls=gpytorch.mlls.VariationalELBO,
    distribution_cls=gpytorch.variational.CholeskyVariationalDistribution,
):
    """Train an SVGP model, check gradient flow and train-set MAE, then (for the
    Cholesky variational distribution) exercise the fantasy-model interface.
    """
    train_x, train_y = train_data(cuda=cuda)
    likelihood = GaussianLikelihood()
    model = SVGPRegressionModel(torch.linspace(0, 1, 25), distribution_cls)
    mll = mll_cls(likelihood, model, num_data=len(train_y))
    if cuda:
        likelihood = likelihood.cuda()
        model = model.cuda()
        mll = mll.cuda()

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = optim.Adam([{"params": model.parameters()}, {"params": likelihood.parameters()}], lr=0.01)
    for _ in range(200):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    # Every parameter should have received a nonzero gradient during training
    for param in model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Set back to eval mode
    model.eval()
    likelihood.eval()
    test_preds = likelihood(model(train_x)).mean.squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
    self.assertLess(mean_abs_error.item(), 0.014)

    if distribution_cls is gpytorch.variational.CholeskyVariationalDistribution:
        # finally test fantasization
        # we only will check that tossing the entire training set into the model will reduce the mae
        model.likelihood = likelihood
        fant_model = model.get_fantasy_model(train_x, train_y)
        fant_preds = fant_model.likelihood(fant_model(train_x)).mean.squeeze()
        updated_abs_error = torch.mean(torch.abs(train_y - fant_preds) / 2)
        # TODO: figure out why this error is worse than before
        self.assertLess(updated_abs_error.item(), 0.15)
def test_regression_error_full(self, skip_logdet_forward=False, cuda=False):
    """Train an SVGP model whose inducing points are the full (fixed) training
    inputs; check gradient flow and a small train-set MAE.
    """
    train_x, train_y = train_data(cuda=cuda)
    likelihood = GaussianLikelihood()
    # Inducing points pinned to the training inputs and not learned
    model = SVGPRegressionModel(inducing_points=train_x, learn_locs=False)
    if cuda:
        likelihood.cuda()
        model.cuda()
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_y))

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = optim.Adam([{"params": model.parameters()}, {"params": likelihood.parameters()}], lr=0.01)
    with gpytorch.settings.skip_logdet_forward(skip_logdet_forward):
        for _ in range(200):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

    # Every parameter should have received a nonzero gradient during training
    for param in model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Set back to eval mode
    model.eval()
    likelihood.eval()
    test_preds = likelihood(model(train_x)).mean.squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
    self.assertLess(mean_abs_error.item(), 1e-1)
def test_regression_error(
    self,
    cuda=False,
    mll_cls=gpytorch.mlls.VariationalELBO,
    distribution_cls=gpytorch.variational.CholeskyVariationalDistribution,
):
    """Train an SVGP model and verify train-set MAE, gradient flow, that CG was
    never needed, and that no ExtraComputationWarning was raised.
    """
    train_x, train_y = train_data(cuda=cuda)
    likelihood = GaussianLikelihood()
    model = SVGPRegressionModel(torch.linspace(0, 1, 25), distribution_cls)
    mll = mll_cls(likelihood, model, num_data=len(train_y))
    if cuda:
        likelihood = likelihood.cuda()
        model = model.cuda()
        mll = mll.cuda()

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = optim.Adam([{"params": model.parameters()}, {"params": likelihood.parameters()}], lr=0.01)

    # Wrap linear_cg so we can assert it was never invoked
    _wrapped_cg = MagicMock(wraps=gpytorch.utils.linear_cg)
    _cg_mock = patch("gpytorch.utils.linear_cg", new=_wrapped_cg)
    with warnings.catch_warnings(record=True) as ws, _cg_mock as cg_mock:
        for _ in range(150):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        # Every parameter should have received a nonzero gradient during training
        for param in model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        # Set back to eval mode
        model.eval()
        likelihood.eval()
        test_preds = likelihood(model(train_x)).mean.squeeze()
        mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
        self.assertLess(mean_abs_error.item(), 1e-1)

        # Make sure CG was called (or not), and no warnings were thrown
        self.assertFalse(cg_mock.called)
        self.assertFalse(any(issubclass(w.category, ExtraComputationWarning) for w in ws))
def test_posterior_latent_gp_and_likelihood_without_optimization(self, cuda=False):
    """With near-zero lengthscale and noise, the posterior should interpolate the
    training data exactly and revert to the prior away from it.
    (assertAllClose variant with an explicit Positive noise constraint.)
    """
    # Extreme hyperparameters below will legitimately trigger numerical warnings
    warnings.simplefilter("ignore", gpytorch.utils.warnings.NumericalWarning)
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
    # We're manually going to set the hyperparameters to be ridiculous
    likelihood = GaussianLikelihood(noise_constraint=Positive())  # This test actually wants a noise < 1e-4
    gp_model = ExactGPModel(train_x, train_y, likelihood)
    # exp(-15) ~ 3e-7: effectively zero lengthscale/noise (extreme on purpose)
    gp_model.covar_module.base_kernel.initialize(lengthscale=exp(-15))
    likelihood.initialize(noise=exp(-15))
    if cuda:
        gp_model.cuda()
        likelihood.cuda()
    # Compute posterior distribution
    gp_model.eval()
    likelihood.eval()

    # Let's see how our model does, conditioned with weird hyperparams
    # The posterior should fit all the data
    with gpytorch.settings.debug(False):
        function_predictions = likelihood(gp_model(train_x))
        self.assertAllClose(function_predictions.mean, train_y)
        self.assertAllClose(function_predictions.variance, torch.zeros_like(function_predictions.variance))

        # It shouldn't fit much else though
        test_function_predictions = gp_model(torch.tensor([1.1]).type_as(test_x))
        self.assertAllClose(test_function_predictions.mean, torch.zeros_like(test_function_predictions.mean))
        self.assertAllClose(
            test_function_predictions.variance,
            gp_model.covar_module.outputscale.expand_as(test_function_predictions.variance),
        )
def test_gp_posterior_mean_skip_variances_slow_cuda(self):
    """Posterior means must agree whether or not variance computation is skipped
    (slow predictive-variance path, on CUDA)."""
    # Guard clause: this test only makes sense on a CUDA machine
    if not torch.cuda.is_available():
        return
    with least_used_cuda_device():
        train_x, test_x, train_y, _ = self._get_data(cuda=True)
        lik = GaussianLikelihood()
        model = ExactGPModel(train_x, train_y, lik)
        model.cuda()
        lik.cuda()
        # Switch to posterior (eval) mode before predicting
        model.eval()
        lik.eval()
        # Predictive mean with variances skipped and fast predictive variances off
        with gpytorch.settings.fast_pred_var(False), gpytorch.settings.skip_posterior_variances(True):
            skipped_mean = model(test_x).mean
        # Same mean computed via the normal (full-variance) paths
        full_mean = model(test_x).mean
        observed_mean = lik(model(test_x)).mean
        self.assertTrue(torch.allclose(skipped_mean, full_mean))
        self.assertTrue(torch.allclose(skipped_mean, observed_mean))
def test_regression_error(
    self,
    cuda=False,
    mll_cls=gpytorch.mlls.VariationalELBO,
    distribution_cls=gpytorch.variational.CholeskyVariationalDistribution,
):
    """Train an SVGP model for 200 Adam steps; every parameter must receive a
    nonzero gradient and the train-set MAE must come out small."""
    train_x, train_y = train_data(cuda=cuda)
    likelihood = GaussianLikelihood()
    model = SVGPRegressionModel(torch.linspace(0, 1, 25), distribution_cls)
    mll = mll_cls(likelihood, model, num_data=len(train_y))
    if cuda:
        likelihood = likelihood.cuda()
        model = model.cuda()
        mll = mll.cuda()

    # Optimize variational parameters and hyperparameters jointly
    model.train()
    likelihood.train()
    optimizer = optim.Adam(
        [{"params": model.parameters()}, {"params": likelihood.parameters()}],
        lr=0.01,
    )
    for _ in range(200):
        optimizer.zero_grad()
        loss = -mll(model(train_x), train_y)
        loss.backward()
        optimizer.step()

    # Every parameter (model first, then likelihood) must have a nonzero gradient
    for p in list(model.parameters()) + list(likelihood.parameters()):
        self.assertTrue(p.grad is not None)
        self.assertGreater(p.grad.norm().item(), 0)

    # Set back to eval mode and score on the training inputs
    model.eval()
    likelihood.eval()
    test_preds = likelihood(model(train_x)).mean.squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
    self.assertLess(mean_abs_error.item(), 1e-1)
def test_sgpr_mean_abs_error(self, cuda=False):
    """End-to-end SGPR check: training converges, the covariance is the expected
    low-rank-plus-diagonal LazyTensor, every Cholesky call happens on the small
    inducing-point system, and predictive means/variances are sane.
    """
    # Suppress numerical warnings
    warnings.simplefilter("ignore", NumericalWarning)

    train_x, train_y, test_x, test_y = make_data(cuda=cuda)
    likelihood = GaussianLikelihood()
    gp_model = GPRegressionModel(train_x, train_y, likelihood)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)
    if cuda:
        gp_model = gp_model.cuda()
        likelihood = likelihood.cuda()

    # Mock cholesky
    _wrapped_cholesky = MagicMock(wraps=torch.linalg.cholesky_ex)
    with patch("torch.linalg.cholesky_ex", new=_wrapped_cholesky) as cholesky_mock:
        # Optimize the model
        gp_model.train()
        likelihood.train()
        optimizer = optim.Adam(gp_model.parameters(), lr=0.1)
        for _ in range(30):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        # Check that we have the right LazyTensor type
        kernel = likelihood(gp_model(train_x)).lazy_covariance_matrix.evaluate_kernel()
        self.assertIsInstance(kernel, gpytorch.lazy.LowRankRootAddedDiagLazyTensor)

        # Every parameter should have received a nonzero gradient during training
        for param in gp_model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        # Test the model
        gp_model.eval()
        likelihood.eval()
        test_preds = likelihood(gp_model(test_x)).mean
        mean_abs_error = torch.mean(torch.abs(test_y - test_preds))

    cholesky_mock.assert_called()  # We SHOULD call Cholesky...
    # ...but only ever on the (small) inducing-point system, never the full kernel
    for chol_arg in cholesky_mock.call_args_list:
        first_arg = chol_arg[0][0]
        self.assertTrue(torch.is_tensor(first_arg))
        self.assertTrue(first_arg.size(-1) == gp_model.covar_module.inducing_points.size(-2))

    self.assertLess(mean_abs_error.squeeze().item(), 0.1)

    # Test variances
    test_vars = likelihood(gp_model(test_x)).variance
    # variance must agree with the diagonal of the full covariance matrix
    self.assertAllClose(test_vars, likelihood(gp_model(test_x)).covariance_matrix.diagonal(dim1=-1, dim2=-2))
    # Variances should be bracketed by the noise and noise + outputscale (with slack)
    self.assertGreater(test_vars.min().item() + 0.1, likelihood.noise.item())
    self.assertLess(
        test_vars.max().item() - 0.05,
        likelihood.noise.item() + gp_model.covar_module.base_kernel.outputscale.item(),
    )

    # Test on training data
    test_outputs = likelihood(gp_model(train_x))
    self.assertLess((test_outputs.mean - train_y).max().item(), 0.1)
    self.assertLess(test_outputs.variance.max().item(), likelihood.noise.item() * 2)
covar_x = self.covar_module(x) return MultivariateNormal(mean_x, covar_x) data = pods.datasets.olympic_marathon_men() x_train = torch.from_numpy(data["X"]).squeeze(-1) y_train = torch.from_numpy(data["Y"]).squeeze( -1) # + torch.randn(train_x.size()) * np.sqrt(0.04) likelihood = GaussianLikelihood() model = ExactGPModel(x_train, y_train, likelihood) x_train = x_train.cuda() y_train = y_train.cuda() model = model.cuda() likelihood = likelihood.cuda() model.train() likelihood.train() optimizer = torch.optim.Adam([{'params': model.parameters()}], lr=0.1) ##loss for gp marginal_loglikelihood = ExactMarginalLogLikelihood(likelihood, model) training_iter = 2000 for i in range(training_iter): optimizer.zero_grad() output = model(x_train)
def test_fantasy_updates(self, cuda=False):
    """Conditioning on held-out points via the fantasy interface should match
    re-conditioning on the full training set, both in predictions and in the
    gradients that flow back to the fantasy inputs.
    """
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood()
    gp_model = ExactGPModel(train_x, train_y, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.covar_module.base_kernel.initialize(lengthscale=exp(1))
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(noise=exp(1))
    if cuda:
        gp_model.cuda()
        likelihood.cuda()

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.15)
    for _ in range(50):
        optimizer.zero_grad()
        with gpytorch.settings.debug(False):
            output = gp_model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    # Every parameter should have received a nonzero gradient during training
    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    optimizer.step()

    # Reference pass: gradients w.r.t. the full training inputs
    train_x.requires_grad = True
    gp_model.set_train_data(train_x, train_y)
    with gpytorch.settings.fast_pred_var(), gpytorch.settings.detach_test_caches(False):
        # Test the model
        gp_model.eval()
        likelihood.eval()
        test_function_predictions = likelihood(gp_model(test_x))
        test_function_predictions.mean.sum().backward()

        # Keep the gradient w.r.t. the points we will later re-add as fantasies
        real_fant_x_grad = train_x.grad[5:].clone()
        train_x.grad = None
        train_x.requires_grad = False
        gp_model.set_train_data(train_x, train_y)

        # Cut data down, and then add back via the fantasy interface
        gp_model.set_train_data(train_x[:5], train_y[:5], strict=False)
        # Prime the prediction caches before fantasizing
        likelihood(gp_model(test_x))

        fantasy_x = train_x[5:].clone().detach().requires_grad_(True)
        fant_model = gp_model.get_fantasy_model(fantasy_x, train_y[5:])
        fant_function_predictions = likelihood(fant_model(test_x))

        self.assertAllClose(test_function_predictions.mean, fant_function_predictions.mean, atol=1e-4)

        fant_function_predictions.mean.sum().backward()
        self.assertTrue(fantasy_x.grad is not None)

        # Fantasy-input gradients should roughly match the reference gradients
        relative_error = torch.norm(real_fant_x_grad - fantasy_x.grad) / fantasy_x.grad.norm()
        self.assertLess(relative_error, 15e-1)  # This was only passing by a hair before