import numpy as np
import pytest
import torch
import lab as B
from stheno import GP

from gpar import GPARRegressor
from gpar.regression import _construct_gpar

# `approx`, `all_different`, `raises`, `ge`, and `WeightedUnique` come from
# the surrounding test utilities and package internals; their imports are
# omitted in this excerpt, and the import paths above are assumed.


def test_logpdf_differentiable():
    reg = GPARRegressor(
        replace=False,
        impute=False,
        linear=True,
        linear_scale=1.0,
        nonlinear=False,
        noise=1e-8,
        normalise_y=False,
    )
    x = np.linspace(0, 5, 10)
    y = reg.sample(x, p=2, latent=True)

    # Test that gradient computation works: before the backward pass no
    # variable has a gradient; afterwards, every variable does.
    reg.vs.requires_grad(True)
    for var in reg.vs.get_vars():
        assert var.grad is None
    reg.logpdf(torch.tensor(x), torch.tensor(y)).backward()
    for var in reg.vs.get_vars():
        assert var.grad is not None
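# A minimal sketch of why differentiability of `logpdf` matters: it enables
# gradient-based fitting of the regressor's variables. In practice
# `reg.fit(x, y)` is the supported entry point; this hand-rolled loop is
# illustrative only and assumes that `reg.vs.get_vars()` returns the torch
# tensors to optimise.
def naive_fit(reg, x, y, iters=100, rate=5e-2):
    reg.vs.requires_grad(True)
    opt = torch.optim.Adam(reg.vs.get_vars(), lr=rate)
    for _ in range(iters):
        opt.zero_grad()
        # Minimise the negative log-likelihood of the data.
        loss = -reg.logpdf(torch.tensor(x), torch.tensor(y))
        loss.backward()
        opt.step()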
def test_logpdf(x, w):
    # Sample some data from a "sensitive" GPAR.
    reg = GPARRegressor(
        replace=False,
        impute=False,
        nonlinear=True,
        nonlinear_scale=0.1,
        linear=True,
        linear_scale=10.0,
        noise=1e-2,
        normalise_y=False,
    )
    y = reg.sample(x, w, p=2, latent=True)

    # Extract models.
    gpar = _construct_gpar(reg, reg.vs, B.shape(B.uprank(x))[1], 2)
    f1, e1 = gpar.layers[0]()
    f2, e2 = gpar.layers[1]()

    # Test computation under prior.
    x1 = x
    x2 = B.concat(B.uprank(x), y[:, 0:1], axis=1)
    if w is not None:
        x1 = WeightedUnique(x1, w[:, 0])
        x2 = WeightedUnique(x2, w[:, 1])
    logpdf1 = (f1 + e1)(x1).logpdf(y[:, 0])
    logpdf2 = (f2 + e2)(x2).logpdf(y[:, 1])
    approx(reg.logpdf(x, y, w), logpdf1 + logpdf2, atol=1e-6)

    # Test computation under posterior.
    post1 = f1.measure | ((f1 + e1)(x1), y[:, 0])
    post2 = f2.measure | ((f2 + e2)(x2), y[:, 1])
    e1_post = GP(e1.mean, e1.kernel, measure=post1)
    e2_post = GP(e2.mean, e2.kernel, measure=post2)
    logpdf1 = (post1(f1) + e1_post)(x1).logpdf(y[:, 0])
    logpdf2 = (post2(f2) + e2_post)(x2).logpdf(y[:, 1])
    with pytest.raises(RuntimeError):
        reg.logpdf(x, y, w, posterior=True)
    reg.condition(x, y, w)
    approx(reg.logpdf(x, y, w, posterior=True), logpdf1 + logpdf2, atol=1e-6)

    # Test that sampling missing gives a stochastic estimate.
    y[::2, 0] = np.nan
    all_different(
        reg.logpdf(x, y, w, sample_missing=True),
        reg.logpdf(x, y, w, sample_missing=True),
    )
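# The missing-data check above (and in the variant below) relies on an
# `all_different` helper from the test utilities. A minimal sketch of the
# assumed behaviour, which the suite's own implementation may refine: assert
# that every pair of the given values differs, so that two calls with
# `sample_missing=True` are detected as stochastic.
def all_different(*xs):
    for i in range(len(xs)):
        for j in range(i + 1, len(xs)):
            # The tolerance is an assumption; strict inequality would also do.
            assert np.abs(xs[i] - xs[j]) > 1e-10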
def test_logpdf(x, w):
    # Sample some data from a "sensitive" GPAR.
    reg = GPARRegressor(
        replace=False,
        impute=False,
        nonlinear=True,
        nonlinear_scale=0.1,
        linear=True,
        linear_scale=10.0,
        noise=1e-2,
        normalise_y=False,
    )
    y = reg.sample(x, w, p=2, latent=True)

    # Extract models.
    gpar = _construct_gpar(reg, reg.vs, B.shape(B.uprank(x))[1], 2)
    f1, noise1 = gpar.layers[0]()
    f2, noise2 = gpar.layers[1]()
    if w is not None:
        noise1 = noise1 / w[:, 0]
        noise2 = noise2 / w[:, 1]

    # Test computation under prior.
    x1 = x
    x2 = B.concat(B.uprank(x), y[:, 0:1], axis=1)
    logpdf1 = f1(x1, noise1).logpdf(y[:, 0])
    logpdf2 = f2(x2, noise2).logpdf(y[:, 1])
    approx(reg.logpdf(x, y, w), logpdf1 + logpdf2, atol=1e-6)

    # Test computation under posterior.
    f1_post = f1 | (f1(x1, noise1), y[:, 0])
    f2_post = f2 | (f2(x2, noise2), y[:, 1])
    logpdf1 = f1_post(x1, noise1).logpdf(y[:, 0])
    logpdf2 = f2_post(x2, noise2).logpdf(y[:, 1])
    with pytest.raises(RuntimeError):
        reg.logpdf(x, y, w, posterior=True)
    reg.condition(x, y, w)
    approx(reg.logpdf(x, y, w, posterior=True), logpdf1 + logpdf2, atol=1e-6)

    # Test that sampling missing gives a stochastic estimate.
    y[::2, 0] = np.nan
    all_different(
        reg.logpdf(x, y, w, sample_missing=True),
        reg.logpdf(x, y, w, sample_missing=True),
    )
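# The two parametrised variants of `test_logpdf` above receive `x` and `w`
# from pytest fixtures defined outside this excerpt. Hypothetical fixtures,
# shown only to make the expected shapes explicit: `x` is an (n,)-vector of
# inputs and `w` is either None or an (n, p) matrix of positive per-output
# weights.
@pytest.fixture()
def x():
    return np.linspace(0, 5, 10)


@pytest.fixture(params=[False, True])
def w(request):
    if request.param:
        return 0.1 + np.random.rand(10, 2)
    else:
        return None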
def test_logpdf():
    # Sample some data from a "sensitive" GPAR.
    reg = GPARRegressor(
        replace=False,
        impute=False,
        nonlinear=True,
        nonlinear_scale=0.1,
        linear=True,
        linear_scale=10.0,
        noise=1e-4,
        normalise_y=False,
    )
    x = np.linspace(0, 5, 10)
    y = reg.sample(x, p=2, latent=True)

    # Extract models.
    gpar = _construct_gpar(reg, reg.vs, 1, 2)
    f1, e1 = gpar.layers[0]()
    f2, e2 = gpar.layers[1]()

    # Test computation under prior.
    logpdf1 = (f1 + e1)(B.array(x)).logpdf(B.array(y[:, 0]))
    x_stack = np.concatenate([x[:, None], y[:, 0:1]], axis=1)
    logpdf2 = (f2 + e2)(B.array(x_stack)).logpdf(B.array(y[:, 1]))
    yield approx, reg.logpdf(x, y), logpdf1 + logpdf2, 6

    # Test computation under posterior.
    e1_post = GP(e1.kernel, e1.mean, graph=e1.graph)
    e2_post = GP(e2.kernel, e2.mean, graph=e2.graph)
    f1_post = f1 | ((f1 + e1)(B.array(x)), B.array(y[:, 0]))
    f2_post = f2 | ((f2 + e2)(B.array(x_stack)), B.array(y[:, 1]))
    logpdf1 = (f1_post + e1_post)(B.array(x)).logpdf(B.array(y[:, 0]))
    logpdf2 = (f2_post + e2_post)(B.array(x_stack)).logpdf(B.array(y[:, 1]))
    yield raises, RuntimeError, lambda: reg.logpdf(x, y, posterior=True)
    reg.fit(x, y, iters=0)
    yield approx, reg.logpdf(x, y, posterior=True), logpdf1 + logpdf2, 6

    # Test that sampling missing gives a stochastic estimate.
    y[::2, 0] = np.nan
    yield ge, np.abs(
        reg.logpdf(x, y, sample_missing=True)
        - reg.logpdf(x, y, sample_missing=True)
    ), 1e-3
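# The variant above is written in the old nose generator style: each `yield`
# emits a check as a callable plus its arguments. Assumed shims for the
# helpers it yields; the original test utilities may differ. Note that this
# `approx` takes a number of decimal places, unlike the `atol`-based one used
# by the newer variants.
def approx(a, b, digits):
    np.testing.assert_array_almost_equal(a, b, decimal=digits)


def raises(err, f):
    with pytest.raises(err):
        f()


def ge(a, b):
    assert a >= b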
           for sample in samples]

# Compute training and validation metrics.
if x_valid is not None:
    print('Predicting validation points')
    samples = unnormalise(
        model.sample(
            transform_x(x_valid), num_samples=50, latent=True, posterior=True
        )
    )
    means = np.mean(samples, axis=0)
    stds = np.std(samples, axis=0)
    valid_rmse = np.mean((y_valid - means) ** 2) ** 0.5
    valid_loglik = model.logpdf(transform_x(x_valid), y_valid) / len(x_valid)
else:
    valid_rmse = 0
    valid_loglik = 0

print('Predicting training points')
samples = unnormalise(
    model.sample(transform_x(x), num_samples=50, latent=True, posterior=True)
)
means = np.mean(samples, axis=0)
stds = np.std(samples, axis=0)
train_rmse = np.mean((y - means) ** 2) ** 0.5
train_loglik = model.logpdf(transform_x(x), y) / len(x)

# Plot the result.
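# Hypothetical versions of the two helpers the script above relies on, shown
# only to make the data flow explicit; the actual script defines its own.
# These assume simple standardisation: `transform_x` maps inputs into the
# model's space and `unnormalise` maps sampled outputs back to data units
# before the RMSE against the raw targets is computed.
x_mean, x_std = np.mean(x, axis=0), np.std(x, axis=0)
y_mean, y_std = np.mean(y, axis=0), np.std(y, axis=0)


def transform_x(x_):
    return (x_ - x_mean) / x_std


def unnormalise(y_):
    return y_ * y_std + y_mean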