Example #1
import numpy as np
import torch
from gpar.regression import GPARRegressor


def test_logpdf_differentiable():
    reg = GPARRegressor(replace=False, impute=False,
                        linear=True, linear_scale=1., nonlinear=False,
                        noise=1e-8, normalise_y=False)
    x = np.linspace(0, 5, 10)
    y = reg.sample(x, p=2, latent=True)

    # Test that gradient calculation works.
    reg.vs.requires_grad(True)
    for var in reg.vs.get_vars():
        assert var.grad is None
    reg.logpdf(torch.tensor(x), torch.tensor(y)).backward()
    for var in reg.vs.get_vars():
        assert var.grad is not None
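This test pins down that `GPARRegressor.logpdf` is differentiable end to end: `reg.vs.requires_grad(True)` turns on gradient tracking for every variable in the model's variable container, so after `backward()` each variable carries a populated `.grad`.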
Example #2
import lab as B
import numpy as np
import pytest
from stheno import GP
from gpar.regression import GPARRegressor, _construct_gpar


# `x` and `w` are pytest fixtures supplying the inputs and optional
# per-output weights; `WeightedUnique`, `approx`, and `all_different`
# come from the original test suite's imports and utilities.
def test_logpdf(x, w):
    # Sample some data from a "sensitive" GPAR.
    reg = GPARRegressor(
        replace=False,
        impute=False,
        nonlinear=True,
        nonlinear_scale=0.1,
        linear=True,
        linear_scale=10.0,
        noise=1e-2,
        normalise_y=False,
    )
    y = reg.sample(x, w, p=2, latent=True)

    # Extract models.
    gpar = _construct_gpar(reg, reg.vs, B.shape(B.uprank(x))[1], 2)
    f1, e1 = gpar.layers[0]()
    f2, e2 = gpar.layers[1]()

    # Test computation under prior.
    x1 = x
    x2 = B.concat(B.uprank(x), y[:, 0:1], axis=1)
    if w is not None:
        x1 = WeightedUnique(x1, w[:, 0])
        x2 = WeightedUnique(x2, w[:, 1])
    logpdf1 = (f1 + e1)(x1).logpdf(y[:, 0])
    logpdf2 = (f2 + e2)(x2).logpdf(y[:, 1])
    approx(reg.logpdf(x, y, w), logpdf1 + logpdf2, atol=1e-6)

    # Test computation under posterior.
    post1 = f1.measure | ((f1 + e1)(x1), y[:, 0])
    post2 = f2.measure | ((f2 + e2)(x2), y[:, 1])
    e1_post = GP(e1.mean, e1.kernel, measure=post1)
    e2_post = GP(e2.mean, e2.kernel, measure=post2)
    logpdf1 = (post1(f1) + e1_post)(x1).logpdf(y[:, 0])
    logpdf2 = (post2(f2) + e2_post)(x2).logpdf(y[:, 1])
    with pytest.raises(RuntimeError):
        reg.logpdf(x, y, w, posterior=True)
    reg.condition(x, y, w)
    approx(reg.logpdf(x, y, w, posterior=True), logpdf1 + logpdf2, atol=1e-6)

    # Test that sampling missing gives a stochastic estimate.
    y[::2, 0] = np.nan
    all_different(
        reg.logpdf(x, y, w, sample_missing=True),
        reg.logpdf(x, y, w, sample_missing=True),
    )
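The contract exercised at the end is worth calling out: `logpdf(..., posterior=True)` raises a `RuntimeError` until the model has been conditioned, and `sample_missing=True` imputes missing values by sampling, so repeated calls give different (stochastic) estimates. A minimal sketch of the condition-then-query flow, assuming the same `gpar.regression` API as above:

import numpy as np
from gpar.regression import GPARRegressor

reg = GPARRegressor(noise=1e-2)
x = np.linspace(0, 5, 20)
y = reg.sample(x, p=2, latent=True)  # Draw two outputs from the prior.

reg.condition(x, y)                  # Posterior queries are valid from here on.
print(reg.logpdf(x, y, posterior=True))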
Example #3
import lab as B
import numpy as np
import pytest
from gpar.regression import GPARRegressor, _construct_gpar


# `x` and `w` are pytest fixtures; `approx` and `all_different` come from
# the original test suite's utilities.
def test_logpdf(x, w):
    # Sample some data from a "sensitive" GPAR.
    reg = GPARRegressor(
        replace=False,
        impute=False,
        nonlinear=True,
        nonlinear_scale=0.1,
        linear=True,
        linear_scale=10.0,
        noise=1e-2,
        normalise_y=False,
    )
    y = reg.sample(x, w, p=2, latent=True)

    # Extract models.
    gpar = _construct_gpar(reg, reg.vs, B.shape(B.uprank(x))[1], 2)
    f1, noise1 = gpar.layers[0]()
    f2, noise2 = gpar.layers[1]()

    if w is not None:
        noise1 = noise1 / w[:, 0]
        noise2 = noise2 / w[:, 1]

    # Test computation under prior.
    x1 = x
    x2 = B.concat(B.uprank(x), y[:, 0:1], axis=1)
    logpdf1 = f1(x1, noise1).logpdf(y[:, 0])
    logpdf2 = f2(x2, noise2).logpdf(y[:, 1])
    approx(reg.logpdf(x, y, w), logpdf1 + logpdf2, atol=1e-6)

    # Test computation under posterior.
    f1_post = f1 | (f1(x1, noise1), y[:, 0])
    f2_post = f2 | (f2(x2, noise2), y[:, 1])
    logpdf1 = f1_post(x1, noise1).logpdf(y[:, 0])
    logpdf2 = f2_post(x2, noise2).logpdf(y[:, 1])
    with pytest.raises(RuntimeError):
        reg.logpdf(x, y, w, posterior=True)
    reg.condition(x, y, w)
    approx(reg.logpdf(x, y, w, posterior=True), logpdf1 + logpdf2, atol=1e-6)

    # Test that sampling missing gives a stochastic estimate.
    y[::2, 0] = np.nan
    all_different(
        reg.logpdf(x, y, w, sample_missing=True),
        reg.logpdf(x, y, w, sample_missing=True),
    )
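This is the same test as Example #2 written against a newer API: observation noise is passed directly to the GP call (`f1(x1, noise1)`) instead of being modelled as a separate noise process `e1`, conditioning happens on the GP itself (`f1 | ...`) rather than on a measure, and per-point weights are folded in by dividing the noise (`noise1 / w[:, 0]`) instead of wrapping inputs in `WeightedUnique`.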
Example #4
import numpy as np
import lab as B
from stheno import GP
from gpar.regression import GPARRegressor, _construct_gpar


# Nose-style generative test from an older version of the suite: `approx`,
# `raises`, and `ge` are assertion helpers yielded as test cases.
# NB: imports reconstructed for self-containedness; the originals may differ.
def test_logpdf():
    # Sample some data from a "sensitive" GPAR.
    reg = GPARRegressor(replace=False,
                        impute=False,
                        nonlinear=True,
                        nonlinear_scale=0.1,
                        linear=True,
                        linear_scale=10.,
                        noise=1e-4,
                        normalise_y=False)
    x = np.linspace(0, 5, 10)
    y = reg.sample(x, p=2, latent=True)

    # Extract models.
    gpar = _construct_gpar(reg, reg.vs, 1, 2)
    f1, e1 = gpar.layers[0]()
    f2, e2 = gpar.layers[1]()

    # Test computation under prior.
    logpdf1 = (f1 + e1)(B.array(x)).logpdf(B.array(y[:, 0]))
    x_stack = np.concatenate([x[:, None], y[:, 0:1]], axis=1)
    logpdf2 = (f2 + e2)(B.array(x_stack)).logpdf(B.array(y[:, 1]))
    yield approx, reg.logpdf(x, y), logpdf1 + logpdf2, 6

    # Test computation under posterior.
    e1_post = GP(e1.kernel, e1.mean, graph=e1.graph)
    e2_post = GP(e2.kernel, e2.mean, graph=e2.graph)
    f1_post = f1 | ((f1 + e1)(B.array(x)), B.array(y[:, 0]))
    f2_post = f2 | ((f2 + e2)(B.array(x_stack)), B.array(y[:, 1]))
    logpdf1 = (f1_post + e1_post)(B.array(x)).logpdf(B.array(y[:, 0]))
    logpdf2 = (f2_post + e2_post)(B.array(x_stack)).logpdf(B.array(y[:, 1]))
    yield raises, RuntimeError, lambda: reg.logpdf(x, y, posterior=True)
    reg.fit(x, y, iters=0)
    yield approx, reg.logpdf(x, y, posterior=True), logpdf1 + logpdf2, 6

    # Test that sampling missing gives a stochastic estimate.
    y[::2, 0] = np.nan
    yield ge, \
          np.abs(reg.logpdf(x, y, sample_missing=True) -
                 reg.logpdf(x, y, sample_missing=True)), \
          1e-3
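Note the older idioms here: assertions are `yield`ed as generative (nose-style) test cases, the noise process is explicit (`e1`, `e2`), and the posterior is made available via `reg.fit(x, y, iters=0)`, which runs zero optimisation steps but leaves the model conditioned on the data. The final `ge` check asserts that two `sample_missing=True` evaluations differ by at least `1e-3`, i.e. that the estimate is genuinely stochastic.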
Example #5

# Fragment of a larger training/evaluation script: `model` is a conditioned
# GPARRegressor, and `transform_x`/`unnormalise` are the script's own
# normalisation helpers.

# Compute metrics on the validation set, if one is given.
if x_valid is not None:
    print('Predicting validation points')
    samples = unnormalise(
        model.sample(transform_x(x_valid),
                     num_samples=50,
                     latent=True,
                     posterior=True))

    means = np.mean(samples, axis=0)
    stds = np.std(samples, axis=0)

    valid_rmse = np.mean((y_valid - means)**2)**0.5
    valid_loglik = model.logpdf(transform_x(x_valid), y_valid) / len(x_valid)
else:
    valid_rmse = 0
    valid_loglik = 0

print('Predicting training points')
samples = unnormalise(
    model.sample(transform_x(x), num_samples=50, latent=True, posterior=True))

means = np.mean(samples, axis=0)
stds = np.std(samples, axis=0)

train_rmse = np.mean((y - means)**2)**0.5
train_loglik = model.logpdf(transform_x(x), y) / len(x)

# Plot the result.
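The evaluation pattern in Example #5 is the one to copy: RMSE is computed against the mean of posterior samples, and the log-likelihood is normalised by the number of data points so that training and validation figures are comparable. A small helper capturing it (`model`, `transform_x`, and `unnormalise` are the snippet's own names; the rest is a sketch):

import numpy as np

def evaluate(model, transform_x, unnormalise, x, y, num_samples=50):
    # Posterior samples of the latent function at the inputs.
    samples = unnormalise(
        model.sample(transform_x(x), num_samples=num_samples,
                     latent=True, posterior=True))
    means = np.mean(samples, axis=0)
    rmse = np.mean((y - means) ** 2) ** 0.5
    # Per-point log-likelihood, comparable across data sets of different size.
    loglik = model.logpdf(transform_x(x), y) / len(x)
    return rmse, loglik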