Example #1
    t_test1, y_test1 = get_data(
        first_monday(2016) + i * timedelta(weeks=1),
        first_monday(2016) + (i + 4) * timedelta(weeks=1),
    )
    t_test2, y_test2 = get_data(
        first_monday(2016) + (i + 4) * timedelta(weeks=1),
        first_monday(2016) + (i + 5) * timedelta(weeks=1),
    )
    # Count since beginning of conditioning window. This assumes stationarity.
    t_test2 -= t_test1[0]
    t_test1 -= t_test1[0]
    tests.append(((t_test1, y_test1), (t_test2, y_test2)))
# Save the data sets.
wd.save(
    {
        "t_train": t_train,
        "y_train": y_train,
        "tests": tests
    },
    "data.pickle",
)

# Set up the GPCM models.
window = 7 * 6
scale = 5
n_u = 60
n_z = 150
noise = 0.05

# Normalise.
normaliser = Normaliser()
y_train = normaliser.transform(y_train)
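
The Normaliser used above is not shown in this snippet. As a rough sketch of what it presumably does (an assumption, not the actual gpcm implementation), it can be thought of as a z-score transform that remembers its statistics so predictions can be mapped back:

import numpy as np

class Normaliser:
    # Minimal z-score normaliser (sketch; the real class may differ).

    def transform(self, y):
        self._mean, self._std = np.mean(y), np.std(y)
        return (y - self._mean) / self._std

    def untransform(self, y):
        return y * self._std + self._mean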
Example #2
    return ks, us, fs


t = np.linspace(0, 10, 300)
noise_f = np.random.randn(len(t), 1)

# Construct model.
model = CGPCM(window=2, scale=1, n_u=10, t=t)

# Instantiate model.
models = model()

# Perform sampling.
if args.train:
    ks, us, fs = sample(model, t, noise_f)
    wd.save((ks, us, fs), "samples.pickle")
else:
    ks, us, fs = wd.load("samples.pickle")

# Plot.
plt.figure(figsize=(15, 4))

for i, (k, u, f) in enumerate(zip(ks, us, fs)):
    plt.subplot(3, 5, 1 + i)
    plt.plot(
        B.concat(-t[::-1][:-1], t),
        B.concat(u[:-1] * 0, u),
        lw=1,
    )
    if hasattr(model, "t_u"):
        plt.scatter(model.t_u, model.t_u * 0, s=5, marker="o", c="black")
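
The B.concat calls above mirror the time axis to negative lags and, because the CGPCM filter is causal, pad it with zeros there. In plain NumPy the same construction looks roughly like this (illustration only, not part of the original script):

import numpy as np

t = np.linspace(0, 10, 300)
u = np.exp(-t)  # stand-in for a sampled filter on t >= 0

# Extend the axis to negative lags and pad the filter with zeros there.
t_sym = np.concatenate((-t[::-1][:-1], t))
u_sym = np.concatenate((np.zeros_like(u[:-1]), u))
assert t_sym.shape == u_sym.shape == (2 * len(t) - 1,)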
Example #3
File: priors.py Project: wesselb/gpcm
        B.concat(samples[::-1, :][:-1, :], samples, axis=0),
    )


# Perform sampling.
if args.train:
    ks = [
        _extract_samples(model.predict_kernel(num_samples=20000))
        for model in models
    ]
    psds = [
        _extract_samples(model.predict_psd(num_samples=20000))
        for model in models
    ]
    model_ks, model_psds = ks, psds
    wd.save((model_ks, model_psds), "samples.pickle")
else:
    model_ks, model_psds = wd.load("samples.pickle")

# Plot.
plt.figure(figsize=(15, 2.5))

for i, (model, (x, ks)) in enumerate(zip(models, model_ks)):
    plt.subplot(1, 6, 1 + i)
    for q in [1, 5, 10, 20, 30, 40]:
        plt.fill_between(
            x,
            B.quantile(ks, q / 100, axis=1),
            B.quantile(ks, 1 - q / 100, axis=1),
            facecolor="tab:blue",
            alpha=0.2,
Example #4
            m_max=n_z // 2,
            t=t,
        ),
    ),
]:
    # Sample data.
    gp_f = GP(kernel)
    gp_y = gp_f + GP(noise * Delta(), measure=gp_f.measure)
    f, y = gp_f.measure.sample(gp_f(t), gp_y(t))
    f, y = B.flatten(f), B.flatten(y)
    wd.save(
        {
            "t": t,
            "f": f,
            "k": B.flatten(kernel(t_k, 0)),
            "y": y,
            "true_logpdf": gp_y(t).logpdf(y),
        },
        slugify(str(kernel)),
        "data.pickle",
    )

    for scheme in ["mean-field", "structured"]:
        model = model_constructor(scheme)
        prefix = (slugify(str(kernel)), scheme, slugify(model.name))

        # Fit model and predict function and kernel.
        model.fit(t, y, iters=10_000)
        elbo = model.elbo(t, y)
        posterior = model.condition(t, y)
        f_pred = posterior.predict(t)
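        # NOTE: the snippet is cut off here. A plausible continuation, assumed
        # rather than taken from the original script, would follow the wd.save
        # pattern and the (mean, var) prediction convention of the other
        # examples to store the per-kernel, per-scheme results:
        k_pred = posterior.predict_kernel()
        wd.save((t,) + f_pred, *prefix, "f_pred.pickle")
        wd.save((k_pred.x, k_pred.mean, k_pred.var), *prefix, "k_pred.pickle")
        wd.save(B.to_numpy(elbo), *prefix, "elbo.pickle")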
Example #5
# Make and save predictions.
if args.predict:
    posterior = model.condition(t, y)
    pred_f = (t, ) + posterior.predict(t)
    pred_psd = posterior.predict_psd()
    pred_psd = (
        pred_psd.x,
        pred_psd.mean,
        pred_psd.err_95_lower,
        pred_psd.err_95_upper,
        pred_psd.all_samples,
    )
    pred_k = posterior.predict_kernel()
    pred_k = (pred_k.x, pred_k.mean, pred_k.var)
    wd.save(pred_f, "pred_f.pickle")
    wd.save(pred_psd, "pred_psd.pickle")
    wd.save(pred_k, "pred_k.pickle")
else:
    pred_f = wd.load("pred_f.pickle")
    pred_psd = wd.load("pred_psd.pickle")
    pred_k = wd.load("pred_k.pickle")

# Unpack the prediction for the PSD and cut it off at frequency 0.5.
freqs, mean, lower, upper, samps = pred_psd
upper_freq = 0.5
samps = samps[freqs <= upper_freq, :]
mean = mean[freqs <= upper_freq]
lower = lower[freqs <= upper_freq]
upper = upper[freqs <= upper_freq]
freqs = freqs[freqs <= upper_freq]
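
The five masking lines above repeat the same comparison. A small refactor (a sketch, not part of the original script) applies the cutoff once:

def cut_off(freqs, *arrays, upper_freq=0.5):
    # Keep only the entries whose frequency is at most upper_freq.
    mask = freqs <= upper_freq
    return (freqs[mask],) + tuple(a[mask] for a in arrays)

freqs, mean, lower, upper, samps = cut_off(*pred_psd)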
Example #6
File: simulators.py Project: wesselb/oilmm
        [sim.to_numpy()[:args.n].reshape(-1, 1) for sim in sims.values()],
        axis=1)
    corr_empirical = cov_to_corr(np.cov(all_obs.T))

    # Compute predictions for latent processes.
    model = construct_model(vs)
    model = model.condition(x_data, y_data, x_ind=vs["x_ind"])
    x_proj, y_proj, _, _ = model.project(x_data, y_data)
    means, lowers, uppers = model.model.predict(x_proj)

    # Save for processing.
    wd.save(
        B.to_numpy({
            "n": args.n,
            "m": m,
            "p": p,
            "m_r": m_r,
            "m_s": m_s,
            "x_proj": x_proj,
            "y_proj": y_proj,
            "means": means,
            "lowers": lowers,
            "uppers": uppers,
            "learned_parameters": {name: vs[name]
                                   for name in vs.names},
            "corr_learned": corr_learned,
            "corr_empirical": corr_empirical,
        }),
        f"results_mr{m_r}_ms{m_s}{suffix}.pickle",
    )
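
cov_to_corr is defined elsewhere in the project; a minimal version of the standard covariance-to-correlation conversion (a sketch, not necessarily the project's exact code) is:

import numpy as np

def cov_to_corr(cov):
    # Divide each entry by the product of the corresponding standard deviations.
    std = np.sqrt(np.diag(cov))
    return cov / np.outer(std, std)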
Example #7
File: air_temp.py Project: lzongren/gpar
    d_all, d_train, d_tests = load_temp()[d_size]

    # Determine the number of inducing points.
    n_ind = [10 * 10 + 1, 10 * 15 + 1, 10 * 31 + 1][d_size]

    # Place inducing points evenly spaced.
    x = convert_index(d_all)
    x_ind = np.linspace(x.min(), x.max(), n_ind)

    # Fit and predict GPAR.
    #   Note: we use D-GPAR-L-NL here, as opposed to D-GPAR-L, to make the
    #   results a little more drastic.
    model = GPARRegressor(scale=0.2,
                          linear=True, linear_scale=10.,
                          nonlinear=True, nonlinear_scale=1.,
                          noise=0.1,
                          impute=True, replace=True, normalise_y=True,
                          x_ind=x_ind)
    model.fit(convert_index(d_train), d_train.to_numpy())

    # Predict for the test sets.
    preds = []
    for i, d in enumerate(d_tests):
        preds.append(model.predict(convert_index(d),
                                   num_samples=50,
                                   credible_bounds=True,
                                   latent=False))

    # Save predictions.
    wd.save(preds, f'results{d_size}.pickle')
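
The saved predictions can later be reloaded for evaluation. The sketch below assumes that model.predict(..., credible_bounds=True) returns a (mean, lower, upper) triple per test set; it is an illustration, not part of the original script:

import numpy as np

preds = wd.load(f'results{d_size}.pickle')
for (mean, lower, upper), d in zip(preds, d_tests):
    # Simple per-test-set accuracy check against the held-out observations.
    print(np.sqrt(np.nanmean((mean - d.to_numpy()) ** 2)))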
Example #8
# Split data.
test_inds = np.empty(t.shape, dtype=bool)
test_inds.fill(False)
for lower, upper in [(
        datetime(args.year, 1, 1) + i * timedelta(weeks=1),
        datetime(args.year, 1, 1) + (i + 1) * timedelta(weeks=1),
) for i in range(26, 53) if i % 2 == 1]:
    lower_mask = lower <= data.index
    upper_mask = upper > data.index
    test_inds = test_inds | (lower_mask & upper_mask)
t_train = t[~test_inds]
y_train = y[~test_inds]
t_test = t[test_inds]
y_test = y[test_inds]
# Save data for easier later reference.
wd.save({"train": (t_train, y_train), "test": (t_test, y_test)}, "data.pickle")

# Normalise training data.
normaliser = Normaliser()
y_train = normaliser.transform(y_train)

# Configure GPCM models.
window = 30
scale = 5
n_u = 50
n_z = 150

# Set up, fit, and save models.
models = [
    Model(
        window=window,
Example #9
    x = convert_index(d_all)
    x_ind = np.linspace(x.min(), x.max(), n_ind)

    # Fit and predict GPAR. NOTE: we use D-GPAR-L-NL here, as opposed to D-GPAR-L,
    # to make the results a little more drastic.
    model = GPARRegressor(
        scale=0.2,
        linear=True,
        linear_scale=10.0,
        nonlinear=True,
        nonlinear_scale=1.0,
        noise=0.1,
        impute=True,
        replace=True,
        normalise_y=True,
        x_ind=x_ind,
    )
    model.fit(convert_index(d_train), d_train.to_numpy())

    # Predict for the test sets.
    preds = []
    for i, d in enumerate(d_tests):
        preds.append(
            model.predict(
                convert_index(d), num_samples=50, credible_bounds=True, latent=False
            )
        )

    # Save predictions.
    wd.save(preds, f"results{d_size}.pickle")
Example #10
File: smk.py Project: wesselb/gpcm
    # Train structured approximation.
    model = GPCM(
        scheme="structured",
        window=window,
        scale=scale,
        noise=noise,
        n_u=n_u,
        n_z=n_z,
        t=t,
    )
    model.fit(t, y, iters=30_000)
    k_pred_struc = extract(model.condition(t, y).predict_kernel(t_k))
    psd_pred_struc = extract(model.condition(t, y).predict_psd())

    wd.save((k_pred_mf, psd_pred_mf, k_pred_struc, psd_pred_struc),
            "preds.pickle")
else:
    k_pred_mf, psd_pred_mf, k_pred_struc, psd_pred_struc = wd.load(
        "preds.pickle")

# Report metrics.

with out.Section("Structured"):
    t, mean, var, _, _ = k_pred_struc
    inds = t <= 3
    out.kv("MLL", metric.mll(mean[inds], var[inds], k[inds]))
    out.kv("RMSE", metric.rmse(mean[inds], k[inds]))
with out.Section("Mean field"):
    t, mean, var, _, _ = k_pred_mf
    inds = t <= 3
    out.kv("MLL", metric.mll(mean[inds], var[inds], k[inds]))