        first_monday(2016) + i * timedelta(weeks=1),
        first_monday(2016) + (i + 4) * timedelta(weeks=1),
    )
    t_test2, y_test2 = get_data(
        first_monday(2016) + (i + 4) * timedelta(weeks=1),
        first_monday(2016) + (i + 5) * timedelta(weeks=1),
    )

    # Count time from the beginning of the conditioning window. This assumes
    # stationarity.
    t_test2 -= t_test1[0]
    t_test1 -= t_test1[0]

    tests.append(((t_test1, y_test1), (t_test2, y_test2)))

# Save the data sets.
wd.save(
    {"t_train": t_train, "y_train": y_train, "tests": tests},
    "data.pickle",
)

# Set up GPCM models.
window = 7 * 6
scale = 5
n_u = 60
n_z = 150
noise = 0.05

# Normalise.
normaliser = Normaliser()
y_train = normaliser.transform(y_train)
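# The helper `first_monday` is used above but not defined in this fragment. A
# minimal sketch of what such a helper presumably looks like (an assumption,
# not the actual implementation): return the first Monday of the given year,
# which anchors the weekly conditioning and test windows.
from datetime import date, timedelta


def first_monday_sketch(year):
    # `date.weekday()` is 0 for Monday, so shift forward to the next Monday.
    d = date(year, 1, 1)
    return d + timedelta(days=(7 - d.weekday()) % 7)


# For example, `first_monday_sketch(2016)` gives 4 January 2016.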
    return ks, us, fs


t = np.linspace(0, 10, 300)
noise_f = np.random.randn(len(t), 1)

# Construct model.
model = CGPCM(window=2, scale=1, n_u=10, t=t)

# Instantiate model.
models = model()

# Perform sampling.
if args.train:
    ks, us, fs = sample(model, t, noise_f)
    wd.save((ks, us, fs), "samples.pickle")
else:
    ks, us, fs = wd.load("samples.pickle")

# Plot.
plt.figure(figsize=(15, 4))
for i, (k, u, f) in enumerate(zip(ks, us, fs)):
    plt.subplot(3, 5, 1 + i)
    plt.plot(
        B.concat(-t[::-1][:-1], t),
        B.concat(u[:-1] * 0, u),
        lw=1,
    )
    if hasattr(model, "t_u"):
        plt.scatter(model.t_u, model.t_u * 0, s=5, marker="o", c="black")
        B.concat(samples[::-1, :][:-1, :], samples, axis=0),
    )


# Perform sampling.
if args.train:
    ks = [
        _extract_samples(model.predict_kernel(num_samples=20000)) for model in models
    ]
    psds = [
        _extract_samples(model.predict_psd(num_samples=20000)) for model in models
    ]
    model_ks, model_psds = ks, psds
    wd.save((model_ks, model_psds), "samples.pickle")
else:
    model_ks, model_psds = wd.load("samples.pickle")

# Plot.
plt.figure(figsize=(15, 2.5))
for i, (model, (x, ks)) in enumerate(zip(models, model_ks)):
    plt.subplot(1, 6, 1 + i)
    # Shade nested credible regions by filling between symmetric quantiles.
    for q in [1, 5, 10, 20, 30, 40]:
        plt.fill_between(
            x,
            B.quantile(ks, q / 100, axis=1),
            B.quantile(ks, 1 - q / 100, axis=1),
            facecolor="tab:blue",
            alpha=0.2,
            m_max=n_z // 2,
            t=t,
        ),
    ),
]:
    # Sample data.
    gp_f = GP(kernel)
    gp_y = gp_f + GP(noise * Delta(), measure=gp_f.measure)
    f, y = gp_f.measure.sample(gp_f(t), gp_y(t))
    f, y = B.flatten(f), B.flatten(y)
    wd.save(
        {
            "t": t,
            "f": f,
            "k": B.flatten(kernel(t_k, 0)),
            "y": y,
            "true_logpdf": gp_y(t).logpdf(y),
        },
        slugify(str(kernel)),
        "data.pickle",
    )

    for scheme in ["mean-field", "structured"]:
        model = model_constructor(scheme)
        prefix = (slugify(str(kernel)), scheme, slugify(model.name))

        # Fit model and predict function and kernel.
        model.fit(t, y, iters=10_000)
        elbo = model.elbo(t, y)
        posterior = model.condition(t, y)
        f_pred = posterior.predict(t)
# Make and save predictions.
if args.predict:
    posterior = model.condition(t, y)

    pred_f = (t,) + posterior.predict(t)

    pred_psd = posterior.predict_psd()
    pred_psd = (
        pred_psd.x,
        pred_psd.mean,
        pred_psd.err_95_lower,
        pred_psd.err_95_upper,
        pred_psd.all_samples,
    )

    pred_k = posterior.predict_kernel()
    pred_k = (pred_k.x, pred_k.mean, pred_k.var)

    wd.save(pred_f, "pred_f.pickle")
    wd.save(pred_psd, "pred_psd.pickle")
    wd.save(pred_k, "pred_k.pickle")
else:
    pred_f = wd.load("pred_f.pickle")
    pred_psd = wd.load("pred_psd.pickle")
    pred_k = wd.load("pred_k.pickle")

# Unpack the PSD prediction and cut off at a frequency of 0.5.
freqs, mean, lower, upper, samps = pred_psd
upper_freq = 0.5
samps = samps[freqs <= upper_freq, :]
mean = mean[freqs <= upper_freq]
lower = lower[freqs <= upper_freq]
upper = upper[freqs <= upper_freq]
freqs = freqs[freqs <= upper_freq]
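# Optional check of the retained PSD samples: recompute a custom credible band
# directly from `samps`. This is a sketch; it assumes `samps` has shape
# (n_freqs, n_samples), which is consistent with the indexing
# `samps[freqs <= upper_freq, :]` above.
import numpy as np

lower_80 = np.percentile(samps, 10, axis=1)
upper_80 = np.percentile(samps, 90, axis=1)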
    [sim.to_numpy()[:args.n].reshape(-1, 1) for sim in sims.values()], axis=1
)
corr_empirical = cov_to_corr(np.cov(all_obs.T))

# Compute predictions for latent processes.
model = construct_model(vs)
model = model.condition(x_data, y_data, x_ind=vs["x_ind"])
x_proj, y_proj, _, _ = model.project(x_data, y_data)
means, lowers, uppers = model.model.predict(x_proj)

# Save for processing.
wd.save(
    B.to_numpy(
        {
            "n": args.n,
            "m": m,
            "p": p,
            "m_r": m_r,
            "m_s": m_s,
            "x_proj": x_proj,
            "y_proj": y_proj,
            "means": means,
            "lowers": lowers,
            "uppers": uppers,
            "learned_parameters": {name: vs[name] for name in vs.names},
            "corr_learned": corr_learned,
            "corr_empirical": corr_empirical,
        }
    ),
    f"results_mr{m_r}_ms{m_s}{suffix}.pickle",
)
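# `cov_to_corr` is used above but not defined in this fragment. A minimal
# sketch of what such a helper presumably does (an assumption, not necessarily
# the actual implementation): rescale a covariance matrix by its standard
# deviations to obtain the corresponding correlation matrix.
import numpy as np


def cov_to_corr_sketch(cov):
    # corr[i, j] = cov[i, j] / (std[i] * std[j]).
    std = np.sqrt(np.diag(cov))
    return cov / np.outer(std, std)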
d_all, d_train, d_tests = load_temp()[d_size]

# Determine the number of inducing points.
n_ind = [10 * 10 + 1, 10 * 15 + 1, 10 * 31 + 1][d_size]

# Place inducing points evenly spaced.
x = convert_index(d_all)
x_ind = np.linspace(x.min(), x.max(), n_ind)

# Fit and predict GPAR.
# Note: we use D-GPAR-L-NL here, as opposed to D-GPAR-L, to make the
# results a little more drastic.
model = GPARRegressor(scale=0.2,
                      linear=True, linear_scale=10.,
                      nonlinear=True, nonlinear_scale=1.,
                      noise=0.1,
                      impute=True, replace=True, normalise_y=True,
                      x_ind=x_ind)
model.fit(convert_index(d_train), d_train.to_numpy())

# Predict for the test sets.
preds = []
for i, d in enumerate(d_tests):
    preds.append(model.predict(convert_index(d),
                               num_samples=50,
                               credible_bounds=True,
                               latent=False))

# Save predictions.
wd.save(preds, f'results{d_size}.pickle')
# Split data: hold out every other week in the second half of the year as test data.
test_inds = np.empty(t.shape, dtype=bool)
test_inds.fill(False)
for lower, upper in [
    (
        datetime(args.year, 1, 1) + i * timedelta(weeks=1),
        datetime(args.year, 1, 1) + (i + 1) * timedelta(weeks=1),
    )
    for i in range(26, 53)
    if i % 2 == 1
]:
    lower_mask = lower <= data.index
    upper_mask = upper > data.index
    test_inds = test_inds | (lower_mask & upper_mask)
t_train = t[~test_inds]
y_train = y[~test_inds]
t_test = t[test_inds]
y_test = y[test_inds]

# Save data for easier later reference.
wd.save({"train": (t_train, y_train), "test": (t_test, y_test)}, "data.pickle")

# Normalise training data.
normaliser = Normaliser()
y_train = normaliser.transform(y_train)

# Configure GPCM models.
window = 30
scale = 5
n_u = 50
n_z = 150

# Set up, fit, and save models.
models = [
    Model(
        window=window,
x = convert_index(d_all)
x_ind = np.linspace(x.min(), x.max(), n_ind)

# Fit and predict GPAR. NOTE: we use D-GPAR-L-NL here, as opposed to D-GPAR-L,
# to make the results a little more drastic.
model = GPARRegressor(
    scale=0.2,
    linear=True,
    linear_scale=10.0,
    nonlinear=True,
    nonlinear_scale=1.0,
    noise=0.1,
    impute=True,
    replace=True,
    normalise_y=True,
    x_ind=x_ind,
)
model.fit(convert_index(d_train), d_train.to_numpy())

# Predict for the test sets.
preds = []
for i, d in enumerate(d_tests):
    preds.append(
        model.predict(
            convert_index(d), num_samples=50, credible_bounds=True, latent=False
        )
    )

# Save predictions.
wd.save(preds, f"results{d_size}.pickle")
    # Train structured approximation.
    model = GPCM(
        scheme="structured",
        window=window,
        scale=scale,
        noise=noise,
        n_u=n_u,
        n_z=n_z,
        t=t,
    )
    model.fit(t, y, iters=30_000)
    k_pred_struc = extract(model.condition(t, y).predict_kernel(t_k))
    psd_pred_struc = extract(model.condition(t, y).predict_psd())

    wd.save((k_pred_mf, psd_pred_mf, k_pred_struc, psd_pred_struc), "preds.pickle")
else:
    k_pred_mf, psd_pred_mf, k_pred_struc, psd_pred_struc = wd.load("preds.pickle")

# Report metrics.
with out.Section("Structured"):
    t, mean, var, _, _ = k_pred_struc
    inds = t <= 3
    out.kv("MLL", metric.mll(mean[inds], var[inds], k[inds]))
    out.kv("RMSE", metric.rmse(mean[inds], k[inds]))

with out.Section("Mean field"):
    t, mean, var, _, _ = k_pred_mf
    inds = t <= 3
    out.kv("MLL", metric.mll(mean[inds], var[inds], k[inds]))
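# For reference, a sketch of what the reported metrics compute. This assumes
# `metric.mll` is the mean Gaussian negative log-density of the true kernel
# values under the predictive marginals and `metric.rmse` is the root-mean-
# square error of the predictive mean; the actual definitions live in the
# `metric` module, which is not shown in this fragment.
import numpy as np


def mll_sketch(mean, var, true):
    # Mean negative log-density of `true` under N(mean, var), elementwise.
    return np.mean(0.5 * np.log(2 * np.pi * var) + 0.5 * (true - mean) ** 2 / var)


def rmse_sketch(mean, true):
    # Root-mean-square error of the predictive mean.
    return np.sqrt(np.mean((mean - true) ** 2))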