def main():
    # Load static data.
    G = torch.from_numpy(
            np.load(os.path.join(data_folder, "F_corrected_final.npy"))).float().detach()
    grid = Grid.load(os.path.join(data_folder, "grid.pickle"))
    volcano_coords = torch.from_numpy(grid.cells).float().detach()
    data_coords = torch.from_numpy(
            np.load(os.path.join(data_folder,
                    "niklas_data_coords_corrected_final.npy"))).float()
    data_values = torch.from_numpy(
            np.load(os.path.join(data_folder,
                    "niklas_data_obs_corrected_final.npy"))).float()

    data_std = 0.1
    sigma0_matern32 = 284.66
    m0_matern32 = 2139.1
    lambda0_matern32 = 651.58

    constant_updatable_gp = UpdatableGP(kernel, lambda0_matern32,
            sigma0_matern32, m0_matern32, volcano_coords, n_chunks=200)

    residuals = constant_updatable_gp.leave_1_out_residuals(
            G, data_values, data_std)
    # Note: np.save expects the target path first and appends ".npy" if the
    # name does not already end with it.
    np.save(os.path.join(results_folder, "./loocv_niklas.pck"),
            residuals.numpy())
def main():
    os.makedirs(output_path, exist_ok=True)

    # Load.
    F = torch.from_numpy(
            np.load(os.path.join(data_folder, "F_full_surface.npy"))).float().detach()
    grid = Grid.load(os.path.join(data_folder, "grid.pickle"))
    volcano_coords = torch.from_numpy(grid.cells).float().detach()
    data_coords = torch.from_numpy(
            np.load(os.path.join(data_folder, "surface_data_coords.npy"))).float()
    ground_truth = torch.from_numpy(
            np.load(os.path.join(data_folder, "post_sample.npy")))
    data_values = torch.from_numpy(
            np.load(os.path.join(data_folder, "post_data_sample.npy")))

    # Dictionary between the original Niklas data and our discretization.
    niklas_data_inds = torch.from_numpy(
            np.load(os.path.join(data_folder, "niklas_data_inds_insurf.npy"))).long()
    niklas_coords = data_coords[niklas_data_inds].numpy()

    # --------------------------------
    # DEFINITION OF THE EXCURSION SET.
    # --------------------------------
    THRESHOLD_low = 700.0
    excursion_inds = (ground_truth >= THRESHOLD_low).nonzero()[:, 0]

    # Params.
    data_std = 0.1
    # lambda0 = 338.0
    lambda0 = 338.46
    sigma0 = 359.49
    m0 = -114.40

    # Define GP model.
    data_feed = lambda x: data_values[x]
    updatable_gp = UpdatableGP(cl, lambda0, sigma0, m0, volcano_coords,
            n_chunks=80)

    for i, current_ind in enumerate(niklas_data_inds):
        y = data_feed(current_ind)
        G = F[current_ind, :].reshape(1, -1)
        updatable_gp.update(G, y, data_std)

        # Extract variance and coverage function.
        coverage = updatable_gp.coverage(THRESHOLD_low, None)

        # Save.
        np.save(os.path.join(output_path, "coverage_{}.npy".format(i)),
                coverage)
def main():
    os.makedirs(output_path, exist_ok=True)

    # Load.
    F = torch.from_numpy(
            np.load(os.path.join(data_folder, "F_full_surface.npy"))).float().detach()
    grid = Grid.load(os.path.join(data_folder, "grid.pickle"))
    volcano_coords = torch.from_numpy(grid.cells).float().detach()
    data_coords = torch.from_numpy(
            np.load(os.path.join(data_folder, "surface_data_coords.npy"))).float()
    ground_truth = torch.from_numpy(
            np.load(os.path.join(data_folder, "post_sample.npy")))
    data_values = torch.from_numpy(
            np.load(os.path.join(data_folder, "post_data_sample.npy")))

    # Dictionary between the original Niklas data and our discretization.
    niklas_data_inds = torch.from_numpy(
            np.load(os.path.join(data_folder, "niklas_data_inds_insurf.npy"))).long()
    niklas_coords = data_coords[niklas_data_inds].numpy()

    # --------------------------------
    # DEFINITION OF THE EXCURSION SET.
    # --------------------------------
    THRESHOLD_low = 700.0
    excursion_inds = (ground_truth >= THRESHOLD_low).nonzero()[:, 0]

    # Params.
    data_std = 0.1
    # lambda0 = 338.0
    lambda0 = 338.46
    sigma0 = 359.49
    m0 = -114.40

    # Define GP model.
    data_feed = lambda x: data_values[x]
    updatable_gp = UpdatableGP(cl, lambda0, sigma0, m0, volcano_coords,
            n_chunks=80)

    # Assimilate the myopic data collection plan.
    visited_inds = np.load(os.path.join(results_folder, "visited_inds.npy"))
    # Fixed: the observed data were loaded from the visited-indices file.
    observed_data = np.load(os.path.join(results_folder, "observed_data.npy"))

    n_chunks = 80
    for i, inds in enumerate(np.array_split(visited_inds, n_chunks)):
        print("Processing chunk {} / {}".format(i, n_chunks))
        y = data_feed(inds)
        G_current = F[inds, :]
        updatable_gp.update(G_current, y, data_std)
def main():
    # Load.
    G = torch.from_numpy(
            np.load(os.path.join(data_folder, "F_full_surface.npy"))).float().detach()
    grid = Grid.load(os.path.join(data_folder, "grid.pickle"))
    volcano_coords = torch.from_numpy(grid.cells).float().detach()
    data_coords = torch.from_numpy(
            np.load(os.path.join(data_folder, "surface_data_coords.npy"))).float()
    ground_truth = torch.from_numpy(np.load(ground_truth_path))

    # Dictionary between the original Niklas data and our discretization.
    niklas_data_inds = torch.from_numpy(
            np.load(os.path.join(data_folder, "niklas_data_inds_insurf.npy"))).long()
    niklas_coords = data_coords[niklas_data_inds].numpy()

    # --------------------------------
    # DEFINITION OF THE EXCURSION SET.
    # --------------------------------
    THRESHOLD_low = 700.0
    excursion_inds = (ground_truth >= THRESHOLD_low).nonzero()[:, 0]

    # Load results.
    visited_inds_IVR = np.load(os.path.join(
            results_folder_IVR, "visited_inds.npy"))
    visited_inds_wIVR = np.load(os.path.join(
            results_folder_wIVR, "visited_inds.npy"))

    # Observation operators.
    # G_stacked_IVR = G[visited_inds_IVR, :]
    G_stacked_wIVR = G[visited_inds_wIVR, :]

    # Reload the GPs.
    # gpIVR = UpdatableGP.load(os.path.join(results_folder_IVR, "gp_state.pkl"))
    # gpwIVR = UpdatableGP.load(os.path.join(results_folder_wIVR, "gp_state.pkl"))
    gpINFILL = UpdatableGP.load(os.path.join(results_folder_INFILL, "gp_state.pkl"))

    # Produce posterior realizations.
    for reskrig_sample_nr in range(300, 400):
        prior_realization = torch.from_numpy(np.load(
                os.path.join(reskrig_samples_folder,
                        "prior_sample_{}.npy".format(reskrig_sample_nr))))
        myReal = UpdatableRealization.bootstrap(prior_realization,
                G_stacked_wIVR, data_std=0.1, gp_module=gpINFILL)
        np.save(
                os.path.join(results_folder_wIVR,
                        "Cond_Reals_Infill/conditional_real_{}.npy".format(
                                reskrig_sample_nr)),
                myReal._realization.detach().cpu().numpy())
def main(sample_nr):
    # Create output directory.
    output_folder = os.path.join(base_folder,
            "wIVR_final_big/sample_{}".format(sample_nr))
    os.makedirs(output_folder, exist_ok=True)

    # Load static data.
    F = torch.from_numpy(
            np.load(os.path.join(data_folder, "F_full_surface.npy"))).float().detach()
    grid = Grid.load(os.path.join(data_folder, "grid.pickle"))
    volcano_coords = torch.from_numpy(grid.cells).float().detach()
    data_coords = torch.from_numpy(
            np.load(os.path.join(data_folder, "surface_data_coords.npy"))).float()

    # Load generated data.
    post_sample_path = os.path.join(ground_truth_folder,
            "prior_sample_{}.npy".format(sample_nr))
    ground_truth = torch.from_numpy(np.load(post_sample_path))

    # --------------------------------
    # DEFINITION OF THE EXCURSION SET.
    # --------------------------------
    THRESHOLD_low = 2500.0

    # Choose a starting point on the coast.
    start_ind = 4478

    # -------------------------------------
    # Define GP model.
    # -------------------------------------
    data_std = 0.1
    sigma0_matern32 = 284.66
    m0_matern32 = 2139.1
    lambda0_matern32 = 651.58

    # Prepare data.
    data_values = F @ ground_truth
    data_feed = lambda x: data_values[x]

    updatable_gp = UpdatableGP(cl, lambda0_matern32, sigma0_matern32,
            m0_matern32, volcano_coords, n_chunks=200)
    # -------------------------------------

    from volcapy.strategy.random_walk import RandomWalkStrategy
    strategy = MyopicWIVRStrategy(
            updatable_gp, data_coords,
            F, data_feed,
            lower=THRESHOLD_low, upper=None,
            )

    start = timer()
    # Run strategy.
    visited_inds, observed_data = strategy.run(
            start_ind, n_steps=4000, data_std=0.1,
            max_step=151.0,
            min_step=60.0,
            output_folder=output_folder)
    end = timer()
    print("Run in {} mins.".format((end - start) / 60))
def main(sample_nr):
    output_folder = os.path.join(base_folder,
            "wIVR_results_350_nonoise_step_310/prior_samples_April2021/sample_{}".format(
                    sample_nr))

    # Load static data.
    F = torch.from_numpy(
            np.load(os.path.join(data_folder, "F_full_surface.npy"))).float().detach()
    grid = Grid.load(os.path.join(data_folder, "grid.pickle"))
    volcano_coords = torch.from_numpy(grid.cells).float().detach()
    data_coords = torch.from_numpy(
            np.load(os.path.join(data_folder, "surface_data_coords.npy"))).float()

    # Load generated data.
    post_sample_path = os.path.join(ground_truth_folder,
            "prior_sample_{}.npy".format(sample_nr))
    ground_truth = torch.from_numpy(np.load(post_sample_path))

    # --------------------------------
    # DEFINITION OF THE EXCURSION SET.
    # --------------------------------
    THRESHOLD_low = 350.0
    true_excursion_inds = (ground_truth >= THRESHOLD_low).nonzero()[:, 0]
    print("True excursion size: {} cells.".format(true_excursion_inds.shape[0]))

    # -------------------------------------
    # Define GP model.
    # -------------------------------------
    data_std = 0.1
    lambda0 = 338.46
    sigma0 = 359.49
    m0 = -114.40

    # Prepare data.
    data_values = F @ ground_truth
    data_feed = lambda x: data_values[x]

    # Debug: print a sample observation from the data feed.
    print("y")
    print(data_feed(1))

    updatable_gp = UpdatableGP(cl, lambda0, sigma0, m0, volcano_coords,
            n_chunks=200)
    # -------------------------------------

    from volcapy.strategy.random_walk import RandomWalkStrategy
    strategy = MyopicWIVRStrategy(updatable_gp, data_coords,
            F, data_feed,
            lower=THRESHOLD_low, upper=None,
            )

    start = timer()
    # Run strategy.
    visited_inds, observed_data = strategy.run(
            start_ind=-1, n_steps=4000, data_std=0.1,
            max_step=310.0,
            output_folder=output_folder,
            restart_from_save=output_folder)
    end = timer()
    print("Run in {} mins.".format((end - start) / 60))
# Test the LazyTensor wrapper for UpdatableCovariance.
# In particular, test the pivoted Cholesky decomposition.
m0 = 1.0
sigma0 = 2.0
lambda0 = 0.5

n_cells_1d = 50
forward_cutoff = 400  # Only make 200 observations (Fourier and pointwise).
my_problem = ToyFourier2d.build_problem(n_cells_1d, forward_cutoff)

updatable_gp = UpdatableGP(kernel, lambda0, sigma0, m0,
        torch.tensor(my_problem.grid.cells).float(), n_chunks=200)
lazy_cov = UpdatableCovLazyTensor(updatable_gp.covariance)

# Test getitem.
lazy_cov[0:10, 0:10].evaluate()

# Test pivoted Cholesky decomposition.
from gpytorch.utils.pivoted_cholesky import pivoted_cholesky
res = pivoted_cholesky(lazy_cov, max_iter=300, error_tol=0.01)
preconditioner = MatmulLazyTensor(res, res.t())

# Now test conjugate gradient inversion.
# (matmul_closure and rhs are assumed to be defined in the full script.)
ans = linear_cg(matmul_closure, rhs)
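# A hedged completion sketch (not part of the original test): linear_cg needs
# a matmul closure and a right-hand side, which the snippet above leaves
# undefined. One plausible way to set them up from the objects built above;
# the import path may differ between gpytorch versions.
from gpytorch.utils.linear_cg import linear_cg

rhs = torch.randn(lazy_cov.shape[0], 1)  # Arbitrary right-hand side.
matmul_closure = lazy_cov.matmul         # Closure computing K @ v.
ans = linear_cg(matmul_closure, rhs)     # Iteratively solve K x = rhs.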
def main():
    os.makedirs(output_path, exist_ok=True)

    # Load.
    F = torch.from_numpy(
            np.load(os.path.join(data_folder, "F_full_surface.npy"))).float().detach()
    grid = Grid.load(os.path.join(data_folder, "grid.pickle"))
    volcano_coords = torch.from_numpy(grid.cells).float().detach()
    data_coords = torch.from_numpy(
            np.load(os.path.join(data_folder, "surface_data_coords.npy"))).float()
    ground_truth = torch.from_numpy(
            np.load(os.path.join(data_folder, "post_sample.npy")))
    data_values = torch.from_numpy(
            np.load(os.path.join(data_folder, "post_data_sample.npy")))

    # Dictionary between the original Niklas data and our discretization.
    niklas_data_inds = torch.from_numpy(
            np.load(os.path.join(data_folder, "niklas_data_inds_insurf.npy"))).long()
    niklas_coords = data_coords[niklas_data_inds].numpy()

    # PATHS ON THE VOLCANO.
    from volcapy.data_preparation.paths import paths as paths_niklas
    # Convert to indices in the full dataset.
    paths = []
    for path in paths_niklas:
        paths.append(niklas_data_inds[path].long())

    # --------------------------------
    # DEFINITION OF THE EXCURSION SET.
    # --------------------------------
    THRESHOLD_low = 700.0
    excursion_inds = (ground_truth >= THRESHOLD_low).nonzero()[:, 0]

    # Plot situation.
    plt.scatter(data_coords[:, 0], data_coords[:, 1], c="k", alpha=0.1)
    plt.scatter(volcano_coords[excursion_inds, 0],
            volcano_coords[excursion_inds, 1], c="r", alpha=0.07)
    plt.scatter(niklas_coords[:, 0], niklas_coords[:, 1], c=niklas_coords[:, 2])
    plt.scatter(niklas_coords[coast_data_inds, 0],
            niklas_coords[coast_data_inds, 1], c="r")
    for i, path in enumerate(paths):
        for x, y in zip(data_coords[path, 0], data_coords[path, 1]):
            plt.text(x, y, str(i), color="black", fontsize=6)
    plt.title(
            "Paths on the Stromboli, location of coastal data and excursion set.")
    plt.show()

    # Coast data.
    coast_data_inds_infull = niklas_data_inds[coast_data_inds]

    # Choose a starting point on the coast.
    start_ind = coast_data_inds_infull[0]

    # Params.
    data_std = 0.1
    # lambda0 = 338.0
    lambda0 = 338.46
    sigma0 = 359.49
    m0 = -114.40

    # Define GP model.
    data_feed = lambda x: data_values[x]
    updatable_gp = UpdatableGP(cl, lambda0, sigma0, m0, volcano_coords,
            n_chunks=80)

    from volcapy.strategy.myopic_weighted_ivr import MyopicStrategy
    strategy = MyopicStrategy(updatable_gp, data_coords, F, data_feed,
            lower=THRESHOLD_low, upper=None)

    start = timer()
    visited_inds, observed_data, ivrs = strategy.run(
            start_ind, n_steps=2000, data_std=0.1,
            output_folder=output_path, save_coverage=True)
    end = timer()
    print("Run in {} mins.".format((end - start) / 60))

    np.save("visited_inds.npy", visited_inds)
    np.save("observed_data.npy", observed_data)
    np.save("ivrs.npy", ivrs)
def main(sample_nr):
    # Create output directory.
    output_folder = os.path.join(base_folder,
            "wIVR_final_big/sample_{}".format(sample_nr))
    os.makedirs(output_folder, exist_ok=True)

    # Load static data.
    F = torch.from_numpy(
            np.load(os.path.join(data_folder, "F_full_surface.npy"))).float().detach()
    grid = Grid.load(os.path.join(data_folder, "grid.pickle"))
    volcano_coords = torch.from_numpy(grid.cells).float().detach()
    data_coords = torch.from_numpy(
            np.load(os.path.join(data_folder, "surface_data_coords.npy"))).float()

    # Load generated data.
    ground_truth_path = os.path.join(ground_truth_folder,
            "prior_sample_{}.npy".format(sample_nr))
    ground_truth = torch.from_numpy(np.load(ground_truth_path))

    # Load prior realizations.
    N_REALIZATIONS = 100
    prior_realizations = []
    for i in range(100, 100 + N_REALIZATIONS):
        realization_path = os.path.join(ground_truth_folder,
                "prior_sample_{}.npy".format(i))
        prior_realizations.append(torch.from_numpy(np.load(realization_path)))

    # --------------------------------
    # DEFINITION OF THE EXCURSION SET.
    # --------------------------------
    THRESHOLD_low = 2500.0

    # Choose a starting point on the coast.
    start_ind = 4478

    # -------------------------------------
    # Define GP model.
    # -------------------------------------
    data_std = 0.1
    sigma0_matern32 = 284.66
    m0_matern32 = 2139.1
    lambda0_matern32 = 651.58

    # Prepare data.
    data_values = F @ ground_truth
    data_feed = lambda x: data_values[x]

    updatable_gp = UpdatableGP(cl, lambda0_matern32, sigma0_matern32,
            m0_matern32, volcano_coords, n_chunks=200)
    # -------------------------------------

    strategy = ConservativeStrategy(updatable_gp, data_coords,
            F, data_feed,
            lower=THRESHOLD_low, upper=None,
            prior_realizations=prior_realizations)

    # Run strategy.
    visited_inds, observed_data = strategy.run(
            start_ind, n_steps=4000, data_std=0.1,
            max_step=151.0,
            min_step=60.0,
            output_folder=output_folder)
def main():
    # Load.
    G = torch.from_numpy(np.load(os.path.join(
            data_folder, "F_niklas.npy"))).float().detach()
    grid = Grid.load(os.path.join(data_folder, "grid.pickle"))
    volcano_coords = torch.from_numpy(grid.cells).float().detach()
    data_coords = torch.from_numpy(
            np.load(os.path.join(data_folder, "niklas_data_coords.npy"))).float()
    data_values = torch.from_numpy(
            np.load(os.path.join(data_folder, "niklas_data_obs.npy"))).float()
    n_data = G.shape[0]

    # Define GP model.
    data_std = 0.1
    sigma0 = 284.66
    m0 = 2139.1
    lambda0 = 651.58

    # Build trends: constant + planar + cylindrical.
    x0 = volcano_coords[:, 0].mean()  # Volcano center.
    y0 = volcano_coords[:, 1].mean()
    z0 = volcano_coords[:, 2].mean()

    coeff_mean = torch.tensor([m0, 0.0, 0.0]).reshape(-1, 1)
    coeff_cov = torch.tensor([[200.0, 0, 0],
                              [0, 0.05, 0],
                              [0, 0, 0.05]])
    coeff_F = torch.hstack([
            torch.ones(volcano_coords.shape[0], 1),
            planar(volcano_coords, x0, y0, z0,
                    phi=torch.tensor([45]), theta=torch.tensor([45])).reshape(-1, 1),
            cylindrical(volcano_coords, x0, y0).reshape(-1, 1)])

    # Model with trend.
    updatable_gp = UniversalUpdatableGP(kernel, lambda0, torch.tensor([sigma0]),
            volcano_coords, coeff_F, coeff_cov, coeff_mean,
            n_chunks=200)

    # Sample an artificial volcano and
    # invert the data at Niklas's points.
    ground_truth, true_trend_coeffs = updatable_gp.sample_prior()
    noise = MultivariateNormal(loc=torch.zeros(n_data),
            covariance_matrix=data_std**2 * torch.eye(n_data)).rsample()
    synth_data = G @ ground_truth + noise
    updatable_gp.update(G, synth_data, data_std)
    np.save("post_mean_universal.npy",
            updatable_gp.mean_vec.detach().cpu().numpy())
    np.save("ground_truth.npy", ground_truth.cpu().numpy())

    # Model that assumes the trend is a constant.
    # Let's be fair and allow it to know the true mean.
    m0_true = true_trend_coeffs[0]
    constant_updatable_gp = UpdatableGP(kernel, lambda0, torch.tensor([sigma0]),
            m0_true, volcano_coords, n_chunks=200)
    constant_updatable_gp.update(G, synth_data, data_std)
    np.save("post_mean_constant.npy",
            constant_updatable_gp.mean_vec.detach().cpu().numpy())

    np.save("true_trend_coeffs.npy", true_trend_coeffs.detach().cpu().numpy())
    np.save("trend_matrix.npy", coeff_F.detach().cpu().numpy())
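# A minimal post-processing sketch (not part of the original script): it
# compares the posterior means of the trend (universal kriging) model and the
# constant-mean model against the simulated ground truth via RMSE. The file
# names match the np.save calls above; run it from the same working directory.
import numpy as np

ground_truth = np.load("ground_truth.npy").reshape(-1)
post_mean_universal = np.load("post_mean_universal.npy").reshape(-1)
post_mean_constant = np.load("post_mean_constant.npy").reshape(-1)

rmse_universal = np.sqrt(np.mean((post_mean_universal - ground_truth)**2))
rmse_constant = np.sqrt(np.mean((post_mean_constant - ground_truth)**2))
print("RMSE, model with trend: {:.2f}".format(rmse_universal))
print("RMSE, constant-mean model: {:.2f}".format(rmse_constant))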
def main():
    # Load.
    G = torch.from_numpy(np.load(os.path.join(
            data_folder, "F_niklas.npy"))).float().detach()
    grid = Grid.load(os.path.join(data_folder, "grid.pickle"))
    volcano_coords = torch.from_numpy(grid.cells).float().detach()
    data_coords = torch.from_numpy(
            np.load(os.path.join(data_folder, "niklas_data_coords.npy"))).float()
    data_values = torch.from_numpy(
            np.load(os.path.join(data_folder, "niklas_data_obs.npy"))).float()
    n_data = G.shape[0]

    # Define GP model.
    data_std = 0.1
    # sigma0 = 284.66
    sigma0 = 1.0
    m0 = 2139.1
    # lambda0 = 651.58
    lambda0 = 200.0

    # Build trends: constant + cylindrical.
    x0 = volcano_coords[:, 0].mean()  # Volcano center.
    y0 = volcano_coords[:, 1].mean()
    z0 = volcano_coords[:, 2].mean()

    coeff_mean = torch.tensor([m0, 0.01]).reshape(-1, 1).float()
    coeff_cov = torch.tensor([[200.0, 0],
                              [0, 0.05]]).float()
    coeff_F = torch.hstack([
            torch.ones(volcano_coords.shape[0], 1),
            cylindrical(volcano_coords, x0, y0).reshape(-1, 1)]).float()

    # Model with trend.
    updatable_gp = UniversalUpdatableGP(kernel, lambda0, torch.tensor([sigma0]),
            volcano_coords, coeff_F, coeff_cov, coeff_mean,
            n_chunks=200)

    # Sample an artificial log-normal volcano.
    gp_sampler = UpdatableGP(kernel, lambda0, torch.tensor([sigma0]), 0,
            volcano_coords, n_chunks=200)
    """
    # Add trend to generate ground truth.
    # Commented out since we re-use an already nice looking one.
    ground_truth_no_trend = torch.exp(gp_sampler.sample_prior())
    true_trend = coeff_F @ coeff_mean
    ground_truth = ground_truth_no_trend + true_trend
    np.save(os.path.join(results_folder, "ground_truth.npy"),
            ground_truth.cpu().numpy())
    """
    ground_truth = torch.from_numpy(
            np.load(os.path.join(results_folder, "ground_truth.npy")))

    # Add noise and generate data.
    """
    noise = MultivariateNormal(loc=torch.zeros(n_data),
            covariance_matrix=data_std**2 * torch.eye(n_data)).rsample().reshape(-1, 1)
    synth_data = G @ ground_truth + noise
    np.save(os.path.join(results_folder, "synth_data.npy"),
            synth_data.cpu().numpy())
    """
    synth_data = torch.from_numpy(
            np.load(os.path.join(results_folder, "synth_data.npy")))

    # Now train a GP model on it.
    constant_updatable_gp = UpdatableGP(kernel, lambda0, torch.tensor([sigma0]),
            m0, volcano_coords, n_chunks=200)

    # Compute log-likelihood.
    updatable_gp.concentrated_NLL(10.0, G, synth_data, kappa_2=0.01)
def main():
    # Load.
    G = torch.from_numpy(
            np.load(os.path.join(data_folder, "F_full_surface.npy"))).float().detach()
    grid = Grid.load(os.path.join(data_folder, "grid.pickle"))
    volcano_coords = torch.from_numpy(grid.cells).float().detach()
    data_coords = torch.from_numpy(
            np.load(os.path.join(data_folder, "surface_data_coords.npy"))).float()
    ground_truth = torch.from_numpy(np.load(ground_truth_path))

    # Dictionary between the original Niklas data and our discretization.
    niklas_data_inds = torch.from_numpy(
            np.load(os.path.join(data_folder, "niklas_data_inds_insurf.npy"))).long()
    niklas_coords = data_coords[niklas_data_inds].numpy()

    # --------------------------------
    # DEFINITION OF THE EXCURSION SET.
    # --------------------------------
    THRESHOLD_low = 700.0
    excursion_inds = (ground_truth >= THRESHOLD_low).nonzero()[:, 0]

    # Reload the GPs.
    gpINFILL = UpdatableGP.load(os.path.join(results_folder_INFILL, "gp_state.pkl"))

    # Load results.
    visited_inds_INFILL = np.load(os.path.join(
            results_folder_INFILL, "visited_inds.npy"))

    # The GP is only saved every 10 iterations, so trim the visited indices
    # to the data that the saved GP has actually assimilated.
    n_data = len(gpINFILL.covariance.inversion_ops)
    visited_inds_INFILL = visited_inds_INFILL[:n_data]

    # Observation operators.
    G_stacked_INFILL = G[visited_inds_INFILL, :]

    # Produce posterior realizations.
    for reskrig_sample_nr in range(200, 400):
        prior_realization = torch.from_numpy(np.load(
                os.path.join(reskrig_samples_folder,
                        "prior_sample_{}.npy".format(reskrig_sample_nr))))
        myReal = UpdatableRealization.bootstrap(prior_realization,
                G_stacked_INFILL, data_std=0.1, gp_module=gpINFILL)
        np.save(
                os.path.join(output_folder,
                        "conditional_real_{}.npy".format(reskrig_sample_nr)),
                myReal._realization.detach().cpu().numpy())
        """
        irregular_array_to_point_cloud(volcano_coords.numpy(),
                myReal._realization.detach().cpu().numpy(),
                os.path.join(results_folder_wIVR,
                        "Cond_Reals/conditional_real_{}.vtk".format(reskrig_sample_nr)),
                fill_nan_val=-20000.0)
        """
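# A small, optional post-processing sketch (not part of the original script):
# it estimates, per cell, the empirical excursion probability as the fraction
# of the conditional realizations saved above that exceed the threshold.
# It assumes output_folder points to the same folder used in the loop above.
import os
import numpy as np

THRESHOLD_low = 700.0
reals = []
for nr in range(200, 400):
    path = os.path.join(output_folder, "conditional_real_{}.npy".format(nr))
    reals.append(np.load(path).reshape(-1))
reals = np.stack(reals, axis=0)  # Shape (n_realizations, n_cells).

# Empirical excursion probability for each cell.
excursion_prob = np.mean(reals >= THRESHOLD_low, axis=0)
np.save(os.path.join(output_folder, "empirical_excursion_prob.npy"),
        excursion_prob)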
def main(sample_nr):
    # Create output directory.
    output_folder = os.path.join(base_folder,
            "RANDOMWALK_results/prior_samples_April2021/sample_{}".format(sample_nr))
    os.makedirs(output_folder, exist_ok=True)

    # Load static data.
    F = torch.from_numpy(
            np.load(os.path.join(data_folder, "F_full_surface.npy"))).float().detach()
    grid = Grid.load(os.path.join(data_folder, "grid.pickle"))
    volcano_coords = torch.from_numpy(grid.cells).float().detach()
    data_coords = torch.from_numpy(
            np.load(os.path.join(data_folder, "surface_data_coords.npy"))).float()

    # Load generated data.
    post_sample_path = os.path.join(ground_truth_folder,
            "prior_sample_{}.npy".format(sample_nr))
    ground_truth = torch.from_numpy(np.load(post_sample_path))

    # --------------------------------
    # DEFINITION OF THE EXCURSION SET.
    # --------------------------------
    THRESHOLD_low = 500.0

    # Choose a starting point on the coast.
    start_ind = 4478

    # -------------------------------------
    # Define GP model.
    # -------------------------------------
    data_std = 0.1
    lambda0 = 338.46
    sigma0 = 359.49
    m0 = -114.40

    # Prepare data (noisy observations of the ground truth).
    data_values = F @ ground_truth + torch.normal(0, data_std,
            size=(F.shape[0], 1))
    data_feed = lambda x: data_values[x]

    updatable_gp = UpdatableGP(cl, lambda0, sigma0, m0, volcano_coords,
            n_chunks=200)
    # -------------------------------------

    from volcapy.strategy.random_walk import RandomWalkStrategy
    strategy = RandomWalkStrategy(updatable_gp, data_coords,
            F, data_feed,
            lower=THRESHOLD_low, upper=None,
            )

    start = timer()
    # Run strategy.
    visited_inds, observed_data = strategy.run(
            start_ind, n_steps=4000, data_std=0.1,
            output_folder=output_folder)
    end = timer()
    print("Run in {} mins.".format((end - start) / 60))
def main(sample_nr):
    # Create output directory.
    output_folder = os.path.join(
            base_folder,
            "INFILL_results_350_nonoise/prior_samples_April2021/sample_{}".format(
                    sample_nr))
    os.makedirs(output_folder, exist_ok=True)

    # Load static data.
    F = torch.from_numpy(
            np.load(os.path.join(data_folder, "F_full_surface.npy"))).float().detach()
    grid = Grid.load(os.path.join(data_folder, "grid.pickle"))
    volcano_coords = torch.from_numpy(grid.cells).float().detach()
    data_coords = torch.from_numpy(
            np.load(os.path.join(data_folder, "surface_data_coords.npy"))).float()

    # Load generated data.
    post_sample_path = os.path.join(ground_truth_folder,
            "prior_sample_{}.npy".format(sample_nr))
    ground_truth = torch.from_numpy(np.load(post_sample_path))

    # --------------------------------
    # DEFINITION OF THE EXCURSION SET.
    # --------------------------------
    THRESHOLD_low = 350.0
    true_excursion_inds = (ground_truth >= THRESHOLD_low).nonzero()[:, 0]
    print("True excursion size: {} cells.".format(
            true_excursion_inds.shape[0]))

    # Choose a starting point on the coast.
    start_ind = 4478

    # -------------------------------------
    # Define GP model.
    # -------------------------------------
    data_std = 0.1
    lambda0 = 338.46
    sigma0 = 359.49
    m0 = -114.40

    # Prepare data.
    data_values = F @ ground_truth
    data_feed = lambda x: data_values[x]

    updatable_gp = UpdatableGP(cl, lambda0, sigma0, m0, volcano_coords,
            n_chunks=200)
    # -------------------------------------

    strategy = InfillStrategy(
            updatable_gp, data_coords,
            F, data_feed,
            lower=THRESHOLD_low, upper=None,
            )

    start = timer()
    # Run strategy.
    visited_inds, observed_data = strategy.run(
            data_std=0.1, output_folder=output_folder,
            n_data_splits=200)
    end = timer()
    print("Run in {} mins.".format((end - start) / 60))
def main():
    os.makedirs(output_path, exist_ok=True)

    # Load.
    F = torch.from_numpy(
            np.load(os.path.join(data_folder, "F_full_surface.npy"))).float().detach()
    grid = Grid.load(os.path.join(data_folder, "grid.pickle"))
    volcano_coords = torch.from_numpy(grid.cells).float().detach()
    data_coords = torch.from_numpy(
            np.load(os.path.join(data_folder, "surface_data_coords.npy"))).float()
    ground_truth = torch.from_numpy(
            np.load(os.path.join(data_folder, "post_sample.npy")))
    data_values = torch.from_numpy(
            np.load(os.path.join(data_folder, "post_data_sample.npy")))

    # Dictionary between the original Niklas data and our discretization.
    niklas_data_inds = torch.from_numpy(
            np.load(os.path.join(data_folder, "niklas_data_inds_insurf.npy"))).long()
    niklas_coords = data_coords[niklas_data_inds].numpy()

    # PATHS ON THE VOLCANO.
    from volcapy.data_preparation.paths import paths as paths_niklas
    # Convert to indices in the full dataset.
    paths = []
    for path in paths_niklas:
        paths.append(niklas_data_inds[path].long())

    # --------------------------------
    # DEFINITION OF THE EXCURSION SET.
    # --------------------------------
    THRESHOLD_low = 700.0
    excursion_inds = (ground_truth >= THRESHOLD_low).nonzero()[:, 0]

    # Coast data.
    coast_data_inds_infull = niklas_data_inds[coast_data_inds]

    # Choose a starting point on the coast.
    start_ind = coast_data_inds_infull[0]

    # Params.
    data_std = 0.1
    # lambda0 = 338.0
    lambda0 = 338.46
    sigma0 = 359.49
    m0 = -114.40

    # Define GP model.
    data_feed = lambda x: data_values[x]
    updatable_gp = UpdatableGP(cl, lambda0, sigma0, m0, volcano_coords,
            n_chunks=70)

    from volcapy.strategy.myopic_weighted_ivr import MyopicStrategy
    strategy = MyopicStrategy(updatable_gp, data_coords, F, data_feed,
            lower=THRESHOLD_low, upper=None)

    visited_inds = np.load("./visited_inds.npy")

    start = timer()
    strategy.save_plugin_estimate(visited_inds, data_std=0.1,
            output_folder="./")
    end = timer()
    print("Run in {} mins.".format((end - start) / 60))
def main(): print("Main") # Load static data. F = torch.from_numpy( np.load(os.path.join(data_folder, "F_corrected_final.npy"))).float().detach() grid = Grid.load(os.path.join(data_folder, "grid.pickle")) volcano_coords = torch.from_numpy(grid.cells).float().detach() data_coords = torch.from_numpy( np.load( os.path.join(data_folder, "niklas_data_coords_corrected_final.npy"))).float() data_values = torch.from_numpy( np.load( os.path.join(data_folder, "niklas_data_obs_corrected_final.npy"))).float() # Remove data points that are too close to each other. # prob_inds = np.array([92, 109, 116, 142, 143, 199, 235, 294, 295, 400]) from scipy.spatial import distance_matrix dists = distance_matrix(data_coords.numpy(), data_coords.numpy()) np.fill_diagonal(dists, 100.0) prob_inds, _ = np.where(dists < 40.0) # prob_inds = np.array([92, 109, 116, 142, 143, 199, 235, 294, 295, 400, # 43, 82, 99, 137, 191, 196, 420]) F = np.delete(F, prob_inds, axis=0) data_coords = np.delete(data_coords, prob_inds, axis=0) data_values = np.delete(data_values, prob_inds, axis=0) # HYPERPARAMETERS data_std = 0.1 sigma0_exp = 308.89 m0_exp = 535.39 lambda0_exp = 1925.0 sigma0_matern32 = 527.84 m0_matern32 = 549.15 lambda0_matern32 = 891.66 sigma0_matern52 = 349.47 m0_matern52 = 582.43 lambda0_matern52 = 436.206 df = pd.DataFrame( columns=['kernel', 'Test set size', 'repetition', 'Test RMSE']) # Loop over hold-out length: n_trains = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500] n_trains = [250, 300, 350, 400, 450, 500] n_trains = [400, 450, 500] n_repetitions = 30 print("Go") for n_train in n_trains: print("Train: {}".format(n_train)) for repetition in range(n_repetitions): print("Repetition {}.".format(repetition)) # Create a random shuffle of the data. shuffled_inds = list(range(data_values.shape[0])) np.random.shuffle(shuffled_inds) F_shuffled = F[shuffled_inds, :] data_values_shuffled = data_values[shuffled_inds] # Test/Train split. F_train = F_shuffled[:n_train, :] data_values_train = data_values_shuffled[:n_train] F_test = F_shuffled[n_train:, :] data_values_test = data_values_shuffled[n_train:] # Re-create the GPs at every loop. gp_exp = UpdatableGP(exponential_kernel, lambda0_exp, sigma0_exp, m0_exp, volcano_coords, n_chunks=400) gp_matern32 = UpdatableGP(matern32_kernel, lambda0_matern32, sigma0_matern32, m0_matern32, volcano_coords, n_chunks=400) gp_matern52 = UpdatableGP(matern52_kernel, lambda0_matern52, sigma0_matern52, m0_matern52, volcano_coords, n_chunks=400) gps = [gp_exp, gp_matern32, gp_matern52] for gp in gps: print(gp.covariance.cov_module.KERNEL_FAMILY) torch.cuda.empty_cache() # Condition on training data. gp.update(F_train, data_values_train, data_std) m_post_m = gp.mean_vec # Predict test data. data_values_pred = (F_test @ m_post_m.cpu()).reshape(-1) test_rmse = torch.sqrt( torch.mean((data_values_test - data_values_pred)**2)) # Compute negative log predictive density. neg_predictive_log_density = gp.neg_predictive_log_density( data_values_test, F_test, data_std, svd=True) df = df.append( { 'kernel': gp.covariance.cov_module.KERNEL_FAMILY, 'Test set size': F.shape[0] - n_train, 'repetition': repetition, 'Test RMSE': test_rmse.detach().item(), 'Test neg_predictive_log_density': neg_predictive_log_density.detach().item() }, ignore_index=True) # Save after each train set size. df.to_pickle("test_set_results.pkl")
plt.rcParams.update(plot_params)

output_folder = "/home/cedric/PHD/Dev/VolcapySIAM/reporting/universal_kriging/mascot_num_plots/plots/"

n_cells_1d = 2000
my_problem = ToyFourier1d.build_problem(n_cells_1d)

m0 = 0.0
sigma0 = np.sqrt(2.0)
lambda0 = 0.05

cell_coords = torch.from_numpy(my_problem.grid.cells)

constant_updatable_gp = UpdatableGP(kernel, lambda0, torch.tensor([sigma0]),
        m0, cell_coords, n_chunks=200)

# Build some ground truth by sampling.
"""
from volcapy.covariance.sample import direct_sample
ground_truth = direct_sample(kernel, sigma0, lambda0, m0, cell_coords).numpy()
plot_basic(my_problem.grid, ground_truth)
np.save(os.path.join(output_folder, "ground_truth.npy"), ground_truth)
"""
ground_truth_notrend = torch.from_numpy(
        np.load(os.path.join(output_folder, "ground_truth.npy"))).float()

# Simple trend.
trend = 5 * cell_coords**3
def main(sample_nr):
    post_sample_path = os.path.join(
            ground_truth_folder,
            "post_samples/post_sample_{}.npy".format(sample_nr))
    post_data_sample_path = os.path.join(
            ground_truth_folder,
            "post_data_samples/post_data_sample_{}.npy".format(sample_nr))

    output_path = os.path.join(output_folder,
            "INFILL_results/sample_{}".format(sample_nr))
    os.makedirs(output_path, exist_ok=True)
    save_gp_state_path = os.path.join(output_path, "gp_state.pkl")

    # Load.
    F = torch.from_numpy(
            np.load(os.path.join(data_folder, "F_full_surface.npy"))).float().detach()
    grid = Grid.load(os.path.join(data_folder, "grid.pickle"))
    volcano_coords = torch.from_numpy(grid.cells).float().detach()
    data_coords = torch.from_numpy(
            np.load(os.path.join(data_folder, "surface_data_coords.npy"))).float()
    ground_truth = torch.from_numpy(np.load(post_sample_path))
    data_values = torch.from_numpy(np.load(post_data_sample_path))

    # Dictionary between the original Niklas data and our discretization.
    niklas_data_inds = torch.from_numpy(
            np.load(os.path.join(data_folder, "niklas_data_inds_insurf.npy"))).long()
    niklas_coords = data_coords[niklas_data_inds].numpy()

    # --------------------------------
    # DEFINITION OF THE EXCURSION SET.
    # --------------------------------
    THRESHOLD_low = 700.0
    excursion_inds = (ground_truth >= THRESHOLD_low).nonzero()[:, 0]

    # Coast data.
    coast_data_inds_infull = niklas_data_inds[coast_data_inds]

    # Choose a starting point on the coast.
    start_ind = coast_data_inds_infull[0]

    # Params.
    data_std = 0.1
    # lambda0 = 338.0
    lambda0 = 338.46
    sigma0 = 359.49
    m0 = -114.40

    # Define GP model.
    data_feed = lambda x: data_values[x]
    updatable_gp = UpdatableGP(cl, lambda0, sigma0, m0, volcano_coords,
            n_chunks=80)

    from volcapy.strategy.infill import InfillStrategy
    strategy = InfillStrategy(
            updatable_gp, data_coords,
            F, data_feed,
            lower=THRESHOLD_low, upper=None,
            )

    start = timer()
    visited_inds, observed_data, ivrs = strategy.run(
            start_ind, n_steps=2000, data_std=0.1,
            output_folder=output_path, save_coverage=True,
            max_step=310.0,
            save_gp_state_path=save_gp_state_path)
    end = timer()
    print("Run in {} mins.".format((end - start) / 60))

    np.save(os.path.join(output_path, "visited_inds.npy"), visited_inds)
    np.save(os.path.join(output_path, "observed_data.npy"), observed_data)
    np.save(os.path.join(output_path, "ivrs.npy"), ivrs)
    def run(self, start_ind, n_steps, data_std,
            output_folder=None, max_step=None, min_step=None,
            restart_from_save=None):
        """ Run the strategy. Note that this works with any criterion to
        choose the next point; the only requirement is that
        self.get_next_ind is defined before running the strategy.

        Parameters
        ----------
        start_ind: int
            Index (among the candidate observation locations) at which to
            start the data collection.
        n_steps: int
            Number of steps to run the strategy for.
        data_std: float
            Standard deviation of observation noise (homoscedastic).
        output_folder: string
            Path to folder where to save results.
        max_step: float
            If provided, then instead of only walking to neighbors at each
            step, can go to any cell within distance max_step.
        min_step: float
            If provided, then only consider neighbors farther away than
            min_step (must be used in conjunction with max_step).
        restart_from_save: string
            If a path to a folder is provided, then will restart the run
            from the saved data and finish it.

        """
        if restart_from_save is None:
            self.current_ind = start_ind
            self.visited_inds = []
            self.observed_data = []
            self.n_steps = n_steps
            self.data_std = data_std
            self.max_step = max_step
            self.min_step = min_step
        else:
            # Reload the state of a previous (interrupted) run.
            self.visited_inds = list(np.load(os.path.join(output_folder,
                    "visited_inds.npy")))
            i = len(self.visited_inds) - 1
            print("Restarting from step {}.".format(i))

            self.observed_data = list(np.load(os.path.join(output_folder,
                    "observed_data.npy"), allow_pickle=True))
            self.gp = UpdatableGP.load(os.path.join(output_folder,
                    "gp_state.pkl"))
            print(self.gp.mean.m)

            metadata = np.load(os.path.join(output_folder, "metadata.npy"),
                    allow_pickle='TRUE').item()
            self.current_ind = metadata['next_ind_to_visit'].item()
            print(self.current_ind)
            self.max_step = metadata['max_step']
            try:
                self.min_step = metadata['min_step']
            except KeyError:
                # Older saves do not store min_step.
                self.min_step = None
            self.data_std = metadata['data_std']

            # Remaining steps to perform.
            self.n_steps = metadata['remaining_steps']

        # Change the get_neighbors routine if we can jump more than one step.
        if self.max_step is not None:
            self.get_neighbors = lambda x: self.get_neighbors_bigstep(x,
                    r=self.max_step, rmin=self.min_step)
        else:
            self.get_neighbors = lambda x: self.get_nearest_neighbors(x)

        # Use self.n_steps so that a restarted run only performs the
        # remaining steps.
        for i in range(self.n_steps):
            # Observe at current location and update model.
            self.visited_inds.append(self.current_ind)
            y = self.data_feed(self.current_ind).detach().float()
            self.observed_data.append(y.detach().float().numpy())

            # Make sure that the observation operator has
            # at least two dimensions.
            G = self.G[self.current_ind, :]
            if len(G.shape) <= 1:
                G = G.reshape(1, -1)

            self.gp.update(G, y, data_std)

            # Update the conditional realizations.
            # Since by default it is an empty list, everything works
            # as intended.
            for real in self.realizations:
                real.update(G, y, data_std)

            # Extract current coverage function (after observing at current
            # location).
            self.current_coverage = self.gp.coverage(self.lower, self.upper)

            # Now evaluate where to go next.
            next_ind = self.get_next_ind()
            self.current_ind = next_ind
            print("Go to cell {} for step {}.".format(self.current_ind,
                    len(self.visited_inds)))

            # Save coverage at each iteration.
            if output_folder is not None:
                self.save_state(output_folder, coverage_only=True)

            # Save full state every 3 iterations.
            if i % 3 == 0 and output_folder is not None:
                self.save_state(output_folder)

        return self.visited_inds, self.observed_data
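# A minimal usage sketch (not part of the library code): it shows how the
# run() method above is typically driven, first as a fresh run and then as a
# resumed one. The strategy constructor, GP, data feed and folders stand in
# for the objects built in the scripts above; values mirror those scripts.
strategy = MyopicWIVRStrategy(updatable_gp, data_coords, F, data_feed,
        lower=THRESHOLD_low, upper=None)

# Fresh run: start at a chosen cell and walk for 4000 steps, allowing jumps
# between min_step and max_step instead of nearest-neighbor moves only.
visited_inds, observed_data = strategy.run(
        start_ind=4478, n_steps=4000, data_std=0.1,
        max_step=151.0, min_step=60.0,
        output_folder=output_folder)

# Resuming an interrupted run: point restart_from_save at the folder that
# holds visited_inds.npy, observed_data.npy, gp_state.pkl and metadata.npy;
# the current index and remaining steps are then read from the saved metadata.
visited_inds, observed_data = strategy.run(
        start_ind=-1, n_steps=4000, data_std=0.1,
        max_step=151.0, min_step=60.0,
        output_folder=output_folder,
        restart_from_save=output_folder)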