def analyze(f, title="Plot"):
    """Fit a two-noise-level VGP to data from ``f`` and save two figures.

    The raw observations are written to ``<title>.png`` and the fitted
    posterior (mean with a +/- 2 standard deviation band) to
    ``<title> GP model.png``. A parameter summary is printed at the end.

    Parameters
    ----------
    f : callable
        Called with no arguments; must return ``(X, Y, groups)``. ``groups``
        is stacked as an extra column next to ``Y`` before the data are
        handed to the model.
    title : str, optional
        Title of both figures and stem of the saved file names.

    Returns
    -------
    gpflow.models.VGP
        The model after the natural-gradient updates (previously ``None``
        was returned and the fitted model was lost).
    """
    X, Y, groups = f()
    Y_data = np.hstack([Y, groups])

    likelihood = gpflow.likelihoods.SwitchedLikelihood(
        [
            gpflow.likelihoods.Gaussian(variance=1.0),
            gpflow.likelihoods.Gaussian(variance=1.0),
        ]
    )

    # model construction (notice that num_latent_gps is 1)
    natgrad = NaturalGradient(gamma=1.0)
    kernel = gpflow.kernels.Matern52(lengthscales=0.5)
    model = gpflow.models.VGP(
        (X, Y_data), kernel=kernel, likelihood=likelihood, num_latent_gps=1
    )

    # Plot of the raw data. Y_data has two columns, so both the observations
    # and the group column are drawn against X.
    _, ax = plt.subplots(1, 1, figsize=(12, 6))
    ax.plot(X, Y_data, "kx")
    plt.xlabel("Minutes")
    plt.ylabel("Value")
    plt.title(title)
    plt.savefig(title + '.png')

    # Only the variational parameters are updated; kernel and likelihood
    # hyperparameters stay at their initial values.
    for _ in range(ci_niter(1000)):
        natgrad.minimize(model.training_loss, [(model.q_mu, model.q_sqrt)])

    # Posterior mean and +/- 2 standard deviation band on a fixed grid.
    xx = np.linspace(0, 30, 200)[:, None]
    mu, var = model.predict_f(xx)

    plt.figure(figsize=(12, 6))
    plt.plot(xx, mu, "C0")
    plt.plot(xx, mu + 2 * np.sqrt(var), "C0", lw=0.5)
    plt.plot(xx, mu - 2 * np.sqrt(var), "C0", lw=0.5)
    plt.plot(X, Y, "C1x", mew=2)
    plt.xlabel("Minutes")
    plt.ylabel("Value")
    plt.title(title)
    plt.savefig(title + ' GP model.png')

    print_summary(model)
    return model
def save(self, fname, io=None):
    """Write a checkpoint via ``self.manager`` and optionally upload it.

    ``fname`` is accepted for interface compatibility but is not read here.
    When ``io`` is given, the directory ``self.save_path`` is uploaded
    through ``io._upload_dir_to_bucket``.
    """
    saved_to = self.manager.save()
    logging.info(f'Saved to {saved_to}')
    print_summary(self.model)

    if io is None:
        return
    io._upload_dir_to_bucket(self.save_path, self.save_path,
                             ['ckpt', 'checkpoint'])
def fit(self, X, Y):
    """Build a GPR model on ``(X, Y)`` and print kernel and model summaries.

    The model is stored on ``self.modelo``. No hyperparameter optimisation
    is run: the Scipy/BFGS step is intentionally left disabled.
    """
    training_data = (X, Y)
    self.modelo = gpflow.models.GPR(
        data=training_data,
        kernel=self.kernel,
        mean_function=self.mean_function,
    )
    # Disabled optimisation step, kept for reference:
    # opt = gpflow.optimizers.Scipy()
    # opt_logs = opt.minimize(self.modelo.training_loss, self.modelo.trainable_variables, options=dict(maxiter=100), method='BFGS')
    for component in (self.kernel, self.modelo):
        print_summary(component, fmt="notebook")
def fit(self, X, y):
    """Train ``self.model`` with Adam on an endlessly repeated, shuffled dataset.

    Inputs are cast to float64 and reshaped to ``(-1, self.dim)``; targets
    to ``(-1, 1)``. Whatever ``self.run_adam`` returns is stored on
    ``self.logf``. Returns ``self`` so calls can be chained.
    """
    # Training of the inducing point locations could be switched off here:
    # gpflow.set_trainable(self.model.inducing_variable, False)
    inputs = tf.reshape(tf.cast(X, tf.float64), [-1, self.dim])
    targets = tf.reshape(tf.cast(y, tf.float64), [-1, 1])

    print_summary(self.model)

    batches = tf.data.Dataset.from_tensor_slices((inputs, targets))
    batches = batches.repeat()
    batches = batches.shuffle(X.shape[0])
    self.logf = self.run_adam(batches)
    return self
def __init__(self, data, Z=None, kernel=None, likelihood=None,
             mean_function=None, maxiter=1000):
    """Fit GP hyperparameters on ``data`` and keep the kernel and likelihood.

    :param data: ``(X, Y)`` tuple passed to the GPflow model.
    :param Z: inducing inputs. ``None`` selects exact GP regression (GPR);
        anything else selects a sparse variational GP (SVGP).
    :param kernel: kernel to train. Defaults to a fresh
        ``SquaredExponential()`` per call.
    :param likelihood: likelihood for the SVGP branch (the GPR branch does
        not take it). Defaults to a fresh ``Gaussian()`` per call.
    :param mean_function: mean function for the GPR branch.
    :param maxiter: maximum optimiser iterations (Scipy) or number of Adam
        steps (SVGP).
    """
    # Build defaults per call: default-argument instances would be created
    # once at definition time and their trainable parameters shared between
    # every object constructed without an explicit kernel/likelihood.
    if kernel is None:
        kernel = SquaredExponential()
    if likelihood is None:
        likelihood = Gaussian()

    # Use full Gaussian process regression when no inducing inputs are given.
    if Z is None:
        m = gpflow.models.GPR(data, kernel=kernel,
                              mean_function=mean_function)
        # Scipy optimiser for the hyperparameters
        opt = gpflow.optimizers.Scipy()

        def objective_closure():
            return -m.log_marginal_likelihood()

        opt.minimize(objective_closure,
                     m.trainable_variables,
                     options=dict(maxiter=maxiter))
    else:
        # Sparse variational Gaussian process for big data (see Hensman)
        # NOTE(review): mean_function is not forwarded to SVGP, as in the
        # original code; confirm whether that is intended.
        m = gpflow.models.SVGP(kernel, likelihood, Z,
                               num_data=data[0].shape[0])

        @tf.function
        def optimization_step(optimizer, model, batch):
            # The tape must stay open only while the objective is computed;
            # the gradient is taken after leaving the context.
            with tf.GradientTape() as tape:
                tape.watch(model.trainable_variables)
                objective = -model.elbo(batch)
            grads = tape.gradient(objective, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            return objective

        adam = tf.optimizers.Adam()
        for i in range(maxiter):
            elbo = -optimization_step(adam, m, data)
            if i % 100 == 0:
                print('Iteration: {} ELBO: {:.3f}'.format(i, float(elbo)))

    print_summary(m)

    # Cannot simply set self.gp_model = m as need to sample from prior,
    # not the posterior.
    self.kernel = m.kernel
    self.likelihood = m.likelihood
def run_gpflow_scipy(x_train, y_train, kernel, mean_function="linear", fmt="notebook"):
    """Create and train a GPFlow model

    Parameters
    ----------
    x_train : np.ndarray, shape=(n_samples, n_parameters)
        The x training data
    y_train : np.ndarray, shape=(n_samples, 1)
        The y training data
    kernel : object
        Kernel for the GP model; passed unchanged to ``gpflow.models.GPR``
    mean_function: string or None, default = "linear"
        Type of mean function for the GP model
        Options are "linear", "none" (both case-insensitive), or None
    fmt : string, optional, default="notebook"
        The formatting type for the GPFlow print_summary; used for both the
        initial and the optimized summary

    Returns
    -------
    gpflow.models.GPR
        The optimized model

    Raises
    ------
    ValueError
        If ``mean_function`` is neither None nor one of the supported names
    """
    if mean_function is not None:
        # Reject non-strings explicitly instead of failing on ``.lower()``.
        choice = mean_function.lower() if isinstance(mean_function, str) else None
        if choice == "linear":
            mean_function = gpflow.mean_functions.Linear(
                A=np.zeros(x_train.shape[1]).reshape(-1, 1))
        elif choice == "none":
            mean_function = None
        else:
            raise ValueError(
                "Only supported mean functions are 'linear' and 'none'")

    # Create the model
    model = gpflow.models.GPR(data=(x_train, y_train.reshape(-1, 1)),
                              kernel=kernel,
                              mean_function=mean_function)

    # Print initial values
    print_summary(model, fmt=fmt)

    # Optimize model with scipy
    optimizer = gpflow.optimizers.Scipy()
    optimizer.minimize(model.training_loss, model.trainable_variables)

    # Print the optimized values in the caller's chosen format
    print_summary(model, fmt=fmt)

    # Return the model
    return model
def train_soapgp(X_train, y_train, rem_mat, log=False):
    """
    Build a GPR model with a SOAP kernel plus white noise and fit it by
    minimising the training loss with the scipy optimizer.
    Pass log=True to print the fitted parameter summary (useful for
    save/load purposes). Returns the trained model.
    """
    soap_plus_noise = SOAP_kern(rem_mat) + gpflow.kernels.White(0.1)
    model = gpflow.models.GPR(data=(X_train, y_train), kernel=soap_plus_noise)

    gpflow.optimizers.Scipy().minimize(
        model.training_loss,
        model.trainable_variables,
        options={"maxiter": 10000},
    )

    if log:
        print_summary(model)
    return model
def load(self, io=None):
    """Restore ``self.model`` from the latest checkpoint in ``self.save_path``.

    When ``io`` is given, the checkpoint directory is first downloaded
    through ``io._download_dir_from_bucket``. The restored step and epoch
    counters are only logged, not stored on the instance.
    """
    if io is not None:
        io._download_dir_from_bucket(self.save_path, self.save_path, True)

    # Placeholder counters that the checkpoint overwrites on restore.
    restored_step = tf.Variable(1, dtype=tf.int32, trainable=False)
    restored_epoch = tf.Variable(1, dtype=tf.int32, trainable=False)
    checkpoint = tf.train.Checkpoint(
        model=self.model, step=restored_step, epoch=restored_epoch
    )
    checkpoint.restore(tf.train.latest_checkpoint(self.save_path))

    logging.info(
        f"Restored model from {tf.train.latest_checkpoint(self.save_path)} "
        f"[step:{int(restored_step)}, epoch:{int(restored_epoch)}]"
    )
    print_summary(self.model)
def gp_model(x_train, y_train, x_test, num_classes):
    """This function instantiates the gp model and gets the predictions
    from the model.

    :param x_train: The training dataset.
    :param y_train: The training dataset labels.
    :param x_test: The test dataset.
    :param num_classes: The number of classes in the dataset.
    :return: predictions, the first element returned by ``predict_y`` on
        ``x_test``.
    :return time_taken: The time taken to optimise the model, in seconds
        rounded to two decimals. Printing the parameter summaries is not
        included.
    """
    data = (x_train, y_train)
    kernel = (gpflow.kernels.SquaredExponential()
              + gpflow.kernels.Matern12()
              + gpflow.kernels.Exponential())

    invlink = gpflow.likelihoods.RobustMax(num_classes)
    likelihood = gpflow.likelihoods.MultiClass(num_classes, invlink=invlink)
    # Every fifth training input is used as a (fixed) inducing point.
    z = x_train[::5].copy()

    model = gpflow.models.SVGP(kernel=kernel,
                               likelihood=likelihood,
                               inducing_variable=z,
                               num_latent_gps=num_classes,
                               whiten=True,
                               q_diag=True)

    set_trainable(model.inducing_variable, False)

    print('\nInitial parameters:')
    print_summary(model, fmt="notebook")

    opt = gpflow.optimizers.Scipy()

    start = time.time()
    opt.minimize(model.training_loss_closure(data),
                 model.trainable_variables,
                 options=dict(maxiter=ci_niter(1000)))
    # Stop the clock before printing so the summary output is not counted
    # as training time.
    time_taken = round(time.time() - start, 2)

    print('\nParameters after optimization:')
    print_summary(model, fmt="notebook")
    print('Optimization took {:.2f} seconds'.format(time_taken))

    predictions = model.predict_y(x_test)[0]
    return predictions, time_taken
def optimizeModel(self):
    """Rebuild the GPR model from the stored data and refit it.

    ``self.X`` / ``self.Y`` are concatenated along axis 0 and reshaped to
    ``(-1, self.input_dim)`` / ``(-1, 1)``. The likelihood variance is set
    to ``self.noise_variance`` and frozen when
    ``self.fixed_noise_variance`` is truthy. The model is stored on
    ``self.gp`` and its summary printed.
    """
    kernel = gpflow.kernels.Matern52(self.kernel_variance, self.lengthscales)

    inputs = np.concatenate(self.X, 0)
    targets = np.concatenate(self.Y, 0)
    inputs = inputs.reshape((-1, self.input_dim))
    targets = targets.reshape((-1, 1))

    prior_mean = gpflow.mean_functions.Constant(self.mean_value)
    self.gp = gpflow.models.GPR(
        data=(inputs, targets), kernel=kernel, mean_function=prior_mean
    )
    self.gp.likelihood.variance.assign(self.noise_variance)

    # The prior mean could be kept fixed with:
    # set_trainable(self.gp.mean_function.c, False)
    if self.fixed_noise_variance:
        set_trainable(self.gp.likelihood.variance, False)

    gpflow.optimizers.Scipy().minimize(
        self.gp.training_loss,
        self.gp.trainable_variables,
        options={"maxiter": 100},
    )
    print_summary(self.gp)
def analyze(f, title="Plot", rawplot=True, modelplot=True,summary=True):
    """Fit a two-noise-level VGP to data from ``f``; optionally plot and summarise.

    ``f`` is called with no arguments and must return ``(X, Y, groups)``.
    ``rawplot`` saves the raw data to ``<title>.png``, ``modelplot`` saves
    the posterior mean with a +/- 2 standard deviation band to
    ``<title> GP model.png``, and ``summary`` prints the parameter summary.
    Returns the fitted model.
    """
    # Obtain the data; the group column is stacked next to the observations
    X, Y, groups = f()
    Y_data = np.hstack([Y, groups])

    # Model construction (notice that num_latent_gps is 1)
    noise_models = [
        gpflow.likelihoods.Gaussian(variance=1.0),
        gpflow.likelihoods.Gaussian(variance=1.0),
    ]
    switched = gpflow.likelihoods.SwitchedLikelihood(noise_models)
    natgrad = NaturalGradient(gamma=1.0)
    tf.optimizers.Adam()  # constructed as in the original; never used below
    matern = gpflow.kernels.Matern52(lengthscales=0.5)
    model = gpflow.models.VGP(
        (X, Y_data), kernel=matern, likelihood=switched, num_latent_gps=1
    )

    variational_params = [(model.q_mu, model.q_sqrt)]
    n_steps = ci_niter(1000)
    for _ in range(n_steps):
        natgrad.minimize(model.training_loss, variational_params)

    # Plot of the raw data.
    if rawplot:
        _, axis = plt.subplots(1, 1, figsize=(12, 6))
        axis.plot(X, Y_data, "kx")
        plt.xlabel("Minutes")
        plt.ylabel("Value")
        plt.title(title)
        plt.savefig(title+'.png')

    # Plot of GP model
    if modelplot:
        grid = np.linspace(0, 30, 200)[:, None]
        mean, variance = model.predict_f(grid)
        upper = mean + 2 * np.sqrt(variance)
        lower = mean - 2 * np.sqrt(variance)

        plt.figure(figsize=(12, 6))
        plt.plot(grid, mean, "C0")
        plt.plot(grid, upper, "C0", lw=0.5)
        plt.plot(grid, lower, "C0", lw=0.5)
        plt.plot(X, Y, "C1x", mew=2)
        plt.xlabel("Minutes")
        plt.ylabel("Value")
        plt.title(title)
        plt.savefig(title+' GP model.png')

    if summary:
        print_summary(model)

    return model
def train_soapgp_onthefly(X_train, y_train, args, log=False):
    """
    Build a GPR model with an on-the-fly SOAP kernel plus white noise and
    fit it by minimising the training loss with the scipy optimizer.
    Pass log=True to print the fitted parameter summary (useful for
    save/load purposes). Returns the trained model.
    """
    soap_plus_noise = SOAP_onthefly(args) + gpflow.kernels.White(0.1)
    model = gpflow.models.GPR(data=(X_train, y_train), kernel=soap_plus_noise)

    optimizer = gpflow.optimizers.Scipy()
    # needs compile=False so tensorflow remains in eager mode, allowing calls to X.numpy()
    optimizer.minimize(
        model.training_loss,
        model.trainable_variables,
        options={"maxiter": 10000},
        compile=False,
    )

    if log:
        print_summary(model)
    return model
def learn_hp(s, n):
    """Fit the cost and revenue GPs on random bids and print their hyperparameters.

    ``s`` must provide ``cost(bids)`` and ``revenue(bids)``; ``n`` is only
    used as a label in the printed headers.
    """
    bids = np.random.uniform(0., MAX_BID, 1101)
    gp = gp_model.GPModel(OPT=True)
    observed_costs = s.cost(bids)
    observed_revs = s.revenue(bids)
    gp.update(bids, observed_costs, observed_revs)

    for attr in ("model_cost", "model_rev"):
        print(f'summary {attr} {n}')
        print_summary(getattr(gp, attr))

    # (printed key suffix, accessor for the parameter on a fitted model)
    readouts = (
        ("variance", lambda fitted: fitted.kernel.variance),
        ("lengthscales", lambda fitted: fitted.kernel.lengthscales),
        ("likelihood", lambda fitted: fitted.likelihood.variance),
    )
    for suffix, read in readouts:
        print(f"'cost_{suffix}': ", read(gp.model_cost))
        print(f"'rev_{suffix}': ", read(gp.model_rev))
def regression_with_gp():
    """Fit a GPR model to six noisy samples of ``f`` and save a plot.

    ``f`` is the module-level target function defined elsewhere in this
    file. The posterior mean and a 1.96-standard-deviation band are drawn
    over the training points and written to ``regression_with_GP.jpg``.
    """
    print("Hola muchachos!")

    # define training points with noise (random but fixed for reproduction).
    # np.float64 replaces the np.float alias, which newer NumPy releases
    # no longer provide.
    x_train = np.array([-15, -4, 4, 9, 13, 18], dtype=np.float64)
    np.random.seed(1)
    noise = np.random.normal(scale=0.1, size=x_train.size)
    y_train = f(x_train) + noise

    # GPflow expects column vectors
    x_train = x_train.reshape(-1, 1)
    y_train = y_train.reshape(-1, 1)

    x_function_plot = np.arange(-25, 25, 0.01).reshape(-1, 1)

    k = gpflow.kernels.SquaredExponential()
    print_summary(k)

    gp = gpflow.models.GPR(data=(x_train, y_train), kernel=k, mean_function=None)
    print_summary(gp)

    opt = gpflow.optimizers.Scipy()
    opt.minimize(gp.training_loss, gp.trainable_variables, options=dict(maxiter=100))

    mean, var = gp.predict_f(x_function_plot)

    ## plot
    plt.figure(figsize=(12, 6))
    plt.plot(x_train, y_train, "kx", mew=2)
    plt.plot(x_function_plot, mean, "C0", lw=2)
    plt.fill_between(
        x_function_plot[:, 0],
        mean[:, 0] - 1.96 * np.sqrt(var[:, 0]),
        mean[:, 0] + 1.96 * np.sqrt(var[:, 0]),
        color="C0",
        alpha=0.2,
    )
    plt.savefig("regression_with_GP.jpg")
def optimizeModel(self):
    """Rebuild the coregionalised VGP from the stored data and refit it.

    The kernel is a Matern52 over the first ``self.input_dim`` columns
    multiplied by a Coregion kernel on column ``self.input_dim``. Each
    output gets its own Gaussian likelihood whose variance is set from
    ``self.noise_variance`` and frozen when ``self.fixed_noise_variance``
    is truthy. The model is stored on ``self.gp`` and its summary printed.
    """
    n_outputs = self.output_dim
    coreg_rank = self.rank

    feature_dims = np.arange(self.input_dim).tolist()
    self.k = gpflow.kernels.Matern52(
        self.kernel_variance, self.lengthscales, active_dims=feature_dims
    )
    self.coreg = gpflow.kernels.Coregion(
        output_dim=n_outputs, rank=coreg_rank, active_dims=[self.input_dim]
    )
    product_kernel = self.k * self.coreg

    inputs = np.concatenate(self.X, 0)
    targets = np.concatenate(self.Y, 0)
    inputs = inputs.reshape((inputs.shape[0], -1))
    targets = targets.reshape((targets.shape[0], -1))

    prior_mean = gpflow.mean_functions.Constant(self.mean_value)
    per_output_noise = [
        gpflow.likelihoods.Gaussian() for _ in range(self.output_dim)
    ]
    switched = gpflow.likelihoods.SwitchedLikelihood(per_output_noise)

    # now build the GP model as normal
    self.gp = gpflow.models.VGP(
        (inputs, targets),
        kernel=product_kernel,
        likelihood=switched,
        mean_function=prior_mean,
    )
    # The prior mean could be kept fixed with:
    # set_trainable(self.gp.mean_function.c, False)

    for idx in range(self.output_dim):
        noise = self.gp.likelihood.likelihoods[idx].variance
        noise.assign(self.noise_variance[idx])

    if self.fixed_noise_variance:
        for idx in range(self.output_dim):
            set_trainable(self.gp.likelihood.likelihoods[idx].variance, False)

    optimizer = gpflow.optimizers.Scipy()
    optimizer.minimize(
        self.gp.training_loss,
        self.gp.trainable_variables,
        options={"maxiter": 10000},
        method="L-BFGS-B",
    )
    print_summary(self.gp)
plt.plot(x0, y0) M = 50 # Number of inducing locations Z = x[::M, :].copy( ) # Initialise inducing locations to the first M inputs in the dataset m = gpflow.models.SVGP( kern, gpflow.likelihoods.Gaussian(), Z, num_data=N, ) m.kernel.lengthscale.assign(0.1) m.kernel.variance.assign(10.0) m.likelihood.variance.assign(0.05) print_summary(m) opt = gpflow.optimizers.Scipy() minibatch_size = 100 train_dataset = tf.data.Dataset.from_tensor_slices((x, y)) \ .repeat() \ .shuffle(N) train_it = iter(train_dataset.batch(minibatch_size)) ground_truth = m.log_likelihood(x, y).numpy() log_likelihood = tf.function(autograph=False)(m.log_likelihood) evals = [ log_likelihood(*minibatch).numpy() for minibatch in itertools.islice(train_it, 100)
def main():
    """Fit an RBF model and a convolutional-kernel model on MNIST and print accuracies.

    Both models are built by sibling helpers (``rbf_model``, ``conv_model``),
    optimised with ``run_lbfgs``, summarised, and scored with ``pred`` on
    the training and test sets.
    """
    # Load MNIST dataset (60k train, 10k test) and divide pixel values by 255
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    x_train = x_train / 255.0
    x_test = x_test / 255.0

    # set numbers
    n_train = x_train.shape[0]
    n_test = x_test.shape[0]
    max_iter = 100
    image_shape = [28, 28]
    patch_shape = [9, 9]
    n_inducing = 100
    n_classes = 10

    # process data for gpflow: flatten images, labels as a column, float64
    x_train = x_train.reshape(n_train, -1).astype(np.float64)  # (n, 28*28)
    y_train = y_train.reshape(n_train, -1).astype(np.float64)  # (n,1)
    x_test = x_test.reshape(n_test, -1).astype(np.float64)
    y_test = y_test.reshape(n_test, -1).astype(np.float64)
    data = (x_train, y_train)

    # set constraints
    def as_f64(value):
        return np.array(value, dtype=np.float64)

    def positive_with_min():
        affine = tfp.bijectors.AffineScalar(shift=as_f64(1e-4))
        return affine(tfp.bijectors.Softplus())

    def constrained():
        affine = tfp.bijectors.AffineScalar(shift=as_f64(1e-4),
                                            scale=as_f64(100.0))
        return affine(tfp.bijectors.Sigmoid())

    def max_abs_1():
        affine = tfp.bijectors.AffineScalar(shift=as_f64(-2.0),
                                            scale=as_f64(4.0))
        return affine(tfp.bijectors.Sigmoid())

    # base kernel
    base_k = gpflow.kernels.SquaredExponential()
    # conv kernel by summing all the patches
    conv_k = gpflow.kernels.Convolutional(base_k, image_shape, patch_shape)

    # apply constraints to trainable variables
    conv_k.base_kernel.lengthscales = gpflow.Parameter(
        1.0, transform=positive_with_min())
    conv_k.base_kernel.variance = gpflow.Parameter(
        1.0, transform=constrained())
    conv_k.weights = gpflow.Parameter(
        conv_k.weights.numpy(), transform=max_abs_1())

    # initialize inducing points for conv model
    patches = my_inducing_points(x_train, n_inducing, base_k, image_shape,
                                 patch_shape)
    conv_f = gpflow.inducing_variables.InducingPatches(patches)

    # models
    rbf_m = rbf_model(base_k, data, max_iter, n_inducing, n_classes)
    conv_m = conv_model(conv_k, conv_f, data, n_classes)

    # run l-bfgs-b optimization (an Adam alternative, run_adam, is not used)
    run_lbfgs(rbf_m, max_iter, data)
    run_lbfgs(conv_m, max_iter, data)

    # model summary
    for fitted in (rbf_m, conv_m):
        print_summary(fitted)

    # run prediction
    rbf_train_acc, rbf_test_acc = pred(rbf_m, x_train, y_train, x_test,
                                       y_test)
    conv_train_acc, conv_test_acc = pred(conv_m, x_train, y_train, x_test,
                                         y_test)

    print("RBF training accuracy is: %.2f " % rbf_train_acc)
    print("RBF testing accuracy is: %.2f " % rbf_test_acc)
    print("Conv model training accuracy is: %.2f" % conv_train_acc)
    print("Conv model testing accuracy is: %.2f" % conv_test_acc)
inducing_variable=inducing_variable, ) # Instead of passing an inducing_variable directly, we can also set the num_inducing_variables argument to an integer, which will randomly pick from the data. gplvm.likelihood.variance.assign(0.01) opt = gpflow.optimizers.Scipy() maxiter = ci_niter(1000) _ = opt.minimize( gplvm.training_loss, method="BFGS", variables=gplvm.trainable_variables, options=dict(maxiter=maxiter), ) print_summary(gplvm) X_pca = ops.pca_reduce(Y, latent_dim).numpy() gplvm_X_mean = gplvm.X_data_mean.numpy() # plt.subplot(131) # plt.scatter(Y[:, 0], gplvm_X_mean) # plt.subplot(132) # plt.scatter(Y[:, 1], gplvm_X_mean) # plt.subplot(133) # plt.scatter(Y_bg[:, 0], Y_bg[:, 1], c=gplvm_X_mean) # plt.show() plt.subplot(121) plt.scatter(gplvm_X_mean[:, 0], gplvm_X_mean[:, 1]) plt.subplot(122)
# %% # generate toy data np.random.seed(1) X = np.random.rand(20, 1) Y = np.sin(12 * X) + 0.66 * np.cos(25 * X) + np.random.randn(20, 1) * 0.01 m = gpflow.models.GPR( (X, Y), kernel=gpflow.kernels.Matern32() + gpflow.kernels.Linear()) # %% [markdown] # ## Viewing, getting, and setting parameters # You can display the state of the model in a terminal by using `print_summary(m)`. You can change the display format using the `fmt` keyword argument, e.g. `'html'`. In a notebook, you can also use `fmt='notebook'` or set the default printing format as `notebook`: # %% print_summary(m, fmt="notebook") # %% gpflow.config.set_default_summary_fmt("notebook") # %% [markdown] # This model has four parameters. The kernel is made of the sum of two parts. The first (counting from zero) is a Matern32 kernel that has a variance parameter and a lengthscale parameter; the second is a linear kernel that has only a variance parameter. There is also a parameter that controls the variance of the noise, as part of the likelihood. # # All the model variables have been initialized at `1.0`. You can access individual parameters in the same way that you display the state of the model in a terminal; for example, to see all the parameters that are part of the likelihood, run: # %% print_summary(m.likelihood) # %% [markdown] # This gets more useful with more complex models!
def main(path, representation):
    """
    Fit a four-output coregionalised VGP with a Tanimoto kernel and report
    errors on five held-out molecules of the 'e_iso_pi' task.

    The training rows of 'e_iso_pi' plus all rows of 'z_iso_pi', 'e_iso_n'
    and 'z_iso_n' are stacked into one dataset; an extra input column and an
    extra target column carry the task index (0-3).

    :param path: str specifying path to dataset.
    :param representation: str specifying the molecular representation. One of ['fingerprints', 'fragments', 'fragprints']
    """

    task = 'e_iso_pi'  # task always e_iso_pi with human performance comparison
    data_loader = TaskDataLoader(task, path)
    smiles_list, y = data_loader.load_property_data()

    X = featurise_mols(smiles_list, representation)

    # 5 test molecules (kept for reference; the list itself is not read below)

    test_smiles = [
        'BrC1=CC=C(/N=N/C2=CC=CC=C2)C=C1',
        'O=[N+]([O-])C1=CC=C(/N=N/C2=CC=CC=C2)C=C1',
        'CC(C=C1)=CC=C1/N=N/C2=CC=C(N(C)C)C=C2',
        'BrC1=CC([N+]([O-])=O)=CC([N+]([O-])=O)=C1/N=N/C2=CC([H])=C(C=C2[H])N(CC)CC',
        'ClC%11=CC([N+]([O-])=O)=CC(C#N)=C%11/N=N/C%12=CC([H])=C(C=C%12OC)N(CC)CC'
    ]

    # and their indices in the loaded data
    test_smiles_indices = [116, 131, 168, 221, 229]

    # Training set = everything except the five test rows. The test rows are
    # selected with the same indices, repeated as literals.
    X_train = np.delete(X, np.array(test_smiles_indices), axis=0)
    y_train = np.delete(y, np.array(test_smiles_indices))
    X_test = X[[116, 131, 168, 221, 229]]

    # experimental wavelength values in EtOH. Main csv file has 400nm instead of 407nm because measurement was
    # under a different solvent
    y_test = y[[116, 131, 168, 221, 229]]
    y_test[2] = 407.

    y_train = y_train.reshape(-1, 1)
    y_test = y_test.reshape(-1, 1)

    # Output standardisation is disabled in this script:
    # # We standardise the outputs but leave the inputs unchanged
    #
    # _, y_train, _, y_test, y_scaler = transform_data(X_train, y_train, X_test, y_test)

    X_train = X_train.astype(np.float64)
    X_test = X_test.astype(np.float64)

    # Load the three auxiliary tasks in full (no rows are held out from them)
    data_loader_z_iso_pi = TaskDataLoader('z_iso_pi', path)
    data_loader_e_iso_n = TaskDataLoader('e_iso_n', path)
    data_loader_z_iso_n = TaskDataLoader('z_iso_n', path)

    smiles_list_z_iso_pi, y_z_iso_pi = data_loader_z_iso_pi.load_property_data(
    )
    smiles_list_e_iso_n, y_e_iso_n = data_loader_e_iso_n.load_property_data()
    smiles_list_z_iso_n, y_z_iso_n = data_loader_z_iso_n.load_property_data()

    y_z_iso_pi = y_z_iso_pi.reshape(-1, 1)
    y_e_iso_n = y_e_iso_n.reshape(-1, 1)
    y_z_iso_n = y_z_iso_n.reshape(-1, 1)

    X_z_iso_pi = featurise_mols(smiles_list_z_iso_pi, representation)
    X_e_iso_n = featurise_mols(smiles_list_e_iso_n, representation)
    X_z_iso_n = featurise_mols(smiles_list_z_iso_n, representation)

    output_dim = 4  # Number of outputs
    rank = 1  # Rank of W
    # Computed before the task-index column is appended, so it is also the
    # column position of that index in the augmented inputs.
    feature_dim = len(X_train[0, :])

    tanimoto_active_dims = [i for i in range(feature_dim)
                            ]  # active dims for Tanimoto base kernel.

    # We define the Gaussian Process Regression Model using the Tanimoto kernel

    m = None

    # NOTE(review): this closure is defined but never called in this function.
    def objective_closure():
        return -m.log_marginal_likelihood()

    # Augment the input with zeroes, ones, twos, threes to indicate the required output dimension
    X_augmented = np.vstack((np.append(X_train, np.zeros((len(X_train), 1)), axis=1),
                             np.append(X_z_iso_pi, np.ones((len(X_z_iso_pi), 1)), axis=1),
                             np.append(X_e_iso_n, np.ones((len(X_e_iso_n), 1)) * 2, axis=1),
                             np.append(X_z_iso_n, np.ones((len(X_z_iso_n), 1)) * 3, axis=1)))

    # Test and train inputs get index 0, i.e. predictions are made for the
    # first output ('e_iso_pi').
    X_test = np.append(X_test, np.zeros((len(X_test), 1)), axis=1)
    X_train = np.append(X_train, np.zeros((len(X_train), 1)), axis=1)

    # Augment the Y data with zeroes, ones, twos and threes that specify a likelihood from the list of likelihoods
    Y_augmented = np.vstack(
        (np.hstack((y_train, np.zeros_like(y_train))),
         np.hstack((y_z_iso_pi, np.ones_like(y_z_iso_pi))),
         np.hstack((y_e_iso_n, np.ones_like(y_e_iso_n) * 2)),
         np.hstack((y_z_iso_n, np.ones_like(y_z_iso_n) * 3))))

    # After this, column 0 of y_test holds the values and column 1 the index
    y_test = np.hstack((y_test, np.zeros_like(y_test)))

    # Base kernel
    k = Tanimoto(active_dims=tanimoto_active_dims)
    # set_trainable(k.variance, False)

    # Coregion kernel, acting on the appended task-index column
    coreg = gpflow.kernels.Coregion(output_dim=output_dim, rank=rank, active_dims=[feature_dim])

    # Create product kernel
    kern = k * coreg

    # This likelihood switches between Gaussian noise with different variances for each f_i:
    lik = gpflow.likelihoods.SwitchedLikelihood([
        gpflow.likelihoods.Gaussian(),
        gpflow.likelihoods.Gaussian(),
        gpflow.likelihoods.Gaussian(),
        gpflow.likelihoods.Gaussian()
    ])

    # now build the GP model as normal; the constant mean starts at the mean
    # of the 'e_iso_pi' training targets
    m = gpflow.models.VGP((X_augmented, Y_augmented),
                          mean_function=Constant(np.mean(y_train[:, 0])),
                          kernel=kern,
                          likelihood=lik)

    # fit the covariance function parameters
    maxiter = ci_niter(1000)
    gpflow.optimizers.Scipy().minimize(
        m.training_loss,
        m.trainable_variables,
        options=dict(maxiter=maxiter),
        method="L-BFGS-B",
    )
    print_summary(m)

    # mean and variance GP prediction

    y_pred, y_var = m.predict_f(X_test)

    # Output Standardised RMSE and RMSE on Train Set
    # With standardisation disabled above, both values are computed from the
    # same arrays and are therefore identical.

    y_pred_train, _ = m.predict_f(X_train)
    train_rmse_stan = np.sqrt(mean_squared_error(y_train, y_pred_train))
    train_rmse = np.sqrt(mean_squared_error(y_train, y_pred_train))
    print("\nStandardised Train RMSE: {:.3f}".format(train_rmse_stan))
    print("Train RMSE: {:.3f}".format(train_rmse))

    r2 = r2_score(y_test[:, 0], y_pred)
    rmse = np.sqrt(mean_squared_error(y_test[:, 0], y_pred))
    mae = mean_absolute_error(y_test[:, 0], y_pred)
    # assumes y_pred is (n, 1): subtracting the 1-D y_test[:, 0] then
    # broadcasts to (n, n) and the diagonal holds the per-molecule errors
    # -- TODO confirm the prediction shape
    per_molecule = np.diag(abs(y_pred - y_test[:, 0]))

    print("\n Averaged test statistics are")
    print("\nR^2: {:.3f}".format(r2))
    print("RMSE: {:.3f}".format(rmse))
    print("MAE: {:.3f}".format(mae))
    print("\nAbsolute error per molecule is {} ".format(per_molecule))
3, invlink=invlink) # Multiclass likelihood Z = X[::5].copy() # inducing inputs m = gpflow.models.SVGP( kernel=kernel, likelihood=likelihood, inducing_variable=Z, num_latent_gps=C, whiten=True, q_diag=True, ) # Only train the variational parameters set_trainable(m.kernel.kernels[1].variance, False) set_trainable(m.inducing_variable, False) print_summary(m, fmt="notebook") # %% [markdown] # #### Running inference # %% opt = gpflow.optimizers.Scipy() opt_logs = opt.minimize(m.training_loss_closure(data), m.trainable_variables, options=dict(maxiter=ci_niter(1000))) print_summary(m, fmt="notebook") # %% plot_posterior_predictions(m, X, Y)
# Sample N noisy observations of the 1-D test function.
# N and sigma are not defined in this fragment; they must be set earlier in
# the script.
f = GandL_1D()
X = np.random.uniform(f.xmin, f.xmax, (N, f.xdim))
delta = np.random.normal(0, sigma, (N, 1))
Y = f(X) + delta
data = (tf.convert_to_tensor(X, "float64"), tf.convert_to_tensor(Y, "float64"))

opt = gpflow.optimizers.Scipy()

##############################
# Exact GP part
##############################
kernel = gpflow.kernels.SquaredExponential()
my_gp = gpflow.models.GPR(data, kernel=kernel)
# The comma builds a throw-away tuple; both calls still run, left to right.
print('GPR init:'), print_summary(my_gp)

opt.minimize(my_gp.training_loss,
             my_gp.trainable_variables,
             tol=1e-11,
             options=dict(maxiter=1000),
             method='l-bfgs-b')
print('GPR trained:'), print_summary(my_gp)

# Posterior mean and variance on a dense grid over the function's domain
xx = np.linspace(f.xmin, f.xmax, 1000).reshape(-1, 1)
mean, var = my_gp.predict_f(xx)

fig = plt.figure(1)
# NOTE(review): xkcd mode is switched on after the figure is created --
# confirm the styling is applied as intended.
plt.xkcd()
plt.plot(xx, mean, color='#0072BD', lw=2, label="predictive mean")
# Band of +/- 1.96 standard deviations around the mean
plt.fill_between(xx[:,0],
                 mean[:,0] - 1.96 * np.sqrt(var[:,0]),
                 mean[:,0] + 1.96 * np.sqrt(var[:,0]),
                 color='#0072BD',
                 alpha=0.3)
plt.plot(X, Y, "o", color='#484848', ms=3.5, label='samples')
def main(path, task, representation, use_pca, n_trials, test_set_size, use_rmse_conf):
    """
    Evaluate an equal-weight average of a Tanimoto-kernel GP and a random
    forest over ``n_trials`` random train/test splits and print R^2, RMSE
    and MAE per split plus their mean and standard error.

    :param path: str specifying path to dataset.
    :param task: str specifying the task. One of ['e_iso_pi', 'z_iso_pi', 'e_iso_n', 'z_iso_n']
    :param representation: str specifying the molecular representation. One of ['fingerprints', 'fragments', 'fragprints']
    :param use_pca: bool. If True apply PCA to perform Principal Components Regression.
    :param n_trials: int specifying number of random train/test splits to use
    :param test_set_size: float in range [0, 1] specifying fraction of dataset to use as test set
    :param use_rmse_conf: bool specifying whether to compute the rmse confidence-error curves or the mae confidence-
    error curves. True is the option for rmse. Not read anywhere in this function.
    """

    data_loader = TaskDataLoader(task, path)
    smiles_list, y = data_loader.load_property_data()
    X = featurise_mols(smiles_list, representation)

    # If True we perform Principal Components Regression

    if use_pca:
        n_components = 100
    else:
        n_components = None

    # We define the Gaussian Process Regression Model using the Tanimoto kernel

    m = None

    # Reads ``m`` from the enclosing scope at call time, so it always refers
    # to the model built in the current loop iteration.
    def objective_closure():
        return -m.log_marginal_likelihood()

    r2_list = []
    rmse_list = []
    mae_list = []

    # We pre-allocate arrays for plotting confidence-error curves
    # NOTE(review): the two confidence arrays are allocated but never filled
    # or read in this function.

    _, _, _, y_test = train_test_split(X, y, test_size=test_set_size)  # To get test set size
    n_test = len(y_test)

    rmse_confidence_list = np.zeros((n_trials, n_test))
    mae_confidence_list = np.zeros((n_trials, n_test))

    print('\nBeginning training loop...')

    for i in range(0, n_trials):

        # The trial index doubles as the random seed of the split
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_set_size, random_state=i)

        y_train = y_train.reshape(-1, 1)
        y_test = y_test.reshape(-1, 1)

        # We standardise the outputs but leave the inputs unchanged
        # (the transformed inputs returned by transform_data are discarded)

        _, y_train, _, y_test, y_scaler = transform_data(X_train, y_train, X_test, y_test, n_components=n_components, use_pca=use_pca)

        X_train = X_train.astype(np.float64)
        X_test = X_test.astype(np.float64)

        k = Tanimoto()
        m = gpflow.models.GPR(data=(X_train, y_train), mean_function=Constant(np.mean(y_train)), kernel=k, noise_variance=1)

        # e_iso_pi best params:
        # {'learner': RandomForestRegressor(max_features=0.9348473830061558, n_estimators=381,
        #                       n_jobs=1, random_state=2, verbose=False)}
        # e_iso_n best params:
        # {'learner': RandomForestRegressor(bootstrap=False, max_features=0.09944870853556087,
        #                       min_samples_leaf=3, n_estimators=1295, n_jobs=1,
        #                       random_state=0, verbose=False)}
        # z_iso_pi best params:
        # {'learner': RandomForestRegressor(max_depth=4, max_features=0.33072121415416944,
        #                       n_estimators=2755, n_jobs=1, random_state=2,
        #                       verbose=False)}
        # z_iso_n best params:
        # {'learner': RandomForestRegressor(max_features=None, n_estimators=892, n_jobs=1,
        #                       random_state=3, verbose=False)}

        # NOTE(review): the hard-coded hyperparameters below match the
        # 'z_iso_n' entry above regardless of ``task`` -- confirm intended.
        regr_rf = RandomForestRegressor(max_features=None, n_estimators=892, n_jobs=1,
                                        random_state=3, verbose=False)
        regr_rf.fit(X_train, y_train)

        # Optimise the kernel variance and noise level by the marginal likelihood

        opt = gpflow.optimizers.Scipy()
        opt.minimize(objective_closure, m.trainable_variables, options=dict(maxiter=100))
        print_summary(m)

        # mean and variance GP prediction and RF prediction

        y_pred, y_var = m.predict_f(X_test)
        y_pred_rf = regr_rf.predict(X_test)
        # Equal-weight average of GP and RF predictions, still on the
        # standardised scale; both are mapped back to the original scale next
        y_pred_av = (y_pred + y_pred_rf.reshape(-1, 1)) / 2.0
        y_pred = y_scaler.inverse_transform(y_pred_av)
        y_test = y_scaler.inverse_transform(y_test)

        # Output Standardised RMSE and RMSE on Train Set

        y_pred_train, _ = m.predict_f(X_train)
        y_pred_train_rf = regr_rf.predict(X_train)
        y_pred_train = (y_pred_train + y_pred_train_rf.reshape(-1, 1)) / 2.0
        train_rmse_stan = np.sqrt(mean_squared_error(y_train, y_pred_train))
        train_rmse = np.sqrt(mean_squared_error(y_scaler.inverse_transform(y_train),
                                                y_scaler.inverse_transform(y_pred_train)))
        print("\nStandardised Train RMSE: {:.3f}".format(train_rmse_stan))
        print("Train RMSE: {:.3f}".format(train_rmse))

        score = r2_score(y_test, y_pred)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mae = mean_absolute_error(y_test, y_pred)

        print("\nR^2: {:.3f}".format(score))
        print("RMSE: {:.3f}".format(rmse))
        print("MAE: {:.3f}".format(mae))

        r2_list.append(score)
        rmse_list.append(rmse)
        mae_list.append(mae)

    r2_list = np.array(r2_list)
    rmse_list = np.array(rmse_list)
    mae_list = np.array(mae_list)

    # Mean over trials with the standard error (std / sqrt(number of trials))
    print("\nmean R^2: {:.4f} +- {:.4f}".format(np.mean(r2_list), np.std(r2_list)/np.sqrt(len(r2_list))))
    print("mean RMSE: {:.4f} +- {:.4f}".format(np.mean(rmse_list), np.std(rmse_list)/np.sqrt(len(rmse_list))))
    print("mean MAE: {:.4f} +- {:.4f}\n".format(np.mean(mae_list), np.std(mae_list)/np.sqrt(len(mae_list))))
def readv():
    """Run the relative-sea-level (RSL) Gaussian-process regression workflow.

    The function takes no arguments and returns nothing; it is a script in
    function form.  In order, it:

    1. loads the RSL index points from ``data/GSL_LGM_120519_.csv`` and keeps
       those inside the chosen region / age window;
    2. plots the data locations;
    3. builds a time-scaled "readvance" fingerprint from
       ``data/WAISreadvance_VM5_6ka_1step.mat``;
    4. loads GIA model runs (netCDF) and combines them into a prior mean/std;
    5. fits a GP (``GPR_diag``, per-datum noise variance) to the residual
       between the data and the GIA prior;
    6. writes the GP output to ``output/`` as netCDF and saves figures under
       ``figs/<place>/``;
    7. decomposes the GP prediction into the contribution of each kernel.

    All library names used here (``np``, ``pd``, ``xr``, ``tf``, ``gpf``,
    ``tfb``, ``plt``, ``ccrs``, ``cfeature``, ``io``, ``glob``, ``os``,
    ``time``, ``product``, ``Normalize``, ``Line2D``, ``Kernel``,
    ``MeanFunction``, ``Optional``, ``Tuple``, ``default_jitter``,
    ``multivariate_normal``, ``print_summary``) are expected to be imported at
    file level.
    """

    # set the colormap and centre the colorbar
    class MidpointNormalize(Normalize):
        """Normalise the colorbar.  e.g. norm=MidpointNormalize(mymin, mymax, 0.)"""

        def __init__(self, vmin=None, vmax=None, midpoint=None, clip=False):
            self.midpoint = midpoint
            Normalize.__init__(self, vmin, vmax, clip)

        def __call__(self, value, clip=None):
            # Piecewise-linear map: vmin -> 0, midpoint -> 0.5, vmax -> 1.
            x, y = [self.vmin, self.midpoint, self.vmax], [0, 0.5, 1]
            return np.ma.masked_array(np.interp(value, x, y), np.isnan(value))

    #################### Initialize parameters #######################
    #################### ---------------------- #######################

    ice_model = 'd6g_h6g_'  # 'glac1d_'
    lith_thickness = 'l7'  # 'l90C'
    place = 'europe'

    # [lon_min, lon_max, lat_min, lat_max] for each named region
    locs = {
        'england': [-12, 2, 50, 60],
        'southchina': [110, 117, 19, 23],
        'easternhem': [50, 178, -45, 80],
        'westernhem': [-175, 30, -80, 75],
        'world': [-179.8, 179.8, -89.8, 89.8],
        'namerica': [-150, -20, 10, 75],
        'eastcoast': [-88, -65, 15, 40],
        'europe': [-20, 15, 35, 70]
    }
    extent = locs[place]
    tmax, tmin, tstep = 7050, 1450, 100

    # model ages, oldest first
    ages_lgm = np.arange(100, 26000, tstep)[::-1]

    # import khan dataset
    path = 'data/GSL_LGM_120519_.csv'

    df = pd.read_csv(path, encoding="ISO-8859-15", engine='python')
    # Raw string: '\s' is a regex class, not a Python escape.
    df = df.replace(r'\s+', '_', regex=True).replace('-', '_', regex=True).\
        applymap(lambda s: s.lower() if isinstance(s, str) else s)
    df.columns = df.columns.str.lower()
    df.rename_axis('index', inplace=True)
    df = df.rename({'latitude': 'lat', 'longitude': 'lon'}, axis='columns')
    dfind = df[(df.type == 0) & (df.age > 0)]

    # select location
    # .copy(): columns are added to df_place further down, so it must own its
    # data instead of being a chained slice of dfind.
    df_place = dfind[(dfind.age > tmin) & (dfind.age < tmax) &
                     (dfind.lon > extent[0])
                     & (dfind.lon < extent[1])
                     & (dfind.lat > extent[2])
                     & (dfind.lat < extent[3])
                     & (dfind.rsl_er_max < 1)][[
                         'lat', 'lon', 'rsl', 'rsl_er_max', 'age'
                     ]].copy()

    #################### Plot locations #######################
    #################### ---------------------- #######################

    # get counts by location rounded to nearest 0.1 degree
    df_rnd = df_place.copy()
    df_rnd.lat = np.round(df_rnd.lat, 1)
    df_rnd.lon = np.round(df_rnd.lon, 1)
    dfcounts_place = df_rnd.groupby(
        ['lat', 'lon']).count().reset_index()[['lat', 'lon', 'rsl', 'age']]

    # plot
    fig = plt.figure(figsize=(10, 7))
    ax = plt.subplot(1, 1, 1, projection=ccrs.PlateCarree())

    ax.set_extent(extent)
    ax.coastlines(resolution='110m', linewidth=1, zorder=2)
    ax.add_feature(cfeature.OCEAN, zorder=0)
    ax.add_feature(cfeature.LAND, color='palegreen', zorder=1)
    ax.add_feature(cfeature.BORDERS, linewidth=0.5, zorder=3)
    ax.gridlines(linewidth=1, color='white', alpha=0.5, zorder=4)
    ax.scatter(dfcounts_place.lon,
               dfcounts_place.lat,
               s=dfcounts_place.rsl * 70,
               c='lightsalmon',
               vmin=-20,
               vmax=20,
               cmap='coolwarm',
               edgecolor='k',
               linewidths=1,
               transform=ccrs.PlateCarree(),
               zorder=5)
    size = Line2D(range(4),
                  range(4),
                  color="black",
                  marker='o',
                  linewidth=0,
                  linestyle='none',
                  markersize=16,
                  markerfacecolor="lightsalmon")
    labels = ['RSL datapoint location']
    leg = plt.legend([size],
                     labels,
                     loc='lower left',
                     bbox_to_anchor=(0.00, 0.00),
                     prop={'size': 20},
                     fancybox=True)
    leg.get_frame().set_edgecolor('k')
    ax.set_title('')

    #################### Make 3D fingerprint #######################
    #################### ---------------------- #######################

    def wrap_longitude(lon):
        """Re-express a 0..360 longitude coordinate as -180..180.

        The -0.12 / +0.58 offsets are the ones the original code applied.
        Returns a pandas Series, so assigning it to ``coords['lon']`` yields a
        coordinate on dimension ``dim_0`` that the caller swaps back to 'lon'.
        """
        west = pd.DataFrame((lon[lon >= 180] - 360) - 0.12)
        east = pd.DataFrame(lon[lon < 180]) + 0.58
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        return pd.concat([west, east]).reset_index(drop=True).squeeze()

    filename = 'data/WAISreadvance_VM5_6ka_1step.mat'

    waismask = io.loadmat(filename, squeeze_me=True)
    ds_mask = xr.Dataset({'rsl': (['lat', 'lon', 'age'], waismask['RSL'])},
                         coords={
                             'lon': waismask['lon_out'],
                             'lat': waismask['lat_out'],
                             'age': np.round(waismask['ice_time_new'])
                         })
    fingerprint = ds_mask.sel(age=ds_mask.age[0])

    def make_fingerprint(start, end, maxscale):
        """Scale the fingerprint 0 -> maxscale -> 0 between ages start and end."""

        # palindromic scaling vector
        def palindrome(maxscale, ages):
            """ Make palindrome scale 0-maxval with number of steps. """
            half = np.linspace(0, maxscale, 1 + (len(ages) - 1) // 2)
            scalefactor = np.concatenate([half, half[::-1]])
            return scalefactor

        ages_readv = ages_lgm[(ages_lgm < start) & (ages_lgm >= end)]
        scale = palindrome(maxscale, ages_readv)

        # scale factor same size as ice model ages
        # np.where(...)[0] is a one-element index array, used here as the
        # shape of the zero padding before the readvance starts.
        pre = np.zeros(np.where(ages_lgm == start)[0])
        post = np.zeros(len(ages_lgm) - len(pre) - len(scale))

        readv_scale = np.concatenate([pre, scale, post])

        # scale factor into dataarray
        da_scale = xr.DataArray(readv_scale, coords=[('age', ages_lgm)])

        # broadcast fingerprint & scale to same dimensions;
        fingerprint_out, fing_scaled = xr.broadcast(fingerprint.rsl, da_scale)

        # mask fingerprint with scale to get LGM-pres timeseries
        ds_fingerprint = (fingerprint_out *
                          fing_scaled).transpose().to_dataset(name='rsl')

        # scale dataset with fingerprint to LGM-present length & 0-max-0 over x years
        xrlist = []
        for i, key in enumerate(da_scale):
            mask = ds_fingerprint.sel(age=ds_fingerprint.age[i].values) * key
            mask = mask.assign_coords(scale=key,
                                      age=ages_lgm[i]).expand_dims(dim=['age'])
            xrlist.append(mask)
        ds_readv = xr.concat(xrlist, dim='age')

        ds_readv.coords['lon'] = wrap_longitude(ds_readv.lon)
        ds_readv = ds_readv.swap_dims({'dim_0': 'lon'}).drop('dim_0')

        # Add readv to modeled RSL at locations with data
        ##### Need to fix this, as currently slice does not acknowledge new coords #########
        ds_readv = ds_readv.sel(age=slice(tmax, tmin),
                                lon=slice(df_place.lon.min() + 180 - 2,
                                          df_place.lon.max() + 180 + 2),
                                lat=slice(df_place.lat.max() + 2,
                                          df_place.lat.min() - 2))
        return ds_readv

    # Make deterministic readvance fingerprint
    ds_readv = make_fingerprint(start=6100, end=3000, maxscale=2.25)

    # Make readvance prior
    ds_readvprior = make_fingerprint(start=8000, end=2000, maxscale=2.25)
    ds_readvprior_std = ds_readvprior * 0.3

    #################### Build GIA models #######################
    #################### ---------------------- #######################

    # Use either glac1d or ICE6G.  The two cases differ only in the data
    # directory and in the latitude orientation (glac1d is flipped and sliced
    # south-to-north), so a single implementation is parameterised on that.
    is_glac1d = ice_model == 'glac1d_'
    model_dir = 'glac1d_' if is_glac1d else 'd6g_h6g_'

    def build_dataset(model):
        """Load the model runs matching ``model`` from the local directory."""
        path = f'data/{model_dir}/output_{model}'
        files = f'{path}*.nc'
        # Sorted so the labels line up with open_mfdataset, which sorts the
        # files it globs.
        basefiles = sorted(glob.glob(files))
        # Split on the real directory name (with its trailing underscore);
        # the previous 'glac1d/output_' separator never matched the path.
        modelrun = [
            key.split(f'{model_dir}/output_', 1)[1][:-3].replace('.', '_')
            for key in basefiles
        ]
        dss = xr.open_mfdataset(files,
                                chunks=None,
                                concat_dim='modelrun',
                                combine='nested')
        lats, lons, times = dss.LAT.values[0], dss.LON.values[
            0], dss.TIME.values[0]
        ds = dss.drop(['LAT', 'LON', 'TIME'])
        ds = ds.assign_coords(lat=lats,
                              lon=lons,
                              time=times,
                              modelrun=modelrun).rename({
                                  'time': 'age',
                                  'RSL': 'rsl'
                              })
        return ds

    def one_mod(names):
        """Organize model runs into xarray dataset."""
        ds1 = build_dataset(names[0])
        ds = ds1.chunk({'lat': 10, 'lon': 10})
        for name in names[1:]:
            temp = build_dataset(name)
            temp1 = temp.interp_like(ds1)
            temp1['modelrun'] = temp['modelrun']
            ds = xr.concat([ds, temp1], dim='modelrun')
        ds['age'] = ds['age'] * 1000  # presumably ka -> years; confirm
        ds = ds.roll(lon=256, roll_coords=True)
        ds.coords['lon'] = wrap_longitude(ds.lon)
        if is_glac1d:
            ds.coords['lat'] = ds.lat[::-1]
        ds = ds.swap_dims({'dim_0': 'lon'}).drop('dim_0')
        return ds

    age_slice = slice(tmax, tmin)
    lon_slice = slice(df_place.lon.min() - 2, df_place.lon.max() + 2)
    if is_glac1d:
        lat_slice = slice(df_place.lat.min() - 2, df_place.lat.max() + 2)
    else:
        lat_slice = slice(df_place.lat.max() + 2, df_place.lat.min() - 2)

    def onto_readv_grid(da):
        """Load ``da`` and interpolate it onto the fingerprint's grid."""
        return da.load().to_dataset().interp(age=ds_readv.age,
                                             lon=ds_readv.lon,
                                             lat=ds_readv.lat)

    # make composite of a bunch of GIA runs, i.e. GIA prior
    ds = one_mod([ice_model + lith_thickness])

    ds_sliced = ds.rsl.sel(age=age_slice, lon=lon_slice, lat=lat_slice)
    ds_area = onto_readv_grid(ds_sliced.mean(dim='modelrun'))
    ds_areastd = onto_readv_grid(ds_sliced.std(dim='modelrun'))

    # make "true" RSL by adding single GIA run and fingerprint
    lithmantle = 'l71C_ump2_lm50'
    ds_diff = onto_readv_grid(
        one_mod([ice_model + 'l71C']).sel(modelrun=ice_model +
                                          lithmantle).rsl.sel(age=age_slice,
                                                              lon=lon_slice,
                                                              lat=lat_slice))

    # make residual by subtracting GIA prior and fingerprint prior from "true" GIA
    ds_true = ds_diff + ds_readv
    ds_prior = ds_area + ds_readvprior
    ds_priorstd = ds_areastd + ds_readvprior_std
    ds_truelessprior = ds_true - ds_prior

    # sample each model at points where we have RSL data
    def ds_select(ds, row):
        """Value of ``ds.rsl`` at the grid cell nearest to ``row``."""
        return ds.rsl.sel(age=[row.age],
                          lon=[row.lon],
                          lat=[row.lat],
                          method='nearest').squeeze().values

    # select points at which RSL data exists
    for i, row in df_place.iterrows():
        df_place.loc[i, 'rsl_true'] = ds_select(ds_true, row)
        df_place.loc[i, 'rsl_resid'] = ds_select(ds_truelessprior, row)
        df_place.loc[i, 'rsl_realresid'] = (df_place.loc[i, 'rsl'] -
                                            ds_select(ds_area, row))

        df_place.loc[i, 'rsl_totalprior'] = ds_select(ds_prior, row)
        df_place.loc[i, 'rsl_totalprior_std'] = ds_select(ds_priorstd, row)
        df_place.loc[i, 'rsl_giaprior'] = ds_select(ds_area, row)
        df_place.loc[i, 'rsl_giaprior_std'] = ds_select(ds_areastd, row)
        df_place.loc[i, 'rsl_readvprior'] = ds_select(ds_readvprior, row)
        df_place.loc[i, 'rsl_readvprior_std'] = ds_select(
            ds_readvprior_std, row)

    print('number of datapoints = ', df_place.shape)

    ################## RUN GP REGRESSION #######################
    ################## -------------------- ######################

    t0 = time.time()

    Data = Tuple[tf.Tensor, tf.Tensor]

    # Per-datum noise variance: measurement error plus GIA-prior spread.
    noise_var = (df_place.rsl_er_max.to_numpy()**2 +
                 df_place.rsl_giaprior_std.to_numpy()**2)

    class GPR_diag(gpf.models.GPModel):
        r"""
        Gaussian Process Regression with a per-datum (diagonal) noise variance.

        Multiple columns of Y are treated independently.
        The log likelihood of this model is sometimes referred to as the
        'marginal log likelihood', and is given by
        .. math::
           \log p(\mathbf y \,|\, \mathbf f) =
                \mathcal N\left(\mathbf y\,|\, 0, \mathbf K + \sigma_n \mathbf I\right)
        """

        def __init__(self,
                     data: Data,
                     kernel: Kernel,
                     mean_function: Optional[MeanFunction] = None,
                     likelihood=noise_var):
            # ``likelihood`` arrives as an array of variances and is wrapped
            # in a Gaussian likelihood here.
            likelihood = gpf.likelihoods.Gaussian(variance=likelihood)
            _, y_data = data
            super().__init__(kernel,
                             likelihood,
                             mean_function,
                             num_latent=y_data.shape[-1])
            self.data = data

        def log_likelihood(self):
            """
            Computes the log likelihood.
            """
            x, y = self.data
            K = self.kernel(x)
            num_data = x.shape[0]
            k_diag = tf.linalg.diag_part(K)
            s_diag = tf.convert_to_tensor(self.likelihood.variance)
            jitter = tf.cast(tf.fill([num_data], default_jitter()),
                             'float64')  # stabilize K matrix w/jitter
            ks = tf.linalg.set_diag(K, k_diag + s_diag + jitter)
            L = tf.linalg.cholesky(ks)
            m = self.mean_function(x)

            # [R,] log-likelihoods for each independent dimension of Y
            log_prob = multivariate_normal(y, m, L)
            return tf.reduce_sum(log_prob)

        def predict_f(self,
                      predict_at: tf.Tensor,
                      full_cov: bool = False,
                      full_output_cov: bool = False):
            r"""
            This method computes predictions at X \in R^{N \x D} input points
            .. math::
                p(F* | Y)
            where F* are points on the GP at new data points, Y are noisy observations at training data points.
            """
            x_data, y_data = self.data
            err = y_data - self.mean_function(x_data)

            kmm = self.kernel(x_data)
            knn = self.kernel(predict_at, full=full_cov)
            kmn = self.kernel(x_data, predict_at)

            s = tf.linalg.diag(tf.convert_to_tensor(
                self.likelihood.variance))  # changed from normal GPR

            conditional = gpf.conditionals.base_conditional
            f_mean_zero, f_var = conditional(
                kmn, kmm + s, knn, err, full_cov=full_cov,
                white=False)  # [N, P], [N, P] or [P, N, N]
            f_mean = f_mean_zero + self.mean_function(predict_at)
            return f_mean, f_var

    def normalize(df):
        """Standardise to zero mean / unit std; returns an (n, 1) array."""
        return np.array((df - df.mean()) / df.std()).reshape(len(df), 1)

    def denormalize(y_pred, df):
        """Inverse of ``normalize`` using the mean/std of ``df``."""
        return np.array((y_pred * df.std()) + df.mean())

    def bounded_parameter(low, high, param):
        """Make parameter tfp Parameter with optimization bounds."""
        affine = tfb.AffineScalar(shift=tf.cast(low, tf.float64),
                                  scale=tf.cast(high - low, tf.float64))
        sigmoid = tfb.Sigmoid()
        logistic = tfb.Chain([affine, sigmoid])
        parameter = gpf.Parameter(param, transform=logistic, dtype=tf.float64)
        return parameter

    def haversine_km(X, X2):
        """Pairwise great-circle distance (Earth radius 6371).

        Column 0 of both inputs is latitude and column 1 longitude, in degrees.
        """
        pi = np.pi / 180  # degrees -> radians factor
        f = tf.expand_dims(X * pi, -2)  # ... x N x 1 x D
        f2 = tf.expand_dims(X2 * pi, -3)  # ... x 1 x M x D
        d = tf.sin((f - f2) / 2)**2
        lat1, lat2 = tf.expand_dims(X[:, 0] * pi, -1), \
                    tf.expand_dims(X2[:, 0] * pi, -2)
        cos_prod = tf.cos(lat2) * tf.cos(lat1)
        a = d[:, :, 0] + cos_prod * d[:, :, 1]
        c = tf.asin(tf.sqrt(a)) * 6371 * 2
        return c

    class HaversineKernel_Matern52(gpf.kernels.Matern52):
        """
        Isotropic Matern52 Kernel with Haversine distance instead of euclidean distance.
        Assumes n dimensional data, with columns [latitude, longitude] in degrees.
        """

        def __init__(
                self,
                lengthscale=1.0,
                variance=1.0,
                active_dims=None,
        ):
            super().__init__(
                active_dims=active_dims,
                variance=variance,
                lengthscale=lengthscale,
            )

        def haversine_dist(self, X, X2):
            return haversine_km(X, X2)

        def scaled_squared_euclid_dist(self, X, X2):
            """
            Returns (dist(X, X2ᵀ)/lengthscales)².
            """
            if X2 is None:
                X2 = X
            # tf.square (not da.square) so the result stays a TensorFlow
            # tensor, as in the Matern32 variant below.
            dist = tf.square(self.haversine_dist(X, X2) / self.lengthscale)
            return dist

    class HaversineKernel_Matern32(gpf.kernels.Matern32):
        """
        Isotropic Matern32 Kernel with Haversine distance instead of euclidean distance.
        Assumes n dimensional data, with columns [latitude, longitude] in degrees.
        """

        def __init__(
                self,
                lengthscale=1.0,
                variance=1.0,
                active_dims=None,
        ):
            super().__init__(
                active_dims=active_dims,
                variance=variance,
                lengthscale=lengthscale,
            )

        def haversine_dist(self, X, X2):
            return haversine_km(X, X2)

        def scaled_squared_euclid_dist(self, X, X2):
            """
            Returns (dist(X, X2ᵀ)/lengthscales)².
            """
            if X2 is None:
                X2 = X
            dist = tf.square(self.haversine_dist(X, X2) / self.lengthscale)
            return dist

    ########### Section to Run GPR######################
    ##################################3#################

    # Input space, rsl normalized to zero mean, unit variance
    X = np.stack((df_place['lon'], df_place['lat'], df_place['age']), 1)
    RSL = normalize(df_place.rsl_realresid)

    # define kernels with bounds
    # X columns are (lon, lat, age) but the haversine kernels expect
    # (lat, lon), hence active_dims=[1, 0] rather than [0, 1].
    k1 = HaversineKernel_Matern32(active_dims=[1, 0])
    k1.lengthscale = bounded_parameter(5000, 30000, 10000)  # hemispheric space
    k1.variance = bounded_parameter(0.1, 100, 2)

    k2 = HaversineKernel_Matern32(active_dims=[1, 0])
    k2.lengthscale = bounded_parameter(10, 5000, 100)  # GIA space
    k2.variance = bounded_parameter(0.1, 100, 2)

    k3 = gpf.kernels.Matern32(active_dims=[2])  # GIA time
    k3.lengthscale = bounded_parameter(8000, 20000, 10000)
    k3.variance = bounded_parameter(0.1, 100, 1)

    k4 = gpf.kernels.Matern32(active_dims=[2])  # shorter time
    k4.lengthscale = bounded_parameter(1, 8000, 1000)
    k4.variance = bounded_parameter(0.1, 100, 1)

    k5 = gpf.kernels.White(active_dims=[2])
    k5.variance = bounded_parameter(0.1, 100, 1)

    kernel = (k1 * k3) + (k2 * k4) + k5

    # build & train model
    m = GPR_diag((X, RSL), kernel=kernel, likelihood=noise_var)
    print('model built, time=', time.time() - t0)

    @tf.function(autograph=False)
    def objective():
        return -m.log_marginal_likelihood()

    o = gpf.optimizers.Scipy()
    o.minimize(objective, variables=m.trainable_variables)
    print('model minimized, time=', time.time() - t0)

    # output space
    nout = 50
    lat = np.linspace(min(ds_area.lat), max(ds_area.lat), nout)
    lon = np.linspace(min(ds_area.lon), max(ds_area.lon), nout)
    ages = ages_lgm[(ages_lgm < tmax) & (ages_lgm > tmin)]
    xyt = np.array(list(product(lon, lat, ages)))

    # query model & renormalize data
    y_pred, var = m.predict_f(xyt)
    y_pred_out = denormalize(y_pred, df_place.rsl_realresid)
    # NOTE(review): ``var`` is left in normalised units (not multiplied by
    # std**2) although it is later plotted with an "RSL (m)" label -- confirm.

    # reshape output vectors
    Zp = np.array(y_pred_out).reshape(nout, nout, len(ages))
    varp = np.array(var).reshape(nout, nout, len(ages))

    # print kernel details
    print_summary(m, fmt='notebook')
    print('time elapsed = ', time.time() - t0)

    print('negative log marginal likelihood =',
          m.neg_log_marginal_likelihood().numpy())

    ################## INTERPOLATE MODELS #######################
    ################## -------------------- ######################

    def make_dataarray(da):
        """Wrap a (lon, lat, age) grid as a DataArray ordered (age, lat, lon)."""
        coords = [lon, lat, ages]
        dims = ['lon', 'lat', 'age']
        return xr.DataArray(da, coords=coords,
                            dims=dims).transpose('age', 'lat', 'lon')

    # turn GPR output into xarray dataarray
    da_zp = make_dataarray(Zp)
    da_varp = make_dataarray(varp)

    def interp_likegpr(ds):
        return ds.rsl.load().transpose().interp_like(da_zp)

    # interpolate the models that are used below onto the GPR grid
    ds_trueinterp = ds_true.interp(age=ages)
    da_priorinterp = interp_likegpr(ds_prior)
    da_priorinterpstd = interp_likegpr(ds_priorstd)
    da_giapriorinterp = interp_likegpr(ds_area)
    ds_giapriorinterp = ds_area.interp(age=ages)

    # add total prior RSL back into GPR
    da_priorplusgpr = da_zp + da_giapriorinterp

    ################## SAVE NETCDFS #######################
    ################## -------------------- ######################

    path = 'output/'
    out_prefix = path + ice_model + lith_thickness + '_' + place
    da_zp.to_netcdf(out_prefix + '_da_zp')
    da_giapriorinterp.to_netcdf(out_prefix + '_giaprior')
    da_priorplusgpr.to_netcdf(out_prefix + '_posterior')
    da_varp.to_netcdf(out_prefix + '_gp_variance')

    ################## PLOT MODELS #######################
    ################## -------------------- ######################
    dirName = f'figs/{place}/'
    if not os.path.exists(dirName):
        # makedirs: also creates the parent 'figs' directory when missing.
        os.makedirs(dirName, exist_ok=True)
        print("Directory ", dirName, " Created ")
    else:
        print("Directory ", dirName, " already exists")

    # Defined before the loop: both are also needed by the site-location plot
    # further down, even if no age step is plotted here.
    proj = ccrs.PlateCarree()
    projection = ccrs.PlateCarree()

    # Loop-invariant plot settings.
    step = (ages[0] - ages[1])
    vmin = ds_giapriorinterp.rsl.min().values  # + 10
    vmax = ds_giapriorinterp.rsl.max().values  # - 40
    vmin_std = 0
    vmax_std = 1
    fontsize = 20
    cmap = 'coolwarm'

    for i, age in enumerate(ages):
        # only plot every 500 years
        if age % 500 != 0:
            continue

        df_it = df_place[(df_place.age < age) & (df_place.age > age - step)]
        resid_it = da_zp.sel(age=slice(age, age - step))
        rsl, var = df_it.rsl, df_it.rsl_er_max.values**2
        lat_it, lon_it = df_it.lat, df_it.lon
        tmax_it = np.round(age, 2)

        fig, (ax1, ax2, ax3,
              ax4) = plt.subplots(1,
                                  4,
                                  figsize=(24, 16),
                                  subplot_kw=dict(projection=projection))

        # total prior mean + "true" data
        ax1.coastlines(color='k')
        pc1 = ds_giapriorinterp.rsl[i].transpose().plot(
            ax=ax1,
            transform=proj,
            cmap=cmap,
            norm=MidpointNormalize(vmin, vmax, 0),
            add_colorbar=False,
            extend='both')
        fig.colorbar(pc1, ax=ax1, shrink=.3, label='RSL (m)', extend='both')
        # The norm already carries vmin/vmax; passing both is rejected by
        # newer Matplotlib.
        ax1.scatter(lon_it,
                    lat_it,
                    s=80,
                    c=rsl,
                    edgecolor='k',
                    norm=MidpointNormalize(vmin, vmax, 0),
                    cmap=cmap)
        ax1.set_title(
            f'{np.round(ds_trueinterp.rsl[i].age.values, -1)} yrs',
            fontsize=fontsize)
        #         ax1.set_extent(extent_)

        # Learned difference between prior and "true" data
        ax2.coastlines(color='k')
        pc = da_zp[i, :, :].plot(ax=ax2,
                                 transform=proj,
                                 cmap=cmap,
                                 extend='both',
                                 norm=MidpointNormalize(
                                     resid_it.min(), resid_it.max(), 0),
                                 add_colorbar=False)
        fig.colorbar(pc, ax=ax2, shrink=.3, label='RSL (m)', extend='both')
        ax2.scatter(lon_it,
                    lat_it,
                    s=80,
                    facecolors='k',
                    cmap=cmap,
                    edgecolor='k',
                    transform=proj,
                    norm=MidpointNormalize(resid_it.min(), resid_it.max(), 0))
        ax2.set_title(f'{np.round(tmax_it,2)} yrs', fontsize=fontsize)
        #         ax2.set_extent(extent_)

        # GP regression
        ax3.coastlines(color='k')
        pc = da_priorplusgpr[i].plot(ax=ax3,
                                     transform=proj,
                                     norm=MidpointNormalize(vmin, vmax, 0),
                                     cmap=cmap,
                                     extend='both',
                                     add_colorbar=False)
        ax3.scatter(lon_it,
                    lat_it,
                    s=80,
                    c=rsl,
                    edgecolor='k',
                    cmap=cmap,
                    norm=MidpointNormalize(vmin, vmax, 0))
        fig.colorbar(pc, ax=ax3, shrink=.3, label='RSL (m)', extend='both')
        ax3.set_title(f'{np.round(tmax_it,2)} yrs', fontsize=fontsize)
        #         ax3.set_extent(extent_)

        # GP regression standard deviation
        ax4.coastlines(color='k')
        pc = (2 * np.sqrt(da_varp[i])).plot(
            ax=ax4,
            transform=proj,
            vmin=vmin_std,
            vmax=vmax_std * 2,
            cmap='Reds',
            extend='both',
            add_colorbar=False,
        )
        ax4.scatter(lon_it,
                    lat_it,
                    s=80,
                    c=2 * np.sqrt(var),
                    vmin=vmin_std,
                    vmax=vmax_std * 2,
                    cmap='Reds',
                    edgecolor='k',
                    transform=proj)
        fig.colorbar(pc,
                     ax=ax4,
                     shrink=.3,
                     extend='both',
                     label=r'RSL (m) (2 $\sigma$)')
        ax4.set_title(f'{np.round(tmax_it,2)} yrs', fontsize=fontsize)
        #         ax4.set_extent(extent_)

        ########## ----- Save figures -------- #######################
        fig.savefig(dirName + f'{ages[i]}_{place}_realdata_fig_3D',
                    transparent=True)

    ################## CHOOSE LOCS W/NUF SAMPS #######################
    ################## -------------------- ######################

    def locs_with_enoughsamples(df_place, place, number):
        """make new dataframe, labeled, of sites with [> number] measurements"""
        df_lots = df_place.groupby(['lat',
                                    'lon']).filter(lambda x: len(x) > number)

        df_locs = []
        for i, group in enumerate(df_lots.groupby(['lat', 'lon'])):
            singleloc = group[1].copy()
            singleloc['location'] = place
            singleloc['locnum'] = place + '_site' + str(
                i)  # + singleloc.reset_index().index.astype('str')
            df_locs.append(singleloc)
        df_locs = pd.concat(df_locs)

        return df_locs

    number = 6
    df_nufsamps = locs_with_enoughsamples(df_place, place, number)
    n_sites = len(df_nufsamps.locnum.unique())

    ################## PLOT LOCS W/NUF SAMPS #######################
    ################## -------------------- ######################

    def slice_dataarray(da, site_df):
        """Select the grid column of ``da`` nearest to the site's lat/lon."""
        return da.sel(lat=site_df.lat.unique(),
                      lon=site_df.lon.unique(),
                      method='nearest')

    # squeeze=False keeps ``ax`` an array even when only one site qualifies.
    fig, ax = plt.subplots(1, n_sites, figsize=(18, 4), squeeze=False)
    ax = ax.ravel()
    colors = ['darkgreen', 'darkblue', 'darkred']
    fontsize = 18

    for i, (site_name, site_df) in enumerate(df_nufsamps.groupby('locnum')):

        # slice data for each site
        prior_it = slice_dataarray(da_priorinterp, site_df)
        priorvar_it = slice_dataarray(da_priorinterpstd, site_df)
        top_prior = prior_it + priorvar_it * 2
        bottom_prior = prior_it - priorvar_it * 2

        var_it = slice_dataarray(np.sqrt(da_varp), site_df)
        post_it = slice_dataarray(da_priorplusgpr, site_df)
        top = post_it + var_it * 2
        bottom = post_it - var_it * 2

        site_err = 2 * (site_df.rsl_er_max + site_df.rsl_giaprior_std)

        ax[i].scatter(site_df.age,
                      site_df.rsl,
                      c=colors[0],
                      label='"true" RSL')
        ax[i].errorbar(
            site_df.age,
            site_df.rsl,
            site_err,
            c=colors[0],
            fmt='none',
            capsize=1,
            lw=1,
        )

        prior_it.plot(ax=ax[i], c=colors[2], label=r'Prior $\pm 2 \sigma$')
        ax[i].fill_between(prior_it.age,
                           bottom_prior.squeeze(),
                           top_prior.squeeze(),
                           color=colors[2],
                           alpha=0.3)

        post_it.plot(ax=ax[i], c=colors[1], label=r'Posterior $\pm 2 \sigma$')
        ax[i].fill_between(post_it.age,
                           bottom.squeeze(),
                           top.squeeze(),
                           color=colors[1],
                           alpha=0.3)
        #     ax[i].set_title(f'{site_name} RSL', fontsize=fontsize)
        ax[i].set_title('')

        ax[i].legend(loc='lower left')

    fig.savefig(dirName + f'{ages[0]}to{ages[-1]}_{place}_realdata_fig_1D',
                transparent=True)

    # plot locations of data
    fig, ax = plt.subplots(1,
                           n_sites,
                           figsize=(18, 4),
                           subplot_kw=dict(projection=projection),
                           squeeze=False)
    ax = ax.ravel()

    da_zeros = xr.zeros_like(da_zp)

    for i, (site_name, site_df) in enumerate(df_nufsamps.groupby('locnum')):
        ax[i].coastlines(color='k')
        ax[i].plot(site_df.lon.unique(),
                   site_df.lat.unique(),
                   c=colors[0],
                   ms=7,
                   marker='o',
                   transform=proj)
        ax[i].plot(site_df.lon.unique(),
                   site_df.lat.unique(),
                   c=colors[0],
                   ms=25,
                   marker='o',
                   transform=proj,
                   mfc="None",
                   mec='red',
                   mew=4)
        da_zeros[0].plot(ax=ax[i], cmap='Greys', add_colorbar=False)
        ax[i].set_title(site_name, fontsize=fontsize)
    # plt.tight_layout()
    fig.savefig(dirName + f'{ages[0]}to{ages[-1]}_{place}_realdata_fig_1Dlocs',
                transparent=True)

    ################# DECOMPOSE GPR INTO KERNELS ####################
    ################## -------------------- ######################

    def predict_decomp_f(m,
                         custom_kernel,
                         predict_at: tf.Tensor,
                         full_cov: bool = False,
                         full_output_cov: bool = False,
                         var=None):
        """Decompose GP into individual kernels."""

        x_data, y_data = m.data
        err = y_data - m.mean_function(x_data)
        kmm = m.kernel(x_data)
        knn = custom_kernel(predict_at, full=full_cov)
        kmn = custom_kernel(x_data, predict_at)
        # NOTE(review): the diagonal uses the ``var`` passed in, whereas
        # GPR_diag.predict_f uses m.likelihood.variance -- confirm intended.
        s = tf.linalg.diag(tf.convert_to_tensor(var))  # added diagonal variance
        conditional = gpf.conditionals.base_conditional
        f_mean_zero, f_var = conditional(
            kmn, kmm + s, knn, err, full_cov=full_cov,
            white=False)  # [N, P], [N, P] or [P, N, N]
        f_mean = np.array(f_mean_zero + m.mean_function(predict_at))
        f_var = np.array(f_var)
        return f_mean, f_var

    def reshape_decomp(k, var=None):
        A, var = predict_decomp_f(m, k, xyt, var=var)
        A = A.reshape(nout, nout, len(ages))
        var = var.reshape(nout, nout, len(ages))
        return A, var

    # Mean contribution of each kernel, in definition order:
    # k1 hemispheric space, k2 GIA space, k3 GIA time, k4 shorter time, k5 white.
    da_A1, da_A2, da_A3, da_A4, da_A5 = [
        make_dataarray(reshape_decomp(k, var=noise_var)[0])
        for k in (k1, k2, k3, k4, k5)
    ]

    ################# PLOT DECOMPOSED KERNELS ####################
    ################## -------------------- ####################

    fig, ax = plt.subplots(1, 6, figsize=(24, 4))
    ax = ax.ravel()
    da_A1[0, :, :].plot(ax=ax[0], cmap='RdBu_r')

    da_A2[0, :, :].plot(ax=ax[1], cmap='RdBu_r')

    da_A3[0, :, :].plot(ax=ax[2], cmap='RdBu_r')

    da_A4[:, 0, 0].plot(ax=ax[3])

    da_A5[:, 0, 0].plot(ax=ax[4])

    # da_A6[:,0,0].plot(ax=ax[5])

    # plt.tight_layout()

    fig.savefig(dirName + f'{ages[0]}to{ages[-1]}_{place}_decompkernels',
                transparent=True)
# %% single_inducing_point = X[:1, :].copy() vfe = gpflow.models.SGPR((X, Y), gpflow.kernels.SquaredExponential(), inducing_variable=single_inducing_point) objective = tf.function( autograph=False)(lambda: -vfe.log_marginal_likelihood()) gpflow.optimizers.Scipy().minimize(objective, vfe.trainable_variables, options=dict(maxiter=ci_niter(1000)), jit=False) # Note that we need to set jit=False here due to a discrepancy in tf.function jitting # see https://github.com/GPflow/GPflow/issues/1260 print("Lower bound: %f" % vfe.log_likelihood().numpy()) print("Upper bound: %f" % vfe.upper_bound().numpy()) # %% [markdown] # In this case we show that for the hyperparameter setting, the bound is very tight. However, this does _not_ imply that we have enough inducing points, but simply that we have correctly identified the marginal likelihood for this particular hyperparameter setting. In this specific case, where we used a single inducing point, the model collapses to not using the GP at all (lengthscale is really long to model only the mean). The rest of the variance is explained by noise. This GP can be perfectly approximated with a single inducing point. # %% plot_model(vfe) # %% print_summary(vfe, fmt='notebook') # %% [markdown] # This can be diagnosed by showing that there are other hyperparameter settings with higher upper bounds. This indicates that there might be better hyperparameter settings, but we cannot identify them due to the lack of inducing points. An example of this can be seen in the previous section. # %%
# Sampling the ground-truth xmin = -2*np.pi xmax = 2*np.pi X = np.random.uniform(xmin, xmax, (N, 1)) delta = np.random.normal(0, sigma, (N, 1)) Y = np.sin(X) + delta # Dataset needs to be converted to tensor for GPflow to handle it data = (tf.convert_to_tensor(X, "float64"), tf.convert_to_tensor(Y, "float64")) # Defining the GP kernel = gpflow.kernels.SquaredExponential() my_gp = gpflow.models.GPR(data, kernel=kernel) # Let's take a look at its hyperparameters (before training) print_summary(my_gp) # Picking an optimizer and training the GP through MLE opt = gpflow.optimizers.Scipy() opt.minimize(my_gp.training_loss, my_gp.trainable_variables, tol=1e-11, options=dict(maxiter=1000), method='l-bfgs-b') # Let's take a look at its hyperparameters (after training) print_summary(my_gp) # Gridding the space and predicting! xx = np.linspace(xmin * 1.4, xmax * 1.4, 1000).reshape(-1, 1) mean, var = my_gp.predict_f(xx) # Plotting the results (two standard deviations = 95% confidence) fig = plt.figure() plt.plot(xx, mean, color='#0072BD', lw=2)
def print_summary(self):
    """Print a summary of the model held in ``self.model``.

    Passes ``self.model`` to the ``print_summary`` name visible from this
    method's scope and returns ``None``.
    """
    # NOTE(review): presumably the module-level ``print_summary`` helper
    # (e.g. gpflow's) rather than this method -- confirm against the imports.
    wrapped_model = self.model
    print_summary(wrapped_model)
def main(path, path_to_dft_dataset, task, representation, theory_level):
    """
    Compare a Tanimoto-kernel GP against TD-DFT values using leave-one-out evaluation.

    Every molecule that has a DFT value at the chosen level of theory is held out once;
    the GP is trained on all remaining molecules (including those without DFT values)
    and the absolute errors of the GP and of DFT on the held-out molecule are recorded.
    The mean absolute error and its standard error are printed for both; nothing is returned.

    :param path: str specifying path to photoswitches.csv file.
    :param path_to_dft_dataset: str specifying path to dft_comparison.csv file.
    :param task: str specifying the task. e_iso_pi only supported task for the TD-DFT comparison.
    :param representation: str specifying the molecular representation. One of ['fingerprints', 'fragments', 'fragprints']
    :param theory_level: str giving the level of theory to compare against - CAM-B3LYP or PBE0 ['CAM-B3LYP', 'PBE0'].
                         Any value other than 'CAM-B3LYP' selects the PBE0 values.
    """

    data_loader = TaskDataLoader(task, path)
    smiles_list, _, pbe0_vals, cam_vals, experimental_vals = data_loader.load_dft_comparison_data(path_to_dft_dataset)

    X = featurise_mols(smiles_list, representation)

    # Keep only non-duplicate entries because we're not considering effects of solvent.
    # A single pass with a set keeps the first occurrence of each SMILES string
    # (assumes the entries are hashable, e.g. str -- confirm against the data loader).
    seen_smiles = set()
    first_occurrences = []
    for i, smiles in enumerate(smiles_list):
        if smiles not in seen_smiles:
            seen_smiles.add(smiles)
            first_occurrences.append(i)
    non_duplicate_indices = np.array(first_occurrences)

    X = X[non_duplicate_indices, :]
    experimental_vals = experimental_vals[non_duplicate_indices]
    pbe0_vals = pbe0_vals[non_duplicate_indices]
    cam_vals = cam_vals[non_duplicate_indices]

    # DFT values for the requested level of theory
    reference_vals = cam_vals if theory_level == 'CAM-B3LYP' else pbe0_vals
    missing_dft = np.argwhere(np.isnan(reference_vals))
    has_dft = np.argwhere(~np.isnan(reference_vals))

    # molecules with dft values to be split into train/test
    X_with_dft = np.delete(X, missing_dft, axis=0)
    y_with_dft = np.delete(experimental_vals, missing_dft)
    dft_vals = np.delete(reference_vals, missing_dft)

    # molecules with no dft vals must go into the training set.
    X_no_dft = np.delete(X, has_dft, axis=0)
    y_no_dft = np.delete(experimental_vals, has_dft)

    mae_list = []
    dft_mae_list = []

    # We define the Gaussian Process optimisation objective.
    # `m` is rebound on every iteration below; the closure looks it up at call time.
    m = None

    def objective_closure():
        return -m.log_marginal_likelihood()

    print('\nBeginning training loop...')

    for i in range(len(y_with_dft)):

        # Hold out molecule i; everything else with DFT values is used for training
        X_train = np.delete(X_with_dft, i, axis=0)
        y_train = np.delete(y_with_dft, i)
        X_test = X_with_dft[i].reshape(1, -1)
        y_test = y_with_dft[i]
        dft_test = dft_vals[i]

        X_train = np.concatenate((X_train, X_no_dft))
        y_train = np.concatenate((y_train, y_no_dft))
        y_train = y_train.reshape(-1, 1)
        y_test = y_test.reshape(-1, 1)

        # We standardise the outputs but leave the inputs unchanged
        _, y_train, _, y_test, y_scaler = transform_data(X_train, y_train, X_test, y_test)

        X_train = X_train.astype(np.float64)
        X_test = X_test.astype(np.float64)

        k = Tanimoto()
        m = gpflow.models.GPR(data=(X_train, y_train),
                              mean_function=Constant(np.mean(y_train)),
                              kernel=k,
                              noise_variance=1)

        # Optimise the kernel variance and noise level by the marginal likelihood
        opt = gpflow.optimizers.Scipy()
        opt.minimize(objective_closure, m.trainable_variables, options=dict(maxiter=100))
        print_summary(m)

        # Output Standardised RMSE and RMSE on Train Set
        y_pred_train, _ = m.predict_f(X_train)
        train_rmse_stan = np.sqrt(mean_squared_error(y_train, y_pred_train))
        train_rmse = np.sqrt(mean_squared_error(y_scaler.inverse_transform(y_train),
                                                y_scaler.inverse_transform(y_pred_train)))
        print("\nStandardised Train RMSE: {:.3f}".format(train_rmse_stan))
        print("Train RMSE: {:.3f}".format(train_rmse))

        # mean GP prediction (the predictive variance is not used)
        y_pred, _ = m.predict_f(X_test)
        y_pred = y_scaler.inverse_transform(y_pred)
        y_test = y_scaler.inverse_transform(y_test)

        # Output MAE for this trial
        mae = abs(y_test - y_pred)
        print("MAE: {}".format(mae))

        # Store values in order to compute the mean and standard error of the statistics across trials
        mae_list.append(mae)

        # DFT prediction scores on the same trial
        dft_mae = abs(y_test - dft_test)
        dft_mae_list.append(dft_mae)

    mae_list = np.array(mae_list)
    dft_mae_list = np.array(dft_mae_list)

    print("\nmean GP-Tanimoto MAE: {:.4f} +- {:.4f}\n".format(np.mean(mae_list),
                                                             np.std(mae_list)/np.sqrt(len(mae_list))))
    print("mean {} MAE: {:.4f} +- {:.4f}\n".format(theory_level,
                                                   np.mean(dft_mae_list),
                                                   np.std(dft_mae_list)/np.sqrt(len(dft_mae_list))))
# %% [markdown]
# ### A tight estimate bound does not imply a converged model

# %%
# Build an SGPR model whose only inducing point is the first row of X,
# optimise it, and report both bounds on the log marginal likelihood.
single_inducing_point = X[0:1, :].copy()

vfe = gpflow.models.SGPR(
    data=(X, Y),
    kernel=gpflow.kernels.SquaredExponential(),
    inducing_variable=single_inducing_point,
)
objective = tf.function(vfe.training_loss)
gpflow.optimizers.Scipy().minimize(
    objective,
    vfe.trainable_variables,
    compile=False,
    options={"maxiter": ci_niter(1000)},
)
# Note that we need to set compile=False here due to a discrepancy in compiling with tf.function
# see https://github.com/GPflow/GPflow/issues/1260

print("Lower bound: %f" % vfe.elbo().numpy())
print("Upper bound: %f" % vfe.upper_bound().numpy())

# %% [markdown]
# In this case we show that, for this hyperparameter setting, the bound is very tight. However, this does _not_ imply that we have enough inducing points, but simply that we have correctly identified the marginal likelihood for this particular hyperparameter setting. In this specific case, where we used a single inducing point, the model collapses to not using the GP at all (the lengthscale is really long, so that it models only the mean). The rest of the variance is explained by noise. This GP can be perfectly approximated with a single inducing point.

# %%
plot_model(vfe)

# %%
print_summary(vfe, fmt="notebook")

# %% [markdown]
# This can be diagnosed by showing that there are other hyperparameter settings with higher upper bounds. This indicates that there might be better hyperparameter settings, but we cannot identify them due to the lack of inducing points. An example of this can be seen in the previous section.
set_trainable(likelihood, True)
set_trainable(kernel.variance, True)

# %% [markdown]
# We can use `param.assign(value)` to assign a value to a parameter:

# %%
kernel.lengthscales.assign(0.5)

# %% [markdown]
# All these changes are reflected when we use `print_summary(model)` to print a detailed summary of the model. By default the output is displayed in a minimalistic and simple table.

# %%
from gpflow.utilities import print_summary

print_summary(model)  # same as print_summary(model, fmt="fancy_table")

# %% [markdown]
# We can change the default printing format so that it looks nicer in our notebook:

# %%
gpflow.config.set_default_summary_fmt("notebook")

print_summary(model)  # same as print_summary(model, fmt="notebook")

# %% [markdown]
# Jupyter notebooks also format GPflow classes (that are subclasses of `gpflow.base.Module`) in the same nice way when they are the last expression of a cell (this is independent of the `default_summary_fmt`):

# %%
model