def __init__(self, regression_model: Functions, **kwargs):
    self.gpr = regression_model
    self.options = self._unpack_options(**kwargs)
    self.dimensions = self.gpr.dimensions
    prior_mean = self.options['prior_mean'].reshape(-1)
    prior_cov = self.options['prior_variance']
    self.prior = Gaussian(mean=prior_mean, covariance=prior_cov)
def bmc(self):
    """
    Bayesian Monte Carlo - no active sampling.
    :return: array holding the running estimate of the log-integral at each iteration
    """
    prior_mean = self.options['prior_mean'].reshape(-1)
    prior_cov = self.options['prior_variance']
    prior = Gaussian(mean=prior_mean, covariance=prior_cov)

    budget = self.options['naive_bq_budget']

    samples = np.zeros((budget, self.gpr.dimensions))
    yv = np.zeros((budget, ))
    intv = np.zeros((budget, ))
    log_intv = np.zeros((budget, ))

    kern = GPy.kern.RBF(
        input_dim=self.dimensions,
        variance=self.options['naive_bq_kern_variance'],
        lengthscale=self.options['naive_bq_kern_lengthscale'])

    initial_x = np.array([[0, 0]])
    initial_y = np.array(self.gpr.sample(initial_x)).reshape(1, -1)
    gpy_gp = GPy.models.GPRegression(initial_x, initial_y, kernel=kern)
    gp = GP(gpy_gp)
    model = OriginalIntegrandModel(gp=gp, prior=prior)

    for i in range(budget):
        # Draw each sample from a fixed Gaussian (no active sampling).
        # NOTE: the sampling distribution below is hard-coded for the 2-d case.
        samples[i, :] = np.random.multivariate_normal(mean=np.array([0, 0]), cov=2 * np.eye(2))
        yv[i] = np.array(self.gpr.sample(samples[i, :])).reshape(1, -1)
        model.update(samples[i], yv[i].reshape(-1, 1))

        if i % 10 == 0 and i > 0:
            gpy_gp.optimize()
            _ = gpy_gp.plot()
            plt.show()

        intv[i] = model.integral_mean()[0]
        log_intv[i] = np.log(intv[i])
        print(i, log_intv[i])

        if i % 100 == 0:
            self.plot_iterations(i, log_intv, true_val=self.gpr.grd_log_evidence)
    return log_intv
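# A minimal, self-contained sketch of the closed-form quantity that OriginalIntegrandModel.integral_mean
# is assumed to compute above: the standard Bayesian Monte Carlo estimate E[Z] = z^T K^{-1} y for an RBF
# kernel under a Gaussian prior N(mu, Sigma), where
# z_i = sigma_f^2 |Lambda^{-1} Sigma + I|^{-1/2} exp(-0.5 (x_i - mu)^T (Lambda + Sigma)^{-1} (x_i - mu))
# and Lambda = lengthscale^2 * I. The function name and signature are illustrative only (not part of this
# repository); it assumes a scalar lengthscale and relies on the module-level `import numpy as np` used
# throughout this file.
def bmc_integral_mean_sketch(X, y, mu, Sigma, kern_variance, lengthscale, jitter=1e-8):
    n, d = X.shape
    inv_lambda = np.eye(d) / lengthscale ** 2
    # Gram matrix of the observed sample locations
    diff = X[:, None, :] - X[None, :, :]
    K = kern_variance * np.exp(-0.5 * np.einsum('ijk,kl,ijl->ij', diff, inv_lambda, diff))
    # Kernel mean vector: z_i is the integral of k(x, x_i) against the Gaussian prior
    det_term = np.linalg.det(inv_lambda @ Sigma + np.eye(d)) ** -0.5
    dmu = X - mu
    z = kern_variance * det_term * np.exp(
        -0.5 * np.einsum('ij,jk,ik->i', dmu, np.linalg.inv(lengthscale ** 2 * np.eye(d) + Sigma), dmu))
    # Posterior mean of the integral under a zero-mean GP, with a small jitter/noise term
    return z @ np.linalg.solve(K + jitter * np.eye(n), y)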
def __init__(self, regression_model: Union[RBFGPRegression, PeriodicGPRegression], **kwargs):
    self.gpr = regression_model
    self.options = self._unpack_options(**kwargs)
    self.dimensions = self.gpr.param_dim
    # Parameter prior - note that the prior is in *log-space*
    self.prior = Gaussian(mean=self.options['prior_mean'].reshape(-1),
                          covariance=self.options['prior_variance'])
    self.gp = self.gpr.model
def __init__(self, classification_model: SVMClassification, **kwargs):
    self.model = classification_model
    self.options = self._unpack_options(**kwargs)
    self.dimensions = self.model.param_dim
    self.prior = Gaussian(mean=self.options['prior_mean'].reshape(-1),
                          covariance=self.options['prior_variance'])
# Set up test function and WSABI-L model.
def true_function(x):
    x = np.atleast_2d(x)
    return np.atleast_2d((((np.sin(x) + 0.5 * np.cos(3 * x)) ** 2) /
                          ((x / 2) ** 2 + 0.3)).prod(axis=1))


initial_x = np.array([[0, 0]])
initial_y = np.sqrt(2 * true_function(initial_x))

k = GPy.kern.RBF(2, variance=2, lengthscale=2)
lik = GPy.likelihoods.Gaussian(variance=1e-10)
prior = Gaussian(mean=np.array([0, 0]), covariance=2 * np.eye(2))

gpy_gp = GPy.core.GP(initial_x, initial_y, kernel=k, likelihood=lik)
warped_gp = WsabiLGP(gpy_gp)
model = WarpedIntegrandModel(warped_gp, prior)


def true_integrand(x):
    return true_function(x) * prior(x)


# Set up plotting.
LOWER_LIMIT = -4
UPPER_LIMIT = 4
PLOTTING_RESOLUTION = 200
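# A minimal usage sketch (not part of the original script) of how the model above might be driven:
# a short active-sampling loop using select_batch with LOCAL_PENALISATION, mirroring the quadrature
# routines elsewhere in this repository, followed by the WSABI-L estimate of the evidence integral.
# The budget below is an illustrative assumption.
n_active_samples = 20
for _ in range(n_active_samples):
    batch = np.array(select_batch(model, 1, LOCAL_PENALISATION)).reshape(1, -1)
    model.update(batch, true_function(batch))
    gpy_gp.optimize()
print("WSABI-L estimate of the evidence:", model.integral_mean()[0])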
def reset_prior(self):
    self.prior = Gaussian(mean=self.options['prior_mean'].reshape(-1),
                          covariance=self.options['prior_variance'])
    logging.info("Prior reset at mean " + str(self.options['prior_mean']) +
                 " and variance " + str(self.options['prior_variance']))
def wsabi(X_pred, y_grd, log_lik_handle, param_dim=5,
          prior_mean=np.zeros((5, 1)), prior_var=100 * np.eye(5)):
    start = time.time()
    prior = Gaussian(mean=prior_mean.reshape(-1), covariance=prior_var)

    # Initial grid sampling: all but the last dimension are exponentiated back from log-space.
    log_phis = np.mgrid[-1:1.1:1, -1:1.1:1, -1:1.1:1, -1:1.1:1, 0:25:5].reshape(5, -1).T
    n = log_phis.shape[0]
    phis = log_phis.copy()
    phis[:, :-1] = np.exp(phis[:, :-1])

    # Allocate memory for the samples and results
    log_r = np.zeros((n, 1))  # Log-likelihood evaluations
    q = np.zeros((n, 1))      # Predictions
    # var = np.zeros((n, ))   # Posterior variances (variance path currently disabled)

    for i in range(n):
        log_r[i, :], q[i, :], _ = log_lik_handle(phi=phis[i, :], x_pred=X_pred)
        print(phis[i, :], log_r[i, :], q[i, :])
    r = np.exp(log_r)

    # Setting up the kernel - note that we only marginalise over the lengthscale terms;
    # the other hyperparameters are set to their MAP values.
    kern = GPy.kern.RBF(param_dim, variance=1., lengthscale=1.)

    # Evidence integral over r(phi)
    r_gp = GPy.models.GPRegression(phis[:1, :], r[:1, :], kern)
    r_model = WarpedIntegrandModel(WsabiLGP(r_gp), prior)
    r_model.update(phis[1:, :], r[1:, :])
    r_gp.optimize()
    r_int = r_model.integral_mean()[0]  # Model evidence
    log_r_int = np.log(r_int)           # Model log-evidence
    print("Estimate of model evidence: ", r_int)
    print("Model log-evidence ", log_r_int)

    # Enforce positivity in q
    q_min = np.min(q)
    if q_min < 0:
        q -= q_min
    else:
        q_min = 0

    # Exponentiate and rescale r * q to avoid numerical overflow
    log_rq_x = log_r + np.log(q)
    max_log_rq = np.max(log_rq_x)
    rq = np.exp(log_rq_x - max_log_rq)
    rq_gp = GPy.models.GPRegression(phis[:1, :], np.sqrt(2 * rq[:1, :]), kern)
    rq_model = WarpedIntegrandModel(WsabiLGP(rq_gp), prior)
    rq_model.update(phis[1:, :], rq[1:, :])
    rq_gp.optimize()

    # Estimate the predictive integral, undoing the rescaling and the positivity shift
    rq_int = np.exp(np.log(rq_model.integral_mean()[0]) + max_log_rq) + q_min * r_int
    print("rq_int", rq_int)

    # A variance estimate would follow the same pattern with var in place of q;
    # that path is currently disabled, hence the None returned below.
    pred = rq_int / r_int
    print('pred', pred)
    print('actual', y_grd)
    end = time.time()
    print("Total Time: ", end - start)
    return pred, None
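# A hedged usage sketch: wsabi() above expects log_lik_handle to return, for a given hyperparameter
# vector phi, the log-likelihood, a prediction and a third value (unused here). The toy handle below
# is hypothetical and only illustrates the calling convention; it is not part of the repository.
def toy_log_lik_handle(phi, x_pred):
    log_lik = -0.5 * float(np.sum(phi ** 2))   # stand-in log-likelihood
    prediction = 0.1 * float(np.sum(x_pred))   # stand-in predictive mean
    return log_lik, prediction, None

# Example invocation (commented out because it fits GPs over the full hyperparameter grid):
# pred, _ = wsabi(X_pred=np.ones((1, 1)), y_grd=0.1, log_lik_handle=toy_log_lik_handle)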
def toy_example(num_samples=None, noise=(0, 0)):
    if num_samples is None:
        num_samples = FLAGS.num_samples

    with tf.GradientTape() as tape:
        train_xs, valid_xs, test_xs = utils.load_toy_data()
        batch_xs = train_xs[0:FLAGS.batch_size]  # [batch_size, input_dim]

        # Set up the prior, proposal and likelihood networks.
        p_z = ToyPrior(mu_inital_value=2., size=FLAGS.latent_dim, name="toy_prior")  # callable Normal distribution
        p_x_given_z = ToyConditionalNormalLikelihood()
        q_z = ToyConditionalNormal(
            size=FLAGS.latent_dim,
            hidden_layer_sizes=1,
            initializers=None,
            use_bias=True,
            name="proposal")

        # Initialise the proposal network parameters to the optimum, plus the specified Gaussian noise.
        q_z.initialise_and_fix_network(batch_xs, noise)

        # Returns the Normal proposal distribution and its parameters (fixed to the optimal A and b).
        proposal, inference_network_params = q_z(batch_xs, stop_gradient=False)

        z = proposal.sample(sample_shape=[num_samples])  # [num_samples, batch_size, latent_dim]
        print("z samples ", z.shape)

        # Likelihood conditioned on z, and the prior together with its parameter mu.
        likelihood = p_x_given_z(z)
        prior, mu = p_z()

        log_p_z = tf.reduce_sum(prior.log_prob(z), axis=-1)                      # [num_samples, batch_size]
        log_q_z = tf.reduce_sum(proposal.log_prob(z), axis=-1)                   # [num_samples, batch_size]
        log_p_x_given_z = tf.reduce_sum(likelihood.log_prob(batch_xs), axis=-1)  # [num_samples, batch_size]

        log_weights = log_p_z + log_p_x_given_z - log_q_z  # [num_samples, batch_size]

        # Crucial for replicating the IWAE bound: take the log of the sum of the weights
        # (logsumexp over the K samples), NOT the sum of the logs (which gives the VAE bound).
        log_sum_weight = tf.reduce_logsumexp(log_weights, axis=0)
        log_avg_weight = log_sum_weight - tf.log(tf.to_float(num_samples))
        inference_loss = -tf.reduce_mean(log_avg_weight)

    parameters = (inference_network_params[0], inference_network_params[1], mu)
    grads = tape.gradient(inference_loss, parameters)

    # Alternative ELBO with an analytic KL term (currently unused):
    # kl = tf.reduce_mean(tfd.kl_divergence(proposal, prior), axis=-1)
    # log_sum_ll = tf.reduce_logsumexp(log_p_x_given_z, axis=0)
    # expected_log_likelihood = log_sum_ll - tf.log(tf.to_float(num_samples))
    # KL_elbo = tf.reduce_mean(expected_log_likelihood - kl)

    if FLAGS.using_BQ:
        def get_log_joint(z):
            return np.reshape(
                p_x_given_z(z).log_prob(batch_xs).numpy() + prior.log_prob(z).numpy(), (-1, 1))

        kernel = GPy.kern.RBF(1, variance=2, lengthscale=2)
        kernel.variance.constrain_bounded(1e-5, 1e5)
        bq_likelihood = GPy.likelihoods.Gaussian(variance=1e-1)
        bq_prior = Gaussian(mean=proposal._loc.numpy().squeeze(),
                            covariance=proposal._scale.numpy().item())

        initial_x = bq_prior.sample(5)
        initial_y = []
        for point in initial_x:
            initial_y.append(get_log_joint(np.atleast_2d(point)))
        initial_y = np.concatenate(initial_y)

        mean_function = NegativeQuadratic(1)
        gpy_gp = GPy.core.GP(initial_x, initial_y, kernel=kernel,
                             likelihood=bq_likelihood, mean_function=mean_function)
        warped_gp = VanillaGP(gpy_gp)
        bq_model = IntegrandModel(warped_gp, bq_prior)

        for i in range(10):
            if i % 5 == 0:
                gpy_gp.optimize_restarts(num_restarts=5)
            failed = True
            while failed:
                try:
                    batch = select_batch(bq_model, 1, KRIGING_BELIEVER)
                    failed = False
                except FloatingPointError:
                    gpy_gp.optimize_restarts(num_restarts=5)
            X = np.array(batch)
            Y = get_log_joint(X)
            bq_model.update(batch, Y)

        gpy_gp.optimize_restarts(num_restarts=5)
        bq_elbo = bq_model.integral_mean()

        # Brute-force check of the integral by 1-d quadrature
        import scipy.integrate

        def integrand(z):
            return get_log_joint(z) * np.exp(bq_prior.logpdf(np.atleast_2d(z)))

        brute_force_elbo = scipy.integrate.quad(integrand, -10, 10)
        print("BQ ", bq_elbo)
        print("ACTUAL ELBO ", brute_force_elbo)

    return grads
def naive_bq(self) -> tuple:
    """
    Marginalise the marginal log-likelihood using naive Bayesian Quadrature.
    :return: the final log-likelihood sample and the final estimate of the log-evidence
    """
    budget = self.options['naive_bq_budget']

    naive_bq_samples = np.zeros((budget, self.gpr.dimensions + 2))  # x locations of the samples
    naive_bq_log_y = np.zeros((budget, ))    # log-likelihoods evaluated at x
    naive_bq_y = np.zeros((budget, ))        # likelihoods evaluated at x
    log_naive_bq_int = np.zeros((budget, ))  # running estimate of the marginalised integral

    # Initial point - set the initial sample to the prior mean
    initial_x = np.zeros((self.dimensions + 2, 1)).reshape(1, -1) + 1e-6
    initial_y = np.array(self.gpr.log_sample(initial_x)).reshape(1, -1)

    # Prior in log space
    prior_mean = self.options['prior_mean'].reshape(1, -1)
    prior_cov = self.options['prior_variance']

    # Setting up the kernel - note the log-transformation
    kern = GPy.kern.RBF(
        self.dimensions + 2,
        variance=np.log(self.options['naive_bq_kern_variance']),
        lengthscale=np.log(self.options['naive_bq_kern_lengthscale']))

    # Initial guess for the GP used by BQ
    lik = GPy.likelihoods.Gaussian(variance=1e-10)
    prior = Gaussian(mean=prior_mean.reshape(-1), covariance=prior_cov)
    gpy_gp = GPy.core.GP(initial_x, initial_y, kernel=kern, likelihood=lik)
    gp = GP(gpy_gp)
    model = OriginalIntegrandModel(gp=gp, prior=prior)

    for i in range(1, budget):
        # Active sampling
        this_x = np.array(select_batch(model, 1, LOCAL_PENALISATION)).reshape(1, -1)
        naive_bq_samples[i, :] = this_x
        naive_bq_log_y[i] = np.array(self.gpr.log_sample(this_x)).reshape(1, -1)

        # Rescale the log-likelihoods collected so far by their maximum and exponentiate
        log_scaling = np.max(naive_bq_log_y[:i + 1])
        naive_bq_y[:i + 1] = np.exp(naive_bq_log_y[:i + 1] - log_scaling)
        this_y = naive_bq_y[i]

        model.update(this_x, this_y)
        gpy_gp.optimize()

        naive_bq_int, _, _ = model.integral_mean(log_transform=True)
        log_naive_bq_int[i] = naive_bq_int + log_scaling
        print("samples", np.exp(this_x))
        print("eval", log_naive_bq_int[i])

        if i % 1 == 0:
            self.plot_iterations(i, naive_bq_samples, naive_bq_log_y)
            print("Step", str(i))
            print("Current estimate of log-evidence: ", log_naive_bq_int[i])
            plt.plot(log_naive_bq_int[:i])
            plt.show()

    self.naive_bq_samples = naive_bq_samples
    return naive_bq_log_y[-1], log_naive_bq_int[-1]
def wsabi_bq(self, rebase=False):
    """
    Marginalise the marginal log-likelihood using WSABI Bayesian Quadrature.
    :return:
    """
    budget = self.options['naive_bq_budget']

    samples = np.zeros((budget, self.gpr.dimensions))  # x locations of the samples
    yv = np.zeros((budget, ))          # log-likelihoods evaluated at x
    yv_scaled = np.zeros((budget, ))
    intv = np.zeros((budget, ))        # running estimate of the marginalised integral
    log_intv = np.zeros((budget, ))

    # Initial point - set the initial sample to the prior mean
    initial_x = np.zeros((self.dimensions, 1)).reshape(1, -1) + 1e-6
    if rebase:
        initial_y = np.array([1]).reshape(1, -1)
    else:
        initial_y = np.array(self.gpr.sample(initial_x)).reshape(1, -1)

    # Prior in log space
    prior_mean = self.options['prior_mean'].reshape(-1)
    prior_cov = self.options['prior_variance']
    prior = Gaussian(mean=prior_mean, covariance=prior_cov)

    # Setting up the kernel - note the log-transformation
    kern = GPy.kern.RBF(
        self.dimensions,
        variance=self.options['naive_bq_kern_variance'],
        lengthscale=self.options['naive_bq_kern_lengthscale'])

    # Initial guess for the GP used by BQ
    gpy_gp = GPy.models.GPRegression(initial_x, initial_y, kernel=kern)
    warped_gp = WsabiLGP(gpy_gp)
    model = WarpedIntegrandModel(warped_gp, prior=prior)

    if rebase:
        for i in range(budget):
            samples[i, :] = np.array(select_batch(model, 1, LOCAL_PENALISATION)).reshape(1, -1)
            yv[i] = np.array(self.gpr.log_sample(samples[i, :])).reshape(1, -1)

            # Rescale all log-likelihoods by the current maximum and exponentiate
            scaling = np.max(yv[:i + 1])
            yv_scaled[:i + 1] = np.exp(yv[:i + 1] - scaling)
            x = samples[:i + 1]
            y = yv_scaled[:i + 1].reshape(-1, 1)

            if i % 20 == 0 and i > 0:
                # Create a new model, since all x and y data have been replaced due to rebasing
                gpy_gp = GPy.models.GPRegression(x, y, kernel=kern)
                warped_gp = WsabiLGP(gpy_gp)
                model = WarpedIntegrandModel(warped_gp, prior)
                gpy_gp.optimize()

            log_intv[i] = np.log((model.integral_mean())[0]) + scaling
            intv[i] = np.exp(log_intv[i])
            if i % 100 == 0:
                self.plot_iterations(i, log_intv, true_val=self.gpr.grd_log_evidence)
    else:
        for i in range(budget):
            samples[i, :] = np.array(select_batch(model, 1, LOCAL_PENALISATION)).reshape(1, -1)
            yv[i] = np.array(self.gpr.sample(samples[i, :])).reshape(1, -1)

            model.update(samples[i, :], yv[i])
            gpy_gp.optimize()

            intv[i] = (model.integral_mean())[0]
            log_intv[i] = np.log(intv[i])
            if i % 100 == 0:
                self.plot_iterations(i, log_intv, true_val=self.gpr.grd_log_evidence)
                plt.show()
    return yv[-1], intv[-1]
def wsabi(gpy_gp: GPy.core.GP, x, kern=None, noise=None):
    """Initial grid sampling, followed by WSABI quadrature."""
    budget = 50
    x = np.array(x).reshape(1, 1)

    # Alternative: draw hyperparameter samples with HMC (ratio.posterior_mc_inference.PosteriorMCSampler)
    # instead of the fixed grid below.
    log_params = np.mgrid[0:4.1:1, 0:4.1:1, -4:-0.1:1.5].reshape(3, -1).T
    budget = log_params.shape[0]

    if kern is None:
        kern = GPy.kern.RBF(input_dim=1, variance=1., lengthscale=1.)
    if noise is None:
        noise = 1e-3

    prior = Gaussian(mean=np.array([0, 0, 0]), covariance=4 * np.eye(3))

    log_phis = np.empty((budget, 3))
    log_liks = np.empty((budget, ))
    pred_means = np.empty((budget, ))
    pred_vars = np.empty((budget, ))

    for i in range(log_params.shape[0]):
        log_phi = log_params[i, :]
        _set_model(gpy_gp, np.exp(log_phi))
        log_lik = gpy_gp.log_likelihood()
        log_phis[i] = log_phi
        log_liks[i] = log_lik
        pred_means[i], pred_vars[i] = gpy_gp.predict_noiseless(x)

    if np.max(log_liks) > 15.:
        # For highly peaked likelihoods, skip quadrature and simply use the MAP estimate
        idx = log_liks.argmax()
        return pred_means[idx], pred_vars[idx], kern, noise

    # Evidence integral over r(phi) = exp(log-likelihood)
    r_gp = GPy.models.GPRegression(log_params[:1, :],
                                   np.sqrt(2 * np.exp(log_liks[0])).reshape(1, -1), kern)
    r_gp.Gaussian_noise.variance = noise
    r_model = WarpedIntegrandModel(WsabiLGP(r_gp), prior)
    r_model.update(log_phis[1:, :], np.exp(log_liks[1:]).reshape(-1, 1))
    r_gp.optimize()
    r_int = r_model.integral_mean()[0]

    # Shift the predictive means to enforce positivity before forming r * q
    q_min = np.min(pred_means)
    pred_means -= q_min

    rq = np.exp(log_liks) * pred_means
    rq_gp = GPy.models.GPRegression(log_phis[:1, :], np.sqrt(2 * rq[0]).reshape(1, -1), kern)
    rq_model = WarpedIntegrandModel(WsabiLGP(rq_gp), prior)
    rq_model.update(log_phis[1:, :], rq[1:].reshape(-1, 1))
    rq_gp.optimize()
    rq_int = rq_model.integral_mean()[0] + q_min * r_int

    rvar = np.exp(log_liks) * pred_vars
    rvar_gp = GPy.models.GPRegression(log_phis[:1, :], np.sqrt(2 * rvar[0]).reshape(1, -1), kern)
    rvar_model = WarpedIntegrandModel(WsabiLGP(rvar_gp), prior)
    rvar_model.update(log_phis[1:, :], rvar[1:].reshape(-1, 1))
    rvar_gp.optimize()
    rvar_int = rvar_model.integral_mean()[0]

    # Posterior predictive mean and variance as ratios of integrals
    return rq_int / r_int, rvar_int / r_int, r_gp.kern, r_gp.Gaussian_noise.variance
def plot_gauss_mix(r: GaussMixture, q: GaussMixture):
    r.plot(label=r'$r(\phi) = p(z_d|\phi)$')
    q.plot(label=r'$q(\phi) = p(y_*|z_d, \phi)$')
    plt.xlabel(r"$\phi$")
    plt.legend()


if __name__ == "__main__":
    r = GaussMixture(means=[-1, 2], covariances=[0.7, 2], weights=[0.1, 0.2])
    q = GaussMixture([0.5, 1.5, -1.5, -0.3, 0.2], [100, 1, 0.4, 0.2, 1],
                     weights=[3, 0.5, 0.5, 0.2, -0.1])
    prior = Gaussian(mean=np.array([[0]]), covariance=np.array([[1]]))

    prediction = predictive_integral(r, q, prior_mean=0, prior_var=1)
    evidence = evidence_integral(r, prior_mean=0, prior_var=1)
    print(prediction, evidence, prediction / evidence)

    num, den, ratio = approx_integrals(prior, q, r)

    # plot_gauss_mix(r, q)
    # plt.show()
    # naive_bq = NaiveBQ(r, q, prior, num_batches=203, batch_size=1, plot_iterations=False,
    #                    display_step=50,
    #                    true_prediction_integral=num, true_evidence_integral=den)
    # naive_bq.quadrature()
    # naive_bq.plot_result()
    # plt.show()