def try_bec():
    # get train and test data
    in_provider = InputProvider()
    harmonic_sims, tr, te, va = in_provider.get_bec_data()
    data = bec.get_within_range(tr, g_low=30, g_high=50, n=100)
    # data_test = te.sample(100)

    # get GP model and fit
    gp = GP()
    gp.fit(torch.FloatTensor(data[['g', 'x']].to_numpy()),
           torch.FloatTensor(data.psi.to_numpy()), True)

    # predict around one fixed dimension
    df = bec.get_closest_sim(harmonic_sims, g=30.)
    test_gx = np.stack([30. * np.ones(df.x.shape[0]), df.x]).transpose()
    y_pred, sigma = gp.predict(torch.FloatTensor(test_gx))
    print(y_pred, sigma)

    # plot subplots with multiple fixed dimensions;
    # specify input dimensions: the first entry is fixed for each
    # iteration, the second entry is plotted against the fixed dimension
    input_dimensions = ['g', 'x']
    out_dimensions = 'psi'
    sub_plot_multiple_gp(gp, harmonic_sims, [5, 30, 60, 90],
                         input_dimensions, out_dimensions)
def sample_function(kernel, nsamples, n_fsamples, initialize=True):
    train_x = torch.linspace(0, 1, nsamples)
    train_y = torch.linspace(0, 1, nsamples)
    gp = GP(train_x, train_y, kernel, initialize)
    fsamples = gp.sample_f(n_fsamples)
    return gp, fsamples
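# A minimal usage sketch for sample_function, assuming the GP class above
# exposes sample_f as shown; `RBFKernel` is a placeholder name, not an API
# confirmed by the source.
import matplotlib.pyplot as plt
import torch

gp, fsamples = sample_function(RBFKernel(), nsamples=100, n_fsamples=3)
xs = torch.linspace(0, 1, 100)
for f in fsamples:
    # each f is assumed to be a length-100 tensor of function values
    plt.plot(xs.numpy(), f.detach().numpy())
plt.show()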
def __init__(self, num_gp=3, gps=None, gating_gps=None, epsilon=0.05,
             max_iter=100):
    # Avoid mutable default arguments: a list built in the signature is
    # created once at definition time and shared across all instances.
    self.num_gp = num_gp
    self.gps = gps if gps is not None else [GP() for _ in range(num_gp)]
    self.gating_gps = (gating_gps if gating_gps is not None
                       else [GP() for _ in range(num_gp)])
    self.X_train = None
    self.Y_train = None
    self.P = None
    self.epsilon = epsilon
    self.max_iter = max_iter
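# Why the defaults above avoid `gps=[GP() for i in range(3)]`: a mutable
# default is evaluated once, at definition time, and shared by every
# instance. A minimal stand-in demonstration (no real GP needed):
def make(items=[]):  # mutable default: created once, at def time
    items.append(1)
    return items

print(make())  # [1]
print(make())  # [1, 1] -- state leaks between calls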
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
             name='sparse gp', Y_metadata=None, normalizer=False):
    # pick a sensible inference method
    if inference_method is None:
        if isinstance(likelihood, likelihoods.Gaussian):
            inference_method = var_dtc.VarDTC(
                limit=1 if not self.missing_data else Y.shape[1])
        else:
            # inference_method = ??
            raise NotImplementedError("what to do what to do?")
        print("defaulting to", inference_method,
              "for latent function inference")

    self.Z = Param('inducing inputs', Z)
    self.num_inducing = Z.shape[0]

    GP.__init__(self, X, Y, kernel, likelihood,
                inference_method=inference_method, name=name,
                Y_metadata=Y_metadata, normalizer=normalizer)

    logger.info("Adding Z as parameter")
    self.link_parameter(self.Z, index=0)
    self.posterior = None
def optimise(self, reuse=True):
    '''Run an optimisation loop to find better model hyperparams.

    Inputs:
        reuse | bool, True if you want to carry on from where you left off
    Outputs:
        best_hyperparams | list
    '''
    if reuse:
        x = self.x_data
        y = self.score_data
    else:
        x = []
        y = []
    for loop in range(self.loops):
        for iter in range(self.iter_per_loop):
            if x:
                gp = GP(x, y, 'matern')
                new_x = self._choose_next(gp)  # choose hyperparams to try next
                new_x_dict = self._array_to_hyperparams(new_x)
            else:
                # TODO: randomly choose a starting point; until this is
                # implemented, new_x_dict would be undefined below
                raise NotImplementedError(
                    "random initialisation not implemented")

            # Train and predict
            try:
                self.model.train(self.X_train, self.y_train, **new_x_dict)
                output_pred = self.model.predict(self.X_val)
                # make sure it's an Nx1 array
                if len(output_pred.shape) == 1:
                    output_pred = np.reshape(output_pred,
                                             (output_pred.shape[0], 1))
            except AttributeError as e:
                raise AttributeError("Model did not have method `train` "
                                     "or `predict`") from e

            new_score = self._score(output_pred)
            if new_score < self.best_score:
                self.best_score = new_score
                self.best_hyperparams = new_x_dict
                print("Best score is {}".format(new_score))
                print("\n")
            x.append(new_x)
            y.append(new_score)
    return self.best_hyperparams
def __init__(self):
    rospy.init_node('sampling_modeling_node')
    num_gp = rospy.get_param("~num_gp", 3)
    self.optimize_kernel = rospy.get_param("~online_kernel_optimization",
                                           True)
    modeling_gps = []
    gating_gps = []
    for i in range(num_gp):
        modeling_gp_param = rospy.get_param(
            "~modeling_gp_" + str(i) + "_kernel", [0.5, 0.5, 0.1])
        gating_gp_param = rospy.get_param(
            "~gating_gp_" + str(i) + "_kernel", [0.5, 0.5, 0.1])
        assert len(modeling_gp_param) == 3
        assert len(gating_gp_param) == 3
        modeling_gps.append(
            GP(modeling_gp_param[0], modeling_gp_param[1],
               modeling_gp_param[2]))
        gating_gps.append(
            GP(gating_gp_param[0], gating_gp_param[1], gating_gp_param[2]))
    EM_epsilon = rospy.get_param("~EM_epsilon", 0.03)
    EM_max_iteration = rospy.get_param("~EM_max_iteration", 100)
    self.model = MixtureGaussianProcess(num_gp=num_gp, gps=modeling_gps,
                                        gating_gps=gating_gps,
                                        epsilon=EM_epsilon,
                                        max_iter=EM_max_iteration)
    self.X_test = None
    self.add_test_position_server = rospy.Service(
        KModelingNameSpace + 'add_test_position', AddTestPositionToModel,
        self.AddTestPosition)
    self.add_sample_server = rospy.Service(
        KModelingNameSpace + 'add_samples_to_model', AddSampleToModel,
        self.AddSampleToModel)
    self.update_model_server = rospy.Service(
        KModelingNameSpace + 'update_model', Trigger, self.UpdateModel)
    self.model_predict_server = rospy.Service(
        KModelingNameSpace + 'model_predict', ModelPredict,
        self.ModelPredict)
    self.sample_count = 0
    rospy.spin()
def test_posterior_std():
    np.random.seed(1)
    N, n = 10, 50
    f = lambda x: np.sin(0.9 * x).flatten()
    X = np.random.uniform(-5, 5, size=(N, 1))
    Xtest = np.linspace(-5, 5, n).reshape(-1, 1)
    y = f(X)

    gg = GP(X, y, SquaredExp)
    means, stds = gg.draw_posterior(Xtest)

    truth = np.array([
        0.04202604, 0.06074646, 0.06442741, 0.06198905, 0.05028453,
        0.01173271, 0.04384755, 0.03729233, 0.0337959, 0.04233938,
        0.05163432, 0.06106133, 0.06421379, 0.05957212, 0.05028201,
        0.04232989, 0.04012184, 0.0419569, 0.04305928, 0.04062566,
        0.03890225, 0.05161809, 0.07836714, 0.10348942, 0.11188181,
        0.09352313, 0.05289924, 0.07014139, 0.15883308, 0.25247954,
        0.32610151, 0.3625603, 0.35170763, 0.29152607, 0.18832523,
        0.06334274, 0.11994324, 0.27386883, 0.42075257, 0.54795666,
        0.64986834, 0.72557935, 0.77767936, 0.81080593, 0.83020954,
        0.84065061, 0.84580095, 0.84812688, 0.84908808, 0.8494516
    ])
    assert np.allclose(stds, truth, atol=1e-5)
def try_1D():
    ip = InputProvider()
    x_data, y_data, x_test = ip.get_1d_regression_data()
    gp = GP()
    gp.fit(x_data, y_data, True)
    gp.plot(x_test)
def plot_results(time_points, values):
    axis_x = np.arange(0, 5.1, 0.1)
    fig = plt.figure(0)
    plt.axis([0, 5, -2, 2])
    plt.grid(color='w', linestyle='-', linewidth=0.5)
    ax = fig.add_subplot(111)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.patch.set_facecolor('#E8E8F1')

    # compute the posterior mean and variance at each test point
    mu = np.zeros(axis_x.size)
    var = np.zeros(axis_x.size)
    ker = Kernel()
    ker.SE(1, 1)
    gp = GP()
    for i in range(axis_x.size):
        mu[i], var[i], _ = gp.GPR(time_points=time_points, values=values,
                                  predict_point=axis_x[i], kernel=ker)
    print(mu)

    # show the variance band
    plt.fill_between(axis_x, mu + var, mu - var, color='#D1D9F0')
    # show the mean
    plt.plot(axis_x, mu, linewidth=2, color="#5B8CEB")
    # show the observed points
    plt.scatter(time_points, values, color='#598BEB')
    plt.show()
def test_gp_posterior_mean():
    np.random.seed(1)
    N, n = 10, 50
    f = lambda x: np.sin(0.9 * x).flatten()
    X = np.random.uniform(-5, 5, size=(N, 1))
    Xtest = np.linspace(-5, 5, n).reshape(-1, 1)
    y = f(X)

    gg = GP(X, y, SquaredExp)
    means, stds = gg.draw_posterior(Xtest)

    truth = [
        0.97406338, 0.93351725, 0.8504679, 0.73040922, 0.589338,
        0.42420947, 0.24978696, 0.07058869, -0.11020691, -0.28851528,
        -0.45856577, -0.61317727, -0.74742986, -0.8562826, -0.93591749,
        -0.98394361, -0.99921925, -0.98145889, -0.93090595, -0.84831721,
        -0.73533425, -0.59509, -0.4327257, -0.25546679, -0.07205324,
        0.10841341, 0.27787463, 0.43049268, 0.56317516, 0.67525843,
        0.76748952, 0.84072345, 0.89486151, 0.92845102, 0.93909474,
        0.92449526, 0.88371745, 0.81818343, 0.73203338, 0.6317308,
        0.52505586, 0.4198154, 0.3226457, 0.23820076, 0.1688575,
        0.11490075, 0.07503359, 0.04701673, 0.02826614, 0.01630293
    ]
    assert np.allclose(means, truth)
def create_gp(domain, kernel_name, noise=None, hps=None, **kwargs):
    """Create GP with the specified kernel.

    Args:
        domain: List of lists [[dim1_low, dim1_high], ...]
        kernel_name: Name of kernel to use.
        noise: The amount of noise in the system. If None, use a default.
        hps: Kernel hyperparameters, if known.
        kwargs: Other arguments to be passed to the kernel.
    Returns:
        GP object.
    """
    kernel = None
    for k_info in all_kernels:
        if k_info.name.lower() == kernel_name.lower():
            kernel = k_info.obj(hps=hps, **kwargs)
    if kernel is None:
        raise ValueError('Kernel %s not found.' % kernel_name)
    if noise is None:
        # Make default noise small but positive. Helps with SPD conditions.
        noise = 0.01
    gp = GP(domain, kernel, noise)
    return gp
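# A minimal usage sketch for create_gp; 'rbf' is an assumed kernel name
# (whatever happens to be registered in all_kernels), and the observation
# methods are the ones used by create_tuned_gp below.
domain = [[0.0, 1.0], [0.0, 1.0]]
gp = create_gp(domain, 'rbf', noise=0.05)
gp.add_observations([[0.2, 0.3], [0.8, 0.5]], [1.0, -0.4])
print(gp.get_log_marginal_likelihood())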
def create_tuned_gp(domain, kernel_name, x_data, y_data, maxfs=50, **kwargs):
    """Tune the GP hyperparameters on the given data.

    Args:
        domain: List of lists [[dim1_low, dim1_high], ...]
        kernel_name: Name of kernel to use.
        x_data: List of lists representing the observed points.
        y_data: Observed values at x_data.
        maxfs: Maximum number of GPs to build in tuning.
        kwargs: Other arguments to be passed to the kernel.
    Returns:
        Tuned GP (note: does not have data added to it).
    """
    kernel_creator = None
    for k_info in all_kernels:
        if k_info.name.lower() == kernel_name.lower():
            kernel_creator = k_info.obj
    if kernel_creator is None:
        raise ValueError('Kernel %s not found.' % kernel_name)
    hp_specs = kernel_creator.get_hp_specs()

    def objective(x):
        # x[0] is the observation noise; the rest are kernel hyperparameters.
        noise = x[0]
        hps = {}
        for idx in range(len(hp_specs)):
            hps[hp_specs[idx].name] = x[idx + 1]
        kernel = kernel_creator(hps=hps, **kwargs)
        gp = GP(domain, kernel, noise)
        gp.add_observations(x_data, y_data)
        # Minimize the negative log marginal likelihood.
        return -1 * gp.get_log_marginal_likelihood()

    bounds = [[0.0001, 1]] + [[hp_info.lower, hp_info.upper]
                              for hp_info in hp_specs]
    if maxfs is not None:
        best_specs = direct_min(objective, bounds, maxf=maxfs).x
    else:
        best_specs = direct_min(objective, bounds).x
    noise = best_specs[0]
    hps = {}
    for idx in range(len(hp_specs)):
        hps[hp_specs[idx].name] = best_specs[idx + 1]
    print(hps)
    kernel = kernel_creator(hps=hps, **kwargs)
    return GP(domain, kernel, noise)
class BQ(object):
    r"""
    Estimate an integral of the following form using Bayesian
    Quadrature with a Gaussian Process prior:

    .. math::

        Z = \int \ell(x)\mathcal{N}(x\ |\ \mu, \sigma^2)\ \mathrm{d}x

    See :meth:`~bayesian_quadrature.bq.BQ.load_options` for details on
    allowable options.

    Parameters
    ----------
    x : numpy.ndarray
        size :math:`s` array of sample locations
    l : numpy.ndarray
        size :math:`s` array of sample observations
    options : dict
        Options dictionary

    Notes
    -----
    This algorithm is an updated version of the one described in
    [OD12]_. The overall idea is:

    1. Estimate :math:`\log\ell` using a GP.
    2. Estimate :math:`\bar{\ell}=\exp(\log\ell)` using a second GP.
    3. Integrate exactly under :math:`\bar{\ell}`.

    """

    ##################################################################
    # Initialization                                                 #
    ##################################################################

    def __init__(self, x, l, **options):
        """Initialize the Bayesian Quadrature object."""

        #: Vector of observed locations
        self.x_s = np.array(x, dtype=DTYPE)
        #: Vector of observed values
        self.l_s = np.array(l, dtype=DTYPE)

        if (self.l_s <= 0).any():
            raise ValueError("l_s contains zero or negative values")
        if self.x_s.ndim > 1:
            raise ValueError("invalid number of dimensions for x")
        if self.l_s.ndim > 1:
            raise ValueError("invalid number of dimensions for l")
        if self.x_s.shape != self.l_s.shape:
            raise ValueError("shape mismatch for x and l")

        #: Vector of log-transformed observed values
        self.tl_s = np.log(self.l_s)
        #: Number of observations
        self.ns = self.x_s.shape[0]

        self.load_options(**options)
        self.initialized = False

        self.gp_log_l = None  #: Gaussian process over log(l)
        self.gp_l = None  #: Gaussian process over exp(log(l))

        self.x_c = None  #: Vector of candidate locations
        self.l_c = None  #: Vector of candidate values
        self.nc = None  #: Number of candidate points

        self.x_sc = None  #: Vector of observed plus candidate locations
        self.l_sc = None  #: Vector of observed plus candidate values
        self.nsc = None  #: Number of observations plus candidates

        self._approx_x = None
        self._approx_px = None

    def load_options(self, kernel, n_candidate, candidate_thresh,
                     x_mean, x_var, optim_method):
        r"""
        Load options.

        Parameters
        ----------
        kernel : Kernel
            The type of kernel to use. Note that if the kernel is not
            Gaussian, slow approximate (rather than analytic) solutions
            will be used.
        n_candidate : int
            The (maximum) number of candidate points.
        candidate_thresh : float
            Minimum allowed space between candidates.
        x_mean : float
            Prior mean, :math:`\mu`.
        x_var : float
            Prior variance, :math:`\sigma^2`.
        optim_method : string
            Method to use for parameter optimization (e.g., 'L-BFGS-B'
            or 'Powell')

        """
        # store the options dictionary for future use
        self.options = {
            'kernel': kernel,
            'n_candidate': int(n_candidate),
            'candidate_thresh': float(candidate_thresh),
            'x_mean': np.array([x_mean], dtype=DTYPE, order='F'),
            'x_cov': np.array([[x_var]], dtype=DTYPE, order='F'),
            'use_approx': not (kernel is GaussianKernel),
            'wrapped': kernel is PeriodicKernel,
            'optim_method': optim_method
        }

        if self.options['use_approx']:
            logger.debug("Using approximate solutions for non-Gaussian kernel")

    def init(self, params_tl, params_l):
        r"""Initialize the GPs.

        Parameters
        ----------
        params_tl : np.ndarray
            initial parameters for GP over :math:`\log\ell`
        params_l : np.ndarray
            initial parameters for GP over :math:`\exp(\log\ell)`

        """
        kernel = self.options['kernel']

        # create the gaussian process over log(l)
        self.gp_log_l = GP(
            kernel(*params_tl[:-1]), self.x_s, self.tl_s, s=params_tl[-1])
        # TODO: improve matrix conditioning for log(l)
        self.gp_log_l.jitter = np.zeros(self.ns, dtype=DTYPE)

        # pick candidate points
        self._choose_candidates()

        # create the gaussian process over exp(log(l))
        self.gp_l = GP(
            kernel(*params_l[:-1]), self.x_sc, self.l_sc, s=params_l[-1])
        # TODO: improve matrix conditioning for exp(log(l))
        self.gp_l.jitter = np.zeros(self.nsc, dtype=DTYPE)

        # make the vector of locations for approximations
        self._approx_x = self._make_approx_x()
        self._approx_px = self._make_approx_px()

        self.initialized = True

    ##################################################################
    # Mean and variance of l                                         #
    ##################################################################

    def l_mean(self, x):
        r"""
        Mean of the final approximation to :math:`\ell`.

        Parameters
        ----------
        x : numpy.ndarray
            :math:`m` array of new sample locations.

        Returns
        -------
        mean : numpy.ndarray
            :math:`m` array of predictive means

        Notes
        -----
        This is just the mean of the GP over :math:`\exp(\log\ell)`, i.e.:

        .. math::

            \mathbb{E}[\bar{\ell}(\mathbf{x})] = \mathbb{E}_{\mathrm{GP}(\exp(\log\ell))}(\mathbf{x})

        """
        return self.gp_l.mean(x)

    def l_var(self, x):
        r"""
        Marginal variance of the final approximation to :math:`\ell`.

        Parameters
        ----------
        x : numpy.ndarray
            :math:`m` array of new sample locations.

        Returns
        -------
        mean : numpy.ndarray
            :math:`m` array of predictive variances

        Notes
        -----
        This is just the diagonal of the covariance of the GP over
        :math:`\log\ell` multiplied by the squared mean of the GP over
        :math:`\exp(\log\ell)`, i.e.:

        .. math::

            \mathbb{V}[\bar{\ell}(\mathbf{x})] = \mathbb{V}_{\mathrm{GP}(\log\ell)}(\mathbf{x})\mathbb{E}_{\mathrm{GP}(\exp(\log\ell))}(\mathbf{x})^2

        """
        v_log_l = np.diag(self.gp_log_l.cov(x)).copy()
        m_l = self.gp_l.mean(x)
        l_var = v_log_l * m_l ** 2
        l_var[l_var < 0] = 0
        return l_var

    ##################################################################
    # Mean of Z                                                      #
    ##################################################################

    def Z_mean(self):
        r"""
        Computes the mean of :math:`Z`, which is defined as:

        .. math ::

            \mathbb{E}[Z]=\int \bar{\ell}(x)p(x)\ \mathrm{d}x

        Returns
        -------
        mean : float

        """
        if self.options['use_approx']:
            return self._approx_Z_mean()
        else:
            return self._exact_Z_mean()

    def _approx_Z_mean(self, xo=None):
        if xo is None:
            xo = self._approx_x
            p_xo = self._approx_px
        else:
            p_xo = self._make_approx_px(xo)

        approx = bq_c.approx_Z_mean(
            np.array(xo[None], order='F'), p_xo, self.l_mean(xo))

        return approx

    def _exact_Z_mean(self):
        r"""
        Equivalent to:

        .. math::

            \begin{align*}
            \mathbb{E}[Z] &\approx \int\bar{\ell}(x)\mathcal{N}(x\ |\ \mu, \sigma^2)\ \mathrm{d}x \\
            &= \left(\int K_{\exp(\log\ell)}(x, \mathbf{x}_c)\mathcal{N}(x\ |\ \mu, \sigma^2)\ \mathrm{d}x\right)K_{\exp(\log\ell)}(\mathbf{x}_c, \mathbf{x}_c)^{-1}\ell(\mathbf{x}_c)
            \end{align*}

        """
        x_sc = np.array(self.x_sc[None], order='F')
        alpha_l = self.gp_l.inv_Kxx_y
        h_s, w_s = self.gp_l.K.params
        w_s = np.array([w_s], order='F')

        m_Z = bq_c.Z_mean(
            x_sc, alpha_l, h_s, w_s,
            self.options['x_mean'], self.options['x_cov'])

        return m_Z

    ##################################################################
    # Variance of Z                                                  #
    ##################################################################

    def Z_var(self):
        r"""
        Computes the variance of :math:`Z`, which is defined as:

        .. math::

            \mathbb{V}(Z)\approx \int\int \mathrm{Cov}_{\log\ell}(x, x^\prime)\bar{\ell}(x)\bar{\ell}(x^\prime)p(x)p(x^\prime)\ \mathrm{d}x\ \mathrm{d}x^\prime

        Returns
        -------
        var : float

        """
        if self.options['use_approx']:
            return self._approx_Z_var()
        else:
            return self._exact_Z_var()

    def _approx_Z_var(self, xo=None):
        if xo is None:
            xo = self._approx_x
            p_xo = self._approx_px
        else:
            p_xo = self._make_approx_px(xo)

        approx = bq_c.approx_Z_var(
            np.array(xo[None], order='F'), p_xo,
            np.array(self.l_mean(xo), order='F'),
            np.array(self.gp_log_l.cov(xo), order='F'))

        return approx

    def _exact_Z_var(self):
        # values for the GPs over l(x) and log(l(x))
        x_s = np.array(self.x_s[None], order='F')
        x_sc = np.array(self.x_sc[None], order='F')

        alpha_l = self.gp_l.inv_Kxx_y
        L_tl = np.array(self.gp_log_l.Lxx, order='F')

        h_l, w_l = self.gp_l.K.params
        w_l = np.array([w_l])
        h_tl, w_tl = self.gp_log_l.K.params
        w_tl = np.array([w_tl])

        V_Z = bq_c.Z_var(
            x_s, x_sc, alpha_l, L_tl, h_l, w_l, h_tl, w_tl,
            self.options['x_mean'], self.options['x_cov'])

        return V_Z

    ##################################################################
    # Expected variance of Z                                         #
    ##################################################################

    def expected_Z_var(self, x_a):
        r"""
        Computes the expected variance of :math:`Z` given a new
        observation :math:`x_a`. This is defined as:

        .. math ::

            \mathbb{E}[V(Z)\ |\ \ell_s, \ell_a] = \mathbb{E}[Z\ |\ \ell_s]^2 + V(Z\ |\ \ell_s) - \int \mathbb{E}[Z\ |\ \ell_s, \ell_a]^2 \mathcal{N}(\ell_a\ |\ \hat{m}_a, \hat{C}_a)\ \mathrm{d}\ell_a

        Parameters
        ----------
        x_a : numpy.ndarray
            vector of points for which to (independently) compute the
            expected variance

        Returns
        -------
        out : expected variance for each point in `x_a`

        """
        mean_second_moment = self.Z_mean() ** 2 + self.Z_var()
        expected_squared_mean = self.expected_squared_mean(x_a)
        expected_var = mean_second_moment - expected_squared_mean
        return expected_var

    def expected_squared_mean(self, x_a):
        r"""
        Computes the expected square mean of :math:`Z` given a new
        observation :math:`x_a`. This is defined as:

        .. math ::

            \mathbb{E}[\mathbb{E}[Z]^2 |\ \ell_s] = \int \mathbb{E}[Z\ |\ \ell_s, \ell_a]^2 \mathcal{N}(\ell_a\ |\ \hat{m}_a, \hat{C}_a)\ \mathrm{d}\ell_a

        Parameters
        ----------
        x_a : numpy.ndarray
            vector of points for which to (independently) compute the
            expected squared mean

        Returns
        -------
        out : expected squared mean for each point in `x_a`

        """
        esm = np.empty(x_a.shape[0])
        for i in range(x_a.shape[0]):
            esm[i] = self._esm_and_em(x_a[[i]])[0]
        return esm

    def expected_mean(self, x_a):
        r"""
        Computes the expected mean of :math:`Z` given a new
        observation :math:`x_a`.

        Parameters
        ----------
        x_a : numpy.ndarray
            vector of points for which to (independently) compute the
            expected mean

        Returns
        -------
        out : expected mean for each point in `x_a`

        """
        em = np.empty(x_a.shape[0])
        for i in range(x_a.shape[0]):
            em[i] = self._esm_and_em(x_a[[i]])[1]
        return em

    def expected_squared_mean_and_mean(self, x_a):
        r"""
        Computes the expected squared mean and expected mean of
        :math:`Z` given a new observation :math:`x_a`.

        Parameters
        ----------
        x_a : numpy.ndarray
            vector of points for which to (independently) compute the
            expected mean

        Returns
        -------
        out : expected squared mean and expected mean for each point
            in `x_a`

        """
        em = np.empty((x_a.shape[0], 2))
        for i in range(x_a.shape[0]):
            em[i] = self._esm_and_em(x_a[[i]])
        return em

    def _esm_and_em(self, x_a):
        """Computes the expected square mean for a single point `x_a`."""
        # check for invalid inputs
        if x_a is None or np.isnan(x_a) or np.isinf(x_a):
            raise ValueError("invalid value for x_a: %s", x_a)

        # don't do the heavy computation if the point is close to one
        # we already have
        if np.isclose(x_a, self.x_s, atol=1e-4).any():
            em = self.Z_mean()
            esm = em ** 2
            return esm, em

        # include new x_a
        x_sca = np.concatenate([self.x_sc, x_a])

        # compute K_l(x_sca, x_sca)
        K_l = self.gp_l.Kxoxo(x_sca)
        jitter = np.zeros(self.nsc + 1)

        # add noise to the candidate points closest to x_a, since they
        # are likely to change
        close = np.abs(self.x_c - x_a) < self.options['candidate_thresh']
        if close.any():
            idx = np.array(np.nonzero(close)[0]) + self.ns
            bq_c.improve_covariance_conditioning(K_l, jitter, idx)

        # also add noise to the new point
        bq_c.improve_covariance_conditioning(K_l, jitter,
                                             np.array([self.nsc]))

        L = np.empty(K_l.shape, order='F')
        try:
            la.cho_factor(np.array(K_l, order='F'), L)
        except np.linalg.LinAlgError:
            # if the matrix is singular, it's because either x_a is
            # close to a point we already have, or the kernel produces
            # similar values for all points (e.g., there is a very
            # large variance). In both cases, our expectation should
            # be that the mean won't change much, so just return the
            # mean we currently have.
            em = self.Z_mean()
            esm = em ** 2
            return esm, em

        # compute expected transformed mean
        tm_a = np.array(self.gp_log_l.mean(x_a))
        # compute expected transformed covariance
        tC_a = np.array(self.gp_log_l.cov(x_a), order='F')

        if self.options['use_approx']:
            xo = self._approx_x
            p_xo = self._approx_px
            Kxxo = np.array(self.gp_l.K(x_sca, xo), order='F')
            esm, em = bq_c.approx_expected_squared_mean_and_mean(
                self.l_sc, L, tm_a, tC_a,
                np.array(xo[None], order='F'), p_xo, Kxxo)

        else:
            esm, em = bq_c.expected_squared_mean_and_mean(
                self.l_sc, L, tm_a, tC_a,
                np.array(x_sca[None], order='F'),
                self.gp_l.K.h, np.array([self.gp_l.K.w]),
                self.options['x_mean'],
                np.array(self.options['x_cov'], order='F'))

        if np.isnan(esm) or esm < 0:
            raise RuntimeError(
                "invalid expected squared mean for x_a=%s: %s" % (x_a, esm))
        if np.isnan(em):
            raise RuntimeError(
                "invalid expected mean for x_a=%s: %s" % (x_a, em))

        if np.isinf(esm):
            logger.warning("expected squared mean for x_a=%s is infinity!",
                           x_a)
        if np.isinf(em):
            logger.warning("expected mean for x_a=%s is infinity!", x_a)

        return esm, em

    ##################################################################
    # Hyperparameter optimization/marginalization                    #
    ##################################################################

    def _make_llh_params(self, params):
        nparam = len(params)

        def f(x):
            if x is None or np.isnan(x).any():
                return -np.inf

            try:
                self._set_gp_log_l_params(dict(zip(params, x[:nparam])))
                self._set_gp_l_params(dict(zip(params, x[nparam:])))
            except (ValueError, np.linalg.LinAlgError):
                return -np.inf

            try:
                llh = self.gp_log_l.log_lh + self.gp_l.log_lh
            except (ValueError, np.linalg.LinAlgError):
                return -np.inf

            return llh

        return f

    def fit_hypers(self, params):
        p0_tl = [self.gp_log_l.get_param(p) for p in params]
        p0_l = [self.gp_l.get_param(p) for p in params]
        p0 = np.array(p0_tl + p0_l)
        f = self._make_llh_params(params)

        p0 = util.find_good_parameters(f, p0, self.options['optim_method'])
        if p0 is None:
            raise RuntimeError("couldn't find good parameters")

    def sample_hypers(self, params, n=1, nburn=10):
        r"""
        Use slice sampling to sample new hyperparameters for the two
        GPs. Note that this will probably cause
        :math:`\bar{\ell}(x_{sc})` to change.

        Parameters
        ----------
        params : list of strings
            Dictionary of parameter names to be sampled
        nburn : int
            Number of burn-in samples

        """
        # TODO: should the window be a parameter that is set by the
        # user? is there a way to choose a sane window size
        # automatically?
        nparam = len(params)
        window = 2 * nparam

        p0_tl = [self.gp_log_l.get_param(p) for p in params]
        p0_l = [self.gp_l.get_param(p) for p in params]
        p0 = np.array(p0_tl + p0_l)

        f = self._make_llh_params(params)
        if f(p0) < MIN:
            pn = util.find_good_parameters(f, p0,
                                           self.options['optim_method'])
            if pn is None:
                raise RuntimeError("couldn't find good starting parameters")
            p0 = pn

        hypers = util.slice_sample(f, nburn + n, window, p0, nburn=nburn,
                                   freq=1)
        return hypers[:, :nparam], hypers[:, nparam:]

    ##################################################################
    # Active sampling                                                #
    ##################################################################

    def marginalize(self, funs, n, params):
        r"""
        Compute the approximate marginal functions `funs` by
        approximately marginalizing over the GP hyperparameters.

        Parameters
        ----------
        funs : list
            List of functions for which to compute the marginal.
        n : int
            Number of samples to take when doing the approximate
            marginalization
        params : list or tuple
            List of parameters to marginalize over

        """
        # cache state
        state = deepcopy(self.__getstate__())

        # allocate space for the function values
        values = []
        for fun in funs:
            value = fun()
            try:
                m = value.shape
            except AttributeError:
                values.append(np.empty(n))
            else:
                values.append(np.empty((n,) + m))

        # do all the sampling at once, because it is faster
        hypers_tl, hypers_l = self.sample_hypers(params, n=n, nburn=1)

        # compute values for the functions based on the parameters we
        # just sampled
        for i in range(n):
            params_tl = dict(zip(params, hypers_tl[i]))
            params_l = dict(zip(params, hypers_l[i]))

            self._set_gp_log_l_params(params_tl)
            self._set_gp_l_params(params_l)

            for j, fun in enumerate(funs):
                try:
                    values[j][i] = fun()
                except Exception:
                    logger.error("error with parameters %s and %s",
                                 params_tl, params_l)
                    raise

        # restore state
        self.__setstate__(state)

        return values

    def choose_next(self, x_a, n, params, plot=False):
        f = lambda: -self.expected_squared_mean(x_a)
        values = self.marginalize([f], n, params)
        loss = values[0].mean(axis=0)

        best = np.min(loss)
        close = np.nonzero(np.isclose(loss, best))[0]
        choice = np.random.choice(close)
        best = x_a[choice]

        if plot:
            fig, (ax1, ax2) = plt.subplots(1, 2, sharex=True)

            self.plot_l(ax1, xmin=x_a.min(), xmax=x_a.max())
            util.vlines(ax1, best, color='g', linestyle='--', lw=2)

            ax2.plot(x_a, loss, 'k-', lw=2)
            util.vlines(ax2, best, color='g', linestyle='--', lw=2)
            ax2.set_title("Negative expected sq. mean")

            fig.set_figwidth(10)
            fig.set_figheight(3.5)

        return best

    def add_observation(self, x_a, l_a):
        diffs = np.abs(x_a - self.x_s)
        if diffs.min() < self.options['candidate_thresh']:
            c = diffs.argmin()
            logger.debug("x_a=%s is close to x_s=%s, averaging them",
                         x_a, self.x_s[c])
            self.x_s[c] = (self.x_s[c] + x_a) / 2.
            self.l_s[c] = (self.l_s[c] + l_a) / 2.
            self.tl_s[c] = np.log(float(self.l_s[c]))

        else:
            self.x_s = np.append(self.x_s, float(x_a))
            self.l_s = np.append(self.l_s, float(l_a))
            self.tl_s = np.append(self.tl_s, np.log(float(l_a)))
            self.ns += 1

        # reinitialize the bq object
        self.init(self.gp_log_l.params, self.gp_l.params)

    ##################################################################
    # Plotting methods                                               #
    ##################################################################

    def plot_gp_log_l(self, ax, f_l=None, xmin=None, xmax=None):
        x = self._make_approx_x(xmin=xmin, xmax=xmax, n=1000)

        if f_l is not None:
            l = np.log(f_l(x))
            ax.plot(x, l, 'k-', lw=2)

        self.gp_log_l.plot(ax, xlim=[x.min(), x.max()], color='r')
        ax.plot(
            self.x_c, np.log(self.l_c),
            'bs', markersize=4, label=r"$m_{\log\ell}(x_c)$")

        ax.set_title(r"GP over $\log\ell$")
        util.set_scientific(ax, -5, 4)

    def plot_gp_l(self, ax, f_l=None, xmin=None, xmax=None):
        x = self._make_approx_x(xmin=xmin, xmax=xmax, n=1000)

        if f_l is not None:
            l = f_l(x)
            ax.plot(x, l, 'k-', lw=2)

        self.gp_l.plot(ax, xlim=[x.min(), x.max()], color='r')
        ax.plot(
            self.x_c, self.l_c,
            'bs', markersize=4, label=r"$\exp(m_{\log\ell}(x_c))$")

        ax.set_title(r"GP over $\exp(\log\ell)$")
        util.set_scientific(ax, -5, 4)

    def plot_l(self, ax, f_l=None, xmin=None, xmax=None, legend=True):
        x = self._make_approx_x(xmin=xmin, xmax=xmax, n=1000)

        if f_l is not None:
            l = f_l(x)
            ax.plot(x, l, 'k-', lw=2, label=r"$\ell(x)$")

        l_mean = self.l_mean(x)
        l_var = np.sqrt(self.l_var(x))
        lower = l_mean - l_var
        upper = l_mean + l_var

        ax.fill_between(x, lower, upper, color='r', alpha=0.2)
        ax.plot(x, l_mean, 'r-', lw=2, label="final approx")

        ax.plot(
            self.x_s, self.l_s,
            'ro', markersize=5, label=r"$\ell(x_s)$")
        ax.plot(
            self.x_c, self.l_c,
            'bs', markersize=4, label=r"$\exp(m_{\log\ell}(x_c))$")

        ax.set_title("Final Approximation")
        ax.set_xlim(x.min(), x.max())
        util.set_scientific(ax, -5, 4)

        if legend:
            ax.legend(loc=0, fontsize=10)

    def plot_expected_squared_mean(self, ax, xmin=None, xmax=None):
        x_a = self._make_approx_x(xmin=xmin, xmax=xmax, n=1000)
        exp_sq_m = self.expected_squared_mean(x_a)

        # plot the expected squared mean
        ax.plot(x_a, exp_sq_m,
                label=r"$E[\mathrm{m}(Z)^2]$",
                color='k', lw=2)
        ax.set_xlim(x_a.min(), x_a.max())

        # plot a line for the current squared mean
        util.hlines(
            ax, self.Z_mean() ** 2,
            color="#00FF00", lw=2, label=r"$\mathrm{m}(Z)^2$")

        # plot lines where there are observations
        util.vlines(ax, self.x_sc, color='k', linestyle='--', alpha=0.5)

        util.set_scientific(ax, -5, 4)
        ax.legend(loc=0, fontsize=10)
        ax.set_title(r"Expected squared mean of $Z$")

    def plot_expected_variance(self, ax, xmin=None, xmax=None):
        x_a = self._make_approx_x(xmin=xmin, xmax=xmax, n=1000)
        exp_Z_var = self.expected_Z_var(x_a)

        # plot the expected variance
        ax.plot(x_a, exp_Z_var,
                label=r"$E[\mathrm{Var}(Z)]$",
                color='k', lw=2)
        ax.set_xlim(x_a.min(), x_a.max())

        # plot a line for the current variance
        util.hlines(
            ax, self.Z_var(),
            color="#00FF00", lw=2, label=r"$\mathrm{Var}(Z)$")

        # plot lines where there are observations
        util.vlines(ax, self.x_sc, color='k', linestyle='--', alpha=0.5)

        util.set_scientific(ax, -5, 4)
        ax.legend(loc=0, fontsize=10)
        ax.set_title(r"Expected variance of $Z$")

    def plot(self, f_l=None, xmin=None, xmax=None):
        fig, axes = plt.subplots(1, 3)

        self.plot_gp_log_l(axes[0], f_l=f_l, xmin=xmin, xmax=xmax)
        self.plot_gp_l(axes[1], f_l=f_l, xmin=xmin, xmax=xmax)
        self.plot_l(axes[2], f_l=f_l, xmin=xmin, xmax=xmax)

        ymins, ymaxs = zip(*[ax.get_ylim() for ax in axes[1:]])
        ymin = min(ymins)
        ymax = max(ymaxs)
        for ax in axes[1:]:
            ax.set_ylim(ymin, ymax)

        fig.set_figwidth(14)
        fig.set_figheight(3.5)

        return fig, axes

    ##################################################################
    # Saving and restoring                                           #
    ##################################################################

    def __getstate__(self):
        state = {}
        state['x_s'] = self.x_s
        state['l_s'] = self.l_s
        state['tl_s'] = self.tl_s
        state['options'] = self.options
        state['initialized'] = self.initialized

        if self.initialized:
            state['gp_log_l'] = self.gp_log_l
            state['gp_log_l_jitter'] = self.gp_log_l.jitter
            state['gp_l'] = self.gp_l
            state['gp_l_jitter'] = self.gp_l.jitter
            state['_approx_x'] = self._approx_x
            state['_approx_px'] = self._approx_px

        return state

    def __setstate__(self, state):
        self.x_s = state['x_s']
        self.l_s = state['l_s']
        self.tl_s = state['tl_s']
        self.ns = self.x_s.shape[0]
        self.options = state['options']
        self.initialized = state['initialized']

        if self.initialized:
            self.gp_log_l = state['gp_log_l']
            self.gp_log_l.jitter = state['gp_log_l_jitter']
            self.gp_l = state['gp_l']
            self.gp_l.jitter = state['gp_l_jitter']
            self.x_sc = self.gp_l._x
            self.l_sc = self.gp_l._y
            self.nsc = self.x_sc.shape[0]
            self.x_c = self.x_sc[self.ns:]
            self.l_c = self.l_sc[self.ns:]
            self.nc = self.nsc - self.ns
            self._approx_x = state['_approx_x']
            self._approx_px = state['_approx_px']

        else:
            self.gp_log_l = None
            self.gp_l = None
            self.x_c = None
            self.l_c = None
            self.nc = None
            self.x_sc = None
            self.l_sc = None
            self.nsc = None
            self._approx_x = None
            self._approx_px = None

    ##################################################################
    # Copying                                                        #
    ##################################################################

    def __copy__(self):
        state = self.__getstate__()
        cls = type(self)
        bq = cls.__new__(cls)
        bq.__setstate__(state)
        return bq

    def __deepcopy__(self, memo):
        state = deepcopy(self.__getstate__(), memo)
        cls = type(self)
        bq = cls.__new__(cls)
        bq.__setstate__(state)
        return bq

    def copy(self, deep=True):
        if deep:
            out = deepcopy(self)
        else:
            out = copy(self)
        return out

    ##################################################################
    # Helper methods                                                 #
    ##################################################################

    def _set_gp_log_l_params(self, params):
        # set the parameter values
        for p, v in params.items():
            self.gp_log_l.set_param(p, v)

        # TODO: improve matrix conditioning for log(l)
        self.gp_log_l.jitter.fill(0)

        # update values of candidate points
        m = self.gp_log_l.mean(self.x_c)
        V = np.diag(self.gp_log_l.cov(self.x_c)).copy()
        V[V < 0] = 0
        tl_c = m + 2 * np.sqrt(V)
        if (tl_c > MAX).any():
            raise np.linalg.LinAlgError("GP mean is too large")

        self.l_c = np.exp(m)
        self.l_sc = np.array(np.concatenate([self.l_s, self.l_c]))

        # update the locations and values for exp(log(l))
        self.gp_l.x = self.x_sc
        self.gp_l.y = self.l_sc

        # TODO: improve matrix conditioning for exp(log(l))
        self.gp_l.jitter.fill(0)

    def _set_gp_l_params(self, params):
        # set the parameter values
        for p, v in params.items():
            self.gp_l.set_param(p, v)

        # TODO: improve matrix conditioning for exp(log(l))
        self.gp_l.jitter.fill(0)

    def _choose_candidates(self):
        logger.debug("Choosing candidate points")

        if self.options['wrapped']:
            xmin = -np.pi * self.gp_log_l.K.p
            xmax = np.pi * self.gp_log_l.K.p
        else:
            xmin = self.x_s.min() - self.gp_log_l.K.w
            xmax = self.x_s.max() + self.gp_log_l.K.w

        # compute the candidate points
        xc = np.random.uniform(xmin, xmax, self.options['n_candidate'])

        # make sure they don't overlap with points we already have
        bq_c.filter_candidates(xc, self.x_s,
                               self.options['candidate_thresh'])

        # save the locations and compute the values
        self.x_c = np.sort(xc[~np.isnan(xc)])
        self.l_c = np.exp(self.gp_log_l.mean(self.x_c))
        self.nc = self.x_c.shape[0]

        # concatenate with the observations we already have
        self.x_sc = np.array(np.concatenate([self.x_s, self.x_c]))
        self.l_sc = np.array(np.concatenate([self.l_s, self.l_c]))
        self.nsc = self.ns + self.nc

    def _make_approx_x(self, xmin=None, xmax=None, n=1000):
        if xmin is None:
            if self.options['wrapped']:
                xmin = -np.pi * self.gp_log_l.K.p
            else:
                xmin = self.x_sc.min() - self.gp_log_l.K.w

        if xmax is None:
            if self.options['wrapped']:
                xmax = np.pi * self.gp_log_l.K.p
            else:
                xmax = self.x_sc.max() + self.gp_log_l.K.w

        return np.linspace(xmin, xmax, n)

    def _make_approx_px(self, x=None):
        if x is None:
            x = self._approx_x

        p = np.empty(x.size, order='F')
        if self.options['wrapped']:
            bq_c.p_x_vonmises(
                p, x,
                float(self.options['x_mean']),
                1. / float(self.options['x_cov']))
        else:
            bq_c.p_x_gaussian(
                p, np.array(x[None], order='F'),
                self.options['x_mean'], self.options['x_cov'])

        return p
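# A minimal usage sketch for BQ, integrating l(x) = N(x | 0, 1) against a
# N(0, 1) prior. The constructor kwargs mirror load_options above; treating
# each parameter vector as [height, width, noise] is an assumption about the
# kernel's parameterization, not something the source confirms.
import numpy as np
import scipy.stats

x = np.linspace(-2, 2, 9)
l = scipy.stats.norm.pdf(x)
bq = BQ(x, l,
        kernel=GaussianKernel, n_candidate=10, candidate_thresh=0.5,
        x_mean=0.0, x_var=1.0, optim_method='L-BFGS-B')
bq.init(params_tl=[1.0, 1.0, 0.0], params_l=[0.2, 1.0, 0.0])
print(bq.Z_mean(), bq.Z_var())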
params = {}
params['ln_noise'] = 0.5
params['ln_signal'] = 1.0
params['ln_length'] = 0.3

# training data
np.random.seed(42)
x = np.random.random((50, 1)) * 20
y = np.cos(x) + 0.5 * x + np.random.normal(loc=0.0, scale=0.2, size=(50, 1))

# test data X points
x_test = np.linspace(-5, 22, 100)
x_test = np.reshape(x_test, (100, 1))

a_gp = GP(x, y, "matern", params)
b = GP(x, y, "rbf", params)
'''
product_of_experts = distributedGP(x, y, 8)
general_poe = distributedGP(x, y, 8, method='gpoe')
bcm = distributedGP(x, y, 8, method='bcm')
rbcm = distributedGP(x, y, 8, method='rbcm')
'''

mean, cov = a_gp.predict(x_test)
m2, c2 = b.predict(x_test)
'''
mean2, cov2 = product_of_experts.predict(x_test)
mean3, cov3 = general_poe.predict(x_test)
mean4, cov4 = bcm.predict(x_test)
mean5, cov5 = rbcm.predict(x_test)
'''
X, Y = training_data
lengthscale = 2.1
signal_variance = 1.
noise_variance = 0.01

# Set up testing locations.
# You can change the testing locations here.
X_star = np.linspace(training_start, training_end + 4 * np.pi, 200)
X_star_few = np.linspace(training_start, training_end + 0 * np.pi, 50)

# Compute posterior mean and variance.
kernel = SquaredExponentialKernel(lengthscale=lengthscale,
                                  signal_variance=signal_variance)
gp = GP(kernel=kernel, noise_variance=noise_variance)
post_m, post_variance, weights = gp.posterior(X, Y, X_star)
post_m_few, post_variance_few, weights_few = gp.posterior(X, Y, X_star_few)

# Plot posterior mean and the 95% confidence band.
post_variance = np.diagonal(post_variance)
plt.plot(X_star, post_m, color='red')
plt.scatter(X_star_few, post_m_few, marker='.', s=[60] * len(X_star_few),
            color='red', alpha=0.7)
plt.fill_between(X_star, post_m - 1.96 * np.sqrt(post_variance),
                 post_m + 1.96 * np.sqrt(post_variance))
def main():
    torch.manual_seed(opt.seed)

    if opt.debug:
        pdb.set_trace()

    # load data
    img, obj, view = read_face_data(opt.data)  # image, object, and view
    train_data = FaceDataset(img["train"], obj["train"], view["train"])
    val_data = FaceDataset(img["val"], obj["val"], view["val"])
    train_queue = DataLoader(train_data, batch_size=opt.bs, shuffle=True)
    val_queue = DataLoader(val_data, batch_size=opt.bs, shuffle=False)

    # long-int object and view representations
    Dt = Variable(obj["train"][:, 0].long(), requires_grad=False).to(device)
    Wt = Variable(view["train"][:, 0].long(), requires_grad=False).to(device)
    Dv = Variable(obj["val"][:, 0].long(), requires_grad=False).to(device)
    Wv = Variable(view["val"][:, 0].long(), requires_grad=False).to(device)

    # define VAE and freeze its weights
    vae = FaceVAE(**vae_cfg).to(device)
    RV = torch.load(opt.vae_weights)
    vae.load_state_dict(RV)
    vae.to(device)
    vae.eval()
    for params in vae.parameters():
        params.requires_grad = False

    # define gp
    P = sp.unique(obj["train"]).shape[0]
    Q = sp.unique(view["train"]).shape[0]
    vm = Vmodel(P, Q, opt.xdim, Q).to(device)
    gp = GP(n_rand_effs=1).to(device)
    gp_params = nn.ParameterList()
    gp_params.extend(vm.parameters())
    gp_params.extend(gp.parameters())

    # define optimizers
    gp_optimizer = optim.Adam(gp_params, lr=opt.gp_lr)
    bce = nn.BCELoss(reduction='sum').to(device)

    if opt.debug:
        pdb.set_trace()

    history = {}
    for epoch in range(opt.epochs):
        # 1. encode Y in mini-batches
        Zm, Zs = encode_Y(vae, train_queue)

        # 2. sample Z
        Eps = Variable(torch.randn(*Zs.shape),
                       requires_grad=False).to(device)
        Z = Zm + Eps * Zs

        # 3. evaluation step (not needed for training)
        Vt = vm(Dt, Wt).detach()
        Vv = vm(Dv, Wv).detach()
        rv_eval, imgs, covs = eval_step(vae, gp, vm, val_queue, Zm, Vt, Vv)

        # 4. compute first-order Taylor expansion coefficients
        Zb, Vbs, vbs, gp_nll = gp.taylor_coeff(Z, [Vt])
        rv_eval["gp_nll"] = float(gp_nll.data.mean().cpu()) / vae.K

        # 5. accumulate gradients over mini-batches and update params
        rv_back = backprop_and_update(
            vae, bce, gp, vm, train_queue, Dt, Wt, Eps, Zb, Vbs, vbs,
            gp_optimizer,
        )
        rv_back["loss"] = (rv_back["recon_term"] + rv_eval["gp_nll"] +
                           rv_back["pen_term"])

        smartAppendDict(history, rv_eval)
        smartAppendDict(history, rv_back)
        smartAppend(history, "vs", gp.get_vs().data.cpu().numpy())

        logging.info(
            "epoch %d - recon_term: %f - gp_nll: %f - pen_term: %f"
            " - mse: %f - abs: %f - fake_loss: %f - train_mse_val: %f"
            " - train_mse_out: %f"
            % (epoch, rv_back["recon_term"], rv_back["gp_nll"],
               rv_back["pen_term"], rv_back["mse"], rv_back["abs"],
               rv_back["fake_loss"], rv_eval["mse_val"],
               rv_eval["mse_out"]))

        # callback?
        if epoch % opt.epoch_cb == 0:
            logging.info("epoch %d - executing callback" % epoch)
            ffile = os.path.join(opt.outdir, "plot.%.5d.png" % epoch)
            callback_gppvae(epoch, history, covs, imgs, ffile)
class BayesOpt:
    def __init__(self, data_generator, init_sample_size, max_steps,
                 sigma_obs=None, is_mcmc=False, mcmc_opts=None):
        # Initializing Bayesian optimization objects:
        # data_generator generates data and specifies the domain of
        # optimization; max_steps refers to the maximum number of
        # sampled points
        self.max_steps = max_steps
        self.data_generator = data_generator

        # Initializing seen observations and adding a couple of
        # variables for later bookkeeping
        self.domain = self.data_generator.domain
        pick_x = np.random.choice(range(len(self.domain)),
                                  size=init_sample_size, replace=False)
        self.x = self.domain[pick_x]
        self.y = self.data_generator.sample(self.x)
        self.best_y = np.max(self.y)
        self.mu_posterior = None
        self.std_posterior = None

        # Initializing underlying GP
        self.gp = GP(self.x, self.y)
        self.sigma_obs = sigma_obs

        # Initializing MCMC properties (mcmc_opts is supposed to be an
        # instance of the MCMCProperties class)
        self.is_mcmc = is_mcmc
        self.mcmc_opts = mcmc_opts

    def add_obs(self, x, y):
        # Adding new observations; x and y are assumed to be scalars
        self.x = np.append(self.x, x)
        self.y = np.append(self.y, y)

    def determine_l(self):
        # Returns kernel hyperparameters for the current state of the
        # system: either the hyperparameters that maximize the
        # log-likelihood or, with MCMC sampling, a sample from the
        # posterior distribution of the hyperparameters. In either case
        # the output is an array of elements (a single element for the
        # max-likelihood estimator).
        if not self.is_mcmc:
            # Maximum likelihood estimator (currently over the
            # [0.01, 1] interval)
            l = max(np.exp(np.linspace(np.log(0.01), np.log(1), 100)),
                    key=lambda z: self.gp.log_likelihood(self.sigma_obs, z))
            return [l]
        if self.is_mcmc:
            l_sampler = MCMCSampler(
                lambda z: self.gp.log_likelihood(self.sigma_obs, z),
                self.mcmc_opts)
            return l_sampler.posterior_sample()

    def step(self):
        # The main function of the BayesOpt class: performs a single
        # optimization step.
        # 1. Estimate the kernel hyperparameters that best fit the data
        #    (either with MCMC or likelihood optimization).
        # 2. Select the best point to sample (currently with the EI
        #    acquisition function).
        # 3. Sample the point and update the state.

        # Sampling kernel hyperparameters
        sampled_l = self.determine_l()

        # Averaging the GP posterior and EI over possible kernel
        # hyperparameters. Note that since std is not quite an
        # expectation, averaging it is a hack and would not necessarily
        # give the true std.
        mu = np.zeros((len(self.domain),))
        std_1d = np.zeros((len(self.domain),))
        ei = np.zeros((len(self.domain),))
        for l in sampled_l:
            sampled_mu, sampled_std_1d = self.gp.gp_posterior(
                self.domain, self.sigma_obs, l, return_chol=False)
            z = (sampled_mu - self.best_y) / sampled_std_1d
            sampled_ei = (sampled_std_1d * scipy.stats.norm.pdf(z)
                          + z * sampled_std_1d * scipy.stats.norm.cdf(z))
            mu += sampled_mu
            std_1d += sampled_std_1d
            ei += sampled_ei

        # Sampling a new point
        new_x = self.domain[np.argmax(ei)]
        new_y = self.data_generator.sample(new_x)
        self.add_obs(new_x, new_y)
        self.gp.add_obs(new_x, new_y)
        self.best_y = max(new_y, self.best_y)
        self.mu_posterior = mu / len(sampled_l)
        self.std_posterior = std_1d / len(sampled_l)

    def run(self):
        # Runs the whole optimization; for now it only does single
        # steps. In the future some print and plot statements could be
        # added.
        for _ in range(self.max_steps):
            self.step()
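# A minimal usage sketch for BayesOpt, with a toy generator supplying the
# `domain` attribute and `sample` method the class relies on; the generator
# itself is an assumption, not part of the source.
import numpy as np

class ToyGenerator:
    """Stand-in data generator with the interface BayesOpt expects."""
    def __init__(self):
        self.domain = np.linspace(0, 1, 200)

    def sample(self, x):
        return -(np.asarray(x) - 0.3) ** 2 + 0.01 * np.random.randn()

opt = BayesOpt(ToyGenerator(), init_sample_size=5, max_steps=20,
               sigma_obs=0.01)
opt.run()
print(opt.best_y)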
class BQ(object): r""" Estimate an integral of the following form using Bayesian Quadrature with a Gaussian Process prior: .. math:: Z = \int \ell(x)\mathcal{N}(x\ |\ \mu, \sigma^2)\ \mathrm{d}x See :meth:`~bayesian_quadrature.bq.BQ.load_options` for details on allowable options. Parameters ---------- x : numpy.ndarray size :math:`s` array of sample locations l : numpy.ndarray size :math:`s` array of sample observations options : dict Options dictionary Notes ----- This algorithm is an updated version of the one described in [OD12]_. The overall idea is: 1. Estimate :math:`\log\ell` using a GP. 2. Estimate :math:`\bar{\ell}=\exp(\log\ell)` using second GP. 3. Integrate exactly under :math:`\bar{\ell}`. """ ################################################################## # Initialization # ################################################################## def __init__(self, x, l, **options): """Initialize the Bayesian Quadrature object.""" #: Vector of observed locations self.x_s = np.array(x, dtype=DTYPE) #: Vector of observed values self.l_s = np.array(l, dtype=DTYPE) if (self.l_s <= 0).any(): raise ValueError("l_s contains zero or negative values") if self.x_s.ndim > 1: raise ValueError("invalid number of dimensions for x") if self.l_s.ndim > 1: raise ValueError("invalid number of dimensions for l") if self.x_s.shape != self.l_s.shape: raise ValueError("shape mismatch for x and l") #: Vector of log-transformed observed values self.tl_s = np.log(self.l_s) #: Number of observations self.ns = self.x_s.shape[0] self.load_options(**options) self.initialized = False self.gp_log_l = None #: Gaussian process over log(l) self.gp_l = None #: Gaussian process over exp(log(l)) self.x_c = None #: Vector of candidate locations self.l_c = None #: Vector of candidate values self.nc = None #: Number of candidate points self.x_sc = None #: Vector of observed plus candidate locations self.l_sc = None #: Vector of observed plus candidate values self.nsc = None #: Number of observations plus candidates self._approx_x = None self._approx_px = None def load_options(self, kernel, n_candidate, candidate_thresh, x_mean, x_var, optim_method): r""" Load options. Parameters ---------- kernel : Kernel The type of kernel to use. Note that if the kernel is not Gaussian, slow approximate (rather than analytic) solutions will be used. n_candidate : int The (maximum) number of candidate points. candidate_thresh : float Minimum allowed space between candidates. x_mean : float Prior mean, :math:`\mu`. x_var : float Prior variance, :math:`\sigma^2`. optim_method : string Method to use for parameter optimization (e.g., 'L-BFGS-B' or 'Powell') """ # store the options dictionary for future use self.options = { 'kernel': kernel, 'n_candidate': int(n_candidate), 'candidate_thresh': float(candidate_thresh), 'x_mean': np.array([x_mean], dtype=DTYPE, order='F'), 'x_cov': np.array([[x_var]], dtype=DTYPE, order='F'), 'use_approx': not (kernel is GaussianKernel), 'wrapped': kernel is PeriodicKernel, 'optim_method': optim_method } if self.options['use_approx']: logger.debug("Using approximate solutions for non-Gaussian kernel") def init(self, params_tl, params_l): """Initialize the GPs. 
Parameters ---------- params_tl : np.ndarray initial parameters for GP over :math:`\log\ell` params_l : np.ndarray initial parameters for GP over :math:`\exp(\log\ell)` """ kernel = self.options['kernel'] # create the gaussian process over log(l) self.gp_log_l = GP(kernel(*params_tl[:-1]), self.x_s, self.tl_s, s=params_tl[-1]) # TODO: improve matrix conditioning for log(l) self.gp_log_l.jitter = np.zeros(self.ns, dtype=DTYPE) # pick candidate points self._choose_candidates() # create the gaussian process over exp(log(l)) self.gp_l = GP(kernel(*params_l[:-1]), self.x_sc, self.l_sc, s=params_l[-1]) # TODO: improve matrix conditioning for exp(log(l)) self.gp_l.jitter = np.zeros(self.nsc, dtype=DTYPE) # make the vector of locations for approximations self._approx_x = self._make_approx_x() self._approx_px = self._make_approx_px() self.initialized = True ################################################################## # Mean and variance of l # ################################################################## def l_mean(self, x): r""" Mean of the final approximation to :math:`\ell`. Parameters ---------- x : numpy.ndarray :math:`m` array of new sample locations. Returns ------- mean : numpy.ndarray :math:`m` array of predictive means Notes ----- This is just the mean of the GP over :math:`\exp(\log\ell)`, i.e.: .. math:: \mathbb{E}[\bar{\ell}(\mathbf{x})] = \mathbb{E}_{\mathrm{GP}(\exp(\log\ell))}(\mathbf{x}) """ return self.gp_l.mean(x) def l_var(self, x): r""" Marginal variance of the final approximation to :math:`\ell`. Parameters ---------- x : numpy.ndarray :math:`m` array of new sample locations. Returns ------- mean : numpy.ndarray :math:`m` array of predictive variances Notes ----- This is just the diagonal of the covariance of the GP over :math:`\log\ell` multiplied by the squared mean of the GP over :math:`\exp(\log\ell)`, i.e.: .. math:: \mathbb{V}[\bar{\ell}(\mathbf{x})] = \mathbb{V}_{\mathrm{GP}(\log\ell)}(\mathbf{x})\mathbb{E}_{\mathrm{GP}(\exp(\log\ell))}(\mathbf{x})^2 """ v_log_l = np.diag(self.gp_log_l.cov(x)).copy() m_l = self.gp_l.mean(x) l_var = v_log_l * m_l**2 l_var[l_var < 0] = 0 return l_var ################################################################## # Mean of Z # ################################################################## def Z_mean(self): r""" Computes the mean of :math:`Z`, which is defined as: .. math :: \mathbb{E}[Z]=\int \bar{\ell}(x)p(x)\ \mathrm{d}x Returns ------- mean : float """ if self.options['use_approx']: return self._approx_Z_mean() else: return self._exact_Z_mean() def _approx_Z_mean(self, xo=None): if xo is None: xo = self._approx_x p_xo = self._approx_px else: p_xo = self._make_approx_px(xo) approx = bq_c.approx_Z_mean(np.array(xo[None], order='F'), p_xo, self.l_mean(xo)) return approx def _exact_Z_mean(self): r""" Equivalent to: .. 
math:: \begin{align*} \mathbb{E}[Z]&\approx \int\bar{\ell}(x)\mathcal{N}(x\ |\ \mu, \sigma^2)\ \mathrm{d}x \\ &= \left(\int K_{\exp(\log\ell)}(x, \mathbf{x}_c)\mathcal{N}(x\ |\ \mu, \sigma^2)\ \mathrm{d}x\right)K_{\exp(\log\ell)}(\mathbf{x}_c, \mathbf{x}_c)^{-1}\ell(\mathbf{x}_c) \end{align*} """ x_sc = np.array(self.x_sc[None], order='F') alpha_l = self.gp_l.inv_Kxx_y h_s, w_s = self.gp_l.K.params w_s = np.array([w_s], order='F') m_Z = bq_c.Z_mean(x_sc, alpha_l, h_s, w_s, self.options['x_mean'], self.options['x_cov']) return m_Z ################################################################## # Variance of Z # ################################################################## def Z_var(self): r""" Computes the variance of :math:`Z`, which is defined as: .. math:: \mathbb{V}(Z)\approx \int\int \mathrm{Cov}_{\log\ell}(x, x^\prime)\bar{\ell}(x)\bar{\ell}(x^\prime)p(x)p(x^\prime)\ \mathrm{d}x\ \mathrm{d}x^\prime Returns ------- var : float """ if self.options['use_approx']: return self._approx_Z_var() else: return self._exact_Z_var() def _approx_Z_var(self, xo=None): if xo is None: xo = self._approx_x p_xo = self._approx_px else: p_xo = self._make_approx_px(xo) approx = bq_c.approx_Z_var(np.array(xo[None], order='F'), p_xo, np.array(self.l_mean(xo), order='F'), np.array(self.gp_log_l.cov(xo), order='F')) return approx def _exact_Z_var(self): # values for the GPs over l(x) and log(l(x)) x_s = np.array(self.x_s[None], order='F') x_sc = np.array(self.x_sc[None], order='F') alpha_l = self.gp_l.inv_Kxx_y L_tl = np.array(self.gp_log_l.Lxx, order='F') h_l, w_l = self.gp_l.K.params w_l = np.array([w_l]) h_tl, w_tl = self.gp_log_l.K.params w_tl = np.array([w_tl]) V_Z = bq_c.Z_var(x_s, x_sc, alpha_l, L_tl, h_l, w_l, h_tl, w_tl, self.options['x_mean'], self.options['x_cov']) return V_Z ################################################################## # Expected variance of Z # ################################################################## def expected_Z_var(self, x_a): r""" Computes the expected variance of :math:`Z` given a new observation :math:`x_a`. This is defined as: .. math :: \mathbb{E}[V(Z)\ |\ \ell_s, \ell_a] = \mathbb{E}[Z\ |\ \ell_s]^2 + V(Z\ |\ \ell_s) - \int \mathbb{E}[Z\ |\ \ell_s, \ell_a]^2 \mathcal{N}(\ell_a\ |\ \hat{m}_a, \hat{C}_a)\ \mathrm{d}\ell_a Parameters ---------- x_a : numpy.ndarray vector of points for which to (independently) compute the expected variance Returns ------- out : expected variance for each point in `x_a` """ mean_second_moment = self.Z_mean()**2 + self.Z_var() expected_squared_mean = self.expected_squared_mean(x_a) expected_var = mean_second_moment - expected_squared_mean return expected_var def expected_squared_mean(self, x_a): r""" Computes the expected square mean of :math:`Z` given a new observation :math:`x_a`. This is defined as: .. math :: \mathbb{E}[\mathbb{E}[Z]^2 |\ \ell_s] = \int \mathbb{E}[Z\ |\ \ell_s, \ell_a]^2 \mathcal{N}(\ell_a\ |\ \hat{m}_a, \hat{C}_a)\ \mathrm{d}\ell_a Parameters ---------- x_a : numpy.ndarray vector of points for which to (independently) compute the expected squared mean Returns ------- out : expected squared mean for each point in `x_a` """ esm = np.empty(x_a.shape[0]) for i in xrange(x_a.shape[0]): esm[i] = self._esm_and_em(x_a[[i]])[0] return esm def expected_mean(self, x_a): r""" Computes the expected mean of :math:`Z` given a new observation :math:`x_a`. 
Parameters ---------- x_a : numpy.ndarray vector of points for which to (independently) compute the expected mean Returns ------- out : expected mean for each point in `x_a` """ em = np.empty(x_a.shape[0]) for i in xrange(x_a.shape[0]): em[i] = self._esm_and_em(x_a[[i]])[1] return em def expected_squared_mean_and_mean(self, x_a): r""" Computes the expected squared mean and expected mean of :math:`Z` given a new observation :math:`x_a`. Parameters ---------- x_a : numpy.ndarray vector of points for which to (independently) compute the expected mean Returns ------- out : expected squared mean and expected mean for each point in `x_a` """ em = np.empty((x_a.shape[0], 2)) for i in xrange(x_a.shape[0]): em[i] = self._esm_and_em(x_a[[i]]) return em def _esm_and_em(self, x_a): """Computes the expected square mean for a single point `x_a`.""" # check for invalid inputs if x_a is None or np.isnan(x_a) or np.isinf(x_a): raise ValueError("invalid value for x_a: %s", x_a) # don't do the heavy computation if the point is close to one # we already have if np.isclose(x_a, self.x_s, atol=1e-4).any(): em = self.Z_mean() esm = em**2 return esm, em # include new x_a x_sca = np.concatenate([self.x_sc, x_a]) # compute K_l(x_sca, x_sca) K_l = self.gp_l.Kxoxo(x_sca) jitter = np.zeros(self.nsc + 1) # add noise to the candidate points closest to x_a, since they # are likely to change close = np.abs(self.x_c - x_a) < self.options['candidate_thresh'] if close.any(): idx = np.array(np.nonzero(close)[0]) + self.ns bq_c.improve_covariance_conditioning(K_l, jitter, idx) # also add noise to the new point bq_c.improve_covariance_conditioning(K_l, jitter, np.array([self.nsc])) L = np.empty(K_l.shape, order='F') try: la.cho_factor(np.array(K_l, order='F'), L) except np.linalg.LinAlgError: # if the matrix is singular, it's because either x_a is # close to a point we already have, or the kernel produces # similar values for all points (e.g., there is a very # large variance). In both cases, out expectation should # be that the mean won't change much, so just return the # mean we currently have. 
        em = self.Z_mean()
        esm = em ** 2
        return esm, em

    # compute expected transformed mean
    tm_a = np.array(self.gp_log_l.mean(x_a))
    # compute expected transformed covariance
    tC_a = np.array(self.gp_log_l.cov(x_a), order='F')

    if self.options['use_approx']:
        xo = self._approx_x
        p_xo = self._approx_px
        Kxxo = np.array(self.gp_l.K(x_sca, xo), order='F')
        esm, em = bq_c.approx_expected_squared_mean_and_mean(
            self.l_sc, L, tm_a, tC_a,
            np.array(xo[None], order='F'), p_xo, Kxxo)
    else:
        esm, em = bq_c.expected_squared_mean_and_mean(
            self.l_sc, L, tm_a, tC_a,
            np.array(x_sca[None], order='F'),
            self.gp_l.K.h, np.array([self.gp_l.K.w]),
            self.options['x_mean'],
            np.array(self.options['x_cov'], order='F'))

    if np.isnan(esm) or esm < 0:
        raise RuntimeError("invalid expected squared mean for x_a=%s: %s"
                           % (x_a, esm))
    if np.isnan(em):
        raise RuntimeError("invalid expected mean for x_a=%s: %s"
                           % (x_a, em))
    if np.isinf(esm):
        logger.warn("expected squared mean for x_a=%s is infinity!", x_a)
    if np.isinf(em):
        logger.warn("expected mean for x_a=%s is infinity!", x_a)

    return esm, em

##################################################################
#          Hyperparameter optimization/marginalization           #
##################################################################

def _make_llh_params(self, params):
    nparam = len(params)

    def f(x):
        if x is None or np.isnan(x).any():
            return -np.inf
        try:
            self._set_gp_log_l_params(dict(zip(params, x[:nparam])))
            self._set_gp_l_params(dict(zip(params, x[nparam:])))
        except (ValueError, np.linalg.LinAlgError):
            return -np.inf
        try:
            llh = self.gp_log_l.log_lh + self.gp_l.log_lh
        except (ValueError, np.linalg.LinAlgError):
            return -np.inf
        return llh

    return f

def fit_hypers(self, params):
    p0_tl = [self.gp_log_l.get_param(p) for p in params]
    p0_l = [self.gp_l.get_param(p) for p in params]
    p0 = np.array(p0_tl + p0_l)
    f = self._make_llh_params(params)

    p0 = util.find_good_parameters(f, p0, self.options['optim_method'])
    if p0 is None:
        raise RuntimeError("couldn't find good parameters")

def sample_hypers(self, params, n=1, nburn=10):
    r"""
    Use slice sampling to sample new hyperparameters for the two
    GPs. Note that this will probably cause
    :math:`\bar{\ell}(x_{sc})` to change.

    Parameters
    ----------
    params : list of strings
        List of parameter names to be sampled
    nburn : int
        Number of burn-in samples

    """
    # TODO: should the window be a parameter that is set by the
    # user? is there a way to choose a sane window size
    # automatically?
    nparam = len(params)
    window = 2 * nparam

    p0_tl = [self.gp_log_l.get_param(p) for p in params]
    p0_l = [self.gp_l.get_param(p) for p in params]
    p0 = np.array(p0_tl + p0_l)

    f = self._make_llh_params(params)
    if f(p0) < MIN:
        pn = util.find_good_parameters(f, p0, self.options['optim_method'])
        if pn is None:
            raise RuntimeError("couldn't find good starting parameters")
        p0 = pn

    hypers = util.slice_sample(f, nburn + n, window, p0, nburn=nburn, freq=1)
    return hypers[:, :nparam], hypers[:, nparam:]

##################################################################
#                        Active sampling                         #
##################################################################

def marginalize(self, funs, n, params):
    r"""
    Compute the approximate marginal functions `funs` by approximately
    marginalizing over the GP hyperparameters.

    Parameters
    ----------
    funs : list
        List of functions for which to compute the marginal.
    n : int
        Number of samples to take when doing the approximate
        marginalization
    params : list or tuple
        List of parameters to marginalize over

    """
    # cache state
    state = deepcopy(self.__getstate__())

    # allocate space for the function values
    values = []
    for fun in funs:
        value = fun()
        try:
            m = value.shape
        except AttributeError:
            values.append(np.empty(n))
        else:
            values.append(np.empty((n,) + m))

    # do all the sampling at once, because it is faster
    hypers_tl, hypers_l = self.sample_hypers(params, n=n, nburn=1)

    # compute values for the functions based on the parameters we
    # just sampled
    for i in xrange(n):
        params_tl = dict(zip(params, hypers_tl[i]))
        params_l = dict(zip(params, hypers_l[i]))

        self._set_gp_log_l_params(params_tl)
        self._set_gp_l_params(params_l)

        for j, fun in enumerate(funs):
            try:
                values[j][i] = fun()
            except:
                logger.error("error with parameters %s and %s",
                             params_tl, params_l)
                raise

    # restore state
    self.__setstate__(state)

    return values

def choose_next(self, x_a, n, params, plot=False):
    f = lambda: -self.expected_squared_mean(x_a)
    values = self.marginalize([f], n, params)
    loss = values[0].mean(axis=0)

    best = np.min(loss)
    close = np.nonzero(np.isclose(loss, best))[0]
    choice = np.random.choice(close)
    best = x_a[choice]

    if plot:
        fig, (ax1, ax2) = plt.subplots(1, 2, sharex=True)

        self.plot_l(ax1, xmin=x_a.min(), xmax=x_a.max())
        util.vlines(ax1, best, color='g', linestyle='--', lw=2)

        ax2.plot(x_a, loss, 'k-', lw=2)
        util.vlines(ax2, best, color='g', linestyle='--', lw=2)
        ax2.set_title("Negative expected sq. mean")

        fig.set_figwidth(10)
        fig.set_figheight(3.5)

    return best

def add_observation(self, x_a, l_a):
    diffs = np.abs(x_a - self.x_s)
    if diffs.min() < self.options['candidate_thresh']:
        c = diffs.argmin()
        logger.debug("x_a=%s is close to x_s=%s, averaging them",
                     x_a, self.x_s[c])
        self.x_s[c] = (self.x_s[c] + x_a) / 2.
        self.l_s[c] = (self.l_s[c] + l_a) / 2.
        self.tl_s[c] = np.log(float(self.l_s[c]))
    else:
        self.x_s = np.append(self.x_s, float(x_a))
        self.l_s = np.append(self.l_s, float(l_a))
        self.tl_s = np.append(self.tl_s, np.log(float(l_a)))
        self.ns += 1

    # reinitialize the bq object
    self.init(self.gp_log_l.params, self.gp_l.params)

##################################################################
#                        Plotting methods                        #
##################################################################

def plot_gp_log_l(self, ax, f_l=None, xmin=None, xmax=None):
    x = self._make_approx_x(xmin=xmin, xmax=xmax, n=1000)

    if f_l is not None:
        l = np.log(f_l(x))
        ax.plot(x, l, 'k-', lw=2)

    self.gp_log_l.plot(ax, xlim=[x.min(), x.max()], color='r')
    ax.plot(self.x_c, np.log(self.l_c),
            'bs', markersize=4, label=r"$m_{\log\ell}(x_c)$")

    ax.set_title(r"GP over $\log\ell$")
    util.set_scientific(ax, -5, 4)

def plot_gp_l(self, ax, f_l=None, xmin=None, xmax=None):
    x = self._make_approx_x(xmin=xmin, xmax=xmax, n=1000)

    if f_l is not None:
        l = f_l(x)
        ax.plot(x, l, 'k-', lw=2)

    self.gp_l.plot(ax, xlim=[x.min(), x.max()], color='r')
    ax.plot(self.x_c, self.l_c,
            'bs', markersize=4, label=r"$\exp(m_{\log\ell}(x_c))$")

    ax.set_title(r"GP over $\exp(\log\ell)$")
    util.set_scientific(ax, -5, 4)

def plot_l(self, ax, f_l=None, xmin=None, xmax=None, legend=True):
    x = self._make_approx_x(xmin=xmin, xmax=xmax, n=1000)

    if f_l is not None:
        l = f_l(x)
        ax.plot(x, l, 'k-', lw=2, label=r"$\ell(x)$")

    l_mean = self.l_mean(x)
    l_std = np.sqrt(self.l_var(x))
    lower = l_mean - l_std
    upper = l_mean + l_std

    ax.fill_between(x, lower, upper, color='r', alpha=0.2)
    ax.plot(x, l_mean, 'r-', lw=2, label="final approx")

    ax.plot(self.x_s, self.l_s,
            'ro', markersize=5, label=r"$\ell(x_s)$")
    ax.plot(self.x_c, self.l_c,
            'bs', markersize=4, label=r"$\exp(m_{\log\ell}(x_c))$")

    ax.set_title("Final Approximation")
    ax.set_xlim(x.min(), x.max())
    util.set_scientific(ax, -5, 4)

    if legend:
        ax.legend(loc=0, fontsize=10)

def plot_expected_squared_mean(self, ax, xmin=None, xmax=None):
    x_a = self._make_approx_x(xmin=xmin, xmax=xmax, n=1000)
    exp_sq_m = self.expected_squared_mean(x_a)

    # plot the expected squared mean
    ax.plot(x_a, exp_sq_m,
            label=r"$E[\mathrm{m}(Z)^2]$", color='k', lw=2)
    ax.set_xlim(x_a.min(), x_a.max())

    # plot a line for the current squared mean
    util.hlines(ax, self.Z_mean() ** 2,
                color="#00FF00", lw=2, label=r"$\mathrm{m}(Z)^2$")

    # plot lines where there are observations
    util.vlines(ax, self.x_sc, color='k', linestyle='--', alpha=0.5)

    util.set_scientific(ax, -5, 4)
    ax.legend(loc=0, fontsize=10)
    ax.set_title(r"Expected squared mean of $Z$")

def plot_expected_variance(self, ax, xmin=None, xmax=None):
    x_a = self._make_approx_x(xmin=xmin, xmax=xmax, n=1000)
    exp_Z_var = self.expected_Z_var(x_a)

    # plot the expected variance
    ax.plot(x_a, exp_Z_var,
            label=r"$E[\mathrm{Var}(Z)]$", color='k', lw=2)
    ax.set_xlim(x_a.min(), x_a.max())

    # plot a line for the current variance
    util.hlines(ax, self.Z_var(),
                color="#00FF00", lw=2, label=r"$\mathrm{Var}(Z)$")

    # plot lines where there are observations
    util.vlines(ax, self.x_sc, color='k', linestyle='--', alpha=0.5)

    util.set_scientific(ax, -5, 4)
    ax.legend(loc=0, fontsize=10)
    ax.set_title(r"Expected variance of $Z$")

def plot(self, f_l=None, xmin=None, xmax=None):
    fig, axes = plt.subplots(1, 3)

    self.plot_gp_log_l(axes[0], f_l=f_l, xmin=xmin, xmax=xmax)
    self.plot_gp_l(axes[1], f_l=f_l, xmin=xmin, xmax=xmax)
    self.plot_l(axes[2], f_l=f_l, xmin=xmin, xmax=xmax)

    ymins, ymaxs = zip(*[ax.get_ylim() for ax in axes[1:]])
    ymin = min(ymins)
    ymax = max(ymaxs)
    for ax in axes[1:]:
        ax.set_ylim(ymin, ymax)

    fig.set_figwidth(14)
    fig.set_figheight(3.5)

    return fig, axes

##################################################################
#                      Saving and restoring                      #
##################################################################

def __getstate__(self):
    state = {}
    state['x_s'] = self.x_s
    state['l_s'] = self.l_s
    state['tl_s'] = self.tl_s
    state['options'] = self.options
    state['initialized'] = self.initialized
    if self.initialized:
        state['gp_log_l'] = self.gp_log_l
        state['gp_log_l_jitter'] = self.gp_log_l.jitter
        state['gp_l'] = self.gp_l
        state['gp_l_jitter'] = self.gp_l.jitter
        state['_approx_x'] = self._approx_x
        state['_approx_px'] = self._approx_px
    return state

def __setstate__(self, state):
    self.x_s = state['x_s']
    self.l_s = state['l_s']
    self.tl_s = state['tl_s']
    self.ns = self.x_s.shape[0]
    self.options = state['options']
    self.initialized = state['initialized']
    if self.initialized:
        self.gp_log_l = state['gp_log_l']
        self.gp_log_l.jitter = state['gp_log_l_jitter']
        self.gp_l = state['gp_l']
        self.gp_l.jitter = state['gp_l_jitter']
        self.x_sc = self.gp_l._x
        self.l_sc = self.gp_l._y
        self.nsc = self.x_sc.shape[0]
        self.x_c = self.x_sc[self.ns:]
        self.l_c = self.l_sc[self.ns:]
        self.nc = self.nsc - self.ns
        self._approx_x = state['_approx_x']
        self._approx_px = state['_approx_px']
    else:
        self.gp_log_l = None
        self.gp_l = None
        self.x_c = None
        self.l_c = None
        self.nc = None
        self.x_sc = None
        self.l_sc = None
        self.nsc = None
        self._approx_x = None
        self._approx_px = None

##################################################################
#                             Copying                            #
##################################################################

def __copy__(self):
    state = self.__getstate__()
    cls = type(self)
    bq = cls.__new__(cls)
    bq.__setstate__(state)
    return bq

def __deepcopy__(self, memo):
    state = deepcopy(self.__getstate__(), memo)
    cls = type(self)
    bq = cls.__new__(cls)
    bq.__setstate__(state)
    return bq

def copy(self, deep=True):
    if deep:
        out = deepcopy(self)
    else:
        out = copy(self)
    return out

##################################################################
#                         Helper methods                         #
##################################################################

def _set_gp_log_l_params(self, params):
    # set the parameter values
    for p, v in params.iteritems():
        self.gp_log_l.set_param(p, v)
    # TODO: improve matrix conditioning for log(l)
    self.gp_log_l.jitter.fill(0)

    # update values of candidate points
    m = self.gp_log_l.mean(self.x_c)
    V = np.diag(self.gp_log_l.cov(self.x_c)).copy()
    V[V < 0] = 0
    tl_c = m + 2 * np.sqrt(V)
    if (tl_c > MAX).any():
        raise np.linalg.LinAlgError("GP mean is too large")
    self.l_c = np.exp(m)
    self.l_sc = np.array(np.concatenate([self.l_s, self.l_c]))

    # update the locations and values for exp(log(l))
    self.gp_l.x = self.x_sc
    self.gp_l.y = self.l_sc
    # TODO: improve matrix conditioning for exp(log(l))
    self.gp_l.jitter.fill(0)

def _set_gp_l_params(self, params):
    # set the parameter values
    for p, v in params.iteritems():
        self.gp_l.set_param(p, v)
    # TODO: improve matrix conditioning for exp(log(l))
    self.gp_l.jitter.fill(0)

def _choose_candidates(self):
    logger.debug("Choosing candidate points")

    if self.options['wrapped']:
        xmin = -np.pi * self.gp_log_l.K.p
        xmax = np.pi * self.gp_log_l.K.p
    else:
        xmin = self.x_s.min() - self.gp_log_l.K.w
        xmax = self.x_s.max() + self.gp_log_l.K.w

    # compute the candidate points
    xc = np.random.uniform(xmin, xmax, self.options['n_candidate'])

    # make sure they don't overlap with points we already have
    bq_c.filter_candidates(xc, self.x_s, self.options['candidate_thresh'])

    # save the locations and compute the values
    self.x_c = np.sort(xc[~np.isnan(xc)])
    self.l_c = np.exp(self.gp_log_l.mean(self.x_c))
    self.nc = self.x_c.shape[0]

    # concatenate with the observations we already have
    self.x_sc = np.array(np.concatenate([self.x_s, self.x_c]))
    self.l_sc = np.array(np.concatenate([self.l_s, self.l_c]))
    self.nsc = self.ns + self.nc

def _make_approx_x(self, xmin=None, xmax=None, n=1000):
    if xmin is None:
        if self.options['wrapped']:
            xmin = -np.pi * self.gp_log_l.K.p
        else:
            xmin = self.x_sc.min() - self.gp_log_l.K.w
    if xmax is None:
        if self.options['wrapped']:
            xmax = np.pi * self.gp_log_l.K.p
        else:
            xmax = self.x_sc.max() + self.gp_log_l.K.w
    return np.linspace(xmin, xmax, n)

def _make_approx_px(self, x=None):
    if x is None:
        x = self._approx_x
    p = np.empty(x.size, order='F')
    if self.options['wrapped']:
        bq_c.p_x_vonmises(p, x,
                          float(self.options['x_mean']),
                          1. / float(self.options['x_cov']))
    else:
        bq_c.p_x_gaussian(p, np.array(x[None], order='F'),
                          self.options['x_mean'],
                          self.options['x_cov'])
    return p
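# The closed forms used by bq_c.Z_mean and bq_c.Z_var above rest on the fact
# that a squared-exponential kernel integrates analytically against a
# Gaussian density. A minimal 1-D sketch, assuming
# k(x, x') = h**2 * exp(-(x - x')**2 / (2 * w**2)) with (h, w) playing the
# role of gp_l.K.params; this is illustrative, not the bq_c implementation.
import numpy as np

def sqexp_gauss_integral(x_c, h, w, mu, sigma2):
    """E_x[k(x, x_c)] for x ~ N(mu, sigma2) and a squared-exponential k."""
    scale = h ** 2 * np.sqrt(w ** 2 / (w ** 2 + sigma2))
    return scale * np.exp(-(x_c - mu) ** 2 / (2 * (w ** 2 + sigma2)))

# With alpha_l = K(x_sc, x_sc)^{-1} l(x_sc) (gp_l.inv_Kxx_y above), the mean
# of Z is then approximately sqexp_gauss_integral(x_sc, h, w, mu, sigma2)
# dotted with alpha_l.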
def main():
    gp = GP(terminals, functions, fitness, config)
    gp.init_population()
    result = gp.run()
    print_results(result, training_cases)
    return gp
# Generate training data.
X, Y = generate_points(start=np.pi * 0, end=np.pi * 2)

data_fits = []
model_complexities = []
lengthscales = []

# Grid search over lengthscale values, while keeping signal_variance
# and noise_variance fixed.
for lengthscale in np.linspace(0.1, 3.0, 30):
    kernel = ExponentialSquaredKernel(lengthscale=lengthscale,
                                      signal_variance=1.)
    gp = GP(kernel, noise_variance=0.1)

    data_fit = gp.data_fit_term(X, Y)
    model_complexity = gp.model_complexity_term(X)

    lengthscales.append(lengthscale)
    data_fits.append(data_fit)
    model_complexities.append(model_complexity)

# Find the lengthscale that gives the maximum objective.
objectives = np.array(data_fits) + np.array(model_complexities)
optimal_lengthscale_id = np.argmax(objectives)
max_objective = objectives[optimal_lengthscale_id]

# Plotting.
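# For reference, the two terms recorded above follow the standard split of
# the GP log marginal likelihood. The GP class used here is not shown, so
# this is a sketch under the usual definitions, not its actual implementation.
import numpy as np

def log_marginal_likelihood_terms(K, y, noise_variance):
    """Return (data_fit, model_complexity) for log p(y | X), where
    data_fit = -0.5 * y^T (K + s2 I)^{-1} y and
    model_complexity = -0.5 * log det(K + s2 I)."""
    n = y.shape[0]
    Ky = K + noise_variance * np.eye(n)
    L = np.linalg.cholesky(Ky)
    alpha = np.linalg.solve(L.T, np.linalg.solve(L, y))
    data_fit = -0.5 * y @ alpha
    # log det(Ky) = 2 * sum(log(diag(L))), so -0.5 * log det is:
    model_complexity = -np.sum(np.log(np.diag(L)))
    return data_fit, model_complexity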
def vizualize_sample_execution(world_config_file, schedule_config_file,
                               planner_config_file, model_config_file,
                               base_model_filepath, schedule_filepath,
                               strategies, num_deliveries_runs,
                               availability_percents, stat_run, visualize,
                               out_gif_path, out_img_path):
    ## params
    params = load_params(world_config_file, schedule_config_file,
                         planner_config_file, model_config_file)

    ## import world
    # g = Graph()
    # g.read_graph_from_file(os.path.dirname(os.path.abspath(__file__)) + params['graph_filename'])
    # g = read_graph_from_file(os.path.dirname(os.path.abspath(__file__)) + params['graph_filename'])
    g, rooms = generate_graph(params['graph_generator_type'],
                              os.path.dirname(os.path.abspath(__file__)),
                              params['graph_filename'], params['max_rooms'],
                              params['rooms'], params['max_traversal_cost'],
                              params['distance_scaling'])
    params['rooms'] = rooms

    for num_deliveries in num_deliveries_runs:
        for availability_percent in availability_percents:

            # temporal consistency parameter
            if params['availabilities'] == 'windows':
                # available_time = params['budget']*availability_percent
                # num_windows = max(int(round(float(available_time)/params['availability_length'])), 1)
                # ave_window_offset = float(params['budget'] - available_time)/num_windows
                # mu = max(ave_window_offset, 1)
                available_time = params['budget'] * availability_percent
                num_windows = max(1, int(round(
                    float(available_time) / params['availability_length'])))
                # new_availability_length = int(float(available_time)/num_windows)
                ave_window_offset = min(
                    float(params['budget'] - available_time) / num_windows,
                    float(params['budget'] - available_time) / 2)
                mu = int(ave_window_offset / 2)
                # mu = int(params['availability_length']/2)
            elif params['availabilities'] == 'simple':
                mu = int(params['availability_length'] / 2)
            else:
                mu = 30
            params['mu'] = mu

            # base models, true schedules
            stat_run = 0
            model_file_exists = os.path.exists(
                base_model_filepath + str(num_deliveries) + "_" +
                str(availability_percent) + "_" + str(stat_run) + ".yaml")
            schedule_file_exists = os.path.exists(
                schedule_filepath + str(num_deliveries) + "_" +
                str(availability_percent) + "_" + str(stat_run) + ".yaml")
            if model_file_exists and schedule_file_exists:
                # load pre-generated schedules/models
                base_availability_models, base_model_variances, node_requests = \
                    load_base_models_from_file(base_model_filepath,
                                               num_deliveries,
                                               availability_percent,
                                               stat_run)
                true_availability_models, true_schedules = \
                    load_schedules_from_file(schedule_filepath,
                                             num_deliveries,
                                             availability_percent,
                                             stat_run)
                availabilities = base_availability_models
            else:
                if params['availabilities'] == 'windows':
                    # sample rooms for deliveries
                    if params['node_closeness'] == 'random':
                        node_requests = random.sample(params['rooms'],
                                                      num_deliveries)
                    if params['node_closeness'] == 'sequential':
                        node_requests = params['rooms'][0:num_deliveries]

                    ## base availability models
                    avails, base_model_variances = generate_windows_overlapping(
                        node_requests, params['start_time'],
                        availability_percent, params['budget'],
                        params['time_interval'],
                        params['availability_length'],
                        params['availability_chance'])

                    if params['use_gp']:
                        from gp import GP
                        gps = {}
                        availabilities = {}
                        for request in node_requests:
                            x_in = list(range(params['start_time'],
                                              params['budget'],
                                              params['time_interval']))
                            gps[request] = GP(None, x_in, avails[request],
                                              params['budget'],
                                              params['spacing'],
                                              params['noise_scaling'],
                                              True, 'values')
                            availabilities[request] = gps[request].get_preds(x_in)
                        base_availability_models = gps
                    else:
                        base_availability_models = avails
                        availabilities = avails

                    ## true availability models
                    # sampled_availability_models = sample_model_parameters(node_requests, base_availability_models, base_model_variances, params['sampling_method'])
                    true_availability_models = avails

                    ## true schedules
                    true_schedules = generate_schedule(
                        node_requests, true_availability_models,
                        params['mu'], params['num_intervals'],
                        params['schedule_generation_method'],
                        params['temporal_consistency'])
                    # save_base_models_to_file(base_model_filepath, base_availability_models, base_model_variances, node_requests, num_deliveries, availability_percent, stat_run)
                    # save_schedules_to_file(schedule_filepath, true_availability_models, true_schedules, node_requests, num_deliveries, availability_percent, stat_run)

                elif params['availabilities'] == 'simple':
                    # sample rooms for deliveries
                    if params['node_closeness'] == 'random':
                        node_requests = random.sample(params['rooms'],
                                                      num_deliveries)
                    if params['node_closeness'] == 'sequential':
                        node_requests = params['rooms'][0:num_deliveries]

                    ## base availability models
                    base_availability_models, base_model_variances = \
                        generate_simple_models(node_requests,
                                               params['start_time'],
                                               availability_percent,
                                               params['budget'],
                                               params['time_interval'],
                                               params['availability_length'],
                                               params['availability_chance'])
                    availabilities = base_availability_models
                    # ## true availability models
                    # sampled_avails = sample_model_parameters(node_requests[stat_run], avails, variances, params['sampling_method'])
                    # true_availability_models.append(sampled_avails)

                    ## true schedules
                    true_schedules = generate_simple_schedules(
                        node_requests, base_availability_models,
                        params['mu'], params['num_intervals'],
                        params['schedule_generation_method'])
                    # true_schedules.append(sample_schedule_from_model(node_requests[stat_run], sampled_avails, mu, params['num_intervals'], params['temporal_consistency']))
                    # save_base_models_to_file(base_model_filepath, base_availability_models[stat_run], base_model_variances[stat_run], node_requests[stat_run], num_deliveries, availability_percent, stat_run)
                    # save_schedules_to_file(schedule_filepath, true_availability_models[stat_run], true_schedules[stat_run], node_requests[stat_run], num_deliveries, availability_percent, stat_run)

                else:
                    raise ValueError(params['availabilities'])

            ## "learned" availability models
            availability_models = base_availability_models
            model_variances = base_model_variances

            # plan and execute paths for specified strategies
            visit_traces = {}
            for strategy in strategies:
                if strategy == 'mcts':
                    total_profit, competitive_ratio, maintenance_competitive_ratio, path_history = \
                        create_policy_and_execute(strategy, g,
                                                  availability_models,
                                                  model_variances,
                                                  true_schedules,
                                                  node_requests,
                                                  params['mu'], params,
                                                  visualize, out_gif_path)
                else:
                    total_profit, competitive_ratio, maintenance_competitive_ratio, path_history = \
                        plan_and_execute(strategy, g, availability_models,
                                         model_variances, true_schedules,
                                         node_requests, params['mu'],
                                         params, visualize, out_gif_path)
                visit_traces[strategy] = path_history

            visualize_path_willow(strategies, visit_traces, availabilities,
                                  true_schedules, node_requests,
                                  params['maintenance_node'],
                                  params['start_time'], params['budget'],
                                  params['time_interval'], out_img_path)
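# To make the window arithmetic above concrete, a worked example with
# assumed numbers (the budget and lengths below are hypothetical, not from
# any of the project's config files):
budget, availability_percent, availability_length = 480, 0.5, 60
available_time = budget * availability_percent                           # 240.0
num_windows = max(1, int(round(available_time / availability_length)))   # 4
ave_window_offset = min((budget - available_time) / num_windows,         # 60.0
                        (budget - available_time) / 2)                   # vs 120.0
mu = int(ave_window_offset / 2)                                          # 30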
def run_bo(xs, oracle, kern, aq_type='ei', noise_var=1e-2, n_init=1,
           n_itr=50, seed=0):
    """Run Bayesian optimization (maximization problem).

    Parameters
    ----------
    xs : 2d-ndarray
        Candidate input points
    oracle : function-obj
        Oracle objective function object (see oracle.py)
    kern : kernel-obj
        Kernel function class object (see kern.py)
    aq_type : str, optional
        Acquisition function type (ei, pi, or ucb), by default ei
    noise_var : float, optional
        Observation noise for GP, by default 1e-2
    n_init : int, optional
        Number of initial input points for the GP, by default 1
    n_itr : int, optional
        Number of Bayesian optimization iterations, by default 50
    seed : int, optional
        Random number generator's seed, by default 0

    Returns
    -------
    dict
        Bayesian optimization results and logs
    """
    rand_gen = np.random.RandomState(seed)
    nx = xs.shape[0]
    xdim = xs.shape[1]

    aq_vals = np.zeros([n_itr, nx])
    selected_xs = np.zeros([n_itr + n_init, xdim])
    selected_ys = np.zeros([n_itr + n_init])
    cumulative_times = np.zeros([n_itr + 1])
    regret = np.zeros([n_itr + 1])
    gp_mus = np.zeros([n_itr, nx])
    gp_vars = np.zeros([n_itr, nx])

    true_max = np.max(oracle(xs))
    true_xmax = xs[np.argmax(oracle(xs))]

    ## select initial xs ##
    init_indices = rand_gen.choice(nx, n_init, replace=False)
    selected_xs[:n_init] = xs[init_indices]
    selected_ys[:n_init] = oracle(selected_xs[:n_init])
    cur_max = np.max(selected_ys)
    cur_xmax = xs[np.argmax(selected_ys)]
    regret[0] = true_max - cur_max

    ### start bayesian optimization
    for i in trange(n_itr):
        st = time.time()
        gp_model = GP(selected_xs[:n_init + i], selected_ys[:n_init + i],
                      kern, noise_var=noise_var, seed=seed)
        pmean, pvar = gp_model.predict_f(xs)
        gp_mus[i] = pmean
        gp_vars[i] = pvar

        if aq_type == 'ei':
            aq_val = EI(pmean, pvar, cur_max)
        elif aq_type == 'pi':
            aq_val = PI(pmean, pvar, cur_max)
        else:
            aq_val = UCB(pmean, pvar)

        next_x = xs[np.argmax(aq_val)]
        aq_vals[i] = aq_val
        selected_xs[i + n_init] = next_x
        # evaluate the oracle at the chosen point, shaped (1, xdim) to
        # match how the oracle is called on the candidate set above
        y = oracle(next_x[np.newaxis, :])[0]
        selected_ys[i + n_init] = y

        if cur_max < y:
            cur_max = y
            cur_xmax = next_x

        cumulative_times[i + 1] = cumulative_times[i] + time.time() - st
        regret[i + 1] = true_max - cur_max

    hist = {
        'selected_xs': selected_xs,
        'selected_ys': selected_ys,
        'aq_vals': aq_vals,
        'cumulative_times': cumulative_times,
        'true_max': true_max,
        'true_xmax': true_xmax,
        'regret': regret,
        'cur_max': cur_max,
        'cur_xmax': cur_xmax,
        'gp_mus': gp_mus,
        'gp_vars': gp_vars,
        'options': {
            'n_itr': n_itr,
            'n_init': n_init,
            'noise_var': noise_var,
            'aq_type': aq_type,
            'seed': seed
        }
    }
    return hist
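# EI, PI, and UCB are imported from elsewhere in this codebase; for
# reference, a sketch of the standard acquisition functions they correspond
# to. The xi and beta defaults here are assumptions, not the project's values.
import numpy as np
from scipy.stats import norm

def EI(mu, var, cur_max, xi=0.0):
    """Expected improvement over the current best observation."""
    std = np.sqrt(np.maximum(var, 1e-12))
    z = (mu - cur_max - xi) / std
    return (mu - cur_max - xi) * norm.cdf(z) + std * norm.pdf(z)

def PI(mu, var, cur_max, xi=0.0):
    """Probability of improving on the current best observation."""
    std = np.sqrt(np.maximum(var, 1e-12))
    return norm.cdf((mu - cur_max - xi) / std)

def UCB(mu, var, beta=2.0):
    """Upper confidence bound with exploration weight beta."""
    return mu + beta * np.sqrt(np.maximum(var, 0.0))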
def stat_runs(world_config_file, schedule_config_file, planner_config_file,
              model_config_file, base_model_filepath, schedule_filepath,
              output_file, strategies, num_deliveries_runs,
              availability_percents, budgets, num_stat_runs, visualize,
              out_gif_path):
    if output_file is None:
        record_output = False
    else:
        record_output = True

    ## params
    params = load_params(world_config_file, schedule_config_file,
                         planner_config_file, model_config_file)

    ## load world
    # g = Graph()
    # g.read_graph_from_file(os.path.dirname(os.path.abspath(__file__)) + params['graph_filename'])
    # g = read_graph_from_file(os.path.dirname(os.path.abspath(__file__)) + params['graph_filename'])
    g, rooms = generate_graph(params['graph_generator_type'],
                              os.path.dirname(os.path.abspath(__file__)),
                              params['graph_filename'], params['max_rooms'],
                              params['rooms'], params['max_traversal_cost'],
                              params['distance_scaling'])
    params['rooms'] = rooms

    # for num_deliveries in num_deliveries_runs:
    num_deliveries = num_deliveries_runs[0]
    for availability_percent in availability_percents:
        for budget in budgets:
            params['budget'] = budget
            params['num_intervals'] = int(params['budget'] / params['time_interval'])
            params['longest_period'] = budget

            # temporal consistency parameter
            if params['availabilities'] == 'windows':
                # available_time = params['budget']*availability_percent
                # num_windows = max(int(round(float(available_time)/params['availability_length'])), 1)
                # ave_window_offset = float(params['budget'] - available_time)/num_windows
                # mu = max(ave_window_offset, 1)
                available_time = params['budget'] * availability_percent
                num_windows = max(1, int(round(
                    float(available_time) / params['availability_length'])))
                # new_availability_length = int(float(available_time)/num_windows)
                ave_window_offset = min(
                    float(params['budget'] - available_time) / num_windows,
                    float(params['budget'] - available_time) / 2)
                mu = int(ave_window_offset / 2)
                # mu = int(params['availability_length']/2)
            elif params['availabilities'] == 'simple':
                mu = int(params['availability_length'] / 2)
            else:
                mu = 30
            params['mu'] = mu

            # base models, true schedules
            node_requests = []
            base_availability_models = []
            base_model_variances = []
            true_availability_models = []
            true_schedules = []
            num_test_runs = 0
            for stat_run in range(num_stat_runs):
                model_file_exists = os.path.exists(
                    base_model_filepath + str(num_deliveries) + "_" +
                    str(availability_percent) + "_" + str(stat_run) + ".p")
                schedule_file_exists = os.path.exists(
                    schedule_filepath + str(num_deliveries) + "_" +
                    str(availability_percent) + "_" + str(stat_run) + ".yaml")
                if model_file_exists and schedule_file_exists:
                    # load pre-generated schedules/models
                    gmms, base_variances, requests = load_base_models_from_file(
                        base_model_filepath, num_deliveries,
                        availability_percent, stat_run)
                    true_avails, schedules = load_schedules_from_file(
                        schedule_filepath, num_deliveries,
                        availability_percent, stat_run)
                    node_requests.append(requests)
                    # gmms = {}
                    # for request in node_requests[stat_run]:
                    #     x_in = list(range(int(params['start_time']), int(params['budget']), int(params['time_interval'])))
                    #     y_in = Y_in[request][:len(x_in)]
                    #     gmms[request] = build_gmm(x_in, y_in, params['start_time'], params['start_time'] + params['budget'], params['time_interval'], params, True)
                    base_availability_models.append(gmms)
                    base_model_variances.append(base_variances)
                    true_availability_models.append(true_avails)
                    true_schedules.append(schedules)
                else:
                    if params['availabilities'] == 'brayford':
                        # model
                        if params['use_gp']:
                            from gp import GP
                            gps = {}
                        # if params['use_gmm']:
                        gmms = {}
                        # mus = {}
                        mu = 0.0
                        mu_n = 0
                        node_requests.append(params['rooms'])
                        for request in node_requests[stat_run]:
                            x_in, y_in, mu_combined, mu_combined_n = load_brayford_training_data(
                                request,
                                os.path.dirname(os.path.abspath(__file__)) + params['data_path'],
                                out_gif_path)
                            if params['use_gp']:
                                gps[request] = GP(None, x_in, y_in,
                                                  params['budget'], 1,
                                                  params['noise_scaling'],
                                                  True, 'values')
                            else:
                                gmms[request] = build_gmm(
                                    x_in, y_in, params['start_time'],
                                    params['start_time'] + params['budget'],
                                    params['time_interval'], params)
                                # gmms[request].visualize(out_gif_path + "train_" + request + "_gmm_histogram_10.jpg", request)
                            # mus[request] = mu_combined/mu_combined_n
                            mu += mu_combined
                            mu_n += mu_combined_n
                            # gps[request].visualize(out_gif_path + "train_" + request + "_model_histogram_10.jpg", request)
                        if params['use_gp']:
                            base_availability_models.append(gps)
                        else:
                            base_availability_models.append(gmms)
                        base_model_variances.append({})
                        mu = mu / mu_n
                        params['mu'] = mu

                        # true schedule
                        # if params['availabilities'] == 'brayford':
                        schedules = {}
                        for request in node_requests[stat_run]:
                            X, Y = load_brayford_testing_data(
                                request,
                                os.path.dirname(os.path.abspath(__file__)) + params['data_path'],
                                stat_run, out_gif_path)
                            schedules[request] = Y[stat_run]
                            # for i in range(Y.shape[0]):
                            #     if not(i in schedules):
                            #         schedules[i] = {}
                            #     schedules[i][request] = Y[i]
                            # num_test_runs = Y.shape[0]
                            # schedules[request] = Y
                            # if params['use_gp']:
                            #     from gp import GP
                            #     test_gp = GP(None, x_in, y_in, params['budget'], 1, params['noise_scaling'], True, 'values')
                            # if params['use_gmm']:
                            #     test_gp = build_gmm(x_in, y_in, params['start_time'], params['start_time'] + params['budget'], params['time_interval'], params)
                            #     if stat_run == 0:
                            #         test_gp.visualize(out_gif_path + "february_" + request + "_model_10.jpg", request)
                            #     else:
                            #         test_gp.visualize(out_gif_path + "november_" + request + "_model_histogram_10.jpg", request)
                            # schedules[request] = test_gp.threshold_sample_schedule(params['start_time'], params['budget'], params['time_interval'])
                            # # visualize:
                            # fig = plt.figure()
                            # X = np.array(list(range(params['start_time'], params['budget'], params['time_interval'])))
                            # Y = np.array(schedules[request])
                            # plt.scatter(X, Y)
                            # if stat_run == 0:
                            #     plt.title("Brayford Schedule Node " + request + ": February")
                            #     plt.savefig(out_gif_path + "february_" + request + ".jpg")
                            # else:
                            #     plt.title("Brayford Schedule Node " + request + ": November")
                            #     plt.savefig(out_gif_path + "november_" + request + ".jpg")
                        true_schedules.append(schedules)

                    elif params['availabilities'] == 'windows':
                        # sample rooms for deliveries
                        if params['node_closeness'] == 'random':
                            node_requests.append(
                                random.sample(params['rooms'], num_deliveries))
                        if params['node_closeness'] == 'sequential':
                            node_requests.append(
                                params['rooms'][0:num_deliveries])

                        ## base availability models
                        avails, variances = generate_windows_overlapping(
                            node_requests[stat_run], params['start_time'],
                            availability_percent, params['budget'],
                            params['time_interval'],
                            params['availability_length'],
                            params['availability_chance'])

                        # X_in = {}
                        # Y_in = {}
                        if params['use_gp']:
                            from gp import GP
                            gps = {}
                            for request in node_requests[stat_run]:
                                x_in = list(range(int(params['start_time']),
                                                  int(params['budget']),
                                                  int(params['time_interval'])))
                                y_in = copy.deepcopy(avails[request])
                                for i in range(len(y_in)):
                                    y = max(y_in[i]
                                            + random.random() * params['noise_scaling']
                                            - params['noise_scaling'] / 2.0,
                                            0.01)
                                    y = min(y, .99)
                                    y_in[i] = y
                                gps[request] = GP(None, x_in, y_in,
                                                  params['budget'],
                                                  params['spacing'],
                                                  0.0, True, 'values')
                                # Y_in[request] = y_in
                            base_availability_models.append(gps)
                        else:
                            gmms = {}
                            for request in node_requests[stat_run]:
                                x_in = list(range(int(params['start_time']),
                                                  int(params['budget']),
                                                  int(params['time_interval'])))
                                y_in = copy.deepcopy(avails[request])
                                for i in range(len(y_in)):
                                    y = max(y_in[i]
                                            + random.random() * params['noise_scaling']
                                            - params['noise_scaling'] / 2.0,
                                            0.01)
                                    y = min(y, .99)
                                    y_in[i] = y
                                gmms[request] = build_gmm(
                                    x_in, y_in, params['start_time'],
                                    params['start_time'] + params['budget'],
                                    params['time_interval'], params, True)
                                # Y_in[request] = y_in
                                # gmms[request].visualize(out_gif_path + "train_" + request + "_gmm_histogram_10.jpg", request)
                                # mus[request] = mu_combined/mu_combined_n
                                # mu += mu_combined
                                # mu_n += mu_combined_n
                            base_availability_models.append(gmms)
                        # else:
                        #     base_availability_models.append(avails)
                        # base_availability_models.append(avails)
                        base_model_variances.append(variances)

                        # true availability models
                        sampled_avails = sample_model_parameters(
                            node_requests[stat_run], avails, variances,
                            params['sampling_method'])
                        sampled_avails = avails
                        true_availability_models.append(avails)

                        ## true schedules
                        true_schedules.append(generate_schedule(
                            node_requests[stat_run], avails, params['mu'],
                            params['num_intervals'],
                            params['schedule_generation_method'],
                            params['temporal_consistency']))
                        # true_schedules.append(sample_schedule_from_model(node_requests[stat_run], sampled_avails, mu, params['num_intervals'], params['temporal_consistency']))

                        save_base_models_to_file(
                            base_model_filepath,
                            base_availability_models[stat_run],
                            base_model_variances[stat_run],
                            node_requests[stat_run], num_deliveries,
                            availability_percent, stat_run)
                        save_schedules_to_file(
                            schedule_filepath,
                            true_availability_models[stat_run],
                            true_schedules[stat_run],
                            node_requests[stat_run], num_deliveries,
                            availability_percent, stat_run)

                    # elif params['availabilities'] == 'simple':
                    #     # sample rooms for deliveries
                    #     if params['node_closeness'] == 'random':
                    #         node_requests.append(random.sample(params['rooms'], num_deliveries))
                    #     if params['node_closeness'] == 'sequential':
                    #         node_requests.append(params['rooms'][0:num_deliveries])
                    #     ## base availability models
                    #     avails, variances = generate_simple_models(node_requests[stat_run], params['start_time'], availability_percent, params['budget'], params['time_interval'], params['availability_length'], params['availability_chance'])
                    #     base_availability_models.append(avails)
                    #     base_model_variances.append(variances)
                    #     # ## true availability models
                    #     # sampled_avails = sample_model_parameters(node_requests[stat_run], avails, variances, params['sampling_method'])
                    #     # true_availability_models.append(sampled_avails)
                    #     ## true schedules
                    #     true_schedules.append(generate_simple_schedules(node_requests[stat_run], sampled_avails, params['mu'], params['num_intervals'], params['schedule_generation_method']))
                    #     # true_schedules.append(sample_schedule_from_model(node_requests[stat_run], sampled_avails, mu, params['num_intervals'], params['temporal_consistency']))
                    #     # save_base_models_to_file(base_model_filepath, base_availability_models[stat_run], base_model_variances[stat_run], node_requests[stat_run], num_deliveries, availability_percent, stat_run)
                    #     # save_schedules_to_file(schedule_filepath, true_availability_models[stat_run], true_schedules[stat_run], node_requests[stat_run], num_deliveries, availability_percent, stat_run)

                    else:
                        raise ValueError(params['availabilities'])

            ## "learned" availability models
            availability_models = base_availability_models
            model_variances = base_model_variances

            # plan and execute paths for specified strategies
            for strategy in strategies:
                strategy_name = strategy
                params['uncertainty_penalty'] = 0.0
                params['observation_reward'] = 0.0
                params['deliver_threshold'] = 0.0
                if strategy == 'observe_mult_visits_up_5_or_0_dt_0':
                    params['uncertainty_penalty'] = 0.5
                    params['observation_reward'] = 0.0
                    params['deliver_threshold'] = 0.0
                    strategy_name = strategy
                    strategy = 'observe_mult_visits'
                if strategy == 'observe_mult_visits_up_0_or_7_dt_0':
                    params['uncertainty_penalty'] = 0.0
                    params['observation_reward'] = 0.7
                    params['deliver_threshold'] = 0.0
                    strategy_name = strategy
                    strategy = 'observe_mult_visits'
                if strategy == 'observe_mult_visits_up_5_or_7_dt_0':
                    params['uncertainty_penalty'] = 0.5
                    params['observation_reward'] = 0.7
                    params['deliver_threshold'] = 0.0
                    strategy_name = strategy
                    strategy = 'observe_mult_visits'

                # for stat_run in range(num_stat_runs):
                for stat_run in [2]:
                    # stat_run = 0
                    # for test_run in range(num_test_runs):
                    if strategy == 'mcts':
                        total_profit, competitive_ratio, maintenance_competitive_ratio, path_history, ave_plan_time = create_policy_and_execute(
                            strategy, g, availability_models[stat_run],
                            model_variances[stat_run],
                            true_schedules[stat_run],
                            node_requests[stat_run], params['mu'], params,
                            visualize, out_gif_path)
                    else:
                        total_profit, competitive_ratio, maintenance_competitive_ratio, path_history, ave_plan_time = plan_and_execute(
                            strategy, g, availability_models[stat_run],
                            model_variances[stat_run],
                            true_schedules[stat_run],
                            node_requests[stat_run], params['mu'], params,
                            visualize, out_gif_path)

                    if record_output:
                        with open(output_file, 'a', newline='') as csvfile:
                            writer = csv.writer(csvfile, delimiter=',',
                                                quotechar='|',
                                                quoting=csv.QUOTE_MINIMAL)
                            writer.writerow([
                                strategy_name, params['budget'],
                                num_deliveries, availability_percent,
                                params['availability_chance'],
                                params['maintenance_reward'],
                                params['max_noise_amplitude'],
                                params['variance_bias'],
                                competitive_ratio,
                                maintenance_competitive_ratio,
                                ave_plan_time
                            ])
def test4():
    system = GP(terminals, functions, fitness)
    system.init_population()
    return system.run(3)
modeldir = "/export/a10/kduh/p/mt/gridsearch/" + args.dataset + "/models/"
x, y, _ = extract_data(modeldir=modeldir,
                       threshold=args.threshold,
                       architecture=args.architecture,
                       rnn_cell_type=args.rnn_cell_type)

result = np.zeros((len(y) - 3, len(y)))
for i in range(len(y) - 3):
    print("step {0}/{1}".format(i + 1, len(y) - 3))
    label_ids = np.array([i, i + 1, i + 2])
    while len(label_ids) != len(y):
        if args.model == "gbssl":
            opt_model = GBSSL(x, y[label_ids], label_ids)
        elif args.model == "gp":
            opt_model = GP(x, y[label_ids], label_ids)
        elif args.model == "krr":
            opt_model = KRR(x, y[label_ids], label_ids)
        y_preds, y_vars = opt_model.fit_predict()
        del opt_model

        unlabel_ids = np.array([u for u in range(len(y))
                                if u not in label_ids])

        def get_risk(candidate_id):
            opt_model = GBSSL(x,
                              np.append(y[label_ids], y_preds[candidate_id]),
                              np.append(label_ids, candidate_id))
            new_y_preds, new_y_vars = opt_model.fit_predict()
            del opt_model
            return np.linalg.norm(
                np.array(new_y_preds)[label_ids] - y[label_ids])
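# The snippet above is truncated before the selection step. One plausible
# continuation (an assumption, not the original code) greedily labels the
# candidate whose inclusion minimizes the estimated risk:
#
#         risks = np.array([get_risk(u) for u in unlabel_ids])
#         next_id = unlabel_ids[np.argmin(risks)]
#         label_ids = np.append(label_ids, next_id)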
X = np.array([p[0] for p in data])
Y = np.array([p[1] for p in data])

# Normalize the Y dimension.
mean_Y = np.mean(Y)
std_Y = np.std(Y)
Y = (Y - mean_Y) / std_Y

# Fit a Gaussian Process to the data points.
lengthscale = 40
signal_variance = 3.
noise_variance = 0.1

X_star = np.linspace(0, 960, 50)
kernel = SquaredExponentialKernel(lengthscale=lengthscale,
                                  signal_variance=signal_variance)
gp = GP(kernel, noise_variance=noise_variance)
post_m, post_var, weights = gp.posterior(X, Y, X_star)

# Plot results.
color = 'yellow'
ax.plot(X_star, post_m * std_Y + mean_Y, color=color)
ax.scatter(X, Y * std_Y + mean_Y, s=30, color=color)
post_var = np.diagonal(post_var)
plt.fill_between(X_star,
                 (post_m - 1.96 * np.sqrt(post_var)) * std_Y + mean_Y,
                 (post_m + 1.96 * np.sqrt(post_var)) * std_Y + mean_Y,
                 color=color,
                 alpha=0.2)
plt.xlim(0, 960)
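# gp.posterior is defined elsewhere; assuming it follows the textbook GP
# regression equations, a self-contained sketch of what it computes:
import numpy as np

def gp_posterior(K, K_star, K_star_star, y, noise_variance):
    """m* = K*^T (K + s2 I)^{-1} y;  V* = K** - K*^T (K + s2 I)^{-1} K*."""
    Ky = K + noise_variance * np.eye(K.shape[0])
    L = np.linalg.cholesky(Ky)
    alpha = np.linalg.solve(L.T, np.linalg.solve(L, y))
    post_m = K_star.T @ alpha
    v = np.linalg.solve(L, K_star)
    post_var = K_star_star - v.T @ v
    return post_m, post_var

# The 1.96 factor above converts the posterior standard deviation into a
# 95% credible band; multiplying by std_Y and adding mean_Y undoes the
# earlier normalization of Y.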
from gp import GP, ExponentialSquaredKernel

import numpy as np
from scipy.stats import multivariate_normal
import matplotlib.pyplot as plt

from utils import multiple_formatter

# Set values to model parameters.
lengthscale = 1
signal_variance = 1.
noise_variance = 0.1

# Create the GP.
kernel = ExponentialSquaredKernel(lengthscale=lengthscale,
                                  signal_variance=signal_variance)
gp = GP(kernel=kernel, noise_variance=noise_variance)

n = 60
x = np.linspace(0, 2 * np.pi, n)
mean = np.zeros(n)
cov = gp.k(x, x)

# Draw samples from the GP prior.
probabilities = []
samples = []
jitter = np.eye(n) * 1e-6
for _ in range(50):
    y = multivariate_normal.rvs(mean=mean, cov=cov)
    # Add a jitter to the covariance matrix for numerical stability.
    prob = multivariate_normal.pdf(y, mean=mean, cov=cov + jitter)
    samples.append(y)
    probabilities.append(prob)
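# An equivalent way to draw the same kind of samples (an assumed alternative,
# not part of the original script): factor the jittered covariance once with
# a Cholesky decomposition and reuse it, which is cheaper for repeated draws.
L = np.linalg.cholesky(cov + jitter)
samples_chol = [mean + L @ np.random.randn(n) for _ in range(50)]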
def test_gp_mut(generations=20):
    data_path = Path('./containerfs/tmp/cetdl1772small.dat')
    training_data = parse_data(data_path)
    gpobj = GP(POP_SIZE, training_data, mutation_method='branch_replacement')
    gpobj.run(generations)
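# 'branch_replacement' suggests subtree mutation from the genetic-programming
# literature; a minimal sketch of the idea. This helper and its nested-list
# tree encoding are illustrative assumptions, not this GP class's internals.
import random

def branch_replacement(tree, random_tree, rng=random):
    """Replace a uniformly chosen subtree of `tree` with `random_tree`.

    Trees are nested lists like ['+', ['x'], ['1']]; index 0 holds the
    node label and the remaining entries are children.
    """
    # collect references to all subtrees
    nodes = []
    stack = [tree]
    while stack:
        node = stack.pop()
        nodes.append(node)
        stack.extend(child for child in node[1:] if isinstance(child, list))
    # overwrite a randomly chosen subtree in place
    target = rng.choice(nodes)
    target[:] = random_tree
    return tree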