def bayesian_opt():
    # 1. ranges of the synth parameters
    syn1 = syn2 = syn3 = syn4 = syn5 = np.arange(158)
    syn6 = np.arange(6000)
    syn7 = np.arange(1000)
    syn8 = np.arange(700)

    # 2. synth parameter ranges into an 8D parameter space
    # parameter_space = ParameterSpace(
    #     [ContinuousParameter('x1', 0., 157.)])
    # parameter_space = ParameterSpace(
    #     [DiscreteParameter('x8', syn8)])
    parameter_space = ParameterSpace(
        [ContinuousParameter('x1', 0., 157.), ContinuousParameter('x2', 0., 157.),
         ContinuousParameter('x3', 0., 157.), ContinuousParameter('x4', 0., 157.),
         ContinuousParameter('x5', 0., 157.), ContinuousParameter('x6', 0., 5999.),
         ContinuousParameter('x7', 0., 999.), ContinuousParameter('x8', 0., 699.)])
    # parameter_space = ParameterSpace(
    #     [DiscreteParameter('x1', syn1), DiscreteParameter('x2', syn2), DiscreteParameter('x3', syn3),
    #      DiscreteParameter('x4', syn4), DiscreteParameter('x5', syn5), DiscreteParameter('x6', syn6),
    #      DiscreteParameter('x7', syn7), DiscreteParameter('x8', syn8)])

    # 3. collect random points
    design = RandomDesign(parameter_space)
    X = design.get_samples(num_data_points)  # X is a numpy array
    print("X=", X)

    # [is the below needed?]
    # UserFunction.evaluate(training_function, X)  # I put UserFunctionWrapper in line 94

    # 4. define training_function as Y
    Y = training_function(X)

    # [is this needed?]
    # loop_state = create_loop_state(X, Y)

    # 5. train and wrap the model in Emukit
    model_gpy = GPRegression(X, Y, normalizer=True)
    model_emukit = GPyModelWrapper(model_gpy)
    expected_improvement = ExpectedImprovement(model=model_emukit)
    bayesopt_loop = BayesianOptimizationLoop(model=model_emukit,
                                             space=parameter_space,
                                             acquisition=expected_improvement,
                                             batch_size=5)
    max_iterations = 15
    bayesopt_loop.run_loop(training_function, max_iterations)
    model_gpy.plot()
    plt.show()

    results = bayesopt_loop.get_results()
    # bayesopt_loop.loop_state.X
    print("X: ", bayesopt_loop.loop_state.X)
    print("Y: ", bayesopt_loop.loop_state.Y)
    print("cost: ", bayesopt_loop.loop_state.cost)
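
# Illustrative stand-ins for the two names bayesian_opt() assumes are defined at
# module level but which are not shown here: `num_data_points` and `training_function`.
# Emukit's run_loop wraps a plain callable in a UserFunctionWrapper, so the function
# must map an (n, 8) array of parameter settings to an (n, 1) array of objective
# values. The target vector and distance objective below are purely hypothetical.
import numpy as np

num_data_points = 25  # assumed initial design size

def training_function(X):
    # X has shape (n, 8); the returned array must be 2-D with shape (n, 1)
    target = np.array([80., 80., 80., 80., 80., 3000., 500., 350.])
    return np.linalg.norm(X - target, axis=1, keepdims=True)
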
def fit_GPy_kern(X, Y, kernel, restarts, score=BIC, **kwargs):
    if len(np.shape(X)) == 1:
        X = np.array(X)[:, None]
    if len(np.shape(Y)) == 1:
        Y = np.array(Y)[:, None]
    m = GPRegression(X, Y, kernel)
    m.optimize_restarts(num_restarts=restarts, **kwargs)
    m.plot()
    print(m.kern)
    print(f'Log-Likelihood: {m.log_likelihood()}')
    print(f'{score.__name__}: {score(m.log_likelihood(), len(X), m._size_transformed())}')
    plt.show()
    return m
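
# Example call of fit_GPy_kern on synthetic 1-D data. It assumes `BIC` (the default
# `score` argument) is a callable of the form score(log_likelihood, n, k) defined
# elsewhere in the project, and that matplotlib.pyplot is imported as plt upstream.
import numpy as np
from GPy.kern import RBF

X_demo = np.linspace(0, 10, 50)
Y_demo = np.sin(X_demo) + 0.1 * np.random.randn(50)
m_demo = fit_GPy_kern(X_demo, Y_demo, RBF(input_dim=1), restarts=5)
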
class GPModel():

    def __init__(self, X, Y, kernel_expression=SumKE(['WN'])._initialise()):
        self.X = X
        self.Y = Y
        self.kernel_expression = kernel_expression
        self.restarts = None
        self.model = None
        self.cached_utility_function = None
        self.cached_utility_function_type = None

    # Kwargs passed to optimize_restarts, which passes them to optimize
    # Check comments in optimize's class AND optimization.get_optimizer for real list of optimizers
    # TODO: Eventually set robust to True; see description in optimize_restarts method
    def fit(self, restarts=None, optimiser='lbfgsb', verbose=False, robust=False, **kwargs):
        if restarts is None:
            if self.restarts is None:
                raise ValueError('No restarts value specified')
        else:
            self.restarts = restarts
        self.model = GPRegression(self.X, self.Y, self.kernel_expression.to_kernel())
        with warnings.catch_warnings():  # Ignore known numerical warnings
            warnings.simplefilter('ignore')
            self.model.optimize_restarts(num_restarts=self.restarts, verbose=verbose,
                                         robust=robust, optimizer=optimiser, **kwargs)
        return self

    def interpret(self):
        return fit_ker_to_kex_with_params(
            self.model.kern, deepcopy(self.kernel_expression)).get_interpretation()

    def predict(self, X, quantiles=(2.5, 97.5), full_cov=False, Y_metadata=None,
                kern=None, likelihood=None, include_likelihood=True):
        mean, cov = self.model.predict(X, full_cov, Y_metadata, kern, likelihood, include_likelihood)
        qs = self.model.predict_quantiles(X, quantiles, Y_metadata, kern, likelihood)
        return {'mean': mean, 'covariance': cov,
                'low_quantile': qs[0], 'high_quantile': qs[1]}

    def change_plotting_library(self, library='plotly_offline'):
        '''Wrapper of GPy.plotting's homonymous function; supported values are:
        'matplotlib', 'plotly', 'plotly_online', 'plotly_offline' and 'none'.
        If 'plotly' then a 3-tuple is returned, with as 1st value the Figure object
        requiring a .show() to display.'''
        change_plotting_library(library)

    def plot(self):
        return self.model.plot()

    # Model fit objective criteria & related values:
    def _ll(self):
        return self.model.log_likelihood()

    def _n(self):
        return len(self.model.X)  # number of data points

    def _k(self):
        return self.model._size_transformed()  # number of estimated parameters, i.e. model degrees of freedom

    def _ordered_score_ps(self):
        return self.model, self._ll(), self._n(), self._k()

    def compute_utility(self, score_f):
        self.cached_utility_function = score_f(*self._ordered_score_ps())
        self.cached_utility_function_type = score_f.__name__
        return self.cached_utility_function
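
# Hedged usage sketch for GPModel. Everything other than GPModel itself (SumKE,
# its to_kernel()/_initialise() methods, fit_ker_to_kex_with_params) is project-specific
# machinery assumed importable from the surrounding codebase; the default white-noise
# kernel expression is kept only to show the call pattern, not as a good model choice.
import numpy as np

X_gp = np.random.uniform(-3, 3, (40, 1))
Y_gp = np.sin(X_gp) + 0.05 * np.random.randn(40, 1)

gp = GPModel(X_gp, Y_gp).fit(restarts=3)
preds = gp.predict(np.linspace(-3, 3, 100)[:, None])
print(preds['mean'].shape, preds['low_quantile'].shape)
# compute_utility expects a score of the form score_f(model, log_likelihood, n, k):
print(gp.compute_utility(lambda model, ll, n, k: -2 * ll + k * np.log(n)))  # BIC-style score
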
X_train = np.array([-4, -3, -2, -1, 3]).reshape(-1, 1)
Y_train = np.sin(X_train)

rbf = RBF(input_dim=1, variance=1.0, lengthscale=1.0)
brownian = Brownian(input_dim=1, variance=1.0)
periodic = PeriodicExponential(input_dim=1, variance=2.0, n_freq=100)
cosine = Cosine(input_dim=1, variance=2)
exponential = Exponential(input_dim=1, variance=2.0)
integral = Integral(input_dim=1, variances=2.0)
matern = Matern32(input_dim=1, variance=2.0)

gpr = GPRegression(X_train, Y_train, matern)

# Fix the noise variance to known value (`noise` is assumed to be defined earlier in the script)
gpr.Gaussian_noise.variance = noise**2
gpr.Gaussian_noise.variance.fix()

# Run optimization
ret = gpr.optimize()
print(ret)

# Display optimized parameter values
print(gpr)

# Obtain optimized kernel parameters
# l = gpr.rbf.lengthscale.values[0]
# sigma_f = np.sqrt(gpr.rbf.variance.values[0])

# Plot the results with the built-in plot function
gpr.plot()
plt.show()
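
# The commented-out lines above read the optimized parameters off an RBF kernel;
# since this model was built with the Matern32 kernel, the equivalent extraction
# (a small sketch using the generic `kern` attribute) would be:
l = gpr.kern.lengthscale.values[0]              # optimized lengthscale
sigma_f = np.sqrt(gpr.kern.variance.values[0])  # optimized signal standard deviation
print(l, sigma_f)
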
class KernelKernelGPModel:

    def __init__(self,
                 kernel_kernel: Optional[Covariance] = None,
                 noise_var: Optional[float] = None,
                 exact_f_eval: bool = False,
                 optimizer: Optional[str] = 'lbfgsb',
                 max_iters: int = 1000,
                 optimize_restarts: int = 5,
                 verbose: bool = True,
                 kernel_kernel_hyperpriors: Optional[HyperpriorMap] = None):
        """
        :param kernel_kernel:
        :param noise_var:
        :param exact_f_eval:
        :param optimizer:
        :param max_iters:
        :param optimize_restarts:
        :param verbose:
        :param kernel_kernel_hyperpriors:
        """
        self.noise_var = noise_var
        self.exact_f_eval = exact_f_eval
        self.optimize_restarts = optimize_restarts
        self.optimizer = optimizer
        self.max_iters = max_iters
        self.verbose = verbose
        self.covariance = kernel_kernel
        self.kernel_hyperpriors = kernel_kernel_hyperpriors
        self.model = None

    def train(self):
        """Train (optimize) the model."""
        if self.max_iters > 0:
            # Update the model maximizing the marginal likelihood.
            if self.optimize_restarts == 1:
                self.model.optimize(optimizer=self.optimizer, max_iters=self.max_iters,
                                    messages=False, ipython_notebook=False)
            else:
                self.model.optimize_restarts(num_restarts=self.optimize_restarts,
                                             optimizer=self.optimizer,
                                             max_iters=self.max_iters,
                                             ipython_notebook=False,
                                             verbose=self.verbose,
                                             robust=True,
                                             messages=False)

    def _create_model(self, x: np.ndarray, y: np.ndarray):
        """Create model given input data X and output data Y.

        :param x: 2d array of indices of distance builder
        :param y: model fitness scores
        :return:
        """
        # Make sure input data consists only of positive integers.
        assert np.issubdtype(x.dtype, np.integer) and x.min() >= 0

        # Define kernel
        self.input_dim = x.shape[1]
        # TODO: figure out default kernel kernel initialization
        if self.covariance is None:
            assert self.covariance is not None
            # kern = GPy.kern.RBF(self.input_dim, variance=1.)
        else:
            kern = self.covariance.raw_kernel
            self.covariance = None

        # Define model
        noise_var = y.var() * 0.01 if self.noise_var is None else self.noise_var
        normalize = x.size > 1  # only normalize if more than 1 observation.
        self.model = GPRegression(x, y, kern, noise_var=noise_var, normalizer=normalize)

        # Set hyperpriors
        if self.kernel_hyperpriors is not None:
            if 'GP' in self.kernel_hyperpriors:
                # Set likelihood hyperpriors.
                likelihood_hyperprior = self.kernel_hyperpriors['GP']
                set_priors(self.model.likelihood, likelihood_hyperprior, in_place=True)
            if 'SE' in self.kernel_hyperpriors:
                # Set kernel hyperpriors.
                se_hyperprior = self.kernel_hyperpriors['SE']
                set_priors(self.model.kern, se_hyperprior, in_place=True)

        # Restrict variance if exact evaluations of the objective.
        if self.exact_f_eval:
            self.model.Gaussian_noise.constrain_fixed(1e-6, warning=False)
        else:
            # --- We make sure we do not get ridiculously small residual noise variance
            if self.model.priors.size > 0:
                # FIXME: shouldn't need this case, but GPy doesn't have log Jacobian implemented for Logistic
                self.model.Gaussian_noise.constrain_positive(warning=False)
            else:
                self.model.Gaussian_noise.constrain_bounded(1e-9, 1e6, warning=False)

    def update(self, x_all, y_all, x_new, y_new):
        """Update model with new observations."""
        if self.model is None:
            self._create_model(x_all, y_all)
        else:
            self.model.set_XY(x_all, y_all)
        self.train()

    def _predict(self, x: np.ndarray, full_cov: bool, include_likelihood: bool):
        if x.ndim == 1:
            x = x[None, :]
        m, v = self.model.predict(x, full_cov=full_cov, include_likelihood=include_likelihood)
        v = np.clip(v, 1e-10, np.inf)
        return m, v

    def predict(self, x: np.ndarray, with_noise: bool = True):
        m, v = self._predict(x, False, with_noise)
        # We can take the square root because v is just a diagonal matrix of variances
        return m, np.sqrt(v)

    def get_f_max(self):
        """Return the maximum value of the posterior mean over the observed inputs."""
        return self.model.predict(self.model.X)[0].max()

    def plot(self, **plot_kwargs):
        import matplotlib.pyplot as plt
        self.model.plot(plot_limits=(0, self.model.kern.n_models - 1),
                        resolution=self.model.kern.n_models,
                        **plot_kwargs)
        plt.show()
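
# Rough usage sketch for KernelKernelGPModel. `Covariance` and `HyperpriorMap` are
# project-specific types that only appear as annotations above, so a minimal stand-in
# exposing a `raw_kernel` attribute (an ordinary GPy kernel) is assumed here; the
# inputs are integer model indices paired with fitness scores, as _create_model expects.
import numpy as np
import GPy

class _StubCovariance:
    def __init__(self, kernel):
        self.raw_kernel = kernel

x_idx = np.arange(10, dtype=int)[:, None]   # indices into a distance builder
y_fit = np.random.randn(10, 1)              # model fitness scores
kk_model = KernelKernelGPModel(kernel_kernel=_StubCovariance(GPy.kern.RBF(1)),
                               optimize_restarts=2, verbose=False)
kk_model.update(x_idx, y_fit, None, None)   # creates the GPRegression model, then trains it
mean, std = kk_model.predict(x_idx.astype(float))
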
class Gaussfit:
    """Handles GPR of input data."""

    def __init__(self):
        """Initialize a gaussfit object."""
        self.kernel = None
        self.model = None
        self.scale = None
        self.translate = None
        self.save_fig = False
        self.save_path = None
        self.kernel_name = None  # Used for saving file names

    # The properties store their values on private attributes so the
    # getters/setters do not call themselves recursively.
    @property
    def save_fig(self):
        return self._save_fig

    @save_fig.setter
    def save_fig(self, save_fig):
        self._save_fig = save_fig

    @property
    def save_path(self):
        return self._save_path

    @save_path.setter
    def save_path(self, save_path):
        self._save_path = save_path

    def set_gp_kernel(self, kernel=DEFAULTS['kernel'], in_dim=DEFAULTS['input_dim'],
                      variance=DEFAULTS['variance'], lengthscale=DEFAULTS['lengthscale'],
                      multi_dim=False):
        """Sets the kernel of this Gaussfit."""
        self.kernel_name = kernel  # This is used for saving file names
        if kernel == 'RBF':
            self.kernel = RBF(input_dim=in_dim, variance=variance,
                              lengthscale=lengthscale, ARD=multi_dim)
        elif kernel == 'Exponential':
            self.kernel = Exponential(input_dim=in_dim, variance=variance,
                                      lengthscale=lengthscale, ARD=multi_dim)
        elif kernel == 'Matern32':
            self.kernel = Matern32(input_dim=in_dim, variance=variance,
                                   lengthscale=lengthscale, ARD=multi_dim)
        elif kernel == 'Matern52':
            self.kernel = Matern52(input_dim=in_dim, variance=variance,
                                   lengthscale=lengthscale, ARD=multi_dim)
        else:
            print('Kernel not recognized or not implemented')

    def populate_gp_model(self, observable, lecs, energy=None, rescale=False, fixvariance=0):
        """Creates a model based on given data and kernel.

        Args:
        observable - numpy array with observable (1 row for each observable from each lec sample)
        lecs - numpy array with lec parameters the fit should be done with regard to
               (lec 1 in column 1 and so on, sample 1 on row 1 and so on)
        energy - energy values
        """
        # Add row with energies to parameters for fit (np.c_ for columns if that is the right way)
        if energy is not None:
            lecs = np.r_[lecs, energy]
        if rescale:
            (lecs, observable) = self.rescale(lecs, observable)
        # NOTE: transpose() returns a new array; these calls leave lecs/observable unchanged.
        lecs.transpose()
        observable.transpose()
        self.model = GPRegression(lecs, observable, self.kernel)
        self.model.Gaussian_noise.variance.unconstrain()
        self.model.Gaussian_noise.variance = fixvariance
        self.model.Gaussian_noise.variance.fix()

    def optimize(self, num_restarts=1):
        """Optimize the model."""
        # Something wrong, model doesn't always converge
        self.model.optimize_restarts(num_restarts=num_restarts, messages=True)
        print(self.model)

    def rescale(self, inlecs, inobs):
        """Rescales the input parameters that GPy handles so that they are in the interval [-1, 1]."""
        # Remove 16xnr
        if self.translate is None:
            self.translate = np.append(np.mean(inlecs, axis=0), np.mean(inobs))
        inlecs = inlecs - self.translate[None, :16]
        inobs = inobs - self.translate[16]
        if self.scale is None:
            self.scale = np.append(np.amax(abs(inlecs), axis=0), max(abs(inobs)))
            self.scale[self.scale <= 1e-10] = 1
        outlecs = inlecs / self.scale[None, :16]
        outobs = inobs / self.scale[16]
        return (outlecs, outobs)

    def calculate_valid(self, Xvalid):
        """Calculates model prediction in validation points."""
        if self.scale is not None:
            Xvalid = (Xvalid - self.translate[None, :16]) / self.scale[None, :16]
            (Ymodel, Variance) = self.model.predict(Xvalid)
            Ymodel = Ymodel * self.scale[16] + self.translate[16]
            Variance = Variance * self.scale[16] * self.scale[16]
            return (Ymodel, Variance)
        else:
            return self.model.predict(Xvalid)

    def plot(self):
        """Plot the GP-model. Plot limits only for 1D-case."""
        print(self.model)
        self.model.plot()
        plt.show()

    def tags_to_title(self, train_tags, val_tags):
        """Create plot title from tags."""
        title = '_'.join(train_tags)
        title += '_' + '_'.join(val_tags)
        title += '_' + str(self.kernel_name)
        return title

    def save_fig_to_file(self, filename):
        """Saves the last specified global figure to a file named filename.
        File path specified by self.save_path. Also concatenates kernel name used.
        """
        plt.savefig(self.save_path + filename)

    def generate_and_save_tikz(self, Ymodel, Yvalid, Variance, train_tags, val_tags):
        fig = plt.figure()
        style.use('seaborn-bright')
        sigma = np.sqrt(Variance)
        Expected, = plt.plot([max(Yvalid), min(Yvalid)], [max(Yvalid), min(Yvalid)],
                             '-', linewidth=2, zorder=10, ms=19, label="Expected")
        Data, = plt.plot(Yvalid, Ymodel, '.', ms=0.5, zorder=3, label="Data points")
        plt.errorbar(Yvalid, Ymodel, yerr=2 * sigma, fmt='none', alpha=0.5,
                     zorder=1, label="Error bars")
        # Raw strings keep the LaTeX/siunitx commands from being read as escape sequences.
        plt.xlabel(r'Simulated value [\si{\milli\barn}]')
        plt.ylabel(r'Emulated value [\si{\milli\barn}]')
        plt.grid(True)
        modelError = str(self.get_model_error(Ymodel, Yvalid))

        # Create a legend for the line.
        first_legend = plt.legend(handles=[Expected, Data], loc=4)  # ["Expected", "Data points"],
        # third_legend = plt.legend(handles=[Error], loc=4)

        # The following saves the file to folder as well as adding 3 rows. The
        # "clip mode=individual" was a bit tricky to add, so this is the ugly way to solve it.
        tikz_save(self.save_path + self.tags_to_title(train_tags, val_tags) + '_predicted_actual.tex',
                  figureheight='\\textwidth*0.8,\nclip mode=individual',
                  figurewidth='\\textwidth*0.8')

        # Last fix of tikz with script.
        edit = EditText()
        # adding tikz file info
        edit.fix_file(
            self.save_path + self.tags_to_title(train_tags, val_tags) + '_predicted_actual.tex',
            '% This file was created by matplotlib2tikz v0.6.3.',
            '% ' + self.save_path + '\n% ' + self.tags_to_title(train_tags, val_tags) +
            '\n% Model Error: ' + modelError)
        # adding legend
        edit.fix_file(
            self.save_path + self.tags_to_title(train_tags, val_tags) + '_predicted_actual.tex',
            '\\end{axis}',
            '\\legend{Data,Expected}\n\\end{axis}')
        # adding forget plot
        edit.fix_file(
            self.save_path + self.tags_to_title(train_tags, val_tags) + '_predicted_actual.tex',
            '\\addplot [lightgray!80.0!black, opacity=0.5, mark=-, mark size=3, mark options={solid}, only marks]',
            '\\addplot [lightgray!80.0!black, opacity=0.5, mark=-, mark size=3, mark options={solid}, only marks, forget plot]')
        # Making transformable to PNG
        edit.fix_file(
            self.save_path + self.tags_to_title(train_tags, val_tags) + '_predicted_actual.tex',
            '% Model Error: ' + modelError,
            '\\documentclass{standalone}\n\\usepackage{tikz}\n\\usepackage{pgfplots}\n'
            '\\usepackage{siunitx}\n\n\\begin{document}')
        edit.fix_file(
            self.save_path + self.tags_to_title(train_tags, val_tags) + '_predicted_actual.tex',
            '\\end{tikzpicture}',
            '\\end{tikzpicture}\n\\end{document}')

    def get_model_error(self, Ymodel, Yvalid, alt=False):
        """A measure of how large the model's error is compared to validation points.
        Currently uses the rms of the relative error.
        """
        # Sum of a numpy array returns another array; we use the first (and only) element
        # if alt:
        #     return np.sqrt(np.mean(np.square((Ymodel - Yvalid) / np.std(Yvalid))))
        return np.sqrt(np.mean(np.square((Ymodel - Yvalid) / Yvalid)))

    def plot_predicted_actual(self, Ymodel, Yvalid, Variance, train_tags, val_tags):
        """Plots the predicted values vs the actual values, adds a straight line and 2-sigma error bars."""
        sigma = np.sqrt(Variance)
        plt.figure(1)
        plt.plot(Yvalid, Ymodel, '.')
        plt.errorbar(Yvalid, Ymodel, yerr=2 * sigma, fmt='none')
        plt.plot([max(Yvalid), min(Yvalid)], [max(Yvalid), min(Yvalid)], '-')
        plt.xlabel('Simulated value [mb]')
        plt.ylabel('Emulated value [mb]')
        # Do we want to save to file?
        if self.save_fig:
            self.save_fig_to_file(self.tags_to_title(train_tags, val_tags) + "_predicted_actual.png")
        plt.show()

    def get_sigma_intervals(self, Ymodel, Yvalid, Variance):
        """Returns the fraction of errors within 1, 2, and 3 sigma."""
        sigma = np.sqrt(Variance)
        n = np.array([0, 0, 0])
        errors = abs(Yvalid - Ymodel)
        for i, e in enumerate(errors):
            if e <= sigma[i]:
                n[0] = n[0] + 1
            if e <= 2 * sigma[i]:
                n[1] = n[1] + 1
            if e <= 3 * sigma[i]:
                n[2] = n[2] + 1
        return n / float(np.shape(errors)[0])

    def plot_modelerror(self, Xvalid, Xlearn, Ymodel, Yvalid, train_tags, val_tags):
        """Creates a plot showing the validated error."""
        alldists = cdist(Xvalid, Xlearn, 'euclidean')
        mindists = np.min(alldists, axis=1)
        plt.figure(1)
        plt.plot(mindists, Ymodel - Yvalid, '.')
        plt.xlabel('Distance to closest training point')
        plt.ylabel('Validated error [mb]')
        plt.axis([0, 1.1 * max(mindists),
                  1.1 * min(Ymodel - Yvalid), 1.1 * max(Ymodel - Yvalid)])
        # Do we want to save val error to file?
        if self.save_fig:
            self.save_fig_to_file(self.tags_to_title(train_tags, val_tags) + "_val_error.png")
        plt.figure(2)
        plt.plot(mindists, (Ymodel - Yvalid) / Yvalid, '.')
        plt.xlabel('Distance to closest training point')
        plt.ylabel('Validated relative error')
        plt.axis([0, 1.1 * max(mindists),
                  1.1 * min((Ymodel - Yvalid) / Yvalid), 1.1 * max((Ymodel - Yvalid) / Yvalid)])
        # Show model_error in plot
        if self.save_fig:
            self.save_fig_to_file(self.tags_to_title(train_tags, val_tags) + "_val_rel_error.png")
        plt.show()

    def plot_model(self, Xvalid, Ymodel, Yvalid):
        """Plot the model of training data with the model of validation data."""
        plt.figure(3)
        plt.plot(Xvalid, Ymodel, 'bo')
        plt.plot(Xvalid, Yvalid, 'rx')
        plt.show()

    def plot_kernel(self, lec_idx):
        """Plots the kernel function of lec index."""
        plot_covariance(self.kernel, visible_dims=lec_idx)
        plt.show()

    def plot_lecs(self, center, intervals, energy=None):
        """Plots a slice of each lec through the center point.
        Set energy to None if energy is not a parameter in your model.
        """
        if energy is not None:
            center = np.append(center, energy).reshape(1, 17)
            intervals = np.append(intervals, 0).reshape(1, 17)
        else:
            intervals = np.append(intervals, 0).reshape(1, 16)
        for i in range(16):
            plt.subplot(4, 4, i + 1)
            x = np.linspace(center[0][i] - intervals[0][i],
                            center[0][i] + intervals[0][i], num=200)
            lecs = np.tile(center[0], (200, 1))
            lecs[:, i] = x
            obs, _ = self.calculate_valid(lecs)
            plt.plot(x, obs)
        plt.show()

    def save_model_parameters(self, savepath, traintags, kernel, LEC_LENGTH,
                              lengthscale, multidim, rescale):
        """Saves GPy model hyperparameters as a .pickle file."""
        params = self.model.param_array
        if savepath.endswith(".pickle") and (not os.path.isfile(savepath)):
            with open(savepath, 'wb') as f:  # binary mode required for pickle
                pickle.dump([params, kernel, traintags, LEC_LENGTH,
                             lengthscale, multidim, rescale], f)
        elif not savepath.endswith(".pickle"):
            print("*****ERROR***** Model properties must be saved as .pickle file *****ERROR*****")
        elif os.path.isfile(savepath):
            print("*****ERROR***** File already exists. Cannot save to existing file. *****ERROR*****")

    def load_model_parameters(self, Ylearn, Xlearn, loadpath):
        """Loads a GPy model with hyperparameters from a .pickle file."""
        # NOTE: transpose() returns a new array; these calls leave Xlearn/Ylearn unchanged.
        Xlearn.transpose()
        Ylearn.transpose()
        with open(loadpath, 'rb') as f:  # binary mode required for pickle
            filecontents = pickle.load(f)
        if len(filecontents) == 6:
            params, kernel, traintags, LEC_LENGTH, lengthscale, multi_dim = filecontents
            rescale = False
        elif len(filecontents) == 7:
            params, kernel, traintags, LEC_LENGTH, lengthscale, multi_dim, rescale = filecontents
        print(params)
        print(LEC_LENGTH)
        self.set_gp_kernel(kernel=kernel, in_dim=LEC_LENGTH,
                           lengthscale=lengthscale, multi_dim=multi_dim)
        if rescale:
            (Xlearn, Ylearn) = self.rescale(Xlearn, Ylearn)
        m_load = GPRegression(Xlearn, Ylearn, self.kernel, initialize=False)
        m_load.update_model(False)
        m_load.initialize_parameter()
        m_load[:] = params
        m_load.update_model(True)
        self.model = m_load

    def plot_energy_curve(self, mod_obs, val_obs, mod_var, val_energy):
        plt.plot(val_energy, val_obs, 'x')
        plt.plot(val_energy, mod_obs, 'o')
        plt.show()