def __init__(self, sample_kernel, error_kernel=None, *, fit_white_noise=False):
    if fit_white_noise:
        sample_kernel = sample_kernel + kernels.WhiteKernel()
    if error_kernel is None:
        if fit_white_noise:
            error_kernel = sample_kernel
        else:
            error_kernel = sample_kernel + kernels.WhiteKernel()
    self.fit_white_noise = fit_white_noise
    self.sample_kernel = sample_kernel
    self.submodel_samples = GaussianProcessRegressor(self.sample_kernel)
    self.submodel_errors = GaussianProcessRegressor(error_kernel)
def bo_(x_obs, y_obs):
    kernel = kernels.Matern() + kernels.WhiteKernel()
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=16)
    gp.fit(x_obs, y_obs)

    xs = list(repeat(np.atleast_2d(np.linspace(0, 10, 128)).T, 2))
    x = cartesian_product(*xs)

    a = a_EI(gp, x_obs=x_obs, y_obs=y_obs)
    argmin_a_x = x[np.argmax(a(x))]

    # heavy evaluation
    print("f({})".format(argmin_a_x))
    f_argmin_a_x = f2d(np.atleast_2d(argmin_a_x))

    plot_2d(gp, x_obs, y_obs, argmin_a_x, a, xs)
    plt.show()

    bo_(
        x_obs=np.vstack((x_obs, argmin_a_x)),
        y_obs=np.hstack((y_obs, f_argmin_a_x)),
    )
def build_model(self, specie, members):
    """Build the model using a GP."""
    # Get the data
    decisions_df = self.build_member_decision_score_df(specie, members)

    # Extract the choices as the explanatory variables
    choices = ComponentState.get(specie).list_choices()
    choice_names = [c.get_name() for c in choices]
    x = decisions_df.loc[:, choice_names]

    # Extract the scores as the dependent variable
    y = decisions_df.loc[:, "score"]

    # Preprocess using one-hot encoding
    categories = [c.get_component_names() for c in choices]
    encoder = OneHotEncoder(sparse=False, categories=categories)

    # Define the isotropic kernel
    kernel = 1.0 * kernels.RBF([5]) + kernels.WhiteKernel()

    # Define the regressor
    regressor = GaussianProcessRegressor(kernel=kernel, normalize_y=True)

    # Build the pipeline and fit it
    pipeline = Pipeline(steps=[('encoder', encoder), ('regressor', regressor)])
    pipeline.fit(x, y)
    return pipeline
def __init__(self, sample_kernel, error_kernel=None):
    if error_kernel is None:
        error_kernel = sample_kernel + kernels.WhiteKernel()
    self.sample_kernel = sample_kernel
    self.submodel_samples = GaussianProcessRegressor(self.sample_kernel)
    self.submodel_errors = GaussianProcessRegressor(error_kernel)
def cov_function_sklearn(params, nu=5 / 2):
    """Generates a default covariance function.

    Args:
      params: A dictionary with GP hyperparameters.
      nu: Degree of the Matern kernel.

    Returns:
      cov_fun: an ARD Matern covariance function with diagonal noise for
        numerical stability.
    """
    amplitude = params['amplitude']
    noise = params['noise']
    lengthscale = params['lengthscale'].flatten()

    amplitude_bounds = PARAMS_BOUNDS['amplitude']
    lengthscale_bounds = PARAMS_BOUNDS['lengthscale']
    noise_bounds = PARAMS_BOUNDS['noise']

    cov_fun = kernels.ConstantKernel(
        amplitude, constant_value_bounds=amplitude_bounds) * kernels.Matern(
            lengthscale, nu=nu, length_scale_bounds=lengthscale_bounds
        ) + kernels.WhiteKernel(noise, noise_level_bounds=noise_bounds)
    return cov_fun
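# Illustrative usage sketch (not part of the snippet above): PARAMS_BOUNDS and the
# hyperparameter values below are assumed for demonstration only; the returned kernel
# is an ordinary sklearn kernel and can be passed straight to GaussianProcessRegressor.
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor, kernels

PARAMS_BOUNDS = {
    'amplitude': (1e-2, 1e2),
    'lengthscale': (1e-2, 1e2),
    'noise': (1e-6, 1e-1),
}
params = {
    'amplitude': 1.0,
    'noise': 1e-3,
    'lengthscale': np.ones((1, 3)),  # one length scale per input dimension (ARD)
}
cov_fun = cov_function_sklearn(params)

rng = np.random.default_rng(0)
X = rng.random((20, 3))
y = np.sin(X).sum(axis=1)
GaussianProcessRegressor(kernel=cov_fun, n_restarts_optimizer=5).fit(X, y)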
def gp_noise_estimation(
    chunk: Type[DataChunk], rbf_params={}, noise_params={}, verbose=False
) -> np.ndarray:
    """
    Uses a simple Gaussian Process model to perform noise estimation on spectral
    data. A given chunk of the full spectrum is fit with a GP model comprising
    RBF and white noise kernels, where the former explains covariance in
    intensities between channels and the latter models variation in the signal
    as i.i.d. white noise.

    The GP model is conditioned to provide a maximum likelihood estimate of the
    data, and depends heavily on the initial parameters. The arguments
    `rbf_params` and `noise_params` allow the user to override defaults for the
    kernels, and may require some tweaking to get the desired behavior.

    The objective of this function is to estimate the noise at every point of
    the spectrum; it returns a NumPy 1D array of noise values with the same
    shape as the frequency bins.

    Parameters
    ----------
    chunk : Type[DataChunk]
        [description]
    rbf_params : dict, optional
        [description], by default {}
    noise_params : dict, optional
        [description], by default {}

    Returns
    -------
    np.ndarray
        NumPy 1D array containing the noise at every channel
    """
    freq, intensity = chunk.frequency, chunk.intensity
    # RBF parameters affect how correlated each channel is;
    # noise parameters affect the variance in signal explained as normally
    # distributed noise
    rbf_kern = {"length_scale": 5e-1, "length_scale_bounds": (1e-1, 10.0)}
    noise_kern = {"noise_level": 1e-1, "noise_level_bounds": (1e-3, 1.0)}
    rbf_kern.update(**rbf_params)
    noise_kern.update(**noise_params)
    # instantiate the model
    kernel = kernels.RBF(**rbf_kern) + kernels.WhiteKernel(**noise_kern)
    gp_model = GaussianProcessRegressor(kernel, normalize_y=True)
    gp_result = gp_model.fit(freq[:, None], intensity[:, None])
    # reproduce the spectrum with uncertainties
    pred_y, pred_std = gp_result.predict(freq[:, None], return_std=True)
    # log some information about the GP result
    if verbose:
        logger.info(f"GP results for catalog index {chunk.catalog_index}.")
        logger.info(
            f"MSE from GP fit: {mean_squared_error(pred_y.flatten(), intensity):.4f}"
        )
        logger.info(
            f"Marginal log likelihood: {gp_result.log_marginal_likelihood_value_:.4f}"
        )
        logger.info(f"Kernel parameters: {gp_result.kernel_}")
    return pred_std
def fit_model(opt_spec: OptimizationProblem, train_x: np.ndarray,
              train_y: np.ndarray) -> Pipeline:
    """Fit and test a model using the latest data

    Args:
        opt_spec: Configuration file for the optimization
        train_x: Input columns
        train_y: Output column
    """
    # Create an initial RBF kernel, using the training set mean as a scaling parameter
    kernel = train_y.mean() ** 2 * kernels.RBF(length_scale=1)

    # TODO (wardlt): Make it clear where featurization would appear, as we are soon to introduce additives
    #  This will yield chemical degrees of freedom better captured using features of the additives rather
    #  than a new variable per additive
    #  Notes for now: Mol. Weight, Side Chain Length, and ... are the likely candidates

    # Add a noise parameter based on user settings
    noise = opt_spec.planner_options.get('noise_level', 0)
    if noise < 0:
        # Use the standard deviation of the distribution of train_y as the estimate of the initial noise
        # TODO (wardlt): Document where 3, 4, and 11 come from
        noise_estimated = np.std(train_y) / 3
        noise_lb = noise_estimated / 4
        noise_ub = noise_estimated * 11

        kernel_noise = kernels.WhiteKernel(noise_level=noise_estimated ** 2,
                                           noise_level_bounds=(noise_lb ** 2, noise_ub ** 2))
        kernel = kernel + kernel_noise
    elif noise > 0:
        kernel = kernel + kernels.WhiteKernel(noise ** 2, noise_level_bounds=(noise ** 2,) * 2)

    # Train a GPR model
    model = Pipeline([('variance', VarianceThreshold()),
                      ('scale', StandardScaler()),
                      ('gpr', GaussianProcessRegressor(kernel))])

    # Train and save the model
    model.fit(train_x, train_y)
    print(f'Finished fitting the model on {len(train_x)} data points')
    print(f'Optimized model: {model["gpr"].kernel_}')
    return model
def GPR_fit(x_train, y_train, x_test):
    kernel = (sk_kern.RBF(1.0, (1e-3, 1e3))
              + sk_kern.ConstantKernel(1.0, (1e-3, 1e3))
              + sk_kern.WhiteKernel())
    clf = GaussianProcessRegressor(kernel=kernel,
                                   alpha=1e-10,
                                   optimizer="fmin_l_bfgs_b",
                                   n_restarts_optimizer=20,
                                   normalize_y=True)
    clf.fit(x_train, y_train)
    pred_mean, pred_std = clf.predict(x_test, return_std=True)
    return pred_mean, pred_std
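# Illustrative call (not from the original source), assuming `sk_kern` is
# sklearn.gaussian_process.kernels imported under that alias; the returned
# standard deviation reflects both the RBF fit and the fitted white-noise level.
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process import kernels as sk_kern

rng = np.random.default_rng(0)
x_train = rng.uniform(0, 10, size=(30, 1))
y_train = np.sin(x_train).ravel() + 0.1 * rng.standard_normal(30)
x_test = np.linspace(0, 10, 200)[:, None]

pred_mean, pred_std = GPR_fit(x_train, y_train, x_test)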
def main():
    # Specify type_ and region of data
    type_ = 'organic'
    region = 'WestTexNewMexico'

    # Specify the kernel functions; please see the paper for the rationale behind the choices
    kernel = Kernels.ExpSineSquared(length_scale=20., periodicity=365.) \
        + 0.8 * Kernels.RationalQuadratic(alpha=20., length_scale=80.) \
        + Kernels.WhiteKernel(.2)

    # Fit GP model and plot
    run_gp(kernel, n_restarts_optimizer=10, type_=type_, region=region)
def fit(self, X, Y, alpha=0.0, verbose=False):
    X = np.array(X)
    kernel = 1.0 * GPKernels.RBF(length_scale=100.0, length_scale_bounds=(1e-2, 1e3)) \
        + GPKernels.WhiteKernel(noise_level=1, noise_level_bounds=(1e-10, 1e+1))
    gp = GaussianProcessRegressor(kernel=kernel, alpha=alpha)
    self.model = gp.fit(X, Y)
    self.fitted = True
    if verbose:
        print("Parameters of " + self.name + " :")
        print("-" * 30)
        print("-" * 30)
def select_kernel(self, kernel):
    """Get the sklearn.gaussian_process.kernels kernel by matching the given
    kernel identifier.

    Parameters:
        kernel (str): Kernel string such as 'RBF' or, depending on the
            surrogate, also product and sum kernels such as 'RBF+Matern52'.

    Returns:
        sklearn.gaussian_process.kernels: Scikit-learn kernel object.
        Currently, for sum and product kernels, the initial hyperparameters
        are the same for all kernels.
    """
    from re import split
    from sklearn.gaussian_process import kernels as sklearn_kernels
    full_str = split('([+*])', kernel)
    try:
        kernel = []
        for key in full_str:
            kernel += [
                key if key in ('+', '*') else getattr(sklearn_kernels, key)(
                    length_scale=self.hyperparameters['length_scale'])
            ]
    except AttributeError:
        raise RuntimeError("Kernel {} is not implemented.".format(kernel))

    if len(kernel) == 1:
        kernel = kernel[0]
    else:
        kernel = [str(key) if not isinstance(key, str) else key for key in kernel]
        kernel = eval(''.join(kernel))

    # Add scale and noise to kernel
    kernel *= sklearn_kernels.ConstantKernel(
        constant_value=1 / self.hyperparameters['sigma_f'].item() ** 2)
    if not self.fixed_sigma_n:
        kernel += sklearn_kernels.WhiteKernel(
            noise_level=self.hyperparameters['sigma_n'].item() ** 2)

    return kernel
def bo_(x_obs, y_obs, n_iter):
    if n_iter > 0:
        kernel = kernels.Matern() + kernels.WhiteKernel()
        gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=16)
        gp.fit(x_obs, 1 - y_obs)

        a = a_EI(gp, x_obs=x_obs, y_obs=1 - y_obs)
        argmax_f_x_ = x[np.argmax(a(x))]

        # heavy evaluation
        f_argmax_f_x_ = cross_validation(argmax_f_x_)
        y_ob = np.atleast_2d(mean_mean_validation_scores(f_argmax_f_x_)).T

        return f_argmax_f_x_ + bo_(
            x_obs=np.vstack((x_obs, argmax_f_x_)),
            y_obs=np.vstack((y_obs, y_ob)),
            n_iter=n_iter - 1,
        )
    else:
        return []
def integrated_sigma(alpha, n_samples, n_restarts_optimizer=16, f=f):
    print("integrated_sigma(n_samples={n_samples}, alpha={alpha})".format(
        n_samples=n_samples,
        alpha=alpha,
    ))
    X = np.atleast_2d(np.linspace(1, 9, n_samples)).T
    y = f(X).ravel()
    x = np.atleast_2d(np.linspace(0, 10, 16 * 1024)).T

    kernel = kernels.Matern() + (kernels.WhiteKernel(noise_level=alpha)
                                 if alpha is not None else 0.0)
    gp = GaussianProcessRegressor(
        kernel=kernel,
        n_restarts_optimizer=n_restarts_optimizer,
    )
    gp.fit(X, y)
    y_pred, sigma = gp.predict(x, return_std=True)

    return simps(
        x=x.ravel(),
        y=sigma,
    )
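# A small hedged driver (not in the original snippet): it supplies a toy objective f
# and compares the integrated predictive sigma without a WhiteKernel term (alpha=None)
# against runs with explicit noise levels.
import numpy as np


def f(X):
    return X * np.sin(X)


for alpha in (None, 1e-2, 1e-1):
    print(alpha, integrated_sigma(alpha=alpha, n_samples=32, f=f))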
def gp(xdata, ydata):
    kernel = [
        kernels.RBF(),
        kernels.Matern(),
        kernels.ConstantKernel(),
        kernels.WhiteKernel(),
        kernels.RationalQuadratic()
    ]
    max_iter_predict = [10, 50, 100, 500, 1000]
    warm_start = [False, True]
    multi_class = ['one_vs_rest', 'one_vs_one']

    with open('gaussianprocess.csv', mode='w', newline='') as file:
        writer = csv.writer(file, quoting=csv.QUOTE_NONNUMERIC)
        writer.writerow([
            'kernel', 'max_iter_predict', 'warm_start', 'multi_class', 'accuracy'
        ])
        for k in kernel:
            for m in max_iter_predict:
                for w in warm_start:
                    for mc in multi_class:
                        accuracy = 0
                        model = GaussianProcessClassifier(kernel=k,
                                                          max_iter_predict=m,
                                                          warm_start=w,
                                                          multi_class=mc,
                                                          random_state=1)
                        kf = StratifiedKFold(n_splits=5, shuffle=True)
                        # index the function arguments, not the globals X and y
                        for i, j in kf.split(xdata, ydata):
                            X_ktrain, X_ktest = xdata[i], xdata[j]
                            y_ktrain, y_ktest = ydata[i], ydata[j]
                            model.fit(X_ktrain, y_ktrain)
                            ypred = model.predict(X_ktest)
                            accuracy += np.mean(ypred == y_ktest)
                        accuracy /= 5
                        writer.writerow([k, m, w, mc, accuracy])
def fit_model(self, train_x, train_y):
    """Fit a Gaussian process regressor with a noisy Matern kernel to the given data."""
    train_x, train_y = self.preprocess(train_x, train_y, 1500)

    k = ker.Matern(length_scale=0.01, nu=2.5) + \
        ker.WhiteKernel(noise_level=1e-05)
    gpr = gp.GaussianProcessRegressor(kernel=k,
                                      alpha=0.01,
                                      n_restarts_optimizer=20,
                                      random_state=42,
                                      normalize_y=True)
    noisyMat_gpr = pipeline.Pipeline([("scaler", self.scaler), ("gpr", gpr)])

    print("Fitting noisy Matern GPR")
    start = time()
    noisyMat_gpr.fit(train_x, train_y)
    print("Took {} seconds".format(time() - start))
    self.model = noisyMat_gpr
# a = 10.0
a = 2.0
f0 = 3.0
T = 1000
# N = 10
N = 10


def process(x):
    return a * np.sin(2.0 * np.pi * f0 * x)


locations = np.linspace(0, 1.0, T)
trueValues = process(locations)
noise = noiseStd * np.random.randn(len(trueValues))
observableValues = trueValues + noise

kernel = ((a ** 2) * gpk.RBF(length_scale=0.25 / f0)) + gpk.WhiteKernel(noiseStd ** 2)


def do_update():
    # update values
    # indexes = np.random.randint(0, T - 1, N)
    indexes = np.random.randint(int(T / 4.0), int(3.0 * T / 4.0), N)
    obsLocations = np.array([locations[i] for i in indexes])
    obsValues = np.array([observableValues[i] for i in indexes])

    gprProcessor = GaussianProcessRegressor(kernel,
                                            alpha=0.0,
                                            optimizer=None,
                                            copy_X_train=False)
    gprProcessor.fit(obsLocations.reshape(-1, 1), obsValues.reshape(-1, 1))
    prediction = gprProcessor.predict(locations.reshape(-1, 1), return_std=True)
calc_d[obs_name] = {
    'x_list': x_list,
    'y_list': y_list,
    'mean': calculation_mean_list,
    'uncert': calculation_uncert_list
}

#########################################
# Make interpolator for each observable #
#########################################

kernel = (
    1. * kernels.RBF(length_scale=.2, length_scale_bounds=(.05, .5))
    # + kernels.ConstantKernel()
    + kernels.WhiteKernel(noise_level=1., noise_level_bounds=(1e-5, 1e5)))

gp = GPR(kernel=kernel, n_restarts_optimizer=5, copy_X_train=False)

meshmesh = np.zeros((nlenp * nlenx, 2))
z_list_new = np.zeros((nlenp * nlenx, 1))
for ii in range(nlenp * nlenx):
    meshmesh[ii][0] = x_mesh[math.floor(ii / nlenx)][0]
    meshmesh[ii][1] = y_mesh[0][int(ii % nlenx)]
    z_list_new[ii] = z_list[math.floor(ii / nlenx)][int(ii % nlenx)]

gp.fit(np.atleast_2d(meshmesh), z_list_new)

print("C^2 = ", gp.kernel_.get_params()['k1'])
print(gp.kernel_.get_params()['k2'])


def predictM(x, gpx):
    mean2 = gpx.predict(return_cov=False, X=np.atleast_2d(x).T)
def __init__(self, system_str, npc, nrestarts=2):
    print("Emulators for system " + system_str)
    print("with viscous correction type {:d}".format(idf))
    print("NPC : " + str(npc))
    print("Nrestart : " + str(nrestarts))

    # The list of observables is defined in calculations_file_format_event_average.
    # Here we get their names and sum all the centrality bins to find the total
    # number of observables nobs.
    self.nobs = 0
    self.observables = []
    self._slices = {}

    for obs, cent_list in obs_cent_list[system_str].items():
        # for obs, cent_list in calibration_obs_cent_list[system_str].items():
        self.observables.append(obs)
        n = np.array(cent_list).shape[0]
        self._slices[obs] = slice(self.nobs, self.nobs + n)
        self.nobs += n

    print("self.nobs = " + str(self.nobs))

    # read in the model data from file
    print("Loading model calculations from " + SystemsInfo[system_str]['main_obs_file'])

    # things to drop
    delete = []

    # build a matrix of dimension (num design pts) x (number of observables)
    Y = []
    for ipt, data in enumerate(trimmed_model_data[system_str]):
        row = np.array([])
        for obs in self.observables:
            # n_bins_bayes = len(calibration_obs_cent_list[system_str][obs])  # only using these bins for calibration
            # values = np.array(trimmed_model_data[system_str][ipt, idf][obs]['mean'][:n_bins_bayes])
            values = np.array(data[idf][obs]['mean'])
            if np.isnan(values).sum() > 0:
                print("WARNING! FOUND NAN IN MODEL DATA WHILE BUILDING EMULATOR!")
                print("Design pt = " + str(ipt) + "; Obs = " + obs)
            row = np.append(row, values)
        Y.append(row)
    Y = np.array(Y)
    print("Y_Obs shape[Ndesign, Nobs] = " + str(Y.shape))

    # Principal Components
    self.npc = npc
    self.scaler = StandardScaler(copy=False)
    # whiten to ensure uncorrelated outputs with unit variances
    self.pca = PCA(copy=False, whiten=True, svd_solver='full')
    # Standardize observables and transform through PCA. Use the first
    # `npc` components but save the full PC transformation for later.
    # Keep all the rows (design points) but only the first npc columns.
    Z = self.pca.fit_transform(self.scaler.fit_transform(Y))[:, :npc]

    design, design_max, design_min, labels = prepare_emu_design(system_str)

    # delete undesirable data
    if len(delete_design_pts_set) > 0:
        print("Warning! Deleting " + str(len(delete_design_pts_set)) + " points from data")
        design = np.delete(design, list(delete_design_pts_set), 0)

    ptp = design_max - design_min
    print("Design shape[Ndesign, Nparams] = " + str(design.shape))

    # Define kernel (covariance function):
    # Gaussian correlation (RBF) plus a noise term.
    # The noise term is necessary since model calculations contain statistical noise.
    k0 = 1. * kernels.RBF(
        length_scale=ptp,
        length_scale_bounds=np.outer(ptp, (4e-1, 1e2)),
        # nu=3.5
    )
    k1 = kernels.ConstantKernel()
    k2 = kernels.WhiteKernel(noise_level=.1, noise_level_bounds=(1e-2, 1e2))

    # kernel = (k0 + k1 + k2)  # this includes a constant kernel
    kernel = (k0 + k2)  # this does not

    # Fit a GP (optimize the kernel hyperparameters) to each PC.
    self.gps = []
    for i, z in enumerate(Z.T):
        print("Fitting PC #", i)
        self.gps.append(
            GPR(kernel=kernel,
                alpha=0.1,
                n_restarts_optimizer=nrestarts,
                copy_X_train=False).fit(design, z))

    for n, (z, gp) in enumerate(zip(Z.T, self.gps)):
        print("GP " + str(n) + " score : " + str(gp.score(design, z)))

    print("Constructing full linear transformation matrix")
    # Construct the full linear transformation matrix, which is just the PC
    # matrix with the first axis multiplied by the explained standard
    # deviation of each PC and the second axis multiplied by the
    # standardization scale factor of each observable.
    self._trans_matrix = (self.pca.components_
                          * np.sqrt(self.pca.explained_variance_[:, np.newaxis])
                          * self.scaler.scale_)

    # Pre-calculate some arrays for inverse transforming the predictive
    # variance (from PC space to physical space).
    # Assuming the PCs are uncorrelated, the transformation is
    #
    #   cov_ij = sum_k A_ki var_k A_kj
    #
    # where A is the trans matrix and var_k is the variance of the kth PC.
    # https://en.wikipedia.org/wiki/Propagation_of_uncertainty

    print("Computing partial transformation for first npc components")
    # Compute the partial transformation for the first `npc` components
    # that are actually emulated.
    A = self._trans_matrix[:npc]
    self._var_trans = np.einsum('ki,kj->kij', A, A,
                                optimize=False).reshape(npc, self.nobs ** 2)

    # Compute the covariance matrix for the remaining neglected PCs
    # (truncation error). These components always have variance == 1.
    B = self._trans_matrix[npc:]
    self._cov_trunc = np.dot(B.T, B)

    # Add small term to diagonal for numerical stability.
    self._cov_trunc.flat[::self.nobs + 1] += 1e-4 * self.scaler.var_
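# A small standalone check (not from the original emulator): it illustrates the
# uncertainty-propagation identity cov_ij = sum_k A_ki var_k A_kj used above,
# with made-up values for npc and nobs.
import numpy as np

npc, nobs = 3, 5
A = np.random.rand(npc, nobs)   # rows: principal components, columns: observables
var = np.random.rand(npc)       # predictive variance of each PC

# direct formula
cov_direct = A.T @ np.diag(var) @ A

# einsum-based version, matching the precomputed _var_trans above
var_trans = np.einsum('ki,kj->kij', A, A).reshape(npc, nobs ** 2)
cov_einsum = (var @ var_trans).reshape(nobs, nobs)

assert np.allclose(cov_direct, cov_einsum)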
param1_paramspace_length = param1_max - param1_min
param2_paramspace_length = param2_max - param2_min

# Kernels
k0 = 1. * kernels.RBF(
    length_scale=(param1_paramspace_length / 2., param2_paramspace_length / 2.),
    length_scale_bounds=(
        (param1_paramspace_length / param1_nb_design_pts, 3. * param1_paramspace_length),
        (param2_paramspace_length / param2_nb_design_pts, 3. * param2_paramspace_length)))

relative_uncertainty = info_d['theoretical_relative_uncertainty']

k2 = 1. * kernels.WhiteKernel(
    noise_level=relative_uncertainty,
    # noise_level_bounds='fixed'
    noise_level_bounds=(relative_uncertainty / 4., 4 * relative_uncertainty))

kernel = (k0 + k2)

nrestarts = 10

emulator_design_pts_value = np.transpose([
    np.ravel(calc_d[obs_name]['param1_mesh']),
    np.ravel(calc_d[obs_name]['param2_mesh'])
])  # Should have format [[param1, param2], [], [], ...]

emulator_obs_mean_value = np.ravel(
    calc_d[obs_name]['mean_plus_noise'])  # Should have format [obs1, obs1, ...]
def to_sklearn(self):
    """Convert it to a sklearn kernel, if there is one"""
    return sklearn_kern.WhiteKernel(noise_level=self.variance)
def integrate_EI(x, sample_theta_list, evaluated_loss, mode, greater_is_better=False, n_params=1):
    """ expected_improvement
    Expected improvement acquisition function.

    Arguments:
    ----------
        x: array-like, shape = [n_samples, n_hyperparams]
            The point for which the expected improvement needs to be computed.
        sample_theta_list: hyperparameter samples of the GP model, which will be used to
            calculate the integrated acquisition function
        evaluated_loss: Numpy array.
            Numpy array that contains the values of the loss function for the previously
            evaluated hyperparameters.
        greater_is_better: Boolean.
            Boolean flag that indicates whether the loss function is to be maximised or minimised.
        n_params: int.
            Dimension of the hyperparameter space.
    """
    # sample_theta_list contains all samples of hyperparameters
    ei_list = list()
    input_dimension = n_params
    init_length_scale = np.ones((input_dimension, ))
    kernel = kernels.Sum(
        kernels.WhiteKernel(),
        kernels.Product(
            kernels.ConstantKernel(),
            kernels.Matern(length_scale=init_length_scale, nu=5. / 2.)))
    for theta_set in sample_theta_list:
        model = Gaussian_Process(kernel, mode)
        '''
        model = gp.GaussianProcessRegressor(kernel=kernel,
                                            alpha=1e-5,
                                            optimizer=None,
                                            normalize_y=True)
        model.set_params(**{"kernel__k1__noise_level": np.abs(theta_set[0]),
                            "kernel__k2__k1__constant_value": np.abs(theta_set[1]),
                            "kernel__k2__k2__length_scale": theta_set[2:]})
        '''
        model.set_params(theta_set)
        x_to_predict = x.reshape(-1, n_params)

        mu, sigma = model.predict(x_to_predict)
        # mu, sigma = model.predict(x_to_predict, return_std=True)

        if greater_is_better:
            loss_optimum = np.max(evaluated_loss)
        else:
            loss_optimum = np.min(evaluated_loss)

        scaling_factor = (-1) ** (not greater_is_better)

        # In case sigma equals zero
        with np.errstate(divide='ignore'):
            Z = scaling_factor * (mu - loss_optimum) / sigma
            expected_improvement = scaling_factor * (mu - loss_optimum) * norm.cdf(Z) \
                + sigma * norm.pdf(Z)
            expected_improvement[sigma == 0.0] = 0.0  # assignment, not comparison

        ei_list.append(expected_improvement[0])
    res_ei = np.mean(ei_list)
    result = np.array([res_ei])
    return -1 * result
def scikit_prior(filename0, varname='v', dt=0, tlim=6, radar='',
                 xlim=[0, 0], ylim=[0, 0], dx=0, ind=0, xrange=3):
    startTime = datetime.now()
    dir0, a = filename0.split("res")
    b, fname0 = a.split("/")
    fname0 = dir0 + fname0
    fm = sio.loadmat(fname0 + '.mat')
    print('Longitude limits:', xlim)
    print('Latitude limits :', ylim)

    # get radar data grid, if that is the case:
    if radar != '':
        inFile = Dataset(radar, 'r')
        lon0, lat0 = inFile.variables['imageOriginPosition'][:]
        x0, y0 = NAD83(lon0, lat0)
        x0 = (x0 - x_ori) / 1000.  # in km
        y0 = (y0 - y_ori) / 1000.  # in km
        xg = x0 + inFile.variables['xCoords'][:] / 1000.
        yg = y0 + inFile.variables['yCoords'][:] / 1000.
        tr = inFile.variables['time'][:]
        ur = inFile.variables['ux'][:]
        vr = inFile.variables['uy'][:]
        t0 = datetime(2016, 1, 1)  # radar data counts from here, in hours
        t0D = datetime(2016, 2, 7, 2, 15)  # first time from Filtered_2016_2_7.pkl
        tg = np.array([(t0 + timedelta(tr[0]) - t0D).total_seconds() / 3600])
        it = 0
        filename = filename0 + '_radar'
        Yg, Tg, Xg = np.meshgrid(yg, tg, xg)
        Tg = np.reshape(Tg, [Tg.size, 1])
        Yg = np.reshape(Yg, [Yg.size, 1])
        Xg = np.reshape(Xg, [Xg.size, 1])
        X = np.concatenate([Tg, Yg, Xg], axis=1)
    else:
        # DEFINE GRID
        if (xlim[1] > xlim[0]) & (ylim[1] > ylim[0]):
            # should focus here
            X, tcenter, yg, xg = getGrid([dt, dt + 1], ylim, xlim, 1, dx)
            filename = filename0 + '_cyc'
        else:
            # this is for preexisting grids
            f = Dataset(filename0 + '.nc', 'r')
            # HPU = f.variables['hyperparam_u'][:]
            # HPV = f.variables['hyperparam_v'][:]
            xg = f.variables['x'][:]
            yg = f.variables['y'][:]
            tg = f.variables['time'][:]
            it = dt  # tg.size/2 + dt
            tcenter = np.array([tg[it]])
            Yg, Tg, Xg = np.meshgrid(yg, tg, xg)
            Tg = np.reshape(Tg, [Tg.size, 1])
            Yg = np.reshape(Yg, [Yg.size, 1])
            Xg = np.reshape(Xg, [Xg.size, 1])
            X = np.concatenate([Tg, Yg, Xg], axis=1)
            filename = filename0
            inc = yg.size * xg.size
            i2 = inc * it
            X = X[i2:i2 + inc, :]

    filename = filename + '_' + str(np.round(tcenter[0], decimals=2)) + 'h_scikit_'
    outFile = filename + str(ind) + '.nc'

    # LOAD Observations
    to = fm['Xo'][:, 0]
    tt = fm['Xt'][:, 0]
    xo = fm['Xo'][:, 2]
    xt = fm['Xt'][:, 2]
    ito = np.where((to >= tcenter - tlim) & (to <= tcenter + tlim)
                   & (xo >= xlim[0] - xrange) & (xo <= xlim[1] + xrange))
    itt = np.where((tt >= tcenter - tlim) & (tt <= tcenter + tlim)
                   & (xt >= xlim[0] - xrange) & (xt <= xlim[1] + xrange))
    Xo = fm['Xo'][ito, :].squeeze()
    Xt = fm['Xt'][itt, :].squeeze()
    XT = np.concatenate([Xo, Xt], axis=0)
    print('Number of observation points: ', np.size(XT, 0))
    obs = fm['obs'][ito, :].squeeze()
    obst = fm['test_points'][itt, :].squeeze()

    # LOAD Hyper-Parameters
    cheatPickle = GPy.load('cheatPickle.pkl')
    model = GPy.load(fname0 + '_' + varname + '.pkl')
    HP = model.param_array
    covarname = varname + 'var'
    modelName = filename + varname + '.pkl'
    if varname == 'u':
        u = np.concatenate([obs[:, 1], obst[:, 1]])[:, None]
    else:
        u = np.concatenate([obs[:, 0], obst[:, 0]])[:, None]
    N = HP.size - 1
    noise = HP[-1]
    print('noise = ' + str(HP[-1]))

    # Build Model
    print(modelName)
    # if not os.path.isfile(modelName):
    k = HP[0] * kernels.RBF(length_scale=[HP[1], HP[2], HP[3]])
    print('var1 = ' + str(HP[0]))
    if N > 5:
        i = 4
        k = k + HP[i] * kernels.RBF(length_scale=[HP[i + 1], HP[i + 2], HP[i + 3]])
        print('var2 = ' + str(HP[i]))
    k = k + kernels.WhiteKernel(noise_level=noise)
    print(k)
    model_u = GaussianProcessRegressor(kernel=k, optimizer=None)
    print(np.size(XT, 0), np.size(XT, 1))
    print(np.size(u, 0), np.size(u, 1))
    model_u.fit(XT, u)
    # file might be too large to save
    # with open(modelName, 'wb') as output:
    pickle.dump(model_u, open(modelName, 'wb'))
    # else:
    #     with open(modelName, 'rb') as input:
    #         model_u = pickle.load(input)

    # REGRESSION
    U, Ustd = model_u.predict(X, return_std=True)
    U = np.reshape(U, [tcenter.size, yg.size, xg.size])
    Ustd = np.reshape(Ustd, [tcenter.size, yg.size, xg.size])

    # SAVE NETCDF
    if not os.path.isfile(outFile):
        createNC(outFile, tcenter, yg, xg, HP)
    print(np.ndim(U), np.size(U, 0), np.size(U, 1))
    print(np.ndim(Ustd), np.size(Ustd, 0), np.size(Ustd, 1))
    fi = Dataset(outFile, 'a')
    fi = writeNC(fi, varname, U)
    fi = writeNC(fi, covarname, Ustd ** 2)
    fi = writeNC(fi, 'hyperparam_' + varname, HP)
    fi.close()
    print('End of script, time : ' + str(datetime.now() - startTime))
def bayesian_optimisation(slice_sample_num, coor_sigma, burn_in, input_dimension,
                          n_iters, sample_loss, bounds, x0=None, n_pre_samples=5,
                          acqui_eva_num=10, random_search=False, epsilon=1e-7,
                          greater_is_better=False, mode='OPT', acqui_mode='MCMC',
                          acqui_sample_num=3):
    """ bayesian_optimisation
    Uses Gaussian Processes to optimise the loss function `sample_loss`.

    Arguments:
    ----------
        slice_sample_num: integer.
            how many samples we draw for each time of slice sampling
        coor_sigma: numpy array
            step-size for slice sampling of each coordinate, the dimension is equal to the
            number of hyperparameters contained in the kernel
        burn_in: integer.
            how many iterations we want to wait before draw samples from slice sampling
        input_dimension: integer.
            dimension of input data
        n_iters: integer.
            Number of iterations to run the search algorithm.
        sample_loss: function.
            Function to be optimised.
        bounds: array-like, shape = [n_params, 2].
            Lower and upper bounds on the parameters of the function `sample_loss`.
        x0: array-like, shape = [n_pre_samples, n_params].
            Array of initial points to sample the loss function for. If None, randomly
            samples from the loss function.
        n_pre_samples: integer.
            If x0 is None, samples `n_pre_samples` initial points from the loss function.
        acqui_eva_num:
            when evaluating acquisition function, how many points we want to look into
        gp_params: dictionary.
            Dictionary of parameters to pass on to the underlying Gaussian Process.
        random_search: integer.
            Flag that indicates whether to perform random search or L-BFGS-B optimisation
            over the acquisition function.
        alpha: double.
            Variance of the error term of the GP.
        epsilon: double.
            Precision tolerance for floats.
        greater_is_better: boolean
            True: maximize the sample_loss function, False: minimize the sample_loss function
        mode:
            OPT means using optimizer to optimize the hyperparameters of GP
            MAP means using sample posterior mean to optimize the hyperparameters of GP
        acqui_mode:
            mode controlling the acquisition
            'OPT': using one prediction based on previously optimized model
            'MCMC': using several samples to sample the expected acquisition function
        acqui_sample_num:
            the number of hyperparameter samples we want to use for integrated acquisition function
    """
    # call slice sampler
    slice_sampler = Slice_sampler(slice_sample_num, coor_sigma, burn_in)
    acqui_slice_sampler = Slice_sampler(1, coor_sigma, burn_in)  # only sample one sample a time

    x_list = []
    y_list = []
    y_dur_list = []

    n_params = bounds.shape[0]

    if x0 is None:
        # random draw several points as GP prior
        for params in np.random.uniform(bounds[:, 0], bounds[:, 1],
                                        (n_pre_samples, bounds.shape[0])):
            x_list.append(params)
            start = time.clock()
            y_list.append(sample_loss(params))
            elapsed = (time.clock() - start)
            y_dur_list.append(elapsed)
    else:
        for params in x0:
            x_list.append(params)
            start = time.clock()
            y_list.append(sample_loss(params))
            elapsed = (time.clock() - start)
            y_dur_list.append(elapsed)

    xp = np.array(x_list)
    yp = np.array(y_list)
    yp_logdur = np.log(np.array(y_dur_list))
    # print(xp, yp)

    # Create the GP
    # kernel = gp.kernels.Matern()
    init_length_scale = np.ones((input_dimension, ))
    kernel = kernels.Sum(
        kernels.WhiteKernel(),
        kernels.Product(
            kernels.ConstantKernel(),
            kernels.Matern(length_scale=init_length_scale, nu=5. / 2.)))

    if mode == 'OPT':
        model = Gaussian_Process(kernel, mode)
    elif mode == 'MAP':
        model = Gaussian_Process(kernel, mode)
    else:
        raise Exception('Wrong GP model initialization mode!!!')

    dur = Gaussian_Process(kernel, 'OPT')

    iter_num = 0
    for n in range(n_iters):
        iter_num += 1
        if iter_num % int(n_iters / 2) == 0:
            print('%d iterations have been run' % iter_num)
        else:
            pass
        # for each iteration, one sample will be drawn and used to train GP
        model.fit(xp, yp)
        dur.fit(xp, yp_logdur)

        # Sample next hyperparameter
        # print('One sample start')
        if random_search:
            x_random = np.random.uniform(bounds[:, 0], bounds[:, 1],
                                         size=(random_search, n_params))
            ei = -1 * expected_improvement(x_random, model, yp,
                                           greater_is_better=greater_is_better,
                                           n_params=n_params)
            next_sample = x_random[np.argmax(ei), :]
        else:
            if acqui_mode == 'OPT':
                next_sample = sample_next_hyperparameter(
                    expected_improvement, model, yp,
                    greater_is_better=greater_is_better,
                    bounds=bounds, n_restarts=acqui_eva_num)
            elif acqui_mode == 'MCMC':
                sample_theta_list = list()
                for sample_acqui_time in range(acqui_sample_num):
                    initial_log_theta = np.ones((input_dimension + 2, ))
                    initial_theta = np.exp(1.0 + initial_log_theta)
                    one_log_theta = acqui_slice_sampler.sample(init=initial_theta, gp=model)
                    one_theta = np.exp(1.0 + one_log_theta)
                    sample_theta_list.append(one_theta)

                next_sample = integrate_sample(
                    integrate_EI, sample_theta_list, yp, mode,
                    greater_is_better=greater_is_better,
                    bounds=bounds, n_restarts=acqui_eva_num)
            elif acqui_mode == 'PERSEC':
                sample_theta_list = list()
                for sample_acqui_time in range(acqui_sample_num):
                    initial_log_theta = np.ones((input_dimension + 2, ))
                    initial_theta = np.exp(1.0 + initial_log_theta)
                    one_log_theta = acqui_slice_sampler.sample(init=initial_theta, gp=model)
                    one_theta = np.exp(1.0 + one_log_theta)
                    sample_theta_list.append(one_theta)

                next_sample = integrate_sample_perSec(
                    integrate_EI_perSec, sample_theta_list, dur, yp, mode,
                    greater_is_better=greater_is_better,
                    bounds=bounds, n_restarts=acqui_eva_num)
            else:
                raise Exception('Wrong acquisition mode!!!')
        # print('One sample finished')

        # Duplicates will break the GP. In case of a duplicate, we will randomly sample a next query point.
        if np.any(np.abs(next_sample - xp) <= epsilon):
            next_sample = np.random.uniform(bounds[:, 0], bounds[:, 1], bounds.shape[0])

        # Sample loss for new set of parameters
        start = time.clock()
        func_value = sample_loss(next_sample)
        elapsed = (time.clock() - start)

        # Update lists
        x_list.append(next_sample)
        y_list.append(func_value)
        y_dur_list.append(elapsed)

        # Update xp and yp
        xp = np.array(x_list)
        yp = np.array(y_list)
        yp_logdur = np.log(np.array(y_dur_list))

    return xp, yp, yp_logdur
def __init__(self, system, npc=10, nrestarts=0):
    logging.info('training emulator for system %s (%d PC, %d restarts)',
                 system, npc, nrestarts)

    Y = []
    self._slices = {}

    # Build an array of all observables to emulate.
    nobs = 0
    for obs, subobslist in self.observables:
        self._slices[obs] = {}
        for subobs in subobslist:
            Y.append(model.data[system][obs][subobs]['Y'])
            n = Y[-1].shape[1]
            self._slices[obs][subobs] = slice(nobs, nobs + n)
            nobs += n
    Y = np.concatenate(Y, axis=1)

    self.npc = npc
    self.nobs = nobs
    self.scaler = StandardScaler(copy=False)
    self.pca = PCA(copy=False, whiten=True, svd_solver='full')

    # Standardize observables and transform through PCA. Use the first
    # `npc` components but save the full PC transformation for later.
    Z = self.pca.fit_transform(self.scaler.fit_transform(Y))[:, :npc]

    # Define kernel (covariance function):
    # Gaussian correlation (RBF) plus a noise term.
    design = Design(system)
    ptp = design.max - design.min
    kernel = (1. * kernels.RBF(length_scale=ptp,
                               length_scale_bounds=np.outer(ptp, (.1, 10)))
              + kernels.WhiteKernel(noise_level=.1 ** 2,
                                    noise_level_bounds=(.01 ** 2, 1)))

    # Fit a GP (optimize the kernel hyperparameters) to each PC.
    self.gps = [
        GPR(kernel=kernel, alpha=0, n_restarts_optimizer=nrestarts,
            copy_X_train=False).fit(design, z)
        for z in Z.T
    ]

    # Construct the full linear transformation matrix, which is just the PC
    # matrix with the first axis multiplied by the explained standard
    # deviation of each PC and the second axis multiplied by the
    # standardization scale factor of each observable.
    self._trans_matrix = (self.pca.components_
                          * np.sqrt(self.pca.explained_variance_[:, np.newaxis])
                          * self.scaler.scale_)

    # Pre-calculate some arrays for inverse transforming the predictive
    # variance (from PC space to physical space).
    # Assuming the PCs are uncorrelated, the transformation is
    #
    #   cov_ij = sum_k A_ki var_k A_kj
    #
    # where A is the trans matrix and var_k is the variance of the kth PC.
    # https://en.wikipedia.org/wiki/Propagation_of_uncertainty

    # Compute the partial transformation for the first `npc` components
    # that are actually emulated.
    A = self._trans_matrix[:npc]
    self._var_trans = np.einsum('ki,kj->kij', A, A,
                                optimize=False).reshape(npc, nobs ** 2)

    # Compute the covariance matrix for the remaining neglected PCs
    # (truncation error). These components always have variance == 1.
    B = self._trans_matrix[npc:]
    self._cov_trunc = np.dot(B.T, B)

    # Add small term to diagonal for numerical stability.
    self._cov_trunc.flat[::nobs + 1] += 1e-4 * self.scaler.var_
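# A self-contained toy version (an illustrative sketch, not the original class) of the
# PCA + one-GP-per-component emulation strategy used above: standardize the outputs,
# project onto principal components, fit an independent RBF+WhiteKernel GP to each
# component, then map predictions back to observable space. Design, data, and npc here
# are made up for demonstration.
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.gaussian_process import GaussianProcessRegressor as GPR
from sklearn.gaussian_process import kernels

rng = np.random.default_rng(1)
design = rng.uniform(0, 1, size=(40, 2))  # 40 design points, 2 parameters
Y = np.column_stack([np.sin(design @ w) for w in rng.normal(size=(6, 2))])  # 6 observables

scaler = StandardScaler()
pca = PCA(whiten=True, svd_solver='full')
npc = 3
Z = pca.fit_transform(scaler.fit_transform(Y))[:, :npc]

ptp = design.max(axis=0) - design.min(axis=0)
kernel = (1. * kernels.RBF(length_scale=ptp, length_scale_bounds=np.outer(ptp, (.1, 10)))
          + kernels.WhiteKernel(noise_level=.1 ** 2, noise_level_bounds=(.01 ** 2, 1)))
gps = [GPR(kernel=kernel, alpha=0, n_restarts_optimizer=2).fit(design, z) for z in Z.T]

# Predict at new parameter values and map the PC predictions back to observables.
x_new = rng.uniform(0, 1, size=(5, 2))
z_pred = np.column_stack([gp.predict(x_new) for gp in gps])
trans = pca.components_[:npc] * np.sqrt(pca.explained_variance_[:npc, None]) * scaler.scale_
y_pred = z_pred @ trans + scaler.mean_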
df = df.drop('Unnamed: 0', axis=0)
X, y = pre_processing(df.astype(str))
X = X.astype(float)

skf = StratifiedKFold(n_splits=metrics.folds, shuffle=True)
scorer = make_scorer(accuracy_score)

# models to be trained
nmodels = {
    'gauss': [
        GaussianProcessClassifier(n_jobs=2),
        {
            'kernel': [
                1 * kernels.RBF(),
                1 * kernels.DotProduct(),
                1 * kernels.Matern(),
                1 * kernels.RationalQuadratic(),
                1 * kernels.WhiteKernel()
            ]
        }
    ],
    'nb': [GaussianNB()],
    'rf': [
        RandomForestClassifier(),
        {
            'n_estimators': [10, 50, 100, 200, 500],
            'criterion': ["gini", "entropy"]
        }
    ],
    'dt': [
        DecisionTreeClassifier(),
        {
            "criterion": ["gini", "entropy"],
            "splitter": ["best", "random"]
        }
def bayesian_optimisation(coor_sigma, burn_in, input_dimension, n_iters, sample_loss,
                          bounds, x0=None, n_pre_samples=5, acqui_eva_num=10, alpha=1e-5,
                          epsilon=1e-7, greater_is_better=False, mode='OPT',
                          acqui_mode='MCMC', acqui_sample_num=3,
                          process_sample_mode='normal', prior_mode='normal_prior',
                          likelihood_mode='normal_likelihood'):
    """ bayesian_optimisation
    Uses Gaussian Processes to optimise the loss function `sample_loss`.

    Arguments:
    ----------
        slice_sample_num: integer.
            how many samples we draw for each time of slice sampling
        coor_sigma: numpy array
            step-size for slice sampling of each coordinate, the dimension is equal to the
            number of hyperparameters contained in the kernel
        burn_in: integer.
            how many iterations we want to wait before draw samples from slice sampling
        input_dimension: integer.
            dimension of input data
        n_iters: integer.
            Number of iterations to run the search algorithm.
        sample_loss: function.
            Function to be optimised.
        bounds: array-like, shape = [n_params, 2].
            Lower and upper bounds on the parameters of the function `sample_loss`.
        x0: array-like, shape = [n_pre_samples, n_params].
            Array of initial points to sample the loss function for. If None, randomly
            samples from the loss function.
        n_pre_samples: integer.
            If x0 is None, samples `n_pre_samples` initial points from the loss function.
        acqui_eva_num:
            when evaluating acquisition function, how many points we want to look into,
            number of restarts
        alpha: double.
            Variance of the error term of the GP.
        epsilon: double.
            Precision tolerance for floats.
        greater_is_better: boolean
            True: maximize the sample_loss function, False: minimize the sample_loss function
        mode:
            OPT means using optimizer to optimize the hyperparameters of GP
            MAP means using sample posterior mean to optimize the hyperparameters of GP
        acqui_mode:
            mode controlling the acquisition
            'OPT': using one prediction based on previously optimized model
            'MCMC': using several samples to sample the expected acquisition function
        acqui_sample_num:
            the number of hyperparameter samples we want to use for integrated acquisition function
        process_sample_mode:
            after getting sample, how to process it
            'normal': only accept positive sample and reject negative ones
            'abs': accept all samples after taking absolute value
            'rho': reparamization trick is used, the samples are rho
        prior_mode:
            the prior distribution we want to use
            'normal_prior': normal distribution
            'exp_prior': exponential distribution
        likelihood_mode:
            how to calculate likelihood
            'normal_likelihood': directly using input hyperparameter to calculate likelihood
            'rho_likelihood': using reparamization trick (theta = np.log(1.0 + np.exp(rho)))
    """
    # call slice sampler
    acqui_slice_sampler = Slice_sampler(1, coor_sigma, burn_in, prior_mode,
                                        likelihood_mode)  # only sample one sample a time

    x_list = []
    y_list = []
    y_dur_list = []
    time_list = []

    n_params = bounds.shape[0]

    print('Start presampling...')
    if x0 is None:
        # random draw several points as GP prior
        for params in np.random.uniform(bounds[:, 0], bounds[:, 1],
                                        (n_pre_samples, bounds.shape[0])):
            x_list.append(params)
            start = time.clock()
            y_list.append(sample_loss(params))
            elapsed = (time.clock() - start)
            y_dur_list.append(elapsed)
    else:
        for params in x0:
            x_list.append(params)
            start = time.clock()
            y_list.append(sample_loss(params))
            elapsed = (time.clock() - start)
            y_dur_list.append(elapsed)
    print('Presampling finished.')

    xp = np.array(x_list)
    yp = np.array(y_list)
    yp_logdur = np.log(np.array(y_dur_list))

    # Create the GP
    init_length_scale = np.ones((input_dimension, ))
    kernel = kernels.Sum(
        kernels.WhiteKernel(),
        kernels.Product(
            kernels.ConstantKernel(),
            kernels.Matern(length_scale=init_length_scale, nu=5. / 2.)))

    if mode == 'OPT':
        model = gp.GaussianProcessRegressor(kernel=kernel,
                                            alpha=alpha,
                                            n_restarts_optimizer=10,
                                            normalize_y=True)
    elif mode == 'MAP':
        model = gp.GaussianProcessRegressor(kernel=kernel,
                                            alpha=alpha,
                                            optimizer=None,
                                            n_restarts_optimizer=0,
                                            normalize_y=True)
    else:
        raise Exception('Wrong GP model initialization mode!!!')

    dur = gp.GaussianProcessRegressor(kernel=kernel,
                                      alpha=alpha,
                                      n_restarts_optimizer=10,
                                      normalize_y=True)

    iter_num = 0
    for n in range(n_iters):
        # Start the clock for recording total running time per iteration
        ite_start = time.clock()
        iter_num += 1
        if iter_num % int(n_iters / 2) == 0:
            print('%d iterations have been run' % iter_num)
        else:
            pass
        # for each iteration, one sample will be drawn and used to train GP
        dur.fit(xp, yp_logdur)

        if mode == 'OPT':
            # for optimization mode, the hyperparameters are optimized during the process of fitting
            model.fit(xp, yp)
        elif mode == 'MAP':
            # for MAP mode, we use slice sampling to sample the posterior of hyperparameters
            # and use the mean to update GP's hyperparameters
            model.fit(xp, yp)
            initial_theta = 10 * np.ones((input_dimension + 2, ))
            # input_dimension + 2 = number of length_scale + amplitude + noise_sigma
        else:
            raise Exception('Wrong GP model initialization mode!!!')

        # Sample next hyperparameter
        if acqui_mode == 'OPT':
            next_sample = sample_next_hyperparameter(
                expected_improvement, model, yp,
                greater_is_better=greater_is_better,
                bounds=bounds, n_restarts=acqui_eva_num)
        elif acqui_mode == 'MCMC':
            sample_theta_list = list()
            while (len(sample_theta_list) < acqui_sample_num):
                # all samples of theta must be valid
                one_sample = acqui_slice_sampler.sample(init=initial_theta, gp=model)
                if process_sample_mode == 'normal':
                    if np.all(one_sample[:, 0] > 0):
                        one_theta = [np.mean(samples_k) for samples_k in one_sample]
                        sample_theta_list.append(one_theta)
                    else:
                        continue
                elif process_sample_mode == 'abs':
                    one_theta = [np.abs(np.mean(samples_k)) for samples_k in one_sample]
                    sample_theta_list.append(one_theta)
                elif process_sample_mode == 'rho':
                    one_theta = [np.log(1.0 + np.exp((np.mean(samples_k))))
                                 for samples_k in one_sample]
                    sample_theta_list.append(one_theta)
                else:
                    raise Exception('Wrong process sample mode!!!')

            next_sample = integrate_sample(integrate_EI, sample_theta_list, yp,
                                           greater_is_better=greater_is_better,
                                           bounds=bounds,
                                           n_restarts=acqui_eva_num)
        elif acqui_mode == 'PERSEC':
            sample_theta_list = list()
            while (len(sample_theta_list) < acqui_sample_num):
                # all samples of theta must be valid
                one_sample = acqui_slice_sampler.sample(init=initial_theta, gp=model)
                if process_sample_mode == 'normal':
                    if np.all(one_sample[:, 0] > 0):
                        one_theta = [np.mean(samples_k) for samples_k in one_sample]
                        sample_theta_list.append(one_theta)
                    else:
                        continue
                elif process_sample_mode == 'abs':
                    one_theta = [np.abs(np.mean(samples_k)) for samples_k in one_sample]
                    sample_theta_list.append(one_theta)
                elif process_sample_mode == 'rho':
                    one_theta = [np.log(1.0 + np.exp((np.mean(samples_k))))
                                 for samples_k in one_sample]
                    sample_theta_list.append(one_theta)
                else:
                    raise Exception('Wrong process sample mode!!!')

            next_sample = integrate_sample_perSec(
                integrate_EI_perSec, sample_theta_list, dur, yp,
                greater_is_better=greater_is_better,
                bounds=bounds, n_restarts=acqui_eva_num)
        elif acqui_mode == 'RANDOM':
            x_random = np.random.uniform(bounds[:, 0], bounds[:, 1], size=(5, n_params))
            ei = -1 * expected_improvement(x_random, model, yp,
                                           greater_is_better=greater_is_better,
                                           n_params=n_params)
            next_sample = x_random[np.argmax(ei), :]
        else:
            raise Exception('Wrong acquisition mode!!!')

        # Duplicates will break the GP. In case of a duplicate, we will randomly sample a next query point.
        if np.any(np.abs(next_sample - xp) <= epsilon):
            next_sample = np.random.uniform(bounds[:, 0], bounds[:, 1], bounds.shape[0])

        # Sample loss for new set of parameters
        start = time.clock()
        func_value = sample_loss(next_sample)
        elapsed = (time.clock() - start)

        # Update lists
        x_list.append(next_sample)
        y_list.append(func_value)
        y_dur_list.append(elapsed)

        # Update xp and yp
        xp = np.array(x_list)
        yp = np.array(y_list)
        yp_logdur = np.log(np.array(y_dur_list))

        ite_elapsed = (time.clock() - ite_start)
        time_list.append(ite_elapsed)
        timep = np.array(time_list)

    return xp, yp, timep
def bo(X, y):
    data = list(zip(X, y))
    x = np.atleast_2d(np.linspace(0, 10, 1024)).T
    x_ = np.atleast_2d(np.linspace(0, 10, 1024)).T

    kernel = kernels.Matern() + kernels.WhiteKernel()
    gp = GaussianProcessRegressor(
        kernel=kernel,
        n_restarts_optimizer=16,
    )  # normalize_y=True
    gp.fit(X, y)
    # FIXME is it possible for mu(x) < min{x \in observed_x}?
    # Is this due to the GaussianProcess's prior stating that mu(x) = 0?
    # Will this affect the performance of GO, since everything not observed automatically gets an
    # additional boost because the prior plays a bigger role (look it up)? [we know that the loss we
    # are ultimately optimizing is \in [0, 1]]
    y_pred, sigma = gp.predict(x, return_std=True)

    # http://www.scipy-lectures.org/advanced/mathematical_optimization/
    # x_min = fmin(negate(silly_f), 5)  # TODO better maximizer
    # Strong points: it is robust to noise, as it does not rely on computing gradients. Thus it can
    # work on functions that are not locally smooth such as experimental data points, as long as they
    # display a large-scale bell-shape behavior. However it is slower than gradient-based methods on
    # smooth, non-noisy functions.
    # opt_result = minimize(negate(silly_f), 5, bounds=[(0, 10)])  # TODO better maximizer
    # print(opt_result)
    # assert(opt_result.success)
    # x_min = opt_result.x
    # x_min = brent(negate(silly_f), brack=(0, 10))  # NOTE 1D only, NOTE not guaranteed to be within
    # range brack=(0, 10) (see documentation)
    # TODO getting the gradient of the gaussian would unlock all gradient-based optimization methods!!
    # (including L_BFGS)

    a = a_EI(gp, x_obs=X, y_obs=y, theta=0.01)
    a_x = np.apply_along_axis(a, 1, x)

    (x_min_,) = max(x, key=a)

    # TODO have a reasonable optimization (this doesn't scale well)
    # (x_min_,) = brute(
    #     negate(a),
    #     ranges=((0, 10),),
    #     Ns=64,
    #     finish=fmin,
    # )
    # FIXME brute can return numbers outside of the range! X = np.linspace(0, 10, 32), Ns=64,
    # ranges=((0, 10),) (x_min_ = 10.22...)
    # I think it occurs when the function is pretty flat (but not constant)
    # TODO verify that the finish function gets the same range as brute and doesn't wander off
    # (perhaps this is intended behaviour?)
    # TODO check https://github.com/scipy/scipy/blob/master/scipy/optimize/optimize.py#L2614 to see
    # if it's possible for x_min to end up outside of the range (and if so, when)
    print(x_min_)

    # plot_2d(x=x, x_=x_, y_pred=y_pred, sigma=sigma, a_x=a_x)
    # plot(x=x, y_pred=y_pred, x_obs=X, y_obs=y, x_min_=x_min_, sigma=sigma, a_x=a_x)
    # plt.show()

    # evaluate
    fx_min_ = f(x_min_)
    bo(
        X=np.vstack((X, [x_min_, ])),
        y=np.hstack((y, [fx_min_, ])),
    )
# 1) Put in initial length scales for param A and B
# 2) Put in reasonable length scale bounds for optimization
# 3) Fit a separate emulator to each principal component.
#    Take a look at the optimized hyper-parameters.
#    What do they mean?

"""
kernel = (
    1. * kernels.RBF(
        length_scale=[1, 1],
        length_scale_bounds=[(.1, 10), (.1, 10)]
    )
    + kernels.WhiteKernel(.1)
)
"""

kernel = (1. * kernels.RBF(length_scale=1, length_scale_bounds=(.1, 10))
          + kernels.WhiteKernel(.1))

# Build and train each GP
gps = [GPR(kernel=kernel, n_restarts_optimizer=10) for i in range(npc)]
for i, gp in enumerate(gps):
    gp.fit(design, Z[:, i])
    print('RBF: ', gp.kernel_.get_params()['k1'])
    print('White: ', gp.kernel_.get_params()['k2'])

### Step 4-2: Validating the emulators #######################
# It is important to validate the performance of emulators to
# make sure they behave as expected.
# 1) Pick 6 random combinations of A and B. Compare the
#    emulator's predictions and the model calculations.
#    Do they agree?

fig, (ax1, ax2) = plt.subplots(ncols=2, sharex=True)

for a in [
def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), 1)[:, np.newaxis]


def logit(x):
    logx = np.log(x)
    return logx - logx[:, -1][:, np.newaxis]


depths_train = np.linspace(0, 3000, 3000)[:, np.newaxis]
depths_ts = depths_train[::30, :]

matk_fabric = k.Matern(length_scale=300.0, nu=0.5)
kern_p = k.WhiteKernel(noise_level=5.0) + matk_fabric
kern_sh = k.WhiteKernel(noise_level=5.0) + matk_fabric
kern_sv = k.WhiteKernel(noise_level=5.0) + matk_fabric

matk_vel_error = 100 * k.RBF(length_scale=600)

kern_a11 = k.WhiteKernel(noise_level=0.0001) + matk_fabric
kern_aii_noise = k.WhiteKernel(noise_level=0.2) + matk_fabric
kern_a22 = k.WhiteKernel(noise_level=10.0) + matk_fabric
kern_a33 = k.WhiteKernel(noise_level=1.0) + matk_fabric

gpr = gp.GaussianProcessRegressor(matk_fabric)
gpr_noise = gp.GaussianProcessRegressor(k.WhiteKernel(0.05))

aii = gpr.sample_y(depths_train, 3)
aii += np.array([-1, 0, 2])
def _fit_model(self, train_x: np.ndarray, train_y: np.ndarray, out_dir: Path) -> Pipeline:
    """Fit and test a model using the latest data

    Args:
        train_x: Input columns
        train_y: Output column
        out_dir: Location to store the data
    """
    # Min-max scaling
    scale_factor = (train_y.max() - train_y.min())
    train_y = (train_y - train_y.min()) / scale_factor

    # Create an initial RBF kernel, using the training set mean as a scaling parameter
    kernel = train_y.mean() ** 2 * kernels.RBF(length_scale=1)

    # TODO (wardlt): Make it clear where featurization would appear, as we are soon to introduce additives
    #  This will yield chemical degrees of freedom better captured using features of the additives rather
    #  than a new variable per additive
    #  Notes for now: Mol. Weight, Side Chain Length, and ... are the likely candidates

    # Add a noise parameter based on user settings
    noise = self.opt_spec.planner_options.get('noise_level', 0)
    self.logger.debug(f'Using a noise level of {noise}')
    if noise < 0:
        # Use the standard deviation of the distribution of train_y as the estimate of the initial noise
        # TODO (wardlt): Document where 3, 4, and 11 come from
        noise_estimated = np.std(train_y) / 3
        noise_lb = noise_estimated / 4
        noise_ub = noise_estimated * 11

        kernel_noise = kernels.WhiteKernel(
            noise_level=noise_estimated ** 2,
            noise_level_bounds=(noise_lb ** 2, noise_ub ** 2))
        kernel = kernel + kernel_noise
    elif noise > 0:
        kernel = kernel + kernels.WhiteKernel(noise ** 2, noise_level_bounds=(noise ** 2,) * 2)

    # Train a GPR model
    self.logger.debug('Starting kernel')
    model = Pipeline([('variance', VarianceThreshold()),
                      ('scale', StandardScaler()),
                      ('gpr', GaussianProcessRegressor(kernel))])

    # Perform k-fold cross-validation to estimate model performance
    if len(train_x) > 5:
        cv_results = cross_validate(model, train_x, train_y, cv=RepeatedKFold(),
                                    return_train_score=True,
                                    scoring='neg_mean_squared_error')
        with out_dir.joinpath('cross-val-results.pkl').open('wb') as fp:
            pkl.dump(cv_results, fp)

        # Get the RMSE in the unscaled units
        rmse = np.sqrt(-1 * np.mean(cv_results["test_score"]))
        rmse *= scale_factor

        # Print out to screen
        self.logger.info(f'Performed cross-validation. RMSE: {rmse:.2e}')
    else:
        self.logger.info('Insufficient data for cross-validation')

    # Train and save the model
    model.fit(train_x, train_y)
    self.logger.info(f'Finished fitting the model on {len(train_x)} data points')
    self.logger.info(f'Optimized model: {model["gpr"].kernel_}')
    with out_dir.joinpath('model.pkl').open('wb') as fp:
        pkl.dump(model, fp)

    return model