def get_gpr(kernel_type, X, y):
    mean, _, std = get_distribution_measures(y)
    if kernel_type == 'rbf':
        kernel = kernels.ConstantKernel(mean) * kernels.RBF(std)
    elif kernel_type == 'dot':
        kernel = kernels.ConstantKernel(mean) * kernels.DotProduct(std)
    else:
        # previously fell through with `kernel` unbound (NameError)
        raise ValueError("unknown kernel_type: {}".format(kernel_type))
    gpr = GaussianProcessRegressor(kernel=kernel, alpha=0.05, optimizer=None)
    gpr.fit(X, y)
    return gpr
def cov_function_sklearn(params, nu=5 / 2):
    """Generates a default covariance function.

    Args:
      params: A dictionary with GP hyperparameters.
      nu: Degree of the Matern kernel.

    Returns:
      cov_fun: an ARD Matern covariance function with diagonal noise for
        numerical stability.
    """
    amplitude = params['amplitude']
    noise = params['noise']
    lengthscale = params['lengthscale'].flatten()

    amplitude_bounds = PARAMS_BOUNDS['amplitude']
    lengthscale_bounds = PARAMS_BOUNDS['lengthscale']
    noise_bounds = PARAMS_BOUNDS['noise']

    cov_fun = kernels.ConstantKernel(
        amplitude, constant_value_bounds=amplitude_bounds) * kernels.Matern(
            lengthscale, nu=nu, length_scale_bounds=lengthscale_bounds
        ) + kernels.WhiteKernel(noise, noise_level_bounds=noise_bounds)
    return cov_fun
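# A minimal usage sketch for cov_function_sklearn. The bound values below are
# assumptions for illustration; the original reads them from a module-level
# PARAMS_BOUNDS dict, and `kernels` is sklearn.gaussian_process.kernels.
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor, kernels

PARAMS_BOUNDS = {
    'amplitude': (1e-3, 1e3),    # assumed bounds
    'lengthscale': (1e-2, 1e2),
    'noise': (1e-6, 1e1),
}
params = {
    'amplitude': 1.0,
    'noise': 1e-3,
    'lengthscale': np.ones((1, 3)),  # ARD: one length scale per input dim
}
gpr = GaussianProcessRegressor(kernel=cov_function_sklearn(params))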
def setup_latentforces(self, kernels=None):
    """Initialises the latent force GPs.

    Parameters
    ----------
    kernels : list, optional
        Kernels of the latent force Gaussian process objects
    """
    if kernels is None:
        # Default is the kernel 1 * exp(-0.5 * (s - t)**2)
        kernels = [
            sklearn_kernels.ConstantKernel(1.) * sklearn_kernels.RBF(1.)
            for r in range(self.dim.R)
        ]

    if len(kernels) != self.dim.R or \
            not all(isinstance(k, sklearn_kernels.Kernel) for k in kernels):
        _msg = "kernels should be a list of {} kernel objects".format(
            self.dim.R)
        raise ValueError(_msg)

    self.latentforces = [
        GaussianProcessRegressor(kern) for kern in kernels
    ]
def run(self):
    """Connects to the Redis queue with the results and pulls them"""
    # Make random guesses to start
    for i in range(self.batch_size):
        self.queues.send_inputs(
            np.random.uniform(-32.768, 32.768, size=(self.dim,)).tolist())
    self.logger.info('Submitted initial random guesses to queue')
    train_X = []
    train_y = []

    # Use the initial guesses to train a GPR
    gpr = Pipeline([('scale', MinMaxScaler(feature_range=(-1, 1))),
                    ('gpr', GaussianProcessRegressor(
                        normalize_y=True,
                        kernel=kernels.RBF() * kernels.ConstantKernel()))])
    with open(self.output_path, 'a') as fp:
        for _ in range(self.batch_size):
            result = self.queues.get_result()
            print(result.json(), file=fp)
            train_X.append(result.args)
            train_y.append(result.value)

    # Make guesses based on expected improvement
    for _ in range(self.n_guesses // self.batch_size - 1):
        # Update the GPR with the available training data
        gpr.fit(np.vstack(train_X), train_y)

        # Generate a random assortment of potential next points to sample
        sample_X = np.random.uniform(size=(self.batch_size * 1024, self.dim),
                                     low=-32.768, high=32.768)

        # Compute the expected improvement for each point
        pred_y, pred_std = gpr.predict(sample_X, return_std=True)
        best_so_far = np.min(train_y)
        ei = (best_so_far - pred_y) / pred_std

        # Run the samples with the highest EI
        best_inds = np.argsort(ei)[-self.batch_size:]
        self.logger.info(
            f'Selected {len(best_inds)} best samples. EI: {ei[best_inds]}')
        for i in best_inds:
            best_ei = sample_X[i, :]
            self.queues.send_inputs(best_ei.tolist())
        self.logger.info('Sent all of the inputs')

        # Wait for the values to complete
        with open(self.output_path, 'a') as fp:
            for _ in range(self.batch_size):
                result = self.queues.get_result()
                print(result.json(), file=fp)
                train_X.append(result.args)
                train_y.append(result.value)
def GPR_fit(x_train, y_train, x_test):
    kernel = sk_kern.RBF(1.0, (1e-3, 1e3)) \
        + sk_kern.ConstantKernel(1.0, (1e-3, 1e3)) \
        + sk_kern.WhiteKernel()
    clf = GaussianProcessRegressor(
        kernel=kernel,
        alpha=1e-10,
        optimizer="fmin_l_bfgs_b",
        n_restarts_optimizer=20,
        normalize_y=True)
    clf.fit(x_train, y_train)
    pred_mean, pred_std = clf.predict(x_test, return_std=True)
    return pred_mean, pred_std
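# Minimal usage sketch for GPR_fit on synthetic 1-D data. The imports mirror
# the aliases the snippet above assumes (sk_kern, GaussianProcessRegressor).
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
import sklearn.gaussian_process.kernels as sk_kern

x_train = np.linspace(0, 1, 20).reshape(-1, 1)
y_train = np.sin(2 * np.pi * x_train).ravel()
x_test = np.linspace(0, 1, 100).reshape(-1, 1)
pred_mean, pred_std = GPR_fit(x_train, y_train, x_test)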
def gp_fit_sklearn_xy(x_input, x_tar, y_input, y_tar, title='', route=None,
                      gp=None):
    if gp:
        gp1 = gp
    else:
        k1 = kernels.DotProduct(sigma_0=1., sigma_0_bounds=(1e-3, 1e1))
        k3 = kernels.RationalQuadratic(alpha=1.5,
                                       length_scale=2.5,
                                       length_scale_bounds=(1e-3, 20),
                                       alpha_bounds=(1e-3, 10))
        k4 = kernels.ConstantKernel(1., (1e-3, 1e2))
        k5 = kernels.ConstantKernel(1., (1e-2, 1e2))
        kernel = k1 * k4 + k3 * k5
        gp1 = GaussianProcessRegressor(kernel=kernel,
                                       n_restarts_optimizer=10,
                                       alpha=0,
                                       random_state=0)
    x_input = x_input.reshape(-1, 1)
    x_tar = x_tar.reshape(-1, 1)
    gp1.fit(x_input, y_input)
    pred, std = gp1.predict(x_tar, return_std=True)

    # guard against the default route=None (route.any() alone raises
    # AttributeError); the route line was also mislabeled 'Prediction'
    if route is not None and route.any():
        plt.plot(route[:, 0], route[:, 1], 'b', label='Route', alpha=0.2)
    plt.plot(y_input[:, 0], y_input[:, 1], 'bo', label='Input', alpha=0.4)
    plt.plot(y_tar[:, 0], y_tar[:, 1], 'go', label='Target', alpha=0.4)
    plt.plot(pred[:, 0], pred[:, 1], 'ro', label='Prediction', alpha=0.4)
    # plt.gca().fill_between(pred[:, 0].reshape(-1) - 2 * std,
    #                        pred[:, 0].reshape(-1) + 2 * std,
    #                        pred[:, 1].reshape(-1) - 2 * std,
    #                        pred[:, 1].reshape(-1) + 2 * std,
    #                        color='lightblue', alpha=0.5, label=r"$2\sigma$")
    plt.title(title)
    plt.legend()
    plt.show()
    return gp1, pred
def run(self):
    """Connects to the Redis queue with the results and pulls them"""
    # Make a random guess to start
    self.queues.send_inputs(uniform(0, 10), method='target_fun')
    self.logger.info('Submitted initial random guess')
    train_X = []
    train_y = []

    # Initialize the GPR and generator
    gpr = GaussianProcessRegressor(
        normalize_y=True, kernel=kernels.RBF() * kernels.ConstantKernel())
    generator = Generator()

    # Make guesses based on expected improvement
    for _ in range(self.n_guesses - 1):
        # Wait for the result
        result = self.queues.get_result()
        self.logger.info(f'Received result: {(result.args, result.value)}')
        train_X.append(result.args)
        train_y.append(result.value)

        # Update the generator and the GPR
        generator.partial_fit(*result.args, result.value)
        gpr.fit(train_X, train_y)

        # Generate a random assortment of potential next points to sample
        self.queues.send_inputs(generator, 64, method='generate')
        result = self.queues.get_result()
        sample_X = result.value

        # Compute the expected improvement for each point
        self.queues.send_inputs(gpr, sample_X, method='score')
        result = self.queues.get_result()
        pred_y, pred_std = result.value

        # Select the best point
        best_y = np.min(train_y)
        self.queues.send_inputs(best_y, pred_y, pred_std, method='select')
        result = self.queues.get_result()
        chosen_ix = result.value

        # Run the sample with the highest EI
        self.queues.send_inputs(*sample_X[chosen_ix], method='target_fun')

    # Write the best answer to disk
    with open('answer.out', 'w') as fp:
        print(np.min(train_y), file=fp)
def createGaussianProcessClassifier(params=None):
    info("Creating Gaussian Process Classifier", ind=4)
    error("This takes forever. Don't use it")
    return {"estimator": None, "params": None}

    # NOTE: everything below is intentionally unreachable (see early return)
    ## Params
    params = mergeParams(GaussianProcessClassifier(), params)
    tuneParams = getGaussianProcessClassifierParams()
    info("Without Parameters", ind=4)
    kernel = kernels.ConstantKernel()

    ## Estimator
    reg = GaussianProcessClassifier(kernel=kernel)

    return {"estimator": reg, "params": tuneParams}
def run(self):
    """Connects to the Redis queue with the results and pulls them"""
    # Make a random guess to start
    self.queues.send_inputs(uniform(0, 10))
    self.logger.info('Submitted initial random guess')
    train_X = []
    train_y = []

    # Use the initial guess to train a GPR
    gpr = GaussianProcessRegressor(
        normalize_y=True, kernel=kernels.RBF() * kernels.ConstantKernel())
    result = self.queues.get_result()
    train_X.append(result.args)
    train_y.append(result.value)

    # Make guesses based on expected improvement
    for _ in range(self.n_guesses - 1):
        # Update the GPR with the available training data
        gpr.fit(train_X, train_y)

        # Generate a random assortment of potential next points to sample
        sample_X = np.random.uniform(size=(64, 1), low=0, high=10)

        # Compute the expected improvement for each point
        pred_y, pred_std = gpr.predict(sample_X, return_std=True)
        best_so_far = np.min(train_y)
        ei = (best_so_far - pred_y) / pred_std

        # Run the sample with the highest EI
        best_ei = sample_X[np.argmax(ei), 0]
        self.queues.send_inputs(best_ei)
        self.logger.info(f'Sent new guess based on EI: {best_ei}')

        # Wait for the value to complete
        result = self.queues.get_result()
        self.logger.info('Received value')

        # Add the value to the training set for the GPR
        train_X.append([best_ei])
        train_y.append(result.value)

    # Write the best answer to disk
    with open('answer.out', 'w') as fp:
        print(np.min(train_y), file=fp)
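# The loop above ranks candidates by the z-score (best - mu) / sigma rather
# than the full expected-improvement integral. A sketch of the closed-form EI
# for minimization, under the usual Gaussian-posterior assumption:
#   EI(x) = (f* - mu) * Phi(Z) + sigma * phi(Z),  Z = (f* - mu) / sigma
import numpy as np
from scipy.stats import norm

def expected_improvement_closed_form(pred_y, pred_std, best_so_far):
    with np.errstate(divide='ignore', invalid='ignore'):
        z = (best_so_far - pred_y) / pred_std
        ei = (best_so_far - pred_y) * norm.cdf(z) + pred_std * norm.pdf(z)
    ei[pred_std == 0.0] = 0.0  # no improvement possible where the GP is certain
    return ei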
def select_kernel(self, kernel):
    """Get the sklearn.gaussian_process.kernels kernel by matching the given
    kernel identifier.

    Parameters:
        kernel (str): Kernel string such as 'RBF' or, depending on the
            surrogate, also product and sum kernels such as 'RBF+Matern52'.

    Returns:
        sklearn.gaussian_process.kernels: Scikit-learn kernel object.
            Currently, for sum and product kernels, the initial
            hyperparameters are the same for all kernels.
    """
    from re import split
    from sklearn.gaussian_process import kernels as sklearn_kernels
    full_str = split('([+*])', kernel)
    try:
        kernel = []
        for key in full_str:
            kernel += [
                key if key in ('+', '*') else getattr(sklearn_kernels, key)(
                    length_scale=self.hyperparameters['length_scale'])
            ]
    except AttributeError:
        raise RuntimeError("Kernel {} is not implemented.".format(kernel))

    if len(kernel) == 1:
        kernel = kernel[0]
    else:
        kernel = [
            str(key) if not isinstance(key, str) else key for key in kernel
        ]
        kernel = eval(''.join(kernel))

    # Add scale and noise to the kernel
    kernel *= sklearn_kernels.ConstantKernel(
        constant_value=1 / self.hyperparameters['sigma_f'].item()**2)
    if not self.fixed_sigma_n:
        kernel += sklearn_kernels.WhiteKernel(
            noise_level=self.hyperparameters['sigma_n'].item()**2)
    return kernel
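# A standalone sketch of the same parse-and-compose idea, written without
# eval(): each name is looked up in sklearn's kernels module, and the +/*
# operators are applied between the instantiated kernels. compose_kernel is
# a hypothetical helper, not part of the class above.
from re import split
from sklearn.gaussian_process import kernels as sklearn_kernels

def compose_kernel(spec, length_scale=1.0):
    result, op = None, None
    for token in split('([+*])', spec):
        if token in ('+', '*'):
            op = token
        else:
            k = getattr(sklearn_kernels, token)(length_scale=length_scale)
            if result is None:
                result = k
            elif op == '+':
                result = result + k
            else:
                result = result * k
    return result

kernel = compose_kernel('RBF+Matern')  # RBF(1.0) + Matern(1.0)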
def gp(xdata, ydata):
    kernel = [
        kernels.RBF(),
        kernels.Matern(),
        kernels.ConstantKernel(),
        kernels.WhiteKernel(),
        kernels.RationalQuadratic()
    ]
    max_iter_predict = [10, 50, 100, 500, 1000]
    warm_start = [False, True]
    multi_class = ['one_vs_rest', 'one_vs_one']
    with open('gaussianprocess.csv', mode='w', newline='') as file:
        writer = csv.writer(file, quoting=csv.QUOTE_NONNUMERIC)
        writer.writerow([
            'kernel', 'max_iter_predict', 'warm_start', 'multi_class',
            'accuracy'
        ])
        for k in kernel:
            for m in max_iter_predict:
                for w in warm_start:
                    for mc in multi_class:
                        accuracy = 0
                        model = GaussianProcessClassifier(kernel=k,
                                                          max_iter_predict=m,
                                                          warm_start=w,
                                                          multi_class=mc,
                                                          random_state=1)
                        kf = StratifiedKFold(n_splits=5, shuffle=True)
                        # the original indexed undefined globals X and y here;
                        # the function arguments are xdata and ydata
                        for i, j in kf.split(xdata, ydata):
                            X_ktrain, X_ktest = xdata[i], xdata[j]
                            y_ktrain, y_ktest = ydata[i], ydata[j]
                            model.fit(X_ktrain, y_ktrain)
                            ypred = model.predict(X_ktest)
                            accuracy += np.mean(ypred == y_ktest)
                        accuracy /= 5
                        writer.writerow([k, m, w, mc, accuracy])
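# The manual CV loop above can be expressed more compactly with
# cross_val_score; a hedged equivalent sketch for a single configuration:
from sklearn.model_selection import StratifiedKFold, cross_val_score
import numpy as np

def cv_accuracy(model, xdata, ydata, n_splits=5):
    kf = StratifiedKFold(n_splits=n_splits, shuffle=True)
    return np.mean(cross_val_score(model, xdata, ydata, cv=kf,
                                   scoring='accuracy'))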
def integrate_EI(x, sample_theta_list, evaluated_loss, mode,
                 greater_is_better=False, n_params=1):
    """ integrate_EI

    Integrated expected improvement acquisition function.

    Arguments:
    ----------
        x: array-like, shape = [n_samples, n_hyperparams]
            The point for which the expected improvement needs to be computed.
        sample_theta_list:
            hyperparameter samples of the GP model, which will be used to
            calculate the integrated acquisition function
        evaluated_loss: Numpy array.
            Numpy array that contains the values of the loss function for the
            previously evaluated hyperparameters.
        mode:
            GP model initialization mode, forwarded to Gaussian_Process
            (see bayesian_optimisation).
        greater_is_better: Boolean.
            Boolean flag that indicates whether the loss function is to be
            maximised or minimised.
        n_params: int.
            Dimension of the hyperparameter space.
    """
    # sample_theta_list contains all samples of hyperparameters
    ei_list = list()
    input_dimension = n_params
    init_length_scale = np.ones((input_dimension,))
    kernel = kernels.Sum(
        kernels.WhiteKernel(),
        kernels.Product(
            kernels.ConstantKernel(),
            kernels.Matern(length_scale=init_length_scale, nu=5. / 2.)))
    for theta_set in sample_theta_list:
        model = Gaussian_Process(kernel, mode)
        '''
        model = gp.GaussianProcessRegressor(kernel=kernel,
                                            alpha=1e-5,
                                            optimizer=None,
                                            normalize_y=True)
        model.set_params(**{"kernel__k1__noise_level": np.abs(theta_set[0]),
                            "kernel__k2__k1__constant_value": np.abs(theta_set[1]),
                            "kernel__k2__k2__length_scale": theta_set[2:]})
        '''
        model.set_params(theta_set)
        x_to_predict = x.reshape(-1, n_params)

        mu, sigma = model.predict(x_to_predict)
        # mu, sigma = model.predict(x_to_predict, return_std=True)

        if greater_is_better:
            loss_optimum = np.max(evaluated_loss)
        else:
            loss_optimum = np.min(evaluated_loss)

        scaling_factor = (-1)**(not greater_is_better)

        # In case sigma equals zero
        with np.errstate(divide='ignore'):
            Z = scaling_factor * (mu - loss_optimum) / sigma
            expected_improvement = scaling_factor * (
                mu - loss_optimum) * norm.cdf(Z) + sigma * norm.pdf(Z)
            # the original used '==', a no-op comparison instead of assignment
            expected_improvement[sigma == 0.0] = 0.0
        ei_list.append(expected_improvement[0])
    res_ei = np.mean(ei_list)
    result = np.array([res_ei])
    return -1 * result
def bayesian_optimisation(coor_sigma, burn_in, input_dimension, n_iters,
                          sample_loss, bounds, x0=None, n_pre_samples=5,
                          acqui_eva_num=10, alpha=1e-5, epsilon=1e-7,
                          greater_is_better=False, mode='OPT',
                          acqui_mode='MCMC', acqui_sample_num=3,
                          process_sample_mode='normal',
                          prior_mode='normal_prior',
                          likelihood_mode='normal_likelihood'):
    """ bayesian_optimisation

    Uses Gaussian Processes to optimise the loss function `sample_loss`.

    Arguments:
    ----------
        coor_sigma: numpy array
            step-size for slice sampling of each coordinate; the dimension is
            equal to the number of hyperparameters contained in the kernel
        burn_in: integer.
            how many iterations we want to wait before drawing samples from
            slice sampling
        input_dimension: integer.
            dimension of input data
        n_iters: integer.
            Number of iterations to run the search algorithm.
        sample_loss: function.
            Function to be optimised.
        bounds: array-like, shape = [n_params, 2].
            Lower and upper bounds on the parameters of the function
            `sample_loss`.
        x0: array-like, shape = [n_pre_samples, n_params].
            Array of initial points to sample the loss function for. If None,
            randomly samples from the loss function.
        n_pre_samples: integer.
            If x0 is None, samples `n_pre_samples` initial points from the
            loss function.
        acqui_eva_num:
            when evaluating the acquisition function, how many points we want
            to look into (number of restarts)
        alpha: double.
            Variance of the error term of the GP.
        epsilon: double.
            Precision tolerance for floats.
        greater_is_better: boolean
            True: maximize the sample_loss function,
            False: minimize the sample_loss function
        mode:
            'OPT' means using an optimizer to optimize the hyperparameters of
            the GP; 'MAP' means using the sample posterior mean to optimize
            the hyperparameters of the GP
        acqui_mode: mode controlling the acquisition
            'OPT': using one prediction based on the previously optimized model
            'MCMC': using several samples to sample the expected acquisition
            function
        acqui_sample_num:
            the number of hyperparameter samples we want to use for the
            integrated acquisition function
        process_sample_mode: after getting a sample, how to process it
            'normal': only accept positive samples and reject negative ones
            'abs': accept all samples after taking the absolute value
            'rho': the reparametrization trick is used; the samples are rho
        prior_mode: the prior distribution we want to use
            'normal_prior': normal distribution
            'exp_prior': exponential distribution
        likelihood_mode: how to calculate the likelihood
            'normal_likelihood': directly use the input hyperparameters to
            calculate the likelihood
            'rho_likelihood': use the reparametrization trick
            (theta = np.log(1.0 + np.exp(rho)))
    """
    # call slice sampler (only sample one sample at a time)
    acqui_slice_sampler = Slice_sampler(1, coor_sigma, burn_in, prior_mode,
                                        likelihood_mode)

    x_list = []
    y_list = []
    y_dur_list = []
    time_list = []

    n_params = bounds.shape[0]

    print('Start presampling...')
    if x0 is None:
        # randomly draw several points as the GP prior
        for params in np.random.uniform(bounds[:, 0], bounds[:, 1],
                                        (n_pre_samples, bounds.shape[0])):
            x_list.append(params)
            start = time.perf_counter()  # time.clock() was removed in Python 3.8
            y_list.append(sample_loss(params))
            elapsed = time.perf_counter() - start
            y_dur_list.append(elapsed)
    else:
        for params in x0:
            x_list.append(params)
            start = time.perf_counter()
            y_list.append(sample_loss(params))
            elapsed = time.perf_counter() - start
            y_dur_list.append(elapsed)
    print('Presampling finished.')

    xp = np.array(x_list)
    yp = np.array(y_list)
    yp_logdur = np.log(np.array(y_dur_list))

    # Create the GP
    init_length_scale = np.ones((input_dimension,))
    kernel = kernels.Sum(
        kernels.WhiteKernel(),
        kernels.Product(
            kernels.ConstantKernel(),
            kernels.Matern(length_scale=init_length_scale, nu=5. / 2.)))
    if mode == 'OPT':
        model = gp.GaussianProcessRegressor(kernel=kernel,
                                            alpha=alpha,
                                            n_restarts_optimizer=10,
                                            normalize_y=True)
    elif mode == 'MAP':
        model = gp.GaussianProcessRegressor(kernel=kernel,
                                            alpha=alpha,
                                            optimizer=None,
                                            n_restarts_optimizer=0,
                                            normalize_y=True)
    else:
        raise Exception('Wrong GP model initialization mode!!!')

    dur = gp.GaussianProcessRegressor(kernel=kernel,
                                      alpha=alpha,
                                      n_restarts_optimizer=10,
                                      normalize_y=True)

    iter_num = 0
    for n in range(n_iters):
        # Start the clock for recording total running time per iteration
        ite_start = time.perf_counter()
        iter_num += 1
        if iter_num % int(n_iters / 2) == 0:
            print('%d iterations have been run' % iter_num)

        # for each iteration, one sample will be drawn and used to train the GP
        dur.fit(xp, yp_logdur)

        if mode == 'OPT':
            # in OPT mode the hyperparameters are optimized while fitting
            model.fit(xp, yp)
        elif mode == 'MAP':
            # in MAP mode, slice sampling draws from the posterior of the
            # hyperparameters and the sample mean updates the GP
            model.fit(xp, yp)
            # input_dimension + 2 = number of length_scales + amplitude + noise_sigma
            initial_theta = 10 * np.ones((input_dimension + 2,))
        else:
            raise Exception('Wrong GP model initialization mode!!!')

        # Sample next hyperparameter
        if acqui_mode == 'OPT':
            next_sample = sample_next_hyperparameter(
                expected_improvement, model, yp,
                greater_is_better=greater_is_better, bounds=bounds,
                n_restarts=acqui_eva_num)
        elif acqui_mode == 'MCMC':
            sample_theta_list = list()
            while len(sample_theta_list) < acqui_sample_num:
                # all samples of theta must be valid
                one_sample = acqui_slice_sampler.sample(init=initial_theta,
                                                        gp=model)
                if process_sample_mode == 'normal':
                    if np.all(one_sample[:, 0] > 0):
                        one_theta = [
                            np.mean(samples_k) for samples_k in one_sample
                        ]
                        sample_theta_list.append(one_theta)
                    else:
                        continue
                elif process_sample_mode == 'abs':
                    one_theta = [
                        np.abs(np.mean(samples_k)) for samples_k in one_sample
                    ]
                    sample_theta_list.append(one_theta)
                elif process_sample_mode == 'rho':
                    one_theta = [
                        np.log(1.0 + np.exp(np.mean(samples_k)))
                        for samples_k in one_sample
                    ]
                    sample_theta_list.append(one_theta)
                else:
                    raise Exception('Wrong process sample mode!!!')
            next_sample = integrate_sample(integrate_EI, sample_theta_list,
                                           yp,
                                           greater_is_better=greater_is_better,
                                           bounds=bounds,
                                           n_restarts=acqui_eva_num)
        elif acqui_mode == 'PERSEC':
            sample_theta_list = list()
            while len(sample_theta_list) < acqui_sample_num:
                # all samples of theta must be valid
                one_sample = acqui_slice_sampler.sample(init=initial_theta,
                                                        gp=model)
                if process_sample_mode == 'normal':
                    if np.all(one_sample[:, 0] > 0):
                        one_theta = [
                            np.mean(samples_k) for samples_k in one_sample
                        ]
                        sample_theta_list.append(one_theta)
                    else:
                        continue
                elif process_sample_mode == 'abs':
                    one_theta = [
                        np.abs(np.mean(samples_k)) for samples_k in one_sample
                    ]
                    sample_theta_list.append(one_theta)
                elif process_sample_mode == 'rho':
                    one_theta = [
                        np.log(1.0 + np.exp(np.mean(samples_k)))
                        for samples_k in one_sample
                    ]
                    sample_theta_list.append(one_theta)
                else:
                    raise Exception('Wrong process sample mode!!!')
            next_sample = integrate_sample_perSec(
                integrate_EI_perSec, sample_theta_list, dur, yp,
                greater_is_better=greater_is_better, bounds=bounds,
                n_restarts=acqui_eva_num)
        elif acqui_mode == 'RANDOM':
            x_random = np.random.uniform(bounds[:, 0], bounds[:, 1],
                                         size=(5, n_params))
            ei = -1 * expected_improvement(x_random, model, yp,
                                           greater_is_better=greater_is_better,
                                           n_params=n_params)
            next_sample = x_random[np.argmax(ei), :]
        else:
            raise Exception('Wrong acquisition mode!!!')

        # Duplicates will break the GP. In case of a duplicate, randomly
        # sample the next query point instead.
        if np.any(np.abs(next_sample - xp) <= epsilon):
            next_sample = np.random.uniform(bounds[:, 0], bounds[:, 1],
                                            bounds.shape[0])

        # Sample loss for the new set of parameters
        start = time.perf_counter()
        func_value = sample_loss(next_sample)
        elapsed = time.perf_counter() - start

        # Update lists
        x_list.append(next_sample)
        y_list.append(func_value)
        y_dur_list.append(elapsed)

        # Update xp and yp
        xp = np.array(x_list)
        yp = np.array(y_list)
        yp_logdur = np.log(np.array(y_dur_list))

        ite_elapsed = time.perf_counter() - ite_start
        time_list.append(ite_elapsed)
        timep = np.array(time_list)

    return xp, yp, timep
def __init__(self, X_train, Y_train, X_train_min=None, X_train_max=None,
             npc=10, nrestarts=0):
    """
    Initialize the emulator with X_train, Y_train, X_train_range, npc,
    nrestarts.

    NOTE: if X_train_min is None, X_min = 0.9 * min(X_train);
          if X_train_max is None, X_max = 1.1 * max(X_train)
    """
    logging.info('training emulator for system with (%d PC, %d restarts)',
                 npc, nrestarts)

    self.npc = npc
    (_, self.ndim) = X_train.shape
    (self.nsamples, self.nobs) = Y_train.shape

    ### standardize X_train
    if X_train_min is None:
        X_train_min = 0.9 * np.min(X_train, axis=0)
    if X_train_max is None:
        X_train_max = 1.1 * np.max(X_train, axis=0)
    self.X_min = np.array(X_train_min)
    self.X_max = np.array(X_train_max)
    self.Y_train = np.copy(Y_train)
    self.X_train = (X_train - X_train_min) / (X_train_max - X_train_min)

    ### StandardScaler transformation
    ### Args: Y; returns: (Y - Y.mean) / Y.std
    self.scaler = StandardScaler(copy=False)

    ### principal component analysis
    ### Args: Y; Y -- (svd) --> Y = U.S.Vt
    ### Returns: if whiten, Z = Y.V/S * sqrt(nsamples - 1) = U * sqrt(nsamples - 1);
    ###          else, Z = Y.V = U.S
    self.pca = PCA(copy=False, whiten=True, svd_solver='full')
    Z = self.pca.fit_transform(
        self.scaler.fit_transform(self.Y_train))[:, :npc]

    ### Kernel and Gaussian Process emulators
    k0 = 1. * kernels.RBF(length_scale=np.ones(self.ndim),
                          length_scale_bounds=np.outer(np.ones(self.ndim),
                                                       (.1, 10)))
    k1 = kernels.ConstantKernel()  # ConstantKernel doesn't help
    k2 = kernels.WhiteKernel(noise_level=0.01, noise_level_bounds=(1e-8, 10.))
    kernel = (k0 + k2)
    self.GPs = [
        GPR(kernel=kernel,
            alpha=0,
            n_restarts_optimizer=nrestarts,
            copy_X_train=False).fit(self.X_train, z) for z in Z.T
    ]

    ## construct the full linear transformation matrix
    self._trans_matrix = (self.pca.components_
                          * np.sqrt(self.pca.explained_variance_[:, np.newaxis])
                          * self.scaler.scale_)

    ## in order to propagate the predictive variance:
    ## https://en.wikipedia.org/wiki/Propagation_of_uncertainty
    A = self._trans_matrix[:npc]
    self._var_trans = np.einsum('ki,kj->kij', A, A,
                                optimize=False).reshape(npc, self.nobs**2)
    B = self._trans_matrix[npc:]
    ## covariance matrix for the remaining neglected PCs
    self._cov_trunc = np.dot(B.T, B)
    self._cov_trunc.flat[::self.nobs + 1] += 1e-4 * self.scaler.var_
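# A hedged sketch of the corresponding predict step, which the class above
# does not show: predict each principal component with its GP, then map back
# to observable space through the pre-computed linear transform. `emu` stands
# for an instance of the class; the function name is hypothetical.
import numpy as np

def predict_mean(emu, X):
    X_std = (X - emu.X_min) / (emu.X_max - emu.X_min)  # same scaling as training
    Z = np.column_stack([gp.predict(X_std) for gp in emu.GPs])  # (n, npc)
    # inverse PCA + inverse standardization via the full linear transform
    return Z @ emu._trans_matrix[:emu.npc] + emu.scaler.mean_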
# plt.plot(x_train, y_train, "o", label="training data")  # practice output
# (stray line in the source: x_train is not defined until below)
np.random.seed(1)
x_train = np.random.normal(0, 1., 20)
y_train = true_func(x_train) + np.random.normal(loc=0, scale=.1,
                                                size=x_train.shape)
xx = np.linspace(-3, 3, 200)

plt.scatter(x_train, y_train, label="Data")
plt.plot(xx, true_func(xx), "--", color="C0", label="True Function")
plt.legend()
plt.title("training data")
plt.show()

kernel = sk_kern.RBF(1.0, (1e-3, 1e3)) \
    + sk_kern.ConstantKernel(1.0, (1e-3, 1e3)) \
    + sk_kern.WhiteKernel()
gp = GaussianProcessRegressor(normalize_y=True,
                              kernel=kernel,
                              optimizer="fmin_l_bfgs_b",
                              alpha=1e-10,
                              n_restarts_optimizer=3)

# X must be reshaped to (n_samples, n_features)
gp.fit(x_train.reshape(-1, 1), y_train)

# after fitting, the optimized kernel is stored in self.kernel_
gp.kernel_
# < RBF(length_scale=0.374) + 0.0316**2 + WhiteKernel(noise_level=0.00785)

# predict returns the mean and, optionally, the variance or covariance
x_test = np.linspace(-3., 3., 200).reshape(-1, 1)
pred_mean, pred_std = gp.predict(x_test, return_std=True)
    # 'KRR' : {
    #     'class'  : KRR,
    #     'default': {'alpha': 1e1, 'kernel': 'laplacian'},
    #     'grid'   : {
    #         'alpha' : [1e-1, 1e0, 1e1, 1e2],
    #         'kernel': ['rbf', 'laplacian', 'linear'],
    #     }},

    'GPR': {
        'class': GPR,
        'default': {
            'normalize_y': True,
            'alpha': 1e-3
        },
        # 'kernel': GPK.ConstantKernel(1.0, (1e-1, 1e3)) * GPK.RBF(1.0, (1e-1, 1e3))},
        'grid': {
            'kernel': [
                GPK.ConstantKernel(1.0, (1e-1, 1e3)) * GPK.RBF(1.0, (1e-1, 1e3)),
                GPK.ConstantKernel(10.0, (1e-1, 1e3)) * GPK.RBF(10.0, (1e-1, 1e3))
            ],
        }
    },

    # 'MDN' : {
    #     'class'  : MDN,
    #     'default': {'no_load': True},
    #     'grid'   : {
    #         'hidden': [[100]*i for i in [2,3,5]],
    #         'l2'    : [1e-5,1e-4,1e-3],
    #         'lr'    : [1e-5,1e-4,1e-3],
    #     }},
}
def main(rms_data):
    global model
    RUL_preds = []
    total_data = rms_data[0].flatten()  # (858, 1)
    fig = plt.figure(figsize=(19, 9))
    # ax1 = fig.add_subplot(2, 2, (1, 3))
    # ax2 = fig.add_subplot(2, 2, 4)

    # Moving average
    MA_data = np.convolve(total_data,
                          np.ones((args.mv_size,)) / args.mv_size,
                          mode='valid')[:610]  # (818, 1)
    RUL_true = [(num, rms) for num, rms in enumerate(MA_data)
                if 9.0 <= rms <= 9.1][0][0]  # (818, 1)

    if args.type == 'linear':
        model = LinearRegression()
    elif args.type == 'exponential':
        model = Nonlinear_Regression()
    elif args.type == 'log':
        model = Nonlinear_Regression()
    elif args.type == 'lipow':
        model = Nonlinear_Regression()
    elif args.type == 'gom':
        model = Nonlinear_Regression()
    elif args.type == 'pow':
        model = Nonlinear_Regression()
    elif args.type == 'GP':
        kernel_RBF = kernels.ConstantKernel(1.0) * kernels.RBF(length_scale=1.0)
        model = GaussianProcessRegressor(kernel=kernel_RBF, alpha=0.4**2)
    else:
        # the source aborted here with `assert print(...)`, which only raises
        # AssertionError as a side effect; raising explicitly is clearer
        raise ValueError('Unsupported regression model')

    for cnt, tc in sorted(enumerate(args.tc)):
        ax1 = fig.add_subplot(2, 2, (1, 3))
        ax2 = fig.add_subplot(2, 2, 4)
        observed_data = MA_data[:tc]
        time_curr_range = current_time(tc, args.n_window)
        time_pred_range = prediction_time(tc, args.n_window, args.range_pred)

        if np.prod(observed_data[time_curr_range] >= args.thld_fault):
            if args.type == 'linear':
                model.fit(time_curr_range[:, np.newaxis],
                          observed_data[time_curr_range])
                y_pred = model.predict(time_pred_range[:, np.newaxis])
                try:
                    RUL_pred = np.where(y_pred > args.thld_failure)[0][0] + \
                        (tc - args.n_window)
                    RUL_preds += [RUL_pred]
                    print('Predicted RUL: {}m'.format(RUL_pred), file=sys.stderr)
                except:
                    RUL_pred = 'Unestimatable'
                    RUL_preds += [RUL_preds[-1]]
                    print('Predicted RUL: {}'.format(RUL_pred), file=sys.stderr)
                ax1.plot(np.arange(tc - args.n_window, tc + args.range_pred),
                         y_pred, '.')
                ax1.annotate('Predicted RUL: {}'.format(RUL_pred), xy=(tc, 4.8),
                             fontsize=12, color='red', weight='bold')
            elif args.type == 'exponential':
                popt, _ = model.fit_exp(time_curr_range,
                                        observed_data[time_curr_range])
                y_pred = model.exponential_func(
                    prediction_time(tc, args.n_window, args.range_pred), *popt)
                try:
                    RUL_pred = np.where(y_pred > args.thld_failure)[0][0] + \
                        (tc - args.n_window)
                    RUL_preds += [RUL_pred]
                    print('Predicted RUL: {}m'.format(RUL_pred), file=sys.stderr)
                except:
                    RUL_pred = 'Unestimatable'
                    RUL_preds += [RUL_preds[-1]]
                    print('Predicted RUL: {}'.format(RUL_pred), file=sys.stderr)
            elif args.type == 'log':
                popt, _ = model.fit_log(time_curr_range,
                                        observed_data[time_curr_range])
                y_pred = model.log_func(
                    prediction_time(tc, args.n_window, args.range_pred), *popt)
                try:
                    RUL_pred = np.where(y_pred > args.thld_failure)[0][0] + \
                        (tc - args.n_window)
                    RUL_preds += [RUL_pred]
                    print('Predicted RUL: {}m'.format(RUL_pred), file=sys.stderr)
                except:
                    RUL_pred = 'Unestimatable'
                    RUL_preds += [RUL_preds[-1]]
                    print('Predicted RUL: {}'.format(RUL_pred), file=sys.stderr)
                ax1.plot(np.arange(tc - args.n_window, tc + args.range_pred),
                         y_pred)
                ax1.annotate('Predicted RUL: {}'.format(RUL_pred), xy=(tc, 4.8),
                             fontsize=12, color='red', weight='bold')
            elif args.type == 'lipow':
                popt, _ = model.fit_lipow(time_curr_range,
                                          observed_data[time_curr_range])
                y_pred = model.lipow_func(
                    prediction_time(tc, args.n_window, args.range_pred), *popt)
                try:
                    RUL_pred = np.where(y_pred > args.thld_failure)[0][0] + \
                        (tc - args.n_window)
                    RUL_preds += [RUL_pred]
                    print('Predicted RUL: {}m'.format(RUL_pred), file=sys.stderr)
                except:
                    RUL_pred = 'Unestimatable'
                    RUL_preds += [RUL_preds[-1]]
                    print('Predicted RUL: {}'.format(RUL_pred), file=sys.stderr)
                ax1.plot(np.arange(tc - args.n_window, tc + args.range_pred),
                         y_pred)
                ax1.annotate('Predicted RUL: {}'.format(RUL_pred), xy=(tc, 4.8),
                             fontsize=12, color='red', weight='bold')
            elif args.type == 'gom':
                popt, _ = model.fit_gom(time_curr_range,
                                        observed_data[time_curr_range])
                y_pred = model.gom_func(
                    prediction_time(tc, args.n_window, args.range_pred), *popt)
                try:
                    RUL_pred = np.where(y_pred > args.thld_failure)[0][0] + \
                        (tc - args.n_window)
                    RUL_preds += [RUL_pred]
                    print('Predicted RUL: {}m'.format(RUL_pred), file=sys.stderr)
                except:
                    RUL_pred = 'Unestimatable'
                    RUL_preds += [RUL_preds[-1]]
                    print('Predicted RUL: {}'.format(RUL_pred), file=sys.stderr)
                ax1.plot(np.arange(tc - args.n_window, tc + args.range_pred),
                         y_pred)
                ax1.annotate('Predicted RUL: {}'.format(RUL_pred), xy=(tc, 4.8),
                             fontsize=12, color='red', weight='bold')
            elif args.type == 'pow':
                popt, _ = model.fit_pow(time_curr_range,
                                        observed_data[time_curr_range])
                y_pred = model.pow_func(
                    prediction_time(tc, args.n_window, args.range_pred), *popt)
                try:
                    RUL_pred = np.where(y_pred > args.thld_failure)[0][0] + \
                        (tc - args.n_window)
                    RUL_preds += [RUL_pred]
                    print('Predicted RUL: {}m'.format(RUL_pred), file=sys.stderr)
                except:
                    RUL_pred = 'Unestimatable'
                    RUL_preds += [RUL_preds[-1]]
                    print('Predicted RUL: {}'.format(RUL_pred), file=sys.stderr)
                ax1.plot(np.arange(tc - args.n_window, tc + args.range_pred),
                         y_pred)
                ax1.annotate('Predicted RUL: {}'.format(RUL_pred), xy=(tc, 4.8),
                             fontsize=12, color='red', weight='bold')
            elif args.type == 'GP':
                # Fit the data using MLE
                model.fit(time_curr_range[:, np.newaxis],
                          observed_data[time_curr_range])
                # Compute posterior predictive mean and variance
                mu_s, cov_s = model.predict(time_pred_range[:, np.newaxis],
                                            return_cov=True)
                samples = np.random.multivariate_normal(mu_s.ravel(), cov_s, 3)
                plot_gp(mu_s, cov_s, time_pred_range,
                        time_curr_range[:, np.newaxis], observed_data, samples)
        else:
            # fault threshold not yet reached: fall back to a fixed RUL estimate
            RUL_preds += [590 + args.tc[cnt]]

        ax1.set_title('RUL prediction - linear regression | '
                      'time range: {}m ~ {}m'.format(tc - args.n_window, tc))
        ax1.plot(MA_data, '.', label='total data')
        ax1.plot(observed_data, '.', label='observed_data')
        ax1.text(0, args.thld_safe + 0.05, str(args.thld_safe),
                 color='c', fontsize=13, weight='bold')
        ax1.text(0, args.thld_fault + 0.05, str(args.thld_fault),
                 color='r', fontsize=13, weight='bold')
        ax1.text(0, args.thld_failure + 0.05, str(args.thld_failure),
                 color='r', fontsize=13, weight='bold')
        ax1.axvline(x=tc - args.n_window)
        ax1.axvline(x=tc)
        ax1.axhline(y=args.thld_safe, color='c', linestyle='-',
                    label='Threshold safe')
        ax1.axhline(y=args.thld_fault, color='r', linestyle='--',
                    label='Threshold fault')
        ax1.axhline(y=args.thld_failure, color='r', linestyle='-',
                    label='Threshold failure')
        ax1.axvspan(tc - args.n_window, tc, facecolor='gray', alpha=0.5)
        ax1.set_xlabel('Times (m)')
        ax1.set_ylabel('RMS')
        ax1.set_xlim([-10, 900])
        ax1.set_ylim([min(observed_data) - 0.2, 10])
        ax1.annotate('True RUL: {}'.format(RUL_true), xy=(tc, 5),
                     fontsize=12, weight='bold')

        ax2.set_title('RUL curve')
        ax2.set_xlabel('Times (m)')
        ax2.set_ylabel('RUL')
        ax2.set_xlim([-10, RUL_true + 20])
        ax2.set_ylim([-10, RUL_true + 20])
        ax2.plot([0, RUL_true], [RUL_true, 0], label='True RUL curve')
        ax2.plot(args.tc[:len(RUL_preds)],
                 np.array(RUL_preds) - args.tc[:len(RUL_preds)],
                 label='predicted RUL curve')

        ax1.legend(loc='upper left', bbox_to_anchor=(0.01, 0.6))
        ax2.legend(loc='upper left', bbox_to_anchor=(0.01, 0.6))
        plt.tight_layout()
        plt.pause(0.1)
        plt.clf()

    os.system('pause')
def run_regression_indexed_data(data, inds, regression_model,
                                NORM_X=True, NORM_Y=True):
    """
    Run regression model(s) on a single replica of data (no random resampling).
    Good for testing training and testing on manually specified splits.

    :param data: df, input dataset, training and test mixed, [x, y] labels last column
    :param inds: df or series, index vector of training (ind = 1) and test
        (ind = 2, ..., S) for S test SPLITS
    :param regression_model: string, regression method. Options are hard-coded
        here, but can be extracted into a dict in the future
    :param NORM_X: bool, whether to normalize input data
    :param NORM_Y: bool, whether to normalize output data
    :returns: dict containing weights, accuracy scores for training and tests,
        and the time difference between first and last training points
    """
    tr_ = data.loc[inds.loc[inds['ind'] == 1].index, :].copy()
    if NORM_X:
        scalerX = sk.preprocessing.StandardScaler().fit(tr_.iloc[:, :-1])
        trn = pd.DataFrame(scalerX.transform(tr_.iloc[:, :-1]),
                           columns=tr_.iloc[:, :-1].columns, index=tr_.index)
    else:
        trn = tr_.iloc[:, :-1]
    if NORM_Y:
        scalerY = sk.preprocessing.StandardScaler().fit(
            tr_.iloc[:, -1].values.reshape(-1, 1))
        y_trn = scalerY.transform(tr_.iloc[:, -1].values.reshape(-1, 1))
    else:
        y_trn = tr_.iloc[:, -1]
    trn = trn.assign(labels=y_trn)
    # print(trn.columns.tolist())

    if regression_model.lower() == 'ridgereg':
        # MSE_error = make_scorer(mean_squared_error, greater_is_better=False)
        # regModel = RidgeCV(alphas=np.logspace(-6,6,13), fit_intercept=not NORM_Y,
        #                    normalize=False, store_cv_values=False, gcv_mode='svd',
        #                    cv=3, scoring=MSE_error).fit(trn.iloc[:,:-1], trn.iloc[:,-1])
        regModel = sk.linear_model.Ridge(
            alpha=0.1, fit_intercept=not NORM_Y,
            normalize=False).fit(trn.iloc[:, :-1], trn.iloc[:, -1])
        weights = regModel.coef_
    elif regression_model.lower() == 'lasso':
        # regModel = LassoCV(alphas=np.logspace(-3,-1,3), n_alphas=200,
        #                    fit_intercept=not NORM_Y, cv=3).fit(trn.iloc[:,:-1], trn.iloc[:,-1])
        regModel = sk.linear_model.Lasso(
            alpha=0.1, fit_intercept=not NORM_Y,
            normalize=False).fit(trn.iloc[:, :-1], trn.iloc[:, -1])
        weights = regModel.coef_
    elif regression_model.lower() == 'pls':
        n = 3
        regModel = PLSRegression(n_components=n,
                                 scale=False).fit(trn.iloc[:, :-1],
                                                  trn.iloc[:, -1])
        regModel.coef_ = np.squeeze(np.transpose(regModel.coef_))
        weights = regModel.coef_
    elif regression_model.lower() == 'rf':
        import sklearn.ensemble
        regModel = sklearn.ensemble.RandomForestRegressor(
            n_estimators=100, criterion='mse', max_depth=10,
            min_samples_split=2, min_samples_leaf=1).fit(trn.iloc[:, :-1],
                                                         trn.iloc[:, -1])
    elif regression_model.lower() == 'rbfgpr':
        kernel = 1.0 * kernels.RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)) + \
            1.0 * kernels.WhiteKernel(noise_level=1e-2, noise_level_bounds=(1e-1, 1e+4)) + \
            1.0 * kernels.ConstantKernel(constant_value=1.0, constant_value_bounds=(1e-05, 100000.0)) + \
            1.0 * kernels.DotProduct(sigma_0=1.0, sigma_0_bounds=(1e-05, 100000.0))
        regModel = GaussianProcessRegressor(
            kernel=kernel, optimizer='fmin_l_bfgs_b', alpha=0,
            n_restarts_optimizer=5).fit(trn.iloc[:, :-1], trn.iloc[:, -1])
    elif regression_model.lower() == 'rbfgprard':
        inds_trn = trn.index
        x = trn.iloc[:, :-1].values
        y = trn.iloc[:, -1].values.reshape(-1, 1)
        k = (GPy.kern.RBF(x.shape[1], ARD=True)
             + GPy.kern.White(x.shape[1], 0.01)
             + GPy.kern.Linear(x.shape[1], variances=0.01, ARD=False))
        regModel = GPy.models.GPRegression(x, y, kernel=k)
        regModel.optimize('bfgs', max_iters=200)
        # print(regModel)
        weights = 50 / regModel.sum.rbf.lengthscale
    else:
        print('method not implemented yet. Or check the spelling')
        return []

    # from_to = [str(trn.index.tolist()[0]) + ' - ' + str(trn.index.tolist()[-1])]
    gap_time_delta = str(trn.index.tolist()[-1] - trn.index.tolist()[0])
    # weights_summary[gap_num,:] =
    tr_r2 = []; tr_mse = []  # [[],[]]; mse = [[],[]]
    ts_r2 = []; ts_mse = []
    a = 1
    for bb in np.setdiff1d(np.unique(inds), 1):
        ts_ = data.loc[inds.loc[inds['ind'] == bb].index, :]
        if NORM_X:
            tst = pd.DataFrame(scalerX.transform(ts_.iloc[:, :-1]),
                               columns=ts_.iloc[:, :-1].columns,
                               index=ts_.index)
        else:
            tst = ts_.iloc[:, :-1]
        if NORM_Y:
            y_tst = scalerY.transform(ts_.iloc[:, -1].values.reshape(-1, 1))
        else:
            y_tst = ts_.iloc[:, -1]
        tst = tst.assign(labels=y_tst)

        if regression_model.lower() == 'rbfgprard':
            inds_tst = tst.index
            x_ = tst.iloc[:, :-1].values
            y_ts_h = regModel.predict(x_)[0].reshape(-1,)
            y_ts_h = pd.Series(y_ts_h, index=inds_tst)
            y_tr_h = regModel.predict(x)[0].reshape(-1,)
            y_tr_h = pd.Series(y_tr_h, index=inds_trn)
        else:
            y_ts_h = regModel.predict(tst.iloc[:, :-1])
            y_tr_h = regModel.predict(trn.iloc[:, :-1])
        if NORM_Y:
            y_tr_h = scalerY.inverse_transform(y_tr_h)
            y_ts_h = scalerY.inverse_transform(y_ts_h)
            y_tr_gt = scalerY.inverse_transform(trn.iloc[:, -1])
            y_ts_gt = scalerY.inverse_transform(tst.iloc[:, -1])
        else:
            y_tr_gt = trn.iloc[:, -1]
            y_ts_gt = tst.iloc[:, -1]

        if a == 1:
            tr_r2.append(r2_score(y_tr_gt, y_tr_h))
            tr_mse.append(np.sqrt(mean_squared_error(y_tr_gt, y_tr_h)))
            a = 2
        ts_r2.append(r2_score(y_ts_gt, y_ts_h))
        ts_mse.append(np.sqrt(mean_squared_error(y_ts_gt, y_ts_h)))
        if 0:
            print('trn: MSE %f, R2 %f' % (t_mse, t_r2))
            print('%f -- trn: MSE %f, R2 %f' % (bb, t_mse, t_r2))
            print('%f -- tst: MSE %f, R2 %f' % (bb, mse, r2))

    del inds
    return {'weights': weights, 'gap_time_delta': gap_time_delta,
            'tr_r2': tr_r2, 'ts_r2': ts_r2,
            'tr_mse': tr_mse, 'ts_mse': ts_mse}
def anisotropic_rbf_kernel(
    num_dimensions,
    length_scale_bounds=(1e-5, 1e5),
    signal_variance_bounds=(1e-5, 1e5),
    noise_variance_bounds=(1e-5, 1e5),
    signal_variance=None,
    length_scale=None,
    noise_variance=None,
):
    """An anisotropic Gaussian Process RBF kernel with scale and noise terms.

    Args:
        num_dimensions: Number of input dimensions to kernel.
        length_scale_bounds: Bounds on the kernel length scale. An array-like
            broadcastable to shape `(num_dimensions, 2)`.
        signal_variance_bounds: Optional bounds on the signal variance (the
            kernel scale factor). If None, the kernel has no scale factor.
            An array-like broadcastable to shape `(2,)`.
        noise_variance_bounds: Optional bounds on the noise variance (the
            kernel additive term). If None, the kernel has no additive noise.
            An array-like broadcastable to shape `(2,)`.
        length_scale: Optional initial length scales. A value broadcastable
            to shape `(num_dimensions,)`. Defaults to the log-space midpoint
            of `length_scale_bounds`.
        signal_variance: Optional initial signal variance. A scalar. Defaults
            to the log-space midpoint of `signal_variance_bounds`.
        noise_variance: Optional initial noise variance. A scalar. Defaults
            to the log-space midpoint of `noise_variance_bounds`.
    """
    length_scale_bounds = np.broadcast_to(length_scale_bounds,
                                          (num_dimensions, 2))
    if length_scale is None:
        length_scale = _logspace_mean(length_scale_bounds, axis=-1)
    else:
        length_scale = np.broadcast_to(length_scale, (num_dimensions,))
    kernel = gp_kernels.RBF(length_scale=length_scale,
                            length_scale_bounds=length_scale_bounds)

    if signal_variance_bounds is not None:
        signal_variance_bounds = np.broadcast_to(signal_variance_bounds, (2,))
        if signal_variance is None:
            signal_variance = _logspace_mean(signal_variance_bounds, axis=-1)
        kernel = gp_kernels.ConstantKernel(
            constant_value=signal_variance,
            constant_value_bounds=signal_variance_bounds,
        ) * kernel

    if noise_variance_bounds is not None:
        noise_variance_bounds = np.broadcast_to(noise_variance_bounds, (2,))
        if noise_variance is None:
            noise_variance = _logspace_mean(noise_variance_bounds, axis=-1)
        kernel = kernel + gp_kernels.WhiteKernel(
            noise_level=noise_variance,
            noise_level_bounds=noise_variance_bounds)
    return kernel
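# Minimal usage sketch: build the kernel for a 3-D input space and drop it
# into a GaussianProcessRegressor. The _logspace_mean below is an assumed
# stand-in for the helper the function uses (geometric mean of the bounds).
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process import kernels as gp_kernels

def _logspace_mean(bounds, axis=-1):
    return np.exp(np.mean(np.log(bounds), axis=axis))

kernel = anisotropic_rbf_kernel(num_dimensions=3,
                                length_scale_bounds=(1e-2, 1e2))
gpr = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=5)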
def __init__(self, system_str, npc, nrestarts=2):
    print("Emulators for system " + system_str)
    print("with viscous correction type {:d}".format(idf))
    print("NPC : " + str(npc))
    print("Nrestart : " + str(nrestarts))

    # The list of observables is defined in
    # calculations_file_format_event_average; here we get their names and sum
    # all the centrality bins to find the total number of observables nobs.
    self.nobs = 0
    self.observables = []
    self._slices = {}
    for obs, cent_list in obs_cent_list[system_str].items():
        # for obs, cent_list in calibration_obs_cent_list[system_str].items():
        self.observables.append(obs)
        n = np.array(cent_list).shape[0]
        self._slices[obs] = slice(self.nobs, self.nobs + n)
        self.nobs += n
    print("self.nobs = " + str(self.nobs))

    # read in the model data from file
    print("Loading model calculations from "
          + SystemsInfo[system_str]['main_obs_file'])

    # things to drop
    delete = []

    # build a matrix of dimension (num design pts) x (number of observables)
    Y = []
    for ipt, data in enumerate(trimmed_model_data[system_str]):
        row = np.array([])
        for obs in self.observables:
            # n_bins_bayes = len(calibration_obs_cent_list[system_str][obs])  # only these bins for calibration
            # values = np.array(trimmed_model_data[system_str][ipt, idf][obs]['mean'][:n_bins_bayes])
            values = np.array(data[idf][obs]['mean'])
            if np.isnan(values).sum() > 0:
                print("WARNING! FOUND NAN IN MODEL DATA WHILE BUILDING EMULATOR!")
                print("Design pt = " + str(ipt) + "; Obs = " + obs)  # was undefined `pt`
            row = np.append(row, values)
        Y.append(row)
    Y = np.array(Y)
    print("Y_Obs shape[Ndesign, Nobs] = " + str(Y.shape))

    # Principal components
    self.npc = npc
    self.scaler = StandardScaler(copy=False)
    # whiten to ensure uncorrelated outputs with unit variances
    self.pca = PCA(copy=False, whiten=True, svd_solver='full')

    # Standardize observables and transform through PCA. Use the first
    # `npc` components but save the full PC transformation for later.
    # Keep all rows (design points) but only the first npc columns.
    Z = self.pca.fit_transform(self.scaler.fit_transform(Y))[:, :npc]

    design, design_max, design_min, labels = prepare_emu_design(system_str)
    # delete undesirable data
    if len(delete_design_pts_set) > 0:
        print("Warning! Deleting " + str(len(delete_design_pts_set))
              + " points from data")
        design = np.delete(design, list(delete_design_pts_set), 0)
    ptp = design_max - design_min
    print("Design shape[Ndesign, Nparams] = " + str(design.shape))

    # Define kernel (covariance function):
    # Gaussian correlation (RBF) plus a noise term. The noise term is
    # necessary since model calculations contain statistical noise.
    k0 = 1. * kernels.RBF(
        length_scale=ptp,
        length_scale_bounds=np.outer(ptp, (4e-1, 1e2)),
        # nu = 3.5
    )
    k1 = kernels.ConstantKernel()
    k2 = kernels.WhiteKernel(noise_level=.1, noise_level_bounds=(1e-2, 1e2))
    # kernel = (k0 + k1 + k2)  # this includes a constant kernel
    kernel = (k0 + k2)  # this does not

    # Fit a GP (optimize the kernel hyperparameters) to each PC.
    self.gps = []
    for i, z in enumerate(Z.T):
        print("Fitting PC #", i)
        self.gps.append(
            GPR(kernel=kernel,
                alpha=0.1,
                n_restarts_optimizer=nrestarts,
                copy_X_train=False).fit(design, z))

    for n, (z, gp) in enumerate(zip(Z.T, self.gps)):
        print("GP " + str(n) + " score : " + str(gp.score(design, z)))

    print("Constructing full linear transformation matrix")
    # Construct the full linear transformation matrix, which is just the PC
    # matrix with the first axis multiplied by the explained standard
    # deviation of each PC and the second axis multiplied by the
    # standardization scale factor of each observable.
    self._trans_matrix = (self.pca.components_
                          * np.sqrt(self.pca.explained_variance_[:, np.newaxis])
                          * self.scaler.scale_)

    # Pre-calculate some arrays for inverse transforming the predictive
    # variance (from PC space to physical space).
    #
    # Assuming the PCs are uncorrelated, the transformation is
    #
    #   cov_ij = sum_k A_ki var_k A_kj
    #
    # where A is the trans matrix and var_k is the variance of the kth PC.
    # https://en.wikipedia.org/wiki/Propagation_of_uncertainty
    print("Computing partial transformation for first npc components")
    # Compute the partial transformation for the first `npc` components
    # that are actually emulated.
    A = self._trans_matrix[:npc]
    self._var_trans = np.einsum('ki,kj->kij', A, A,
                                optimize=False).reshape(npc, self.nobs**2)

    # Compute the covariance matrix for the remaining neglected PCs
    # (truncation error). These components always have variance == 1.
    B = self._trans_matrix[npc:]
    self._cov_trunc = np.dot(B.T, B)

    # Add small term to diagonal for numerical stability.
    self._cov_trunc.flat[::self.nobs + 1] += 1e-4 * self.scaler.var_
mask = NMBA > 0
NMBA = mask * 1
X = pd.concat([NMBA, Age, Berlin, Sex, Weight], axis=1)
collist = list(X.columns)
imp = Imputer(missing_values='NaN', strategy='most_frequent', axis=0)
imp.fit(X)
X = imp.transform(X)
X = pd.DataFrame(X, columns=collist)

X_train, X_test, Y_train, Y_test = \
    train_test_split(X, Y, test_size=0.1, random_state=1)

# Kernel
# myKernel = kernels.Sum(kernels.Matern(), kernels.RBF())
# myKernel = kernels.Sum(myKernel, kernels.RationalQuadratic())
# myKernel = kernels.Sum(myKernel, kernels.DotProduct())
myKernel = kernels.RBF()
myKernel = kernels.Sum(myKernel, kernels.DotProduct())
myKernel = kernels.Sum(myKernel, kernels.ConstantKernel())
# myKernel = kernels.Product(myKernel, kernels.DotProduct())
# myKernel = kernels.Sum(myKernel, kernels.ConstantKernel())

model = GaussianProcessClassifier(kernel=myKernel, warm_start=True, n_jobs=2)
model.fit(X_train, Y_train)
y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]
accuracy = accuracy_score(Y_test, predictions)
print(round(accuracy, 2))

# filename = 'gp.pkl'
# pickle.dump(model, open(filename, 'wb'))
TrainData_Out = TrainData_Out_0
InputData_Out = InputData_Out_0

# =============================================================================
# GPL
# =============================================================================
OptimizedKernel_L = []
OptimizedRegression_L = []
theta_L = []
print('Fit Kernel')
for iOut in range(nOut):
    # Kernel selection: anisotropic
    var = numpy.var(TrainData_Out[:, iOut])
    kernel = kernels.ConstantKernel(var / 2,
                                    constant_value_bounds=(var * 1e-3, var * 1e1)) \
        * kernels.RBF(length_scale=[1.0, ] * nIn,
                      length_scale_bounds=(1e-2, 1e3)) \
        + kernels.WhiteKernel(noise_level=var / 2,
                              noise_level_bounds=(var * 1e-3, var * 1e0))

    ## Fit
    nFit = 15
    gp = GaussianProcessRegressor(kernel=kernel,
                                  n_restarts_optimizer=nFit,
                                  normalize_y=True,
                                  alpha=0.0).fit(TrainData_In,
                                                 TrainData_Out[:, iOut])
    OptimizedKernel_L.append(gp.kernel_)
    OptimizedRegression_L.append(gp)
    theta_L.append(gp.kernel_.theta)
    theta_prev = gp.kernel_.theta

## Plot kernel
nl = 100
import sklearn.gaussian_process.kernels as kers

# Constants
myKernels = {
    'k_Constant': lambda: kers.ConstantKernel(),
    'k_WN': lambda: kers.WhiteKernel(),
    'k_RBF': lambda: kers.RBF(),
    'k_RQ': lambda: kers.RationalQuadratic(),
    'k_mat0': lambda: kers.Matern(nu=0.5),
    'k_mat1': lambda: kers.Matern(nu=1.5),
    'k_mat2': lambda: kers.Matern(nu=2.5),
    'k_sine': lambda: kers.ExpSineSquared(),
    # now combination kernels
    'k1': lambda: kers.Sum(kers.ConstantKernel(), kers.ExpSineSquared()),
    'k2': lambda: kers.Product(kers.ConstantKernel(), kers.ExpSineSquared()),
    'k3': lambda: kers.Sum(
        kers.ConstantKernel(),
        kers.Product(kers.ConstantKernel(), kers.ExpSineSquared())),
    'k4': lambda: kers.Sum(kers.ConstantKernel(),
                           kers.Product(kers.RBF(), kers.ExpSineSquared())),
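# Hedged usage sketch for the factory dict above (assuming the dict is closed
# further down): each entry is a zero-argument callable, so every lookup
# yields a fresh, unfitted kernel instance rather than a shared mutable one.
from sklearn.gaussian_process import GaussianProcessRegressor

kernel = myKernels['k_RBF']()  # new RBF kernel on each call
gpr = GaussianProcessRegressor(kernel=kernel)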
from sklearn.gaussian_process import GaussianProcessRegressor as GPR
from sklearn.gaussian_process import kernels

# Setting up
low_x = np.linspace(0, 1, 11).reshape(-1, 1)
high_x = low_x[[0, 4, 6, 10]]
diff_x = high_x

low_y = mf2.forrester.low(low_x)
high_y = mf2.forrester.high(high_x)
scale = 1.87  # As reported in the paper
diff_y = np.array([(mf2.forrester.high(x) - scale * mf2.forrester.low(x))[0]
                   for x in diff_x])

# Training GP models
kernel = kernels.ConstantKernel(constant_value=1.0) \
    * kernels.RBF(length_scale=1.0, length_scale_bounds=(1e-1, 10.0))
gp_direct = GPR(kernel=kernel).fit(high_x, high_y)
gp_low = GPR(kernel=kernel).fit(low_x, low_y)
gp_diff = GPR(kernel=kernel).fit(diff_x, diff_y)

# Using a simple function to combine the two models
def co_y(x):
    return scale * gp_low.predict(x) + gp_diff.predict(x)

# And finally recreating the plot
plot_x = np.linspace(start=0, stop=1, num=501).reshape(-1, 1)
plt.figure(figsize=(6, 5), dpi=600)
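# A short continuation sketch: evaluate the co-kriging surrogate co_y against
# the directly trained high-fidelity GP on the plotting grid. The line styles
# and labels are assumptions, not taken from the original figure.
plt.plot(plot_x, mf2.forrester.high(plot_x), 'k-', label='high fidelity')
plt.plot(plot_x, gp_direct.predict(plot_x), '--', label='direct GP (4 pts)')
plt.plot(plot_x, co_y(plot_x), ':', label='co-kriging')
plt.scatter(high_x, high_y, marker='o', label='high-fi samples')
plt.legend()
plt.show()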
""" if __name__ == '__main__': import numpy as np from sklearn.datasets import make_friedman2 from sklearn.gaussian_process import GaussianProcessRegressor from sklearn.gaussian_process import kernels X_train = np.loadtxt("../input/GaussianProcess_X_train.csv", delimiter=",").reshape(-1, 1) y_train = np.loadtxt("../input/GaussianProcess_y_train.csv", delimiter=",") X_test = np.loadtxt("../input/GaussianProcess_X_test.csv", delimiter=",").reshape(-1, 1) y_test = np.loadtxt("../input/GaussianProcess_y_test.csv", delimiter=",") kernel = kernels.RBF(1.0, (1e-3, 1e3)) + \ kernels.ConstantKernel(1.0, (1e-3, 1e3)) + kernels.WhiteKernel() clf = GaussianProcessRegressor(kernel=kernel, alpha=1e-10, optimizer="fmin_l_bfgs_b", n_restarts_optimizer=20, normalize_y=True).fit(X_train, y_train) pred_mean, pred_std = clf.predict(X_test, return_std=True) def anomaly_score(pred_mean, pred_std, y_test): a = np.log(2*np.pi*pred_std**2)/2 + \ (y_test.reshape(-1) - pred_mean)**2/(2*pred_std**2) return a a = anomaly_score(pred_mean, pred_std, y_test) import matplotlib.pyplot as plt
def get_default_mle_params(
    train_x: np.ndarray,
    train_y: np.ndarray,
    return_score: bool = False,
    test_x: np.ndarray = None,
    test_y: np.ndarray = None,
    **kwargs
) -> Tuple[np.ndarray, Optional[np.ndarray], Optional[np.ndarray]]:
    """
    Finds the best MLE hyperparameters for the GP and otherwise uses defaults.

    This will internally use scikit-learn's GaussianProcessRegressor
    implementation to perform MLE.

    Params:
    ----------------
    train_x (np.ndarray): Training inputs with shape (N x 2).
    train_y (np.ndarray): Training outputs with shape (N x 2).
    return_score (bool): If true, the R^2 score of the training inputs is
        also returned.
    test_x (np.ndarray): Test inputs for R^2 scoring, ignored if return_score
        is false.
    test_y (np.ndarray): Test outputs for R^2 scoring, ignored if return_score
        is false.
    **kwargs: Additional named parameters to pass to
        sklearn.GaussianProcessRegressor's constructor.

    Returns:
    ------------------
    (params) if return_score is False.
    (params, train_r_squared) if return_score is True and either test_x or
        test_y is None.
    (params, train_r_squared, test_r_squared) if return_score is True and
        both test_x and test_y are not None.

    Additional notes:
    - It might complain about too-low noise parameters after optimizing, but
      this can safely be ignored as the noise component is not always
      necessary.
    - It depends on scikit-learn's implementation of the kernels being
      identical to the implementation in DynGP.
    """
    kernel = kernels.ConstantKernel(1.0) \
        * kernels.RBF(length_scale=[4000.0, 4000.0, 1000.0],
                      length_scale_bounds=(10, 1000)) \
        + kernels.WhiteKernel(noise_level_bounds=(0.0001, 10))
    if "n_restarts_optimizer" not in kwargs:
        kwargs["n_restarts_optimizer"] = 10
    gpr = GaussianProcessRegressor(kernel,
                                   normalize_y=True,
                                   copy_X_train=False,
                                   **kwargs)
    gpr.fit(train_x, train_y)
    p = gpr.kernel_.get_params()
    params = Params(
        lengthscales=np.array([
            p["k1__k2"].length_scale,
        ]),
        sigmas=np.array([
            p["k1__k1"].constant_value,
        ]),
        noise=p["k2"].noise_level,
        pdaf_R=np.eye(2) * 50**2,
        pdaf_p_d=0.8,
        pdaf_clutter_rate=2e-3,
        pdaf_gate_size=2.0,
        synthetic_search_radius=500,
        synthetic_R=np.eye(2) * 2000**2)
    if return_score:
        train_r_squared = (gpr.score(train_x[:, :2], train_y),)
        if test_x is not None and test_y is not None:
            test_r_squared = (gpr.score(test_x[:, :2], test_y),)
            return params, train_r_squared, test_r_squared
        return params, train_r_squared
    return params
def _update_mlr_model(mlr_model_type, mlr_model):
    """Update MLR model parameters during run time."""
    if mlr_model_type == 'gpr_sklearn':
        new_kernel = (sklearn_kernels.ConstantKernel(1.0, (1e-5, 1e5)) *
                      sklearn_kernels.RBF(1.0, (1e-5, 1e5)))
        mlr_model.update_parameters(final__regressor__kernel=new_kernel)
def bayesian_optimisation(slice_sample_num, coor_sigma, burn_in,
                          input_dimension, n_iters, sample_loss, bounds,
                          x0=None, n_pre_samples=5, acqui_eva_num=10,
                          random_search=False, epsilon=1e-7,
                          greater_is_better=False, mode='OPT',
                          acqui_mode='MCMC', acqui_sample_num=3):
    """ bayesian_optimisation

    Uses Gaussian Processes to optimise the loss function `sample_loss`.

    Arguments:
    ----------
        slice_sample_num: integer.
            how many samples we draw for each round of slice sampling
        coor_sigma: numpy array
            step-size for slice sampling of each coordinate; the dimension is
            equal to the number of hyperparameters contained in the kernel
        burn_in: integer.
            how many iterations we want to wait before drawing samples from
            slice sampling
        input_dimension: integer.
            dimension of input data
        n_iters: integer.
            Number of iterations to run the search algorithm.
        sample_loss: function.
            Function to be optimised.
        bounds: array-like, shape = [n_params, 2].
            Lower and upper bounds on the parameters of the function
            `sample_loss`.
        x0: array-like, shape = [n_pre_samples, n_params].
            Array of initial points to sample the loss function for. If None,
            randomly samples from the loss function.
        n_pre_samples: integer.
            If x0 is None, samples `n_pre_samples` initial points from the
            loss function.
        acqui_eva_num:
            when evaluating the acquisition function, how many points we want
            to look into
        gp_params: dictionary.
            Dictionary of parameters to pass on to the underlying Gaussian
            Process.
        random_search: integer.
            Flag that indicates whether to perform random search or L-BFGS-B
            optimisation over the acquisition function.
        alpha: double.
            Variance of the error term of the GP.
        epsilon: double.
            Precision tolerance for floats.
        greater_is_better: boolean
            True: maximize the sample_loss function,
            False: minimize the sample_loss function
        mode:
            'OPT' means using an optimizer to optimize the hyperparameters of
            the GP; 'MAP' means using the sample posterior mean to optimize
            the hyperparameters of the GP
        acqui_mode: mode controlling the acquisition
            'OPT': using one prediction based on the previously optimized model
            'MCMC': using several samples to sample the expected acquisition
            function
        acqui_sample_num:
            the number of hyperparameter samples we want to use for the
            integrated acquisition function
    """
    # call slice samplers (the acquisition sampler draws one sample at a time)
    slice_sampler = Slice_sampler(slice_sample_num, coor_sigma, burn_in)
    acqui_slice_sampler = Slice_sampler(1, coor_sigma, burn_in)

    x_list = []
    y_list = []
    y_dur_list = []

    n_params = bounds.shape[0]

    if x0 is None:
        # randomly draw several points as the GP prior
        for params in np.random.uniform(bounds[:, 0], bounds[:, 1],
                                        (n_pre_samples, bounds.shape[0])):
            x_list.append(params)
            start = time.perf_counter()  # time.clock() was removed in Python 3.8
            y_list.append(sample_loss(params))
            elapsed = time.perf_counter() - start
            y_dur_list.append(elapsed)
    else:
        for params in x0:
            x_list.append(params)
            start = time.perf_counter()
            y_list.append(sample_loss(params))
            elapsed = time.perf_counter() - start
            y_dur_list.append(elapsed)

    xp = np.array(x_list)
    yp = np.array(y_list)
    yp_logdur = np.log(np.array(y_dur_list))
    # print(xp, yp)

    # Create the GP
    # kernel = gp.kernels.Matern()
    init_length_scale = np.ones((input_dimension,))
    kernel = kernels.Sum(
        kernels.WhiteKernel(),
        kernels.Product(
            kernels.ConstantKernel(),
            kernels.Matern(length_scale=init_length_scale, nu=5. / 2.)))
    if mode == 'OPT':
        model = Gaussian_Process(kernel, mode)
    elif mode == 'MAP':
        model = Gaussian_Process(kernel, mode)
    else:
        raise Exception('Wrong GP model initialization mode!!!')

    dur = Gaussian_Process(kernel, 'OPT')

    iter_num = 0
    for n in range(n_iters):
        iter_num += 1
        if iter_num % int(n_iters / 2) == 0:
            print('%d iterations have been run' % iter_num)

        # for each iteration, one sample will be drawn and used to train the GP
        model.fit(xp, yp)
        dur.fit(xp, yp_logdur)

        # Sample next hyperparameter
        # print('One sample start')
        if random_search:
            x_random = np.random.uniform(bounds[:, 0], bounds[:, 1],
                                         size=(random_search, n_params))
            ei = -1 * expected_improvement(x_random, model, yp,
                                           greater_is_better=greater_is_better,
                                           n_params=n_params)
            next_sample = x_random[np.argmax(ei), :]
        else:
            if acqui_mode == 'OPT':
                next_sample = sample_next_hyperparameter(
                    expected_improvement, model, yp,
                    greater_is_better=greater_is_better, bounds=bounds,
                    n_restarts=acqui_eva_num)
            elif acqui_mode == 'MCMC':
                sample_theta_list = list()
                for sample_acqui_time in range(acqui_sample_num):
                    initial_log_theta = np.ones((input_dimension + 2,))
                    initial_theta = np.exp(1.0 + initial_log_theta)
                    one_log_theta = acqui_slice_sampler.sample(
                        init=initial_theta, gp=model)
                    one_theta = np.exp(1.0 + one_log_theta)
                    sample_theta_list.append(one_theta)
                next_sample = integrate_sample(
                    integrate_EI, sample_theta_list, yp, mode,
                    greater_is_better=greater_is_better, bounds=bounds,
                    n_restarts=acqui_eva_num)
            elif acqui_mode == 'PERSEC':
                sample_theta_list = list()
                for sample_acqui_time in range(acqui_sample_num):
                    initial_log_theta = np.ones((input_dimension + 2,))
                    initial_theta = np.exp(1.0 + initial_log_theta)
                    one_log_theta = acqui_slice_sampler.sample(
                        init=initial_theta, gp=model)
                    one_theta = np.exp(1.0 + one_log_theta)
                    sample_theta_list.append(one_theta)
                next_sample = integrate_sample_perSec(
                    integrate_EI_perSec, sample_theta_list, dur, yp, mode,
                    greater_is_better=greater_is_better, bounds=bounds,
                    n_restarts=acqui_eva_num)
            else:
                raise Exception('Wrong acquisition mode!!!')
        # print('One sample finished')

        # Duplicates will break the GP. In case of a duplicate, randomly
        # sample the next query point instead.
        if np.any(np.abs(next_sample - xp) <= epsilon):
            next_sample = np.random.uniform(bounds[:, 0], bounds[:, 1],
                                            bounds.shape[0])

        # Sample loss for the new set of parameters
        start = time.perf_counter()
        func_value = sample_loss(next_sample)
        elapsed = time.perf_counter() - start

        # Update lists
        x_list.append(next_sample)
        y_list.append(func_value)
        y_dur_list.append(elapsed)

        # Update xp and yp
        xp = np.array(x_list)
        yp = np.array(y_list)
        yp_logdur = np.log(np.array(y_dur_list))

    return xp, yp, yp_logdur
def __init__(self, system, npc=10, nrestarts=0, nu=2.5):
    logging.info('training emulator for system %s (%d PC, %d restarts)',
                 system, npc, nrestarts)

    Y = []
    self._slices = {}
    self.observables = observables

    # Build an array of all observables to emulate.
    nobs = 0
    for obs, subobslist in self.observables:
        self._slices[obs] = {}
        for subobs in subobslist:
            Y.append(data_list[system][obs][subobs]['Y'])
            n = Y[-1].shape[1]
            self._slices[obs][subobs] = slice(nobs, nobs + n)
            nobs += n
    Y = np.concatenate(Y, axis=1)
    # pickle.dump(Y, open('mod_dat.p', 'wb'))

    self.npc = npc
    self.nobs = nobs
    self.scaler = StandardScaler(copy=False)
    self.pca = PCA(copy=False, whiten=True, svd_solver='full')

    # Standardize observables and transform through PCA. Use the first
    # `npc` components but save the full PC transformation for later.
    Z = self.pca.fit_transform(self.scaler.fit_transform(Y))[:, :npc]

    # Define kernel (covariance function):
    # Gaussian correlation (RBF) plus a noise term.
    design = Design(system)
    # design = joblib.load(filename='cache/lhs/design_s.p')
    # maxes = np.apply_along_axis(max, 0, design)
    # mins = np.apply_along_axis(min, 0, design)
    # ptp = maxes - mins
    ptp = design.max - design.min
    print(ptp)
    kernel = (
        kernels.ConstantKernel(1.0, (1e-3, 1e3))
        * kernels.Matern(length_scale=ptp,
                         length_scale_bounds=np.outer(ptp, (1e-3, 1e3)),
                         nu=nu)
        # * kernels.RBF(
        #     length_scale=ptp,
        #     length_scale_bounds=np.outer(ptp, (1e-3, 1e3))
        # )
        # 1. * kernels.RationalQuadratic(
        #     length_scale=ptp,
        #     length_scale_bounds=np.outer(ptp, (1e-3, 1e3))
        # )
        # + kernels.WhiteKernel(
        #     noise_level=.1**2,
        #     noise_level_bounds=(0.0001**2, 1)
        # )
    )

    # Fit a GP (optimize the kernel hyperparameters) to each PC.
    self.gps = [
        GPR(kernel=kernel,
            alpha=0,
            n_restarts_optimizer=nrestarts,
            copy_X_train=False).fit(design, z) for z in Z.T
    ]
    # print('Emulator design:')
    # print(design.array)

    # Construct the full linear transformation matrix, which is just the PC
    # matrix with the first axis multiplied by the explained standard
    # deviation of each PC and the second axis multiplied by the
    # standardization scale factor of each observable.
    self._trans_matrix = (self.pca.components_
                          * np.sqrt(self.pca.explained_variance_[:, np.newaxis])
                          * self.scaler.scale_)

    # Pre-calculate some arrays for inverse transforming the predictive
    # variance (from PC space to physical space).
    #
    # Assuming the PCs are uncorrelated, the transformation is
    #
    #   cov_ij = sum_k A_ki var_k A_kj
    #
    # where A is the trans matrix and var_k is the variance of the kth PC.
    # https://en.wikipedia.org/wiki/Propagation_of_uncertainty
    #
    # Compute the partial transformation for the first `npc` components
    # that are actually emulated.
    A = self._trans_matrix[:npc]
    self._var_trans = np.einsum('ki,kj->kij', A, A,
                                optimize=False).reshape(npc, nobs**2)

    # Compute the covariance matrix for the remaining neglected PCs
    # (truncation error). These components always have variance == 1.
    B = self._trans_matrix[npc:]
    self._cov_trunc = np.dot(B.T, B)

    # Add small term to diagonal for numerical stability.
    self._cov_trunc.flat[::nobs + 1] += 1e-4 * self.scaler.var_