class GaussProcess(Minimizer):
    def __init__(self, correlationsQ=False, searchBoundScaleFactor=None):
        super(GaussProcess, self).__init__()
        self.seed_timeout = 1
        self.target = None
        self.devices = []
        self.energy = 4
        self.seed_iter = 0
        self.numBV = 30
        self.xi = 0.01
        self.bounds = None
        self.acq_func = ['PI', 'EI', 'UCB'][-1]
        self.alt_param = -1
        self.m = 200
        self.iter_bound = False
        # filepath = os.path.join(os.getcwd(), "parameters", "hyperparameters.npy")
        # print('MINT-->Grabbing hyps from...: ', filepath)
        # self.hyper_file = filepath
        self.max_iter = 50
        self.norm_coef = 0.1
        self.multiplier = 1
        self.simQ = False
        self.seedScanBool = True
        self.prior_data = None
        self.correlationsQ = correlationsQ
        self.searchBoundScaleFactor = searchBoundScaleFactor

    def seed_simplex(self):
        """Run a short simplex scan to generate seed data for the GP prior."""
        opt_smx = Optimizer()
        opt_smx.normalization = True
        opt_smx.norm_coef = self.norm_coef
        opt_smx.timeout = self.seed_timeout
        minimizer = Simplex()
        minimizer.max_iter = self.seed_iter
        opt_smx.minimizer = minimizer
        # opt.debug = True
        seq = [Action(func=opt_smx.max_target_func, args=[self.target, self.devices])]
        opt_smx.eval(seq)
        # stack the visited device settings with the (negated) penalties
        seed_data = np.append(np.vstack(opt_smx.opt_ctrl.dev_sets),
                              np.transpose(-np.array([opt_smx.opt_ctrl.penalty])),
                              axis=1)
        import pandas as pd
        self.prior_data = pd.DataFrame(seed_data)
        self.seed_y_data = opt_smx.opt_ctrl.penalty

    def preprocess(self):
        self.target.mi.target = self.target

        # assemble hyperparameters
        (self.length_scales, self.amp_variance, self.single_noise_variance,
         self.mean_noise_variance, self.precision_matrix) = normscales.normscales(
            self.target.mi, self.devices, correlationsQ=self.correlationsQ)

        # build precision_matrix if not returned
        print('Precision before', self.precision_matrix)
        if self.precision_matrix is None:
            self.covarmat = np.diag(self.length_scales) ** 2
            print('Covariance', self.covarmat)
            self.precision_matrix = np.linalg.inv(self.covarmat)
        print('Precision', self.precision_matrix)
        print('Length Scales', self.length_scales)

        # create OnlineGP model
        dim = len(self.devices)
        hyperparams = (self.precision_matrix, np.log(self.amp_variance),
                       np.log(self.mean_noise_variance))
        # self.model = OGP(dim, hyperparams, maxBV=self.numBV, weighted=False)
        self.model = OGP(dim, hyperparams, maxBV=self.numBV,
                         covar=['RBF_ARD', 'MATERN32_ARD', 'MATERN52_ARD'][0],
                         weighted=False)

        # initialize model on prior data if available
        if self.prior_data is not None:
            p_X = self.prior_data.iloc[:, :-1]
            p_Y = self.prior_data.iloc[:, -1]
            num = p_X.shape[0]
            self.model.fit(p_X, p_Y, min(self.m, num))

        # create Bayesian optimizer
        dev_ids = [dev.eid for dev in self.devices]
        dev_vals = [dev.get_value() for dev in self.devices]
        self.scanner = BayesOpt(model=self.model,
                                target_func=self.target,
                                acq_func=self.acq_func,
                                xi=self.xi,
                                alt_param=self.alt_param,
                                m=self.m,
                                bounds=self.bounds,
                                iter_bound=self.iter_bound,
                                prior_data=self.prior_data,
                                start_dev_vals=dev_vals,
                                dev_ids=dev_ids,
                                searchBoundScaleFactor=self.searchBoundScaleFactor)
        self.scanner.max_iter = self.max_iter
        self.scanner.opt_ctrl = self.opt_ctrl

    def minimize(self, error_func, x):
        self.energy = self.mi.get_energy()
        if self.seedScanBool:
            self.seed_simplex()
        self.preprocess()
        # x = [dev.get_value() for dev in self.devices]  # is this needed?
        self.scanner.minimize(error_func, x)
        self.saveModel()
        return

    def saveModel(self):
        """Add GP model parameters to the save file."""
""" # add in extra GP model data to save try: self.mi.data except: self.mi.data = {} self.mi.data["acq_fcn"] = self.acq_func # OnlineGP stuff try: self.mi.data["alpha"] = self.model.alpha except: pass try: self.mi.data["C"] = self.model.C except: pass try: self.mi.data["BV"] = self.model.BV except: pass try: self.mi.data["covar_params"] = self.model.covar_params except: pass try: self.mi.data["KB"] = self.model.KB except: pass try: self.mi.data["KBinv"] = self.model.KBinv except: pass try: self.mi.data["weighted"] = self.model.weighted except: pass try: self.mi.data["noise_var"] = self.model.noise_var except: pass try: self.mi.data["corrmat"] = self.corrmat except: pass try: self.mi.data["covarmat"] = self.covarmat except: pass try: self.mi.data["length_scales"] = self.length_scales except: pass try: self.mi.data["amp_variance"] = self.amp_variance except: pass try: self.mi.data["single_noise_variance"] = self.single_noise_variance except: pass try: self.mi.data["mean_noise_variance"] = self.mean_noise_variance except: pass try: self.mi.data["precision_matrix"] = self.precision_matrix except: pass self.mi.data["seedScanBool"] = self.seedScanBool if self.seedScanBool: self.mi.data["nseed"] = self.prior_data.shape[0] else: self.mi.data["nseed"] = 0 if type(self.model.prmeanp) is type(None): self.mi.data["prmean_params_amp"] = "None" self.mi.data["prmean_params_centroid"] = "None" self.mi.data["prmean_params_invcovarmat"] = "None" else: self.mi.data["prmean_params_amp"] = self.model.prmeanp[0] self.mi.data["prmean_params_centroid"] = self.model.prmeanp[1] self.mi.data["prmean_params_invcovarmat"] = self.model.prmeanp[2] if type(self.model.prvarp) is type(None): self.mi.data["prvar_params"] = "None" else: self.mi.data["prvar_params"] = self.model.prvarp try: self.mi.data["prmean_name"] = self.model.prmean_name except: pass try: self.mi.data["prior_pv_info"] = self.model.prior_pv_info except: pass
class DKLGP(object):
    def __init__(self, dim, hidden_layers=[], dim_z=None, mask=None, alpha=1.0,
                 noise=0.1, activations='lrelu', weight_dir=None):
        self.dim = dim
        self.dim_z = dim_z or dim

        # initialize the OGP object we use to actually make our predictions
        OGP_params = (np.zeros((self.dim_z,)), np.log(alpha), np.log(noise))  # lengthscales of one (logged)
        self.ogp = OGP(self.dim_z, OGP_params)

        # our embedding function, initially the identity
        # if unchanged, the DKLGP should match the functionality of OGP
        self.embed = lambda x: x

        # build the neural network structure of the DKL
        self.layers = []
        for l in hidden_layers:
            self.layers.append(Dense(l, activation=activations))
        # add the linear output layer and the GP (used for likelihood training);
        # use self.dim_z, since the dim_z argument may be None
        if len(self.layers) > 0:
            self.layers.append(Dense(self.dim_z))
        else:
            self.mask = mask
            self.layers.append(Dense(self.dim_z, mask=mask))
        self.layers.append(CovMat(kernel='rbf', alpha_fixed=False))  # kernel should match the one used in OGP

        # if weight_dir is specified, immediately initialize the embedding
        # from the saved neural network
        if weight_dir is not None:
            self.load_embedding(weight_dir)

    # sets up the DKL and trains the embedding; nullifies the effect of
    # load_embedding if it was called previously
    # lr is the learning rate: a reasonable default is 2e-4
    # maxiter is the number of iterations of the solver; scales the training time linearly
    # batch_size is the size of a mini batch; scales the training time ~quadratically
    # gp=True in NNRegressor() sets the GP likelihood as the optimization target
    def train_embedding(self, x, y, lr=2.e-4, batch_size=50, maxiter=4000):
        opt = Adam(lr)
        self.DKLmodel = NNRegressor(self.layers, opt=opt, batch_size=batch_size,
                                    maxiter=maxiter, gp=True, verbose=False)
        self.DKLmodel.fit(x, y)
        # fast_forward gives the mapping up to (but not including) the GP (x -> z);
        # (something like) full_forward maps through the whole DKL + GP
        self.embed = self.DKLmodel.fast_forward

    # loads the DKL and embedding from the specified directory; forgets any
    # previous embedding
    # note that network structure, activations, etc. still need to be specified in __init__
    def load_embedding(self, dname):
        self.DKLmodel = NNRegressor(self.layers)
        self.DKLmodel.first_run(np.zeros((1, self.dim)), None, load_path=dname)
        self.embed = self.DKLmodel.fast_forward

    # saves the neural network parameters to the specified directory, allowing the
    # saved embedding to be replicated without re-training it
    def save_embedding(self, dname):
        if not os.path.isdir(dname):
            os.makedirs(dname)
        self.DKLmodel.save_weights(dname)

    # allows manually setting a linear transform; make sure the transpose
    # convention is right (x_rows.shape is [npoints, ndim])
    def set_linear(self, matrix):
        self.linear_transform = matrix
        self.embed = lambda x_rows: np.dot(x_rows, self.linear_transform)

    # sets a linear transformation based on a given correlation matrix which is
    # assumed to fit the data
    # NOTE: this isn't necessarily log-likelihood-optimal
    def linear_from_correlation(self, matrix):
        # multinormal covariance matrix
        center = np.linalg.inv(matrix)
        chol = np.linalg.cholesky(center)
        self.set_linear(chol)

    # computes the log-likelihood of the given data set using the current embedding
    # ASSUMES YOU'RE USING AN RBF KERNEL
    def eval_LL(self, X, Y):
        N = X.shape[0]
        Z = self.embed(X)
        diffs = euclidean_distances(Z, squared=True)
        alpha = np.exp(self.ogp.covar_params[1])  # kind of a hack
        rbf_K = alpha * np.exp(-diffs / 2.)
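        # The rest is the standard Gaussian log marginal likelihood for
        # Y ~ N(0, K) with K = rbf_K + noise_var * I, computed via a Cholesky
        # factorization K = L L^T for numerical stability:
        #   log p(Y) = -1/2 Y^T K^{-1} Y - 1/2 log det(K) - N/2 log(2 pi)
        # where Y^T K^{-1} Y = |L^{-1} Y|^2 and log det(K) = 2 sum(log diag(L)).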
        K_full = rbf_K + self.ogp.noise_var * np.eye(N)
        L = np.linalg.cholesky(K_full)  # K = L * L.T
        Ly = np.linalg.solve(L, Y)  # finds inverse(L) * y
        log_lik = -0.5 * np.sum(Ly ** 2)  # -1/2 * y.T * inverse(L * L.T) * y
        log_lik -= np.sum(np.log(np.diag(L)))  # equivalent to -1/2 * log(det(K))
        log_lik -= 0.5 * N * np.log(2 * np.pi)
        return float(log_lik)

    # allows passing custom alpha/noise
    # if compute_deriv is True, assumes the embedding is linear and also returns
    # the derivative w.r.t. the transform
    def custom_LL(self, X, Y, alpha, noise_variance, compute_deriv=False):
        N, dim = X.shape
        Z = self.embed(X)
        if not compute_deriv:
            diffs = euclidean_distances(Z, squared=True)
            rbf_K = alpha * np.exp(-diffs / 2.)
            K_full = rbf_K + noise_variance * np.eye(N)
            L = np.linalg.cholesky(K_full)  # K = L * L.T
            Ly = np.linalg.solve(L, Y)  # finds inverse(L) * y
            log_lik = -0.5 * np.sum(Ly ** 2)  # -1/2 * y.T * inverse(L * L.T) * y
            log_lik -= np.sum(np.log(np.diag(L)))  # equivalent to -1/2 * log(det(K))
            log_lik -= 0.5 * N * np.log(2 * np.pi)
            return float(log_lik)
        lengths = [0. for d in range(dim)]
        params = lengths + [np.log(alpha)] + [np.log(noise_variance)]
        neglik, deriv = SPGP_likelihood_4scipy(params, Y, Z)
        deriv_noise = deriv[-1]
        deriv_coeff = deriv[-2]
        deriv_z = deriv[:self.dim_z * N].reshape((N, self.dim_z))
        deriv_transform = np.dot(X.T, deriv_z)
        # self.mask only exists when no hidden layers were requested;
        # fall back to an all-ones mask otherwise
        mask = getattr(self, 'mask', None)
        if mask is None:
            mask = np.ones((dim, self.dim_z))
        return -neglik, deriv_transform * mask, deriv_coeff, deriv_noise

    # takes an n x dim_z matrix Z and translates it back to x, assuming the
    # embedding is linear
    # currently requires that the model embedding was set via set_linear
    def inverse_embed(self, Z):
        assert 'linear_transform' in dir(self)
        transform = self.linear_transform
        # assumption is that z = x * transform
        column_x = np.linalg.solve(transform.T, Z.T)
        return column_x.T

    ##########
    # remaining functions mimic Online GP functionality, just embedding x -> z first
    ##########

    def fit(self, X, y):
        Z = self.embed(X)
        self.ogp.fit(Z, y)

    def update(self, x_new, y_new):
        z_new = self.embed(x_new)
        self.ogp.update(z_new, y_new)

    def predict(self, x):
        z = np.array(self.embed(x), ndmin=2)
        return self.ogp.predict(z)
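

# ---------------------------------------------------------------------------
# Minimal self-contained check (illustrative; not used by the classes above)
# that the Cholesky-based log-likelihood in eval_LL/custom_LL matches a direct
# evaluation of the Gaussian log density. The helper name and constants are
# ours; only numpy and sklearn's euclidean_distances are assumed.
# ---------------------------------------------------------------------------
def _check_cholesky_loglik(N=5, seed=0):
    from sklearn.metrics.pairwise import euclidean_distances  # same helper eval_LL uses
    rng = np.random.RandomState(seed)
    X = rng.randn(N, 2)
    y = rng.randn(N)
    # RBF kernel plus observation noise, mirroring eval_LL (alpha=1.3, noise=0.1)
    K = 1.3 * np.exp(-euclidean_distances(X, squared=True) / 2.) + 0.1 * np.eye(N)
    # Cholesky route, as in the methods above
    L = np.linalg.cholesky(K)
    Ly = np.linalg.solve(L, y)
    ll_chol = -0.5 * np.sum(Ly ** 2) - np.sum(np.log(np.diag(L))) - 0.5 * N * np.log(2 * np.pi)
    # direct route: -1/2 * y.T * K^-1 * y - 1/2 * log det(K) - N/2 * log(2 pi)
    _, logdet = np.linalg.slogdet(K)
    ll_direct = -0.5 * y.dot(np.linalg.solve(K, y)) - 0.5 * logdet - 0.5 * N * np.log(2 * np.pi)
    assert np.isclose(ll_chol, ll_direct)
    return ll_chol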