def train(self, X, y, do_optimize=True):
    """
    Computes the Cholesky decomposition of the covariance of X and
    estimates the GP hyperparameters by optimizing the marginal
    log-likelihood. The prior mean of the GP is set to the empirical
    mean of the observed targets y.

    Parameters
    ----------
    X: np.ndarray (N, D)
        Input data points. The dimensionality of X is (N, D),
        with N as the number of points and D as the number of features.
    y: np.ndarray (N,)
        The corresponding target values.
    do_optimize: boolean
        If set to true, the hyperparameters are optimized; otherwise
        the default hyperparameters of the kernel are used.
    """

    if self.normalize_input:
        # Normalize input to be in [0, 1]
        self.X, self.lower, self.upper = normalization.zero_one_normalization(
            X, self.lower, self.upper)
    else:
        self.X = X

    if self.normalize_output:
        # Normalize output to have zero mean and unit standard deviation
        self.y, self.y_mean, self.y_std = normalization.zero_mean_unit_var_normalization(y)
        if self.y_std == 0:
            raise ValueError(
                "Cannot normalize output. All targets have the same value")
    else:
        self.y = y

    # Use the empirical mean of the data as the prior mean of the GP
    self.mean = np.mean(self.y, axis=0)
    self.gp = george.GP(self.kernel, mean=self.mean)

    if do_optimize:
        self.hypers = self.optimize()
        self.gp.kernel[:] = self.hypers[:-1]
        self.noise = np.exp(self.hypers[-1])  # sigma^2
    else:
        self.hypers = self.gp.kernel[:]
        self.hypers = np.append(self.hypers, np.log(self.noise))

    logger.debug("GP Hyperparameters: " + str(self.hypers))

    try:
        # Precompute the Cholesky factorization of the kernel matrix
        self.gp.compute(self.X, yerr=np.sqrt(self.noise))
    except np.linalg.LinAlgError:
        # If the factorization fails, inflate the noise and try again
        self.noise *= 10
        self.gp.compute(self.X, yerr=np.sqrt(self.noise))

    self.is_trained = True
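# --- Hedged sketch (not part of the original module) ---
# train() above relies on self.optimize() to maximize the marginal
# log-likelihood over the vector [kernel log-parameters..., log-noise].
# The helpers below show one plausible form of that routine, written against
# the old-george-style API used in this file (gp.kernel[:] holds the
# log-parameters, gp.lnlikelihood(y) returns the marginal log-likelihood).
# The function names and the choice of L-BFGS-B are assumptions, not the
# library's confirmed implementation.
import numpy as np
from scipy import optimize


def negative_log_marginal_likelihood_sketch(theta, gp, X, y):
    # theta = [kernel log-parameters, log(sigma^2)]
    gp.kernel[:] = theta[:-1]
    noise = np.exp(theta[-1])
    try:
        gp.compute(X, yerr=np.sqrt(noise))
    except np.linalg.LinAlgError:
        return 1e25  # penalize numerically unstable configurations
    return -gp.lnlikelihood(y)


def optimize_sketch(gp, X, y, theta0):
    res = optimize.minimize(negative_log_marginal_likelihood_sketch, theta0,
                            args=(gp, X, y), method="L-BFGS-B")
    return res.x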
def predict(self, X_test, full_cov=False, **kwargs):
    r"""
    Returns the predictive mean and variance of the objective function
    at the given test points.

    Parameters
    ----------
    X_test: np.ndarray (N, D)
        Input test points
    full_cov: bool
        If set to true, the whole covariance matrix between the test
        points is returned

    Returns
    ----------
    np.array(N,)
        predictive mean
    np.array(N,) or np.array(N, N) if full_cov == True
        predictive variance
    """

    if not self.is_trained:
        raise Exception('Model has to be trained first!')

    if self.normalize_input:
        X_test_norm, _, _ = normalization.zero_one_normalization(
            X_test, self.lower, self.upper)
    else:
        X_test_norm = X_test

    mu, var = self.gp.predict(self.y, X_test_norm)

    if self.normalize_output:
        mu = normalization.zero_mean_unit_var_unnormalization(
            mu, self.y_mean, self.y_std)
        var *= self.y_std ** 2

    if not full_cov:
        var = np.diag(var)

    # Clip negative variances and set them to the smallest
    # positive float value
    var = np.clip(var, np.finfo(var.dtype).eps, np.inf)

    return mu, var
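# --- Hedged usage sketch (not part of the original module) ---
# How train() and predict() are typically called. The GaussianProcess
# constructor call mirrors the one used in the MCMC wrapper further below,
# but its exact signature is an assumption; the Matern-5/2 kernel and the toy
# data are only examples.
import numpy as np
import george

lower = np.zeros(2)
upper = np.ones(2)
kernel = george.kernels.Matern52Kernel(np.ones(2), ndim=2)

X = np.random.rand(30, 2)
y = np.sin(3 * X[:, 0]) + X[:, 1] ** 2

model = GaussianProcess(kernel, lower=lower, upper=upper,
                        normalize_input=True, normalize_output=True)
model.train(X, y, do_optimize=True)

X_test = np.random.rand(5, 2)
mu, var = model.predict(X_test)                 # shapes (5,) and (5,)
_, cov = model.predict(X_test, full_cov=True)   # full (5, 5) covariance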
def sample_functions(self, X_test, n_funcs=1):
    """
    Samples F function values from the current posterior at the N
    specified test points.

    Parameters
    ----------
    X_test: np.ndarray (N, D)
        Input test points
    n_funcs: int
        Number of function samples to draw at the test points.

    Returns
    ----------
    function_samples: np.array(F, N)
        The F function values drawn at the N test points.
    """

    if not self.is_trained:
        raise Exception('Model has to be trained first!')

    if self.normalize_input:
        X_test_norm, _, _ = normalization.zero_one_normalization(
            X_test, self.lower, self.upper)
    else:
        X_test_norm = X_test

    funcs = self.gp.sample_conditional(self.y, X_test_norm, n_funcs)

    if self.normalize_output:
        funcs = normalization.zero_mean_unit_var_unnormalization(
            funcs, self.y_mean, self.y_std)

    if len(funcs.shape) == 1:
        # Make sure a single sample is also returned as a (1, N) array
        return funcs[None, :]
    else:
        return funcs
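# --- Hedged usage sketch (not part of the original module) ---
# Drawing joint posterior samples, e.g. for Thompson-sampling style
# acquisition. Assumes the trained `model` from the usage sketch above.
import numpy as np

X_grid = np.random.rand(50, 2)
samples = model.sample_functions(X_grid, n_funcs=10)
print(samples.shape)   # (10, 50): one row per sampled function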
def normalize(X, lower, upper):
    # Rescale all but the last column to [0, 1]
    X_norm, _, _ = normalization.zero_one_normalization(
        X[:, :-1], lower, upper)
    # Round the last column, which holds a discrete value, and append it
    X_norm = np.concatenate((X_norm, np.rint(X[:, None, -1])), axis=1)
    return X_norm
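# --- Hedged worked example (not part of the original module) ---
# The helper above rescales all but the last column to [0, 1] and rounds the
# last column, which presumably holds a discrete index (e.g. a task id).
import numpy as np

X = np.array([[0.0, 10.0, 1.2],
              [5.0, 20.0, 0.7]])
X_norm = normalize(X, np.array([0.0, 10.0]), np.array([5.0, 20.0]))
# X_norm == [[0.0, 0.0, 1.0],
#            [1.0, 1.0, 1.0]]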
def train(self, X, y, do_optimize=True, **kwargs):
    """
    Performs MCMC sampling of the hyperparameters and trains one
    FabolasGP on X and y for each sampled configuration.
    """
    # Normalize the continuous dimensions to [0, 1] and map the
    # environmental variable (last column) through the basis function
    X_norm, _, _ = normalization.zero_one_normalization(
        X[:, :-1], self.lower, self.upper)
    s_ = self.basis_func(X[:, -1])[:, None]
    self.X = np.concatenate((X_norm, s_), axis=1)

    if self.normalize_output:
        # Normalize output to have zero mean and unit standard deviation
        self.y, self.y_mean, self.y_std = normalization.zero_mean_unit_var_normalization(y)
    else:
        self.y = y

    # Use the mean of the data as the prior mean of the GP
    mean = np.mean(self.y, axis=0)
    self.gp = george.GP(self.kernel, mean=mean)

    if do_optimize:
        # We have one walker for each hyperparameter configuration
        sampler = emcee.EnsembleSampler(self.n_hypers,
                                        len(self.kernel.pars) + 1,
                                        self.loglikelihood)

        # Do a burn-in in the first iteration
        if not self.burned:
            # Initialize the walkers by sampling from the prior
            if self.prior is None:
                self.p0 = self.rng.rand(self.n_hypers,
                                        len(self.kernel.pars) + 1)
            else:
                self.p0 = self.prior.sample_from_prior(self.n_hypers)

            # Run MCMC sampling
            self.p0, _, _ = sampler.run_mcmc(self.p0,
                                             self.burnin_steps,
                                             rstate0=self.rng)

            self.burned = True

        # Start sampling
        pos, _, _ = sampler.run_mcmc(self.p0,
                                     self.chain_length,
                                     rstate0=self.rng)

        # Save the current position, it will be the start point in
        # the next iteration
        self.p0 = pos

        # Take the last samples from each walker
        self.hypers = sampler.chain[:, -1]
    else:
        if self.hypers is None:
            self.hypers = self.gp.kernel[:].tolist()
            # Store the noise in log space to match the MCMC samples
            self.hypers.append(np.log(self.noise))
            self.hypers = [self.hypers]

    self.models = []
    for sample in self.hypers:
        # Instantiate a GP for each hyperparameter configuration
        kernel = deepcopy(self.kernel)
        kernel.pars = np.exp(sample[:-1])
        noise = np.exp(sample[-1])
        model = FabolasGP(kernel,
                          basis_function=self.basis_func,
                          normalize_output=self.normalize_output,
                          noise=noise,
                          lower=self.lower,
                          upper=self.upper,
                          rng=self.rng)
        model.train(X, y, do_optimize=False)
        self.models.append(model)

    self.is_trained = True
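# --- Hedged illustration (not part of the original module) ---
# The environmental variable in the last column of X is mapped through
# self.basis_func before being appended to the normalized inputs. A quadratic
# basis of the form phi(s) = (1 - s)**2, as described in the Fabolas paper for
# the loss kernel, is one common choice; the concrete basis function used by
# this class is an assumption.
import numpy as np


def quadratic_basis_sketch(s):
    # s is the relative dataset-size feature stored in the last column of X
    return (1 - s) ** 2


s = np.array([0.1, 0.5, 1.0])
print(quadratic_basis_sketch(s))   # [0.81, 0.25, 0.0]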
def normalize(self, X):
    # Rescale the continuous dimensions to [0, 1]
    X_norm, _, _ = normalization.zero_one_normalization(
        X[:, :-1], self.lower, self.upper)
    # Map the environmental variable (last column) through the basis function
    s_ = self.basis_function(X[:, -1])[:, None]
    X_norm = np.concatenate((X_norm, s_), axis=1)
    return X_norm
def train(self, X, y, do_optimize=True, **kwargs):
    """
    Performs MCMC sampling to sample hyperparameter configurations from the
    likelihood and trains a GP on X and y for each sampled configuration.

    Parameters
    ----------
    X: np.ndarray (N, D)
        Input data points. The dimensionality of X is (N, D),
        with N as the number of points and D as the number of features.
    y: np.ndarray (N,)
        The corresponding target values.
    do_optimize: boolean
        If set to true, we perform MCMC sampling; otherwise we just use the
        hyperparameters specified in the kernel.
    """

    if self.normalize_input:
        # Normalize input to be in [0, 1]
        self.X, self.lower, self.upper = normalization.zero_one_normalization(
            X, self.lower, self.upper)
    else:
        self.X = X

    if self.normalize_output:
        # Normalize output to have zero mean and unit standard deviation
        self.y, self.y_mean, self.y_std = normalization.zero_mean_unit_var_normalization(y)
        if self.y_std == 0:
            raise ValueError("Cannot normalize output. All targets have the same value")
    else:
        self.y = y

    # Use the mean of the data as the prior mean of the GP
    self.mean = np.mean(self.y, axis=0)
    self.gp = george.GP(self.kernel, mean=self.mean)

    if do_optimize:
        # We have one walker for each hyperparameter configuration
        sampler = emcee.EnsembleSampler(self.n_hypers,
                                        len(self.kernel.pars) + 1,
                                        self.loglikelihood)
        sampler.random_state = self.rng.get_state()

        # Do a burn-in in the first iteration
        if not self.burned:
            # Initialize the walkers by sampling from the prior
            if self.prior is None:
                self.p0 = self.rng.rand(self.n_hypers,
                                        len(self.kernel.pars) + 1)
            else:
                self.p0 = self.prior.sample_from_prior(self.n_hypers)

            # Run MCMC sampling
            self.p0, _, _ = sampler.run_mcmc(self.p0,
                                             self.burnin_steps,
                                             rstate0=self.rng)

            self.burned = True

        # Start sampling
        pos, _, _ = sampler.run_mcmc(self.p0,
                                     self.chain_length,
                                     rstate0=self.rng)

        # Save the current position, it will be the start point in
        # the next iteration
        self.p0 = pos

        # Take the last samples from each walker
        self.hypers = sampler.chain[:, -1]
    else:
        self.hypers = self.gp.kernel[:].tolist()
        # Store the noise in log space to match the MCMC samples
        self.hypers.append(np.log(self.noise))
        self.hypers = [self.hypers]

    self.models = []
    for sample in self.hypers:
        # Instantiate a GP for each hyperparameter configuration
        kernel = deepcopy(self.kernel)
        kernel.pars = np.exp(sample[:-1])
        noise = np.exp(sample[-1])
        model = GaussianProcess(kernel,
                                normalize_output=self.normalize_output,
                                normalize_input=self.normalize_input,
                                noise=noise,
                                lower=self.lower,
                                upper=self.upper,
                                rng=self.rng)
        model.train(X, y, do_optimize=False)
        self.models.append(model)

    self.is_trained = True
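# --- Hedged sketch (not part of the original module) ---
# One plausible form of the self.loglikelihood callable handed to the emcee
# sampler above (as a method it would read gp, X, y and the prior from self).
# Each walker position theta holds the kernel log-parameters followed by the
# log-noise; an optional prior term is added to the GP marginal
# log-likelihood. The prior.lnprob interface and the bound on theta are
# assumptions; only gp.kernel[:], gp.compute and gp.lnlikelihood are taken
# from the API used in this file.
import numpy as np


def loglikelihood_sketch(theta, gp, X, y, prior=None):
    # Reject positions that wandered far out of a sensible log-space range
    if np.any(np.abs(theta) > 20):
        return -np.inf

    lp = 0.0
    if prior is not None:
        lp = prior.lnprob(theta)
        if not np.isfinite(lp):
            return -np.inf

    gp.kernel[:] = theta[:-1]
    noise = np.exp(theta[-1])
    try:
        gp.compute(X, yerr=np.sqrt(noise))
    except np.linalg.LinAlgError:
        return -np.inf

    return lp + gp.lnlikelihood(y)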