def get_mixed_gp(cat_dims, cont_dims, rs, noise=1e-3, normalize_y=True):
    from smac.epm.gp_kernels import ConstantKernel, Matern, WhiteKernel, HammingKernel
    from smac.epm.gp_base_prior import HorseshoePrior, LognormalPrior

    cat_dims = np.array(cat_dims, dtype=int)  # np.int is deprecated; use the builtin
    cont_dims = np.array(cont_dims, dtype=int)
    n_dimensions = len(cat_dims) + len(cont_dims)

    cov_amp = ConstantKernel(
        2.0,
        constant_value_bounds=(1e-10, 2),
        prior=LognormalPrior(mean=0.0, sigma=1.0, rng=rs),
    )

    exp_kernel = Matern(
        np.ones([len(cont_dims)]),
        [(np.exp(-10), np.exp(2)) for _ in range(len(cont_dims))],
        nu=2.5,
        operate_on=cont_dims,
    )

    ham_kernel = HammingKernel(
        np.ones([len(cat_dims)]),
        [(np.exp(-10), np.exp(2)) for _ in range(len(cat_dims))],
        operate_on=cat_dims,
    )

    noise_kernel = WhiteKernel(
        noise_level=noise,
        noise_level_bounds=(1e-10, 2),
        prior=HorseshoePrior(scale=0.1, rng=rs),
    )

    kernel = cov_amp * (exp_kernel * ham_kernel) + noise_kernel

    bounds = [0] * n_dimensions
    types = np.zeros(n_dimensions)
    for c in cont_dims:
        bounds[c] = (0., 1.)
    for c in cat_dims:
        types[c] = 3
        bounds[c] = (3, np.nan)

    cs = ConfigurationSpace()
    for c in cont_dims:
        cs.add_hyperparameter(UniformFloatHyperparameter('X%d' % c, 0, 1))
    for c in cat_dims:
        cs.add_hyperparameter(CategoricalHyperparameter('X%d' % c, [0, 1, 2, 3]))

    model = GaussianProcess(
        configspace=cs,
        bounds=bounds,
        types=types,
        kernel=kernel,
        seed=rs.randint(low=1, high=10000),
        normalize_y=normalize_y,
    )
    return model
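# Hypothetical usage sketch for get_mixed_gp (not from the original source):
# build a GP over two categorical and two continuous dimensions and fit it to
# random data. Assumes the pre-2.0 SMAC GP API, where train() and predict()
# accept plain numpy arrays and categoricals are encoded as integer levels
# matching the CategoricalHyperparameter choices above.
def _example_mixed_gp():
    rs = np.random.RandomState(1)
    model = get_mixed_gp(cat_dims=[0, 1], cont_dims=[2, 3], rs=rs)
    X = np.hstack([rs.randint(0, 4, size=(20, 2)), rs.rand(20, 2)])
    y = rs.rand(20, 1)
    model.train(X, y)
    means, variances = model.predict(X)  # both of shape (20, 1)
    return means, variances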
def _construct_model(configspace, rng):
    from smac.epm.gp_kernels import ConstantKernel, Matern, WhiteKernel, HammingKernel
    from smac.epm.gp_base_prior import HorseshoePrior, LognormalPrior

    types, bounds = _configspace_to_types_and_bounds(configspace)
    cont_dims = np.nonzero(types == 0)[0]
    cat_dims = np.nonzero(types != 0)[0]

    cov_amp = ConstantKernel(
        2.0,
        constant_value_bounds=(np.exp(-10), np.exp(2)),
        prior=LognormalPrior(mean=0.0, sigma=1.0, rng=rng),
    )

    if len(cont_dims) > 0:
        exp_kernel = Matern(
            np.ones([len(cont_dims)]),
            [(np.exp(-6.754111155189306), np.exp(0.0858637988771976))
             for _ in range(len(cont_dims))],
            nu=2.5,
            operate_on=cont_dims,
        )

    if len(cat_dims) > 0:
        ham_kernel = HammingKernel(
            np.ones([len(cat_dims)]),
            [(np.exp(-6.754111155189306), np.exp(0.0858637988771976))
             for _ in range(len(cat_dims))],
            operate_on=cat_dims,
        )

    noise_kernel = WhiteKernel(
        noise_level=1e-8,
        noise_level_bounds=(np.exp(-25), np.exp(2)),
        prior=HorseshoePrior(scale=0.1, rng=rng),
    )

    if len(cont_dims) > 0 and len(cat_dims) > 0:  # both
        kernel = cov_amp * (exp_kernel * ham_kernel) + noise_kernel
    elif len(cont_dims) > 0 and len(cat_dims) == 0:  # only continuous
        kernel = cov_amp * exp_kernel + noise_kernel
    elif len(cont_dims) == 0 and len(cat_dims) > 0:  # only categorical
        kernel = cov_amp * ham_kernel + noise_kernel
    else:
        raise ValueError('The configuration space contains neither continuous '
                         'nor categorical hyperparameters')

    def _impute_inactive(self, X):
        X = X.copy()
        return _impute_conditional_data(X, self.configspace)

    # Draw the seed from the RNG passed in instead of the global `random`
    # module so that model construction is reproducible.
    seed = rng.randint(0, 100)

    # Monkey-patch the imputation of inactive (conditional) hyperparameters.
    GaussianProcess._impute_inactive = _impute_inactive
    return GaussianProcess(
        configspace=configspace,
        types=types,
        bounds=bounds,
        seed=seed,
        kernel=kernel,
    )
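# Hypothetical usage sketch for _construct_model (not from the original
# source): a small mixed configuration space. Assumes
# _configspace_to_types_and_bounds behaves like smac.epm.util_funcs.get_types,
# i.e. types[i] == 0 marks a continuous dimension and a positive entry gives
# the number of categories.
def _example_construct_model():
    from ConfigSpace import ConfigurationSpace
    from ConfigSpace.hyperparameters import (CategoricalHyperparameter,
                                             UniformFloatHyperparameter)
    cs = ConfigurationSpace()
    cs.add_hyperparameter(UniformFloatHyperparameter('lr', 0.0, 1.0))
    cs.add_hyperparameter(CategoricalHyperparameter('optimizer', ['sgd', 'adam']))
    rng = np.random.RandomState(0)
    return _construct_model(cs, rng)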
def get_gp(n_dimensions, rs, noise=1e-3):
    cov_amp = 2
    initial_ls = np.ones([n_dimensions])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dimensions)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1, rng=rs)

    # Number of MCMC walkers: three per hyperparameter, rounded up to an even
    # number as emcee requires. Unused by this non-MCMC GP; presumably kept
    # from the MCMC variant of this helper.
    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    bounds = [(0., 1.) for _ in range(n_dimensions)]
    types = np.zeros(n_dimensions)

    model = GaussianProcess(
        bounds=bounds,
        types=types,
        kernel=kernel,
        prior=prior,
        rng=rs,
        noise=noise,
        normalize_output=False,
        normalize_input=True,
    )
    return model
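# Hypothetical smoke test for the george-based helper above (not from the
# original source); RoBO-style models expose train() and predict():
def _example_george_gp():
    rs = np.random.RandomState(42)
    gp = get_gp(n_dimensions=3, rs=rs)
    X = rs.rand(25, 3)
    y = np.sin(3 * X).sum(axis=1)
    gp.train(X, y, do_optimize=True)
    mean, variance = gp.predict(rs.rand(5, 3))
    return mean, variance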
def get_gp(n_dimensions, rs, noise=1e-3, normalize_y=True) -> GaussianProcess:
    from smac.epm.gp_kernels import ConstantKernel, Matern, WhiteKernel
    from smac.epm.gp_base_prior import HorseshoePrior, LognormalPrior

    cov_amp = ConstantKernel(
        2.0,
        constant_value_bounds=(1e-10, 2),
        prior=LognormalPrior(mean=0.0, sigma=1.0, rng=rs),
    )
    exp_kernel = Matern(
        np.ones([n_dimensions]),
        [(np.exp(-10), np.exp(2)) for _ in range(n_dimensions)],
        nu=2.5,
    )
    noise_kernel = WhiteKernel(
        noise_level=noise,
        noise_level_bounds=(1e-10, 2),
        prior=HorseshoePrior(scale=0.1, rng=rs),
    )
    kernel = cov_amp * exp_kernel + noise_kernel

    bounds = [(0., 1.) for _ in range(n_dimensions)]
    types = np.zeros(n_dimensions)

    configspace = ConfigurationSpace()
    for i in range(n_dimensions):
        configspace.add_hyperparameter(UniformFloatHyperparameter('x%d' % i, 0, 1))

    model = GaussianProcess(
        configspace=configspace,
        bounds=bounds,
        types=types,
        kernel=kernel,
        seed=rs.randint(low=1, high=10000),
        normalize_y=normalize_y,
        n_opt_restarts=2,
    )
    return model
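# The kernel composed above (amplitude * Matern + white noise) follows
# scikit-learn's kernel algebra; for illustration only, here is the same
# structure with plain scikit-learn kernels. The SMAC wrappers add
# hyperparameter priors and the operate_on mechanism on top of these.
def _example_sklearn_kernel(n_dimensions=3):
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import ConstantKernel, Matern, WhiteKernel
    kernel = (ConstantKernel(2.0)
              * Matern(length_scale=np.ones(n_dimensions), nu=2.5)
              + WhiteKernel(noise_level=1e-3))
    rs = np.random.RandomState(0)
    X, y = rs.rand(20, n_dimensions), rs.rand(20)
    return GaussianProcessRegressor(kernel=kernel, normalize_y=True).fit(X, y)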
def _train(self, X: np.ndarray, y: np.ndarray):
    """Trains the random forest on X and y, then fits a GP to the data of
    each leaf of each tree.

    Parameters
    ----------
    X : np.ndarray [n_samples, n_features (config + instance features)]
        Input data points.
    y : np.ndarray [n_samples, ]
        The corresponding target values.

    Returns
    -------
    self
    """
    self.X = X
    self.y = y.flatten()

    from smac.epm.gp_kernels import ConstantKernel, Matern, WhiteKernel, HammingKernel
    from smac.epm.gp_base_prior import HorseshoePrior, LognormalPrior

    self.rf = sklearn.ensemble.RandomForestRegressor(
        max_features=0.5,
        bootstrap=True,
        max_depth=3,
        min_samples_leaf=10,
        n_estimators=N_EST,
    )
    # self.rf.fit(X, np.log(y - np.min(y) + 1e-7).ravel())
    self.rf.fit(X, y.ravel())

    # For each tree, group the training points by the leaf they fall into.
    indicators = np.array(self.rf.apply(X))
    all_datasets = []
    all_targets = []
    all_mappings = []
    for est in range(N_EST):
        unique = np.unique(indicators[:, est])
        mapping = {j: i for i, j in enumerate(unique)}
        datasets = [[] for _ in unique]
        targets = [[] for _ in unique]  # was `for _ in indicators`: one bucket per leaf, not per sample
        for indicator, x, y_ in zip(indicators[:, est], X, y):
            index = mapping[indicator]
            datasets[index].append(x)
            targets[index].append(y_)
        all_mappings.append(mapping)
        all_datasets.append(datasets)
        all_targets.append(targets)

    # print('Before')
    # for est in range(N_EST):
    #     for dataset in all_datasets[est]:
    #         print(len(dataset))

    # Augment each leaf's dataset with the training points closest to the
    # leaf's axis-aligned bounding box, so the per-leaf GPs behave better at
    # the leaf boundaries.
    for est in range(N_EST):
        n_nodes = self.rf.estimators_[est].tree_.node_count
        children_left = self.rf.estimators_[est].tree_.children_left
        children_right = self.rf.estimators_[est].tree_.children_right
        feature = self.rf.estimators_[est].tree_.feature
        threshold = self.rf.estimators_[est].tree_.threshold

        # The tree structure can be traversed to compute various properties such
        # as the depth of each node and whether or not it is a leaf.
        node_depth = np.zeros(shape=n_nodes, dtype=np.int64)
        is_leaves = np.zeros(shape=n_nodes, dtype=bool)
        stack = [(0, -1)]  # seed is the root node id and its parent depth
        while len(stack) > 0:
            node_id, parent_depth = stack.pop()
            node_depth[node_id] = parent_depth + 1

            # If we have a test node
            if children_left[node_id] != children_right[node_id]:
                stack.append((children_left[node_id], parent_depth + 1))
                stack.append((children_right[node_id], parent_depth + 1))
            else:
                is_leaves[node_id] = True

        # Collect, for every leaf, the decision rules on the path to it.
        rules = {}
        import copy

        def extend(rule, idx):
            if is_leaves[idx]:
                rules[idx] = rule
            else:
                rule_left = copy.deepcopy(rule)
                rule_left.append((threshold[idx], '<=', feature[idx]))
                extend(rule_left, children_left[idx])
                rule_right = copy.deepcopy(rule)
                rule_right.append((threshold[idx], '>', feature[idx]))
                extend(rule_right, children_right[idx])

        extend([], 0)
        # print(rules)

        for key, rule in rules.items():
            # Turn the rule list into per-feature lower/upper bounds of the leaf.
            lower = -np.ones((X.shape[1], )) * np.inf
            upper = np.ones((X.shape[1], )) * np.inf
            for element in rule:
                if element[1] == '<=':
                    if element[0] < upper[element[2]]:
                        upper[element[2]] = element[0]
                else:
                    if element[0] > lower[element[2]]:
                        lower[element[2]] = element[0]

            for feature_idx in range(X.shape[1]):
                closest_lower = -np.inf
                closest_lower_idx = None  # was misspelled `closes_lower_idx`
                closest_upper = np.inf
                closest_upper_idx = None
                # Track the *sample* index of the nearest point outside the
                # leaf along this feature (the original stored `feature_idx`
                # here and then indexed X with it, which mixed up the axes).
                for sample_idx, x in enumerate(X):
                    if lower[feature_idx] < x[feature_idx] < upper[feature_idx]:
                        continue
                    if x[feature_idx] <= lower[feature_idx]:
                        if x[feature_idx] > closest_lower:
                            closest_lower = x[feature_idx]
                            closest_lower_idx = sample_idx
                    if x[feature_idx] >= upper[feature_idx]:
                        if x[feature_idx] < closest_upper:
                            closest_upper = x[feature_idx]
                            closest_upper_idx = sample_idx

                if closest_upper_idx is not None:
                    all_datasets[est][all_mappings[est][key]].append(X[closest_upper_idx])
                    all_targets[est][all_mappings[est][key]].append(y[closest_upper_idx])
                if closest_lower_idx is not None:
                    all_datasets[est][all_mappings[est][key]].append(X[closest_lower_idx])
                    all_targets[est][all_mappings[est][key]].append(y[closest_lower_idx])

    # print('After')
    # for est in range(N_EST):
    #     for dataset in all_datasets[est]:
    #         print(len(dataset))

    self.all_mappings = all_mappings
    self.models = []
    for est in range(N_EST):
        models = []
        for dataset, targets_ in zip(all_datasets[est], all_targets[est]):
            cov_amp = ConstantKernel(
                2.0,
                constant_value_bounds=(np.exp(-10), np.exp(2)),
                prior=LognormalPrior(mean=0.0, sigma=1.0, rng=self.rng),
            )
            cont_dims = np.nonzero(self.types == 0)[0]
            cat_dims = np.nonzero(self.types != 0)[0]

            if len(cont_dims) > 0:
                exp_kernel = Matern(
                    np.ones([len(cont_dims)]),
                    [(np.exp(-10), np.exp(2)) for _ in range(len(cont_dims))],
                    nu=2.5,
                    operate_on=cont_dims,
                )
            if len(cat_dims) > 0:
                ham_kernel = HammingKernel(
                    np.ones([len(cat_dims)]),
                    [(np.exp(-10), np.exp(2)) for _ in range(len(cat_dims))],
                    operate_on=cat_dims,
                )
            noise_kernel = WhiteKernel(
                noise_level=1e-8,
                noise_level_bounds=(np.exp(-25), np.exp(2)),
                prior=HorseshoePrior(scale=0.1, rng=self.rng),
            )

            if len(cont_dims) > 0 and len(cat_dims) > 0:  # both
                kernel = cov_amp * (exp_kernel * ham_kernel) + noise_kernel
            elif len(cont_dims) > 0 and len(cat_dims) == 0:  # only continuous
                kernel = cov_amp * exp_kernel + noise_kernel
            elif len(cont_dims) == 0 and len(cat_dims) > 0:  # only categorical
                kernel = cov_amp * ham_kernel + noise_kernel
            else:
                raise ValueError('Neither continuous nor categorical dimensions')

            gp = GaussianProcess(
                configspace=self.configspace,
                types=self.types,
                bounds=self.bounds,
                kernel=kernel,
                normalize_y=True,
                seed=self.rng.randint(low=0, high=10000),
            )
            # Optimize the hyperparameters on the leaf's (augmented) data, then
            # condition the GP on the full dataset without re-optimizing.
            gp.train(np.array(dataset), np.array(targets_))
            gp._train(X, y, do_optimize=False)
            models.append(gp)
        self.models.append(models)

    return self
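# The corresponding prediction path is not shown in this excerpt; a
# hypothetical sketch of what it could look like: route every query point
# through each tree, look up the GP fitted for the leaf the point lands in,
# and combine the per-tree predictions via the law of total variance.
def _predict_sketch(self, X):
    leaf_ids = self.rf.apply(X)  # shape (n_samples, N_EST)
    means = np.zeros((X.shape[0], N_EST))
    variances = np.zeros((X.shape[0], N_EST))
    for est in range(N_EST):
        for i, leaf in enumerate(leaf_ids[:, est]):
            gp = self.models[est][self.all_mappings[est][leaf]]
            m, v = gp.predict(X[i].reshape(1, -1))
            means[i, est] = m.item()
            variances[i, est] = v.item()
    # Mean over trees; total variance = mean within-tree variance
    # + between-tree variance of the means.
    return means.mean(axis=1), variances.mean(axis=1) + means.var(axis=1)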
def _train(self, X: np.ndarray, y: np.ndarray, do_optimize: bool = True) -> 'GaussianProcessMCMC':
    """Performs MCMC sampling to sample hyperparameter configurations from the
    likelihood and trains a GP on X and y for each sample.

    Parameters
    ----------
    X: np.ndarray (N, D)
        Input data points. The dimensionality of X is (N, D),
        with N as the number of points and D is the number of features.
    y: np.ndarray (N,)
        The corresponding target values.
    do_optimize: boolean
        If set to true we perform MCMC sampling; otherwise we just use the
        hyperparameters specified in the kernel.
    """
    X = self._impute_inactive(X)
    if self.normalize_y:
        # A note on normalization for the Gaussian process with MCMC:
        # Scikit-learn uses a different "normalization" than we use in SMAC3. Scikit-learn normalizes the data
        # to have zero mean, while we normalize it to have zero mean and unit variance. To make sure the
        # scikit-learn GP behaves the same when we use it directly or indirectly (through the
        # gaussian_process.py file), we normalize the data here. Then, after the individual GPs are fit, we
        # inject the statistics into them so they unnormalize the data at prediction time.
        y = self._normalize_y(y)

    self.gp = self._get_gp()

    if do_optimize:
        self.gp.fit(X, y)
        self._all_priors = self._get_all_priors(
            add_bound_priors=True,
            add_soft_bounds=True if self.mcmc_sampler == 'nuts' else False,
        )

        if self.mcmc_sampler == 'emcee':
            sampler = emcee.EnsembleSampler(self.n_mcmc_walkers, len(self.kernel.theta), self._ll)
            sampler.random_state = self.rng.get_state()
            # Do a burn-in in the first iteration
            if not self.burned:
                # Initialize the walkers by sampling from the prior
                dim_samples = []
                prior = None  # type: typing.Optional[typing.Union[typing.List[Prior], Prior]]
                for dim, prior in enumerate(self._all_priors):
                    # Always sample from the first prior
                    if isinstance(prior, list):
                        if len(prior) == 0:
                            prior = None
                        else:
                            prior = prior[0]
                    prior = typing.cast(typing.Optional[Prior], prior)
                    if prior is None:
                        raise NotImplementedError()
                    else:
                        dim_samples.append(prior.sample_from_prior(self.n_mcmc_walkers).flatten())
                self.p0 = np.vstack(dim_samples).transpose()

                # Run MCMC sampling
                with warnings.catch_warnings():
                    warnings.filterwarnings('ignore', r'invalid value encountered in double_scalars.*')
                    self.p0, _, _ = sampler.run_mcmc(self.p0, self.burnin_steps)

                self.burned = True

            # Start sampling & save the current position; it will be the start point in the next iteration
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore', r'invalid value encountered in double_scalars.*')
                self.p0, _, _ = sampler.run_mcmc(self.p0, self.chain_length)

            # Take the last samples from each walker
            self.hypers = sampler.get_chain()[:, -1]

        elif self.mcmc_sampler == 'nuts':
            # Originally published as:
            # http://www.stat.columbia.edu/~gelman/research/published/nuts.pdf
            # A good explanation of HMC:
            # https://theclevermachine.wordpress.com/2012/11/18/mcmc-hamiltonian-monte-carlo-a-k-a-hybrid-monte-carlo/
            # A good explanation of HMC and NUTS can be found in:
            # https://besjournals.onlinelibrary.wiley.com/doi/full/10.1111/2041-210X.12681

            # Do not require the installation of NUTS for SMAC
            # This requires NUTS from https://github.com/mfeurer/NUTS
            import nuts.nuts

            # Perform initial fit to the data to obtain theta0
            if not self.burned:
                theta0 = self.gp.kernel.theta
                self.burned = True
            else:
                theta0 = self.p0
            samples, _, _ = nuts.nuts.nuts6(
                f=self._ll_w_grad,
                Madapt=self.burnin_steps,
                M=self.chain_length,
                theta0=theta0,
                # Increasing this value results in longer running times
                delta=0.5,
                adapt_mass=False,
                # Rather low max depth to keep the number of required gradient steps low
                max_depth=10,
                rng=self.rng,
            )

            indices = [int(np.rint(ind)) for ind in np.linspace(start=0, stop=len(samples) - 1, num=10)]
            self.hypers = samples[indices]
            self.p0 = self.hypers.mean(axis=0)
        else:
            raise ValueError(self.mcmc_sampler)

        if self.average_samples:
            self.hypers = [self.hypers.mean(axis=0)]
    else:
        self.hypers = self.gp.kernel.theta
        self.hypers = [self.hypers]

    self.models = []
    for sample in self.hypers:
        # Clip the log hyperparameters to a numerically safe range.
        if (sample < -50).any():
            sample[sample < -50] = -50
        if (sample > 50).any():
            sample[sample > 50] = 50

        # Instantiate a GP for each hyperparameter configuration
        kernel = deepcopy(self.kernel)
        kernel.theta = sample
        model = GaussianProcess(
            configspace=self.configspace,
            types=self.types,
            bounds=self.bounds,
            kernel=kernel,
            normalize_y=False,
            seed=self.rng.randint(low=0, high=10000),
        )
        try:
            model._train(X, y, do_optimize=False)
            self.models.append(model)
        except np.linalg.LinAlgError:
            pass

    if len(self.models) == 0:
        kernel = deepcopy(self.kernel)
        kernel.theta = self.p0
        model = GaussianProcess(
            configspace=self.configspace,
            types=self.types,
            bounds=self.bounds,
            kernel=kernel,
            normalize_y=False,
            seed=self.rng.randint(low=0, high=10000),
        )
        model._train(X, y, do_optimize=False)
        self.models.append(model)

    if self.normalize_y:
        # Inject the normalization statistics into the individual models. Setting normalize_y to True makes the
        # individual GPs unnormalize the data at predict time.
        for model in self.models:
            model.normalize_y = True
            model.mean_y_ = self.mean_y_
            model.std_y_ = self.std_y_

    self.is_trained = True
    return self
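# For reference, prediction with the MCMC-marginalized GP (a sketch, not the
# exact SMAC implementation): average the predictions of the per-sample GPs
# and combine their variances via the law of total variance.
def _predict_sketch(self, X_test):
    means, variances = zip(*(model._predict(X_test) for model in self.models))
    means, variances = np.array(means), np.array(variances)
    return means.mean(axis=0), variances.mean(axis=0) + means.var(axis=0)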
def __init__(self, api_config, config_space, parallel_setting="LS"):
    super(SMAC4EPMOpimizer, self).__init__(api_config)
    self.cs = config_space
    self.num_hps = len(self.cs.get_hyperparameters())
    if parallel_setting not in ["CL_min", "CL_max", "CL_mean", "KB", "LS"]:
        raise ValueError(
            "parallel_setting can only be one of the following: "
            "CL_min, CL_max, CL_mean, KB, LS")
    self.parallel_setting = parallel_setting

    rng = np.random.RandomState(seed=0)
    scenario = Scenario({
        "run_obj": "quality",  # we optimize quality (alternative to runtime)
        "runcount-limit": 128,
        "cs": self.cs,  # configuration space
        "deterministic": True,
        "limit_resources": False,
    })
    self.stats = Stats(scenario)
    # traj = TrajLogger(output_dir=None, stats=self.stats)
    self.runhistory = RunHistory()

    r2e_def_kwargs = {
        "scenario": scenario,
        "num_params": self.num_hps,
        "success_states": [StatusType.SUCCESS, ],
        "impute_censored_data": False,
        "scale_perc": 5,
    }

    self.random_chooser = ChooserProb(rng=rng, prob=0.0)

    types, bounds = get_types(self.cs, instance_features=None)
    model_kwargs = {
        "configspace": self.cs,
        "types": types,
        "bounds": bounds,
        "seed": rng.randint(MAXINT),
    }

    models = []

    cov_amp = ConstantKernel(
        2.0,
        constant_value_bounds=(np.exp(-10), np.exp(2)),
        prior=LognormalPrior(mean=0.0, sigma=1.0, rng=rng),
    )

    cont_dims = np.array(np.where(np.array(types) == 0)[0], dtype=int)  # np.int is deprecated
    cat_dims = np.where(np.array(types) != 0)[0]

    if len(cont_dims) > 0:
        exp_kernel = Matern(
            np.ones([len(cont_dims)]),
            [(np.exp(-6.754111155189306), np.exp(0.0858637988771976))
             for _ in range(len(cont_dims))],
            nu=2.5,
            operate_on=cont_dims,
        )

    if len(cat_dims) > 0:
        ham_kernel = HammingKernel(
            np.ones([len(cat_dims)]),
            [(np.exp(-6.754111155189306), np.exp(0.0858637988771976))
             for _ in range(len(cat_dims))],
            operate_on=cat_dims,
        )

    assert len(cont_dims) + len(cat_dims) == len(scenario.cs.get_hyperparameters())

    noise_kernel = WhiteKernel(
        noise_level=1e-8,
        noise_level_bounds=(np.exp(-25), np.exp(2)),
        prior=HorseshoePrior(scale=0.1, rng=rng),
    )

    if len(cont_dims) > 0 and len(cat_dims) > 0:  # both
        kernel = cov_amp * (exp_kernel * ham_kernel) + noise_kernel
    elif len(cont_dims) > 0 and len(cat_dims) == 0:  # only continuous
        kernel = cov_amp * exp_kernel + noise_kernel
    elif len(cont_dims) == 0 and len(cat_dims) > 0:  # only categorical
        kernel = cov_amp * ham_kernel + noise_kernel
    else:
        raise ValueError('Neither continuous nor categorical hyperparameters')

    gp_kwargs = {"kernel": kernel}

    rf_kwargs = {}
    rf_kwargs["num_trees"] = model_kwargs.get("num_trees", 10)
    rf_kwargs["do_bootstrapping"] = model_kwargs.get("do_bootstrapping", True)
    rf_kwargs["ratio_features"] = model_kwargs.get("ratio_features", 1.0)
    rf_kwargs["min_samples_split"] = model_kwargs.get("min_samples_split", 2)
    rf_kwargs["min_samples_leaf"] = model_kwargs.get("min_samples_leaf", 1)
    rf_kwargs["log_y"] = model_kwargs.get("log_y", True)

    rf_log = RandomForestWithInstances(**model_kwargs, **rf_kwargs)
    rf_kwargs = copy.deepcopy(rf_kwargs)
    rf_kwargs["log_y"] = False
    rf_no_log = RandomForestWithInstances(**model_kwargs, **rf_kwargs)

    rh2epm_cost = RunHistory2EPM4Cost(**r2e_def_kwargs)
    rh2epm_log_cost = RunHistory2EPM4LogScaledCost(**r2e_def_kwargs)
    rh2epm_copula = RunHistory2EPM4GaussianCopulaCorrect(**r2e_def_kwargs)

    self.combinations = []

    # Two model families (GP and RF) paired with different acquisition
    # functions and runhistory transformations; eight combinations in total.
    acq_funcs = [EI, PI, LogEI, LCB]
    acq_func_instances = []
    # acq_func_maximizer_instances = []

    n_sls_iterations = {
        1: 10,
        2: 10,
        3: 10,
        4: 10,
        5: 10,
        6: 10,
        7: 8,
        8: 6,
    }.get(len(self.cs.get_hyperparameters()), 5)

    acq_func_maximizer_kwargs = {
        "config_space": self.cs,
        "rng": rng,
        "max_steps": 5,
        "n_steps_plateau_walk": 5,
        "n_sls_iterations": n_sls_iterations,
    }

    self.idx_ei = 0
    self.num_models = len(models)
    self.num_acq_funcs = len(acq_funcs)

    no_transform_gp = GaussianProcess(**copy.deepcopy(model_kwargs),
                                      **copy.deepcopy(gp_kwargs))
    ei = EI(model=no_transform_gp)
    acq_func_maximizer_kwargs["acquisition_function"] = ei
    ei_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
    self.combinations.append((no_transform_gp, ei, ei_opt, rh2epm_cost))

    pi = PI(model=no_transform_gp)
    acq_func_maximizer_kwargs["acquisition_function"] = pi
    pi_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
    self.combinations.append((no_transform_gp, pi, pi_opt, rh2epm_cost))

    lcb = LCB(model=no_transform_gp)
    acq_func_maximizer_kwargs["acquisition_function"] = lcb
    lcb_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
    self.combinations.append((no_transform_gp, lcb, lcb_opt, rh2epm_cost))

    gp = GaussianProcess(**copy.deepcopy(model_kwargs),
                         **copy.deepcopy(gp_kwargs))
    ei = EI(model=gp)
    acq_func_maximizer_kwargs["acquisition_function"] = ei
    ei_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
    self.combinations.append((gp, ei, ei_opt, rh2epm_copula))

    gp = GaussianProcess(**copy.deepcopy(model_kwargs),
                         **copy.deepcopy(gp_kwargs))
    ei = LogEI(model=gp)
    acq_func_maximizer_kwargs["acquisition_function"] = ei
    ei_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
    self.combinations.append((gp, ei, ei_opt, rh2epm_log_cost))

    ei = EI(model=rf_no_log)
    acq_func_maximizer_kwargs["acquisition_function"] = ei
    ei_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
    self.combinations.append((rf_no_log, ei, ei_opt, rh2epm_cost))

    ei = LogEI(model=rf_log)
    acq_func_maximizer_kwargs["acquisition_function"] = ei
    ei_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
    self.combinations.append((rf_log, ei, ei_opt, rh2epm_log_cost))

    ei = EI(model=rf_no_log)
    acq_func_maximizer_kwargs["acquisition_function"] = ei
    ei_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
    self.combinations.append((rf_no_log, ei, ei_opt, rh2epm_copula))

    self.num_acq_instances = len(acq_func_instances)
    self.best_observation = np.inf
    self.next_evaluations = []
def _train(self, X: np.ndarray, y: np.ndarray, do_optimize: bool = True):
    """Performs MCMC sampling to sample hyperparameter configurations from the
    likelihood and trains a GP on X and y for each sample.

    Parameters
    ----------
    X: np.ndarray (N, D)
        Input data points. The dimensionality of X is (N, D),
        with N as the number of points and D is the number of features.
    y: np.ndarray (N,)
        The corresponding target values.
    do_optimize: boolean
        If set to true we perform MCMC sampling; otherwise we just use the
        hyperparameters specified in the kernel.
    """
    if self.normalize_input:
        # Normalize input to be in [0, 1]
        self.X, self.lower, self.upper = normalization.zero_one_normalization(
            X, self.lower, self.upper)
    else:
        self.X = X

    if len(y.shape) > 1:
        y = y.flatten()
    if len(y) != len(X):
        raise ValueError('Shape mismatch: %s vs %s' % (y.shape, X.shape))

    if self.normalize_output:
        # Normalize output to have zero mean and unit standard deviation
        self.y, self.y_mean, self.y_std = normalization.zero_mean_unit_var_normalization(y)
        if self.y_std == 0:
            raise ValueError("Cannot normalize output. All targets have the same value")
    else:
        self.y = y

    # Use the mean of the data as mean for the GP
    self.mean = np.mean(self.y, axis=0)
    self.gp = george.GP(self.kernel, mean=self.mean)

    if do_optimize:
        # We have one walker for each hyperparameter configuration
        sampler = emcee.EnsembleSampler(self.n_hypers,
                                        len(self.kernel) + 1,
                                        self._loglikelihood)
        sampler.random_state = self.rng.get_state()
        # Do a burn-in in the first iteration
        if not self.burned:
            # Initialize the walkers by sampling from the prior
            if self.prior is None:
                self.p0 = self.rng.rand(self.n_hypers, len(self.kernel) + 1)
            else:
                self.p0 = self.prior.sample_from_prior(self.n_hypers)
            # Run MCMC sampling
            self.p0, _, _ = sampler.run_mcmc(self.p0,
                                             self.burnin_steps,
                                             rstate0=self.rng)

            self.burned = True

        # Start sampling
        pos, _, _ = sampler.run_mcmc(self.p0,
                                     self.chain_length,
                                     rstate0=self.rng)

        # Save the current position; it will be the start point in
        # the next iteration
        self.p0 = pos

        # Take the last samples from each walker
        self.hypers = sampler.chain[:, -1]
    else:
        self.hypers = self.gp.kernel.get_parameter_vector().tolist()
        self.hypers.append(self.noise)
        self.hypers = [self.hypers]

    self.models = []
    for sample in self.hypers:
        # Instantiate a GP for each hyperparameter configuration
        kernel = deepcopy(self.kernel)
        kernel.set_parameter_vector(sample[:-1])
        noise = np.exp(sample[-1])
        model = GaussianProcess(
            types=self.types,
            bounds=self.bounds,
            kernel=kernel,
            normalize_output=self.normalize_output,
            normalize_input=self.normalize_input,
            noise=noise,
            rng=self.rng,
        )
        model._train(X, y, do_optimize=False)
        self.models.append(model)

    self.is_trained = True
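# The sampler above targets self._loglikelihood, which is not shown in this
# excerpt; a minimal sketch consistent with the george 0.3 API (the last entry
# of theta is the log noise, matching the len(self.kernel) + 1 walker
# dimensions used above). The prior's lnprob() method is assumed, RoBO-style.
def _loglikelihood_sketch(self, theta):
    self.gp.kernel.set_parameter_vector(theta[:-1])
    noise = np.exp(theta[-1])  # last dimension is the log noise
    try:
        self.gp.compute(self.X, yerr=np.sqrt(noise))
    except np.linalg.LinAlgError:
        return -np.inf
    ll = self.gp.log_likelihood(self.y, quiet=True)
    if self.prior is not None:
        ll += self.prior.lnprob(theta)
    return ll if np.isfinite(ll) else -np.inf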