def __init__(self, get_net=get_default_network, lr=1e-2,
             use_double_precision=True, verbose=True):
    """
    Wrapper around the pybnn Bohamiann implementation. It automatically
    adjusts the length of the MCMC chain by performing 100 times more
    burn-in steps than there are data points and sampling ~100 network
    weights.

    Parameters
    ----------
    get_net: func
        Architecture specification
    lr: float
        The MCMC step length
    use_double_precision: Boolean
        Use float32 or float64 precision. Note: Using float64 makes the
        training slower.
    verbose: Boolean
        Determines whether to print pybnn output.
    """
    self.lr = lr
    self.verbose = verbose
    self.bnn = Bohamiann(get_network=get_net,
                         use_double_precision=use_double_precision)
class WrapperBohamiann(BaseModel):

    def __init__(self, get_net=get_default_network, lr=1e-5,
                 use_double_precision=False, verbose=False):
        """
        Wrapper around the pybnn Bohamiann implementation. It automatically
        adjusts the length of the MCMC chain by performing 100 times more
        burn-in steps than there are data points and sampling ~100 network
        weights.

        Parameters
        ----------
        get_net: func
            Architecture specification
        lr: float
            The MCMC step length
        use_double_precision: Boolean
            Use float32 or float64 precision. Note: Using float64 makes the
            training slower.
        verbose: Boolean
            Determines whether to print pybnn output.
        """
        self.lr = lr
        self.verbose = verbose
        self.bnn = Bohamiann(get_network=get_net,
                             use_double_precision=use_double_precision)

    def train(self, X, y, **kwargs):
        self.X = X
        self.y = y
        self.bnn.train(X, y, lr=self.lr,
                       num_burn_in_steps=X.shape[0] * 100,
                       num_steps=X.shape[0] * 100 + 10000,
                       verbose=self.verbose)

    def predict(self, X_test):
        return self.bnn.predict(X_test)
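# A minimal usage sketch for the wrapper above, assuming `WrapperBohamiann`
# and its dependencies (BaseModel, get_default_network) are importable; the
# toy data below is illustrative, not from the original source.
import numpy as np

X = np.random.rand(30, 3)                  # 30 points, 3 features
y = np.sinc(X * 10 - 5).sum(axis=1)        # toy regression target

model = WrapperBohamiann()                 # defaults: lr=1e-5, float32
model.train(X, y)                          # burn-in = 30 * 100 = 3000 steps,
                                           # total = 3000 + 10000 steps
mean, var = model.predict(np.random.rand(5, 3))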
class TestBohamiannSampler(unittest.TestCase):

    def test_sgld(self):
        self.X = np.random.rand(10, 3)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)
        self.model = Bohamiann(normalize_input=True, normalize_output=True,
                               use_double_precision=True,
                               sampling_method="sgld")
        self.model.train(self.X, self.y, num_burn_in_steps=20,
                         num_steps=100, keep_every=10)

    def test_preconditioned_sgld(self):
        self.X = np.random.rand(10, 3)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)
        self.model = Bohamiann(normalize_input=True, normalize_output=True,
                               use_double_precision=True,
                               sampling_method="preconditioned_sgld")
        self.model.train(self.X, self.y, num_burn_in_steps=20,
                         num_steps=100, keep_every=10)

    def test_sghmc(self):
        self.X = np.random.rand(10, 3)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)
        self.model = Bohamiann(normalize_input=True, normalize_output=True,
                               use_double_precision=True,
                               sampling_method="sghmc")
        self.model.train(self.X, self.y, num_burn_in_steps=20,
                         num_steps=100, keep_every=10)

    def test_adaptive_sghmc(self):
        self.X = np.random.rand(10, 3)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)
        self.model = Bohamiann(normalize_input=True, normalize_output=True,
                               use_double_precision=True,
                               sampling_method="adaptive_sghmc")
        self.model.train(self.X, self.y, num_burn_in_steps=20,
                         num_steps=100, keep_every=10)
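# The four tests above differ only in `sampling_method`; a parametrised
# sketch of the same pattern (toy data as in the tests) makes that explicit.
import numpy as np
from pybnn.bohamiann import Bohamiann

X = np.random.rand(10, 3)
y = np.sinc(X * 10 - 5).sum(axis=1)

for method in ("sgld", "preconditioned_sgld", "sghmc", "adaptive_sghmc"):
    model = Bohamiann(normalize_input=True, normalize_output=True,
                      use_double_precision=True, sampling_method=method)
    model.train(X, y, num_burn_in_steps=20, num_steps=100, keep_every=10)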
class TestBohamiann(unittest.TestCase):

    def setUp(self):
        self.X = np.random.rand(10, 3)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)
        self.model = Bohamiann(normalize_input=True, normalize_output=True,
                               use_double_precision=True)
        self.model.train(self.X, self.y, num_burn_in_steps=20,
                         num_steps=100, keep_every=10)

    def test_predict(self):
        X_test = np.random.rand(10, self.X.shape[1])
        m, v = self.model.predict(X_test)

        assert len(m.shape) == 1
        assert m.shape[0] == X_test.shape[0]
        assert len(v.shape) == 1
        assert v.shape[0] == X_test.shape[0]

    def test_gradient_mean(self):
        X_test = np.random.rand(10, self.X.shape[1])

        def wrapper(x):
            return self.model.predict([x])[0]

        def wrapper_grad(x):
            return self.model.predictive_mean_gradient(x)

        grad = self.model.predictive_mean_gradient(X_test[0])
        assert grad.shape[0] == X_test.shape[1]

        for xi in X_test:
            err = check_grad(wrapper, wrapper_grad, xi, epsilon=1e-6)
            assert err < 1e-5

    def test_gradient_variance(self):
        X_test = np.random.rand(10, self.X.shape[1])

        def wrapper(x):
            v = self.model.predict([x])[1]
            return v

        def wrapper_grad(x):
            return self.model.predictive_variance_gradient(x)

        grad = self.model.predictive_variance_gradient(X_test[0])
        assert grad.shape[0] == X_test.shape[1]

        for xi in X_test:
            err = check_grad(wrapper, wrapper_grad, xi, epsilon=1e-6)
            assert err < 1e-5
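# The gradient tests above rely on SciPy's finite-difference checker. A
# stripped-down sketch of the same pattern on a toy function (not the model
# above) shows what `check_grad` verifies:
import numpy as np
from scipy.optimize import check_grad

def f(x):
    return np.sum(x ** 2)

def grad_f(x):
    return 2 * x

# check_grad returns the 2-norm of the difference between the analytic
# gradient and a finite-difference approximation at x0.
err = check_grad(f, grad_f, np.array([1.0, 2.0, 3.0]), epsilon=1e-6)
assert err < 1e-5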
def get_model(self, **kwargs):
    predictor = Bohamiann(get_network=get_default_network,
                          sampling_method="adaptive_sghmc",
                          use_double_precision=True,
                          metrics=(nn.MSELoss,),
                          likelihood_function=nll,
                          print_every_n_steps=10,
                          normalize_input=False,
                          normalize_output=True)
    return predictor
class BayesianNN(object):

    def __init__(self):
        self.model = Bohamiann(print_every_n_steps=1000,
                               sampling_method="adaptive_sghmc")
        self.trained = False

    def fit(self, x, y):
        self.model.train(x, y.flatten(),
                         num_steps=10000 + 100 * len(x),
                         num_burn_in_steps=100 * len(x),
                         keep_every=200, lr=1e-2, verbose=True,
                         continue_training=self.trained)
        # Mark the model as trained so later fits continue the chain
        # instead of restarting it.
        self.trained = True

    def predict(self, x):
        mean, var = self.model.predict(x)
        # Return the mean and the half-width of a 95% Gaussian interval.
        return mean, 1.96 * np.sqrt(var)
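# `predict` above returns the half-width of a 95% interval rather than the
# raw standard deviation: for a Gaussian predictive distribution,
# mean +/- 1.96 * sigma covers ~95% of the mass. A caller recovers the
# interval bounds like this (data and sizes are illustrative):
import numpy as np

bnn = BayesianNN()
bnn.fit(np.random.rand(20, 3), np.random.rand(20, 1))

mean, half_width = bnn.predict(np.random.rand(5, 3))
lower = mean - half_width   # ~2.5th percentile
upper = mean + half_width   # ~97.5th percentile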
class BOHAMIANNWarp(BaseModel):
    """
    A wrapper for BOHAMIANN with a fully connected feed-forward neural
    network.
    """

    def __init__(self, num_samples=6000, keep_every=50, lr=1e-2,
                 normalize_input: bool = True, normalize_output: bool = True,
                 verbose=True, seed=42):
        self.verbose = verbose
        self.num_samples = num_samples
        self.keep_every = keep_every
        self.lr = lr
        self.model = Bohamiann(normalize_input=normalize_input,
                               normalize_output=normalize_output, seed=seed)

    def _create_model(self, X, Y):
        Y = Y.flatten()
        num_burn_in_steps = X.shape[0] * 100
        num_steps = X.shape[0] * 100 + self.num_samples
        self.model.train(X, Y, num_steps=num_steps,
                         num_burn_in_steps=num_burn_in_steps,
                         keep_every=self.keep_every, lr=self.lr,
                         verbose=self.verbose)

    def _update_model(self, X_all, Y_all):
        """
        Updates the model with new observations.
        """
        Y_all = Y_all.flatten()
        num_burn_in_steps = X_all.shape[0] * 100
        num_steps = X_all.shape[0] * 100 + self.num_samples
        if self.model is None:
            self._create_model(X_all, Y_all)
        else:
            self.model.train(X_all, Y_all, num_steps=num_steps,
                             num_burn_in_steps=num_burn_in_steps,
                             keep_every=self.keep_every, lr=self.lr,
                             verbose=self.verbose)

    def predict(self, X):
        """
        Predictions with the model. Returns predictive means and standard
        deviations at X.
        """
        X = np.atleast_2d(X)
        m, v = self.model.predict(X)  # m and v have shape (N,)
        s = np.sqrt(v)
        return m[:, None], s[:, None]

    def predict_withGradients(self, X):
        """
        Returns the mean, standard deviation, mean gradient and standard
        deviation gradient at X.
        """
        raise NotImplementedError
def _get_meta_model(
    self,
    X_train: np.ndarray,
    Y_train: np.ndarray,
    C_train: np.ndarray,
    with_cost: bool = False,
):
    """Create, train and return the objective model, and (optionally) a cost
    model for the data.

    Parameters
    ----------
    X_train : np.ndarray
        Training samples.
    Y_train : np.ndarray
        Training objectives.
    C_train : np.ndarray
        Training costs.
    with_cost : bool, optional
        Whether to also create a surrogate model for the cost. Defaults to
        `False`.

    Returns
    -------
    Tuple[Bohamiann, Optional[Bohamiann]]
        Surrogate model for the objective, as well as another for the cost,
        if `with_cost` is True, otherwise `None`.
    """
    objective_model = Bohamiann(
        get_network=type(self).get_architecture,
        print_every_n_steps=1000,
        normalize_output=self.normalize_targets,
    )
    logger.info("Training Bohamiann objective model.")
    if self.max_samples is not None:
        logger.info(
            f"Limiting the dataset to a maximum of {self.max_samples} samples."
        )
        X_train = X_train[:self.max_samples, ...]
        Y_train = Y_train[:self.max_samples, ...]
        C_train = C_train[:self.max_samples, ...]

    logger.debug(f"Shapes: {X_train.shape}, {Y_train.shape}")
    logger.debug(f"config: {self}")

    objective_model.train(
        X_train,
        Y_train,
        num_steps=self.num_steps + self.num_burnin_steps,
        num_burn_in_steps=self.num_burnin_steps,
        keep_every=self.mcmc_thining,
        lr=self.lr,
        verbose=True,
        batch_size=self.batch_size,
    )

    if with_cost:
        cost_model = Bohamiann(get_network=type(self).get_architecture,
                               print_every_n_steps=1000)
        logger.info("Training Bohamiann cost model.")
        cost_model.train(
            X_train,
            C_train,
            num_steps=self.num_steps + self.num_burnin_steps,
            num_burn_in_steps=self.num_burnin_steps,
            keep_every=self.mcmc_thining,
            lr=self.lr,
            verbose=True,
            batch_size=self.batch_size,
        )
    else:
        cost_model = None
    return objective_model, cost_model
def pybnn_search(search_space,
                 model_type,
                 num_init=20,
                 k=DEFAULT_K,
                 loss=DEFAULT_LOSS,
                 total_queries=DEFAULT_TOTAL_QUERIES,
                 predictor_encoding='adj',
                 cutoff=0,
                 acq_opt_type='mutation',
                 explore_type='ucb',
                 deterministic=True,
                 verbose=True):
    from pybnn import DNGO
    from pybnn.bohamiann import Bohamiann

    def fn(arch):
        return search_space.query_arch(arch, deterministic=deterministic)[loss]

    # set up initial data
    data = search_space.generate_random_dataset(
        num=num_init,
        predictor_encoding=predictor_encoding,
        cutoff=cutoff,
        deterministic_loss=deterministic)

    query = num_init + k

    while query <= total_queries:

        # set up data
        x = np.array([d['encoding'] for d in data])
        y = np.array([d[loss] for d in data])
        scaled_y = np.array([elt / 30 for elt in y])

        # get a set of candidate architectures
        candidates = search_space.get_candidates(
            data,
            acq_opt_type=acq_opt_type,
            predictor_encoding=predictor_encoding,
            cutoff=cutoff,
            deterministic_loss=deterministic)

        xcandidates = np.array([d['encoding'] for d in candidates])

        # train the model
        if model_type == 'dngo':
            model = DNGO(do_mcmc=False)
            model.train(x, y, do_optimize=True)
        elif model_type == 'bohamiann':
            model = Bohamiann()
            model.train(x, scaled_y, num_steps=10000,
                        num_burn_in_steps=1000, keep_every=50, lr=1e-2)

        predictions, var = model.predict(xcandidates)
        predictions = np.array([pred * 30 for pred in predictions])
        # Undo the y / 30 scaling: the standard deviation scales linearly
        # with the targets, so multiply sqrt(var) by 30 (the variance by 30**2).
        stds = np.sqrt(np.array(var)) * 30

        candidate_indices = acq_fn(np.array(predictions),
                                   explore_type,
                                   stds=stds)
        model = None
        gc.collect()

        # add the k arches with the minimum acquisition function values
        for i in candidate_indices[:k]:
            arch_dict = search_space.query_arch(
                candidates[i]['spec'],
                epochs=0,
                predictor_encoding=predictor_encoding,
                cutoff=cutoff,
                deterministic=deterministic)
            data.append(arch_dict)

        if verbose:
            top_5_loss = sorted([d[loss] for d in data])[:min(5, len(data))]
            print('{}, query {}, top 5 val losses: {}'.format(
                model_type, query, top_5_loss))

        query += k

    return data
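# One subtlety above is un-scaling the predictive uncertainty: the targets
# were divided by 30 before training, so the predictive variance is in
# squared scaled units. The standard deviation therefore scales linearly
# (multiply sqrt(var) by 30), not as sqrt(var * 30). A quick numeric check:
import numpy as np

var_scaled = np.array([0.04])           # variance of y / 30
std = np.sqrt(var_scaled) * 30          # correct: 0.2 * 30 = 6.0
wrong = np.sqrt(var_scaled * 30)        # sqrt(1.2) ~ 1.10, badly off
assert np.isclose(std[0], 6.0)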
class OrionBohamiannWrapper(BaseModel):
    """
    Wrapper for PyBNN's BOHAMIANN model

    Parameters
    ----------
    normalize_input: bool
        Normalize the input based on the provided bounds (zero mean and unit
        standard deviation). Defaults to ``True``.
    normalize_output: bool
        Normalize the output based on data (zero mean and unit standard
        deviation). Defaults to ``False``.
    burnin_steps: int or None.
        The number of burn-in steps before the sampling procedure starts.
        If ``None``, ``burnin_steps = n_dims * 100`` where ``n_dims`` is the
        dimensionality of the search space. Defaults to ``None``.
    sampling_method: str
        Can be one of ``['adaptive_sghmc', 'sgld', 'preconditioned_sgld',
        'sghmc']``. Defaults to ``"adaptive_sghmc"``.
        See PyBNN samplers'
        `code <https://github.com/automl/pybnn/tree/master/pybnn/sampler>`_
        for more information.
    use_double_precision: bool
        Use double precision if using ``bohamiann``. Note that it can run
        faster on GPU if using single precision. Defaults to ``True``.
    num_steps: int or None
        Number of sampling steps to perform after burn-in is finished.
        In total, ``num_steps // keep_every`` network weights will be sampled.
        If ``None``, ``num_steps = n_dims * 100 + 10000`` where ``n_dims`` is
        the dimensionality of the search space.
    keep_every: int
        Number of sampling steps (after burn-in) to perform before keeping a
        sample. In total, ``num_steps // keep_every`` network weights will be
        sampled.
    learning_rate: float
        Learning rate. Defaults to 1e-2.
    batch_size: int
        Batch size for training the neural network. Defaults to 20.
    epsilon: float
        epsilon for numerical stability. Defaults to 1e-10.
    mdecay: float
        momentum decay. Defaults to 0.05.
    verbose: bool
        Write progress logs in stdout. Defaults to ``False``.
    """

    def __init__(self, lower, upper,
                 sampling_method="adaptive_sghmc",
                 use_double_precision=True,
                 num_steps=None,
                 keep_every=100,
                 burnin_steps=None,
                 learning_rate=1e-2,
                 batch_size=20,
                 epsilon=1e-10,
                 mdecay=0.05,
                 verbose=False,
                 **kwargs):
        self.num_steps = num_steps
        self.keep_every = keep_every
        self.burnin_steps = burnin_steps
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.epsilon = epsilon
        self.mdecay = mdecay
        self.verbose = verbose
        self.bnn = Bohamiann(get_network=get_default_network,
                             sampling_method=sampling_method,
                             use_double_precision=use_double_precision,
                             **kwargs)
        self.lower = lower
        self.upper = upper

    # pylint:disable=no-self-use
    def set_state(self, state_dict):
        """Restore the state of the optimizer"""
        torch.random.set_rng_state(state_dict["torch"])

    # pylint:disable=no-self-use
    def state_dict(self):
        """Return the current state of the optimizer so that it can be restored"""
        return {"torch": torch.random.get_rng_state()}

    def seed(self, seed):
        """Seed all internal RNGs"""
        if torch.cuda.is_available():
            torch.backends.cudnn.benchmark = False
            torch.cuda.manual_seed_all(seed)
            torch.backends.cudnn.deterministic = True

        torch.manual_seed(seed)

    def train(self, X, y, **kwargs):
        """
        Sets num_steps and burnin_steps before training with parent's train()
        """
        self.X = X
        self.y = y

        if self.num_steps:
            num_steps = self.num_steps
        else:
            num_steps = X.shape[0] * 100 + 10000

        if self.burnin_steps is None:
            burnin_steps = X.shape[0] * 100
        else:
            burnin_steps = self.burnin_steps

        self.bnn.train(X, y,
                       num_steps=num_steps,
                       keep_every=self.keep_every,
                       num_burn_in_steps=burnin_steps,
                       lr=self.learning_rate,
                       batch_size=self.batch_size,
                       epsilon=self.epsilon,
                       mdecay=self.mdecay,
                       continue_training=False,
                       verbose=self.verbose,
                       **kwargs)

    def predict(self, X_test):
        """Predict using bnn.predict()"""
        return self.bnn.predict(X_test)
X_train = np.array(X_train)
Y_train = np.array(Y_train)
C_train = np.array(C_train)

if args.benchmark != "forrester":
    C_train = np.log(C_train)

if args.benchmark == "xgboost":
    Y_train = np.log(Y_train)

normalize_targets = True
if args.benchmark == "fcnet" or args.benchmark == "svm":
    normalize_targets = False

model_objective = Bohamiann(get_network=get_architecture,
                            print_every_n_steps=10000,
                            normalize_output=normalize_targets)
model_objective.train(X_train, Y_train,
                      num_steps=num_steps + num_burnin_steps,
                      num_burn_in_steps=num_burnin_steps,
                      keep_every=mcmc_thining,
                      lr=lr, verbose=True, batch_size=batch_size)

if args.benchmark != "forrester":
    model_cost = Bohamiann(get_network=get_default_architecture,
                           print_every_n_steps=10000)
    model_cost.train(X_train, C_train,
                     # The original snippet is truncated here; these
                     # arguments mirror the objective model's training call.
                     num_steps=num_steps + num_burnin_steps,
                     num_burn_in_steps=num_burnin_steps,
                     keep_every=mcmc_thining,
                     lr=lr, verbose=True, batch_size=batch_size)
import numpy as np
import matplotlib.pyplot as plt

from pybnn.bohamiann import Bohamiann


def f(x):
    # Toy objective used throughout these examples.
    return np.sinc(x * 10 - 5)


rng = np.random.RandomState(42)

x = rng.rand(20)
y = f(x)

grid = np.linspace(0, 1, 200)
fvals = f(grid)

plt.plot(grid, fvals, "k--")
plt.plot(x, y, "ro")
plt.grid()
plt.xlim(0, 1)
plt.show()

# -- Train Model ---
model = Bohamiann(print_every_n_steps=1000)
model.train(x[:, None], y, num_steps=20000, num_burn_in_steps=2000,
            keep_every=50, lr=1e-2, verbose=True)

# -- Predict with Model ---
m, v = model.predict(grid[:, None])

plt.plot(x, y, "ro")
plt.grid()
plt.plot(grid, fvals, "k--")
plt.plot(grid, m, "blue")
# The original snippet is truncated here; shading one predictive standard
# deviation around the mean completes the plot.
plt.fill_between(grid, m + np.sqrt(v), m - np.sqrt(v),
                 color="orange", alpha=0.4)
plt.show()
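# Note on the settings above: in pybnn, `num_steps` counts all MCMC steps
# including burn-in, and one network is kept every `keep_every` steps after
# burn-in (the other snippets here pass num_steps = sampling + burn-in the
# same way). A sketch of the resulting ensemble size, assuming that semantics:
num_steps = 20000
num_burn_in_steps = 2000
keep_every = 50
n_sampled_networks = (num_steps - num_burn_in_steps) // keep_every  # 360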