def __init__(self, params, cost_fun, batch_generator=None,
             stepsize_schedule=ConstantStepsizeSchedule(0.01),
             burn_in_steps=3000, session=tf.get_default_session(),
             dtype=tf.float64, seed=None):
    """ Initializes the corresponding MCMCSampler super object and
        sets member variables.

    Parameters
    ----------
    params : list of `tensorflow.Variable` objects
        Target parameters for which we want to sample new values.

    cost_fun : callable
        Function that takes `params` as input and returns a
        1-d `tensorflow.Tensor` that contains the cost-value.
        Frequently denoted with `U` in literature.

    batch_generator : `BatchGenerator`, optional
        Iterable which returns dictionaries to feed into
        tensorflow.Session.run() calls to evaluate the cost function.
        Defaults to `None` which indicates that no batches shall be fed.

    stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
        Iterator class that produces a stream of stepsize values that
        we can use in our samplers.
        See also: `pysgmcmc.stepsize_schedules`

    burn_in_steps : int
        Number of burn-in steps to perform. In each burn-in step, this
        sampler will adapt its own internal parameters to decrease its
        error. Defaults to `3000`.

    session : `tensorflow.Session`, optional
        Session object which knows about the external part of the graph
        (which defines `cost`, and possibly batches).
        Used internally to evaluate (burn-in/sample) the sampler.

    dtype : tensorflow.DType, optional
        Type of elements of `tensorflow.Tensor` objects used in this
        sampler. Defaults to `tensorflow.float64`.

    seed : int, optional
        Random seed to use. Defaults to `None`.

    See Also
    --------
    pysgmcmc.sampling.MCMCSampler:
        Super class of this class. Has generic methods shared by all
        MCMC samplers implemented as part of this framework.

    pysgmcmc.samplers.sghmc.SGHMCSampler:
        Instantiation of this class.
        Uses SGHMC to sample from the target distribution after burn-in.

    pysgmcmc.samplers.sgld.SGLDSampler:
        Instantiation of this class.
        Uses SGLD to sample from the target distribution after burn-in.

    """
    # Sanitize inputs
    assert isinstance(burn_in_steps, int)

    super().__init__(params=params, cost_fun=cost_fun,
                     stepsize_schedule=stepsize_schedule,
                     batch_generator=batch_generator,
                     seed=seed, dtype=dtype, session=session)

    self.burn_in_steps = burn_in_steps
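# A minimal usage sketch (illustrative, not part of the module): concrete
# subclasses such as `SGHMCSampler` or `SGLDSampler` share this constructor
# signature and are driven via the iterator protocol. The target `w` and the
# cost function below are hypothetical placeholders; we assume `__next__`
# yields `(sample, cost)` pairs as described in `pysgmcmc.sampling`.
#
#   import tensorflow as tf
#   from itertools import islice
#   from pysgmcmc.samplers.sghmc import SGHMCSampler
#
#   w = tf.Variable(tf.zeros([2], dtype=tf.float64))
#
#   def cost_fun(params):
#       return 0.5 * tf.reduce_sum(tf.square(params[0]))
#
#   with tf.Session() as session:
#       sampler = SGHMCSampler(params=[w], cost_fun=cost_fun,
#                              burn_in_steps=1000, session=session)
#       samples = [sample for sample, cost in islice(sampler, 100)]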
def __init__(self, params, cost_fun, batch_generator=None,
             stepsize_schedule=ConstantStepsizeSchedule(0.01),
             burn_in_steps=3000, mdecay=0.05, scale_grad=1.0,
             session=tf.get_default_session(), dtype=tf.float64, seed=None):
    """ Initialize the sampler parameters and set up a tensorflow.Graph
        for later queries.

    Parameters
    ----------
    params : list of tensorflow.Variable objects
        Target parameters for which we want to sample new values.

    cost_fun : callable
        Function that takes `params` as input and returns a
        1-d `tensorflow.Tensor` that contains the cost-value.
        Frequently denoted with `U` in literature.

    batch_generator : iterable, optional
        Iterable which returns dictionaries to feed into
        tensorflow.Session.run() calls to evaluate the cost function.
        Defaults to `None` which indicates that no batches shall be fed.

    stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
        Iterator class that produces a stream of stepsize values that
        we can use in our samplers.
        See also: `pysgmcmc.stepsize_schedules`

    burn_in_steps : int, optional
        Number of burn-in steps to perform. In each burn-in step, this
        sampler will adapt its own internal parameters to decrease its
        error. Defaults to `3000`.\n
        For reference see:
        `Bayesian Optimization with Robust Bayesian Neural Networks.
        <http://aad.informatik.uni-freiburg.de/papers/16-NIPS-BOHamiANN.pdf>`_

    mdecay : float, optional
        (Constant) momentum decay per time-step.
        Defaults to `0.05`.\n
        For reference see:
        `Bayesian Optimization with Robust Bayesian Neural Networks.
        <http://aad.informatik.uni-freiburg.de/papers/16-NIPS-BOHamiANN.pdf>`_

    scale_grad : float, optional
        Value that is used to scale the magnitude of the noise used
        during sampling. In a typical batches-of-data setting this
        usually corresponds to the number of examples in the
        entire dataset. Defaults to `1.0` which corresponds to no scaling.

    session : tensorflow.Session, optional
        Session object which knows about the external part of the graph
        (which defines `cost`, and possibly batches).
        Used internally to evaluate (burn-in/sample) the sampler.

    dtype : tensorflow.DType, optional
        Type of elements of `tensorflow.Tensor` objects used in this
        sampler. Defaults to `tensorflow.float64`.

    seed : int, optional
        Random seed to use. Defaults to `None`.

    See Also
    --------
    pysgmcmc.sampling.BurnInMCMCSampler:
        Base class for `SGHMCSampler` that specifies how actual sampling
        is performed (using iterator protocol, e.g. `next(sampler)`).

    """
    # Set up BurnInMCMCSampler base class:
    # initialize member variables common to all samplers
    # and run initializers for all uninitialized variables in `params`
    # (to avoid errors in the graph definitions below).
    super().__init__(params=params, cost_fun=cost_fun,
                     burn_in_steps=burn_in_steps,
                     batch_generator=batch_generator,
                     seed=seed, dtype=dtype, session=session,
                     stepsize_schedule=stepsize_schedule)

    # Initialize graph constants {{{ #

    noise = tf.constant(0., name="noise", dtype=dtype)

    scale_grad = tf.constant(scale_grad, dtype=dtype, name="scale_grad")

    epsilon_scaled = tf.divide(self.epsilon, tf.sqrt(scale_grad),
                               name="epsilon_scaled")

    mdecay = tf.constant(mdecay, name="mdecay", dtype=dtype)

    # }}} Initialize graph constants #

    grads = [vectorize(gradient)
             for gradient in tf.gradients(self.cost, params)]

    # Initialize internal sampler parameters {{{ #

    tau = [tf.Variable(tf.ones_like(param, dtype=dtype), dtype=dtype,
                       name="tau_{}".format(i), trainable=False)
           for i, param in enumerate(self.vectorized_params)]

    r = [tf.Variable(1. / (tau[i].initialized_value() + 1),
                     name="R_{}".format(i), trainable=False)
         for i, param in enumerate(self.vectorized_params)]

    g = [tf.Variable(tf.ones_like(param, dtype=dtype), dtype=dtype,
                     name="g_{}".format(i), trainable=False)
         for i, param in enumerate(self.vectorized_params)]

    v_hat = [tf.Variable(tf.ones_like(param, dtype=dtype), dtype=dtype,
                         name="v_hat_{}".format(i), trainable=False)
             for i, param in enumerate(self.vectorized_params)]

    # Initialize mass matrix inverse
    minv = [tf.Variable(tf.divide(tf.constant(1., dtype=dtype),
                                  tf.sqrt(v_hat[i].initialized_value())),
                        name="minv_{}".format(i), trainable=False)
            for i, param in enumerate(self.vectorized_params)]

    # Initialize momentum
    v = [tf.Variable(tf.zeros_like(param, dtype=dtype), dtype=dtype,
                     name="v_{}".format(i), trainable=False)
         for i, param in enumerate(self.vectorized_params)]

    # }}} Initialize internal sampler parameters #

    self.minv_t = [None] * len(params)  # gets burned-in

    # r_t = 1 / (tau + 1); shouldn't it be 1 / tau according to the terms?
    # It is not, and changing it to that breaks everything!
    # Why?
    for i, (param, grad) in enumerate(zip(params, grads)):
        vectorized_param = self.vectorized_params[i]

        # Burn-in logic {{{ #
        r_t = tf.assign(r[i], 1. / (tau[i] + 1), name="r_t_{}".format(i))

        # r_t should always use the old value of tau
        with tf.control_dependencies([r_t]):
            tau_t = tf.assign_add(
                tau[i],
                safe_divide(-g[i] * g[i] * tau[i], v_hat[i]) + 1,
                name="tau_t_{}".format(i))

            # minv = v_hat^{-1/2} = 1 / sqrt(v_hat)
            self.minv_t[i] = tf.assign(
                minv[i],
                safe_divide(1., safe_sqrt(v_hat[i])),
                name="minv_t_{}".format(i))

            # tau_t, minv_t should always use the old values of g, v_hat
            with tf.control_dependencies([tau_t, self.minv_t[i]]):
                g_t = tf.assign_add(g[i], -r_t * g[i] + r_t * grad,
                                    name="g_t_{}".format(i))

                v_hat_t = tf.assign_add(
                    v_hat[i], -r_t * v_hat[i] + r_t * grad ** 2,
                    name="v_hat_t_{}".format(i))

                # }}} Burn-in logic #

                with tf.control_dependencies([g_t, v_hat_t]):
                    # Draw random normal sample {{{ #

                    # Equation 10, variance of normal sample:
                    # 2 * epsilon ** 2 * mdecay * Minv - 0 (noise is 0)
                    #     - epsilon ** 4
                    # = 2 * epsilon ** 2 * epsilon * v_hat^{-1/2} * C * Minv
                    # = 2 * epsilon ** 3 * v_hat^{-1/2} * C * v_hat^{-1/2}
                    #     - epsilon ** 4

                    # (co-) variance of normal sample
                    noise_scale = (
                        tf.constant(2., dtype=dtype) *
                        epsilon_scaled ** tf.constant(2., dtype=dtype) *
                        mdecay * self.minv_t[i] -
                        tf.constant(2., dtype=dtype) *
                        epsilon_scaled ** tf.constant(3., dtype=dtype) *
                        tf.square(self.minv_t[i]) * noise -
                        epsilon_scaled ** 4)

                    # turn into stddev
                    sigma = tf.sqrt(tf.maximum(noise_scale, 1e-16),
                                    name="sigma_{}".format(i))

                    sample = self._draw_noise_sample(
                        sigma=sigma, shape=vectorized_param.shape)

                    # }}} Draw random sample #

                    # HMC Update {{{ #

                    # Equation 10: right side, where:
                    # Minv = v_hat^{-1/2}, Mdecay = epsilon * v_hat^{-1/2} C
                    v_t = tf.assign_add(
                        v[i],
                        -self.epsilon ** 2 * self.minv_t[i] * grad -
                        mdecay * v[i] + sample,
                        name="v_t_{}".format(i))

                    # Equation 10: left side
                    vectorized_theta_t = tf.assign_add(vectorized_param, v_t)

                    self.theta_t[i] = tf.assign(
                        param,
                        unvectorize(vectorized_theta_t,
                                    original_shape=param.shape),
                        name="theta_t_{}".format(i))

                    # }}} HMC Update #
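# For reference, the burned-in quantities above implement the adaptive
# preconditioning from the paper referenced in the docstring (a sketch in
# pseudo-math, read off the graph definition; `r` is an adaptive averaging
# weight):
#
#   tau   <- tau * (1 - g**2 / v_hat) + 1      # adaptation window
#   g     <- g * (1 - r) + r * grad            # smoothed gradient
#   v_hat <- v_hat * (1 - r) + r * grad**2     # smoothed squared gradient
#   minv  <- 1 / sqrt(v_hat)                   # preconditioner M^{-1}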
def __init__(self, params, cost_fun, batch_generator=None,
             stepsize_schedule=ConstantStepsizeSchedule(0.01),
             session=tf.get_default_session(), dtype=tf.float64, seed=None):
    """ Initialize the sampler base class. Sets up member variables and
        initializes uninitialized target parameters in the current
        `tensorflow.Graph`.

    Parameters
    ----------
    params : list of `tensorflow.Variable` objects
        Target parameters for which we want to sample new values.

    cost_fun : callable
        Function that takes `params` as input and returns a
        1-d `tensorflow.Tensor` that contains the cost-value.
        Frequently denoted with `U` in literature.

    batch_generator : `BatchGenerator`, optional
        Iterable which returns dictionaries to feed into
        tensorflow.Session.run() calls to evaluate the cost function.
        Defaults to `None` which indicates that no batches shall be fed.

    stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
        Iterator class that produces a stream of stepsize values that
        we can use in our samplers.
        See also: `pysgmcmc.stepsize_schedules`

    session : `tensorflow.Session`, optional
        Session object which knows about the external part of the graph
        (which defines `cost`, and possibly batches).
        Used internally to evaluate (burn-in/sample) the sampler.

    dtype : tensorflow.DType, optional
        Type of elements of `tensorflow.Tensor` objects used in this
        sampler. Defaults to `tensorflow.float64`.

    seed : int, optional
        Random seed to use. Defaults to `None`.

    See Also
    --------
    pysgmcmc.sampling.BurnInMCMCSampler:
        Abstract base class for samplers that perform a burn-in phase
        to tune their own hyperparameters.
        Inherits from `sampling.MCMCSampler`.

    """
    # Sanitize inputs
    assert batch_generator is None or hasattr(batch_generator, "__next__")
    assert seed is None or isinstance(seed, int)

    assert isinstance(session, (tf.Session, tf.InteractiveSession))
    assert isinstance(dtype, tf.DType)

    assert callable(cost_fun)

    self.dtype = dtype

    self.n_iterations = 0

    self.seed = seed

    assert hasattr(stepsize_schedule, "update")
    assert hasattr(stepsize_schedule, "__next__")
    assert hasattr(stepsize_schedule, "initial_value")

    self.stepsize_schedule = stepsize_schedule

    self.batch_generator = batch_generator
    self.session = session

    self.params = params

    # set up costs
    self.cost_fun = cost_fun
    self.cost = cost_fun(self.params)

    # compute vectorized clones of all parameters
    self.vectorized_params = [vectorize(param) for param in self.params]

    self.epsilon = tf.Variable(self.stepsize_schedule.initial_value,
                               dtype=self.dtype, name="epsilon",
                               trainable=False)

    # Initialize uninitialized parameters before usage in any sampler.
    init = tf.variables_initializer(
        uninitialized_params(
            session=self.session,
            params=self.params + self.vectorized_params + [self.epsilon]))
    self.session.run(init)

    # query this later to determine the next sample
    self.theta_t = [None] * len(params)
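# A minimal sketch of the `cost_fun` contract assumed above (hypothetical
# example, not part of the module): it maps the list `params` to a single
# cost tensor, e.g. a negative log posterior.
#
#   def cost_fun(params):
#       # isotropic Gaussian negative log density (up to a constant),
#       # reshaped to the 1-d tensor the samplers expect
#       cost = 0.5 * tf.add_n([tf.reduce_sum(tf.square(p)) for p in params])
#       return tf.reshape(cost, (1,))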
def __init__(self, params, cost_fun, batch_generator=None,
             stepsize_schedule=ConstantStepsizeSchedule(0.01),
             burn_in_steps=3000, A=1.0, scale_grad=1.0,
             session=tf.get_default_session(), dtype=tf.float64, seed=None):
    """ Initialize the sampler parameters and set up a tensorflow.Graph
        for later queries.

    Parameters
    ----------
    params : list of tensorflow.Variable objects
        Target parameters for which we want to sample new values.

    cost_fun : callable
        Function that takes `params` as input and returns a
        1-d `tensorflow.Tensor` that contains the cost-value.
        Frequently denoted with `U` in literature.

    batch_generator : BatchGenerator, optional
        Iterable which returns dictionaries to feed into
        tensorflow.Session.run() calls to evaluate the cost function.
        Defaults to `None` which indicates that no batches shall be fed.

    stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
        Iterator class that produces a stream of stepsize values that
        we can use in our samplers.
        See also: `pysgmcmc.stepsize_schedules`

    burn_in_steps : int, optional
        Number of burn-in steps to perform. In each burn-in step, this
        sampler will adapt its own internal parameters to decrease its
        error. Defaults to `3000`.\n
        For reference see:
        `Bayesian Optimization with Robust Bayesian Neural Networks.
        <http://aad.informatik.uni-freiburg.de/papers/16-NIPS-BOHamiANN.pdf>`_

    A : float, optional
        TODO: Documentation.
        Defaults to `1.0`.

    scale_grad : float, optional
        Value that is used to scale the magnitude of the noise used
        during sampling. In a typical batches-of-data setting this
        usually corresponds to the number of examples in the
        entire dataset. Defaults to `1.0`.

    session : tensorflow.Session, optional
        Session object which knows about the external part of the graph
        (which defines `cost`, and possibly batches).
        Used internally to evaluate (burn-in/sample) the sampler.

    dtype : tensorflow.DType, optional
        Type of elements of `tensorflow.Tensor` objects used in this
        sampler. Defaults to `tensorflow.float64`.

    seed : int, optional
        Random seed to use. Defaults to `None`.

    See Also
    --------
    pysgmcmc.sampling.BurnInMCMCSampler:
        Base class for `SGLDSampler` that specifies how actual sampling
        is performed (using iterator protocol, e.g. `next(sampler)`).

    """
    super().__init__(params=params, cost_fun=cost_fun,
                     batch_generator=batch_generator,
                     burn_in_steps=burn_in_steps,
                     seed=seed, session=session, dtype=dtype)

    n_params = len(params)

    # Initialize graph constants {{{ #

    A = tf.constant(A, name="A", dtype=dtype)
    noise = tf.constant(0., name="noise", dtype=dtype)
    scale_grad = tf.constant(scale_grad, name="scale_grad", dtype=dtype)

    # }}} Initialize graph constants #

    grads = [vectorize(gradient)
             for gradient in tf.gradients(self.cost, params)]

    # Initialize internal sampler parameters {{{ #

    tau = [tf.Variable(tf.ones_like(param, dtype=dtype), dtype=dtype,
                       name="tau_{}".format(i), trainable=False)
           for i, param in enumerate(self.vectorized_params)]

    R = [tf.Variable(1. / (tau[i].initialized_value() + 1),
                     name="R_{}".format(i), trainable=False)
         for i, param in enumerate(self.vectorized_params)]

    g = [tf.Variable(tf.ones_like(param, dtype=dtype), dtype=dtype,
                     name="g_{}".format(i), trainable=False)
         for i, param in enumerate(self.vectorized_params)]

    v_hat = [tf.Variable(tf.ones_like(param, dtype=dtype), dtype=dtype,
                         name="v_hat_{}".format(i), trainable=False)
             for i, param in enumerate(self.vectorized_params)]

    # Initialize mass matrix inverse {{{ #

    minv = [tf.Variable(tf.divide(tf.constant(1., dtype=dtype),
                                  tf.sqrt(v_hat[i].initialized_value())),
                        name="minv_{}".format(i), trainable=False)
            for i, param in enumerate(self.vectorized_params)]

    # }}} Initialize mass matrix inverse #

    # }}} Initialize internal sampler parameters #

    self.minv_t = [None] * n_params  # gets burned-in

    for i, (param, grad) in enumerate(zip(params, grads)):
        vectorized_param = self.vectorized_params[i]

        # Burn-in logic {{{ #
        r_t = tf.assign(R[i], 1. / (tau[i] + 1.), name="r_t_{}".format(i))

        # r_t should always use the old value of tau
        with tf.control_dependencies([r_t]):
            tau_t = tf.assign_add(
                tau[i],
                safe_divide(-g[i] * g[i] * tau[i], v_hat[i]) + 1,
                name="tau_t_{}".format(i))

            self.minv_t[i] = tf.assign(
                minv[i],
                safe_divide(1., safe_sqrt(v_hat[i])),
                name="minv_t_{}".format(i))

            # tau_t, minv_t should always use the old values of g, v_hat
            with tf.control_dependencies([tau_t, self.minv_t[i]]):
                g_t = tf.assign_add(g[i], -r_t * g[i] + r_t * grad,
                                    name="g_t_{}".format(i))

                v_hat_t = tf.assign_add(
                    v_hat[i], -r_t * v_hat[i] + r_t * grad ** 2,
                    name="v_hat_t_{}".format(i))

                # }}} Burn-in logic #

                with tf.control_dependencies([g_t, v_hat_t]):
                    # Draw random sample {{{ #

                    sigma = safe_sqrt(
                        2. * self.epsilon *
                        safe_divide(self.minv_t[i] * (A - noise),
                                    scale_grad))

                    sample = self._draw_noise_sample(
                        sigma=sigma, shape=vectorized_param.shape)

                    # }}} Draw random sample #

                    # SGLD Update {{{ #

                    vectorized_theta_t = tf.assign_add(
                        vectorized_param,
                        -self.epsilon * self.minv_t[i] * A * grad + sample)

                    self.theta_t[i] = tf.assign(
                        param,
                        unvectorize(vectorized_theta_t,
                                    original_shape=param.shape),
                        name="Theta_t_{}".format(i))

                    # }}} SGLD Update #
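# For reference, the update defined above is preconditioned SGLD (a sketch
# in pseudo-math, read off the graph definition; `minv` is the burned-in
# inverse-mass term and `sigma` the noise standard deviation):
#
#   sigma = sqrt(2 * epsilon * minv * (A - noise) / scale_grad)
#   theta <- theta - epsilon * minv * A * grad + N(0, sigma**2)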
def __init__(self, params, cost_fun, tf_scope="default",
             batch_generator=None,
             stepsize_schedule=ConstantStepsizeSchedule(0.001),
             mass=1.0, speed_of_light=0.5, D=1.0, Bhat=0.0,
             session=tf.get_default_session(), dtype=tf.float64, seed=None):
    """ Initialize the sampler parameters and set up a tensorflow.Graph
        for later queries.

    Parameters
    ----------
    params : list of tensorflow.Variable objects
        Target parameters for which we want to sample new values.

    cost_fun : callable
        Function that takes `params` as input and returns a
        1-d `tensorflow.Tensor` that contains the cost-value.
        Frequently denoted with `U` in literature.

    tf_scope : string, optional
        Name of the `tensorflow` variable scope in which this sampler's
        graph elements are created. Defaults to `"default"`.

    batch_generator : BatchGenerator, optional
        Iterable which returns dictionaries to feed into
        tensorflow.Session.run() calls to evaluate the cost function.
        Defaults to `None` which indicates that no batches shall be fed.

    stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
        Iterator class that produces a stream of stepsize values that
        we can use in our samplers.
        See also: `pysgmcmc.stepsize_schedules`

    mass : float, optional
        Mass constant. Defaults to `1.0`.

    speed_of_light : float, optional
        "Speed of light" constant. TODO: Extend documentation.
        Defaults to `0.5`.

    D : float, optional
        Diffusion constant. Defaults to `1.0`.

    Bhat : float, optional
        TODO: Documentation. Defaults to `0.0`.

    session : tensorflow.Session, optional
        Session object which knows about the external part of the graph
        (which defines `cost`, and possibly batches).
        Used internally to evaluate (burn-in/sample) the sampler.

    dtype : tensorflow.DType, optional
        Type of elements of `tensorflow.Tensor` objects used in this
        sampler. Defaults to `tensorflow.float64`.

    seed : int, optional
        Random seed to use. Defaults to `None`.

    See Also
    --------
    pysgmcmc.sampling.MCMCSampler:
        Base class for `RelativisticSGHMCSampler` that specifies how
        actual sampling is performed (using iterator protocol,
        e.g. `next(sampler)`).

    """
    # Set up MCMCSampler base class:
    # initialize member variables common to all samplers
    # and run initializers for all uninitialized variables in `params`
    # (to avoid errors in the graph definitions below).
    super().__init__(params=params, cost_fun=cost_fun,
                     batch_generator=batch_generator, tf_scope=tf_scope,
                     stepsize_schedule=stepsize_schedule,
                     seed=seed, dtype=dtype, session=session)

    # Use `-self.cost` since the rest of the implementation expects
    # a log likelihood (instead of the *negative* log likelihood that
    # we normally use as costs)
    grads = [vectorize(gradient)
             for gradient in tf.gradients(-self.cost, params)]

    with tf.variable_scope(tf_scope, reuse=tf.AUTO_REUSE):
        D = tf.constant(D, dtype=dtype)
        b_hat = tf.constant(Bhat, dtype=dtype)

        # In the internal implementation, stick to mathematical formulas.
        # For users, prefer readability.
        m = tf.constant(mass, dtype=dtype)
        c = tf.constant(speed_of_light, dtype=dtype)

        # Draw an initial relativistic momentum sample for each particle.
        momentum = []
        for i in range(len(params)):
            momentum_params = [
                momentum_sample
                for momentum_sample in _sample_relativistic_momentum(
                    m=mass, c=speed_of_light,
                    n_params=self.vectorized_params[i].shape[0],
                    seed=self.seed)
            ]
            momentum_params = tf.reshape(momentum_params,
                                         self.vectorized_params[i].shape)
            momentum.append(tf.Variable(momentum_params, dtype=dtype))

    for i, (param, grad) in enumerate(zip(params, grads)):
        vectorized_param = self.vectorized_params[i]

        p_grad = self.epsilon * momentum[i] / (
            m * tf.sqrt(momentum[i] * momentum[i] /
                        (tf.square(m) * tf.square(c)) + 1))

        n = tf.sqrt(self.epsilon *
                    (2 * D - self.epsilon * b_hat)) * tf.random_normal(
            shape=vectorized_param.shape, dtype=dtype, seed=seed)

        momentum_t = tf.assign_add(
            momentum[i],
            tf.reshape(self.epsilon * grad + n - D * p_grad,
                       momentum[i].shape))

        p_grad_new = self.epsilon * momentum_t / (
            m * tf.sqrt(momentum_t * momentum_t /
                        (tf.square(m) * tf.square(c)) + 1))

        vectorized_theta_t = tf.assign_add(
            vectorized_param,
            tf.reshape(p_grad_new, vectorized_param.shape))

        self.theta_t[i] = tf.assign(
            param,
            unvectorize(vectorized_theta_t, original_shape=param.shape))
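# For reference, the velocity used above is the gradient of the relativistic
# kinetic energy (read off the graph definition; p denotes the momentum):
#
#   p_grad = epsilon * p / (m * sqrt(p**2 / (m**2 * c**2) + 1))
#
# For |p| -> infinity this saturates at epsilon * c, so `speed_of_light`
# bounds the per-step parameter change made by the sampler.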
def __init__(self, session, sampling_method=Sampler.SGHMC,
             get_net=get_default_net, batch_generator=generate_batches,
             batch_size=20,
             stepsize_schedule=ConstantStepsizeSchedule(np.sqrt(1e-4)),
             n_nets=100, n_iters=50000, burn_in_steps=1000,
             sample_steps=100, normalize_input=True, normalize_output=True,
             seed=None, dtype=tf.float64, **sampler_kwargs):
    """ Bayesian Neural Networks use Bayesian methods to estimate the
        posterior distribution of a neural network's weights. This allows
        us to also predict uncertainties for test points and thus makes
        Bayesian Neural Networks suitable for Bayesian optimization.

        This module uses stochastic gradient MCMC methods to sample
        from the posterior distribution.

        See [1] for more details.

        [1] J. T. Springenberg, A. Klein, S. Falkner, F. Hutter
            Bayesian Optimization with Robust Bayesian Neural Networks.
            In Advances in Neural Information Processing Systems 29 (2016).

    Parameters
    ----------
    session : tensorflow.Session
        A `tensorflow.Session` object used to delegate computations
        performed in this network over to `tensorflow`.

    sampling_method : Sampler, optional
        Method used to sample networks for this BNN.
        Defaults to `Sampler.SGHMC`.

    n_nets : int, optional
        Number of nets to sample during training (and use to predict).
        Defaults to `100`.

    stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
        Iterator class that produces a stream of stepsize values that
        we can use during sampling.
        See also: `pysgmcmc.stepsize_schedules`

    n_iters : int, optional
        Total number of iterations of the sampler to perform.
        Defaults to `50000`.

    batch_size : int, optional
        Number of datapoints to include in each minibatch.
        Defaults to `20` datapoints per minibatch.

    burn_in_steps : int, optional
        Number of burn-in steps to perform.
        Defaults to `1000`.

    sample_steps : int, optional
        Number of sample steps to perform.
        Defaults to `100`.

    normalize_input : bool, optional
        Specifies whether or not input data should be normalized.
        Defaults to `True`.

    normalize_output : bool, optional
        Specifies whether or not outputs should be normalized.
        Defaults to `True`.

    get_net : callable, optional
        Callable that returns a network specification.
        Expected inputs are a `tensorflow.Placeholder` object that
        serves as feedable input to the network and an integer random
        seed. Expected return value is the network's final output.
        Defaults to `get_default_net`.

    batch_generator : callable, optional
        Generator callable with a signature like `generate_batches` that
        yields feedable dicts of minibatches.
        Defaults to `generate_batches`.

    seed : int, optional
        Random seed to use in this BNN.
        Defaults to `None`.

    dtype : tf.DType, optional
        Tensorflow datatype to use for internal representation.
        Defaults to `tf.float64`.

    sampler_kwargs : dict, optional
        Additional keyword arguments that are passed through to the
        sampler, e.g. `mdecay` (momentum decay per time-step, a
        parameter of `SGHMCSampler` that defaults to `0.05`).

    """
    # Sanitize inputs
    assert isinstance(n_nets, int)
    assert isinstance(n_iters, int)
    assert isinstance(burn_in_steps, int)
    assert isinstance(sample_steps, int)
    assert isinstance(batch_size, int)

    assert isinstance(dtype, tf.DType)

    assert n_nets > 0
    assert n_iters > 0
    assert burn_in_steps >= 0
    assert sample_steps > 0
    assert batch_size > 0

    assert callable(get_net)
    assert callable(batch_generator)

    assert hasattr(stepsize_schedule, "update")
    assert hasattr(stepsize_schedule, "__next__")

    if not Sampler.is_supported(sampling_method):
        raise ValueError(
            "'BayesianNeuralNetwork.__init__' received unsupported input "
            "for parameter 'sampling_method'. Input was: {input}.\n"
            "Supported sampling methods are enumerated in "
            "'Sampler' enum type.".format(input=sampling_method))

    self.sampling_method = sampling_method
    self.stepsize_schedule = stepsize_schedule

    self.get_net = get_net
    self.batch_generator = batch_generator

    self.normalize_input = normalize_input
    self.normalize_output = normalize_output

    self.n_nets = n_nets
    self.n_iters = n_iters
    self.batch_size = batch_size

    self.sampler_kwargs = sampler_kwargs

    self.burn_in_steps = burn_in_steps
    self.sample_steps = sample_steps

    self.samples = deque(maxlen=n_nets)

    self.seed = seed
    self.dtype = dtype

    self.session = session

    self.is_trained = False
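# A minimal usage sketch (hypothetical; `train` and `predict` are assumed to
# be the companion methods of this constructor, and the data is random toy
# data):
#
#   import numpy as np
#   import tensorflow as tf
#
#   rng = np.random.RandomState(0)
#   x_train, y_train = rng.rand(100, 3), rng.rand(100)
#
#   with tf.Session() as session:
#       bnn = BayesianNeuralNetwork(session=session, n_nets=50,
#                                   burn_in_steps=1000, seed=0,
#                                   mdecay=0.05)  # forwarded to the sampler
#       bnn.train(x_train, y_train)
#       mean_prediction, variance = bnn.predict(x_train)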
def __init__(self, particles, cost_fun, batch_generator=None,
             stepsize_schedule=ConstantStepsizeSchedule(0.1),
             alpha=0.9, fudge_factor=1e-6,
             session=tf.get_default_session(), dtype=tf.float64, seed=None):
    """ Initialize the sampler parameters and set up a tensorflow.Graph
        for later queries.

    Parameters
    ----------
    particles : List[tensorflow.Variable]
        List of particles each representing a (different) guess of the
        target parameters of this sampler.

    cost_fun : callable
        Function that takes `params` of *one* particle as input and
        returns a 1-d `tensorflow.Tensor` that contains the cost-value.
        Frequently denoted with `U` in literature.

    batch_generator : iterable, optional
        Iterable which returns dictionaries to feed into
        tensorflow.Session.run() calls to evaluate the cost function.
        Defaults to `None` which indicates that no batches shall be fed.

    stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
        Iterator class that produces a stream of stepsize values that
        we can use in our samplers.
        See also: `pysgmcmc.stepsize_schedules`

    alpha : float, optional
        Decay rate of the adagrad-style accumulator of squared gradients
        (`historical_grad` below) that is used to scale the update.
        Defaults to `0.9`.

    fudge_factor : float, optional
        Small positive constant added to the square root of the
        accumulator to avoid division by zero.
        Defaults to `1e-6`.

    session : tensorflow.Session, optional
        Session object which knows about the external part of the graph
        (which defines `cost`, and possibly batches).
        Used internally to evaluate (burn-in/sample) the sampler.

    dtype : tensorflow.DType, optional
        Type of elements of `tensorflow.Tensor` objects used in this
        sampler. Defaults to `tensorflow.float64`.

    seed : int, optional
        Random seed to use. Defaults to `None`.

    See Also
    --------
    pysgmcmc.sampling.MCMCSampler:
        Base class for `SteinVariationalGradientDescentSampler` that
        specifies how actual sampling is performed
        (using iterator protocol, e.g. `next(sampler)`).

    """
    assert isinstance(alpha, (int, float))
    assert isinstance(fudge_factor, (int, float))
    assert callable(cost_fun)

    self.particles = tf.stack(particles)

    def cost_fun_wrapper(params):
        # evaluate the given cost function on every particle
        return tf.map_fn(lambda particle: cost_fun(particle),
                         self.particles)
    cost_fun_wrapper.__name__ = cost_fun.__name__

    super().__init__(params=particles, cost_fun=cost_fun_wrapper,
                     batch_generator=batch_generator,
                     session=session, seed=seed, dtype=dtype,
                     stepsize_schedule=stepsize_schedule)

    fudge_factor = tf.constant(fudge_factor, dtype=self.dtype,
                               name="fudge_factor")

    self.epsilon = tf.Variable(stepsize_schedule.initial_value,
                               dtype=self.dtype, name="stepsize")

    self.n_particles = tf.cast(self.particles.shape[0], self.dtype)

    historical_grad = tf.get_variable("historical_grad",
                                      self.particles.shape, dtype=dtype,
                                      initializer=tf.zeros_initializer())

    self.session.run(
        tf.variables_initializer([historical_grad, self.epsilon]))

    lnpgrad = tf.squeeze(tf.gradients(self.cost, self.particles))

    kernel_matrix, kernel_gradients = self.svgd_kernel(self.particles)

    grad_theta = tf.divide(
        tf.matmul(kernel_matrix, lnpgrad) + kernel_gradients,
        self.n_particles)

    historical_grad_t = tf.assign(
        historical_grad,
        alpha * historical_grad + (1. - alpha) * (grad_theta ** 2))

    adj_grad = tf.divide(grad_theta,
                         fudge_factor + tf.sqrt(historical_grad_t))

    for i, param in enumerate(self.params):
        self.theta_t[i] = tf.assign_sub(param, self.epsilon * adj_grad[i])
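# For reference, the update above is the SVGD step with an adagrad-style
# stepsize (a sketch in pseudo-math, read off the graph definition; K is
# the kernel matrix and dK the summed kernel gradients):
#
#   grad_theta = (K @ lnpgrad + dK) / n_particles
#   hist       = alpha * hist + (1 - alpha) * grad_theta**2
#   theta      = theta - epsilon * grad_theta / (fudge_factor + sqrt(hist))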
def _init_basic(self, params, cost_fun, tf_scope="default",
                batch_generator=None,
                stepsize_schedule=ConstantStepsizeSchedule(0.01),
                session=tf.get_default_session(), dtype=tf.float64,
                seed=None):
    # Sanitize inputs
    assert batch_generator is None or hasattr(batch_generator, "__next__")
    assert seed is None or isinstance(seed, int)

    # assert isinstance(session, (tf.Session, tf.InteractiveSession))
    assert isinstance(dtype, tf.DType)

    # assert callable(cost_fun)

    self.tf_scope = tf_scope

    self.dtype = dtype

    self.n_iterations = 0

    self.seed = seed

    assert hasattr(stepsize_schedule, "update")
    assert hasattr(stepsize_schedule, "__next__")
    assert hasattr(stepsize_schedule, "initial_value")

    self.stepsize_schedule = stepsize_schedule

    self.batch_generator = batch_generator
    self.session = session

    self.params = params

    # set up costs
    self.cost_fun = cost_fun
    self.cost = cost_fun  # cost_fun(self.params)

    # compute vectorized clones of all parameters
    with tf.variable_scope(self.tf_scope, reuse=tf.AUTO_REUSE):
        self.vectorized_params = []
        for i, param in enumerate(self.params):
            self.vectorized_params.append(tf.get_variable(
                initializer=tf.concat(
                    [tf.reshape(par.initialized_value(), (-1,))
                     for par in param], axis=0),
                name="%s/particle_%s" % (self.tf_scope, i)))
        # self.vectorized_params = tf.stack(self.vectorized_params)

        self.epsilon = tf.get_variable(
            initializer=self.stepsize_schedule.initial_value,
            dtype=self.dtype, name="epsilon", trainable=False)

    # Initialize uninitialized parameters before usage in any sampler.
    init = tf.variables_initializer(
        uninitialized_params(
            session=self.session,
            params=self.vectorized_params + [self.epsilon]
            # params=self.params + self.vectorized_params + [self.epsilon]
        )
    )
    self.session.run(init)

    # query this later to determine the next sample
    self.theta_t = [None] * len(params) * len(params[0])
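# Sketch of what the vectorization above produces (illustrative): a particle
# whose parameter list is, e.g., a (2, 3) weight matrix and a (4,) bias is
# flattened into a single `particle_<i>` variable of shape (10,), via:
#
#   flat = tf.concat([tf.reshape(p, (-1,)) for p in param_list], axis=0)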