Example #1
    def __init__(self,
                 params,
                 cost_fun,
                 batch_generator=None,
                 stepsize_schedule=ConstantStepsizeSchedule(0.01),
                 session=tf.get_default_session(),
                 dtype=tf.float64,
                 seed=None):
        """
        Initialize the sampler base class. Sets up member variables and
        initializes uninitialized target parameters in the current
        `tensorflow.Graph`.

        Parameters
        ----------
        params : list of `tensorflow.Variable` objects
            Target parameters for which we want to sample new values.

        cost_fun : callable
            Function that takes `params` as input and returns a
            1-d `tensorflow.Tensor` that contains the cost-value.
            Frequently denoted with `U` in literature.

        batch_generator : `BatchGenerator`, optional
            Iterable which returns dictionaries to feed into
            tensorflow.Session.run() calls to evaluate the cost function.
            Defaults to `None` which indicates that no batches shall be fed.

        stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
            Iterator class that produces a stream of stepsize values that
            we can use in our samplers.
            See also: `pysgmcmc.stepsize_schedules`

        session : `tensorflow.Session`, optional
            Session object which knows about the external part of the graph
            (which defines `cost`, and possibly batches).
            Used internally to evaluate (burn-in/sample) the sampler.

        dtype : tensorflow.DType, optional
            Type of elements of `tensorflow.Tensor` objects used in this sampler.
            Defaults to `tensorflow.float64`.

        seed : int, optional
            Random seed to use.
            Defaults to `None`.

        See Also
        ----------
        pysgmcmc.sampling.BurnInMCMCSampler:
            Abstract base class for samplers that perform a burn-in phase
            to tune their own hyperparameters.
            Inherits from `sampling.MCMCSampler`.

        """
        # Sanitize inputs
        assert batch_generator is None or hasattr(batch_generator, "__next__")
        assert seed is None or isinstance(seed, int)

        assert isinstance(session, (tf.Session, tf.InteractiveSession))
        assert isinstance(dtype, tf.DType)

        assert callable(cost_fun)

        self.dtype = dtype

        self.n_iterations = 0

        self.seed = seed

        assert hasattr(stepsize_schedule, "update")
        assert hasattr(stepsize_schedule, "__next__")
        assert hasattr(stepsize_schedule, "initial_value")

        self.stepsize_schedule = stepsize_schedule

        self.batch_generator = batch_generator
        self.session = session

        self.params = params

        # set up costs
        self.cost_fun = cost_fun
        self.cost = cost_fun(self.params)

        # compute vectorized clones of all parameters
        self.vectorized_params = [vectorize(param) for param in self.params]

        self.epsilon = tf.Variable(self.stepsize_schedule.initial_value,
                                   dtype=self.dtype,
                                   name="epsilon",
                                   trainable=False)

        # Initialize uninitialized parameters before usage in any sampler.
        init = tf.variables_initializer(
            uninitialized_params(session=self.session,
                                 params=self.params + self.vectorized_params +
                                 [self.epsilon]))
        self.session.run(init)

        # query this later to determine the next sample
        self.theta_t = [None] * len(params)
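
The base class wires up the iterator protocol, so concrete samplers are driven
with `next(sampler)`. A minimal usage sketch (not part of the library source),
assuming the upstream pysgmcmc import paths and that `next(sampler)` yields a
(sample, cost) pair:

import tensorflow as tf

from pysgmcmc.samplers.sghmc import SGHMCSampler
from pysgmcmc.stepsize_schedules import ConstantStepsizeSchedule

with tf.Session() as session:
    # Single scalar target parameter with a toy quadratic cost,
    # i.e. a standard normal negative log likelihood.
    params = [tf.Variable(0.0, dtype=tf.float64, name="x")]

    def cost_fun(params):
        return 0.5 * tf.square(params[0])

    sampler = SGHMCSampler(params=params,
                           cost_fun=cost_fun,
                           stepsize_schedule=ConstantStepsizeSchedule(0.01),
                           session=session,
                           dtype=tf.float64,
                           seed=1)

    samples = [next(sampler) for _ in range(100)]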
Example #2
    def __init__(self,
                 params,
                 cost_fun,
                 batch_generator=None,
                 stepsize_schedule=ConstantStepsizeSchedule(0.01),
                 burn_in_steps=3000,
                 mdecay=0.05,
                 scale_grad=1.0,
                 session=tf.get_default_session(),
                 dtype=tf.float64,
                 seed=None):
        """ Initialize the sampler parameters and set up a tensorflow.Graph
            for later queries.

        Parameters
        ----------
        params : list of tensorflow.Variable objects
            Target parameters for which we want to sample new values.

        cost_fun : callable
            Function that takes `params` as input and returns a
            1-d `tensorflow.Tensor` that contains the cost-value.
            Frequently denoted with `U` in literature.

        batch_generator : iterable, optional
            Iterable which returns dictionaries to feed into
            tensorflow.Session.run() calls to evaluate the cost function.
            Defaults to `None` which indicates that no batches shall be fed.

        stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
            Iterator class that produces a stream of stepsize values that
            we can use in our samplers.
            See also: `pysgmcmc.stepsize_schedules`

        burn_in_steps : int, optional
            Number of burn-in steps to perform. In each burn-in step, this
            sampler will adapt its own internal parameters to decrease its error.
            Defaults to `3000`.
            For reference see:
            `Bayesian Optimization with Robust Bayesian Neural Networks. <http://aad.informatik.uni-freiburg.de/papers/16-NIPS-BOHamiANN.pdf>`_

        mdecay : float, optional
            (Constant) momentum decay per time-step.
            Defaults to `0.05`.
            For reference see:
            `Bayesian Optimization with Robust Bayesian Neural Networks. <http://aad.informatik.uni-freiburg.de/papers/16-NIPS-BOHamiANN.pdf>`_

        scale_grad : float, optional
            Value that is used to scale the magnitude of the noise used
            during sampling. In a typical batches-of-data setting this usually
            corresponds to the number of examples in the entire dataset.
            Defaults to `1.0` which corresponds to no scaling.

        session : tensorflow.Session, optional
            Session object which knows about the external part of the graph
            (which defines `cost`, and possibly batches).
            Used internally to evaluate (burn-in/sample) the sampler.

        dtype : tensorflow.DType, optional
            Type of elements of `tensorflow.Tensor` objects used in this sampler.
            Defaults to `tensorflow.float64`.

        seed : int, optional
            Random seed to use.
            Defaults to `None`.

        See Also
        ----------
        pysgmcmc.sampling.BurnInMCMCSampler:
            Base class for `SGHMCSampler` that specifies how actual sampling
            is performed (using iterator protocol, e.g. `next(sampler)`).

        """

        # Set up BurnInMCMCSampler base class:
        # initialize member variables common to all samplers
        # and run initializers for all uninitialized variables in `params`
        # (to avoid errors in the graph definitions below).
        super().__init__(params=params,
                         cost_fun=cost_fun,
                         burn_in_steps=burn_in_steps,
                         batch_generator=batch_generator,
                         seed=seed,
                         dtype=dtype,
                         session=session,
                         stepsize_schedule=stepsize_schedule)

        #  Initialize graph constants {{{ #

        noise = tf.constant(0., name="noise", dtype=dtype)

        scale_grad = tf.constant(scale_grad, dtype=dtype, name="scale_grad")

        epsilon_scaled = tf.divide(self.epsilon,
                                   tf.sqrt(scale_grad),
                                   name="epsilon_scaled")

        mdecay = tf.constant(mdecay, name="mdecay", dtype=dtype)

        #  }}} Initialize graph constants #

        grads = [
            vectorize(gradient)
            for gradient in tf.gradients(self.cost, params)
        ]

        #  Initialize internal sampler parameters {{{ #

        tau = [
            tf.Variable(tf.ones_like(param, dtype=dtype),
                        dtype=dtype,
                        name="tau_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        r = [
            tf.Variable(1. / (tau[i].initialized_value() + 1),
                        name="R_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        g = [
            tf.Variable(tf.ones_like(param, dtype=dtype),
                        dtype=dtype,
                        name="g_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        v_hat = [
            tf.Variable(tf.ones_like(param, dtype=dtype),
                        dtype=dtype,
                        name="v_hat_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        # Initialize Mass matrix inverse
        minv = [
            tf.Variable(tf.divide(tf.constant(1., dtype=dtype),
                                  tf.sqrt(v_hat[i].initialized_value())),
                        name="minv_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        # Initialize momentum
        V = [
            tf.Variable(tf.zeros_like(param, dtype=dtype),
                        dtype=dtype,
                        name="v_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        #  }}} Initialize internal sampler parameters #

        self.minv_t = [None] * len(params)  # gets burned-in

        # R_t = 1 / (tau + 1); shouldn't it be 1 / tau according to the terms?
        # It is not, and changing it to that breaks everything. Why?

        for i, (param, grad) in enumerate(zip(params, grads)):
            vectorized_param = self.vectorized_params[i]
            #  Burn-in logic {{{ #
            r_t = tf.assign(r[i], 1. / (tau[i] + 1), name="r_t_{}".format(i))

            # r_t should always use the old value of tau
            with tf.control_dependencies([r_t]):
                tau_t = tf.assign_add(
                    tau[i],
                    safe_divide(-g[i] * g[i] * tau[i], v_hat[i]) + 1,
                    name="tau_t_{}".format(i))

                # minv = v_hat^{-1/2} = 1 / sqrt(v_hat)
                self.minv_t[i] = tf.assign(minv[i],
                                           safe_divide(1.,
                                                       safe_sqrt(v_hat[i])),
                                           name="minv_t_{}".format(i))
                # tau_t, minv_t should always use the old values of G, v_hat
                with tf.control_dependencies([tau_t, self.minv_t[i]]):
                    g_t = tf.assign_add(g[i],
                                        -r_t * g[i] + r_t * grad,
                                        name="g_t_{}".format(i))

                    v_hat_t = tf.assign_add(v_hat[i],
                                            -r_t * v_hat[i] + r_t * grad**2,
                                            name="v_hat_t_{}".format(i))

                    #  }}} Burn-in logic #

                    with tf.control_dependencies([g_t, v_hat_t]):

                        #  Draw random normal sample {{{ #

                        # Equation 10, variance of normal sample

                        # 2 * epsilon ** 2 * mdecay * Minv - noise (= 0) - epsilon ** 4
                        # = 2 * epsilon ** 2 * (epsilon * v_hat^{-1/2} * C) * Minv - epsilon ** 4
                        # = 2 * epsilon ** 3 * v_hat^{-1/2} * C * v_hat^{-1/2} - epsilon ** 4

                        # (co-) variance of normal sample
                        noise_scale = (
                            tf.constant(2., dtype=dtype) *
                            epsilon_scaled**tf.constant(2., dtype=dtype) *
                            mdecay * self.minv_t[i] -
                            tf.constant(2., dtype=dtype) *
                            epsilon_scaled**tf.constant(3., dtype=dtype) *
                            tf.square(self.minv_t[i]) * noise -
                            epsilon_scaled**4)

                        # turn into stddev
                        sigma = tf.sqrt(tf.maximum(noise_scale, 1e-16),
                                        name="sigma_{}".format(i))

                        sample = self._draw_noise_sample(
                            sigma=sigma, shape=vectorized_param.shape)

                        #  }}} Draw random sample #

                        #  HMC Update {{{ #

                        # Equation 10: right side, where:
                        # Minv = v_hat^{-1/2}, Mdecay = epsilon * v_hat^{-1/2} C
                        v_t = tf.assign_add(
                            V[i],
                            -self.epsilon**2 * self.minv_t[i] * grad -
                            mdecay * V[i] + sample,
                            name="v_t_{}".format(i))

                        # Equation 10: left side
                        vectorized_Theta_t = tf.assign_add(
                            vectorized_param, v_t)

                        self.theta_t[i] = tf.assign(
                            param,
                            unvectorize(vectorized_Theta_t,
                                        original_shape=param.shape),
                            name="theta_t_{}".format(i))
Example #3
    def __init__(self,
                 params,
                 cost_fun,
                 tf_scope="default",
                 batch_generator=None,
                 stepsize_schedule=ConstantStepsizeSchedule(0.001),
                 mass=1.0,
                 speed_of_light=0.5,
                 D=1.0,
                 Bhat=0.0,
                 session=tf.get_default_session(),
                 dtype=tf.float64,
                 seed=None):
        """ Initialize the sampler parameters and set up a tensorflow.Graph
            for later queries.

        Parameters
        ----------
        params : list of tensorflow.Variable objects
            Target parameters for which we want to sample new values.

        cost_fun : callable
            Function that takes `params` as input and returns a
            1-d `tensorflow.Tensor` that contains the cost-value.
            Frequently denoted with `U` in literature.

        tf_scope : string, optional
            Name of the `tensorflow` variable scope in which this sampler's
            internal graph variables are created.
            Defaults to `"default"`.

        batch_generator : BatchGenerator, optional
            Iterable which returns dictionaries to feed into
            tensorflow.Session.run() calls to evaluate the cost function.
            Defaults to `None` which indicates that no batches shall be fed.

        stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
            Iterator class that produces a stream of stepsize values that
            we can use in our samplers.
            See also: `pysgmcmc.stepsize_schedules`

        mass : float, optional
            Mass constant.
            Defaults to `1.0`.

        speed_of_light : float, optional
            "Speed of light" constant. TODO EXTEND DOKU
            Defaults to `1.0`.

        D : float, optional
            Diffusion constant.
            Defaults to `1.0`.

        Bhat : float, optional
            TODO: Documentation
            Defaults to `0.0`.

        session : tensorflow.Session, optional
            Session object which knows about the external part of the graph
            (which defines `Cost`, and possibly batches).
            Used internally to evaluate (burn-in/sample) the sampler.

        dtype : tensorflow.DType, optional
            Type of elements of `tensorflow.Tensor` objects used in this sampler.
            Defaults to `tensorflow.float64`.

        seed : int, optional
            Random seed to use.
            Defaults to `None`.

        See Also
        ----------
        pysgmcmc.sampling.MCMCSampler:
            Base class for `RelativisticSGHMCSampler` that specifies how
            actual sampling is performed (using iterator protocol,
            e.g. `next(sampler)`).

        """

        # Set up MCMCSampler base class:
        # initialize member variables common to all samplers
        # and run initializers for all uninitialized variables in `params`
        # (to avoid errors in the graph definitions below).
        super().__init__(params=params,
                         cost_fun=cost_fun,
                         batch_generator=batch_generator,
                         tf_scope=tf_scope,
                         stepsize_schedule=stepsize_schedule,
                         seed=seed,
                         dtype=dtype,
                         session=session)

        # Use `-self.cost` since the rest of the implementation expects
        # a log likelihood (instead of the *negative* log likelihood that
        # we normally use as costs)
        grads = [
            vectorize(gradient)
            for gradient in tf.gradients(-self.cost, params)
        ]

        with tf.variable_scope(tf_scope, reuse=tf.AUTO_REUSE):

            D = tf.constant(D, dtype=dtype)
            b_hat = tf.constant(Bhat, dtype=dtype)

            # In internal implementation, stick to mathematical formulas.
            # For users, prefer readability.
            m = tf.constant(mass, dtype=dtype)
            c = tf.constant(speed_of_light, dtype=dtype)

        momentum = []

        for i in range(len(params)):
            momentum_params = []

            for momentum_sample in _sample_relativistic_momentum(
                    m=mass,
                    c=speed_of_light,
                    n_params=self.vectorized_params[i].shape[0],
                    seed=self.seed):
                momentum_params.append(momentum_sample)

            momentum_params = tf.reshape(momentum_params,
                                         self.vectorized_params[i].shape)
            momentum_params = tf.Variable(momentum_params, dtype=dtype)
            momentum.append(momentum_params)


        for i, (param, grad) in enumerate(zip(params, grads)):
            vectorized_param = self.vectorized_params[i]

            p_grad = self.epsilon * momentum[i] / (
                m * tf.sqrt(momentum[i] * momentum[i] /
                            (tf.square(m) * tf.square(c)) + 1))

            n = tf.sqrt(
                self.epsilon *
                (2 * D - self.epsilon * b_hat)) * tf.random_normal(
                    shape=vectorized_param.shape, dtype=dtype, seed=seed)

            momentum_t = tf.assign_add(
                momentum[i],
                tf.reshape(self.epsilon * grad + n - D * p_grad,
                           momentum[i].shape))

            p_grad_new = self.epsilon * momentum_t / (
                m * tf.sqrt(momentum_t * momentum_t /
                            (tf.square(m) * tf.square(c)) + 1))
            vectorized_theta_t = tf.assign_add(
                vectorized_param, tf.reshape(p_grad_new,
                                             vectorized_param.shape))

            self.theta_t[i] = tf.assign(
                param,
                unvectorize(vectorized_theta_t, original_shape=param.shape))
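
The relativistic update is compact once vectorization is stripped away. A
NumPy sketch (illustrative, not library API); note that the graph above feeds
gradients of `-self.cost`, i.e. of the log likelihood:

import numpy as np

def relativistic_sghmc_step(theta, p, grad_log_lik, epsilon, m, c, D, b_hat):
    def velocity(p):
        # Relativistic "velocity": epsilon * p / (m * sqrt(p^2/(m^2 c^2) + 1))
        return epsilon * p / (m * np.sqrt(p * p / (m ** 2 * c ** 2) + 1.0))

    noise = (np.sqrt(epsilon * (2.0 * D - epsilon * b_hat)) *
             np.random.standard_normal(np.shape(p)))
    p = p + epsilon * grad_log_lik + noise - D * velocity(p)
    theta = theta + velocity(p)  # position update uses the *new* momentum
    return theta, p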
Example #4
File: sgld.py  Project: thobotics/RoMBRL
    def __init__(self,
                 params,
                 cost_fun,
                 batch_generator=None,
                 stepsize_schedule=ConstantStepsizeSchedule(0.01),
                 burn_in_steps=3000,
                 A=1.0,
                 scale_grad=1.0,
                 session=tf.get_default_session(),
                 dtype=tf.float64,
                 seed=None):
        """ Initialize the sampler parameters and set up a tensorflow.Graph
            for later queries.

        Parameters
        ----------
        params : list of tensorflow.Variable objects
            Target parameters for which we want to sample new values.

        cost_fun : callable
            Function that takes `params` as input and returns a
            1-d `tensorflow.Tensor` that contains the cost-value.
            Frequently denoted with `U` in literature.

        batch_generator : BatchGenerator, optional
            Iterable which returns dictionaries to feed into
            tensorflow.Session.run() calls to evaluate the cost function.
            Defaults to `None` which indicates that no batches shall be fed.

        stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
            Iterator class that produces a stream of stepsize values that
            we can use in our samplers.
            See also: `pysgmcmc.stepsize_schedules`

        burn_in_steps : int, optional
            Number of burn-in steps to perform. In each burn-in step, this
            sampler will adapt its own internal parameters to decrease its error.
            Defaults to `3000`.
            For reference see:
            `Bayesian Optimization with Robust Bayesian Neural Networks. <http://aad.informatik.uni-freiburg.de/papers/16-NIPS-BOHamiANN.pdf>`_

        A : float, optional
            TODO: Documentation
            Defaults to `1.0`.

        scale_grad : float, optional
            Value that is used to scale the magnitude of the noise used
            during sampling. In a typical batches-of-data setting this usually
            corresponds to the number of examples in the entire dataset.
            Defaults to `1.0` which corresponds to no scaling.

        session : tensorflow.Session, optional
            Session object which knows about the external part of the graph
            (which defines `cost`, and possibly batches).
            Used internally to evaluate (burn-in/sample) the sampler.

        dtype : tensorflow.DType, optional
            Type of elements of `tensorflow.Tensor` objects used in this sampler.
            Defaults to `tensorflow.float64`.

        seed : int, optional
            Random seed to use.
            Defaults to `None`.

        See Also
        ----------
        tensorflow_mcmc.sampling.mcmc_base_classes.BurnInMCMCSampler:
            Base class for `SGLDSampler` that specifies how actual sampling
            is performed (using iterator protocol, e.g. `next(sampler)`).

        """

        super().__init__(params=params,
                         cost_fun=cost_fun,
                         batch_generator=batch_generator,
                         burn_in_steps=burn_in_steps,
                         seed=seed,
                         session=session,
                         dtype=dtype)

        n_params = len(params)

        #  Initialize graph constants {{{ #

        A = tf.constant(A, name="A", dtype=dtype)
        noise = tf.constant(0., name="noise", dtype=dtype)
        scale_grad = tf.constant(scale_grad, name="scale_grad", dtype=dtype)

        #  }}} Initialize graph constants #

        grads = [
            vectorize(gradient)
            for gradient in tf.gradients(self.cost, params)
        ]

        #  Initialize internal sampler parameters {{{ #

        tau = [
            tf.Variable(tf.ones_like(param, dtype=dtype),
                        dtype=dtype,
                        name="tau_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        R = [
            tf.Variable(1. / (tau[i].initialized_value() + 1),
                        name="R_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        g = [
            tf.Variable(tf.ones_like(param, dtype=dtype),
                        dtype=dtype,
                        name="g_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        v_hat = [
            tf.Variable(tf.ones_like(param, dtype=dtype),
                        dtype=dtype,
                        name="v_hat_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        #  Initialize mass matrix inverse {{{ #

        minv = [
            tf.Variable(tf.divide(tf.constant(1., dtype=dtype),
                                  tf.sqrt(v_hat[i].initialized_value())),
                        name="minv_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        #  }}} Initialize mass matrix inverse #

        #  }}} Initialize internal sampler parameters #

        self.minv_t = [None] * n_params  # gets burned-in

        for i, (param, grad) in enumerate(zip(params, grads)):

            vectorized_param = self.vectorized_params[i]

            #  Burn-in logic {{{ #
            r_t = tf.assign(R[i], 1. / (tau[i] + 1.), name="r_t_{}".format(i))
            # r_t should always use the old value of tau
            with tf.control_dependencies([r_t]):
                tau_t = tf.assign_add(
                    tau[i],
                    safe_divide(-g[i] * g[i] * tau[i], v_hat[i]) + 1,
                    name="tau_t_{}".format(i))

                self.minv_t[i] = tf.assign(minv[i],
                                           safe_divide(1.,
                                                       safe_sqrt(v_hat[i])),
                                           name="minv_t_{}".format(i))
                # tau_t, minv_t should always use the old values of g, g2
                with tf.control_dependencies([tau_t, self.minv_t[i]]):
                    g_t = tf.assign_add(g[i],
                                        -r_t * g[i] + r_t * grad,
                                        name="g_t_{}".format(i))

                    v_hat_t = tf.assign_add(v_hat[i],
                                            -r_t * v_hat[i] + r_t * grad**2,
                                            name="v_hat_t_{}".format(i))

                    #  }}} Burn-in logic #
                    with tf.control_dependencies([g_t, v_hat_t]):
                        #  Draw random sample {{{ #

                        sigma = safe_sqrt(2. * self.epsilon * safe_divide(
                            (self.minv_t[i] * (A - noise)), scale_grad))

                        sample = self._draw_noise_sample(
                            sigma=sigma, shape=vectorized_param.shape)

                        #  }}} Draw random sample #

                        #  SGLD Update {{{ #

                        vectorized_theta_t = tf.assign_add(
                            vectorized_param,
                            -self.epsilon * self.minv_t[i] * A * grad + sample,
                        )
                        self.theta_t[i] = tf.assign(
                            param,
                            unvectorize(vectorized_theta_t,
                                        original_shape=param.shape),
                            name="Theta_t_{}".format(i))
Example #5
File: svgd.py  Project: thobotics/RoMBRL
    def __init__(self,
                 particles,
                 cost_fun,
                 tf_scope="default",
                 batch_generator=None,
                 stepsize_schedule=ConstantStepsizeSchedule(0.1),
                 alpha=0.9,
                 fudge_factor=1e-6,
                 session=tf.get_default_session(),
                 dtype=tf.float64,
                 seed=None):
        """ Initialize the sampler parameters and set up a tensorflow.Graph
            for later queries.

        Parameters
        ----------
        particles : List[tensorflow.Variable]
            List of particles each representing a (different) guess of the
            target parameters of this sampler.

        cost_fun : iterable of tensorflow.Tensor
            Per-particle cost tensors: entry `i` contains the cost-value of
            `particles[i]`. Frequently denoted with `U` in literature.

        tf_scope : string, optional
            Name of the `tensorflow` variable scope in which this sampler's
            internal graph variables are created.
            Defaults to `"default"`.

        batch_generator : iterable, optional
            Iterable which returns dictionaries to feed into
            tensorflow.Session.run() calls to evaluate the cost function.
            Defaults to `None` which indicates that no batches shall be fed.

        stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
            Iterator class that produces a stream of stepsize values that
            we can use in our samplers.
            See also: `pysgmcmc.stepsize_schedules`

        alpha : float, optional
            TODO DOKU
            Defaults to `0.9`.

        fudge_factor : float, optional
            TODO DOKU
            Defaults to `1e-6`.

        session : tensorflow.Session, optional
            Session object which knows about the external part of the graph
            (which defines `cost`, and possibly batches).
            Used internally to evaluate (burn-in/sample) the sampler.

        dtype : tensorflow.DType, optional
            Type of elements of `tensorflow.Tensor` objects used in this sampler.
            Defaults to `tensorflow.float64`.

        seed : int, optional
            Random seed to use.
            Defaults to `None`.

        See Also
        ----------
        pysgmcmc.sampling.MCMCSampler:
            Base class for `SteinVariationalGradientDescentSampler` that
            specifies how actual sampling is performed (using iterator protocol,
            e.g. `next(sampler)`).

        """

        assert isinstance(alpha, (int, float))
        assert isinstance(fudge_factor, (int, float))

        self.particles = particles

        # `_init_basic` replaces the `super().__init__()` call used upstream:
        # it initializes the member variables common to all samplers.
        self._init_basic(
            params=particles,
            cost_fun=cost_fun,
            tf_scope=tf_scope,
            batch_generator=batch_generator,
            session=session, seed=seed, dtype=dtype,
            stepsize_schedule=stepsize_schedule
        )

        with tf.variable_scope(tf_scope, reuse=tf.AUTO_REUSE):

            fudge_factor = tf.constant(
                fudge_factor, dtype=self.dtype, name="fudge_factor"
            )

            self.epsilon = tf.Variable(
                stepsize_schedule.initial_value, dtype=self.dtype, name="stepsize"
            )

            stack_vectorized_params = tf.stack(self.vectorized_params)

            self.n_particles = tf.cast(
                stack_vectorized_params.shape[0], self.dtype
            )

            historical_grad = tf.get_variable(
                "historical_grad", stack_vectorized_params.shape, dtype=dtype,
                initializer=tf.zeros_initializer()
            )

        self.session.run(
            tf.variables_initializer([historical_grad, self.epsilon])
        )

        grads = []
        for i, cost in enumerate(cost_fun):
            particle_gradients = tf.gradients(cost, self.particles[i])
            grads.append(
                tf.concat([vectorize(gradient)
                           for gradient in particle_gradients], axis=0))
        lnpgrad = tf.squeeze(grads)

        kernel_matrix, kernel_gradients = self.svgd_kernel(
            stack_vectorized_params)

        grad_theta = tf.divide(
            tf.matmul(kernel_matrix, lnpgrad) + kernel_gradients,
            self.n_particles
        )

        historical_grad_t = tf.assign(
            historical_grad,
            alpha * historical_grad + (1. - alpha) * (grad_theta ** 2)
        )

        adj_grad = tf.divide(
            grad_theta,
            fudge_factor + tf.sqrt(historical_grad_t)
        )

        for i, particle in enumerate(self.particles):

            vectorized_Theta_t = tf.assign_sub(
                self.vectorized_params[i], self.epsilon * adj_grad[i]
            )
            start_idx = 0

            for j, param in enumerate(particle):
                flat_shape = tf.reduce_prod(param.shape)
                vectorized_param = vectorized_Theta_t[start_idx:start_idx+flat_shape]
                self.theta_t[i*len(particle) + j] = tf.assign(
                    param,
                    tf.reshape(vectorized_param, shape=param.shape),
                    name="theta_t_%d_%d" % (i, j)
                )
                start_idx += flat_shape
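
For reference, the update implemented by this graph is standard SVGD
(Liu & Wang, 2016) with adagrad-style stepsize scaling. A NumPy sketch using
an RBF kernel with the median heuristic (assuming `svgd_kernel` above computes
something similar); it takes gradients of log p and ascends, whereas the graph
feeds cost gradients and subtracts:

import numpy as np

def svgd_step(particles, grads_log_p, stepsize, hist_grad,
              alpha=0.9, fudge=1e-6):
    # particles, grads_log_p: arrays of shape (n_particles, n_dims).
    n = particles.shape[0]
    diffs = particles[:, None, :] - particles[None, :, :]
    sq_dists = np.sum(diffs ** 2, axis=-1)
    h = np.median(sq_dists) / np.log(n + 1.0)  # RBF bandwidth, median heuristic
    K = np.exp(-sq_dists / h)                  # kernel matrix
    # Repulsive term: sum_j grad_{x_j} K(x_j, x_i).
    grad_K = 2.0 / h * (particles * K.sum(axis=1)[:, None] - K @ particles)
    phi = (K @ grads_log_p + grad_K) / n       # steepest ascent direction
    hist_grad = alpha * hist_grad + (1.0 - alpha) * phi ** 2
    particles = particles + stepsize * phi / (fudge + np.sqrt(hist_grad))
    return particles, hist_grad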