class RBM(Block, Model):
    """A base interface for RBMs, implementing the binary-binary case."""

    def __init__(self, nvis = None, nhid = None,
            vis_space = None,
            hid_space = None,
            transformer = None,
            irange=0.5, rng=None, init_bias_vis = None,
            init_bias_vis_marginals = None, init_bias_hid=0.0,
            base_lr = 1e-3, anneal_start = None, nchains = 100, sml_gibbs_steps = 1,
            random_patches_src = None,
            monitor_reconstruction = False):
        """
        Construct an RBM object.

        The model is specified either with `nvis` / `nhid` (dense matrix
        multiplication on vectors) or with `vis_space` / `hid_space` /
        `transformer`; the two styles must not be mixed.

        Parameters
        ----------
        nvis : int
            Number of visible units in the model.
            (Specifying this implies that the model acts on a vector,
            i.e. it sets vis_space = pylearn2.space.VectorSpace(nvis) )
        nhid : int
            Number of hidden units in the model.
            (Specifying this implies that the model acts on a vector)
        vis_space : Space, optional
            A pylearn2.space.Space object describing what kind of vector
            space the RBM acts on. Don't specify if you used nvis / nhid.
        hid_space : Space, optional
            A pylearn2.space.Space object describing what kind of vector
            space the RBM's hidden units live in. Don't specify if you used
            nvis / nhid.
        transformer : object, optional
            Linear transformer mapping visible to hidden units. Required
            when `vis_space` is given; when `nvis` / `nhid` are given it
            must be None or a MatrixMul.
        irange : float, optional
            The size of the initial interval around 0 for weights.
        rng : RandomState object, optional
            NumPy RandomState used to initialize the parameters. Its
            `uniform` method is called directly, so pass a RandomState
            rather than a bare seed. Defaults to RandomState(1001).
        init_bias_vis : array_like, optional
            Initial value of the visible biases, broadcasted as necessary.
            Must be None when `init_bias_vis_marginals` is given.
        init_bias_vis_marginals : Dataset, optional
            Either None, or a Dataset (read through its `X` attribute, with
            values in [0, 1]) used to initialize the visible biases to the
            inverse sigmoid of the data marginals.
        init_bias_hid : array_like, optional
            Initial value of the hidden biases, broadcasted as necessary.
        base_lr : float
            Default SML learning rule: the base learning rate.
        anneal_start : int, optional
            Default SML learning rule: number of steps after which to start
            annealing on a 1/t schedule.
        nchains : int
            Default SML learning rule: number of negative chains.
        sml_gibbs_steps : int
            Default SML learning rule: number of gibbs steps to take per
            update.
        random_patches_src : optional
            Either None, an array-like, or a Dataset from which to draw
            random patches in order to initialize the weights. Patches will
            be multiplied by irange.
        monitor_reconstruction : bool
            If True, will request a monitoring channel to monitor
            reconstruction error. This constructor accepts the argument
            but does not read it.

        Raises
        ------
        ValueError
            If `init_bias_vis` or `init_bias_hid` cannot be added to the
            origin of the corresponding space.
        """

        # Run both base-class initializers explicitly: RBM inherits from two
        # bases.
        Model.__init__(self)
        Block.__init__(self)

        if init_bias_vis_marginals is not None:
            assert init_bias_vis is None
            X = init_bias_vis_marginals.X
            assert X.min() >= 0.0
            assert X.max() <= 1.0

            marginals = X.mean(axis=0)

            #rescale the marginals a bit to avoid NaNs
            init_bias_vis = inverse_sigmoid_numpy(.01 + .98 * marginals)

        if init_bias_vis is None:
            init_bias_vis = 0.0

        if rng is None:
            # TODO: global rng configuration stuff.
            rng = numpy.random.RandomState(1001)
        self.rng = rng

        if vis_space is None:
            #if we don't specify things in terms of spaces and a transformer,
            #assume dense matrix multiplication and work off of nvis, nhid
            assert hid_space is None
            assert transformer is None or isinstance(transformer,MatrixMul)
            assert nvis is not None
            assert nhid is not None

            if transformer is None:
                if random_patches_src is None:
                    W = rng.uniform(-irange, irange, (nvis, nhid))
                elif hasattr(random_patches_src, '__array__'):
                    # Array-like source: use its transpose directly.
                    W = irange * random_patches_src.T
                    assert W.shape == (nvis, nhid)
                else:
                    # Dataset source: draw one patch per hidden unit.
                    W = irange * random_patches_src.get_batch_design(nhid).T

                self.transformer = MatrixMul(sharedX(W, name='W', borrow=True))
            else:
                self.transformer = transformer

            self.vis_space = VectorSpace(nvis)
            self.hid_space = VectorSpace(nhid)
        else:
            assert hid_space is not None
            assert transformer is not None
            assert nvis is None
            assert nhid is None

            self.vis_space = vis_space
            self.hid_space = hid_space
            self.transformer = transformer

        try:
            b_vis = self.vis_space.get_origin()
            b_vis += init_bias_vis
        except ValueError:
            raise ValueError("bad shape or value for init_bias_vis")
        self.bias_vis = sharedX(b_vis, name='bias_vis', borrow=True)

        try:
            b_hid = self.hid_space.get_origin()
            b_hid += init_bias_hid
        except ValueError:
            raise ValueError('bad shape or value for init_bias_hid')
        self.bias_hid = sharedX(b_hid, name='bias_hid', borrow=True)

        self.random_patches_src = random_patches_src

        self.__dict__.update(nhid=nhid, nvis=nvis)
        self._params = safe_union(self.transformer.get_params(), [self.bias_vis, self.bias_hid])

        self.base_lr = base_lr
        self.anneal_start = anneal_start
        self.nchains = nchains
        self.sml_gibbs_steps = sml_gibbs_steps

    def get_input_dim(self):
        """
        Return the dimensionality of the visible space.

        Raises
        ------
        TypeError
            If the visible space is not a VectorSpace.
        """
        if isinstance(self.vis_space, VectorSpace):
            return self.vis_space.dim
        raise TypeError("Can't describe "+str(type(self.vis_space))+" as a dimensionality number.")

    def get_output_dim(self):
        """
        Return the dimensionality of the hidden space.

        Raises
        ------
        TypeError
            If the hidden space is not a VectorSpace.
        """
        if isinstance(self.hid_space, VectorSpace):
            return self.hid_space.dim
        raise TypeError("Can't describe "+str(type(self.hid_space))+" as a dimensionality number.")

    def get_input_space(self):
        """Return the space the visible units live in."""
        visible_space = self.vis_space
        return visible_space

    def get_output_space(self):
        """Return the space the hidden units live in."""
        hidden_space = self.hid_space
        return hidden_space

    def get_params(self):
        """Return a new list holding the model's parameters."""
        return list(self._params)

    def get_weights(self, borrow=False):
        """
        Return the value of the transformer's single parameter.

        Parameters
        ----------
        borrow : bool
            Forwarded to the parameter's `get_value`.

        Raises
        ------
        ValueError
            If the transformer does not have exactly one parameter.
        """
        (weights,) = self.transformer.get_params()
        value = weights.get_value(borrow=borrow)
        return value

    def get_weights_topo(self):
        """Return the weights in topological form, as given by the transformer."""
        topo_weights = self.transformer.get_weights_topo()
        return topo_weights

    def get_weights_format(self):
        """Return the axis labels of the weight matrix: visible, then hidden."""
        axis_labels = ['v', 'h']
        return axis_labels

    def get_monitoring_channels(self, data):
        """
        Build the monitoring channels for a batch of visible data.

        Parameters
        ----------
        data : tensor_like
            Symbolic batch of visible units.

        Returns
        -------
        channels : dict
            Maps channel names to symbolic expressions: min / mean / max of
            the hidden biases, of the visible biases and of the batch-averaged
            hidden activations, plus the reconstruction error.
        """
        # Fixed-seed stream handed to reconstruction_error.
        stream = RandomStreams(42)

        #TODO: re-enable weight and weight-norm channels in the case where
        #self.transformer is a matrix multiply

        # Mean activation of each hidden unit over the batch.
        mean_hid = self.mean_h_given_v(data).mean(axis=0)

        channels = {}
        for prefix, quantity in (('bias_hid', self.bias_hid),
                                 ('bias_vis', self.bias_vis),
                                 ('h', mean_hid)):
            channels[prefix + '_min'] = T.min(quantity)
            channels[prefix + '_mean'] = T.mean(quantity)
            channels[prefix + '_max'] = T.max(quantity)

        channels['reconstruction_error'] = self.reconstruction_error(data, stream)

        return channels

    def get_monitoring_data_specs(self):
        """
        Get the data_specs describing the data for get_monitoring_channels.

        This implementation returns specification corresponding to unlabeled
        inputs.

        Returns
        -------
        data_specs : tuple
            The pair (input space, input source) of this model.
        """
        return (self.get_input_space(), self.get_input_source())

    def ml_gradients(self, pos_v, neg_v):
        """
        Get the contrastive gradients given positive and negative phase
        visible units.

        Parameters
        ----------
        pos_v : tensor_like
            Theano symbolic representing a minibatch on the visible units,
            with the first dimension indexing training examples and the second
            indexing data dimensions (usually actual training data).
        neg_v : tensor_like
            Theano symbolic representing a minibatch on the visible units,
            with the first dimension indexing training examples and the second
            indexing data dimensions (usually reconstructions of the data or
            sampler particles from a persistent Markov chain).

        Returns
        -------
        grads : list
            List of Theano symbolic variables representing gradients with
            respect to model parameters, in the same order as returned by
            `get_params()`.

        Notes
        -----
        `pos_v` and `neg_v` need not have the same first dimension, i.e.
        minibatch size.
        """

        # taking the mean over each term independently allows for different
        # mini-batch sizes in the positive and negative phase.
        ml_cost = (self.free_energy_given_v(pos_v).mean() -
                   self.free_energy_given_v(neg_v).mean())

        grads = tensor.grad(ml_cost, self.get_params(),
                            consider_constant=[pos_v, neg_v])

        return grads

    def train_batch(self, dataset, batch_size):
        """
        A default learning rule based on SML.

        Parameters
        ----------
        dataset : Dataset
            Source of training data; must provide `get_batch_design`.
        batch_size : int
            Number of examples to draw for this update.

        Returns
        -------
        bool
            Always True.
        """
        # Draw one design-matrix batch and run a single SML update on it.
        self.learn_mini_batch(dataset.get_batch_design(batch_size))
        return True

    def learn_mini_batch(self, X):
        """
        A default learning rule based on SML.

        Parameters
        ----------
        X : array_like
            Minibatch passed to the compiled learning function.

        Returns
        -------
        rval
            Whatever the compiled learning function returns.
        """

        # Compile the learning function lazily, on first use.
        if not hasattr(self, 'learn_func'):
            self.redo_theano()

        rval = self.learn_func(X)

        return rval

    def redo_theano(self):
        """
        Compiles the theano function for the default learning rule.

        The compiled function is stored as `self.learn_func`. Every attribute
        name that appears on the model during compilation is passed to
        `register_names_to_del`.
        """

        # A set makes the "is this name new?" test below O(1) per name.
        names_before = set(dir(self))

        minibatch = tensor.matrix()

        optimizer = _SGDOptimizer(self, self.base_lr, self.anneal_start)

        # Every negative chain starts at 0.5 on all visible units.
        initial_particles = 0.5 + np.zeros((self.nchains, self.get_input_dim()))
        sampler = BlockGibbsSampler(self, initial_particles, self.rng,
                                    steps=self.sml_gibbs_steps)

        updates = training_updates(visible_batch=minibatch, model=self,
                                   sampler=sampler, optimizer=optimizer)

        self.learn_func = theano.function([minibatch], updates=updates)

        new_names = [name for name in dir(self) if name not in names_before]

        self.register_names_to_del(new_names)

    def gibbs_step_for_v(self, v, rng):
        """
        Do a round of block Gibbs sampling given visible configuration

        Parameters
        ----------
        v : tensor_like
            Theano symbolic representing the visible unit states for a batch
            of training examples (or negative phase particles), with the first
            dimension indexing training examples and the second indexing data
            dimensions.
        rng : RandomStreams object
            Random number generator to use for sampling the hidden and visible
            units.

        Returns
        -------
        v_sample : tensor_like
            Theano symbolic representing the new visible unit state after one
            round of Gibbs sampling.
        locals : dict
            The local namespace of this call. Besides the arguments (`self`,
            `v`, `rng`) it contains the following auxiliary state as keys:
             * `h_mean`: the returned value from `mean_h_given_v`
             * `h_sample`: the stochastically sampled hidden units
             * `v_mean`: the returned value from `mean_v_given_h`
             * `v_sample`: the stochastically sampled visible units
        """
        h_mean = self.mean_h_given_v(v)
        assert h_mean.type.dtype == v.type.dtype
        # For binary hidden units
        # TODO: factor further to extend to other kinds of hidden units
        #       (e.g. spike-and-slab)
        h_sample = rng.binomial(size = h_mean.shape, n = 1 , p = h_mean, dtype=h_mean.type.dtype)
        assert h_sample.type.dtype == v.type.dtype
        # v_mean is always based on h_sample, not h_mean, because we don't
        # want h transmitting more than one bit of information per unit.
        v_mean = self.mean_v_given_h(h_sample)
        assert v_mean.type.dtype == v.type.dtype
        v_sample = self.sample_visibles([v_mean], v_mean.shape, rng)
        assert v_sample.type.dtype == v.type.dtype
        # NOTE: callers index the returned dict by local variable name, so no
        # extra locals may be introduced in this method.
        return v_sample, locals()

    def sample_visibles(self, params, shape, rng):
        """
        Stochastically sample the visible units given hidden unit
        configurations for a set of training examples.

        Parameters
        ----------
        params : list
            List of the necessary parameters to sample :math:`p(v|h)`. In the
            case of a binary-binary RBM this is a single-element list
            containing the symbolic representing :math:`p(v|h)`, as returned
            by `mean_v_given_h`.
        shape : tuple
            Shape of the sample to draw.
        rng : RandomStreams object
            Random number generator used to draw the uniform noise.

        Returns
        -------
        vprime : tensor_like
            Theano symbolic representing stochastic samples from :math:`p(v|h)`
        """
        v_mean = params[0]
        # A unit is on wherever the uniform draw falls below its on-probability.
        return as_floatX(rng.uniform(size=shape) < v_mean)

    def input_to_h_from_v(self, v):
        """
        Compute the affine function (linear map plus bias) that serves as
        input to the hidden layer in an RBM.

        Parameters
        ----------
        v : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the one or several
            minibatches on the visible units, with the first dimension indexing
            training examples and the second indexing data dimensions.

        Returns
        -------
        a : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the input to each
            hidden unit for each training example.
        """

        if isinstance(v, tensor.Variable):
            return self.bias_hid + self.transformer.lmul(v)
        else:
            # Anything else is treated as an iterable of minibatches.
            return [self.input_to_h_from_v(vis) for vis in v]

    def input_to_v_from_h(self, h):
        """
        Compute the affine function (linear map plus bias) that serves as
        input to the visible layer in an RBM.

        Parameters
        ----------
        h : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the one or several
            minibatches on the hidden units, with the first dimension indexing
            training examples and the second indexing hidden units.

        Returns
        -------
        a : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the input to each
            visible unit for each row of h.
        """
        if isinstance(h, tensor.Variable):
            return self.bias_vis + self.transformer.lmul_T(h)
        else:
            # Anything else is treated as an iterable of minibatches.
            return [self.input_to_v_from_h(hid) for hid in h]

    def upward_pass(self, v):
        """
        Wrapper around the mean_h_given_v method.  Called when RBM is
        accessed by mlp.HiddenLayer.

        Parameters
        ----------
        v : tensor_like or list of tensor_likes
            Visible unit states, passed unchanged to `mean_h_given_v`.

        Returns
        -------
        h : tensor_like or list of tensor_likes
            The value returned by `mean_h_given_v(v)`.
        """
        return self.mean_h_given_v(v)

    def mean_h_given_v(self, v):
        """
        Compute the mean activation of the hidden units given visible unit
        configurations for a set of training examples.

        Parameters
        ----------
        v : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the visible unit
            states for a batch (or several) of training examples, with the
            first dimension indexing training examples and the second indexing
            data dimensions.

        Returns
        -------
        h : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the mean
            (deterministic) hidden unit activations given the visible units.
        """
        if isinstance(v, tensor.Variable):
            return nnet.sigmoid(self.input_to_h_from_v(v))
        else:
            # Anything else is treated as an iterable of minibatches.
            return [self.mean_h_given_v(vis) for vis in v]

    def mean_v_given_h(self, h):
        """
        Compute the mean activation of the visibles given hidden unit
        configurations for a set of training examples.

        Parameters
        ----------
        h : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the hidden unit
            states for a batch (or several) of training examples, with the
            first dimension indexing training examples and the second indexing
            hidden units.

        Returns
        -------
        vprime : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the mean
            (deterministic) reconstruction of the visible units given the
            hidden units.
        """
        if isinstance(h, tensor.Variable):
            return nnet.sigmoid(self.input_to_v_from_h(h))
        else:
            # Anything else is treated as an iterable of minibatches.
            return [self.mean_v_given_h(hid) for hid in h]

    def free_energy_given_v(self, v):
        """
        Calculate the free energy of a visible unit configuration by
        marginalizing over the hidden units.

        Parameters
        ----------
        v : tensor_like
            Theano symbolic representing the visible unit states for a batch
            of training examples, with the first dimension indexing training
            examples and the second indexing data dimensions.

        Returns
        -------
        f : tensor_like
            1-dimensional tensor (vector) representing the free energy
            associated with each row of v.
        """
        sigmoid_arg = self.input_to_h_from_v(v)
        # Summing out each binary hidden unit contributes
        # log(1 + exp(input)) = softplus(input).
        return (-tensor.dot(v, self.bias_vis) -
                nnet.softplus(sigmoid_arg).sum(axis=1))

    def free_energy(self, V):
        """Alias for `free_energy_given_v`, applied to the visible batch `V`."""
        energy = self.free_energy_given_v(V)
        return energy

    def free_energy_given_h(self, h):
        """
        Calculate the free energy of a hidden unit configuration by
        marginalizing over the visible units.

        Parameters
        ----------
        h : tensor_like
            Theano symbolic representing the hidden unit states, with the
            first dimension indexing training examples and the second
            indexing hidden units.

        Returns
        -------
        f : tensor_like
            1-dimensional tensor (vector) representing the free energy
            associated with each row of h.
        """
        sigmoid_arg = self.input_to_v_from_h(h)
        # Summing out each binary visible unit contributes
        # log(1 + exp(input)) = softplus(input).
        return (-tensor.dot(h, self.bias_hid) -
                nnet.softplus(sigmoid_arg).sum(axis=1))

    def __call__(self, v):
        """
        Forward propagate (symbolic) input through this module, obtaining
        a representation to pass on to layers above.

        This just aliases the `mean_h_given_v()` function for syntactic
        sugar.

        Parameters
        ----------
        v : tensor_like or list of tensor_likes
            Visible unit states, passed unchanged to `mean_h_given_v`.
        """
        return self.mean_h_given_v(v)

    def reconstruction_error(self, v, rng):
        """
        Compute the mean-squared error (mean over examples, sum over units)
        across a minibatch after a Gibbs
        step starting from the training data.

        Parameters
        ----------
        v : tensor_like
            Theano symbolic representing the visible unit states for a batch
            of training examples, with the first dimension indexing training
            examples and the second indexing data dimensions.
        rng : RandomStreams object
            Random number generator to use for sampling the hidden and visible
            units.

        Returns
        -------
        mse : tensor_like
            0-dimensional tensor (essentially a scalar) indicating the mean
            reconstruction error across the minibatch.

        Notes
        -----
        The reconstruction used to assess error samples only the hidden
        units. For the visible units, it uses the conditional mean.
        No sampling of the visible units is done, to reduce noise in the estimate.
        """
        # The sampled visibles are discarded; only the conditional mean is
        # compared against the input.
        _, aux = self.gibbs_step_for_v(v, rng)
        return ((aux['v_mean'] - v) ** 2).sum(axis=1).mean()
class BinaryVector(VisibleLayer):
    """
    A DBM visible layer consisting of binary random variables living
    in a VectorSpace.
    """
    def __init__(self, nvis, bias_from_marginals=None):
        """
        Parameters
        ----------
        nvis : int
            The dimension of the space.
        bias_from_marginals : Dataset, optional
            A dataset, whose marginals are used to initialize the visible
            biases. Its design matrix must contain only 0s and 1s, with both
            values present. If None, the biases start at zero.
        """
        # Only nvis is kept as an attribute.
        # Don't serialize the dataset: bias_from_marginals is deliberately
        # not stored on the layer.
        self.nvis = nvis

        self.space = VectorSpace(nvis)
        self.input_space = self.space

        if bias_from_marginals is None:
            init_bias = np.zeros((nvis, ))
        else:
            X = bias_from_marginals.get_design_matrix()
            assert X.max() == 1.
            assert X.min() == 0.
            assert not np.any((X > 0.) * (X < 1.))

            mean = X.mean(axis=0)

            # Keep the marginals strictly inside (0, 1) before they are
            # passed to the inverse sigmoid.
            mean = np.clip(mean, 1e-7, 1 - 1e-7)

            init_bias = inverse_sigmoid_numpy(mean)

        self.bias = sharedX(init_bias, 'visible_bias')

    def get_biases(self):
        """Return the current value of the visible biases."""
        current = self.bias.get_value()
        return current

    def set_biases(self, biases):
        """
        Overwrite the visible biases.

        Parameters
        ----------
        biases : array_like
            New value, stored with the bias variable's `set_value`.
        """
        self.bias.set_value(biases)

    def get_total_state_space(self):
        """Return the space of the layer's total state, i.e. its input space."""
        total_space = self.get_input_space()
        return total_space

    def get_params(self):
        """Return the layer's parameters as a set holding the bias."""
        return {self.bias}

    def sample(self, state_below = None, state_above = None,
            layer_above = None,
            theano_rng = None):
        """
        Sample the visible units given the state of the layer above.

        Parameters
        ----------
        state_below : None
            Must be None.
        state_above : tensor_like
            State of the layer above, passed to its `downward_message`.
        layer_above : Layer
            The layer above this one.
        theano_rng : RandomStreams object
            Random number generator used to draw the binary sample.

        Returns
        -------
        rval : tensor_like
            Sample drawn from a binomial with n=1 whose on-probability is
            the sigmoid of the downward message plus the bias.
        """

        assert state_below is None

        msg = layer_above.downward_message(state_above)

        bias = self.bias

        z = msg + bias

        phi = T.nnet.sigmoid(z)

        rval = theano_rng.binomial(size=phi.shape, p=phi, dtype=phi.dtype, n=1)

        return rval

    def make_state(self, num_examples, numpy_rng):
        """
        Create a shared variable holding an initial sample of the layer.

        Parameters
        ----------
        num_examples : int
            Number of rows in the sample.
        numpy_rng : RandomState object
            Source of the uniform noise.

        Returns
        -------
        rval : shared variable
            Named 'v_sample_shared'; each unit is on where the uniform draw
            falls below the sigmoid of its bias.
        """
        uniform_draws = numpy_rng.uniform(0., 1., (num_examples, self.nvis))
        on_probability = sigmoid_numpy(self.bias.get_value())
        return sharedX(uniform_draws < on_probability, name='v_sample_shared')

    def expected_energy_term(self, state, average, state_below = None, average_below = None):
        """
        Return the bias term of the energy for each row of `state`.

        Parameters
        ----------
        state : tensor_like
            State (or mean state) of the visible units.
        average : bool
            Whether `state` is an average; does not change the result.
        state_below : None
            Must be None.
        average_below : None
            Must be None.

        Returns
        -------
        rval : tensor_like
            1-dimensional tensor equal to -dot(state, bias).
        """

        assert state_below is None
        assert average_below is None
        assert average in [True, False]

        # Energy function is linear so it doesn't matter if we're averaging or not
        rval = -T.dot(state, self.bias)

        assert rval.ndim == 1

        return rval
class IsingVisible(VisibleLayer):
    """
    A DBM visible layer consisting of random variables living
    in a VectorSpace, with values in {-1, 1}.

    Implements the energy function term
    -b^T v
    where v is the state of the visible units.
    """

    def __init__(self,
            nvis,
            bias_from_marginals = None):
        """
        Parameters
        ----------
        nvis : int
            The dimension of the space.
        bias_from_marginals : Dataset, optional
            A dataset, whose marginals are used to initialize the visible
            biases. If None, the biases start at zero.
        """
        # Only nvis is kept as an attribute.
        # Don't serialize the dataset: bias_from_marginals is deliberately
        # not stored on the layer.
        self.nvis = nvis

        self.space = VectorSpace(nvis)
        self.input_space = self.space

        if bias_from_marginals is None:
            init_bias = np.zeros((nvis,))
        else:
            init_bias = init_tanh_bias_from_marginals(bias_from_marginals)

        self.bias = sharedX(init_bias, 'visible_bias')

    def get_biases(self):
        """Return the current value of the visible biases."""
        current = self.bias.get_value()
        return current

    def set_biases(self, biases, recenter=False):
        """
        Overwrite the visible biases.

        Parameters
        ----------
        biases : array_like
            New value, stored with the bias variable's `set_value`.
        recenter : bool
            If True, assert that the layer has a truthy `center` attribute.
        """
        self.bias.set_value(biases)
        if recenter:
            # NOTE(review): nothing visible in this class sets `center`, and
            # no recentering is performed here -- confirm what callers that
            # pass recenter=True expect.
            assert self.center

    def upward_state(self, total_state):
        """Return the state shown to the layer above: the total state, unchanged."""
        state_for_layer_above = total_state
        return state_for_layer_above

    def get_params(self):
        """Return the layer's parameters as a one-element list holding the bias."""
        params = [self.bias]
        return params

    def sample(self, state_below = None, state_above = None,
            layer_above = None,
            theano_rng = None):
        """
        Sample the visible units given the state of the layer above.

        Parameters
        ----------
        state_below : None
            Must be None.
        state_above : tensor_like
            State of the layer above, passed to its `downward_message`.
        layer_above : Layer
            The layer above this one.
        theano_rng : RandomStreams object
            Random number generator used to draw the sample.

        Returns
        -------
        rval : tensor_like
            A binomial (n=1) sample rescaled from {0, 1} to {-1, 1}.
        """
        assert state_below is None

        pre_activation = layer_above.downward_message(state_above) + self.bias

        # On-probability of each unit.
        on_prob = T.nnet.sigmoid(2. * pre_activation)

        bits = theano_rng.binomial(size=on_prob.shape, p=on_prob,
                                   dtype=on_prob.dtype, n=1)

        return bits * 2. - 1.

    def make_state(self, num_examples, numpy_rng):
        """
        Create a shared variable holding an initial {-1, 1} sample.

        Parameters
        ----------
        num_examples : int
            Number of rows in the sample.
        numpy_rng : RandomState object
            Source of the uniform noise.

        Returns
        -------
        rval : shared variable
            Named 'v_sample_shared'.
        """
        uniform_draws = numpy_rng.uniform(0.,1., (num_examples, self.nvis))
        on_probability = sigmoid_numpy(2. * self.bias.get_value())
        # Rescale the boolean draw from {0, 1} to {-1, 1}.
        spins = 2. * (uniform_draws < on_probability) - 1.
        return sharedX(spins, name = 'v_sample_shared')

    def make_symbolic_state(self, num_examples, theano_rng):
        """
        Build a symbolic {-1, 1} sample of the layer.

        Parameters
        ----------
        num_examples : int
            Number of rows in the sample.
        theano_rng : RandomStreams object
            Random number generator used to draw the sample.

        Returns
        -------
        rval : tensor_like
            Binomial sample of shape (num_examples, nvis), rescaled from
            {0, 1} to {-1, 1}.
        """
        # The bias lives in `self.bias`; this class defines no `self.b`.
        mean = T.nnet.sigmoid(2. * self.bias)
        rval = theano_rng.binomial(size=(num_examples, self.nvis), p=mean)
        rval = 2. * (rval) - 1.

        return rval

    def expected_energy_term(self, state, average, state_below = None, average_below = None):
        """
        Return the bias term of the energy for each row of `state`.

        Parameters
        ----------
        state : tensor_like
            State (or mean state) of the visible units.
        average : bool
            Whether `state` is an average; does not change the result.
        state_below : None
            Must be None.
        average_below : None
            Must be None.

        Returns
        -------
        rval : tensor_like
            1-dimensional tensor equal to -dot(state, bias).
        """
        assert state_below is None
        assert average_below is None
        assert average in (True, False)

        # The term is linear in the state, so averaging makes no difference.
        energy = -T.dot(state, self.bias)
        assert energy.ndim == 1
        return energy
class BoltzmannIsingVisible(VisibleLayer):
    """
    An IsingVisible whose parameters are defined in Boltzmann machine
    space and converted to Ising form on demand (see `ising_bias`).
    """

    def __init__(self,
            nvis,
            bias_from_marginals = None):
        """
        Parameters
        ----------
        nvis : int
            The dimension of the space.
        bias_from_marginals : Dataset, optional
            A dataset, whose marginals are used to initialize the visible
            biases. It is read through its `X` attribute. If None, the
            biases start at zero.
        """
        # Only nvis is kept as an attribute.
        # Don't serialize the dataset: bias_from_marginals is deliberately
        # not stored on the layer.
        self.nvis = nvis

        self.space = VectorSpace(nvis)
        self.input_space = self.space

        if bias_from_marginals is None:
            init_bias = np.zeros((nvis,))
        else:
            # data is in [-1, 1], but want biases for a sigmoid
            init_bias = init_sigmoid_bias_from_array(bias_from_marginals.X / 2. + 0.5)

        self.boltzmann_bias = sharedX(init_bias, 'visible_bias')

    def get_biases(self):
        """
        Not implemented for this layer: the body is a bare `assert False`.
        """
        assert False # not really sure what this should do for this layer

    def set_biases(self, biases, recenter=False):
        """
        Not implemented for this layer: the body is a bare `assert False`
        and neither argument is read.
        """
        assert False # not really sure what this should do for this layer

    def ising_bias(self, for_sampling=False):
        """
        Return the bias expressed in Ising form.

        Parameters
        ----------
        for_sampling : bool
            If True and the layer above has a non-None `sampling_b_stdev`,
            return `self.noisy_sampling_b` instead of the computed bias.

        Returns
        -------
        bias : tensor_like
            0.5 * boltzmann_bias + 0.25 * (sum of the layer above's W over
            axis 1), or the noisy sampling bias as described above.
        """
        use_noisy_bias = (for_sampling and
                          self.layer_above.sampling_b_stdev is not None)
        if not use_noisy_bias:
            weight_sums = self.layer_above.W.sum(axis=1)
            return 0.5 * self.boltzmann_bias + 0.25 * weight_sums
        return self.noisy_sampling_b

    def ising_bias_numpy(self):
        """
        Return the Ising-form bias computed from the current parameter values.

        Returns
        -------
        bias : array
            0.5 * (value of boltzmann_bias) + 0.25 * (value of the layer
            above's W, summed over axis 1).
        """
        bias_value = self.boltzmann_bias.get_value()
        weight_sums = self.layer_above.W.get_value().sum(axis=1)
        return 0.5 * bias_value + 0.25 * weight_sums

    def upward_state(self, total_state):
        """Return the state shown to the layer above: the total state, unchanged."""
        state_for_layer_above = total_state
        return state_for_layer_above

    def get_params(self):
        """Return the layer's parameters: a one-element list holding boltzmann_bias."""
        return [self.boltzmann_bias]

    def sample(self, state_below = None, state_above = None,
            layer_above = None,
            theano_rng = None):
        """
        Sample the visible units given the state of the layer above.

        Parameters
        ----------
        state_below : None
            Must be None.
        state_above : tensor_like
            State of the layer above, passed to its `downward_message`
            with for_sampling=True.
        layer_above : Layer
            The layer above this one.
        theano_rng : RandomStreams object
            Random number generator used to draw the sample.

        Returns
        -------
        rval : tensor_like
            A binomial (n=1) sample rescaled from {0, 1} to {-1, 1}.
        """
        assert state_below is None

        # Message first, then bias: same evaluation order as a two-step sum.
        pre_activation = (layer_above.downward_message(state_above, for_sampling=True)
                          + self.ising_bias(for_sampling=True))

        on_prob = T.nnet.sigmoid(2. * pre_activation)

        bits = theano_rng.binomial(size=on_prob.shape, p=on_prob,
                                   dtype=on_prob.dtype, n=1)

        return bits * 2. - 1.

    def make_state(self, num_examples, numpy_rng):
        """
        Create a shared variable holding an initial {-1, 1} sample.

        Parameters
        ----------
        num_examples : int
            Number of rows in the sample.
        numpy_rng : RandomState object
            Source of the uniform noise.

        Returns
        -------
        rval : shared variable
            Named 'v_sample_shared'.
        """
        uniform_draws = numpy_rng.uniform(0.,1., (num_examples, self.nvis))
        on_probability = sigmoid_numpy(2. * self.ising_bias_numpy())
        # Rescale the boolean draw from {0, 1} to {-1, 1}.
        spins = 2. * (uniform_draws < on_probability) - 1.
        return sharedX(spins, name = 'v_sample_shared')

    def make_symbolic_state(self, num_examples, theano_rng):
        """
        Build a symbolic {-1, 1} sample of the layer.

        Parameters
        ----------
        num_examples : int
            Number of rows in the sample.
        theano_rng : RandomStreams object
            Random number generator used to draw the sample.

        Returns
        -------
        rval : tensor_like
            Binomial sample of shape (num_examples, nvis), rescaled from
            {0, 1} to {-1, 1}.
        """
        on_prob = T.nnet.sigmoid(2. * self.ising_bias())
        bits = theano_rng.binomial(size=(num_examples, self.nvis), p=on_prob)
        return 2. * (bits) - 1.

    def expected_energy_term(self, state, average, state_below = None, average_below = None):
        """
        Return the bias term of the energy for each row of `state`.

        Parameters
        ----------
        state : tensor_like
            State (or mean state) of the visible units.
        average : bool
            Whether `state` is an average; does not change the result.
        state_below : None
            Must be None.
        average_below : None
            Must be None.

        Returns
        -------
        rval : tensor_like
            1-dimensional tensor equal to -dot(state, ising_bias()).
        """
        assert state_below is None
        assert average_below is None
        assert average in (True, False)

        # The term is linear in the state, so averaging makes no difference.
        energy = -T.dot(state, self.ising_bias())
        assert energy.ndim == 1
        return energy

    def get_monitoring_channels(self):
        """
        Build the monitoring channels for this layer.

        Returns
        -------
        rval : OrderedDict
            Min and max of the Ising-form bias, followed by min and max of
            `noisy_sampling_b` when the layer has that attribute.
        """
        bias = self.ising_bias()

        channels = OrderedDict([('ising_b_min', bias.min()),
                                ('ising_b_max', bias.max())])

        if hasattr(self, 'noisy_sampling_b'):
            for stat in ('min', 'max'):
                channels['noisy_sampling_b_' + stat] = getattr(self.noisy_sampling_b, stat)()

        return channels
