def theano_print_shape(var, msg):
    """
    Helper function for printing the shape of a Theano expression during run
    time of the Theano graph.

    Parameters
    ----------

    var : Theano expression
        The variable whose shape is to be printed at runtime.

    msg : str
        The message to be printed together with the shape.

    Returns
    -------
    A Theano expression which should be used in place of the original expression
    in order for the printing to happen.
    """
    if var.ndim == 0:
        pr = Print(msg + "(SCALAR)")(var)
        return T.switch(T.lt(0, 1), var, pr)
    else:
        pr = Print(msg)(T.shape(var))
        return T.switch(T.lt(0, 1), var, T.cast(pr[0], var.dtype))
def theano_print_value(var, msg):

    if var.ndim == 0:
        pr = Print(msg + "(SCALAR)")(var)
        return T.switch(T.lt(0, 1), var, pr)
    else:
        pr = Print(msg)(var)
        return T.switch(T.lt(0, 1), var, T.cast(pr[0], var.dtype))
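# Illustrative usage sketch (not part of the original snippet). It assumes
# Theano is installed and that `Print` and `theano.tensor as T` are imported as
# in the helpers above. The key point is that the returned expression must be
# used in place of the original one, otherwise the Print op never ends up in
# the compiled graph.
import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
x_dbg = theano_print_shape(x, 'shape of x: ')   # use x_dbg from here on
y = (x_dbg ** 2).sum()

f = theano.function([x], y)
f(np.ones((2, 3), dtype=theano.config.floatX))  # should print something like
                                                # "shape of x: __str__ = [2 3]"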
Example No. 3
def dropout_theano(x, level, seed=None):
    print("-----Input before dropout-------")
    Print(x)
    print("----------------------------------")
    if level < 0. or level >= 1:
        raise Exception('Dropout level must be in interval [0, 1[.')
    if seed is None:
        seed = np.random.randint(1, 10e6)
    rng = RandomStreams(seed=seed)
    print("-----Random stream-------", rng)
    retain_prob = 1. - level
    x *= rng.binomial(x.shape, p=retain_prob, dtype=x.dtype)
    x /= retain_prob
    print("-----Input after dropout-------")
    Print(x)
    return x
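# Illustrative usage sketch (not part of the original snippet), assuming the
# same imports as above (numpy as np, theano.printing.Print, and a
# RandomStreams implementation).
import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
dropped = dropout_theano(x, level=0.5, seed=123)
f = theano.function([x], dropped)

# Roughly half of the entries are zeroed; the survivors are scaled by 1/0.5.
print(f(np.ones((4, 6), dtype=theano.config.floatX)))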
Example No. 4
    def dynamic_scale(self, state_below):
        """
        .. todo::

            WRITEME
        """

        self.input_space.validate(state_below)

        if self.requires_reformat:
            state_below = self.input_space.format_as(state_below,
                                                     self.desired_space)

        z = self.transformer.lmul(state_below) + self.b

        if not hasattr(self, 'randomize_pools'):
            self.randomize_pools = False

        if not hasattr(self, 'pool_stride'):
            self.pool_stride = self.pool_size

        if self.randomize_pools:
            z = T.dot(z, self.permute)

        if not hasattr(self, 'min_zero'):
            self.min_zero = False

        if self.min_zero:
            p = 0.
        else:
            p = None

        last_start = self.detector_layer_dim - self.pool_size
        for i in xrange(self.pool_size):
            cur = z[:, i:last_start + i + 1:self.pool_stride]
            if p is None:
                p = cur
            else:
                p = T.maximum(cur, p)

        cost = p.sum()

        mask = T.grad(cost, z)

        counts = mask.sum(axis=0)

        reweight = T.cast(mask.shape[0], config.floatX) / T.clip(
            counts, 1.0, 1e6)

        reweight = Print('reweight', attrs=['min', 'mean', 'max'])(reweight)

        params = self.get_params()
        rval = OrderedDict()

        for param in params:
            rval[param] = reweight

        return rval
Example No. 5
def cosine_dist(tensor, matrix):
    """
    Cosine similarity along axis 1 for both inputs.
    Assumes dimensions 0 and 1 of `tensor` and `matrix` match.
    """
    matrix_norm = T.shape_padright(matrix.norm(2, axis=1))
    tensor_norm = tensor.norm(2, axis=1)
    norm_ = (matrix_norm * tensor_norm)
    norm_ = Print('norm_')(norm_)
    return T.batched_dot(matrix, tensor) / norm_
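# Illustrative shape check (not part of the original snippet), assuming
# `matrix` is (batch, dim) and `tensor` is (batch, dim, k), with `Print` and
# `T` imported as in the snippet above.
import numpy as np
import theano
import theano.tensor as T

m = T.matrix('m')     # (batch, dim)
t3 = T.tensor3('t3')  # (batch, dim, k)
sim = cosine_dist(t3, m)

f = theano.function([t3, m], sim)
out = f(np.random.randn(2, 3, 4).astype(theano.config.floatX),
        np.random.randn(2, 3).astype(theano.config.floatX))
print(out.shape)  # expected (2, 4): one similarity per (instance, slot)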
Example No. 6
    def apply(self, source):

        x_linear = self.x_to_h.apply(
            source.reshape(
                (source.shape[1], source.shape[0], source.shape[2])))
        x_linear.name = 'x_linear'
        if self.print_intermediate:
            x_linear = Print(message='x_linear info',
                             attrs=self.print_attrs)(x_linear)

        h, c = self.lstm.apply(x_linear)
        if self.print_intermediate:
            h = Print(message="hidden states info", attrs=self.print_attrs)(h)

        y_hat = self.h_to_o.apply(h)
        y_hat.name = 'y_hat'
        if self.print_intermediate:
            y_hat = Print(message="y_hat info", attrs=self.print_attrs)(y_hat)

        return y_hat
Example No. 7
def get_reconstruction_func():
    V = model.get_input_space().make_theano_batch(name="V")
    assert V.dtype == 'float32'

    if hasattr(model, 'e_step'):
        #S3C
        mf = model.e_step.variational_inference(V)
        H = mf['H_hat']
        S = mf['S_hat']
        Z = H * S
        recons = T.dot(Z, model.W.T)
    elif hasattr(model, 's3c'):
        #PDDBM

        mf = model.inference_procedure.infer(V)
        H = mf['H_hat']
        S = mf['S_hat']
        Z = H * S
        recons = T.dot(Z, model.s3c.W.T)
    else:
        #RBM
        if corrupt > -1.:
            from pylearn2.corruption import GaussianCorruptor
            c = GaussianCorruptor(stdev=corrupt)
            corrupted_V = c(V)
            H = model.mean_h_given_v(corrupted_V)
        else:
            H = model.mean_h_given_v(V)
        from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
        theano_rng = RandomStreams(42)
        H_sample = theano_rng.binomial(size=H.shape, p=H)
        from theano.printing import Print
        H_sample = Print('H_sample', attrs=['mean'])(H_sample)
        H_sample = T.cast(H_sample, 'float32')
        recons = model.mean_v_given_h(H_sample)
        recons = Print('recons', attrs=['min', 'mean', 'max'])(recons)

    rval = function([V], recons)

    return rval
Example No. 8
def clip_gradients_norm(gradients, threshold, parameters, fix_nan=False):
    gradient_sqr_vec = T.concatenate([T.sqr(g.flatten()) for g in gradients])
    gradient_norm = T.sqrt(gradient_sqr_vec.sum())
    rescale = T.maximum(gradient_norm, threshold)
    if fix_nan:
        isnan = T.or_(T.isnan(gradient_norm), T.isinf(gradient_norm))
    else:
        isnan = None
    rv = []
    for i, g in enumerate(gradients):
        if fix_nan:
            alt_g = 0.1 * parameters[i]
            print_alt_g = Print(
                "NaN detected! Fixing with pseudogradient with mean:",
                ["mean"])(alt_g)
            new_g = T.switch(isnan, print_alt_g, g / rescale)
        else:
            new_g = g / rescale
        rv.append(new_g)
    return rv
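# Illustrative usage sketch (not part of the original snippet) wiring the
# clipper into a plain gradient-descent update. The toy model below is
# hypothetical and only assumes the imports used above (Print, T, numpy).
import numpy as np
import theano
import theano.tensor as T

w = theano.shared(np.ones(3, dtype=theano.config.floatX), name='w')
x = T.vector('x')
cost = T.sqr(T.dot(w, x) - 1.)

grads = T.grad(cost, [w])
clipped = clip_gradients_norm(grads, threshold=5.0, parameters=[w],
                              fix_nan=True)

updates = [(w, w - 0.1 * g) for g in clipped]
train = theano.function([x], cost, updates=updates)
train(np.ones(3, dtype=theano.config.floatX))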
Example No. 9
def multivariate_normal_nohypers(datasets, weights, hyperparams, residuals):
    """
    Calculate the posterior likelihood of a multivariate normal distribution.
    Uses the plain inverse of the covariances.
    DEPRECATED! Currently not used in beat.
    Can only be executed within a `with model:` context.

    Parameters
    ----------
    datasets : list
        of :class:`heart.SeismicDataset` or :class:`heart.GeodeticDataset`
    weights : list
        of :class:`theano.shared`
        Square matrices of the inverse covariance matrices, used as weights
    hyperparams : dict
        of :class:`theano.`
    residuals : list or array of model residuals

    Returns
    -------
    array_like
    """
    n_t = len(datasets)

    logpts = tt.zeros((n_t), 'float64')

    for l, data in enumerate(datasets):
        M = tt.cast(shared(
            data.samples, name='nsamples', borrow=True), 'int16')
        maha = residuals[l].dot(weights[l]).dot(residuals[l].T)
        slogpdet = Print('theano logpdet')(data.covariance.slog_pdet)
        logpts = tt.set_subtensor(
            logpts[l:l + 1],
            (-0.5) * (
                M * log_2pi + slogpdet + maha
                ))

    return logpts
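# For reference (not part of the original snippet): each entry of logpts is the
# standard multivariate normal log-density evaluated with a precomputed inverse
# covariance and log-determinant, i.e.
#
#     logpts[l] = -0.5 * (M * log(2*pi) + log|C_l| + r_l^T C_l^{-1} r_l)
#
# where r_l = residuals[l], M = data.samples, C_l^{-1} = weights[l], and
# log|C_l| = data.covariance.slog_pdet.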
Example No. 10
from pylearn2.utils import serial

model = serial.load('rectifier_7.pkl')

import theano.tensor as T
X = T.matrix()
state = model.fprop(X)
target = T.matrix()

right_cost = model.layers[-1].kl(Y=target, Y_hat=state)
wrong_cost = model.layers[-1].kl(Y=target[::-1,:], Y_hat=state)

from theano.printing import Print
right_cost = Print('right_cost')(right_cost)

acc = (wrong_cost > right_cost).mean()

from theano import function

f = function([X, target], acc)

acc = f(dataset.X, dataset.y)

print acc
Example No. 11
def print_tensor(x, message=''):
    '''Print the message and the tensor when evaluated and return the same
    tensor.
    '''
    p_op = Print(message)
    return p_op(x)
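# Tiny usage sketch (not part of the original snippet), assuming Print is
# imported for the helper above: the same tensor comes back, so the call can
# be dropped into an existing expression.
import numpy as np
import theano
import theano.tensor as T

x = T.vector('x')
y = print_tensor(x, message='x =') * 2  # message and value print when f runs
f = theano.function([x], y)
f(np.array([1., 2.], dtype=theano.config.floatX))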
Example No. 12
def grad_dir_func( pdf ):
    grad = T.grad(pdf.sum(), X)
    grad = Print('before',attrs=['min','max'])(grad)
    grad /= T.sqrt(1e-15+T.sum(T.sqr(grad),axis=1).dimshuffle(0,'x'))
    grad = Print('after',attrs=['min','max'])(grad)
    return FuckYouTheano(function([X],grad))
Example No. 13
# First, we inspect the compiled graph to verify that it does not use the
# softmax op.
# Second, we run the same functionality without the Print op. We verify that
# the softmax op appears in the compiled graph, and we verify that the new
# graph gets the correct output.
from ex_03_detect_op_soln import contains_softmax

import numpy as np

from theano import function
from theano.printing import Print
import theano.tensor as T

X = T.matrix()
p_tilde = T.exp(X)
p_tilde = Print('p_tilde', attrs=['min', 'max'])(p_tilde)
denom = p_tilde.sum(axis=1, keepdims=True)
p = p_tilde / denom

f = function([X], p)

assert not contains_softmax(f)

X = -1000. * np.ones((2, 2)).astype(X.dtype)

output = f(X)

assert np.all(np.isnan(output))

X = T.matrix()
p_tilde = T.exp(X)
Example No. 14
def apply_nan_suppression(updates, print_mode='all'):
    """Returns a modified update dictionary replacing updates containing
    non-finite values with no-op updates

    If any NaN or infinity values are found in the new_expression (second)
    half of an update, the update is replaced with the do-nothing update
    (shared_variable, shared_variable).

    This can be used to patch over the most intransigent, slippery instances
    of NaNs creeping into training, if they appear rarely and one is reasonably
    sure that the problem is not fundamental to the model.

    Parameters
    ----------
    updates : OrderedDict
        A dictionary mapping parameters to update expressions

    print_mode : str
        If ``'all'``, print a debugging message containing the name of the
        shared variable and its suppressed update value whenever a non-finite
        value is detected. If ``'shape'``, print only the name of the variable
        and the shape of the update value. If ``'none'``, suppress NaNs
        silently without printing anything.

    Returns
    -------
    OrderedDict
        A copy of `updates` with expressions containing non-finite values
        replaced by the original value.

    Examples
    --------
    >>> param = theano.shared(np.array([0., 0.], dtype=np.float32),
    ...                       name='param')
    >>> inc = T.fvector('inc')
    >>> updates = OrderedDict([(param, param + inc)])
    >>> safe_updates = apply_nan_suppression(updates)
    >>> func = theano.function([inc], safe_updates[param],
    ...                        updates=safe_updates)
    >>> func([1., 2.])
    array([ 1.,  2.], dtype=float32)
    >>> func([2., float('nan')])
    Warning: non-finite update suppressed for param: __str__ = [  3.  nan]
    array([ 1.,  2.], dtype=float32)
    """
    new_updates = OrderedDict([])

    for shared_variable, new_expression in updates.iteritems():
        isnan = T.isnan(new_expression).any() | T.isinf(new_expression).any()

        warning_msg = 'Warning: non-finite update suppressed for %s'
        if print_mode == 'all':
            suppressed = T.zeros_like(
                Print((warning_msg + ':') %
                      shared_variable.name)(new_expression))
        elif print_mode == 'shape':
            suppressed = T.zeros_like(
                Print((warning_msg + ':') % shared_variable.name,
                      attrs=('shape', ))(new_expression))
        elif print_mode == 'none' or print_mode is None:
            suppressed = T.zeros_like(new_expression)
        else:
            raise ValueError(
                "print_mode must be one of 'all', 'shape', or 'none'")

        # For some reason, the ifelse needs to be used in a calculation, or the
        # Print gets optimized away. So we can't do
        #   suppressed = (zeros_like(Print('warning')(new_expression)) +
        #                 shared_variable)
        #   ifelse(isnan, suppressed, new_expression)
        new_updates[shared_variable] = shared_variable + ifelse(
            isnan, suppressed, new_expression - shared_variable)

    return new_updates
Example No. 15
if t not in [ENERGY, SCORE, SCORED]:
    g.components.append(HeatMap( f = function([X], model.free_energy(X)),  normalizer = None ))
    offset = g.render().mean()
#

if t == ENERGY:
    df = dataset.free_energy_func
    mfe = model.free_energy(X)
    mf = function([X],mfe)
    normalizer = energy_normalizer
elif t == PDF:
    df = dataset.pdf_func

    mfe = model.free_energy(X)
    mfe = Print('model free energy',attrs=['min','max'])(mfe)
    mf = function([X], T.exp(-mfe+offset))

    normalizer = pdf_normalizer
elif t == GPDF:
    df = grad_func(dataset.pdf(X))
    mf = grad_func(T.exp(-model.free_energy(X)+offset))

    normalizer = gpdf_normalizer
elif t == GDPDF:
    df = grad_dir_func(dataset.pdf(X))
    mf = grad_dir_func(T.exp(-model.free_energy(X)+offset))

    normalizer = gdpdf_normalizer
elif t == SCORE:
    df = grad_func(- dataset.free_energy(X))
Example No. 16
def theano_print_min_max_vals(var, msg):
    pr = Print(msg)(T.stack((T.min(var), T.max(var))))
    return T.switch(T.lt(0, 1), var, T.cast(pr[0], var.dtype))
Example No. 17
def theano_print_vals(var, msg):
    return Print(msg)(var)
Example No. 18
        def recurrence(i, h_tm1, w_a, M_a, *args, **kwargs):
            """
            notes
            Headers from paper in all caps
            mem = n_article slots if is_article else n_title_slots

            :param i: center index of sliding window
            :param h_tm1: h_{t-1} (hidden state)
            :param w_a: attention weights for article memory
            :param M_a: article memory
            :param args: gru_weights, maybe w_t, maybe M_t
                   gru_weights: weights with which to initialize GRULayer on each time step
                   w_t: attention weights for titles memory
                   M_t: titles memory
            :param kwargs: is_training, is_article
                   is_training:
                   is_article: we use different parts of memory when working with an article
            :return: [y = model outputs,
                      i + 1 = incremented index,
                      h, w, M (see above)]
            """
            is_training = kwargs['is_training']
            is_article = kwargs['is_article']
            gru_weights = args[:depth]
            w_t = M_t = None
            if len(args) > depth:
                w_t = args[depth]
                M_t = args[depth + 1]

            i_type = T.iscalar if is_article or is_training else T.ivector
            assert i.type == i_type

            if not is_article:
                assert w_t is not None and M_t is not None

            word_idxs = i
            if is_article or is_training:
                # get representation of word window
                document = articles if is_article else titles  # [instances, bucket_width]
                word_idxs = document[:, i:i + 1]  # [instances, 1]
            # x_i = self.emb[word_idxs].flatten(ndim=2)  # [instances, embedding_dim]

            input = InputLayer(shape=(None, 1), input_var=word_idxs)
            embed = EmbeddingLayer(input, num_embeddings, embedding_dim)
            gru = GRULayer(incoming=embed,
                           num_units=embedding_dim,
                           hid_init=self.gru0)
            for weight in gru_weights:
                gru = GRULayer(incoming=gru,
                               num_units=embedding_dim,
                               hid_init=weight)
            x_i = get_output(gru).flatten(ndim=2)
            x_i = Print('x_i')(x_i)  # [instances, embedding_dim]

            gru_weights = []

            if is_article:
                M_read = M_a  # [instances, memory_size, n_article_slots]
                w_read = w_a  # [instances, n_article_slots]
            else:
                M_read = T.concatenate(
                    [M_a, M_t],
                    axis=2)  # [instances, memory_size, n_title_slots]
                w_read = T.concatenate([w_a, w_t],
                                       axis=1)  # [instances, n_title_slots]

            # eqn 15
            c = T.batched_dot(M_read, w_read)  # [instances, memory_size]

            # EXTERNAL MEMORY READ
            def get_attention(Wg, bg, M, w):
                g = T.nnet.sigmoid(T.dot(x_i, Wg) + bg)  # [instances, mem]

                # eqn 11
                k = T.dot(h_tm1, self.Wk) + self.bk  # [instances, memory_size]

                # eqn 13
                beta = T.dot(h_tm1, self.Wb) + self.bb
                beta = T.nnet.softplus(beta)
                beta = T.addbroadcast(beta, 1)  # [instances, 1]

                # eqn 12
                w_hat = T.nnet.softmax(beta * cosine_dist(M, k))

                # eqn 14
                return (1 - g) * w + g * w_hat  # [instances, mem]

            w_a = get_attention(self.Wg_a, self.bg_a, M_a,
                                w_a)  # [instances, n_article_slots]
            if not is_article:
                w_t = get_attention(self.Wg_t, self.bg_t, M_t,
                                    w_t)  # [instances, n_title_slots]

            # MODEL INPUT AND OUTPUT
            # eqn 9
            h = T.dot(c, self.Wh) + T.dot(
                x_i, self.Wx) + self.bh  # [instances, hidden_size]

            # eqn 10
            y = T.nnet.softmax(T.dot(h, self.W) +
                               self.b)  # [instances, nclasses]

            # EXTERNAL MEMORY UPDATE
            def update_memory(We, be, w_update, M_update):
                # eqn 17
                e = T.nnet.sigmoid(T.dot(h_tm1, We) + be)  # [instances, mem]
                f = 1. - w_update * e  # [instances, mem]

                # eqn 16
                v = T.tanh(T.dot(h, self.Wv) +
                           self.bv)  # [instances, memory_size]

                # need to add broadcast layers for memory update
                f = f.dimshuffle(0, 'x', 1)  # [instances, 1, mem]
                u = w_update.dimshuffle(0, 'x', 1)  # [instances, 1, mem]
                v = v.dimshuffle(0, 1, 'x')  # [instances, memory_size, 1]

                # eqn 19
                return M_update * f + T.batched_dot(v, u) * (
                    1 - f)  # [instances, memory_size, mem]

            M_a = update_memory(self.We_a, self.be_a, w_a, M_a)
            attention_and_memory = [w_a, M_a]
            if not is_article:
                M_t = update_memory(self.We_t, self.be_t, w_t, M_t)
                attention_and_memory += [w_t, M_t]

            y_max = y.argmax(axis=1).astype('int32')
            next_idxs = i + 1 if is_training or is_article else y_max
            return [y, y_max, next_idxs, h] + attention_and_memory
Example No. 19
def theano_print_shape(var, msg):
    pr = Print(msg)(T.shape(var))
    return T.switch(T.lt(0, 1), var, T.cast(pr[0], var.dtype))
Example No. 20
def print_(x, message=''):
    return Print(message)(x)