Example #1
    def fprop(self, state_below):
        self.input_space.validate(state_below)

        if self.needs_reformat:
            state_below = self.input_space.format_as(state_below,
                                                     self.desired_space)

        for value in get_debug_values(state_below):
            if self.mlp.batch_size is not None and value.shape[
                    0] != self.mlp.batch_size:
                raise ValueError("state_below should have batch size " +
                                 str(self.mlp.batch_size) + " but has " +
                                 str(value.shape[0]))

        self.desired_space.validate(state_below)
        assert state_below.ndim == 2

        if not hasattr(self, 'no_affine'):
            self.no_affine = False

        if self.no_affine:
            rval = state_below
        else:
            assert self.W.ndim == 2
            b = self.b
            W = self.W

            rval = T.dot(state_below, W) + b

        for value in get_debug_values(rval):
            if self.mlp.batch_size is not None:
                assert value.shape[0] == self.mlp.batch_size

        return rval
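All of these examples lean on Theano's test-value machinery: when config.compute_test_value is anything other than 'off' and every argument carries a test value, get_debug_values yields those values exactly once, and otherwise it yields nothing (or warns/raises, depending on the mode, as Examples #13, #21 and #38 below demonstrate). A minimal, self-contained sketch of that behaviour, assuming an old Theano where theano.gof.op.get_debug_values is importable (names here are illustrative, not taken from the examples):

import numpy as np
import theano
import theano.tensor as T
from theano.gof.op import get_debug_values

theano.config.compute_test_value = 'warn'  # any mode other than 'off' enables debug values

x = T.matrix('x')
x.tag.test_value = np.zeros((4, 10), dtype=theano.config.floatX)

for x_val in get_debug_values(x):
    # the loop body runs exactly once, with the numpy test value attached above
    assert x_val.shape == (4, 10)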
Example #2
    def fprop(self, state_below):

        self.input_space.validate(state_below)

        if self.needs_reformat:
            state_below = self.input_space.format_as(state_below,
                                                     self.desired_space)

        for value in get_debug_values(state_below):
            if self.mlp.batch_size is not None and value.shape[
                    0] != self.mlp.batch_size:
                raise ValueError("state_below should have batch size " +
                                 str(self.mlp.batch_size) + " but has " +
                                 str(value.shape[0]))

        self.desired_space.validate(state_below)
        assert state_below.ndim == 2

        assert self.W.ndim == 3

        Z = T.tensordot(state_below, self.W, axes=[[1], [0]]) + self.b

        rval = batched_softmax(Z)

        for value in get_debug_values(rval):
            if self.mlp.batch_size is not None:
                assert value.shape[0] == self.mlp.batch_size

        return rval
Example #3
    def fprop(self, state_below):
        self.input_space.validate(state_below)

        if self.needs_reformat:
            state_below = self.input_space.format_as(state_below, self.desired_space)

        for value in get_debug_values(state_below):
            if self.mlp.batch_size is not None and value.shape[0] != self.mlp.batch_size:
                raise ValueError("state_below should have batch size "+str(self.dbm.batch_size)+" but has "+str(value.shape[0]))

        self.desired_space.validate(state_below)
        assert state_below.ndim == 2

        if not hasattr(self, 'no_affine'):
            self.no_affine = False

        if self.no_affine:
            rval = state_below
        else:
            assert self.W.ndim == 2
            b = self.b
            W = self.W

            rval = T.dot(state_below, W) + b

        for value in get_debug_values(rval):
            if self.mlp.batch_size is not None:
                assert value.shape[0] == self.mlp.batch_size

        return rval
Example #4
    def fprop(self, state_below):

        self.input_space.validate(state_below)

        if self.needs_reformat:
            state_below = self.input_space.format_as(state_below, self.desired_space)

        for value in get_debug_values(state_below):
            if self.mlp.batch_size is not None and value.shape[0] != self.mlp.batch_size:
                raise ValueError("state_below should have batch size "+str(self.dbm.batch_size)+" but has "+str(value.shape[0]))

        self.desired_space.validate(state_below)
        assert state_below.ndim == 2

        W = T.dot(self.V, self.U)
        assert W.ndim == 2

        Z = T.dot(state_below, W.T)

        rval = Z

        for value in get_debug_values(rval):
            if self.mlp.batch_size is not None:
                assert value.shape[0] == self.mlp.batch_size

        return (rval, state_below)
Example #5
    def entropy_h(self, H_hat):
        """
        .. todo::

            WRITEME properly

        entropy of the hidden layers under the mean field distribution
        defined by H_hat
        """

        for Hv in get_debug_values(H_hat[0]):
            assert Hv.min() >= 0.0
            assert Hv.max() <= 1.0

        total = entropy_binary_vector(H_hat[0])

        for H in H_hat[1:]:

            for Hv in get_debug_values(H):
                assert Hv.min() >= 0.0
                assert Hv.max() <= 1.0

            total += entropy_binary_vector(H)

        return total
Example #6
    def fprop(self, state_below,targets):
        self.input_space.validate(state_below)        
        if self.needs_reformat:
            state_below = self.input_space.format_as(state_below, self.desired_space)
        for value in get_debug_values(state_below):
            if self.mlp.batch_size is not None and value.shape[0] != self.mlp.batch_size:
                raise ValueError("state_below should have batch size "+str(self.dbm.batch_size)+" but has "+str(value.shape[0]))
        self.desired_space.validate(state_below)
        
        assert state_below.ndim == 2
        if not hasattr(self, 'no_affine'):
            self.no_affine = False
        if self.no_affine:
            raise NotImplementedError()

        assert self.W_class.ndim == 3
        assert self.W_cluster.ndim == 2

        #we get the cluster by doing hW_cluster + b_cluster
        probcluster = T.dot(state_below, self.W_cluster) + self.b_cluster
        probcluster = T.nnet.softmax(probcluster)


        #check this line again
        batch_clusters = self.array_clusters[T.cast(T.argmax(targets).flatten(),'int32')]
        Z = T.nnet.GroupDot(self.n_clusters)(state_below,
                                                        self.W_class,
                                                        self.b_class,
                                                        T.cast(batch_clusters,'int32'))
        probclass = T.nnet.softmax(Z)
        
        for value in get_debug_values(probclass):
             if self.mlp.batch_size is not None:
                assert value.shape[0] == self.mlp.batch_size
        return probclass, probcluster
Example #7
    def fprop(self, state_below):

        self.input_space.validate(state_below)

        if self.needs_reformat:
            state_below = self.input_space.format_as(state_below, self.desired_space)

        for value in get_debug_values(state_below):
            if value.shape[0] != self.mlp.batch_size:
                raise ValueError("state_below should have batch size "+str(self.dbm.batch_size)+" but has "+str(value.shape[0]))

        self.desired_space.validate(state_below)

        assert self.W.ndim == 2
        assert state_below.ndim == 2

        b = self.b

        Z = T.dot(state_below, self.W) + b

        rval = T.nnet.softmax(Z)

        for value in get_debug_values(rval):
            assert value.shape[0] == self.mlp.batch_size

        return rval
Example #8
    def fprop(self, state_below):

        self.input_space.validate(state_below)

        if self.needs_reformat:
            state_below = self.input_space.format_as(state_below, self.desired_space)

        for value in get_debug_values(state_below):
            if self.mlp.batch_size is not None and value.shape[0] != self.mlp.batch_size:
                raise ValueError("state_below should have batch size "+str(self.dbm.batch_size)+" but has "+str(value.shape[0]))

        self.desired_space.validate(state_below)
        assert state_below.ndim == 2

        assert self.W.ndim == 3

        Z = T.tensordot(state_below, self.W, axes=[[1],[0]]) + self.b

        rval = batched_softmax(Z)

        for value in get_debug_values(rval):
            if self.mlp.batch_size is not None:
                assert value.shape[0] == self.mlp.batch_size

        return rval
Example #9
def lrn_same_map(c01b,size,pow,scale,image_side):
    mx = None
    for c01bv in get_debug_values(c01b):
        assert not np.any(np.isinf(c01bv))
        assert c01bv.shape[1] == image_side
        assert c01bv.shape[2] == image_side
        
    new_side = size-1+image_side


    wide_infinity = T.alloc(0.0,
                            c01b.shape[0],
                            new_side,
                            new_side,
                            c01b.shape[3])

    c01b_pad = T.set_subtensor(
        wide_infinity[:, 1:1 + image_side, 1:1 + image_side, :], T.sqr(c01b))


    wide_infinity_count = T.alloc(0,  c01b.shape[0], new_side,
                                  new_side,c01b.shape[3])
    c01b_count = T.set_subtensor(wide_infinity_count[:, 1:1+image_side, 1:1+image_side, :], 1)
    for row_within_pool in xrange(size):
        row_stop = image_side + row_within_pool
        for col_within_pool in xrange(size):
            col_stop = image_side + col_within_pool
            cur = c01b_pad[:,
                           row_within_pool:row_stop:1,
                           col_within_pool:col_stop:1,
                           :]

            cur_count = c01b_count[:,
                                   row_within_pool:row_stop:1,
                                   col_within_pool:col_stop:1,
                                   :]
            if mx is None:
                mx = cur
                count = cur_count
            else:
                mx = mx + cur
                count = count + cur_count


    mx /= count
    mx = scale*mx
    mx = mx+1
    for mxv in get_debug_values(mx):
        assert not np.any(np.isnan(mxv))
        assert not np.any(np.isinf(mxv))
    new_c01b = c01b/T.pow(mx,pow)
    return new_c01b
Example #10
    def _validate_impl(self, is_numeric, batch):
        # checks that batch isn't a tuple, checks batch.type against self.dtype
        super(IndexSequenceSpace, self)._validate_impl(is_numeric, batch)

        if is_numeric:
            # Use the 'CudaNdarray' string to avoid importing
            # theano.sandbox.cuda when it is not available
            if not isinstance(batch, np.ndarray) \
               and str(type(batch)) != "<type 'CudaNdarray'>":
                raise TypeError("The value of a IndexSequenceSpace batch "
                                "should be a numpy.ndarray, or CudaNdarray, "
                                "but is %s." % str(type(batch)))
            if batch.ndim != 2:
                raise ValueError("The value of a IndexSequenceSpace batch "
                                 "must be 2D, got %d dimensions for %s." %
                                 (batch.ndim, batch))
            if batch.shape[1] != self.dim:
                raise ValueError("The width of a IndexSequenceSpace batch "
                                 "must match with the space's dimension, but "
                                 "batch has shape %s and dim = %d." %
                                 (str(batch.shape), self.dim))
        else:
            if not isinstance(batch, theano.gof.Variable):
                raise TypeError("IndexSequenceSpace batch should be a theano "
                                "Variable, got " + str(type(batch)))
            if not isinstance(batch.type, (theano.tensor.TensorType,
                                           CudaNdarrayType)):
                raise TypeError("IndexSequenceSpace batch should be "
                                "TensorType or CudaNdarrayType, got " +
                                str(batch.type))
            if batch.ndim != 2:
                raise ValueError('IndexSequenceSpace batches must be 2D, got '
                                 '%d dimensions' % batch.ndim)
            for val in get_debug_values(batch):
                self.np_validate(val)
Example #11
def entropy_binary_vector(P):
    """
        if P[i,j] represents the probability
            of some binary random variable X[i,j] being 1
        then rval[i] gives the entropy of the random vector
        X[i,:]
    """

    oneMinusP = 1.-P

    PlogP = xlogx(P)
    omPlogOmP = xlogx(oneMinusP)

    term1 = - T.sum( PlogP , axis=1)
    assert len(term1.type.broadcastable) == 1

    term2 = - T.sum( omPlogOmP , axis =1 )
    assert len(term2.type.broadcastable) == 1

    rval = term1 + term2

    for plp, olo, t1, t2, rv in get_debug_values(PlogP, omPlogOmP, term1, term2, rval):
        debug_assert(not np.any(np.isnan(plp)))
        debug_assert(not np.any(np.isinf(plp)))
        debug_assert(not np.any(np.isnan(olo)))
        debug_assert(not np.any(np.isinf(olo)))

        debug_assert(not np.any(np.isnan(t1)))
        debug_assert(not np.any(np.isnan(t2)))
        debug_assert(not np.any(np.isnan(rv)))

    return rval
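For reference, the quantity computed here is the per-row sum of independent Bernoulli entropies, -sum_j [P[i,j] log P[i,j] + (1 - P[i,j]) log(1 - P[i,j])]. A plain NumPy sketch of the same formula (illustrative only; clipping stands in for xlogx's handling of 0 log 0):

import numpy as np

def entropy_binary_vector_np(P, eps=1e-12):
    # per-row sum of Bernoulli entropies; clipping avoids log(0)
    P = np.clip(P, eps, 1.0 - eps)
    return -(P * np.log(P) + (1.0 - P) * np.log(1.0 - P)).sum(axis=1)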
Example #12
 def validate(self, batch):
     if not isinstance(batch, theano.gof.Variable):
         raise TypeError(
             "Conv2DSpace batches must be theano Variables, got " +
             str(type(batch)))
     if not isinstance(batch.type,
                       (theano.tensor.TensorType, CudaNdarrayType)):
         raise TypeError()
     if batch.ndim != 4:
         raise ValueError()
     for val in get_debug_values(batch):
         d = self.axes.index('c')
         actual_channels = val.shape[d]
         if actual_channels != self.num_channels:
             raise ValueError("Expected axis "+str(d)+" to be number of channels ("+str(self.num_channels)+\
                     ") but it is "+str(actual_channels))
         assert val.shape[self.axes.index('c')] == self.num_channels
         for coord in [0, 1]:
             d = self.axes.index(coord)
             actual_shape = val.shape[d]
             expected_shape = self.shape[coord]
             if actual_shape != expected_shape:
                 raise ValueError("Conv2DSpace with shape "+str(self.shape) + \
                         " and axes " + str(self.axes) + " expected dimension " + \
                         str(d) + " of a batch (" + str(batch)+") to have length " + str(expected_shape) + \
                         " but it has "+str(actual_shape))
Example #13
def test_get_debug_values_exc():
    """tests that get_debug_value raises an exception when
        debugger is set to raise and a value is missing """

    prev_value = config.compute_test_value
    try:
        config.compute_test_value = 'raise'

        x = T.vector()

        try:
            for x_val in op.get_debug_values(x):
                # this assert catches the case where we
                # erroneously get a value returned
                assert False
            raised = False
        except AttributeError:
            raised = True

        # this assert catches the case where we got []
        # returned, and possibly issued a warning,
        # rather than raising an exception
        assert raised

    finally:
        config.compute_test_value = prev_value
Example #14
def test_kl():
    """
    Test whether function kl() has properly processed the input.
    """
    init_mode = theano.config.compute_test_value
    theano.config.compute_test_value = 'raise'
    
    try:
        mlp = MLP(layers=[Sigmoid(dim=10, layer_name='Y', irange=0.1)],
                  nvis=10)
        X = mlp.get_input_space().make_theano_batch()
        Y = mlp.get_output_space().make_theano_batch()
        X.tag.test_value = np.random.random(
            get_debug_values(X)[0].shape).astype(theano.config.floatX)
        Y_hat = mlp.fprop(X)

        # This call should not raise any error:
        ave = kl(Y, Y_hat, 1)

        # The following calls should raise ValueError exceptions:
        Y.tag.test_value[2][3] = 1.1
        np.testing.assert_raises(ValueError, kl, Y, Y_hat, 1)
        Y.tag.test_value[2][3] = -0.1
        np.testing.assert_raises(ValueError, kl, Y, Y_hat, 1)
    
    finally:
        theano.config.compute_test_value = init_mode
Example #15
    def _validate_impl(self, is_numeric, batch):
        # checks that batch isn't a tuple, checks batch.type against self.dtype
        super(IndexSequenceSpace, self)._validate_impl(is_numeric, batch)

        if is_numeric:
            # Use the 'CudaNdarray' string to avoid importing
            # theano.sandbox.cuda when it is not available
            if not isinstance(batch, np.ndarray) \
               and str(type(batch)) != "<type 'CudaNdarray'>":
                raise TypeError("The value of a IndexSequenceSpace batch "
                                "should be a numpy.ndarray, or CudaNdarray, "
                                "but is %s." % str(type(batch)))
            if batch.ndim != 2:
                raise ValueError("The value of a IndexSequenceSpace batch "
                                 "must be 2D, got %d dimensions for %s." %
                                 (batch.ndim, batch))
            if batch.shape[1] != self.dim:
                raise ValueError("The width of a IndexSequenceSpace batch "
                                 "must match with the space's dimension, but "
                                 "batch has shape %s and dim = %d." %
                                 (str(batch.shape), self.dim))
        else:
            if not isinstance(batch, theano.gof.Variable):
                raise TypeError("IndexSequenceSpace batch should be a theano "
                                "Variable, got " + str(type(batch)))
            if not isinstance(batch.type,
                              (theano.tensor.TensorType, CudaNdarrayType)):
                raise TypeError("IndexSequenceSpace batch should be "
                                "TensorType or CudaNdarrayType, got " +
                                str(batch.type))
            if batch.ndim != 2:
                raise ValueError('IndexSequenceSpace batches must be 2D, got '
                                 '%d dimensions' % batch.ndim)
            for val in get_debug_values(batch):
                self.np_validate(val)
Example #16
def expand_2d(b01c, expand_shape, expand_stride, image_shape):
    for b01cv in get_debug_values(b01c):
        assert not np.any(np.isinf(b01cv))
        assert b01cv.shape[1] == image_shape[0]
        assert b01cv.shape[2] == image_shape[1]
        assert b01cv.shape[3] == np.prod(expand_shape)
        
    for i in range(len(expand_shape)):
        assert expand_shape[i] % expand_stride[i] ==0
        
    b0101 = b01c.reshape((b01c.shape[0], image_shape[0], image_shape[1],
                          expand_shape[0], expand_shape[1]))
         
    required_r = (image_shape[0] - 1) * expand_stride[0] + expand_shape[0]
    required_c = (image_shape[1] - 1) * expand_stride[1] + expand_shape[1]
    wide_b01 = T.alloc(0., b01c.shape[0], required_r, required_c)
    
    for row_within_expand in xrange(expand_shape[0]):
        row_stop = (image_shape[0] - 1) * expand_stride[0] + \
                    row_within_expand + 1
        for col_within_expand in xrange(expand_shape[1]):
            col_stop = (image_shape[1] - 1) * expand_stride[1] + \
                        col_within_expand + 1
            wide_b01 = T.inc_subtensor(wide_b01[:,
                row_within_expand:row_stop:expand_stride[0], 
                col_within_expand:col_stop:expand_stride[1]],
            b0101[:,:,:,row_within_expand, col_within_expand])
            
    wide_b01 = wide_b01 / (expand_shape[0] / expand_stride[0]) ** 2
    wide_b01c = wide_b01.reshape((b01c.shape[0], required_r, required_c, 1))
    return wide_b01c
Example #17
    def expected_energy_term(self, state, average, state_below, average_below):

        # state = Print('h_state', attrs=['min', 'max'])(state)

        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError("self.dbm.batch_size is %d but got shape of %d" % (self.dbm.batch_size, sb.shape[0]))
                    assert reduce(lambda x,y: x * y, sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below, self.desired_space)

        # Energy function is linear so it doesn't matter if we're averaging or not
        # Specifically, our terms are -u^T W d - b^T d where u is the upward state of layer below
        # and d is the downward state of this layer

        bias_term = T.dot(state, self.b)
        weights_term = (self.transformer.lmul(state_below) * state).sum(axis=1)

        rval = -bias_term - weights_term

        assert rval.ndim == 1

        return rval
Example #18
    def expected_energy_term(self, state, average, state_below, average_below):

        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError(
                            "self.dbm.batch_size is %d but got shape of %d" %
                            (self.dbm.batch_size, sb.shape[0]))
                    assert reduce(lambda x, y: x * y,
                                  sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below,
                                                     self.desired_space)

        downward_state = self.downward_state(state)
        self.h_space.validate(downward_state)

        # Energy function is linear so it doesn't matter if we're averaging or not
        # Specifically, our terms are -u^T W d - b^T d where u is the upward state of layer below
        # and d is the downward state of this layer

        bias_term = T.dot(downward_state, self.b)
        weights_term = (self.transformer.lmul(state_below) *
                        downward_state).sum(axis=1)

        rval = -bias_term - weights_term

        assert rval.ndim == 1

        return rval
Example #19
    def mf_update(self, state_below, state_above, layer_above = None, double_weights = False, iter_name = None):

        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError("self.dbm.batch_size is %d but got shape of %d" % (self.dbm.batch_size, sb.shape[0]))
                    assert reduce(lambda x,y: x * y, sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below, self.desired_space)

        if iter_name is None:
            iter_name = 'anon'

        if state_above is not None:
            assert layer_above is not None
            msg = layer_above.downward_message(state_above)
            msg.name = 'msg_from_'+layer_above.layer_name+'_to_'+self.layer_name+'['+iter_name+']'
        else:
            msg = None

        if double_weights:
            state_below = 2. * state_below
            state_below.name = self.layer_name + '_'+iter_name + '_2state'
        z = self.transformer.lmul(state_below) + self.b
        if self.layer_name is not None and iter_name is not None:
            z.name = self.layer_name + '_' + iter_name + '_z'
        if msg is not None:
            z = z + msg
        h = T.tanh(z)

        return h
Example #20
def kl(Y, Y_hat, batch_axis):
    """
    Warning: This function expects a sigmoid nonlinearity in the
    output layer. Returns a batch (vector) of mean across units of
    KL divergence for each example,
    KL(P || Q) where P is defined by Y and Q is defined by Y_hat:

    p log p - p log q + (1-p) log (1-p) - (1-p) log (1-q)
    For binary p, some terms drop out:
    - p log q - (1-p) log (1-q)
    - p log sigmoid(z) - (1-p) log sigmoid(-z)
    p softplus(-z) + (1-p) softplus(z)

    Parameters
    ----------
    Y : Variable
        targets for the sigmoid outputs. Currently Y must be purely binary.
        If it's not, you'll still get the right gradient, but the
        value in the monitoring channel will be wrong.
    Y_hat : Variable
        predictions made by the sigmoid layer. Y_hat must be generated by
        fprop, i.e., it must be a symbolic sigmoid.
    batch_axis : list
        list of axes to compute average kl divergence across.

    Returns
    -------
    ave : Variable
        average kl divergence between Y and Y_hat.
    """

    assert hasattr(Y_hat, 'owner')
    assert batch_axis is not None

    owner = Y_hat.owner
    assert owner is not None
    op = owner.op

    if not hasattr(op, 'scalar_op'):
        raise ValueError("Expected Y_hat to be generated by an Elemwise "
                         "op, got "+str(op)+" of type "+str(type(op)))
    assert isinstance(op.scalar_op, T.nnet.sigm.ScalarSigmoid)

    for Yv in get_debug_values(Y):
        if not (Yv.min() >= 0.0 and Yv.max() <= 1.0):
            raise ValueError("Expected Y to be between 0 and 1. Either Y"
                             + "< 0 or Y > 1 was found in the input.")

    z, = owner.inputs

    term_1 = Y * T.nnet.softplus(-z)
    term_2 = (1 - Y) * T.nnet.softplus(z)

    total = term_1 + term_2
    naxes = total.ndim
    axes_to_reduce = list(range(naxes))
    del axes_to_reduce[batch_axis]
    ave = total.mean(axis=axes_to_reduce)

    return ave
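The identity quoted in the docstring, -p log sigmoid(z) - (1-p) log sigmoid(-z) = p softplus(-z) + (1-p) softplus(z), follows from -log sigmoid(z) = softplus(-z). A small NumPy check of that identity (independent of the library code; the helper names are illustrative):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def softplus(z):
    return np.log1p(np.exp(z))

rng = np.random.RandomState(0)
z = rng.randn(100)
p = rng.rand(100)

lhs = -p * np.log(sigmoid(z)) - (1 - p) * np.log(sigmoid(-z))
rhs = p * softplus(-z) + (1 - p) * softplus(z)
assert np.allclose(lhs, rhs)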
Example #21
def test_get_debug_values_success():
    """tests that get_debug_value returns values when available
    (and the debugger is on)"""

    prev_value = config.compute_test_value
    for mode in ['ignore', 'warn', 'raise']:

        try:
            config.compute_test_value = mode

            x = T.vector()
            x.tag.test_value = numpy.zeros((4,), dtype=config.floatX)
            y = numpy.zeros((5, 5))

            iters = 0

            for x_val, y_val in op.get_debug_values(x, y):

                assert x_val.shape == (4,)
                assert y_val.shape == (5, 5)

                iters += 1

            assert iters == 1

        finally:
            config.compute_test_value = prev_value
Example #22
    def truncated_KL(self, V, obs, Y=None, no_v_bias=False):
        """ KL divergence between variation and true posterior, dropping terms that don't
            depend on the variational parameters

            if no_v_bias is True, ignores the contribution of the visible biases to the expected energy
            """
        """
            D_KL ( Q(h ) || P(h | v) ) =  - sum_h Q(h) log P(h | v) + sum_h Q(h) log Q(h)
                                       = -sum_h Q(h) log P( h, v) + sum_h Q(h) log P(v) + sum_h Q(h) log Q(h)
            <truncated version>        = -sum_h Q(h) log P( h, v) + sum_h Q(h) log Q(h)
                                       = -sum_h Q(h) log exp( -E (h,v)) + sum_h Q(h) log Z + sum_H Q(h) log Q(h)
            <truncated version>        = sum_h Q(h) E(h, v) + sum_h Q(h) log Q(h)

            this comment was written before adding support for Y
        """

        H_hat = obs['H_hat']

        for Hv in get_debug_values(H_hat):
            assert Hv.min() >= 0.0
            assert Hv.max() <= 1.0

        entropy_term = -self.model.entropy_h(H_hat=H_hat)
        assert len(entropy_term.type.broadcastable) == 1
        energy_term = self.model.expected_energy_batch(V_hat=V,
                                                       H_hat=H_hat,
                                                       Y_hat=Y,
                                                       no_v_bias=no_v_bias)
        assert len(energy_term.type.broadcastable) == 1

        KL = entropy_term + energy_term

        return KL
Example #23
def expand_2d(b01c, expand_shape, expand_stride, image_shape):
    for b01cv in get_debug_values(b01c):
        assert not np.any(np.isinf(b01cv))
        assert b01cv.shape[1] == image_shape[0]
        assert b01cv.shape[2] == image_shape[1]
        assert b01cv.shape[3] == np.prod(expand_shape)

    for i in range(len(expand_shape)):
        assert expand_shape[i] % expand_stride[i] == 0

    b0101 = b01c.reshape((b01c.shape[0], image_shape[0], image_shape[1],
                          expand_shape[0], expand_shape[1]))

    required_r = (image_shape[0] - 1) * expand_stride[0] + expand_shape[0]
    required_c = (image_shape[1] - 1) * expand_stride[1] + expand_shape[1]
    wide_b01 = T.alloc(0., b01c.shape[0], required_r, required_c)

    for row_within_expand in xrange(expand_shape[0]):
        row_stop = (image_shape[0] - 1) * expand_stride[0] + \
                    row_within_expand + 1
        for col_within_expand in xrange(expand_shape[1]):
            col_stop = (image_shape[1] - 1) * expand_stride[1] + \
                        col_within_expand + 1
            wide_b01 = T.inc_subtensor(
                wide_b01[:, row_within_expand:row_stop:expand_stride[0],
                         col_within_expand:col_stop:expand_stride[1]],
                b0101[:, :, :, row_within_expand, col_within_expand])

    wide_b01 = wide_b01 / (expand_shape[0] / expand_stride[0])**2
    wide_b01c = wide_b01.reshape((b01c.shape[0], required_r, required_c, 1))
    return wide_b01c
Example #24
    def truncated_KL(self, V, obs, no_v_bias = False):
        """ KL divergence between variation and true posterior, dropping terms that don't
            depend on the variational parameters

            if no_v_bias is True, ignores the contribution of the visible biases to the expected energy
            """

        """
            D_KL ( Q(h ) || P(h | v) ) =  - sum_h Q(h) log P(h | v) + sum_h Q(h) log Q(h)
                                       = -sum_h Q(h) log P( h, v) + sum_h Q(h) log P(v) + sum_h Q(h) log Q(h)
            <truncated version>        = -sum_h Q(h) log P( h, v) + sum_h Q(h) log Q(h)
                                       = -sum_h Q(h) log exp( -E (h,v)) + sum_h Q(h) log Z + sum_H Q(h) log Q(h)
            <truncated version>        = sum_h Q(h) E(h, v) + sum_h Q(h) log Q(h)
        """

        H_hat = obs['H_hat']

        for Hv in get_debug_values(H_hat):
            assert Hv.min() >= 0.0
            assert Hv.max() <= 1.0

        entropy_term = - self.model.entropy_h(H_hat = H_hat)
        assert len(entropy_term.type.broadcastable) == 1
        energy_term = self.model.expected_energy_batch(V_hat = V, H_hat = H_hat, no_v_bias = no_v_bias)
        assert len(energy_term.type.broadcastable) == 1

        KL = entropy_term + energy_term

        return KL
Example #25
def kl(Y, Y_hat, batch_axis):
    """
    Warning: This function expects a sigmoid nonlinearity in the
    output layer. Returns a batch (vector) of mean across units of
    KL divergence for each example,
    KL(P || Q) where P is defined by Y and Q is defined by Y_hat:

    p log p - p log q + (1-p) log (1-p) - (1-p) log (1-q)
    For binary p, some terms drop out:
    - p log q - (1-p) log (1-q)
    - p log sigmoid(z) - (1-p) log sigmoid(-z)
    p softplus(-z) + (1-p) softplus(z)

    Parameters
    ----------
    Y : Variable
        targets for the sigmoid outputs. Currently Y must be purely binary.
        If it's not, you'll still get the right gradient, but the
        value in the monitoring channel will be wrong.
    Y_hat : Variable
        predictions made by the sigmoid layer. Y_hat must be generated by
        fprop, i.e., it must be a symbolic sigmoid.
    batch_axis : list
        list of axes to compute average kl divergence across.

    Returns
    -------
    ave : Variable
        average kl divergence between Y and Y_hat.
    """

    assert hasattr(Y_hat, 'owner')
    assert batch_axis is not None

    owner = Y_hat.owner
    assert owner is not None
    op = owner.op

    if not hasattr(op, 'scalar_op'):
        raise ValueError("Expected Y_hat to be generated by an Elemwise "
                         "op, got " + str(op) + " of type " + str(type(op)))
    assert isinstance(op.scalar_op, T.nnet.sigm.ScalarSigmoid)

    for Yv in get_debug_values(Y):
        if not (Yv.min() >= 0.0 and Yv.max() <= 1.0):
            raise ValueError("Expected Y to be between 0 and 1. Either Y" +
                             "< 0 or Y > 1 was found in the input.")

    z, = owner.inputs

    term_1 = Y * T.nnet.softplus(-z)
    term_2 = (1 - Y) * T.nnet.softplus(z)

    total = term_1 + term_2
    naxes = total.ndim
    axes_to_reduce = range(naxes)
    del axes_to_reduce[batch_axis]
    ave = total.mean(axis=axes_to_reduce)

    return ave
Example #26
    def fprop(self, state_below, add_noise=True):
        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError("self.dbm.batch_size is %d but got shape of %d" % (self.dbm.batch_size, sb.shape[0]))
                    assert reduce(lambda x,y: x * y, sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below, self.desired_space)
        
        self.x = state_below
        
        # linear part
        if isinstance(self.x, S.SparseVariable):
            z = S.dot(self.x,self.W[0]) + self.b[0]
        else:
            z = T.dot(self.x,self.W[0]) + self.b[0]
        
        self.z = self.activate(z, self.expert_activation)
        
        # first layer non-linear part
        if isinstance(self.x, S.SparseVariable):
            h = S.dot(self.x,self.W[1]) + self.b[1]
        else:
            h = T.dot(self.x,self.W[1]) + self.b[1]
        
        # activate hidden units of non-linear part
        self.h = self.activate(h, self.hidden_activation)
            
        noise = 0.
        if add_noise:
            rng = MRG_RandomStreams(self.mlp.rng.randint(2**15))
            noise = rng.normal(size=self.z.shape,
                               std=self.noise_stdev,
                               dtype=self.z.type.dtype)
        
        # second layer non-linear part
        self.a = T.dot(self.h,self.W[2]) + self.b[2] + noise
        
        # activate non-linear part
        self.m_mean = self.activate(self.a, self.gater_activation)
        
        # how many are over 0:
        self.effective_sparsity = T.cast(T.gt(self.m_mean, 0), 
                                         theano.config.floatX).mean()
           
        # mix output of linear part with output of non-linear part
        self.p = self.m_mean * self.z
        
        if self.layer_name is not None:
            self.z.name = self.layer_name + '_z'
            self.h.name = self.layer_name + '_h'
            self.a.name = self.layer_name + '_a'
            self.m_mean.name = self.layer_name + '_m_mean'
            self.p.name = self.layer_name + '_p'
        
        return self.p
Example #27
    def fprop(self, state_below):
        #change model to add new variable which sends which indices of the data are here
        self.input_space.validate(state_below)        


        if self.needs_reformat:
            state_below = self.input_space.format_as(state_below, self.desired_space)
        for value in get_debug_values(state_below):
            print 'getting debug values'
            print value
        #     if self.mlp.batch_size is not None and value.shape[0] != self.mlp.batch_size:
        #         raise ValueError("state_below should have batch size "+str(self.dbm.batch_size)+" but has "+str(value.shape[0]))
        self.desired_space.validate(state_below)
        assert state_below.ndim == 2
        if not hasattr(self, 'no_affine'):
            self.no_affine = False
        if self.no_affine:
            raise NotImplementedError()

        assert self.W_class.ndim == 3
        assert self.W_cluster.ndim == 2

        #we get the cluster by doing hW_cluster + b_cluster
        probcluster = T.dot(state_below, self.W_cluster) + self.b_cluster
        probcluster = T.nnet.softmax(probcluster)
        for value in get_debug_values(probcluster):
            print 'val is'
            print value

        print 'type of state below is'
        print state_below.type
        print state_below.dtype
        print state_below.ndim
        self.cluster_targets = range(5)

        #need the predicted clusters for this batch
            
        Z = T.nnet.GroupDot(self.n_clusters)(state_below,
                                                    self.W_class,
                                                    self.b_class,
                                        self.cluster_targets)
        probclass = T.nnet.softmax(Z)
        for value in get_debug_values(probclass):
             if self.mlp.batch_size is not None:
                assert value.shape[0] == self.mlp.batch_size
        return probclass, probcluster
Example #28
 def validate(self, batch):
     if not isinstance(batch, theano.gof.Variable):
         raise TypeError("Conv3DSpace batches must be theano Variables, got "+str(type(batch)))
     if not isinstance(batch.type, (theano.tensor.TensorType,CudaNdarrayType)):
         raise TypeError()
     if batch.ndim != 5:
         raise ValueError()
     for val in get_debug_values(batch):
         self.np_validate(val)
Example #29
 def validate(self, batch):
     if not isinstance(batch, theano.gof.Variable):
         raise TypeError("Conv2DSpace batches must be theano Variables, got "+str(type(batch)))
     if not isinstance(batch.type, (theano.tensor.TensorType,CudaNdarrayType)):
         raise TypeError()
     if batch.ndim != 4:
         raise ValueError()
     for val in get_debug_values(batch):
         self.np_validate(val)
Example #30
    def get_layer_monitoring_channels(self, state_below=None,
                                    state=None, targets=NotImplementedError):

        if self.no_affine:
            return OrderedDict()

        W_class = self.W_class
        W_cluster = self.W_cluster

        assert W_class.ndim == 3
        assert W_cluster.ndim == 2

        sq_W = T.sqr(W_cluster)
        sq_W_class = T.sqr(W_class)

        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))

        row_norms_class = T.sqrt(sq_W_class.sum(axis=1))
        col_norms_class = T.sqrt(sq_W_class.sum(axis=0))

        rval = OrderedDict([
                            ('row_norms_min'  , row_norms.min()),
                            ('row_norms_mean' , row_norms.mean()),
                            ('row_norms_max'  , row_norms.max()),
                            ('col_norms_min'  , col_norms.min()),
                            ('col_norms_mean' , col_norms.mean()),
                            ('col_norms_max'  , col_norms.max()),
                            ('class_row_norms_min'  , row_norms_class.min()),
                            ('class_row_norms_mean' , row_norms_class.mean()),
                            ('class_row_norms_max'  , row_norms_class.max()),
                            ('class_col_norms_min'  , col_norms_class.min()),
                            ('class_col_norms_mean' , col_norms_class.mean()),
                            ('class_col_norms_max'  , col_norms_class.max()),
                            ])


        if (state_below is not None) or (state is not None):
            if state is None:

                for value in get_debug_values(state_below):
                    print 'value is', value
                state=self.fprop (state_below)
            #print state
            state, cls = state
            mx = state.max(axis=1)
            rval.update(OrderedDict([('mean_max_class',mx.mean()),
                                     ('max_max_class' , mx.max()),
                                     ('min_max_class' , mx.min())
                                    ]))
            if targets is not None:
                rval['nll'] = self.cost(Y_hat=(state,cls), Y=targets)
                rval['perplexity'] = 10 ** (rval['nll']/np.log(10).astype('float32'))
                rval['entropy'] = rval['nll']/np.log(2).astype('float32')
        return rval
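The 'perplexity' and 'entropy' channels above are re-expressions of the same negative log-likelihood: 10 ** (nll / ln 10) equals exp(nll), and nll / ln 2 is the same quantity in bits. A short NumPy check (illustrative value):

import numpy as np

nll = 2.3  # some value of the mean negative log-likelihood, in nats
assert np.isclose(10 ** (nll / np.log(10)), np.exp(nll))  # perplexity
assert np.isclose(nll / np.log(2), nll * np.log2(np.e))   # entropy in bits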
Example #31
 def validate(self, batch):
     if not isinstance(batch, theano.gof.Variable):
         raise TypeError()
     if not isinstance(batch.type, (theano.tensor.TensorType,CudaNdarrayType)):
         raise TypeError()
     if batch.ndim != 4:
         raise ValueError()
     for val in get_debug_values(batch):
         assert val.shape[self.axes.index('c')] == self.nchannels
         for coord in [0,1]:
             assert val.shape[self.axes.index(coord)] == self.shape[coord]
Example #32
 def validate(self, batch):
     if not isinstance(batch, theano.gof.Variable):
         raise TypeError("VectorSpace batch should be a theano Variable, got "+str(type(batch)))
     if not self.sparse and not isinstance(batch.type, (theano.tensor.TensorType, CudaNdarrayType)):
         raise TypeError("VectorSpace batch should be TensorType or CudaNdarrayType, got "+str(batch.type))
     if self.sparse and not isinstance(batch.type, theano.sparse.SparseType):
         raise TypeError()
     if batch.ndim != 2:
         raise ValueError('VectorSpace batches must be 2D, got %d dimensions' % batch.ndim)
     for val in get_debug_values(batch):
         self.np_validate(val)
Example #33
def entropy_binary_vector(P):
    """
    .. todo::

        WRITEME properly

    if P[i,j] represents the probability
        of some binary random variable X[i,j] being 1
    then rval[i] gives the entropy of the random vector
    X[i,:]
    """

    for Pv in get_debug_values(P):
        assert Pv.min() >= 0.0
        assert Pv.max() <= 1.0

    oneMinusP = 1. - P

    PlogP = xlogx(P)
    omPlogOmP = xlogx(oneMinusP)

    term1 = -T.sum(PlogP, axis=1)
    assert len(term1.type.broadcastable) == 1

    term2 = -T.sum(omPlogOmP, axis=1)
    assert len(term2.type.broadcastable) == 1

    rval = term1 + term2

    for plp, olo, t1, t2, rv in get_debug_values(PlogP, omPlogOmP, term1,
                                                 term2, rval):
        debug_assert(not np.any(np.isnan(plp)))
        debug_assert(not np.any(np.isinf(plp)))
        debug_assert(not np.any(np.isnan(olo)))
        debug_assert(not np.any(np.isinf(olo)))

        debug_assert(not np.any(np.isnan(t1)))
        debug_assert(not np.any(np.isnan(t2)))
        debug_assert(not np.any(np.isnan(rv)))

    return rval
Example #34
 def validate(self, batch):
     if not isinstance(batch, theano.gof.Variable):
         raise TypeError()
     if not isinstance(batch.type,
                       (theano.tensor.TensorType, CudaNdarrayType)):
         raise TypeError()
     if batch.ndim != 4:
         raise ValueError()
     for val in get_debug_values(batch):
         assert val.shape[self.axes.index('c')] == self.nchannels
         for coord in [0, 1]:
             assert val.shape[self.axes.index(coord)] == self.shape[coord]
Example #35
 def validate(self, batch):
     if not isinstance(batch, theano.gof.Variable):
         raise TypeError("%s batches must be Theano Variables, got %s" % (str(type(self)), str(type(batch))))
     if not isinstance(batch.type, (theano.tensor.TensorType, CudaNdarrayType)):
         raise TypeError()
     if batch.ndim != 5:
         raise ValueError()
     if not batch.broadcastable[self.axes.index("b")]:
         raise ValueError(
             "%s batches should be broadcastable along the " "'b' (batch size) dimension." % str(type(self))
         )
     for val in get_debug_values(batch):
         self.np_validate(val)
Example #36
    def foo(self, state_below):

        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError(
                            "self.dbm.batch_size is %d but got shape of %d" % (self.dbm.batch_size, sb.shape[0])
                        )
                    assert reduce(lambda x, y: x * y, sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below, self.desired_space)

        z = self.transformer.lmul(state_below) + self.b

        if not hasattr(self, "randomize_pools"):
            self.randomize_pools = False

        if not hasattr(self, "pool_stride"):
            self.pool_stride = self.pool_size

        if self.randomize_pools:
            z = T.dot(z, self.permute)

        if not hasattr(self, "min_zero"):
            self.min_zero = False

        if self.min_zero:
            p = T.zeros_like(z)
        else:
            p = None

        last_start = self.detector_layer_dim - self.pool_size

        pooling_stack = []
        for i in xrange(self.pool_size):
            cur = z[:, i : last_start + i + 1 : self.pool_stride]
            cur = cur.reshape((cur.shape[0], cur.shape[1], 1))
            assert cur.ndim == 3
            pooling_stack.append(cur)
        if self.min_zero:
            pooling_stack.append(T.zeros_like(cur))
        pooling_stack = T.concatenate(pooling_stack, axis=2)
        p = pooling_stack.max(axis=2)
        counts = (T.eq(pooling_stack, p.dimshuffle(0, 1, "x"))).sum(axis=0)

        p.name = self.layer_name + "_p_"

        return p, counts
Example #37
 def validate(self, batch):
     if not isinstance(batch, theano.gof.Variable):
         raise TypeError("%s batches must be Theano Variables, got %s" %
                         (str(type(self)), str(type(batch))))
     if not isinstance(batch.type,
                       (theano.tensor.TensorType, CudaNdarrayType)):
         raise TypeError()
     if batch.ndim != 5:
         raise ValueError()
     if not batch.broadcastable[self.axes.index('b')]:
         raise ValueError("%s batches should be broadcastable along the "
                          "'b' (batch size) dimension." % str(type(self)))
     for val in get_debug_values(batch):
         self.np_validate(val)
Example #38
def test_get_debug_values_no_debugger():
    'get_debug_values should return [] when debugger is off'

    prev_value = config.compute_test_value
    try:
        config.compute_test_value = 'off'

        x = T.vector()

        for x_val in op.get_debug_values(x):
            assert False

    finally:
        config.compute_test_value = prev_value
Example #39
def entropy_binary_vector(P):
    """
    .. todo::

        WRITEME properly

    If P[i,j] represents the probability of some binary random variable X[i,j]
    being 1, then rval[i] gives the entropy of the random vector X[i,:]
    """

    for Pv in get_debug_values(P):
        assert Pv.min() >= 0.0
        assert Pv.max() <= 1.0

    oneMinusP = 1. - P

    PlogP = xlogx(P)
    omPlogOmP = xlogx(oneMinusP)

    term1 = - T.sum(PlogP, axis=1)
    assert len(term1.type.broadcastable) == 1

    term2 = - T.sum(omPlogOmP, axis=1)
    assert len(term2.type.broadcastable) == 1

    rval = term1 + term2

    debug_vals = get_debug_values(PlogP, omPlogOmP, term1, term2, rval)
    for plp, olo, t1, t2, rv in debug_vals:
        debug_assert(isfinite(plp))
        debug_assert(isfinite(olo))

        debug_assert(not contains_nan(t1))
        debug_assert(not contains_nan(t2))
        debug_assert(not contains_nan(rv))

    return rval
Example #40
    def foo(self, state_below):

        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError("self.dbm.batch_size is %d but got shape of %d" % (self.dbm.batch_size, sb.shape[0]))
                    assert reduce(lambda x,y: x * y, sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below, self.desired_space)

        z = self.transformer.lmul(state_below) + self.b

        if not hasattr(self, 'randomize_pools'):
            self.randomize_pools = False

        if not hasattr(self, 'pool_stride'):
            self.pool_stride = self.pool_size

        if self.randomize_pools:
            z = T.dot(z, self.permute)

        if not hasattr(self, 'min_zero'):
            self.min_zero = False

        if self.min_zero:
            p = T.zeros_like(z)
        else:
            p = None

        last_start = self.detector_layer_dim  - self.pool_size

        pooling_stack = []
        for i in xrange(self.pool_size):
            cur = z[:,i:last_start+i+1:self.pool_stride]
            cur = cur.reshape((cur.shape[0], cur.shape[1], 1))
            assert cur.ndim == 3
            pooling_stack.append(cur)
        if self.min_zero:
            pooling_stack.append(T.zeros_like(cur))
        pooling_stack = T.concatenate(pooling_stack, axis=2)
        p = pooling_stack.max(axis=2)
        counts = (T.eq(pooling_stack, p.dimshuffle(0, 1, 'x'))).sum(axis=0)

        p.name = self.layer_name + '_p_'

        return p, counts
Example #41
    def init_H_hat(self, V):

        if self.model.recycle_q:
            rval = self.model.prev_H
            if config.compute_test_value != 'off':
                if rval.get_value().shape[0] != V.tag.test_value.shape[0]:
                    raise Exception('E step given wrong test batch size', rval.get_value().shape, V.tag.test_value.shape)
        else:
            rval = T.alloc(1., V.shape[0], self.model.nhid)

            for rval_value, V_value in get_debug_values(rval, V):
                if rval_value.shape[0] != V_value.shape[0]:
                    debug_error_message("rval.shape = %s, V.shape = %s, element 0 should match but doesn't", str(rval_value.shape), str(V_value.shape))

        return rval
Example #42
 def _validate_impl(self, is_numeric, batch):
     super(ContextSpace, self)._validate_impl(is_numeric, batch)
     if is_numeric:
         if batch.ndim != 3:
             raise TypeError("ContectSpace should have a 3D array. Got " +
                             str(batch.ndim))
     else:
         if not isinstance(batch, theano.gof.Variable):
             raise TypeError("Not a valid syblic variable. Got " +
                             str(batch))
         if batch.ndim != 3:
             raise TypeError("Required a 3D tensor. Got " + str(batch) +
                             " with %i" % batch.ndim)
         for val in get_debug_values(batch):
             self.np_validate(val)
Example #43
def test_get_det_debug_values_ignore():
    """get_debug_values should return [] when debugger is ignore
        and some values are missing """

    prev_value = config.compute_test_value
    try:
        config.compute_test_value = 'ignore'

        x = T.vector()

        for x_val in op.get_debug_values(x):
            assert False

    finally:
        config.compute_test_value = prev_value
Example #44
    def _validate(self, batch):
        """
        .. todo::

            WRITEME
        """
        if not isinstance(batch, theano.gof.Variable):
            raise TypeError("IndexSpace batch should be a theano Variable, "
                            "got " + str(type(batch)))
        if not isinstance(batch.type, (theano.tensor.TensorType,
                                       CudaNdarrayType)):
            raise TypeError("VectorSpace batch should be TensorType or "
                            "CudaNdarrayType, got "+str(batch.type))
        if batch.ndim != 2:
            raise ValueError('IndexSpace batches must be 2D, got %d '
                             'dimensions' % batch.ndim)
        for val in get_debug_values(batch):
            self.np_validate(val)
Example #45
    def fprop(self, state_below):

        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError("self.dbm.batch_size is %d but got shape of %d" % (self.dbm.batch_size, sb.shape[0]))
                    assert reduce(lambda x,y: x * y, sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below, self.desired_space)

        z = self.transformer.lmul(state_below) + self.b

        if not hasattr(self, 'randomize_pools'):
            self.randomize_pools = False

        if not hasattr(self, 'pool_stride'):
            self.pool_stride = self.pool_size

        if self.randomize_pools:
            z = T.dot(z, self.permute)

        if not hasattr(self, 'min_zero'):
            self.min_zero = False

        if self.min_zero:
            p = T.zeros_like(z)
        else:
            p = None

        last_start = self.detector_layer_dim  - self.pool_size
        for i in xrange(self.pool_size):
            cur = z[:,i:last_start+i+1:self.pool_stride]
            if p is None:
                p = cur
            else:
                p = T.maximum(cur, p)

        p.name = self.layer_name + '_p_'

        return p
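The pooling loop above takes, for each example, the maximum over groups of pool_size consecutive detector units whose starting positions are pool_stride apart. A small NumPy analogue on assumed toy shapes (not library code):

import numpy as np

z = np.arange(12, dtype=float).reshape(2, 6)   # (batch, detector_layer_dim)
pool_size = pool_stride = 3
last_start = z.shape[1] - pool_size

p = None
for i in range(pool_size):
    cur = z[:, i:last_start + i + 1:pool_stride]
    p = cur if p is None else np.maximum(p, cur)

# with pool_stride == pool_size this is a max over non-overlapping groups of 3
assert np.allclose(p, z.reshape(2, 2, 3).max(axis=2))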
Example #46
 def validate(self, batch):
     if not isinstance(batch, theano.gof.Variable):
         raise TypeError(
             "VectorSpace batch should be a theano Variable, got " +
             str(type(batch)))
     if not self.sparse and not isinstance(
             batch.type, (theano.tensor.TensorType, CudaNdarrayType)):
         raise TypeError(
             "VectorSpace batch should be TensorType or CudaNdarrayType, got "
             + str(batch.type))
     if self.sparse and not isinstance(batch.type,
                                       theano.sparse.SparseType):
         raise TypeError()
     if batch.ndim != 2:
         raise ValueError(
             'VectorSpace batches must be 2D, got %d dimensions' %
             batch.ndim)
     for val in get_debug_values(batch):
         self.np_validate(val)
Example #47
        def get_func(learn_discriminator, learn_generator):

            updates = OrderedDict()

            assert (learn_discriminator or learn_generator
                    ) and not (learn_discriminator and learn_generator)

            if learn_discriminator:
                cur_params = model.discriminator.get_params()
            else:
                cur_params = model.generator.get_params()

            cur_grads = OrderedDict()
            for param in cur_params:
                cur_grads[param] = grads[param]

            for param in grads:
                if grads[param].name is None and cost_value is not None:
                    grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                         {
                                             'costname': cost_value.name,
                                             'paramname': param.name
                                         })
                assert grads[param].dtype == param.dtype

            cur_lr_scalers = OrderedDict()
            for param in cur_params:
                if param in lr_scalers:
                    lr_scaler = lr_scalers[param]
                    cur_lr_scalers[param] = lr_scaler

            log.info('Parameter and initial learning rate summary:')
            for param in cur_params:
                param_name = param.name
                if param_name is None:
                    param_name = 'anon_param'
                lr = learning_rate.get_value() * cur_lr_scalers.get(param, 1.)
                log.info('\t' + param_name + ': ' + str(lr))

            if self.learning_rule:
                updates.update(
                    self.learning_rule.get_updates(learning_rate, cur_grads,
                                                   cur_lr_scalers))
            else:
                # Use standard SGD updates with fixed learning rate.
                updates.update(dict(safe_zip(params, [
                    param - learning_rate * lr_scalers.get(param, 1.) *
                    grads[param] for param in params])))

            for param in cur_params:
                if updates[param].name is None:
                    updates[param].name = 'sgd_update(' + param.name + ')'
            model.modify_updates(updates)
            for param in cur_params:
                update = updates[param]
                if update.name is None:
                    update.name = 'censor(sgd_update(' + param.name + '))'
                for update_val in get_debug_values(update):
                    if np.any(np.isinf(update_val)):
                        raise ValueError("debug value of %s contains infs" %
                                         update.name)
                    if np.any(np.isnan(update_val)):
                        raise ValueError("debug value of %s contains nans" %
                                         update.name)

            with log_timing(log, 'Compiling sgd_update'):
                return function(theano_args,
                                updates=updates,
                                name='sgd_update',
                                on_unused_input='ignore',
                                mode=self.theano_function_mode)
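A minimal usage sketch (not part of the original snippet): the closure above is typically compiled once per role, and the two resulting functions are then called in alternation by the training loop. The name batch_data is hypothetical and stands for a tuple matching theano_args.

        # Hedged sketch, assuming this runs in the same scope that defines
        # get_func, model, grads, and theano_args above.
        train_discriminator = get_func(learn_discriminator=True,
                                       learn_generator=False)
        train_generator = get_func(learn_discriminator=False,
                                   learn_generator=True)
        # One adversarial step would then look roughly like:
        # train_discriminator(*batch_data)
        # train_generator(*batch_data)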
def max_pool_channels(z, pool_size, top_down=None, theano_rng=None):
    """
    Unlike Honglak's convolutional max pooling, which pools over spatial
    locations within each channels, this does max pooling in a densely
    connected model. Here we pool groups of channels together.

    Parameters
    ----------
    z : theano matrix
        representings a batch of input from below
    pool_size : int
        the number of features to combine into one pooled unit
    top_down : theano matrix, optional
        a theano matrix representing input from above
        if None, assumes top-down input is 0
    theano_rng : MRG_RandomStreams, optional
        For random numbers for sampling

    Returns
    -------
    h : theano matrix
        a theano matrix for the expected value of the detector layer h
    p : theano matrix
        a theano matrix for the expected value of the pooling layer p
    h_samples : theano matrix, only returned if theano_rng is not None
        a theano matrix of samples of the detector layer
    p_samples: theano matrix, only returned if theano_rng is not None
        a theano matrix of samples of the pooling layer

    Notes
    -----
    all matrices are formatted as (num_example, num_features)
    """

    z_name = z.name
    if z_name is None:
        z_name = 'anon_z'

    if pool_size == 1:
        if top_down is None:
            top_down = 0.
        total_input = z + top_down
        p = T.nnet.sigmoid(total_input)
        h = p

        if theano_rng is None:
            return p, h
        else:
            t1 = time.time()
            p_samples = theano_rng.binomial(p=p,
                                            size=p.shape,
                                            dtype=p.dtype,
                                            n=1)
            t2 = time.time()
            if t2 - t1 > 0.5:
                warnings.warn("TODO: speed up theano's random number seeding. "
                              "max pooling spent " + str(t2 - t1) +
                              "in a call to theano_rng.binomial.")
            h_samples = p_samples
            # Return in the same order as the general case below:
            # expectations first, then samples.
            return p, h, p_samples, h_samples
    else:
        batch_size, n = z.shape

        mx = None

        if top_down is None:
            t = 0.
        else:
            t = -top_down
            t.name = 'neg_top_down'

        zpart = []
        for i in xrange(pool_size):
            cur_part = z[:, i:n:pool_size]
            if z_name is not None:
                cur_part.name = z_name + '[%d]' % (i)
            zpart.append(cur_part)
            if mx is None:
                mx = T.maximum(t, cur_part)
                if cur_part.name is not None:
                    mx.name = 'max(-top_down,' + cur_part.name + ')'
            else:
                mx_name = None
                if cur_part.name is not None:
                    mx_name = 'max(' + cur_part.name + ',' + mx.name + ')'
                mx = T.maximum(mx, cur_part)
                mx.name = mx_name
        mx.name = 'local_max(' + z_name + ')'

        pt = []

        for i in xrange(pool_size):
            z_i = zpart[i]
            safe = z_i - mx
            safe.name = 'safe_z(%s)' % z_i.name
            cur_pt = T.exp(safe)
            cur_pt.name = 'pt(%s)' % z_i.name
            assert cur_pt.ndim == 2
            pt.append(cur_pt)

        off_pt = T.exp(t - mx)
        assert off_pt.ndim == 2
        off_pt.name = 'p_tilde_off(%s)' % z_name

        denom = off_pt
        for i in xrange(pool_size):
            denom = denom + pt[i]
        assert denom.ndim == 2
        denom.name = 'denom(%s)' % z_name

        off_prob = off_pt / denom
        p = 1. - off_prob
        assert p.dtype == z.dtype

        hpart = [pt_i / denom for pt_i in pt]

        h = T.alloc(0., batch_size, n)

        for i in xrange(pool_size):
            h.name = 'h_interm'
            hp = hpart[i]
            sub_h = h[:, i:n:pool_size]
            assert sub_h.ndim == 2
            assert hp.ndim == 2
            for hv, hsv, hpartv in get_debug_values(h, sub_h, hp):
                print hv.shape
                print hsv.shape
                print hpartv.shape
            h = T.set_subtensor(sub_h, hp)

    p.name = 'p(%s)' % z_name
    h.name = 'h(%s)' % z_name

    if theano_rng is None:
        return p, h
    else:
        events = []
        for i in xrange(pool_size):
            events.append(hpart[i])
        events.append(off_prob)

        events = [event.dimshuffle(0, 1, 'x') for event in events]

        events = tuple(events)

        stacked_events = T.concatenate(events, axis=2)

        outcomes = pool_size + 1
        reshaped_events = stacked_events.reshape(
            (batch_size * n // pool_size, outcomes))

        t1 = time.time()
        multinomial = theano_rng.multinomial(pvals=reshaped_events,
                                             dtype=p.dtype)
        t2 = time.time()
        if t2 - t1 > 0.5:
            warnings.warn("TODO: speed up theano's random number seeding."
                          "max pooling spent " + str(t2 - t1) +
                          " in a call to theano_rng.multinomial.")

        reshaped_multinomial = multinomial.reshape(
            (batch_size, n // pool_size, outcomes))

        h_sample = T.zeros_like(z)

        idx = 0
        for i in xrange(pool_size):
            h_sample = T.set_subtensor(h_sample[:, i:n:pool_size],
                                       reshaped_multinomial[:, :, idx])
            idx += 1

        p_sample = 1 - reshaped_multinomial[:, :, -1]

        assert h_sample.dtype == z.dtype

        return p, h, p_sample, h_sample
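The group-softmax probabilities built above can be sanity-checked against the closed form P(h[i] = 1) = softmax([z_1, ..., z_k, -top_down])[i] and P(p = 1) = 1 - softmax([z_1, ..., z_k, -top_down])[k] described in the max_pool docstring below. A minimal NumPy sketch for a single example and a single group (the helper name reference_pool_group is made up for illustration):

import numpy as np

def reference_pool_group(z_group, top_down=0.):
    # One pooled group of detector units:
    #   h[i] = softmax([z_1, ..., z_k, -top_down])[i]
    #   p    = 1 - softmax([z_1, ..., z_k, -top_down])[k]
    logits = np.append(np.asarray(z_group, dtype='float64'), -top_down)
    logits = logits - logits.max()   # same stabilization trick as the graph above
    e = np.exp(logits)
    probs = e / e.sum()
    h = probs[:-1]                   # expected detector units
    p = 1. - probs[-1]               # expected pooling unit
    return p, h

p, h = reference_pool_group([1.0, -0.5, 0.2])
assert np.isclose(p, h.sum())        # p is the probability that some unit in the group is on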
def max_pool(z, pool_shape, top_down=None, theano_rng=None):
    """
    Parameters
    ----------
    z : theano 4-tensor
        a theano 4-tensor representing input from below
    pool_shape : tuple
        tuple of ints. the shape of regions to be pooled
    top_down : theano 4-tensor, optional
        a theano 4-tensor representing input from above
        if None, assumes top-down input is 0
    theano_rng : MRG_RandomStreams, optional
        Used for random numbers for sampling

    Returns
    -------
    h : theano 4-tensor
        the expected value of the detector layer h
    p : theano 4-tensor
        the expected value of the pooling layer p
    h_samples : theano 4-tensor, only returned if theano_rng is not None
        samples of the detector layer
    p_samples : theano 4-tensor, only returned if theano_rng is not None
        samples of the pooling layer

    Notes
    -----

    all 4-tensors are formatted with axes ('b', 'c', 0, 1).
    This is for maximum speed when using theano's conv2d
    to generate z and top_down, or when using it to infer conditionals of
    other layers using the return values.

    Detailed description:

    Suppose you have a variable h that lives in a Conv2DSpace h_space and
    you want to pool it down to a variable p that lives in a smaller
    Conv2DSpace p_space.

    This function does that, using non-overlapping pools.

    Specifically, consider one channel of h. h must have a height that is a
    multiple of pool_shape[0] and a width that is a multiple of pool_shape[1].
    A channel of h can thus be broken down into non-overlapping rectangles
    of shape pool_shape.

    Now consider one rectangular pooled region within one channel of h.
    I now use 'h' to refer just to this rectangle, and 'p' to refer to
    just the one pooling unit associated with that rectangle.
    We assume that the space that h and p live in is constrained such
    that h and p are both binary and p = max(h). To reduce the state-space
    in order to make probabilistic computations cheaper we also
    constrain sum(h) <= 1.
    Suppose h contains k different units. Suppose that the only term
    in the model's energy function involving h is -(z*h).sum()
    (elemwise multiplication) and the only term in
    the model's energy function involving p is -(top_down*p).sum().

    Then P(h[i] = 1) = softmax( [ z[1], z[2], ..., z[k], -top_down] )[i]
    and P(p = 1) = 1-softmax( [z[1], z[2], ..., z[k], -top_down])[k]

    This variation of the function assumes that z, top_down, and all
    return values use Conv2D axes ('b', 'c', 0, 1).
    This variation of the function implements the softmax using a
    theano graph of exp, maximum, sub, and div operations.

    Performance notes:
    It might be possible to make a faster implementation with different
    theano ops. rather than using set_subtensor, it might be possible
    to use the stuff in theano.sandbox.neighbours. Probably not possible,
    or at least nasty, because that code isn't written with multiple
    channels in mind, and I don't think just a reshape can fix it.
    Some work on this in galatea.cond.neighbs.py
    At some point images2neibs' gradient was broken, so check that
    it has been fixed before sinking too much time into this.

    Stabilizing the softmax is also another source of slowness.
    Here it is stabilized with several calls to maximum and sub.
    It might also be possible to stabilize it with
    T.maximum(-top_down,T.signal.downsample.max_pool(z)).
    Don't know if that would be faster or slower.

    Elsewhere in this file I implemented the softmax with a reshape
    and call to Softmax / SoftmaxWithBias.
    This is slower, even though Softmax is faster on the GPU than the
    equivalent max/sub/exp/div graph. Maybe the reshape is too expensive.

    Benchmarks show that most of the time is spent in GpuIncSubtensor
    when running on gpu. So it is mostly that which needs a faster
    implementation. One other way to implement this would be with
    a linear.Conv2D.lmul_T, where the convolution stride is equal to
    the pool width, and the thing to multiply with is the hparts stacked
    along the channel axis. Unfortunately, conv2D doesn't work right
    with stride > 2 and is pretty slow for stride 2. Conv3D is used to
    mitigate some of this, but only has CPU code.
    """

    z_name = z.name
    if z_name is None:
        z_name = 'anon_z'

    batch_size, ch, zr, zc = z.shape

    r, c = pool_shape

    zpart = []

    mx = None

    if top_down is None:
        t = 0.
    else:
        t = -top_down
        t.name = 'neg_top_down'

    for i in xrange(r):
        zpart.append([])
        for j in xrange(c):
            cur_part = z[:, :, i:zr:r, j:zc:c]
            if z_name is not None:
                cur_part.name = z_name + '[%d,%d]' % (i, j)
            zpart[i].append(cur_part)
            if mx is None:
                mx = T.maximum(t, cur_part)
                if cur_part.name is not None:
                    mx.name = 'max(-top_down,' + cur_part.name + ')'
            else:
                mx_name = None
                if cur_part.name is not None:
                    mx_name = 'max(' + cur_part.name + ',' + mx.name + ')'
                mx = T.maximum(mx, cur_part)
                mx.name = mx_name
    mx.name = 'local_max(' + z_name + ')'

    pt = []

    for i in xrange(r):
        pt.append([])
        for j in xrange(c):
            z_ij = zpart[i][j]
            safe = z_ij - mx
            safe.name = 'safe_z(%s)' % z_ij.name
            cur_pt = T.exp(safe)
            cur_pt.name = 'pt(%s)' % z_ij.name
            pt[-1].append(cur_pt)

    off_pt = T.exp(t - mx)
    off_pt.name = 'p_tilde_off(%s)' % z_name
    denom = off_pt

    for i in xrange(r):
        for j in xrange(c):
            denom = denom + pt[i][j]
    denom.name = 'denom(%s)' % z_name

    off_prob = off_pt / denom
    p = 1. - off_prob
    p.name = 'p(%s)' % z_name

    hpart = []
    for i in xrange(r):
        hpart.append([pt_ij / denom for pt_ij in pt[i]])

    h = T.alloc(0., batch_size, ch, zr, zc)

    for i in xrange(r):
        for j in xrange(c):
            h.name = 'h_interm'
            h = T.set_subtensor(h[:, :, i:zr:r, j:zc:c], hpart[i][j])

    h.name = 'h(%s)' % z_name

    if theano_rng is None:
        return p, h
    else:
        events = []
        for i in xrange(r):
            for j in xrange(c):
                events.append(hpart[i][j])
        events.append(off_prob)

        events = [event.dimshuffle(0, 1, 2, 3, 'x') for event in events]

        events = tuple(events)

        stacked_events = T.concatenate(events, axis=4)

        rows = zr // pool_shape[0]
        cols = zc // pool_shape[1]
        outcomes = pool_shape[0] * pool_shape[1] + 1
        assert stacked_events.ndim == 5
        # stacked_events has shape (batch, channels, rows, cols, outcomes)
        # for the ('b', 'c', 0, 1) axis ordering used throughout.
        for se, bs, rv, cv, chv in get_debug_values(stacked_events,
                                                    batch_size, rows, cols,
                                                    ch):
            assert se.shape[0] == bs
            assert se.shape[1] == chv
            assert se.shape[2] == rv
            assert se.shape[3] == cv
            assert se.shape[4] == outcomes
        reshaped_events = stacked_events.reshape(
            (batch_size * rows * cols * ch, outcomes))

        multinomial = theano_rng.multinomial(pvals=reshaped_events,
                                             dtype=p.dtype)

        reshaped_multinomial = multinomial.reshape(
            (batch_size, ch, rows, cols, outcomes))

        h_sample = T.alloc(0., batch_size, ch, zr, zc)

        idx = 0
        for i in xrange(r):
            for j in xrange(c):
                h_sample = T.set_subtensor(
                    h_sample[:, :, i:zr:r, j:zc:c],
                    reshaped_multinomial[:, :, :, :, idx])
                idx += 1

        p_sample = 1 - reshaped_multinomial[:, :, :, :, -1]

        return p, h, p_sample, h_sample
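The strided slicing z[:, :, i:zr:r, j:zc:c] used above addresses position (i, j) of every non-overlapping pool at once. A small NumPy sketch of that indexing, with made-up shapes, for readers unfamiliar with the trick:

import numpy as np

batch_size, ch, zr, zc = 2, 3, 4, 6
r, c = 2, 3                                    # pool_shape
z = np.random.randn(batch_size, ch, zr, zc)    # ('b', 'c', 0, 1) layout

parts = []
for i in range(r):
    for j in range(c):
        # Same indexing as in the theano graph: one entry per pool region.
        parts.append(z[:, :, i:zr:r, j:zc:c])

assert all(part.shape == (batch_size, ch, zr // r, zc // c) for part in parts)
assert sum(part.size for part in parts) == z.size    # the r * c slices tile z exactly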
Example #50
0
    def setup(self, model, dataset):
        """
        Compiles the theano functions needed for the train method.
        """
        if self.cost is None:
            self.cost = model.get_default_cost()

        inf_params = [param for param in model.get_params()
                      if np.any(np.isinf(param.get_value()))]
        if len(inf_params) > 0:
            raise ValueError("These params are Inf: "+str(inf_params))
        if any([np.any(np.isnan(param.get_value()))
                for param in model.get_params()]):
            nan_params = [param for param in model.get_params()
                          if np.any(np.isnan(param.get_value()))]
            raise ValueError("These params are NaN: "+str(nan_params))
        self.model = model

        self._synchronize_batch_size(model)
        model._test_batch_size = self.batch_size
        self.monitor = Monitor.get_monitor(model)
        self.monitor._sanity_check()

        data_specs = self.cost.get_data_specs(self.model)
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

        # Build a flat tuple of Theano Variables, one for each space.
        # We want that so that if the same space/source is specified
        # more than once in data_specs, only one Theano Variable
        # is generated for it, and the corresponding value is passed
        # only once to the compiled Theano function.
        theano_args = []
        for space, source in safe_zip(space_tuple, source_tuple):
            name = '%s[%s]' % (self.__class__.__name__, source)
            arg = space.make_theano_batch(name=name,
                                          batch_size=self.batch_size)
            theano_args.append(arg)
        theano_args = tuple(theano_args)

        # Methods of `self.cost` need args to be passed in a format compatible
        # with data_specs
        nested_args = mapping.nest(theano_args)
        fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
        self.on_load_batch = fixed_var_descr.on_load_batch

        cost_value = self.cost.expr(model, nested_args,
                                    ** fixed_var_descr.fixed_vars)

        if cost_value is not None and cost_value.name is None:
            # Concatenate the name of all tensors in theano_args !?
            cost_value.name = 'objective'

        # Set up monitor to model the objective value, learning rate,
        # momentum (if applicable), and extra channels defined by
        # the cost
        learning_rate = self.learning_rate
        if self.monitoring_dataset is not None:
            self.monitor.setup(dataset=self.monitoring_dataset,
                               cost=self.cost,
                               batch_size=self.batch_size,
                               num_batches=self.monitoring_batches,
                               extra_costs=self.monitoring_costs,
                               mode=self.monitor_iteration_mode)
            dataset_name = self.monitoring_dataset.keys()[0]
            monitoring_dataset = self.monitoring_dataset[dataset_name]
            #TODO: have Monitor support non-data-dependent channels
            self.monitor.add_channel(name='learning_rate',
                                     ipt=None,
                                     val=learning_rate,
                                     data_specs=(NullSpace(), ''),
                                     dataset=monitoring_dataset)

            if self.learning_rule:
                self.learning_rule.add_channels_to_monitor(
                        self.monitor,
                        monitoring_dataset)

        params = list(model.get_params())
        assert len(params) > 0
        for i, param in enumerate(params):
            if param.name is None:
                param.name = 'sgd_params[%d]' % i

        grads, updates = self.cost.get_gradients(model, nested_args,
                                                 ** fixed_var_descr.fixed_vars)
        if not isinstance(grads, OrderedDict):
            raise TypeError(str(type(self.cost)) + ".get_gradients returned " +
                            "something with " + str(type(grads)) + " as its " +
                            "first member. Expected OrderedDict.")

        for param in grads:
            assert param in params
        for param in params:
            assert param in grads

        for param in grads:
            if grads[param].name is None and cost_value is not None:
                grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                     {'costname': cost_value.name,
                                      'paramname': param.name})
            assert grads[param].dtype == param.dtype

        lr_scalers = model.get_lr_scalers()

        for key in lr_scalers:
            if key not in params:
                raise ValueError("Tried to scale the learning rate on " +\
                        str(key)+" which is not an optimization parameter.")

        log.info('Parameter and initial learning rate summary:')
        for param in params:
            param_name = param.name
            if param_name is None:
                param_name = 'anon_param'
            lr = learning_rate.get_value() * lr_scalers.get(param,1.)
            log.info('\t' + param_name + ': ' + str(lr))

        if self.learning_rule:
            updates.update(self.learning_rule.get_updates(
                learning_rate, grads, lr_scalers))
        else:
            # Use standard SGD updates with fixed learning rate.
            updates.update(dict(safe_zip(
                params,
                [param - learning_rate * lr_scalers.get(param, 1.) *
                 grads[param] for param in params])))

        for param in params:
            if updates[param].name is None:
                updates[param].name = 'sgd_update(' + param.name + ')'
        model.censor_updates(updates)
        for param in params:
            update = updates[param]
            if update.name is None:
                update.name = 'censor(sgd_update(' + param.name + '))'
            for update_val in get_debug_values(update):
                if np.any(np.isinf(update_val)):
                    raise ValueError("debug value of %s contains infs" %
                            update.name)
                if np.any(np.isnan(update_val)):
                    raise ValueError("debug value of %s contains nans" %
                            update.name)


        with log_timing(log, 'Compiling sgd_update'):
            self.sgd_update = function(theano_args,
                                       updates=updates,
                                       name='sgd_update',
                                       on_unused_input='ignore',
                                       mode=self.theano_function_mode)
        self.params = params
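For reference, the fixed-learning-rate update built in the else branch above reduces, per parameter, to param - learning_rate * lr_scaler * grad. A minimal NumPy sketch (names and values are illustrative only):

import numpy as np

def sgd_step(param_value, grad_value, learning_rate, lr_scaler=1.):
    # Mirrors: param - learning_rate * lr_scalers.get(param, 1.) * grads[param]
    return param_value - learning_rate * lr_scaler * grad_value

w = np.array([0.5, -1.0])
g = np.array([0.1, -0.2])
print(sgd_step(w, g, learning_rate=0.01))      # -> [ 0.499 -0.998]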
Example #51
0
def lwta_3d_b012c(b012c, pool_shape, pool_stride, video_shape):
    """
    Modified from pylearn2.models.mlp.max_pool_c01b.
    """
    mx = None
    t, r, c = video_shape
    pt, pr, pc = pool_shape
    ts, rs, cs = pool_stride
    assert pt > 0
    assert pr > 0
    assert pc > 0
    assert pt <= t
    assert pr <= r
    assert pc <= c

    # Compute index in pooled space of last needed pool
    # (needed = each input pixel must appear in at least one pool)
    def last_pool(im_shp, p_shp, p_strd):
        rval = int(np.ceil(float(im_shp - p_shp) / p_strd))
        assert p_strd * rval + p_shp >= im_shp
        assert p_strd * (rval - 1) + p_shp < im_shp
        return rval

    # Compute starting time index of the last pool
    last_pool_t = last_pool(video_shape[0], pool_shape[0],
                            pool_stride[0]) * pool_stride[0]
    # Compute number of time steps needed in the video for all indices to
    # work out
    required_t = last_pool_t + pt

    last_pool_r = last_pool(video_shape[1], pool_shape[1],
                            pool_stride[1]) * pool_stride[1]
    required_r = last_pool_r + pr

    last_pool_c = last_pool(video_shape[2], pool_shape[2],
                            pool_stride[2]) * pool_stride[2]
    required_c = last_pool_c + pc

    for b012cv in get_debug_values(b012c):
        assert not np.any(np.isinf(b012cv))
        assert b012cv.shape[1] == t
        assert b012cv.shape[2] == r
        assert b012cv.shape[3] == c

    wide_infinity = T.alloc(-np.inf, b012c.shape[0], required_t, required_r,
                            required_c, b012c.shape[4])

    name = b012c.name
    if name is None:
        name = 'anon_b012c'
    b012c = T.set_subtensor(wide_infinity[:, 0:t, 0:r, 0:c, :], b012c)
    b012c.name = 'infinite_padded_' + name

    for time_within_pool in xrange(pool_shape[0]):
        time_stop = last_pool_t + time_within_pool + 1
        for row_within_pool in xrange(pool_shape[1]):
            row_stop = last_pool_r + row_within_pool + 1
            for col_within_pool in xrange(pool_shape[2]):
                col_stop = last_pool_c + col_within_pool + 1
                cur = b012c[:, time_within_pool:time_stop:ts,
                            row_within_pool:row_stop:rs,
                            col_within_pool:col_stop:cs, :]
                if mx is None:
                    mx = cur
                else:
                    mx = T.maximum(mx, cur)

    for time_within_pool in xrange(pool_shape[0]):
        time_stop = last_pool_t + time_within_pool + 1
        for row_within_pool in xrange(pool_shape[1]):
            row_stop = last_pool_r + row_within_pool + 1
            for col_within_pool in xrange(pool_shape[2]):
                col_stop = last_pool_c + col_within_pool + 1
                cur = b012c[:, time_within_pool:time_stop:ts,
                            row_within_pool:row_stop:rs,
                            col_within_pool:col_stop:cs, :]
                b012c = T.set_subtensor(
                    b012c[:, time_within_pool:time_stop:ts,
                          row_within_pool:row_stop:rs,
                          col_within_pool:col_stop:cs, :], cur * (cur >= mx))

    b012c = b012c[:, 0:t, 0:r, 0:c, :]  # remove infinity padding
    for b012cv in get_debug_values(b012c):
        assert not np.any(np.isnan(b012cv))
        assert not np.any(np.isinf(b012cv))

    return b012c
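The masking step cur * (cur >= mx) above implements local winner-take-all: within each pooling window, only the entries equal to the window maximum survive and everything else is zeroed. A tiny NumPy sketch of that rule on one window (values are made up):

import numpy as np

window = np.array([0.2, 1.5, 0.3, 1.5])
mx = window.max()
lwta = window * (window >= mx)
print(lwta)    # losers become 0.0; both tied winners keep their value 1.5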
Example #52
0
    def setup(self, model, dataset):
        """
        Compiles the theano functions needed for the train method.

        Parameters
        ----------
        model : a Model instance
        dataset : Dataset
        """
        if self.cost is None:
            self.cost = model.get_default_cost()

        inf_params = [param for param in model.get_params()
                      if contains_inf(param.get_value())]
        if len(inf_params) > 0:
            raise ValueError("These params are Inf: "+str(inf_params))
        if any([contains_nan(param.get_value())
                for param in model.get_params()]):
            nan_params = [param for param in model.get_params()
                          if contains_nan(param.get_value())]
            raise ValueError("These params are NaN: "+str(nan_params))
        self.model = model

        self._synchronize_batch_size(model)
        model._test_batch_size = self.batch_size
        self.monitor = Monitor.get_monitor(model)
        self.monitor._sanity_check()

        # Check whether the model forces a fixed batch size, and whether the
        # training and monitoring datasets divide evenly into such batches.
        has_force_batch_size = getattr(model, "force_batch_size", False)
        train_dataset_is_uneven = \
            dataset.get_num_examples() % self.batch_size != 0

        has_monitoring_datasets = \
            self.monitoring_dataset is not None and \
            len(self.monitoring_dataset) > 0

        if has_monitoring_datasets:
            monitoring_datasets_are_uneven = \
                any(d.get_num_examples() % self.batch_size
                    != 0 for d in self.monitoring_dataset.values())
        else:
            monitoring_datasets_are_uneven = False  # irrelevant in this case

        if has_force_batch_size and train_dataset_is_uneven and \
           not has_uniform_batch_size(self.train_iteration_mode):

            raise ValueError("Dataset size is not a multiple of batch size."
                             "You should set train_iteration_mode (and "
                             "maybe monitor_iteration_mode) to "
                             "even_sequential, even_shuffled_sequential or "
                             "even_batchwise_shuffled_sequential")

        if has_force_batch_size and has_monitoring_datasets and \
           monitoring_datasets_are_uneven and \
           not has_uniform_batch_size(self.monitor_iteration_mode):

            raise ValueError("Dataset size is not a multiple of batch size."
                             "You should set monitor_iteration_mode to "
                             "even_sequential, even_shuffled_sequential or "
                             "even_batchwise_shuffled_sequential")

        data_specs = self.cost.get_data_specs(self.model)
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

        # Build a flat tuple of Theano Variables, one for each space.
        # We want that so that if the same space/source is specified
        # more than once in data_specs, only one Theano Variable
        # is generated for it, and the corresponding value is passed
        # only once to the compiled Theano function.
        theano_args = []
        for space, source in safe_zip(space_tuple, source_tuple):
            name = '%s[%s]' % (self.__class__.__name__, source)
            arg = space.make_theano_batch(name=name,
                                          batch_size=self.batch_size)
            theano_args.append(arg)
        theano_args = tuple(theano_args)

        # Methods of `self.cost` need args to be passed in a format compatible
        # with data_specs
        nested_args = mapping.nest(theano_args)
        fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
        self.on_load_batch = fixed_var_descr.on_load_batch

        cost_value = self.cost.expr(model, nested_args,
                                    ** fixed_var_descr.fixed_vars)

        if cost_value is not None and cost_value.name is None:
            # Concatenate the name of all tensors in theano_args !?
            cost_value.name = 'objective'

        learning_rate = self.learning_rate
        params = list(model.get_params())
        assert len(params) > 0
        for i, param in enumerate(params):
            if param.name is None:
                param.name = 'sgd_params[%d]' % i

        grads, updates = self.cost.get_gradients(model, nested_args,
                                                 ** fixed_var_descr.fixed_vars)
        if not isinstance(grads, OrderedDict):
            raise TypeError(str(type(self.cost)) + ".get_gradients returned " +
                            "something with " + str(type(grads)) + " as its " +
                            "first member. Expected OrderedDict.")

        for param in grads:
            assert param in params
        for param in params:
            assert param in grads

        for param in grads:
            if grads[param].name is None and cost_value is not None:
                grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                     {'costname': cost_value.name,
                                      'paramname': param.name})
            assert grads[param].dtype == param.dtype

        lr_scalers = model.get_lr_scalers()

        for key in lr_scalers:
            if key not in params:
                raise ValueError("Tried to scale the learning rate on " +\
                        str(key)+" which is not an optimization parameter.")

        log.info('Parameter and initial learning rate summary:')
        for param in params:
            param_name = param.name
            if param_name is None:
                param_name = 'anon_param'
            lr = learning_rate.get_value() * lr_scalers.get(param,1.)
            log.info('\t' + param_name + ': ' + str(lr))

        if self.learning_rule:
            updates.update(self.learning_rule.get_updates(
                learning_rate, grads, lr_scalers))
        else:
            # Use standard SGD updates with fixed learning rate.
            updates.update(dict(safe_zip(
                params,
                [param - learning_rate * lr_scalers.get(param, 1.) *
                 grads[param] for param in params])))

        for param in params:
            if updates[param].name is None:
                updates[param].name = 'sgd_update(' + param.name + ')'
        model.modify_updates(updates)
        for param in params:
            update = updates[param]
            if update.name is None:
                update.name = 'censor(sgd_update(' + param.name + '))'
            for update_val in get_debug_values(update):
                if contains_inf(update_val):
                    raise ValueError("debug value of %s contains infs" %
                            update.name)
                if contains_nan(update_val):
                    raise ValueError("debug value of %s contains nans" %
                            update.name)


        # Set up monitor to model the objective value, learning rate,
        # momentum (if applicable), and extra channels defined by
        # the cost.
        # We have to do that after learning_rule.get_updates has been
        # called, since it may have an effect on
        # learning_rule.add_channels_to_monitor (that is currently the case
        # for AdaDelta and RMSProp).
        self._setup_monitor()

        with log_timing(log, 'Compiling sgd_update'):
            self.sgd_update = function(theano_args,
                                       updates=updates,
                                       name='sgd_update',
                                       on_unused_input='ignore',
                                       mode=self.theano_function_mode)
        self.params = params
Example #53
0
updates = {}

alpha = T.scalar()
alpha.name = 'alpha'
alpha.tag.test_value = lr

for cost, params in [(mf1_cost, mf1mod.get_params())]:
    #(mfn_cost, mfnmod.get_params()) ]:
    for param in params:
        if param.name != 'mu' and param.name != 'beta':
            inc = sharedX(np.zeros(param.get_value().shape))
            grad = T.grad(cost, param)
            #grad = Print('d cost / d '+param.name,attrs=['min','max'])(grad)
            new_inc = momentum * inc - alpha * grad
            for v in get_debug_values(new_inc):
                assert not np.any(np.isnan(v))
                assert not np.any(np.isinf(v))
            updates[param] = param + new_inc
            #updates[param] = Print('updates['+param.name+']',attrs=['min','max'])(updates[param])
            for v in get_debug_values(updates[param]):
                assert not np.any(np.isnan(v))
                assert not np.any(np.isinf(v))
            updates[inc] = new_inc

from theano import function

func = function([Xb, yb, alpha], updates=updates)

nodes = func.maker.fgraph.toposort()
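The update graph above is classical momentum SGD: new_inc = momentum * inc - alpha * grad, and the parameter becomes param + new_inc. A minimal NumPy sketch of one step (the momentum and alpha values here are made up):

import numpy as np

def momentum_step(param, inc, grad, alpha, momentum):
    new_inc = momentum * inc - alpha * grad    # matches the new_inc node above
    new_param = param + new_inc                # matches updates[param]
    return new_param, new_inc

w, v = np.array([0.5]), np.zeros(1)
g = np.array([0.2])
w, v = momentum_step(w, v, g, alpha=0.1, momentum=0.9)
print(w, v)    # -> [0.48] [-0.02]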