Example no. 1
 def blah(i, node, thunk):
     # debug hook: record a printable id for each node, optionally dump its op and inputs
     imap[node] = str(i)
     if print_prog:  # and node.op.__class__ is T.DimShuffle:
         if False and node.op == T.DimShuffle((), ['x', 'x'], inplace=True):
             print(node.op == T.DimShuffle((), ['x', 'x'], inplace=True), end=' ')
             print(node.inputs[0], type(node.inputs[0]), end=' ')
             print(node.inputs[0].equals(T.constant(2)), end=' ')
         outputs = node.outputs
         inputs = theano.gof.graph.inputs(outputs)
         print('node ', i, node, end=' ')
         print(':'.join([imap[inp.owner] for inp in node.inputs]))
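Note: the DimShuffle((), ['x', 'x']) op this debug hook looks for is the one that lifts a 0-d scalar into a 1x1 broadcastable "matrix". A minimal sketch of what it produces (the names below are illustrative, not part of the example above):

import theano
import theano.tensor as T

s = T.scalar('s')                       # 0-d input, broadcastable pattern ()
m = T.DimShuffle((), ['x', 'x'])(s)     # insert two broadcastable axes
print(m.broadcastable)                  # (True, True)
print(theano.function([s], m)(2.0))     # 1x1 output: [[ 2.]]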
Example no. 2
    def test_sparseblockgemvF(self):
        # Test the Fortran order for W (which can happen in the grad for some
        # graphs).

        b = tensor.fmatrix()
        W = tensor.ftensor4()
        h = tensor.ftensor3()
        iIdx = tensor.imatrix()
        oIdx = tensor.imatrix()

        o = self.gemv_op(
            b.take(oIdx, axis=0),
            tensor.DimShuffle((False, False, False, False), (0, 1, 3, 2))(
                tensor.as_tensor_variable(W)
            ),
            h,
            iIdx,
            oIdx,
        )

        f = theano.function([W, h, iIdx, b, oIdx], o, mode=self.mode)

        W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()

        th_out = f(np.swapaxes(W_val, 2, 3), h_val, iIdx_val, b_val, oIdx_val)
        ref_out = self.gemv_numpy(
            b_val.take(oIdx_val, axis=0), W_val, h_val, iIdx_val, oIdx_val
        )

        utt.assert_allclose(ref_out, th_out)
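The DimShuffle((False, False, False, False), (0, 1, 3, 2)) application above swaps the last two axes of W, which is why the test feeds np.swapaxes(W_val, 2, 3) but compares against the un-swapped reference. A hedged standalone sketch of that equivalence (shapes are illustrative):

import numpy as np
import theano
import theano.tensor as tensor

W = tensor.ftensor4('W')
W_t = tensor.DimShuffle((False, False, False, False), (0, 1, 3, 2))(W)
f = theano.function([W], W_t)

W_val = np.random.rand(2, 3, 4, 5).astype('float32')
assert np.allclose(f(W_val), np.swapaxes(W_val, 2, 3))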
Example no. 4
def test_bug_2009_06_02_trac_387():
    y = tensor.lvector("y")
    f = theano.function([y],
                        tensor.int_div(
                            tensor.DimShuffle(y[0].broadcastable, ["x"])(y[0]),
                            2))
    print(f(np.ones(1, dtype="int64") * 3))
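This regression test lifts the 0-d scalar y[0] into a length-1 vector before the integer division. A small sketch of the same lift, assuming the usual Theano API (the dimshuffle method form is the common shorthand for the explicit Op):

import numpy as np
import theano
import theano.tensor as tensor

y = tensor.lvector('y')
s = y[0]                                            # 0-d int64 scalar
v1 = tensor.DimShuffle(s.broadcastable, ['x'])(s)   # explicit Op, as in the test
v2 = s.dimshuffle('x')                              # equivalent method form
f = theano.function([y], [v1 // 2, v2 // 2])
print(f(np.ones(1, dtype='int64') * 3))             # both outputs: array([1])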
Example no. 5
def max_pool(images, imgshp, maxpoolshp):
    """Implements a max pooling layer

    Takes as input a 2D tensor of shape batch_size x img_size and
    performs max pooling.  Max pooling downsamples by taking the max
    value in a given area, here defined by maxpoolshp. Outputs a 2D
    tensor of shape batch_size x output_size.

    :param images: 2D tensor containing images on which to apply convolution.
                   Assumed to be of shape batch_size x img_size
    :param imgshp: tuple containing image dimensions
    :param maxpoolshp: tuple containing shape of area to max pool over

    :return: out1, symbolic result (2D tensor)
    :return: out2, logical shape of the output
    """
    N = numpy
    poolsize = N.int64(N.prod(maxpoolshp))

    # imgshp contains either 2 entries (height,width) or 3 (nfeatures,h,w)
    # in the first case, default nfeatures to 1
    if N.size(imgshp) == 2:
        imgshp = (1, ) + imgshp

    # construct indices and index pointers for sparse matrix, which,
    # when multiplied with input images will generate a stack of image
    # patches
    indices, indptr, spmat_shape, sptype, outshp = \
            convolution_indices.conv_eval(imgshp, maxpoolshp,
                                          maxpoolshp, mode='valid')

    #    print 'XXXXXXXXXXXXXXXX MAX POOLING LAYER XXXXXXXXXXXXXXXXXXXX'
    #    print 'imgshp = ', imgshp
    #    print 'maxpoolshp = ', maxpoolshp
    #    print 'outshp = ', outshp

    # build sparse matrix, then generate stack of image patches
    csc = theano.sparse.CSM(sptype)(N.ones(indices.size), indices, indptr,
                                    spmat_shape)
    patches = sparse.structured_dot(csc, images.T).T

    pshape = tensor.stack([images.shape[0] *\
                               tensor.as_tensor(N.prod(outshp)),
                           tensor.as_tensor(imgshp[0]),
                           tensor.as_tensor(poolsize)])
    patch_stack = tensor.reshape(patches, pshape, ndim=3)

    out1 = tensor.max(patch_stack, axis=2)

    pshape = tensor.stack([
        images.shape[0],
        tensor.as_tensor(N.prod(outshp)),
        tensor.as_tensor(imgshp[0])
    ])
    out2 = tensor.reshape(out1, pshape, ndim=3)

    out3 = tensor.DimShuffle(out2.broadcastable, (0, 2, 1))(out2)

    return tensor.flatten(out3, 2), outshp
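A hedged usage sketch for max_pool, assuming the surrounding module (convolution_indices, the sparse CSM helpers) is importable; the input shape and pooling window are illustrative:

import theano
import theano.tensor as tensor

images = tensor.fmatrix('images')       # batch_size x (28*28), images in raster order
pooled, outshp = max_pool(images, imgshp=(28, 28), maxpoolshp=(2, 2))
f = theano.function([images], pooled)   # outshp holds the logical shape of the output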
Example no. 6
    def makeKeepDims_local(self, x, y, axis):
        x = tensor.as_tensor_variable(x)
        y = tensor.as_tensor_variable(y)

        if axis is None:
            axis = numpy.arange(x.ndim)
        i = 0
        new_dims = []
        for j, _ in enumerate(x.shape):
            if j in axis:
                new_dims.append('x')
            else:
                new_dims.append(i)
                i += 1

        return tensor.DimShuffle(y.type.broadcastable, new_dims)(y)
Example no. 7
    def makeKeepDims_local(self, x, y, axis):
        x = tensor.as_tensor_variable(x)
        y = tensor.as_tensor_variable(y)

        if axis is None:
            axis = numpy.arange(x.ndim)
        elif isinstance(axis, int):
            axis = [axis]
        i = 0
        newaxis = []
        for a in axis:
            if a < 0:
                a += x.type.ndim
            newaxis.append(a)
        new_dims = []
        for j, _ in enumerate(x.shape):
            if j in newaxis:
                new_dims.append('x')
            else:
                new_dims.append(i)
                i += 1

        return tensor.DimShuffle(y.type.broadcastable, new_dims)(y)
Example no. 8
    def makeKeepDims_local(self, x, y, axis):
        if axis is None:
            newaxis = list(range(x.ndim))
        elif isinstance(axis, integer_types):
            if axis < 0:
                newaxis = [axis + x.type.ndim]
            else:
                newaxis = [axis]
        else:
            newaxis = []
            for a in axis:
                if a < 0:
                    a += x.type.ndim
                newaxis.append(a)
        i = 0
        new_dims = []
        for j, _ in enumerate(x.shape):
            if j in newaxis:
                new_dims.append('x')
            else:
                new_dims.append(i)
                i += 1

        return tensor.DimShuffle(y.type.broadcastable, new_dims)(y)
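All three makeKeepDims_local variants rebuild the broadcastable pattern that a reduction dropped, so the reduced value can broadcast against the original input (the effect of NumPy's keepdims=True). A minimal sketch of that idea, assuming standard Theano ops:

import numpy as np
import theano
import theano.tensor as tensor

x = tensor.fmatrix('x')
y = tensor.max(x, axis=1)                                       # axis 1 is dropped
y_kept = tensor.DimShuffle(y.type.broadcastable, [0, 'x'])(y)   # re-insert it as a broadcastable dim
f = theano.function([x], x - y_kept)                            # now broadcasts row-wise
print(f(np.arange(6, dtype='float32').reshape(2, 3)))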
Example no. 9
def convolve(kerns,
             kshp,
             nkern,
             images,
             imgshp,
             step=(1, 1),
             bias=None,
             mode='valid',
             flatten=True):
    """Convolution implementation by sparse matrix multiplication.

    :note: For best speed, put the matrix which you expect to be
           smaller as the 'kernel' argument

    "images" is assumed to be a matrix of shape batch_size x img_size,
    where the second dimension represents each image in raster order

    If flatten is "False", the output feature map will have shape:

    .. code-block:: python

        batch_size x number of kernels x output_size

    If flatten is "True", the output feature map will have shape:

    .. code-block:: python

        batch_size x number of kernels * output_size

    .. note::

        IMPORTANT: note that this means that each feature map (the image
        generated by each kernel) is contiguous in memory. The memory
        layout will therefore be: [ <feature_map_0> <feature_map_1>
        ... <feature_map_n>], where <feature_map> represents a
        "feature map" in raster order

    kerns is a 2D tensor of shape nkern x N.prod(kshp)

    :param kerns: 2D tensor containing kernels which are applied at every pixel
    :param kshp: tuple containing actual dimensions of kernel (not symbolic)
    :param nkern: number of kernels/filters to apply.
                  nkern=1 will apply one common filter to all input pixels
    :param images: tensor containing images on which to apply convolution
    :param imgshp: tuple containing image dimensions
    :param step: determines number of pixels between adjacent receptive fields
                 (tuple containing dx,dy values)
    :param mode: 'full', 'valid' see CSM.evaluate function for details
    :param flatten: flatten the last 2 dimensions of the output. By default,
                    instead of generating a batchsize x outsize x nkern tensor,
                    will flatten to batchsize x outsize*nkern

    :return: out1, symbolic result
    :return: out2, logical shape of the output img (nkern, height, width)

    :TODO: test for 1D and think of how to do n-d convolutions
    """
    N = numpy
    # start by computing output dimensions, size, etc
    kern_size = N.int64(N.prod(kshp))

    # inshp contains either 2 entries (height,width) or 3 (nfeatures,h,w)
    # in the first case, default nfeatures to 1
    if N.size(imgshp) == 2:
        imgshp = (1, ) + imgshp

    # construct indices and index pointers for sparse matrix, which,
    # when multiplied with input images will generate a stack of image
    # patches
    indices, indptr, spmat_shape, sptype, outshp = \
            convolution_indices.conv_eval(imgshp, kshp, step, mode)

    # build sparse matrix, then generate stack of image patches
    csc = theano.sparse.CSM(sptype)(N.ones(indices.size), indices, indptr,
                                    spmat_shape)
    patches = (sparse.structured_dot(csc, images.T)).T

    # compute output of linear classifier
    pshape = tensor.stack(images.shape[0] * tensor.as_tensor(N.prod(outshp)),\
                          tensor.as_tensor(imgshp[0] * kern_size))
    patch_stack = tensor.reshape(patches, pshape, ndim=2)

    # kern is of shape: nkern x ksize*number_of_input_features
    # output is thus of shape: bsize*outshp x nkern
    output = tensor.dot(patch_stack, kerns.T)

    # add bias across each feature map (more efficient to do it now)
    if bias is not None:
        output += bias

    # now to have feature maps in raster order ...
    # go from bsize*outshp x nkern to bsize x nkern*outshp
    newshp = tensor.stack(images.shape[0],\
                          tensor.as_tensor(N.prod(outshp)),\
                          tensor.as_tensor(nkern))
    tensout = tensor.reshape(output, newshp, ndim=3)
    output = tensor.DimShuffle((False, ) * tensout.ndim, (0, 2, 1))(tensout)
    if flatten:
        output = tensor.flatten(output, 2)

    return output, N.hstack((nkern, outshp))
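A hedged usage sketch for convolve, assuming the surrounding module's sparse helpers (convolution_indices, CSM) are importable; kernel and image shapes are illustrative:

import numpy
import theano
import theano.tensor as tensor

nkern, kshp, imgshp = 8, (5, 5), (28, 28)
kerns = theano.shared(numpy.random.rand(nkern, numpy.prod(kshp)).astype(theano.config.floatX))
images = tensor.fmatrix('images')       # batch_size x (28*28), images in raster order

out, logical_shp = convolve(kerns, kshp, nkern, images, imgshp, mode='valid')
f = theano.function([images], out)      # logical_shp is (nkern, out_height, out_width)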
Example no. 10
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 input=None,
                 n_visible=784,
                 n_hidden=500,
                 W=None,
                 bhid=None,
                 bvis=None):
        """
        Initialize the dA class by specifying the number of visible units (the
        dimension d of the input), the number of hidden units (the dimension
        d' of the latent or hidden space) and the corruption level. The
        constructor also receives symbolic variables for the input, weights and
        biases. Such symbolic variables are useful when, for example, the input
        is the result of some computation, or when weights are shared between
        the dA and an MLP layer. When dealing with SdAs this always happens:
        the dA on layer 2 gets as input the output of the dA on layer 1,
        and the weights of the dA are used in the second stage of training
        to construct an MLP.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to generate weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                     generated based on a seed drawn from `numpy_rng`

        :type input: theano.tensor.TensorType
        :param input: a symbolic description of the input or None for
                      standalone dA

        :type n_visible: int
        :param n_visible: number of visible units

        :type n_hidden: int
        :param n_hidden:  number of hidden units

        :type W: theano.tensor.TensorType
        :param W: Theano variable pointing to a set of weights that should be
                  shared between the dA and another architecture; if the dA
                  should be standalone set this to None

        :type bhid: theano.tensor.TensorType
        :param bhid: Theano variable pointing to a set of bias values (for
                     hidden units) that should be shared between the dA and
                     another architecture; if the dA should be standalone set
                     this to None

        :type bvis: theano.tensor.TensorType
        :param bvis: Theano variable pointing to a set of bias values (for
                     visible units) that should be shared between the dA and
                     another architecture; if the dA should be standalone set
                     this to None


        """
        self.n_visible = n_visible
        self.n_hidden = n_hidden

        # create a Theano random generator that gives symbolic random values
        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        # note : W' was written as `W_prime` and b' as `b_prime`
        if not W:
            # W is initialized with `initial_W`, which is uniformly sampled
            # from -4*sqrt(6./(n_visible+n_hidden)) and
            # 4*sqrt(6./(n_hidden+n_visible)); the output of uniform is
            # converted using asarray to dtype theano.config.floatX so that
            # the code is runnable on GPU
            initial_W = numpy.asarray(numpy_rng.uniform(
                low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                size=(n_visible, n_hidden)),
                                      dtype=theano.config.floatX)
            W = theano.shared(value=initial_W, name='W', borrow=True)

        if not bvis:
            bvis = theano.shared(value=numpy.zeros(n_visible,
                                                   dtype=theano.config.floatX),
                                 borrow=True)

        if not bhid:
            bhid = theano.shared(value=numpy.zeros(n_hidden,
                                                   dtype=theano.config.floatX),
                                 name='b',
                                 borrow=True)

        self.W = W
        # b corresponds to the bias of the hidden
        self.b = bhid
        # b_prime corresponds to the bias of the visible
        self.b_prime = bvis
        # tied weights, therefore W_prime is W transposed
        # (explicit DimShuffle over the two axes of the shared matrix W)
        self.W_prime = T.DimShuffle(self.W.broadcastable, (1, 0))(self.W)
        self.theano_rng = theano_rng
        # if no input is given, generate a variable representing the input
        if input is None:
            # we use a matrix because we expect a minibatch of several
            # examples, each example being a row
            self.x = T.dmatrix(name='input')
        else:
            self.x = input

        self.params = [self.W, self.b, self.b_prime]
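For the tied weights above, the explicit DimShuffle transpose is equivalent to the usual W.T shorthand; a small sketch (the shared-variable shape is illustrative):

import numpy
import theano
import theano.tensor as T

W = theano.shared(numpy.arange(6, dtype=theano.config.floatX).reshape(2, 3), name='W')
W_prime = T.DimShuffle(W.broadcastable, (1, 0))(W)
assert numpy.allclose(W_prime.eval(), W.get_value().T)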
Example no. 11
    class Opt(object):
        merge = theano.gof.MergeOptimizer()
        gemm_opt_1 = theano.gof.TopoOptimizer(theano.tensor_opt.gemm_pattern_1)

        gemm_opt_2 = theano.gof.TopoOptimizer(  # d -= a * (dot()+transpose(dot))
            theano.gof.PatternSub(
                (T.sub_inplace, 'd', (T.mul,
                                      dict(pattern=(T.DimShuffle(
                                          (), ['x', 'x'], inplace=True), 'a'),
                                           allow_multiple_clients=True),
                                      (T.add, (T.dot, 'b', 'c'),
                                       (T.transpose_inplace,
                                        (T.dot, 'f', 'g'))))),
                (T.gemm,
                 (T.gemm, 'd', (T.neg, 'a'), (T.transpose_inplace, 'g'),
                  (T.transpose_inplace, 'f'), T.constant(1.0)),
                 (T.neg, 'a'), 'b', 'c', T.constant(1.0)),
                allow_multiple_clients=False))

        sqr = []
        sqr.append(
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub((T.mul, 'x', 'x'), (T.sqr, 'x'),
                                      allow_multiple_clients=True)))
        sqr.append(
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub((T.pow, 'x', (T.DimShuffle(
                    (), ['x', 'x'], inplace=True), T.constant(2))),
                                      (T.sqr, 'x'),
                                      allow_multiple_clients=True)))

        ident_opt_list = []
        ident_opt_list.append(  # remove explicit copies
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub((T.tensor_copy, 'x'),
                                      'x',
                                      allow_multiple_clients=True)))
        ident_opt_list.append(  # remove double-transpose
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub(
                    (T.transpose_inplace, (T.transpose_inplace, 'x')),
                    'x',
                    allow_multiple_clients=True)))

        ident_opt_list.append(
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub((T.sqr, (T.sqrt, 'x')),
                                      'x',
                                      allow_multiple_clients=True)))
        ident_opt_list.append(
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub((T.sqrt, (T.sqr, 'x')),
                                      'x',
                                      allow_multiple_clients=True)))
        ident_opt_list.append(
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub((T.mul, 'x', (T.div, 'y', 'x')),
                                      'y',
                                      allow_multiple_clients=True)))

        ident_opt_list.append(
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub((T.mul, (T.div, 'y', 'x'), 'x'),
                                      'y',
                                      allow_multiple_clients=True)))

        ident_opt_list.append(
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub((T.div, (T.mul, 'y', 'x'), 'x'),
                                      'y',
                                      allow_multiple_clients=True)))

        ident_opt_list.append(
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub((T.div, (T.mul, 'y', 'x'), 'y'),
                                      'x',
                                      allow_multiple_clients=True)))

        def __call__(self, env):
            self.merge(env)
            # eliminate identities
            if 0:
                print('SKIPPING optimizations')
            else:

                for opt in self.ident_opt_list:
                    opt(env)

                for opt in self.sqr:
                    opt(env)

                self.gemm_opt_1(env)
                self.gemm_opt_2(env)

                self.merge(env)
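Several of the PatternSub patterns above match DimShuffle((), ['x', 'x'], inplace=True) because that is the op Theano inserts when a 0-d scalar is broadcast against a matrix. A hedged sketch showing where such a node comes from:

import theano
import theano.tensor as T

a = T.scalar('a')
M = T.matrix('M')
out = a * M
# debugprint typically shows an (Inplace)DimShuffle{x,x} node feeding Elemwise{mul}
theano.printing.debugprint(out)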