Example #1
    def __init__(self,
                 input=tensor.dvector('input'),
                 target=tensor.dvector('target'),
                 n_input=1,
                 n_hidden=1,
                 n_output=1,
                 lr=1e-3,
                 **kw):
        super(NNet, self).__init__(**kw)

        self.input = input
        self.target = target
        self.lr = shared(lr, 'learning_rate')
        self.w1 = shared(numpy.zeros((n_hidden, n_input)), 'w1')
        self.w2 = shared(numpy.zeros((n_output, n_hidden)), 'w2')
        # print self.lr.type

        self.hidden = sigmoid(tensor.dot(self.w1, self.input))
        self.output = tensor.dot(self.w2, self.hidden)
        self.cost = tensor.sum((self.output - self.target)**2)

        self.sgd_updates = {
            self.w1: self.w1 - self.lr * tensor.grad(self.cost, self.w1),
            self.w2: self.w2 - self.lr * tensor.grad(self.cost, self.w2)
        }

        self.sgd_step = pfunc(params=[self.input, self.target],
                              outputs=[self.output, self.cost],
                              updates=self.sgd_updates)

        self.compute_output = pfunc([self.input], self.output)

        self.output_from_hidden = pfunc([self.hidden], self.output)
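
A minimal usage sketch of the constructor above (hypothetical: it assumes the __init__ shown is wrapped in an NNet class and that numpy, shared, sigmoid, tensor and pfunc are imported as in the snippet):

import numpy

# Tiny network: 3 inputs, 4 hidden units, 2 outputs.
net = NNet(n_input=3, n_hidden=4, n_output=2, lr=1e-2)
x = numpy.random.rand(3)    # one input vector (matches the dvector 'input')
y = numpy.random.rand(2)    # matching target vector
for step in range(100):
    output, cost = net.sgd_step(x, y)   # one SGD update of w1 and w2
prediction = net.compute_output(x)      # forward pass with the trained weights
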
Example #2
def test_elemwise1():
    """ Several kinds of elemwise expressions with no broadcasting, non power-of-two shape """

    shape = (3,4)
    a = tcn.shared_constructor(theano._asarray(numpy.random.rand(*shape), dtype='float32')+0.5, 'a')
    b = tensor.fmatrix()

    #let debugmode catch any mistakes
    print >> sys.stdout, "STARTING FUNCTION 1"
    f = pfunc([b], [], updates=[(a, b**a)], mode=mode_with_gpu)
    for i, node in enumerate(f.maker.env.toposort()):
        print i, node
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32')+0.3)

    print >> sys.stdout, "STARTING FUNCTION 2"
    #let debugmode catch any mistakes
    f = pfunc([b], [], updates=[(a, tensor.exp(b**a))], mode=mode_with_gpu)
    for i, node in enumerate(f.maker.env.toposort()):
        print i, node
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32')+0.3)

    print >> sys.stdout, "STARTING FUNCTION 3"
    #let debugmode catch any mistakes
    f = pfunc([b], [], updates=[(a, a+b * tensor.exp(b**a))], mode=mode_with_gpu)
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32')+0.3)
Example #3
    def __init__(self,
                 input=tensor.dvector('input'),
                 target=tensor.dvector('target'),
                 n_input=1, n_hidden=1, n_output=1, lr=1e-3, **kw):
        super(NNet, self).__init__(**kw)

        self.input = input
        self.target = target
        self.lr = shared(lr, 'learning_rate')
        self.w1 = shared(numpy.zeros((n_hidden, n_input)), 'w1')
        self.w2 = shared(numpy.zeros((n_output, n_hidden)), 'w2')
        # print self.lr.type

        self.hidden = sigmoid(tensor.dot(self.w1, self.input))
        self.output = tensor.dot(self.w2, self.hidden)
        self.cost = tensor.sum((self.output - self.target)**2)

        self.sgd_updates = {
            self.w1: self.w1 - self.lr * tensor.grad(self.cost, self.w1),
            self.w2: self.w2 - self.lr * tensor.grad(self.cost, self.w2)}

        self.sgd_step = pfunc(
            params=[self.input, self.target],
            outputs=[self.output, self.cost],
            updates=self.sgd_updates)

        self.compute_output = pfunc([self.input], self.output)

        self.output_from_hidden = pfunc([self.hidden], self.output)
Example #4
def test_elemwise2():
    """ Several kinds of elemwise expressions with dimension permutations """
    rng = numpy.random.RandomState(int(time.time()))
    shape = (3, 5)
    for pattern in [(0, 1), (1, 0)]:
        a = tcn.shared_constructor(theano._asarray(rng.rand(*shape),
                                                   dtype='float32'), name=None)
        b = tensor.Tensor(dtype='float32', broadcastable=[0] * len(shape))()
        f = pfunc([b], [], updates=[(a, (a + b).dimshuffle(pattern))],
                  mode=mode_with_gpu)
        has_elemwise = False
        for i, node in enumerate(f.maker.env.toposort()):
            has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise)
        assert not has_elemwise
        #let debugmode catch errors
        f(theano._asarray(rng.rand(*shape), dtype='float32') * .3)

    shape = (3, 4, 5, 6)
    a = tcn.shared_constructor(theano._asarray(rng.rand(*shape),
                                               dtype='float32'), 'a')
    b = tensor.Tensor(dtype='float32', broadcastable=[0] * len(shape))()
    f = pfunc([b], [], updates=[(a, (a + b).dimshuffle([2, 0, 3, 1]) *
        tensor.exp(b ** a).dimshuffle([2, 0, 3, 1]))], mode=mode_with_gpu)
    has_elemwise = False
    for i, node in enumerate(f.maker.env.toposort()):
        has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise)
    assert not has_elemwise
    #let debugmode catch errors
    f(theano._asarray(rng.rand(*shape), dtype='float32'))
Example #5
def test_elemwise2():
    """ Several kinds of elemwise expressions with dimension permutations """
    rng = numpy.random.RandomState(int(time.time()))
    print 'random?', rng.rand(3)
    shape = (3,5)
    for pattern in [(0,1), (1,0)]:
        a = tcn.shared_constructor(theano._asarray(rng.rand(*shape),dtype='float32'), name=None)
        b = tensor.Tensor(dtype='float32', broadcastable=[0]*len(shape))()
        f = pfunc([b], [], updates=[(a, (a+b).dimshuffle(pattern))], mode=mode_with_gpu)
        has_elemwise = False
        for i, node in enumerate(f.maker.env.toposort()):
            print >> sys.stdout, i, node
            has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise)
        assert not has_elemwise
        #let debugmode catch errors
        print >> sys.stdout, 'pattern', pattern
        f(theano._asarray(rng.rand(*shape),dtype='float32')*.3)

    shape = (3,4,5,6)
    a = tcn.shared_constructor(theano._asarray(rng.rand(*shape),dtype='float32'), 'a')
    b = tensor.Tensor(dtype='float32', broadcastable=[0]*len(shape))()
    f = pfunc([b], [], updates=[(a, (a+b).dimshuffle([2,0,3,1]) *
        tensor.exp(b**a).dimshuffle([2,0,3,1]))], mode=mode_with_gpu)
    has_elemwise = False
    for i, node in enumerate(f.maker.env.toposort()):
        print i, node
        has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise)
    assert not has_elemwise
    #let debugmode catch errors
    f(theano._asarray(rng.rand(*shape),dtype='float32'))
Example #6
def test_downsample():
    shps = [
        (1, 1, 1, 12),
        (1, 1, 2, 2),
        (1, 1, 1, 1),
        (1, 1, 4, 4),
        (1, 1, 10, 11),
        (1, 2, 2, 2),
        (3, 5, 4, 4),
        (25, 1, 7, 7),
        (1, 1, 12, 12),
        (1, 1, 2, 14),
        (1, 1, 12, 14),
        (1, 1, 14, 14),
        (1, 1, 16, 16),
        (1, 1, 18, 18),
        (1, 1, 24, 24),
        (1, 6, 24, 24),
        (10, 1, 24, 24),
        (10, 6, 24, 24),
        (30, 6, 12, 12),
        (30, 2, 24, 24),
        (30, 6, 24, 24),
        (10, 10, 10, 11),
        (1, 1, 10, 1025),
        (1, 1, 10, 1023),
        (1, 1, 1025, 10),
        (1, 1, 1023, 10),
    ]

    numpy.random.RandomState(unittest_tools.fetch_seed()).shuffle(shps)

    for shp in shps:
        for ds in (2, 2), (3, 2), (1, 1):
            if ds[0] > shp[2]:
                continue
            if ds[1] > shp[3]:
                continue
            # GpuDownsampleFactorMax doesn't like having more than 512 columns
            # in the output tensor.
            if float(shp[3]) / ds[1] > 512:
                continue
            for ignore_border in (True, False):
                print "test_downsample", shp, ds, ignore_border
                ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border)

                a = tcn.shared_constructor(my_rand(*shp), "a")
                f = pfunc([], ds_op(tensor.as_tensor_variable(a)), mode=mode_with_gpu)
                f2 = pfunc([], ds_op(tensor.as_tensor_variable(a)), mode=mode_without_gpu)
                assert any([isinstance(node.op, tcn.blas.GpuDownsampleFactorMax) for node in f.maker.env.toposort()])
                assert any([isinstance(node.op, DownsampleFactorMax) for node in f2.maker.env.toposort()])
                assert numpy.allclose(f(), f2())

                g = pfunc([], tensor.grad(ds_op(tensor.as_tensor_variable(a)).sum(), a), mode=mode_with_gpu)
                g2 = pfunc([], tensor.grad(ds_op(tensor.as_tensor_variable(a)).sum(), a), mode=mode_without_gpu)
                assert any(
                    [isinstance(node.op, tcn.blas.GpuDownsampleFactorMaxGrad) for node in g.maker.env.toposort()]
                )
                assert any([isinstance(node.op, DownsampleFactorMaxGrad) for node in g2.maker.env.toposort()])
                assert numpy.allclose(g(), g2())
Example #7
def test_elemwise1():
    """ Several kinds of elemwise expressions with no broadcasting,
    non power-of-two shape """

    shape = (3, 4)
    a = tcn.shared_constructor(theano._asarray(numpy.random.rand(*shape),
                                               dtype='float32') + 0.5, 'a')
    b = tensor.fmatrix()

    #let debugmode catch any mistakes
    print >> sys.stdout, "STARTING FUNCTION 1"
    f = pfunc([b], [], updates=[(a, b ** a)], mode=mode_with_gpu)
    for i, node in enumerate(f.maker.env.toposort()):
        print i, node
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32') + 0.3)

    print >> sys.stdout, "STARTING FUNCTION 2"
    #let debugmode catch any mistakes
    f = pfunc([b], [], updates=[(a, tensor.exp(b ** a))], mode=mode_with_gpu)
    for i, node in enumerate(f.maker.env.toposort()):
        print i, node
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32') + 0.3)

    print >> sys.stdout, "STARTING FUNCTION 3"
    #let debugmode catch any mistakes
    f = pfunc([b], [], updates=[(a, a + b * tensor.exp(b ** a))],
              mode=mode_with_gpu)
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32') + 0.3)
Example #8
def test_downsample():
    import random
    shps = [ (1, 1, 1, 12),
            (1, 1, 2, 2),
            (1, 1, 1, 1),
            (1,1,4,4),
            (1, 1, 10, 11),
            (1, 2, 2, 2),
            (3,5,4,4),
            (25, 1, 7, 7),
            (1, 1, 12, 12),
            (1, 1, 2, 14),
            (1, 1, 12, 14),
            (1, 1, 14, 14),
            (1, 1, 16, 16),
            (1, 1, 18, 18),
            (1, 1, 24, 24),
            (1, 6, 24, 24),
            (10, 1, 24, 24),
            (10, 6, 24, 24),
            (30, 6, 12, 12),
            (30, 2, 24, 24),
            (30, 6, 24, 24),
            (10, 10, 10, 11),
            (1,1,10,1025),
            (1,1,10,1023),
            (1,1,1025,10),
            (1,1,1023,10),
             ]

    numpy.random.RandomState(unittest_tools.fetch_seed()).shuffle(shps)

    for shp in shps:
        for ds in (2, 2), (3,2), (1,1):
            if ds[0] > shp[2]: continue
            if ds[1] > shp[3]: continue
            # GpuDownsampleFactorMax doesn't like having more than 512 columns
            # in the output tensor.
            if float(shp[3])/ds[1]>512: continue
            for ignore_border in (True, False):
                print 'test_downsample', shp, ds, ignore_border
                ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border)

                a = tcn.shared_constructor(my_rand(*shp), 'a')
                f = pfunc([], ds_op(tensor.as_tensor_variable(a)), mode=mode_with_gpu)
                f2 = pfunc([], ds_op(tensor.as_tensor_variable(a)), mode=mode_without_gpu)
                assert any([isinstance(node.op, tcn.blas.GpuDownsampleFactorMax) for node in
                            f.maker.env.toposort()])
                assert any([isinstance(node.op, DownsampleFactorMax) for node in
                            f2.maker.env.toposort()])
                assert numpy.allclose(f(),f2())

                g = pfunc([], tensor.grad(ds_op(tensor.as_tensor_variable(a)).sum(),a), mode=mode_with_gpu)
                g2 = pfunc([], tensor.grad(ds_op(tensor.as_tensor_variable(a)).sum(),a), mode=mode_without_gpu)
                assert any([isinstance(node.op, tcn.blas.GpuDownsampleFactorMaxGrad)
                            for node in g.maker.env.toposort()])
                assert any([isinstance(node.op, DownsampleFactorMaxGrad)
                            for node in g2.maker.env.toposort()])
                assert numpy.allclose(g(),g2())
Example #9
def cmp_sigmoids(shape):
    def numpy_sigmoid(input):
        rval = 1.0 / (1.0 + numpy.exp(-input))
        return rval
    sinput = tensor.Tensor(dtype='float32', broadcastable=(0,)*len(shape))()
    shared_input = tcn.shared_constructor(numpy.random.rand(*shape), 'shared_input')
    times = compare_fns(
            dict( numpy=numpy_sigmoid
                , theano_cpu=pfunc([sinput], 1.0 / (1.0 + tensor.exp(-sinput)))
                , theano_gpu_onboard=pfunc([sinput], [], updates=[(shared_input, 1.0 / (1.0 + tensor.exp(-shared_input)))])
                ),
            input=shared_input.value)
    showtimes(times)
Example #10
def cmp_sigmoids(shape):
    def numpy_sigmoid(input):
        rval = 1.0 / (1.0 + numpy.exp(-input))
        return rval
    sinput = tensor.Tensor(dtype='float32', broadcastable=(0,)*len(shape))()
    shared_input = tcn.shared_constructor(numpy.random.rand(*shape), 'shared_input')
    times = compare_fns(
            dict( numpy=numpy_sigmoid
                , theano_cpu=pfunc([sinput], 1.0 / (1.0 + tensor.exp(-sinput)))
                , theano_gpu_onboard=pfunc([sinput], [], updates=[(shared_input, 1.0 / (1.0 + tensor.exp(-shared_input)))])
                ),
            input=shared_input.value)
    showtimes(times)
Example #11
def test_elemwise_empty():
    #test with 0 element
    a = tcn.shared_constructor(theano._asarray(numpy.random.rand(0,0), dtype='float32'), 'a')

    b = tensor.fmatrix()

    f = pfunc([b], [], updates=[(a, a+b)], mode=mode_with_gpu)
    f2 = pfunc([b], [], updates=[(a, a+b)], mode=mode_without_gpu)

    a0 = a.get_value() * 1.0
    f(numpy.ones((0,0), dtype='float32'))

    assert numpy.all(a0 + 1.0 == a.get_value())
Example #12
def test_elemwise_composite_float64():
    # Test that we do not fuse composite elemwise ops with a float64 somewhere
    # inside: nvcc downcasts float64 to float32 by default. We would need to
    # tell it not to do so, but that is possible only on some devices.
    a = tensor.fmatrix()
    b = tensor.fmatrix()
    av = theano._asarray(numpy.random.rand(4, 4), dtype='float32')
    bv = numpy.ones((4, 4), dtype='float32')

    def get_all_basic_scalar(composite_op):
        l = []
        for i in composite_op.env.toposort():
            if isinstance(i, theano.scalar.Composite):
                l += get_all_basic_scalar(i)
            else:
                l.append(i)
        return l
    for mode in [mode_with_gpu, mode_with_gpu.excluding('gpu_after_fusion'),
                 mode_with_gpu.excluding('elemwise_fusion')]:
        f = pfunc([a, b],
                  tensor.cast(tensor.lt(tensor.cast(a, 'float64') ** 2,
                                               b),
                                     'float32'), mode=mode)

        out = f(av, bv)
        assert numpy.all(out == ((av ** 2) < bv))
        for node in f.maker.env.toposort():
            if isinstance(node.op, cuda.GpuElemwise):
                if isinstance(node.op.scalar_op, theano.scalar.Composite):
                    scals = get_all_basic_scalar(node.op.scalar_op)
                    for s in scals:
                        assert not any([i.type.dtype == 'float64'
                                        for i in s.inputs + s.outputs])
Example #13
    def cmp(a_shp, b_shp):
        a0 = my_rand(* a_shp)
        a = tcn.shared_constructor(a0, 'a')
        cval = my_rand(a_shp[0], b_shp[1])
        c = tcn.shared_constructor(cval.copy(), 'c')

        b = tcn.fmatrix('b')
        b2 = tcn.fmatrix('b2')

        f = pfunc([b, b2],
                  [tensor.dot(a, b2) + c],
                  updates=[(a, tensor.dot(a, b) + c)],
                  mode=mode_with_gpu)

        assert any([node.op == tcn.blas.gpu_gemm_no_inplace
                    for node in f.maker.fgraph.toposort()])
        bval = my_rand(*b_shp)
        bval2 = my_rand(*b_shp)
        rval = f(bval, bval2)

        assert numpy.allclose(numpy.dot(a0, bval) + cval, a.get_value())
        assert numpy.allclose(numpy.dot(a0, bval2) + cval, rval)

        # Try with a matrix equal to a0, but with strides in both dims
        a.set_value(a0)
        a.set_value(
            a.get_value(
                borrow=True,
                return_internal_type=True)[::-1, ::-1],
            borrow=True)
        f(bval, bval2)
Example #14
    def cmp(a_shp, b_shp):
        a0 = my_rand(*a_shp)
        a = tcn.shared_constructor(a0, 'a')
        cval = my_rand(a_shp[0], b_shp[1])
        c = tcn.shared_constructor(cval.copy(), 'c')

        b = tcn.fmatrix('b')
        b2 = tcn.fmatrix('b2')

        f = pfunc(
                [b, b2],
                [tensor.dot(a, b2) + c],
                updates=[(a, tensor.dot(a, b) + c)],
                mode=mode_with_gpu)

        assert any([node.op == tcn.blas.gpu_gemm_no_inplace
            for node in f.maker.fgraph.toposort()])
        bval = my_rand(*b_shp)
        bval2 = my_rand(*b_shp)
        rval = f(bval, bval2)

        assert numpy.allclose(numpy.dot(a0, bval) + cval, a.get_value())
        assert numpy.allclose(numpy.dot(a0, bval2) + cval, rval)

        # Try with a matrix equal to a0, but with strides in both dims
        a.set_value(a0)
        a.set_value(
                a.get_value(borrow=True,
                    return_internal_type=True)[::-1, ::-1],
                borrow=True)
        f(bval, bval2)
Example #15
    def cmp(a_shp, b_shp):
        a0 = my_rand(*a_shp)
        a = tcn.shared_constructor(a0, 'a')

        b = tensor.fmatrix('b')
        c = tensor.fmatrix('c')

        f = pfunc([b, c], [], updates=[(a, tensor.dot(a, b) + tensor.exp(c))],
                mode=mode_with_gpu)
        assert any([node.op == tcn.blas.gpu_gemm_inplace
            for node in f.maker.env.toposort()])

        bval = my_rand(*b_shp)
        cval = my_rand(a_shp[0], b_shp[1])
        f(bval, cval)

        assert numpy.allclose(numpy.dot(a0, bval) + numpy.exp(cval),
                a.get_value())

        # Try with a matrix equal to a0, but with strides in both dims
        a.set_value(a0)
        a.set_value(
                a.get_value(borrow=True,
                    return_internal_type=True)[::-1, ::-1],
                borrow=True)
        f(bval, cval)
Example #16
    def cmp(a_shp, b_shp):
        a0 = my_rand(*a_shp)
        a = tcn.shared_constructor(a0, 'a')

        b = tensor.fmatrix('b')
        c = tensor.fmatrix('c')

        f = pfunc([b, c], [],
                  updates=[(a, tensor.dot(a, b) + tensor.exp(c))],
                  mode=mode_with_gpu)
        assert any([
            node.op == tcn.blas.gpu_gemm_inplace
            for node in f.maker.env.toposort()
        ])

        bval = my_rand(*b_shp)
        cval = my_rand(a_shp[0], b_shp[1])
        f(bval, cval)

        assert numpy.allclose(
            numpy.dot(a0, bval) + numpy.exp(cval), a.get_value())

        # Try with a matrix equal to a0, but with strides in both dims
        a.set_value(a0)
        a.set_value(a.get_value(borrow=True,
                                return_internal_type=True)[::-1, ::-1],
                    borrow=True)
        f(bval, cval)
Example #17
def test_elemwise_composite_float64():
    # Test that we do not fuse composite elemwise ops with a float64 somewhere
    # inside: nvcc downcasts float64 to float32 by default. We would need to
    # tell it not to do so, but that is possible only on some devices.
    a = tensor.fmatrix()
    b = tensor.fmatrix()
    av = theano._asarray(numpy.random.rand(4,4), dtype='float32')
    bv = numpy.ones((4,4), dtype='float32')

    def get_all_basic_scalar(composite_op):
        l=[]
        for i in composite_op.env.toposort():
            if isinstance(i, theano.scalar.Composite):
                l += get_all_basic_scalar(i)
            else:
                l.append(i)
        return l
    for mode in [mode_with_gpu, mode_with_gpu.excluding('gpu_after_fusion'), mode_with_gpu.excluding('elemwise_fusion')]:
        f = pfunc([a,b], tensor.cast(tensor.lt(tensor.cast(a,'float64')**2,#*numpy.asarray(2, 'float32'),
                                               b),
                                     'float32'), mode=mode)

        #theano.printing.debugprint(f, print_type=True)
        out = f(av,bv)
        assert numpy.all(out == ((av**2)<bv))
        for node in f.maker.env.toposort():
            if isinstance(node.op, cuda.GpuElemwise):
                if isinstance(node.op.scalar_op, theano.scalar.Composite):
                    scals = get_all_basic_scalar(node.op.scalar_op)
                    for s in scals:
                        assert not any([i.type.dtype=='float64' for i in s.inputs+s.outputs])
Example #18
def run_conv_nnet1(use_gpu):
    if use_gpu:
        shared_fn = tcn.shared_constructor
    else:
        shared_fn = shared
    n_batch = 16
    n_kern = 20
    shape_img = (n_batch, 1, 32, 32)
    shape_kern = (n_kern, 1, 5, 5)
    n_train = 10
    if config.mode == 'DEBUG_MODE':
        n_train = 1

    logical_hid_shape = tcn.blas.GpuConv.logical_output_shape_2d(
        shape_img[2:], shape_kern[2:], 'valid')
    n_hid = n_kern * logical_hid_shape[0] * logical_hid_shape[1]
    n_out = 10

    w = shared_fn(0.01 * (my_rand(*shape_kern) - 0.5), 'w')
    b = shared_fn(my_zeros((n_kern,)), 'b')
    v = shared_fn(my_zeros((n_hid, n_out)), 'c')
    c = shared_fn(my_zeros(n_out), 'c')

    x = tensor.Tensor(dtype='float32', broadcastable=(0, 1, 0, 0))('x')
    y = tensor.fmatrix('y')
    lr = tensor.fscalar('lr')

    conv_op = conv.ConvOp(shape_img[2:], shape_kern[2:], n_kern, n_batch, 1, 1)

    hid = tensor.tanh(conv_op(x, w) + b.dimshuffle((0, 'x', 'x')))
    hid_flat = hid.reshape((n_batch, n_hid))
    out = tensor.tanh(tensor.dot(hid_flat, v) + c)
    loss = tensor.sum(0.5 * (out - y) ** 2 * lr)
    # print 'loss type', loss.type

    params = [w, b, v, c]
    gparams = tensor.grad(loss, params)

    mode = get_mode(use_gpu)

    # print 'building pfunc ...'
    train = pfunc(
        [x, y, lr],
        [loss],
        mode=mode,
        updates=[(p, p - g) for p, g in zip(params, gparams)])

#    for i, n in enumerate(train.maker.fgraph.toposort()):
#        print i, n

    xval = my_rand(*shape_img)
    yval = my_rand(n_batch, n_out)
    lr = theano._asarray(0.01, dtype='float32')

    for i in xrange(n_train):
        rval = train(xval, yval, lr)
    # print 'training done'
    print_mode(mode)
    return rval
Example #19
def run_nnet(use_gpu,
             n_batch=60,
             n_in=1024,
             n_hid=2048,
             n_out=10,
             n_train=100):

    if config.mode == 'DEBUG_MODE':
        n_train = 1

    if use_gpu:
        w = tcn.shared_constructor(0.01 * (my_rand(n_in, n_hid) - 0.5), 'w')
        b = tcn.shared_constructor(my_zeros(n_hid), 'b')
        v = tcn.shared_constructor(my_zeros((n_hid, n_out)), 'c')
        c = tcn.shared_constructor(my_zeros(n_out), 'c')
    else:
        w = shared(0.01 * (my_rand(n_in, n_hid) - 0.5), 'w')
        b = shared(my_zeros(n_hid), 'b')
        v = shared(my_zeros((n_hid, n_out)), 'c')
        c = shared(my_zeros(n_out), 'c')

    x = tensor.fmatrix('x')
    y = tensor.fmatrix('y')
    lr = tensor.fscalar('lr')

    hid = tensor.tanh(tensor.dot(x, w) + b)
    out = tensor.tanh(tensor.dot(hid, v) + c)
    loss = tensor.sum(0.5 * (out - y)**2 * lr)
    if 0:
        print 'loss type', loss.type

    params = [w, b, v, c]
    gparams = tensor.grad(loss, params)

    mode = get_mode(use_gpu)

    #print 'building pfunc ...'
    train = pfunc([x, y, lr], [loss],
                  mode=mode,
                  updates=[(p, p - g) for p, g in izip(params, gparams)])

    if 0:
        for i, n in enumerate(train.maker.fgraph.toposort()):
            print i, n

    xval = my_rand(n_batch, n_in)
    yval = my_rand(n_batch, n_out)
    lr = theano._asarray(0.01, dtype='float32')

    t0 = time.time()
    rval = []
    for i in xrange(n_train):
        rval.append(train(xval, yval, lr))
    dt = time.time() - t0

    print_mode(mode)
    return numpy.asarray(rval), dt
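
A hedged sketch of how this benchmark helper might be driven (hypothetical call: it assumes the surrounding test module provides my_rand, my_zeros, get_mode and print_mode, and that a CUDA device is available for the use_gpu=True case):

# Run the same training loop on CPU and GPU and compare wall-clock time.
cpu_losses, cpu_dt = run_nnet(use_gpu=False, n_train=10)
gpu_losses, gpu_dt = run_nnet(use_gpu=True, n_train=10)
print 'cpu: %.3fs   gpu: %.3fs' % (cpu_dt, gpu_dt)

Because my_rand draws fresh random weights on each call, the two runs are not numerically comparable unless the RNG is reseeded between them; the sketch only compares timing.
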
Example #20
    def __init__(
        self,
        input=None,
        target=None,
        n_input=1,
        n_hidden=1,
        n_output=1,
        lr=1e-3,
        **kw,
    ):
        super().__init__(**kw)

        if input is None:
            input = tensor.dvector("input")
        if target is None:
            target = tensor.dvector("target")

        self.input = input
        self.target = target
        self.lr = shared(lr, "learning_rate")
        self.w1 = shared(np.zeros((n_hidden, n_input)), "w1")
        self.w2 = shared(np.zeros((n_output, n_hidden)), "w2")
        # print self.lr.type

        self.hidden = sigmoid(tensor.dot(self.w1, self.input))
        self.output = tensor.dot(self.w2, self.hidden)
        self.cost = tensor.sum((self.output - self.target) ** 2)

        self.sgd_updates = {
            self.w1: self.w1 - self.lr * tensor.grad(self.cost, self.w1),
            self.w2: self.w2 - self.lr * tensor.grad(self.cost, self.w2),
        }

        self.sgd_step = pfunc(
            params=[self.input, self.target],
            outputs=[self.output, self.cost],
            updates=self.sgd_updates,
        )

        self.compute_output = pfunc([self.input], self.output)

        self.output_from_hidden = pfunc([self.hidden], self.output)
Example #21
def test_elemwise1():
    """ Several kinds of elemwise expressions with no broadcasting,
    non power-of-two shape """

    shape = (3, 4)
    a = tcn.shared_constructor(theano._asarray(numpy.random.rand(*shape),
                                               dtype='float32') + 0.5, 'a')
    b = tensor.fmatrix()

    #let debugmode catch any mistakes
    f = pfunc([b], [], updates=[(a, b ** a)], mode=mode_with_gpu)
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32') + 0.3)

    #let debugmode catch any mistakes
    f = pfunc([b], [], updates=[(a, tensor.exp(b ** a))], mode=mode_with_gpu)
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32') + 0.3)

    #let debugmode catch any mistakes
    f = pfunc([b], [], updates=[(a, a + b * tensor.exp(b ** a))],
              mode=mode_with_gpu)
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32') + 0.3)
Example #22
def test_elemwise1():
    """ Several kinds of elemwise expressions with no broadcasting,
    non power-of-two shape """

    shape = (3, 4)
    a = tcn.shared_constructor(theano._asarray(numpy.random.rand(*shape),
                                               dtype='float32') + 0.5, 'a')
    b = tensor.fmatrix()

    #let debugmode catch any mistakes
    f = pfunc([b], [], updates=[(a, b ** a)], mode=mode_with_gpu)
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32') + 0.3)

    #let debugmode catch any mistakes
    f = pfunc([b], [], updates=[(a, tensor.exp(b ** a))], mode=mode_with_gpu)
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32') + 0.3)

    #let debugmode catch any mistakes
    f = pfunc([b], [], updates=[(a, a + b * tensor.exp(b ** a))],
              mode=mode_with_gpu)
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32') + 0.3)
Example #23
    def cmp(a_shp, b_shp):
        a = tcn.shared_constructor(my_rand(*a_shp), 'a')

        b = tensor.fmatrix()

        f = pfunc([b], [], updates=[(a, tensor.dot(a,b))], mode=mode_with_gpu)

        a0 = a.get_value() * 1.0
        bval = my_rand(*b_shp)
        f(bval)

        assert numpy.allclose(numpy.dot(a0, bval), a.get_value())
Example #24
    def cmp(a_shp, b_shp):
        a = tcn.shared_constructor(my_rand(*a_shp), 'a')

        b = tensor.fmatrix()

        f = pfunc([b], [], updates=[(a, tensor.dot(a,b))], mode=mode_with_gpu)

        a0 = a.get_value() * 1.0
        bval = my_rand(*b_shp)
        f(bval)

        assert numpy.allclose(numpy.dot(a0, bval), a.get_value())
Example #25
def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10,
             n_train=100):

    if config.mode == 'DEBUG_MODE':
        n_train = 1

    if use_gpu:
        w = tcn.shared_constructor(0.01 * (my_rand(n_in, n_hid) - 0.5), 'w')
        b = tcn.shared_constructor(my_zeros(n_hid), 'b')
        v = tcn.shared_constructor(my_zeros((n_hid, n_out)), 'c')
        c = tcn.shared_constructor(my_zeros(n_out), 'c')
    else:
        w = shared(0.01 * (my_rand(n_in, n_hid) - 0.5), 'w')
        b = shared(my_zeros(n_hid), 'b')
        v = shared(my_zeros((n_hid, n_out)), 'c')
        c = shared(my_zeros(n_out), 'c')

    x = tensor.fmatrix('x')
    y = tensor.fmatrix('y')
    lr = tensor.fscalar('lr')

    hid = tensor.tanh(tensor.dot(x, w) + b)
    out = tensor.tanh(tensor.dot(hid, v) + c)
    loss = tensor.sum(0.5 * (out - y) ** 2 * lr)
    if 0:
        print('loss type', loss.type)

    params = [w, b, v, c]
    gparams = tensor.grad(loss, params)

    mode = get_mode(use_gpu)

    # print 'building pfunc ...'
    train = pfunc([x, y, lr], [loss], mode=mode,
                  updates=[(p, p - g) for p, g in izip(params, gparams)])

    if 0:
        for i, n in enumerate(train.maker.fgraph.toposort()):
            print(i, n)

    xval = my_rand(n_batch, n_in)
    yval = my_rand(n_batch, n_out)
    lr = theano._asarray(0.01, dtype='float32')

    t0 = time.time()
    rval = []
    for i in xrange(n_train):
        rval.append(train(xval, yval, lr))
    dt = time.time() - t0

    print_mode(mode)
    return numpy.asarray(rval), dt
Example #26
def test_elemwise_fusion():
    """ Test the the GpuElemwise fusion work correctly"""
    shape = (3,4)
    a = cuda.shared_constructor(theano._asarray(numpy.random.rand(*shape), dtype='float32'), 'a')
    b = tensor.fmatrix()
    c = tensor.fmatrix()
    f = pfunc([b,c], [a+b+c], mode=mode_with_gpu)
    topo = f.maker.env.toposort()
    for i, node in enumerate(topo):
        print >> sys.stdout, i, node
    assert len(topo)==4
    assert isinstance(topo[2].op.scalar_op,theano.scalar.basic.Composite)
    #let debugmode catch errors
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32'), theano._asarray(numpy.random.rand(*shape), dtype='float32'))
Example #27
def test_huge_elemwise_fusion():
    """ Test the the GpuElemwise fusion work correctly
        We check that we fuse one node with part of its input
        in case their is too many inputs and that would make it bust the 256
        bytes limits.
    """
    shape = (2,3,4,5,6)
    ttype = tensor.tensor(dtype='float32',broadcastable=(False,)*len(shape))
    vars = [tensor.tanh(ttype) for x in range(10)]
    f = pfunc(vars, [vars[0]-vars[1]-vars[2]-vars[3]-vars[4]-vars[5]-vars[6]], mode=mode_with_gpu)
    topo = f.maker.env.toposort()
    #theano.printing.debugprint(f)
    #for i, node in enumerate(topo):
    #    print >> sys.stdout, i, node
    assert len(topo)==10
    assert sum([isinstance(node.op, cuda.GpuElemwise) for node in topo])==2
    assert isinstance(topo[7].op.scalar_op,theano.scalar.basic.Sub)
    assert isinstance(topo[8].op.scalar_op,theano.scalar.basic.Composite)
    #let debugmode catch errors
    gen = lambda : theano._asarray(numpy.random.rand(*shape), dtype='float32')
    f(gen(),gen(),gen(),gen(),gen(),gen(),gen(),gen(),gen(),gen())

    # Test the case where we can't put the computation on the gpu: there are too
    # many dimensions in the input to have 2 inputs to the op.

    shape = (1,2,3,4,5,6,7,2,2,3,2,1,2,2,2,)
    ttype = tensor.tensor(dtype='float32',broadcastable=(False,)*len(shape))
    vars = [tensor.tanh(ttype) for x in range(10)]
    f = pfunc(vars, [vars[0]-vars[1]-vars[2]-vars[3]-vars[4]-vars[5]-vars[6]], mode=mode_with_gpu)
    topo = f.maker.env.toposort()
    #theano.printing.debugprint(f)
    assert len(topo)==1
    assert sum([isinstance(node.op, cuda.GpuElemwise) for node in topo])==0
    assert sum([isinstance(node.op, tensor.Elemwise) for node in topo])==1
    #let debugmode catch errors
    gen = lambda : theano._asarray(numpy.random.rand(*shape), dtype='float32')
    f(gen(),gen(),gen(),gen(),gen(),gen(),gen(),gen(),gen(),gen())
Example #28
    def cmp(a_shp, b_shp):
        a = tcn.shared_constructor(my_rand(*a_shp), 'a')

        b = tensor.fmatrix('b')
        c = tensor.fmatrix('c')

        f = pfunc([b,c], [], updates=[(a, tensor.dot(a,b) + tensor.exp(c))], mode=mode_with_gpu)
        assert any([node.op == tcn.blas.gpu_gemm_inplace for node in f.maker.env.toposort()])

        a0 = a.get_value() * 1.0
        bval = my_rand(*b_shp)
        cval = my_rand(a_shp[0],b_shp[1])
        f(bval,cval)

        assert numpy.allclose(numpy.dot(a0, bval)+numpy.exp(cval), a.get_value())
Example #29
    def cmp(a_shp, b_shp):
        a = tcn.shared_constructor(my_rand(*a_shp), 'a')

        b = tensor.fmatrix('b')
        c = tensor.fmatrix('c')

        f = pfunc([b,c], [], updates=[(a, tensor.dot(a,b) + tensor.exp(c))], mode=mode_with_gpu)
        assert any([node.op == tcn.blas.gpu_gemm_inplace for node in f.maker.env.toposort()])

        a0 = a.get_value() * 1.0
        bval = my_rand(*b_shp)
        cval = my_rand(a_shp[0],b_shp[1])
        f(bval,cval)

        assert numpy.allclose(numpy.dot(a0, bval)+numpy.exp(cval), a.get_value())
Example #30
def test_elemwise4():
    """ Test that two vectors can be broadcast to form an outer product (by performing rank-1 matrix update"""

    shape = (3,4)
    a = tcn.shared_constructor(theano._asarray(numpy.random.rand(*shape), dtype='float32'), 'a')
    b = tensor.fvector()
    c = tensor.fvector()
    f = pfunc([b,c], [], updates=[(a, (a+b.dimshuffle('x', 0)*c.dimshuffle(0, 'x')))], mode=mode_with_gpu)
    has_elemwise = False
    for i, node in enumerate(f.maker.env.toposort()):
        print >> sys.stdout, i, node
        has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise)
    assert not has_elemwise
    #let debugmode catch errors
    f(theano._asarray(numpy.random.rand(4), dtype='float32'), theano._asarray(numpy.random.rand(3), dtype='float32'))
Example #31
    def cmp(a_shp, b_shp):
        a0 = numpy.random.uniform(-0.4, 0.4, a_shp).astype('float32')
        a = cuda.shared_constructor(a0, 'a')

        b0 = numpy.random.uniform(-0.4, 0.4, b_shp).astype('float32')
        b = cuda.shared_constructor(b0, 'b')

        f = pfunc([], tensor.slinalg.solve(a, b), mode=mode_with_gpu)

        assert isinstance(f.maker.fgraph.toposort()[1].inputs[0].owner.op,
                          cuda.cula.GpuSolve)

        assert cuda.opt.local_gpu_solve.transform(
            tensor.slinalg.solve(a, b).owner)
        out = f()
        assert numpy.allclose(numpy.dot(a0, out), b0)
Example #32
def test_elemwise_fusion():
    """ Test the the GpuElemwise fusion work correctly"""
    shape = (3, 4)
    a = cuda.shared_constructor(
        theano._asarray(numpy.random.rand(*shape), dtype='float32'), 'a')
    b = tensor.fmatrix()
    c = tensor.fmatrix()
    f = pfunc([b, c], [a + b + c], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    for i, node in enumerate(topo):
        print >> sys.stdout, i, node
    assert len(topo) == 4
    assert isinstance(topo[2].op.scalar_op, theano.scalar.basic.Composite)
    # let debugmode catch errors
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32'),
      theano._asarray(numpy.random.rand(*shape), dtype='float32'))
Example #33
def test_elemwise0():

    a = tcn.shared_constructor(theano._asarray(numpy.random.rand(4, 4),
                                               dtype='float32'), 'a')

    b = tensor.fmatrix()

    f = pfunc([b], [], updates=[(a, a + b)], mode=mode_with_gpu)

    #check that we work inplace.
    assert f.maker.env.toposort()[1].op.destroy_map.items() == [(0, [0])]

    a0 = a.get_value() * 1.0
    f(numpy.ones((4, 4), dtype='float32'))

    assert numpy.all(a0 + 1.0 == a.get_value())
Example #34
def test_elemwise_collapse7(atol=1e-6):
    """ Test when one input have one broadcastable dimension and the
    other is a scalar"""

    shape = (5, 4, 1)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a.copy(), 'a')
    a3 = a2.dimshuffle(0, 'x', 1, 2)
    f = pfunc([], [a3 + 2], mode=mode_with_gpu)

    #let debugmode catch errors
    out = f()[0]
    ans = (a + 2).reshape(shape[0], 1, shape[1], shape[2])
    assert numpy.allclose(out, ans, atol=atol)
Example #35
def test_elemwise_collapse7(atol=1e-6):
    """ Test when one input have one broadcastable dimension and the
    other is a scalar"""

    shape = (5, 4, 1)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a.copy(), 'a')
    a3 = a2.dimshuffle(0, 'x', 1, 2)
    f = pfunc([], [a3 + 2], mode=mode_with_gpu)

    #let debugmode catch errors
    out = f()[0]
    ans = (a + 2).reshape(shape[0], 1, shape[1], shape[2])
    assert numpy.allclose(out, ans, atol=atol)
Example #36
def test_elemwise0():

    a = tcn.shared_constructor(theano._asarray(numpy.random.rand(4, 4),
                                               dtype='float32'), 'a')

    b = tensor.fmatrix()

    f = pfunc([b], [], updates=[(a, a + b)], mode=mode_with_gpu)

    #check that we work inplace.
    assert f.maker.fgraph.toposort()[1].op.destroy_map.items() == [(0, [0])]

    a0 = a.get_value() * 1.0
    f(numpy.ones((4, 4), dtype='float32'))

    assert numpy.all(a0 + 1.0 == a.get_value())
Example #37
    def test_maxpool():
        """TODO: test the gpu version!!! """
        for d0, d1, r_true, r_false in [(4, 4, [[[[5, 7], [13, 15]]]], [[[[5, 7], [13, 15]]]]),
                                        (5, 5, [[[[6, 8], [16, 18], [21, 23]]]],
                                         [[[[6, 8, 9], [16, 18, 19], [21, 23, 24]]]])]:
            for border, ret in [(True, r_true), (False, r_false)]:
                ret = numpy.array(ret)
                a = tcn.blas.Pool((2, 2), border)
                dmatrix4 = tensor.TensorType("float32", (False, False, False, False))
                b = dmatrix4()
                f = pfunc([b], [a(b)], mode=mode_with_gpu)

                bval = numpy.arange(0, d0 * d1).reshape(1, 1, d0, d1)
                r = f(bval)[0]
    #            print bval, bval.shape, border
                # print r, r.shape
                assert (ret == r).all()
Example #38
def test_elemwise3():
    """ Several kinds of elemwise expressions with dimension
    permutations and broadcasting"""

    shape = (3, 4, 5, 6)
    a = tcn.shared_constructor(theano._asarray(numpy.random.rand(*shape),
                                               dtype='float32'), 'a')
    b = tensor.fvector()
    new_val = (a + b).dimshuffle([2, 0, 3, 1])
    new_val *= tensor.exp(1 + b ** a).dimshuffle([2, 0, 3, 1])
    f = pfunc([b], [], updates=[(a, new_val)], mode=mode_with_gpu)
    has_elemwise = False
    for i, node in enumerate(f.maker.fgraph.toposort()):
        has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise)
    assert not has_elemwise
    #let debugmode catch errors
    f(theano._asarray(numpy.random.rand(6), dtype='float32'))
Example #39
def test_elemwise3():
    """ Several kinds of elemwise expressions with dimension
    permutations and broadcasting"""

    shape = (3, 4, 5, 6)
    a = tcn.shared_constructor(theano._asarray(numpy.random.rand(*shape),
                                               dtype='float32'), 'a')
    b = tensor.fvector()
    new_val = (a + b).dimshuffle([2, 0, 3, 1])
    new_val *= tensor.exp(1 + b ** a).dimshuffle([2, 0, 3, 1])
    f = pfunc([b], [], updates=[(a, new_val)], mode=mode_with_gpu)
    has_elemwise = False
    for i, node in enumerate(f.maker.env.toposort()):
        has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise)
    assert not has_elemwise
    #let debugmode catch errors
    f(theano._asarray(numpy.random.rand(6), dtype='float32'))
Example #40
    def cmp(a_shp, b_shp):
        a0 = my_rand(*a_shp)
        a = tcn.shared_constructor(a0, "a")

        b = tensor.fmatrix()

        f = pfunc([b], [], updates=[(a, tensor.dot(a, b))], mode=mode_with_gpu)

        bval = my_rand(*b_shp)
        f(bval)

        assert numpy.allclose(numpy.dot(a0, bval), a.get_value())

        # Try with a matrix equal to a0, but with strides in both dims
        a.set_value(a0)
        a.set_value(a.get_value(borrow=True, return_internal_type=True)[::-1, ::-1], borrow=True)
        f(bval)
Example #41
    def test_maxpool():
        """TODO: test the gpu version!!! """
        for d0, d1, r_true, r_false in [(4,4,[[[[5,7],[13,15]]]],[[[[5,7],[13,15]]]]),
                                        (5,5,[[[[6, 8],[ 16, 18], [ 21, 23]]]],
                                         [[[[6, 8, 9],[ 16, 18, 19], [ 21, 23, 24]]]])]:
            for border,ret in [(True,r_true),(False, r_false)]:
                ret=numpy.array(ret)
                a = tcn.blas.DownsampleFactorMax((2,2),border)
                dmatrix4 = tensor.TensorType("float32", (False, False, False, False))
                b = dmatrix4()
                f = pfunc([b], [a(b)], mode=mode_with_gpu)

                bval = numpy.arange(0,d0*d1).reshape(1,1,d0,d1)
                r = f(bval)[0]
    #            print bval, bval.shape, border
                #print r, r.shape
                assert (ret==r).all()
Example #42
    def cmp(a_shp, b_shp):
        a = tcn.shared_constructor(my_rand(*a_shp), 'a')
        cval = my_rand(a_shp[0], b_shp[1])
        c = tcn.shared_constructor(cval.copy(), 'c')

        b = tcn.fmatrix('b')
        b2 = tcn.fmatrix('b2')

        f = pfunc([b,b2], [tensor.dot(a,b2) + c], updates=[(a, tensor.dot(a,b) + c)], mode=mode_with_gpu)

        a0 = a.get_value() * 1.0
        assert any([node.op == tcn.blas.gpu_gemm_no_inplace for node in f.maker.env.toposort()])
        bval = my_rand(*b_shp)
        bval2 = my_rand(*b_shp)
        rval = f(bval,bval2)

        assert numpy.allclose(numpy.dot(a0, bval)+cval, a.get_value())
        assert numpy.allclose(numpy.dot(a0, bval2)+cval, rval)
Example #43
    def cmp(a_shp, b_shp):
        a = tcn.shared_constructor(my_rand(*a_shp), 'a')
        cval = my_rand(a_shp[0], b_shp[1])
        c = tcn.shared_constructor(cval.copy(), 'c')

        b = tcn.fmatrix('b')
        b2 = tcn.fmatrix('b2')

        f = pfunc([b,b2], [tensor.dot(a,b2) + c], updates=[(a, tensor.dot(a,b) + c)], mode=mode_with_gpu)

        a0 = a.get_value() * 1.0
        assert any([node.op == tcn.blas.gpu_gemm_no_inplace for node in f.maker.env.toposort()])
        bval = my_rand(*b_shp)
        bval2 = my_rand(*b_shp)
        rval = f(bval,bval2)

        assert numpy.allclose(numpy.dot(a0, bval)+cval, a.get_value())
        assert numpy.allclose(numpy.dot(a0, bval2)+cval, rval)
Example #44
def test_elemwise_collapse7(atol=1e-6):
    """ Test when one input have one broadcastable dimension and the other is a scalar"""

    shape = (5, 4, 1)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape), dtype="float32"))
    a = theano._asarray(numpy.random.rand(*shape), dtype="float32")
    a2 = tcn.shared_constructor(a.copy(), "a")
    a3 = a2.dimshuffle(0, "x", 1, 2)
    f = pfunc([], [a3 + 2])

    if False:
        for id, n in enumerate(f.maker.env.toposort()):
            print id, n
    # let debugmode catch errors
    out = f()[0]
    ans = (a + 2).reshape(shape[0], 1, shape[1], shape[2])
    assert numpy.allclose(out, ans, atol=atol)
    print "Expected collapse to c contiguous"
Example #45
def test_elemwise_comparaison_cast():
    """
    Test that an elemwise comparison followed by a cast to float32 is pushed to the GPU.
    """

    a = tensor.fmatrix()
    b = tensor.fmatrix()
    av = theano._asarray(numpy.random.rand(4, 4), dtype="float32")
    bv = numpy.ones((4, 4), dtype="float32")

    for g, ans in [(tensor.lt, av < bv), (tensor.gt, av > bv), (tensor.le, av <= bv), (tensor.ge, av >= bv)]:

        f = pfunc([a, b], tensor.cast(g(a, b), "float32"), mode=mode_with_gpu)

        # theano.printing.debugprint(f)
        out = f(av, bv)
        assert numpy.all(out == ans)
        assert any([isinstance(node.op, cuda.GpuElemwise) for node in f.maker.env.toposort()])
Example #46
    def cmp(a_shp, b_shp):
        a0 = numpy.random.uniform(-0.4, 0.4,
                                  a_shp).astype('float32')
        a = cuda.shared_constructor(a0, 'a')

        b0 = numpy.random.uniform(-0.4, 0.4,
                                  b_shp).astype('float32')
        b = cuda.shared_constructor(b0, 'b')

        f = pfunc([], tensor.slinalg.solve(a, b), mode=mode_with_gpu)

        assert isinstance(f.maker.fgraph.toposort()[1].inputs[0].owner.op,
                          cuda.cula.GpuSolve)

        assert cuda.opt.local_gpu_solve.transform(
            tensor.slinalg.solve(a, b).owner)
        out = f()
        assert numpy.allclose(numpy.dot(a0, out), b0)
Example #47
    def cmp(a_shp, b_shp):
        a0 = numpy.random.rand(*a_shp).astype('float32')
        a = cuda.shared_constructor(a0, 'a')
        b0 = numpy.random.rand(*b_shp).astype('float32')
        b = cuda.shared_constructor(b0, 'a')

        f = pfunc([], tensor.dot(a, b), mode=mode_with_gpu)
        assert cuda.opt.local_gpu_dot_to_dot22.transform(
            tensor.dot(a, b).owner)
        out = f()

        assert numpy.allclose(numpy.dot(a0, b0), out)

        # Try with a matrix equal to a0, but with strides in both dims
        a.set_value(a0)
        a.set_value(a.get_value(borrow=True, return_internal_type=True)[::-1],
                    borrow=True)
        f()
Example #48
def test_elemwise_collapse6():
    """ Test when all inputs have two broadcastable dimension at the
    beginning"""

    shape = (4, 5)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle('x', 'x', 0, 1)
    b = tcn.CudaNdarrayType((True, True, False, False))()
    f = pfunc([b], [a3 + b], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(1, 1, shape[0], shape[1]),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    #let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(1, 1, shape[0], shape[1]) + v)
Example #49
def test_elemwise0():

    a = tcn.shared_constructor(theano._asarray(numpy.random.rand(4, 4), dtype="float32"), "a")

    b = tensor.fmatrix()

    f = pfunc([b], [], updates=[(a, a + b)], mode=mode_with_gpu)

    # check that we work inplace.
    assert f.maker.env.toposort()[1].op.destroy_map.items() == [(0, [0])]

    a0 = a.get_value() * 1.0
    print "BEFORE ADD", a.get_value()
    for i, node in enumerate(f.maker.env.toposort()):
        print i, node
    f(numpy.ones((4, 4), dtype="float32"))
    print "AFTER ADD", a.get_value()

    assert numpy.all(a0 + 1.0 == a.get_value())
Example #50
def test_elemwise_comparaison_cast():
    """
    Test that an elemwise comparison followed by a cast to float32 is pushed to the GPU.
    """

    a = tensor.fmatrix()
    b = tensor.fmatrix()
    av = theano._asarray(numpy.random.rand(4,4), dtype='float32')
    bv = numpy.ones((4,4), dtype='float32')

    for g,ans in [(tensor.lt, av<bv), (tensor.gt, av>bv),
                  (tensor.le, av<=bv), (tensor.ge, av>=bv)]:

        f = pfunc([a,b], tensor.cast(g(a,b),'float32'), mode=mode_with_gpu)

        #theano.printing.debugprint(f)
        out = f(av,bv)
        assert numpy.all(out == ans)
        assert any([isinstance(node.op, cuda.GpuElemwise) for node in f.maker.env.toposort()])
Example #51
def test_elemwise_collapse6():
    """ Test when all inputs have two broadcastable dimension at the
    beginning"""

    shape = (4, 5)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle('x', 'x', 0, 1)
    b = tcn.CudaNdarrayType((True, True, False, False))()
    f = pfunc([b], [a3 + b], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(1, 1, shape[0], shape[1]),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    #let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(1, 1, shape[0], shape[1]) + v)
Example #52
def test_elemwise_collapse2():
    """ Test when only one inputs have one broadcastable dimension """

    shape = (4, 5, 9)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle(0, 'x', 1, 2)
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = a3 + b
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(shape[0], 5, *shape[1:]),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    #let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(shape[0], 1, *shape[1:]) + v)
Example #53
def test_elemwise_collapse2():
    """ Test when only one inputs have one broadcastable dimension """

    shape = (4, 5, 9)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle(0, 'x', 1, 2)
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = a3 + b
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(shape[0], 5, *shape[1:]),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    #let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(shape[0], 1, *shape[1:]) + v)
Example #54
    def cmp(a_shp, b_shp):
        a0 = my_rand(* a_shp)
        a = tcn.shared_constructor(a0, 'a')

        b = tensor.fmatrix()

        f = pfunc([b], [], updates=[(a, tensor.dot(a, b))], mode=mode_with_gpu)

        bval = my_rand(* b_shp)
        f(bval)

        assert numpy.allclose(numpy.dot(a0, bval), a.get_value())

        # Try with a matrix equal to a0, but with strides in both dims
        a.set_value(a0)
        a.set_value(a.get_value(borrow=True,
                    return_internal_type=True)[::-1, ::-1],
                    borrow=True)
        f(bval)
Example #55
    def cmp(a_shp, b_shp):
        a0 = numpy.random.rand(*a_shp).astype('float32')
        a = cuda.shared_constructor(a0, 'a')
        b0 = numpy.random.rand(*b_shp).astype('float32')
        b = cuda.shared_constructor(b0, 'b')

        f = pfunc([], tensor.dot(a, b), mode=mode_with_gpu)
        assert cuda.opt.local_gpu_dot_to_dot22.transform(
            tensor.dot(a, b).owner)
        out = f()

        assert numpy.allclose(numpy.dot(a0, b0), out)

        # Try with a matrix equal to a0, but with strides in both dims
        a.set_value(a0)
        a.set_value(
            a.get_value(borrow=True,
                        return_internal_type=True)[::-1],
            borrow=True)
        f()
Example #56
def test_elemwise_collapse4():
    """ Test when only one inputs have two broadcastable dimension at
    each ends and we add a scalar"""

    shape = (4, 5)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle('x', 0, 1, 'x')
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = (a3 + b + 2)
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(5, shape[0], shape[1], 4),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    #let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(1, shape[0], shape[1], 1) + v + 2)
Example #57
def speed_elemwise_collapse():
    """ used to time if the collapse of ccontiguous dims are useful """

    shape = (30, 40, 50, 600)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2[:, ::2, :, :]
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = a3 + b * tensor.exp(1 + b ** a3)
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    v = v[:, ::2, :, :]
    v = cuda_ndarray.CudaNdarray(v)
    t1 = time.time()
    for i in range(100):
        #let debugmode catch errors
        f(v)
    t2 = time.time()