Example #1
def setUp(self):
    utt.seed_rng()
    self.mode = mode_with_gpu.excluding('constant_folding')
    self.gemv_op = gpu_sparse_block_gemv
    self.outer_op = gpu_sparse_block_outer
    self.gemv_class = GpuSparseBlockGemv
    self.outer_class = GpuSparseBlockOuter
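Most of the examples below start by calling utt.seed_rng() (from theano.tests.unittest_tools). Roughly speaking, it seeds numpy's global RNG from the unittests.rseed config flag so that the random test data is reproducible. A minimal sketch of that behaviour (an approximation for illustration, not the actual implementation; the default seed value is an assumption):

import numpy

DEFAULT_RSEED = 666  # assumed default; the real helper reads theano.config.unittests.rseed

def fetch_seed(pseed=None):
    # Return the seed to use, falling back to the configured default.
    return int(pseed) if pseed is not None else DEFAULT_RSEED

def seed_rng(pseed=None):
    # Seed numpy's global RNG so every test run sees the same random data.
    seed = fetch_seed(pseed)
    numpy.random.seed(seed)
    return seed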
Example #2
def test_subsample():
    seed_rng()
    # implement when
    shapes = [((1, 1, 1, 1), (1, 1, 1, 1), (1, 1), (1, 1), (1, 1)),
              ((1, 1, 1, 1), (1, 1, 1, 1), (2, 2), (1, 1), (1, 1)),
              ((4, 2, 10, 10), (3, 2, 2, 2), (1, 3), (1, 1), (1, 1)),
              ((4, 2, 10, 10), (3, 2, 2, 2), (3, 3), (1, 1), (1, 1)),
              ((4, 2, 10, 10), (3, 2, 2, 2), (3, 1), (1, 1), (1, 1))
          ]
    shapes += get_shapes2(scales_img=(2, 2), subsample=(1, 1))
    shapes += get_shapes2(scales_img=(2, 2), subsample=(1, 2))
    shapes += get_shapes2(scales_img=(2, 2), subsample=(2, 1))
    shapes += get_shapes2(scales_img=(2, 2), subsample=(2, 2))

    version_valid = [-1]
    version_full = [-1]
    verbose = 0
    random = True
    print_ = False
    ones = False
    if ones:
        random = False

    exec_conv(version_valid, shapes, verbose, random, 'valid',
              print_=print_, ones=ones)
    exec_conv(version_full, shapes, verbose, random, 'full',
              print_=print_, ones=ones)
Example #3
def run_conv_nnet2_classif(use_gpu, seed, isize, ksize, bsize,
                           n_train=10,
                           check_isfinite=True,
                           verbose=0,
                           version=-1):
    """Run the train function returned by build_conv_nnet2_classif on one device.
    """

    utt.seed_rng(seed)  # Seeds numpy.random with seed
    train, params, x_shape, y_shape, mode = build_conv_nnet2_classif(
        use_gpu=use_gpu,
        isize=isize,
        ksize=ksize,
        n_batch=bsize,
        verbose=verbose,
        version=version,
        check_isfinite=check_isfinite)

    xval = my_rand(*x_shape)
    yval = my_rand(*y_shape)
    lr = theano._asarray(0.01, dtype='float32')

    rvals = my_zeros(n_train)
    for i in range(n_train):
        rvals[i] = train(xval, yval, lr)[0]
Example #4
def test_doubleop():
    utt.seed_rng()
    x = matrix()
    f = function([x], DoubleOp()(x))
    inp = numpy.asarray(numpy.random.rand(5, 4), dtype=config.floatX)
    out = f(inp)
    utt.assert_allclose(inp * 2, out)
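DoubleOp itself is not shown in these snippets; it is the tutorial Op from the Theano documentation that simply doubles its input. A minimal sketch of such an Op (assuming the standard theano.Op interface) looks like this:

import theano
import theano.tensor as tt

class DoubleOp(theano.Op):
    # Elementwise Op that returns 2 * x (sketch of the tutorial example).
    __props__ = ()

    def make_node(self, x):
        x = tt.as_tensor_variable(x)
        # The output has the same type (dtype, broadcastable pattern) as the input.
        return theano.Apply(self, [x], [x.type()])

    def perform(self, node, inputs, output_storage):
        (x,) = inputs
        (z,) = output_storage
        z[0] = x * 2

    def grad(self, inputs, output_grads):
        # d(2 * x)/dx = 2, so the gradient is twice the upstream gradient.
        return [output_grads[0] * 2]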
Example #5
    def test_invalid_input_shape(self):
        """
        Tests that an error is raised when the shape given at build time
        is not the same as the one given at run time.
        """
        seed_rng()
        verbose = 0
        random = True
        print_ = False
        ones = False
        if ones:
            random = False

        global theano_mode
        theano_mode_orig = theano_mode
        try:
            if theano.config.mode in ['DebugMode', 'DEBUG_MODE']:
                theano_mode = theano.compile.mode.get_mode(
                    'FAST_RUN').including('gpu')
                for mode in ['valid', 'full']:
                    for shapes in [((3, 2, 8, 8), (4, 2, 5, 5), (8, 8)),
                                   ((3, 2, 8, 8), (4, 2, 5, 5), (5, 8)),
                                   #((3, 2, 8, 8), (4, 2, 5, 5), (8, 5)),
                                   # We use only the number of columns.
                                   ]:

                        self.assertRaises(ValueError, _params_allgood,
                                          shapes[0], shapes[1],
                                          verbose=verbose, random=random,
                                          mode=mode,
                                          print_=print_, ones=ones,
                                          compile_kshp=shapes[2])
        finally:
            theano_mode = theano_mode_orig
Example #6
def test_valid_1_3_11_12():
    seed_rng()
    shapes = get_valid_shapes()
    version = [1, 3, 11, 12]
    verbose = 0

    random = True
    print_ = False
    ones = False
    if ones:
        random = False
    shapes2 = []

    for id, (ishape, kshape, subshape, istride, kstride) in enumerate(shapes):
        oshape = [ishape[0]] + [kshape[0]] + list(numpy.asarray(ishape[2:]) -
                                                  numpy.asarray(kshape[2:]) +
                                                  numpy.asarray([1, 1]))
        if oshape[3] > device_prop['maxThreadsDim0']:
            continue
        if ((numpy.prod(ishape[2:]) + numpy.prod(kshape[2:])) * 4 >
            (16 * 1024 - 150)):
            continue
        if subshape == (1, 1):
            shapes2.append((ishape, kshape, subshape, istride, kstride))
    shapes = shapes2

    for t in exec_conv(version, shapes, verbose, random, 'valid',
                       print_=print_, ones=ones, rtol=1.1e-5):
        yield t
Example #7
def setUp(self):
    if theano.config.mode == 'FAST_COMPILE':
        m = theano.compile.mode.get_mode('FAST_RUN').excluding('local_elemwise_fusion')
    else:
        m = theano.compile.mode.get_default_mode().excluding('local_elemwise_fusion')
    self.m = m
    utt.seed_rng()
Example #8
def test_full():
    seed_rng()
    shapes = get_basic_shapes()
    shapes += get_shapes2()
    #test image stride
    shapes += get_shapes2(scales_img=(2, 2), img_stride=(1, 2))
    shapes += get_shapes2(scales_img=(2, 2), img_stride=(2, 1))
    shapes += get_shapes2(scales_img=(2, 2), img_stride=(2, 2))
    shapes += get_shapes2(scales_img=(2, 2), img_stride=(-1, -1))
    shapes += get_shapes2(scales_img=(2, 2), kern_stride=(-1, -1))

    #test subsample done in a separate fct

    shapes += [
        #other test
              ((2, 1, 2, 2), (1, 1, 2, 2), (1, 1), (1, 1), (1, 1))
            , ((3, 2, 4, 4), (4, 2, 4, 4), (1, 1), (1, 1), (1, 1))
            , ((4, 1, 10, 10), (1, 1, 2, 2), (1, 1), (1, 1), (1, 1))
            , ((1, 1, 4, 4), (1, 1, 2, 3), (1, 1), (1, 1), (1, 1))
            , ((4, 1, 10, 10), (1, 1, 2, 3), (1, 1), (1, 1), (1, 1))
            , ((4, 1, 10, 10), (1, 1, 2, 10), (1, 1), (1, 1), (1, 1))
            , ((4, 1, 20, 10), (1, 1, 2, 10), (1, 1), (1, 1), (1, 1))
            , ((3, 2, 8, 8), (4, 2, 4, 4), (1, 1), (1, 1), (1, 1)) #stack, nkern, bsize
            , ((3, 2, 8, 6), (4, 2, 4, 4), (1, 1), (1, 1), (1, 1)) #stack, nkern, bsize, non-square image
            , ((3, 2, 8, 6), (4, 2, 4, 3), (1, 1), (1, 1), (1, 1)) #stack, nkern, bsize, non-square image, non-square kern
            , ((3, 2, 8, 6), (4, 2, 4, 6), (1, 1), (1, 1), (1, 1)) #stack, nkern, bsize ,non-square image, non-square kern, kernsize==imgsize on one dim
            , ((16, 5, 64, 64), (8, 5, 8, 8), (1, 1), (1, 1), (1, 1)) # a big one
            , ((16, 1, 28, 28), (20, 1, 5, 5), (1, 1), (1, 1), (1, 1)) # MNIST LeNET layer 1
            , ((20, 16, 32, 32), (1, 16, 28, 28), (1, 1), (1, 1), (1, 1)) # layer 1 backprop to weights

        #other test
            , ((3, 1, 1, 1), (2, 1, 5, 3), (1, 1), (1, 1), (1, 1))#kernel bigger than image
            , ((3, 2, 1, 1), (4, 2, 1, 1), (1, 1), (1, 1), (1, 1))
            , ((3, 2, 4, 4), (4, 2, 2, 6), (1, 1), (1, 1), (1, 1))
            , ((3, 2, 4, 4), (4, 2, 8, 6), (1, 1), (1, 1), (1, 1))#kernel bigger than image
            , ((4, 2, 10, 10), (3, 2, 2, 12), (1, 1), (1, 1), (1, 1))
            ]
    shapes += [
#        ((60,1,28,28),(20,1,5,5), (1, 1), (1, 1), (1, 1))#test_lenet_28 1 layers
#            , ((60,20,12,12),(30,20,5,5), (1, 1), (1, 1), (1, 1))#test_lenet_28 2 layers
             ((60,30,8,8),(20,30,5,5), (1, 1), (1, 1), (1, 1))#test_lenet_28 bprop 1 full
#            , ((20,60,12,12),(30,60,8,8), (1, 1), (1, 1), (1, 1))#test_lenet_28 bprop 2 valid
#            , ((1,60,28,28),(20,60,24,24), (1, 1), (1, 1), (1, 1))#test_lenet_28 bprop 2 valid
#            , ((10,1,64,64),(20,1,7,7), (1, 1), (1, 1), (1, 1))#test_lenet_64 1 layers
#            , ((10,20,29,29),(30,20,7,7), (1, 1), (1, 1), (1, 1))#test_lenet_64 2 layers
            , ((10,30,23,23),(20,30,7,7), (1, 1), (1, 1), (1, 1))#test_lenet_64 full
#            , ((20,10,29,29),(30,10,23,23), (1, 1), (1, 1), (1, 1))#test_lenet_64 bprop 1
#            , ((1,10,64,64),(20,10,58,58), (1, 1), (1, 1), (1, 1))#test_lenet_64 bprop 2
            #Test more than maxThreadsDim0
            , ((2,4,13,1050), (3,4,10, 11), (1, 1), (1, 1), (1, 1))
            , ((2,4,1050,13), (3,4,10, 11), (1, 1), (1, 1), (1, 1))
            ]

#    shapes=shapes[:277]
    version = [-2, -1, 0, 1, 2, 3, 4, 5]
    verbose = 0
#    version=[4]
    random = True

    exec_conv(version, shapes, verbose, random, 'full')
Example #9
def setUp(self):
    if theano.config.mode == "FAST_COMPILE":
        m = theano.compile.mode.get_mode("FAST_RUN").excluding("local_elemwise_fusion")
    else:
        m = theano.compile.mode.get_default_mode().excluding("local_elemwise_fusion")
    self.m = m
    utt.seed_rng()
Example #10
def test_subsample():
    seed_rng()
    # implement when
    shapes = [((1, 1, 1, 1), (1, 1, 1, 1), (1, 1), (1, 1), (1, 1)),
              ((1, 1, 1, 1), (1, 1, 1, 1), (2, 2), (1, 1), (1, 1)),
              ((4, 2, 10, 10), (3, 2, 2, 2), (1, 3), (1, 1), (1, 1)),
              ((4, 2, 10, 10), (3, 2, 2, 2), (3, 3), (1, 1), (1, 1)),
              ((4, 2, 10, 10), (3, 2, 2, 2), (3, 1), (1, 1), (1, 1))
          ]
    shapes += get_shapes2(scales_img=(2, 2), subsample=(1, 1))
    shapes += get_shapes2(scales_img=(2, 2), subsample=(1, 2))
    shapes += get_shapes2(scales_img=(2, 2), subsample=(2, 1))
    shapes += get_shapes2(scales_img=(2, 2), subsample=(2, 2))

# We only include the versions that implement subsampling, to make the test faster.
    version_valid = [-2, -1, 1, 3, 11, 12]
    version_full = [-2, -1]
    verbose = 0
    random = True
    print_ = False
    ones = False
    if ones:
        random = False

    exec_conv(version_valid, shapes, verbose, random, 'valid',
              print_=print_, ones=ones)
    exec_conv(version_full, shapes, verbose, random, 'full',
              print_=print_, ones=ones)
Example #11
def test_valid():
    seed_rng()
    shapes = get_valid_shapes()

    #shapes=shapes[400:426]
    # I put -1 in case we forget to add a version to the test.
    # I put -2 to test the reference version.
    version = [-2, -1, 6]
    verbose = 0
#    version=[1]

    random = True
    print_ = False
    ones = False
    if ones:
        random = False

#    exec_conv(version, shapes, verbose, random, 'valid',
#              print_=print_, ones=ones, rtol=1.1e-5)

    mode = theano_mode.including("conv_gemm")

    version = [-1]
    # Remove cases that are not supported.
    # Add tests with strided inputs but still square images and filters.
    shapes += get_shapes2(scales_img=(2, 2), img_stride=(2, 2))
    shapes += get_shapes2(scales_kern=(2, 2), kern_stride=(2, 2))
    # Keep only tests with square images and filters, even with input strides.
    shapes = [shp for shp in shapes if (
        shp[0][2]/shp[3][0] == shp[0][3]/shp[3][1] and
        shp[1][2]/shp[4][0] == shp[1][3]/shp[4][1])]
    exec_conv(version, shapes, verbose, random, 'valid',
              print_=print_, ones=ones, rtol=1.1e-5,
              theano_mode=mode, cls=cuda.blas.GpuCorrMM)
Example #12
def test_batch_normalization_train_without_running_averages():
    # compile and run batch_normalization_train without running averages
    utt.seed_rng()

    x, scale, bias, dy = T.tensor4('x'), T.tensor4('scale'), T.tensor4('bias'), T.tensor4('dy')
    data_shape = (5, 10, 30, 25)
    param_shape = (1, 10, 30, 25)

    # forward pass
    out, x_mean, x_invstd = bn.batch_normalization_train(x, scale, bias, 'per-activation')
    # backward pass
    grads = T.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
    # compile
    f = theano.function([x, scale, bias, dy], [out, x_mean, x_invstd] + grads)
    # check if the abstract Ops have been replaced
    assert not any([isinstance(n.op, (bn.AbstractBatchNormTrain,
                                      bn.AbstractBatchNormInference,
                                      bn.AbstractBatchNormTrainGrad))
                    for n in f.maker.fgraph.toposort()])
    # run
    X = 4 + 3 * numpy.random.randn(*data_shape).astype(theano.config.floatX)
    Dy = -1 + 2 * numpy.random.randn(*data_shape).astype(theano.config.floatX)
    Scale = numpy.random.randn(*param_shape).astype(theano.config.floatX)
    Bias = numpy.random.randn(*param_shape).astype(theano.config.floatX)
    f(X, Scale, Bias, Dy)
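The abstract batch normalization compiled above normalizes over the batch axis only in 'per-activation' mode. As a reference point, a small NumPy sketch of the forward pass this test exercises (an illustration, not code from the test suite; the eps value is arbitrary):

import numpy

def batch_norm_train_reference(x, scale, bias, eps=1e-4, axes=(0,)):
    # Per-activation batch normalization: statistics are taken over the batch
    # axis only, so mean and invstd keep a broadcastable shape like (1, C, H, W).
    mean = x.mean(axis=axes, keepdims=True)
    invstd = 1.0 / numpy.sqrt(x.var(axis=axes, keepdims=True) + eps)
    out = (x - mean) * (scale * invstd) + bias
    return out, mean, invstd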
Example #13
def _test_subsample(cls, mode, version_valid=[-1], version_full=[-1]):
    seed_rng()
    shapes = [((1, 1, 1, 1), (1, 1, 1, 1), (1, 1), (1, 1), (1, 1)),
              ((1, 1, 1, 1), (1, 1, 1, 1), (2, 2), (1, 1), (1, 1)),
              ((4, 2, 10, 10), (3, 2, 2, 2), (1, 3), (1, 1), (1, 1)),
              ((4, 2, 10, 10), (3, 2, 2, 2), (3, 3), (1, 1), (1, 1)),
              ((4, 2, 10, 10), (3, 2, 2, 2), (3, 1), (1, 1), (1, 1))
          ]
    shapes += get_shapes2(scales_img=(2, 2), subsample=(1, 1))
    shapes += get_shapes2(scales_img=(2, 2), subsample=(1, 2))
    shapes += get_shapes2(scales_img=(2, 2), subsample=(2, 1))
    shapes += get_shapes2(scales_img=(2, 2), subsample=(2, 2))

    # We only include the versions that implement subsampling, to make
    # the test faster.
    verbose = 0
    random = True
    print_ = False
    ones = False
    if ones:
        random = False

    for t in exec_conv(version_valid, shapes, verbose, random, 'valid',
                       print_=print_, ones=ones,
                       theano_mode=mode, cls=cls):
        yield t
    for t in exec_conv(version_full, shapes, verbose, random, 'full',
                       print_=print_, ones=ones,
                       theano_mode=mode, cls=cls):
        yield t
Example #14
def test_valid_4():
    seed_rng()
    shapes = get_valid_shapes()
    version = [4]
    verbose = 0

    random = True
    print_ = False
    ones = False
    if ones:
        random = False
    shapes2 = []

    for id, (ishape, kshape, subshape, istride, kstride) in enumerate(shapes):
        oshape = (
            [ishape[0]]
            + [kshape[0]]
            + list(numpy.asarray(ishape[2:]) - numpy.asarray(kshape[2:]) + numpy.asarray([1, 1]))
        )
        if oshape[3] > device_prop["maxThreadsDim0"]:
            continue
        if ishape[1] > 1:
            continue
        if (kshape[2] * ishape[3] * 4 + numpy.prod(kshape[2:]) * 4) > (16 * 1024 - 150):
            continue
        if subshape == (1, 1):
            shapes2.append((ishape, kshape, subshape, istride, kstride))
    shapes = shapes2

    exec_conv(version, shapes, verbose, random, "valid", print_=print_, ones=ones, rtol=1.1e-5)
Example #15
def test_valid(conv_gemm=False):
    seed_rng()
    shapes = get_valid_shapes()

    #shapes=shapes[400:426]
    # I put -1 in case we forget to add a version to the test.
    # I put -2 to test the reference version.
    version = [-2, -1, 6]
    verbose = 0

    random = True
    print_ = False
    ones = False
    if ones:
        random = False

    if conv_gemm:
        # Test the GpuCorrMM version
        mode = theano_mode.including("conv_gemm")
        cls = cuda.blas.BaseGpuCorrMM
        # dummy version; not used by GpuCorrMM so one version is enough
        version = [-1]
        # Add tests with strided inputs but still square images and filters.
        shapes += get_shapes2(scales_img=(2, 2), img_stride=(2, 2))
        shapes += get_shapes2(scales_kern=(2, 2), kern_stride=(2, 2))
    else:
        mode = theano_mode
        cls = None
    exec_conv(version, shapes, verbose, random, 'valid',
              print_=print_, ones=ones, rtol=1.1e-5,
              theano_mode=mode, cls=cls)
Example #16
def test_valid_7_8_13():
    seed_rng()
    shapes = get_valid_shapes()
    # This is to test the "new" lower shared memory usage.
    shapes.append(((10, 30, 60, 60), (20, 30, 40, 40),
                   (1, 1), (1, 1), (1, 1)))
    version = [7, 8, 13]
    verbose = 0

    random = True
    print_ = False
    ones = False
    if ones:
        random = False
    shapes2 = []

#    print len(shapes)
    for id, (ishape, kshape, subshape, istride, kstride) in enumerate(shapes):
        oshape = [ishape[0]] + [kshape[0]] + list(numpy.asarray(ishape[2:]) -
                                                  numpy.asarray(kshape[2:]) +
                                                  numpy.asarray([1, 1]))
        if oshape[2] * oshape[3] > device_prop['maxThreadsDim0']:
            continue
        if max(numpy.prod(ishape[2:]) * 4 + 2 * kshape[3] * 4,
               oshape[2] * oshape[3] * 4 * 2) > (16 * 1024 - 150):
            continue
        if subshape == (1, 1):
            shapes2.append((ishape, kshape, subshape, istride, kstride))
    shapes = shapes2
#    print len(shapes2)

    exec_conv(version, shapes, verbose, random, 'valid',
              print_=print_, ones=ones, rtol=1.1e-5)
Example #17
def test_valid_9_10():
    seed_rng()
    shapes = get_valid_shapes()
    version = [9, 10]
    verbose = 0

    random = True
    print_ = False
    ones = False
    if ones:
        random = False
    shapes2 = []

#    print len(shapes)
    for id, (ishape, kshape, subshape, istride, kstride) in enumerate(shapes):
        oshape = [ishape[0]] + [kshape[0]] + list(numpy.asarray(ishape[2:]) -
                                                  numpy.asarray(kshape[2:]) +
                                                  numpy.asarray([1, 1]))
        if oshape[3] > device_prop['maxThreadsDim0']:
            continue
        if (kshape[3] * 4 + ishape[3]) > (16 * 1024 - 150):
            continue
        if subshape == (1, 1):
            shapes2.append((ishape, kshape, subshape, istride, kstride))
    shapes = shapes2
#    print len(shapes2)

    exec_conv(version, shapes, verbose, random, 'valid',
              print_=print_, ones=ones, rtol=1.1e-5)
Example #18
    def test_logical_shapes(self):
        seed_rng()
        for stride in range(1, 4):
            kshp = (10, 2, 10, 10)
            featshp = (3, 10, 11, 11)

            a = tensor.ftensor4()
            A = tensor.ftensor4()

            # Need to transpose first two dimensions of kernel, and reverse
            # index kernel image dims (for correlation)
            kernel_rotated = tensor.transpose(A, axes=[1, 0, 2, 3])

            featshp_logical = (featshp[0], featshp[1], featshp[2] * stride,
                               featshp[3] * stride)
            kshp_rotated = (kshp[1], kshp[0], kshp[2], kshp[3])
            #print featshp, kshp_rotated, featshp_logical[1:], kshp[2:]
            image_estimate = tensor.nnet.conv2d(a, kernel_rotated,
                                                border_mode='full',
                                                image_shape=featshp,
                                                filter_shape=kshp_rotated,
                                                imshp_logical=featshp_logical[1:],
                                                kshp_logical=kshp[2:])

            func = theano.function([a, A], image_estimate, mode=theano_mode)
            #theano.printing.debugprint(func,)
            assert any([isinstance(node.op, theano.sandbox.cuda.blas.GpuConv)
                        for node in func.maker.fgraph.toposort()])

            a_in = numpy.random.randn(*featshp).astype("float32")
            A_in = numpy.random.randn(*kshp).astype("float32")

            func(a_in, A_in)
Example #19
def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    # This is a basic test for GpuCrossentropySoftmaxArgmax1HotWithBias.
    # We check that we loop when there are too many threads.

    n_in = 1000
    batch_size = 4097
    n_out = 1250

    if not isinstance(mode_with_gpu, theano.compile.DebugMode):
        n_in = 4098
        n_out = 4099

    y = T.lvector('y')

    b = T.fvector('b')

    # We precompute the dot product with a big shape beforehand so that the
    # test of GpuCrossentropySoftmax1HotWithBiasDx does not fail with the
    # error "the launch timed out and was terminated" on GPU cards that are
    # not powerful enough. We need the big shape to check for the corner
    # case.
    dot_result = T.fmatrix('dot_result')

    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()

    xx = np.asarray(np.random.rand(batch_size, n_in),
                    dtype=np.float32)
    yy = np.ones((batch_size,), dtype='int32')
    b_values = np.zeros((n_out,), dtype='float32')
    W_values = np.asarray(np.random.rand(n_in, n_out), dtype='float32')

    dot_value = np.asarray(np.dot(xx, W_values), dtype='float32')
    del W_values
    p_y_given_x = T.nnet.softmax(dot_result + b)
    y_pred = T.argmax(p_y_given_x, axis=-1)
    loss = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])
    dW = T.grad(loss, dot_result)
    classify = theano.function(inputs=[y, b, dot_result],
                               outputs=[loss, y_pred, dW],
                               mode=mode_without_gpu)
    classify_gpu = theano.function(inputs=[y, b, dot_result],
                                   outputs=[loss, y_pred, dW],
                                   mode=mode_with_gpu)

    assert any([isinstance(node.op,
                           T.nnet.CrossentropySoftmaxArgmax1HotWithBias)
                for node in classify.maker.fgraph.toposort()])
    assert any([isinstance(node.op,
                           GpuCrossentropySoftmaxArgmax1HotWithBias)
                for node in classify_gpu.maker.fgraph.toposort()])

    out = classify(yy, b_values, dot_value)
    gout = classify_gpu(yy, b_values, dot_value)

    assert len(out) == len(gout) == 3
    utt.assert_allclose(out[0], gout[0])
    utt.assert_allclose(out[2], gout[2], atol=3e-6)
    utt.assert_allclose(out[1], gout[1])
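For reference, the loss that classify computes symbolically above is an ordinary softmax followed by the mean negative log-likelihood of the targets. A NumPy sketch of the same computation (illustration only, with a numerically stable softmax):

import numpy as np

def softmax_nll_reference(dot_result, b, y):
    # Softmax over the class axis, shifted by the row maximum for stability.
    z = dot_result + b
    z = z - z.max(axis=1, keepdims=True)
    p_y_given_x = np.exp(z) / np.exp(z).sum(axis=1, keepdims=True)
    y_pred = p_y_given_x.argmax(axis=-1)
    # Mean negative log-likelihood of the true classes.
    loss = -np.mean(np.log(p_y_given_x[np.arange(y.shape[0]), y]))
    return loss, y_pred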
Example #20
def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    """
    This is basic test for GpuCrossentropySoftmaxArgmax1HotWithBias

    We check that we loop when their is too much threads
    TODO: check that we loop when their is too much block(>32*1024)

    """

    n_in = 1000
    batch_size = 4097
    n_out = 1250

    if theano.config.mode != "DEBUG_MODE":
        n_in = 4098
        n_out = 4099

    x = T.fmatrix('x')
    y = T.lvector('y')


    b = T.fvector('b')
    #W = T.fmatrix('W')

    # We precompute the dot product with a big shape beforehand so that the
    # test of GpuCrossentropySoftmax1HotWithBiasDx does not fail with the
    # error "the launch timed out and was terminated" on GPU cards that are
    # not powerful enough. We need the big shape to check for the corner case.
    dot_result = T.fmatrix('dot_result')

    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()

    xx = numpy.asarray(numpy.random.rand(batch_size,n_in),dtype=numpy.float32)
    #?????yy = numpy.ones((batch_size,),dtype='float32')
    yy = numpy.ones((batch_size,),dtype='int32')
    b_values = numpy.zeros((n_out,),dtype='float32')
    W_values = numpy.asarray(numpy.random.rand(n_in,n_out),dtype='float32')

    dot_value = numpy.asarray(numpy.dot(xx, W_values),dtype='float32')
    del W_values
    p_y_given_x = T.nnet.softmax(dot_result+b)
    y_pred = T.argmax(p_y_given_x, axis=-1)
    loss = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])
    dW = T.grad(loss, dot_result)
    classify = theano.function( inputs = [x,y,b,dot_result], outputs = [loss,y_pred,dW],
                                mode = mode_without_gpu)
    classify_gpu = theano.function( inputs = [x,y,b,dot_result], outputs = [loss,y_pred,dW],
                                    mode = mode_with_gpu)
    #theano.printing.debugprint(classify)
    #theano.printing.debugprint(classify_gpu)

    assert any([isinstance(node.op, T.nnet.CrossentropySoftmaxArgmax1HotWithBias)
                for node in classify.maker.fgraph.toposort()])
    assert any([isinstance(node.op, cuda.nnet.GpuCrossentropySoftmaxArgmax1HotWithBias)
                for node in classify_gpu.maker.fgraph.toposort()])

    out = classify(xx, yy, b_values, dot_value)
    gout = classify_gpu(xx, yy, b_values, dot_value)

    assert len(out) == len(gout) == 3
    assert numpy.allclose(out[0], gout[0])
    assert numpy.allclose(out[2], gout[2], atol=3e-6), numpy.absolute(
        gout[2] - out[2]).max()
    assert numpy.allclose(out[1], gout[1]), [(id, out[1][id], gout[1][id], val)
                                             for id, val in enumerate(out[1] - gout[1])
                                             if val != 0]
Example #21
def test_conv_nnet2():
    utt.seed_rng()
    rval_gpu = run_conv_nnet2(True)
    if True:
        utt.seed_rng()
        rval_cpu = run_conv_nnet2(False)
        # print rval_cpu[0], rval_gpu[0],rval_cpu[0]-rval_gpu[0]
        utt.assert_allclose(rval_cpu, rval_gpu, rtol=1e-4, atol=1e-4)
Example #22
def test_doubleop_grad():
    utt.seed_rng()
    utt.verify_grad(
        # Op instance
        DoubleOp(),
        # Numeric inputs
        [numpy.random.rand(5, 7, 2)],
    )
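utt.verify_grad checks the symbolic gradient of the Op against a numeric estimate obtained by finite differences. The idea, roughly (a simplified sketch, not the unittest_tools implementation), is a central-difference comparison like the following:

import numpy

def finite_difference_grad(f, x, eps=1e-4):
    # Central-difference estimate of d f(x) / dx for a scalar-valued f.
    g = numpy.zeros_like(x)
    flat_x, flat_g = x.ravel(), g.ravel()
    for i in range(flat_x.size):
        orig = flat_x[i]
        flat_x[i] = orig + eps
        f_plus = f(x)
        flat_x[i] = orig - eps
        f_minus = f(x)
        flat_x[i] = orig
        flat_g[i] = (f_plus - f_minus) / (2 * eps)
    return g

# The gradient of sum(2 * x) with respect to x should be all twos, which is
# what DoubleOp.grad produces for an all-ones upstream gradient.
x = numpy.random.rand(5, 7, 2)
numeric = finite_difference_grad(lambda a: (a * 2).sum(), x)
assert numpy.allclose(numeric, 2.0)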
Example #23
def test_GpuCrossentropySoftmax1HotWithBiasDx():
    """
    This is basic test for GpuCrossentropySoftmax1HotWithBiasDx

    We check that we loop when their is too much threads
    TODO: check that we loop when their is too much block(>32*1024)

    """
    n_in = 1000
    batch_size = 4097
    n_out = 1250

    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()

    softmax_output_value = numpy.random.rand(batch_size, n_out).astype("float32")
    dnll_value = numpy.asarray(numpy.random.rand(batch_size), dtype="float32")
    y_idx_value = numpy.random.randint(low=0, high=5, size=batch_size)

    softmax_output = T.fmatrix()
    softmax_output /= softmax_output.sum(axis=1).reshape(softmax_output.shape[1], 1)
    op = theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(dnll_value, softmax_output, y_idx_value)

    cpu_f = theano.function([softmax_output], op, mode=mode_without_gpu)
    gpu_f = theano.function([softmax_output], op, mode=mode_with_gpu)
    # theano.printing.debugprint(cpu_f)
    # theano.printing.debugprint(gpu_f)

    assert any(
        [isinstance(node.op, T.nnet.CrossentropySoftmax1HotWithBiasDx) for node in cpu_f.maker.fgraph.toposort()]
    )
    assert any(
        [isinstance(node.op, cuda.nnet.GpuCrossentropySoftmax1HotWithBiasDx) for node in gpu_f.maker.fgraph.toposort()]
    )

    cpu_out = cpu_f(softmax_output_value)
    gpu_out = gpu_f(softmax_output_value)

    rtol = 1e-5
    atol = 1e-6
    if not numpy.allclose(cpu_out, gpu_out, rtol=rtol, atol=atol):
        abs_err, rel_err = T.numeric_grad.abs_rel_err(cpu_out, gpu_out)
        scaled_err = numpy.minimum(abs_err / atol, rel_err / rtol)
        max_i = scaled_err.argmax()

        print "max err index:", max_i, max_i / batch_size,
        print max_i % batch_size, max_i / n_out, max_i & n_out
        print "At that index:"
        print "err:", scaled_err.flatten()[max_i]
        print "absolute error:", abs_err.flatten()[max_i]
        print "relative error:", rel_err.flatten()[max_i]
        print "cpu_out:", cpu_out.flatten()[max_i]
        print "gpu_out:", gpu_out.flatten()[max_i]
        print "softmax_output_value:", softmax_output_value.flatten()[max_i]
        print "dnll_value:", dnll_value[max_i / n_out]
        print "y_idx_value:", y_idx_value[max_i / n_out]

        assert False, "numpy.allclose(cpu_out, gpu_out, rtol=%s, atol=%s)" % (rtol, atol)
Example #24
def test_gpu_tril_triu():
    def check_l(m, k=0):
        m_symb = T.matrix(dtype=m.dtype)
        k_symb = T.iscalar()

        f = theano.function([m_symb, k_symb],
                            T.tril(m_symb, k_symb),
                            mode=mode_with_gpu)
        result = f(m, k)
        assert np.allclose(result, np.tril(m, k))
        assert result.dtype == np.dtype(dtype)
        assert any([isinstance(node.op, GpuTri)
                    for node in f.maker.fgraph.toposort()])

    def check_u(m, k=0):
        m_symb = T.matrix(dtype=m.dtype)
        k_symb = T.iscalar()
        f = theano.function([m_symb, k_symb],
                            T.triu(m_symb, k_symb),
                            mode=mode_with_gpu)
        result = f(m, k)
        assert np.allclose(result, np.triu(m, k))
        assert result.dtype == np.dtype(dtype)
        assert any([isinstance(node.op, GpuTri)
                    for node in f.maker.fgraph.toposort()])

    utt.seed_rng()
    test_rng = np.random.RandomState(seed=utt.fetch_seed())

    for dtype in ['float64', 'float32', 'float16']:
        # try a big one
        m = np.asarray(test_rng.rand(5000, 5000) * 2 - 1, dtype=dtype)
        yield check_l, m, 0
        yield check_l, m, 1
        yield check_l, m, -1

        yield check_u, m, 0
        yield check_u, m, 1
        yield check_u, m, -1

        m = np.asarray(test_rng.rand(10, 10) * 2 - 1, dtype=dtype)
        yield check_l, m, 0
        yield check_l, m, 1
        yield check_l, m, -1

        yield check_u, m, 0
        yield check_u, m, 1
        yield check_u, m, -1

        m = np.asarray(test_rng.rand(10, 5) * 2 - 1, dtype=dtype)
        yield check_l, m, 0
        yield check_l, m, 1
        yield check_l, m, -1

        yield check_u, m, 0
        yield check_u, m, 1
        yield check_u, m, -1
Example #25
    def setUp(self):
        unittest_tools.seed_rng()

        # we want to allow nans in the matrices, so we disable this DEBUG_MODE check
        mode = theano.compile.mode.get_default_mode()
        mode = copy(mode)
        mode.check_isfinite = False

        self.mode = mode
Example #26
    def test_dimshuffle(self):
        utt.seed_rng()
        rng = numpy.random.RandomState(utt.fetch_seed())

        # 2d -> 0d
        a = theano._asarray(rng.randn(1,1), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(numpy.transpose(a), cuda_ndarray.dimshuffle(b,()))

        # Test when we drop an axis that doesn't have shape 1
        a = theano._asarray(rng.randn(2,1), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        self.assertRaises(ValueError, cuda_ndarray.dimshuffle, b,())

        # Test that we can't take a dimension multiple times
        a = theano._asarray(rng.randn(2,1), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        self.assertRaises(ValueError, cuda_ndarray.dimshuffle, b,(1,1))

        # 1d
        a = theano._asarray(rng.randn(3,), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(numpy.transpose(a), cuda_ndarray.dimshuffle(b,(0,)))
        assert numpy.allclose(a[None,:,None], cuda_ndarray.dimshuffle(b,(-1,0,-1)))

        # 2d
        a = theano._asarray(rng.randn(3,11), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(numpy.transpose(a), cuda_ndarray.dimshuffle(b,(1,0)))
        assert numpy.allclose(numpy.transpose(a)[None,:,None,:,None], cuda_ndarray.dimshuffle(b,(-1,1,-1,0,-1)))

        # 2d -> 1d
        a = theano._asarray(rng.randn(1,11), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(a[:,], cuda_ndarray.dimshuffle(b,(1,)))
        a = theano._asarray(rng.randn(11,1), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(a.reshape((11,)), cuda_ndarray.dimshuffle(b,(0,)))

        # 3d
        a = theano._asarray(rng.randn(3,4,5), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(a, cuda_ndarray.dimshuffle(b,(0,1,2)))
        assert numpy.allclose(numpy.swapaxes(a,0,1), cuda_ndarray.dimshuffle(b,(1,0,2)))
        assert numpy.allclose(numpy.swapaxes(a,0,2), cuda_ndarray.dimshuffle(b,(2,1,0)))
        assert numpy.allclose(numpy.swapaxes(a,1,2), cuda_ndarray.dimshuffle(b,(0,2,1)))
        assert numpy.allclose(numpy.swapaxes(a,1,2)[None,:,None,:,:,None], cuda_ndarray.dimshuffle(b,(-1,0,-1,2,1,-1)))

        # 4d
        a = theano._asarray(rng.randn(3,11,4,5), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(numpy.swapaxes(a,0,1), cuda_ndarray.dimshuffle(b,(1,0,2,3)))
        assert numpy.allclose(numpy.swapaxes(a,0,2), cuda_ndarray.dimshuffle(b,(2,1,0,3)))
        assert numpy.allclose(numpy.swapaxes(a,0,3), cuda_ndarray.dimshuffle(b,(3,1,2,0)))
        assert numpy.allclose(numpy.swapaxes(a,0,3), cuda_ndarray.dimshuffle(b,(3,1,2,0)))
        assert numpy.allclose(numpy.swapaxes(a,0,3)[None,:,None,:,:,:], cuda_ndarray.dimshuffle(b,(-1,3,-1,1,2,0)))
Example #27
def setUp(self):
    utt.seed_rng()
    mode = None
    if theano.config.mode == "FAST_COMPILE":
        mode = "FAST_RUN"
    self.mode = theano.compile.get_mode(mode).excluding(
        'constant_folding'
    )
    self.gemv_op = sparse_block_gemv
    self.outer_op = sparse_block_outer
Example #28
def test_batchnorm_inference():
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    if dnn.version(raises=False) < 5000:
        raise SkipTest("batch normalization requires cudnn v5+")
    utt.seed_rng()

    for mode in ('per-activation', 'spatial'):
        for vartype in (T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
            x, scale, bias, mean, var = (vartype(n) for n in ('x', 'scale',
                                                              'bias', 'mean',
                                                              'var'))
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used

            # forward pass
            out = dnn.dnn_batch_normalization_test(x, scale, bias, mean,
                                                   var, mode, eps)
            # reference forward pass
            if mode == 'per-activation':
                axes = (0,)
            elif mode == 'spatial':
                axes = (0,) + tuple(range(2, ndim))
            scale2, bias2, mean2, var2 = (T.addbroadcast(t, *axes)
                                          for t in (scale, bias, mean, var))
            out2 = (x - mean2) * (scale2 / T.sqrt(var2 + eps)) + bias2
            # backward pass
            dy = vartype('dy')
            grads = T.grad(None, wrt=[x, scale, bias, mean, var], known_grads={out: dy})
            # reference backward pass
            grads2 = T.grad(None, wrt=[x, scale, bias, mean, var], known_grads={out2: dy})
            # compile
            f = theano.function([x, scale, bias, mean, var, dy],
                                [out, out2] + grads + grads2, mode=mode_with_gpu)
            # run
            for data_shape in ((10, 20, 30, 40), (4, 3, 1, 1), (1, 1, 5, 5)):
                data_shape = data_shape[:ndim]
                param_shape = tuple(1 if d in axes else s
                                    for d, s in enumerate(data_shape))
                X = 4 + 3 * numpy.random.randn(*data_shape).astype('float32')
                Dy = -1 + 2 * numpy.random.randn(*data_shape).astype('float32')
                Scale = numpy.random.randn(*param_shape).astype('float32')
                Bias = numpy.random.randn(*param_shape).astype('float32')
                Mean = numpy.random.randn(*param_shape).astype('float32')
                Var = numpy.random.rand(*param_shape).astype('float32')
                outputs = f(X, Scale, Bias, Mean, Var, Dy)
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[1])  # out
                # compare gradients
                utt.assert_allclose(outputs[2], outputs[2 + 5])  # dx
                utt.assert_allclose(outputs[3], outputs[3 + 5])  # dscale
                utt.assert_allclose(outputs[4], outputs[4 + 5])  # dbias
                utt.assert_allclose(outputs[5], outputs[5 + 5])  # dmean
                utt.assert_allclose(outputs[6], outputs[6 + 5], atol=2e-5)  # dvar
Example #29
def setUp(self):
    utt.seed_rng()
    # Using vectors makes things a lot simpler for generating the same
    # computations using scan
    self.x = tensor.vector("x")
    self.v = tensor.vector("v")
    self.rng = numpy.random.RandomState(utt.fetch_seed())
    self.in_shape = (5 + self.rng.randint(3),)
    self.mx = tensor.matrix("mx")
    self.mv = tensor.matrix("mv")
    self.mat_in_shape = (5 + self.rng.randint(3), 5 + self.rng.randint(3))
Example #30
def test_dnn_batchnorm_train():
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    if dnn.version(raises=False) < 5000:
        raise SkipTest("batch normalization requires cudnn v5+")
    utt.seed_rng()

    for mode in ('per-activation', 'spatial'):
        for vartype in (T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
            x, scale, bias = (vartype(n) for n in ('x', 'scale', 'bias'))
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used

            # forward pass
            out, x_mean, x_invstd = dnn.dnn_batch_normalization_train(
                x, scale, bias, mode, eps)
            # reference forward pass
            if mode == 'per-activation':
                axes = (0,)
            elif mode == 'spatial':
                axes = (0,) + tuple(range(2, ndim))
            x_mean2 = x.mean(axis=axes, keepdims=True)
            x_invstd2 = T.inv(T.sqrt(x.var(axis=axes, keepdims=True) + eps))
            scale2 = T.addbroadcast(scale, *axes)
            bias2 = T.addbroadcast(bias, *axes)
            out2 = (x - x_mean2) * (scale2 * x_invstd2) + bias2
            # backward pass
            dy = vartype('dy')
            grads = T.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
            # reference backward pass
            grads2 = T.grad(None, wrt=[x, scale, bias], known_grads={out2: dy})
            # compile
            f = theano.function([x, scale, bias, dy],
                                [out, x_mean, x_invstd, out2, x_mean2, x_invstd2] +
                                grads + grads2, mode=mode_with_gpu)
            # run
            for data_shape in ((10, 20, 30, 40), (4, 3, 1, 1), (1, 1, 5, 5)):
                data_shape = data_shape[:ndim]
                param_shape = tuple(1 if d in axes else s
                                    for d, s in enumerate(data_shape))
                X = 4 + 3 * numpy.random.randn(*data_shape).astype('float32')
                Dy = -1 + 2 * numpy.random.randn(*data_shape).astype('float32')
                Scale = numpy.random.randn(*param_shape).astype('float32')
                Bias = numpy.random.randn(*param_shape).astype('float32')
                outputs = f(X, Scale, Bias, Dy)
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[0 + 3])  # out
                utt.assert_allclose(outputs[1], outputs[1 + 3])  # mean
                utt.assert_allclose(outputs[2], outputs[2 + 3])  # invstd
                # compare gradients
                utt.assert_allclose(outputs[6], outputs[6 + 3])  # dx
                utt.assert_allclose(outputs[7], outputs[7 + 3], rtol=3e-3)  # dscale
                utt.assert_allclose(outputs[8], outputs[8 + 3])  # dbias
Example #31
def cmp_run_conv_nnet2_classif(seed,
                               isize,
                               ksize,
                               bsize,
                               ignore_error=False,
                               n_train=10,
                               gpu_only=False,
                               cpu_only=False,
                               float_atol=1e-06,
                               check_isfinite=True,
                               pickle=False,
                               verbose=0,
                               version=-1):
    """Run the nnet2 function on 1 or 2 devices, and compares the results.

       float_atol: None mean use the default value.
       check_isfinite: the debug mode option. We forward this value to debug mode.
                       For some parameter CrossentropyCategorical1Hot op generate inf when not optimized.
    """
    if config.mode == 'DEBUG_MODE':
        n_train = 1

    # Change global tolerance, used in DebugMode for instance
    orig_float32_atol = theano.tensor.basic.float32_atol
    try:
        if float_atol:
            #print "float_atol", float_atol
            theano.tensor.basic.float32_atol = float_atol

        if gpu_only and cpu_only:
            raise ValueError("Please use only one of cpu_only and gpu_only")
        elif cpu_only:
            use_gpu = False
            compare = False
        elif gpu_only:
            use_gpu = True
            compare = False
        else:
            compare = True

        if not compare:
            return run_conv_nnet2_classif(use_gpu=use_gpu,
                                          seed=seed,
                                          isize=isize,
                                          ksize=ksize,
                                          bsize=bsize,
                                          n_train=n_train,
                                          check_isfinite=check_isfinite,
                                          pickle=pickle,
                                          verbose=verbose,
                                          version=version)

        utt.seed_rng(seed)  # Seeds numpy.random with seed
        train_cpu, params_cpu, x_shape, y_shape, mode_cpu = \
                build_conv_nnet2_classif(
                        use_gpu=False,
                        isize=isize,
                        ksize=ksize,
                        n_batch=bsize,
                        verbose=verbose,
                        version=version,
                        check_isfinite=check_isfinite)

        utt.seed_rng(seed)  # Seeds numpy.random with seed
        train_gpu, params_gpu, x_shape_gpu, y_shape_gpu, mode_gpu = \
                build_conv_nnet2_classif(
                        use_gpu=True,
                        isize=isize,
                        ksize=ksize,
                        n_batch=bsize,
                        verbose=verbose,
                        version=version,
                        check_isfinite=check_isfinite)

        assert x_shape == x_shape_gpu
        assert y_shape == y_shape_gpu

        xval = my_rand(*x_shape)
        yval = my_rand(*y_shape)
        lr = theano._asarray(0.01, dtype='float32')

        time_cpu = 0
        time_gpu = 0

        for i in range(n_train):
            # Train one batch on CPU
            t0 = time.time()
            rval_cpu = train_cpu(xval, yval, lr)[0]
            t1 = time.time()
            time_cpu += (t1 - t0)

            # Train one batch on GPU
            t0 = time.time()
            rval_gpu = train_gpu(xval, yval, lr)[0]
            t1 = time.time()
            time_gpu += (t1 - t0)

            # Compare results
            if (verbose or not numpy.allclose(
                    rval_cpu, rval_gpu, rtol=1e-5, atol=float_atol)):
                print "At batch:", i + 1
                print "CPU:", rval_cpu
                print "GPU:", rval_gpu
                print "abs diff:", numpy.absolute(rval_gpu - rval_cpu)
                print "rel diff:", numpy.absolute(
                    (rval_gpu - rval_cpu) / rval_gpu)

            if not ignore_error:
                assert numpy.allclose(rval_cpu,
                                      rval_gpu,
                                      rtol=1e-5,
                                      atol=float_atol)

            # Synchronize parameters to start from the same point next time
            if i < n_train - 1:
                for cpu_p, gpu_p in zip(params_cpu, params_gpu):
                    cpu_p.set_value(gpu_p.get_value(borrow=False), borrow=True)

    finally:
        theano.tensor.basic.float32_atol = orig_float32_atol

    if pickle:
        import pickle as pickle_module
        if isinstance(mode_cpu, theano.compile.ProfileMode):
            print("BEGIN CPU profile mode dump")
            print(pickle_module.dumps(mode_cpu))
            print("END CPU profile mode dump")
        if isinstance(mode_gpu, theano.compile.ProfileMode):
            print("BEGIN GPU profile mode dump")
            print(pickle_module.dumps(mode_gpu))
            print("END GPU profile mode dump")
Example #32
def test_conv_nnet1():
    utt.seed_rng()
    rval_cpu = run_conv_nnet1(False)
    utt.seed_rng()
    rval_gpu = run_conv_nnet1(True)
    assert numpy.allclose(rval_cpu, rval_gpu, rtol=1e-4, atol=1e-6)
Example #33
def setUp(self):
    if not cusolver_available:
        self.skipTest(
            'Optional package scikits.cuda.cusolver not available')
    utt.seed_rng()
Example #34
def setUp(self):
    utt.seed_rng()
    self.mode = mode_with_gpu
    self.shared = gpuarray_shared_constructor
    self.dtypes = ['float64', 'float32']
Example #35
def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    """
    This is basic test for GpuCrossentropySoftmaxArgmax1HotWithBias

    We check that we loop when their is too much threads

    """

    n_in = 1000
    batch_size = 4097
    n_out = 1250

    if not isinstance(mode_with_gpu, theano.compile.DebugMode):
        n_in = 4098
        n_out = 4099

    x = T.fmatrix('x')
    y = T.lvector('y')

    b = T.fvector('b')
    #W = T.fmatrix('W')

    # We precompute the dot product with a big shape beforehand so that the
    # test of GpuCrossentropySoftmax1HotWithBiasDx does not fail with the
    # error "the launch timed out and was terminated" on GPU cards that are
    # not powerful enough. We need the big shape to check for the corner
    # case.
    dot_result = T.fmatrix('dot_result')

    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()

    xx = numpy.asarray(numpy.random.rand(batch_size, n_in),
                       dtype=numpy.float32)
    #?????yy = numpy.ones((batch_size,),dtype='float32')
    yy = numpy.ones((batch_size,), dtype='int32')
    b_values = numpy.zeros((n_out,), dtype='float32')
    W_values = numpy.asarray(numpy.random.rand(n_in, n_out), dtype='float32')

    dot_value = numpy.asarray(numpy.dot(xx, W_values), dtype='float32')
    del W_values
    p_y_given_x = T.nnet.softmax(dot_result + b)
    y_pred = T.argmax(p_y_given_x, axis=-1)
    loss = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])
    dW = T.grad(loss, dot_result)
    classify = theano.function(inputs=[y, b, dot_result],
                               outputs=[loss, y_pred, dW],
                               mode=mode_without_gpu)
    classify_gpu = theano.function(inputs=[y, b, dot_result],
                                   outputs=[loss, y_pred, dW],
                                   mode=mode_with_gpu)
    # theano.printing.debugprint(classify)
    # theano.printing.debugprint(classify_gpu)

    assert any([isinstance(node.op,
                           T.nnet.CrossentropySoftmaxArgmax1HotWithBias)
                for node in classify.maker.fgraph.toposort()])
    assert any([isinstance(node.op,
                           GpuCrossentropySoftmaxArgmax1HotWithBias)
                for node in classify_gpu.maker.fgraph.toposort()])

    out = classify(yy, b_values, dot_value)
    gout = classify_gpu(yy, b_values, dot_value)

    assert len(out) == len(gout) == 3
    assert numpy.allclose(out[0], gout[0])
    assert numpy.allclose(out[2], gout[2], atol=3e-6), numpy.absolute(
        gout[2] - out[2]).max()
    assert numpy.allclose(out[1], gout[1]), [(id, out[1][id], gout[1][id], val)
                                             for id, val in enumerate(out[1] -
                                                                      gout[1])
                                             if val != 0]
Example #36
def _test_full(cls,
               mode=None,
               version=[-1],
               extra_shapes=[],
               test_bigger_kernels=True):
    seed_rng()
    shapes = get_basic_shapes()
    shapes += get_shapes2()
    # test image stride
    shapes += get_shapes2(scales_img=(2, 2), img_stride=(1, 2))
    shapes += get_shapes2(scales_img=(2, 2), img_stride=(2, 1))
    shapes += get_shapes2(scales_img=(2, 2), img_stride=(2, 2))
    shapes += get_shapes2(scales_img=(2, 2), img_stride=(-1, -1))
    shapes += get_shapes2(scales_img=(2, 2), kern_stride=(-1, -1))

    # test subsample done in a separate fct

    shapes += [
        # other test
        ((2, 1, 2, 2), (1, 1, 2, 2), (1, 1), (1, 1), (1, 1)),
        ((3, 2, 4, 4), (4, 2, 4, 4), (1, 1), (1, 1), (1, 1)),
        ((4, 1, 10, 10), (1, 1, 2, 2), (1, 1), (1, 1), (1, 1)),
        ((1, 1, 4, 4), (1, 1, 2, 3), (1, 1), (1, 1), (1, 1)),
        ((4, 1, 10, 10), (1, 1, 2, 3), (1, 1), (1, 1), (1, 1)),
        ((4, 1, 10, 10), (1, 1, 2, 10), (1, 1), (1, 1), (1, 1)),
        ((4, 1, 20, 10), (1, 1, 2, 10), (1, 1), (1, 1), (1, 1)),
        ((3, 2, 8, 8), (4, 2, 4, 4), (1, 1), (1, 1), (1, 1)
         )  # stack, nkern, bsize
        ,
        ((3, 2, 8, 6), (4, 2, 4, 4), (1, 1), (1, 1), (1, 1)
         )  # stack, nkern, bsize, non-square image
        ,
        ((3, 2, 8, 6), (4, 2, 4, 3), (1, 1), (1, 1), (1, 1)
         )  # stack, nkern, bsize, non-square image, non-square kern
        ,
        (
            (3, 2, 8, 6), (4, 2, 4, 6), (1, 1), (1, 1), (1, 1)
        )  # stack, nkern, bsize ,non-square image, non-square kern, kernsize==imgsize on one dim
        ,
        ((16, 5, 64, 64), (8, 5, 8, 8), (1, 1), (1, 1), (1, 1))  # a big one
        ,
        ((16, 1, 28, 28), (20, 1, 5, 5), (1, 1), (1, 1), (1, 1)
         )  # MNIST LeNET layer 1
        ,
        ((20, 16, 32, 32), (1, 16, 28, 28), (1, 1), (1, 1), (1, 1)
         )  # layer 1 backprop to weights
    ]

    if test_bigger_kernels:
        # Shapes where the kernel is larger than the image in some dimension
        shapes += [((3, 1, 1, 1), (2, 1, 5, 3), (1, 1), (1, 1), (1, 1)),
                   ((3, 2, 1, 1), (4, 2, 1, 1), (1, 1), (1, 1), (1, 1)),
                   ((3, 2, 4, 4), (4, 2, 2, 6), (1, 1), (1, 1), (1, 1)),
                   ((3, 2, 4, 4), (4, 2, 8, 6), (1, 1), (1, 1), (1, 1)),
                   ((4, 2, 10, 10), (3, 2, 2, 12), (1, 1), (1, 1), (1, 1))]

    shapes += [
        #        ((60,1,28,28),(20,1,5,5), (1, 1), (1, 1), (1, 1))#test_lenet_28 1 layers
        #            , ((60,20,12,12),(30,20,5,5), (1, 1), (1, 1), (1, 1))#test_lenet_28 2 layers
        ((60, 30, 8, 8), (20, 30, 5, 5), (1, 1), (1, 1), (1, 1)
         )  # test_lenet_28 bprop 1 full
        #            , ((20,60,12,12),(30,60,8,8), (1, 1), (1, 1), (1, 1))#test_lenet_28 bprop 2 valid
        #            , ((1,60,28,28),(20,60,24,24), (1, 1), (1, 1), (1, 1))#test_lenet_28 bprop 2 valid
        #            , ((10,1,64,64),(20,1,7,7), (1, 1), (1, 1), (1, 1))#test_lenet_64 1 layers
        #            , ((10,20,29,29),(30,20,7,7), (1, 1), (1, 1), (1, 1))#test_lenet_64 2 layers
        ,
        ((10, 30, 23, 23), (20, 30, 7, 7), (1, 1), (1, 1), (1, 1)
         )  # test_lenet_64 full
        #            , ((20,10,29,29),(30,10,23,23), (1, 1), (1, 1), (1, 1))#test_lenet_64 bprop 1
        #            , ((1,10,64,64),(20,10,58,58), (1, 1), (1, 1), (1, 1))#test_lenet_64 bprop 2
        # Test more than maxThreadsDim0
        ,
        ((2, 4, 13, 1050), (3, 4, 10, 11), (1, 1), (1, 1), (1, 1)),
        ((2, 4, 1050, 13), (3, 4, 10, 11), (1, 1), (1, 1), (1, 1)),
        ((1, 1, 44800, 1), (6, 1, 1, 1), (1, 1), (1, 1), (1, 1)
         )  # This caused crash
    ]

    verbose = 0
    random = True

    shapes += extra_shapes

    return exec_conv(version,
                     shapes,
                     verbose,
                     random,
                     'full',
                     theano_mode=mode,
                     cls=cls)
Example #37
def setup_method(self):
    super(TestLogDet, self).setup_method()
    utt.seed_rng()
    self.op_class = LogDet
    self.op = logdet
Example #38
def setup_method(self):
    if not cusolver_available:
        self.skipTest(
            "Optional package scikits.cuda.cusolver not available")
    utt.seed_rng()
Example #39
def setUp(self):
    utt.seed_rng()
    self.rng = np.random.RandomState(seed=utt.fetch_seed())
Example #40
    def setUp(self):
        super(TestConv3D, self).setUp()
        utt.seed_rng()
        self.rng = N.random.RandomState(utt.fetch_seed())

        mode = copy.copy(theano.compile.mode.get_default_mode())
        mode.check_py_code = False

        self.W = shared(N.ndarray(shape=(1, 1, 1, 1, 1), dtype=floatX))
        self.W.name = 'W'
        self.b = shared(N.zeros(1, dtype=floatX))
        self.b.name = 'b'
        self.rb = shared(N.zeros(1, dtype=floatX))
        self.rb.name = 'rb'
        self.V = shared(N.ndarray(shape=(1, 1, 1, 1, 1), dtype=floatX))
        self.V.name = 'V'
        self.d = shared(N.ndarray(shape=(3, ), dtype=int))
        self.d.name = 'd'

        self.H = conv3D(self.V, self.W, self.b, self.d)
        self.H.name = 'H'
        self.H_func = function([], self.H, mode=mode)
        self.H_shape_func = function([], self.H.shape, mode=mode)

        self.RShape = T.vector(dtype='int64')
        self.RShape.name = 'RShape'

        self.otherH = T.TensorType(floatX,
                        (False, False, False, False, False))(name='otherH')
        self.transp = convTransp3D(self.W, self.rb, self.d,
                                   self.otherH, self.RShape)
        self.transp.name = 'transp'
        self.transp_func = function([self.otherH, self.RShape],
                                    self.transp, mode=mode)

        self.R = convTransp3D(self.W, self.rb, self.d, self.H, self.RShape)
        self.R.name = 'R'
        self.R_func = function([self.RShape], self.R, mode=mode)
        self.R_shape_func = function([self.RShape], self.R.shape)

        diff = self.V - self.R
        diff.name = 'diff'
        sqr = T.sqr(diff)
        sqr.name = 'sqr'
        self.reconsObj = T.sum(sqr)
        self.reconsObj.name = 'reconsObj'
        self.reconsObjFunc = function([self.RShape], self.reconsObj, mode=mode)

        W_grad = T.grad(self.reconsObj, self.W)

        self.gradientsFunc = function([self.RShape],
                        [W_grad, T.grad(self.reconsObj,
                        self.H), T.grad(self.reconsObj, self.V),
                         T.grad(self.reconsObj, self.b)], mode=mode)

        self.check_c_against_python = function([self.RShape],
                        [T.grad(self.reconsObj, self.W), T.grad(self.reconsObj,
                        self.H), T.grad(self.reconsObj, self.V),
                         T.grad(self.reconsObj, self.b)], mode='DEBUG_MODE')

        self.dCdW_shape_func = function([self.RShape],
                        T.grad(self.reconsObj, self.W).shape, mode=mode)
Example #41
                         move_shared_float32_to_gpu=False,
                         enable_cuda=False)
    theano.sandbox.gpuarray.init_dev('cuda')

if not theano.sandbox.gpuarray.pygpu_activated:
    raise SkipTest("pygpu disabled")

from ..type import (GpuArrayType, gpuarray_shared_constructor)
from ..basic_ops import (host_from_gpu, gpu_from_host, gpu_alloc, GpuAlloc,
                         gpu_from_cuda, cuda_from_gpu, HostFromGpu,
                         GpuFromHost, GpuReshape, gpu_join, GpuJoin, GpuSplit,
                         GpuEye, gpu_contiguous)
from ..subtensor import GpuSubtensor

from theano.tests import unittest_tools as utt
utt.seed_rng()
rng = numpy.random.RandomState(seed=utt.fetch_seed())

from pygpu import gpuarray

if theano.config.mode == 'FAST_COMPILE':
    mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including(
        'gpuarray').excluding('gpu')
    mode_without_gpu = theano.compile.mode.get_mode('FAST_RUN').excluding(
        'gpuarray')
else:
    mode_with_gpu = theano.compile.mode.get_default_mode().including(
        'gpuarray').excluding('gpu')
    mode_without_gpu = theano.compile.mode.get_default_mode().excluding(
        'gpuarray')
Example #42
def setUp(self):
    utt.seed_rng()
Example #43
def setUp(self):
    unittest_tools.seed_rng()
Example #44
def test_run_nnet_small():
    utt.seed_rng()
    rval_cpu = run_nnet(False, 10, 10, 4, 4, n_train=100000)
Example #45
def test_GpuCrossentropySoftmax1HotWithBiasDx():
    """
    This is basic test for GpuCrossentropySoftmax1HotWithBiasDx

    We check that we loop when their is too much threads

    """
    n_in = 1000
    batch_size = 4097
    n_out = 1250

    if not isinstance(mode_with_gpu, theano.compile.DebugMode):
        n_in = 4098
        n_out = 4099

    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()

    softmax_output_value = numpy.random.rand(batch_size,
                                             n_out).astype('float32')
    dnll_value = numpy.asarray(numpy.random.rand(batch_size), dtype='float32')
    y_idx_value = numpy.random.randint(low=0, high=5, size=batch_size)

    softmax_output = T.fmatrix()
    softmax_output /= softmax_output.sum(axis=1).reshape(
        softmax_output.shape[1], 1)
    op = theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
        dnll_value,
        softmax_output,
        y_idx_value)

    cpu_f = theano.function([softmax_output], op, mode=mode_without_gpu)
    gpu_f = theano.function([softmax_output], op, mode=mode_with_gpu)
    # theano.printing.debugprint(cpu_f)
    # theano.printing.debugprint(gpu_f)

    assert any([isinstance(node.op, T.nnet.CrossentropySoftmax1HotWithBiasDx)
                for node in cpu_f.maker.fgraph.toposort()])
    assert any([isinstance(node.op,
                           GpuCrossentropySoftmax1HotWithBiasDx)
                for node in gpu_f.maker.fgraph.toposort()])

    cpu_out = cpu_f(softmax_output_value)
    gpu_out = gpu_f(softmax_output_value)

    rtol = 1e-5
    atol = 1e-6
    if not numpy.allclose(cpu_out, gpu_out, rtol=rtol, atol=atol):
        abs_err, rel_err = T.numeric_grad.abs_rel_err(cpu_out, gpu_out)
        scaled_err = numpy.minimum(abs_err / atol, rel_err / rtol)
        max_i = scaled_err.argmax()

        print('max err index:', max_i, max_i / batch_size, end=' ')
        print(max_i % batch_size, max_i / n_out, max_i & n_out)
        print('At that index:')
        print('err:', scaled_err.flatten()[max_i])
        print('absolute error:', abs_err.flatten()[max_i])
        print('relative error:', rel_err.flatten()[max_i])
        print('cpu_out:', cpu_out.flatten()[max_i])
        print('gpu_out:', gpu_out.flatten()[max_i])
        print('softmax_output_value:', softmax_output_value.flatten()[max_i])
        print('dnll_value:', dnll_value[max_i // n_out])
        print('y_idx_value:', y_idx_value[max_i // n_out])

        assert False, "numpy.allclose(cpu_out, gpu_out, rtol=%s, atol=%s)" % (
            rtol, atol)
Example #46
    def test_dimshuffle(self):
        utt.seed_rng()
        rng = numpy.random.RandomState(utt.fetch_seed())

        # 2d -> 0d
        a = theano._asarray(rng.randn(1, 1), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(numpy.transpose(a),
                              cuda_ndarray.dimshuffle(b, ()))

        # Test when we drop an axis that doesn't have shape 1
        a = theano._asarray(rng.randn(2, 1), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        self.assertRaises(ValueError, cuda_ndarray.dimshuffle, b, ())

        # Test that we can't take a dimension multiple times
        a = theano._asarray(rng.randn(2, 1), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        self.assertRaises(ValueError, cuda_ndarray.dimshuffle, b, (1, 1))

        # 1d
        a = theano._asarray(rng.randn(3, ), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(numpy.transpose(a),
                              cuda_ndarray.dimshuffle(b, (0, )))
        assert numpy.allclose(a[None, :, None],
                              cuda_ndarray.dimshuffle(b, (-1, 0, -1)))

        # 2d
        a = theano._asarray(rng.randn(3, 11), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(numpy.transpose(a),
                              cuda_ndarray.dimshuffle(b, (1, 0)))
        assert numpy.allclose(
            numpy.transpose(a)[None, :, None, :, None],
            cuda_ndarray.dimshuffle(b, (-1, 1, -1, 0, -1)))

        # 2d -> 1d
        a = theano._asarray(rng.randn(1, 11), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(a[:, ], cuda_ndarray.dimshuffle(b, (1, )))
        a = theano._asarray(rng.randn(11, 1), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(a.reshape((11, )),
                              cuda_ndarray.dimshuffle(b, (0, )))

        # 3d
        a = theano._asarray(rng.randn(3, 4, 5), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(a, cuda_ndarray.dimshuffle(b, (0, 1, 2)))
        assert numpy.allclose(numpy.swapaxes(a, 0, 1),
                              cuda_ndarray.dimshuffle(b, (1, 0, 2)))
        assert numpy.allclose(numpy.swapaxes(a, 0, 2),
                              cuda_ndarray.dimshuffle(b, (2, 1, 0)))
        assert numpy.allclose(numpy.swapaxes(a, 1, 2),
                              cuda_ndarray.dimshuffle(b, (0, 2, 1)))
        assert numpy.allclose(
            numpy.swapaxes(a, 1, 2)[None, :, None, :, :, None],
            cuda_ndarray.dimshuffle(b, (-1, 0, -1, 2, 1, -1)))

        # 4d
        a = theano._asarray(rng.randn(3, 11, 4, 5), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(numpy.swapaxes(a, 0, 1),
                              cuda_ndarray.dimshuffle(b, (1, 0, 2, 3)))
        assert numpy.allclose(numpy.swapaxes(a, 0, 2),
                              cuda_ndarray.dimshuffle(b, (2, 1, 0, 3)))
        assert numpy.allclose(numpy.swapaxes(a, 0, 3),
                              cuda_ndarray.dimshuffle(b, (3, 1, 2, 0)))
        assert numpy.allclose(numpy.swapaxes(a, 0, 3),
                              cuda_ndarray.dimshuffle(b, (3, 1, 2, 0)))
        assert numpy.allclose(
            numpy.swapaxes(a, 0, 3)[None, :, None, :, :, :],
            cuda_ndarray.dimshuffle(b, (-1, 3, -1, 1, 2, 0)))
Example #47
def test_run_nnet_med():
    utt.seed_rng()
    rval_cpu = run_nnet(False, 10, 128, 50, 4, n_train=10000)