示例#1
0
def dnn_available():
    """Return whether cuDNN can be used, caching the answer.

    The result is stored on the function itself as ``dnn_available.avail``
    and a human-readable explanation as ``dnn_available.msg``.  The probe
    only runs while ``dnn_available.avail`` is ``None``; afterwards the
    stored value is returned directly.

    Returns:
        ``False`` when the active device reports ``'major'`` below 3,
        otherwise whatever ``GCC_compiler.try_flags(["-l", "cudnn"])``
        returns.
    """
    # Already probed: reuse the stored answer.
    if dnn_available.avail is not None:
        return dnn_available.avail

    device = active_device_number()
    # 'major' is presumably the CUDA compute-capability major version --
    # confirm against device_properties.
    if device_properties(device)['major'] < 3:
        dnn_available.msg = "Device not supported by cuDNN"
        dnn_available.avail = False
        return dnn_available.avail

    # The message is set before the link probe and is not updated
    # afterwards, whatever the probe returns.
    dnn_available.msg = "Can not find the cuDNN library"
    link_flags = ["-l", "cudnn"]
    dnn_available.avail = theano.gof.cmodule.GCC_compiler.try_flags(
        link_flags)
    return dnn_available.avail
示例#2
0
    def setUp(self):
        """Run the parent ``setUp`` and cache two device limits on ``self``.

        Stores ``prop['maxThreadsDim0']`` as ``self.max_threads_dim0`` and
        ``prop['maxGridSize1']`` as ``self.max_grid_size1``, where ``prop``
        is the property mapping returned by ``device_properties`` for the
        device number recorded on ``theano.sandbox.cuda.use``.
        """
        super(TestGpuCumsum, self).setUp()

        gpu = theano.sandbox.cuda
        dev = gpu.use.device_number
        if dev is None:
            # No device number recorded yet: call ``use`` with these flags
            # and read the number again.
            # NOTE(review): presumably this selects a GPU without moving
            # computation or shared variables onto it -- confirm.
            use_options = dict(
                force=False,
                default_to_move_computation_to_gpu=False,
                move_shared_float32_to_gpu=False,
                enable_cuda=False,
                test_driver=True)
            gpu.use("gpu", **use_options)
            dev = gpu.use.device_number

        # Query the device properties and keep the two values of interest.
        props = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.device_properties(
            dev)
        self.max_threads_dim0 = props['maxThreadsDim0']
        self.max_grid_size1 = props['maxGridSize1']
def test_conv_grads():
    """Yield one ``conv_grad`` test case per parameter combination.

    This is a generator-style test: each yielded tuple is
    ``(conv_grad, mode, bs, ch, nf, rImg1, rImg2, rFlt1, rFlt2,
    subsample, op)``.

    ``dnn_op`` is only included when the active device reports
    ``'major'`` >= 3; otherwise only ``gemm_op`` is exercised.
    """
    # Function-scope import keeps this block self-contained.
    import itertools

    if cuda.device_properties(cuda.active_device_number())['major'] < 3:
        ops = [gemm_op]
    else:
        ops = [gemm_op, dnn_op]

    # itertools.product varies the right-most axis fastest, which is the
    # same order the former ten nested ``for`` loops produced.
    combinations = itertools.product(
        ('valid', 'full'),          # mode
        [1, 5],                     # bs
        [4],                        # ch
        [3],                        # nf
        [2, 5],                     # rImg1
        [2, 8],                     # rImg2
        [1, 2],                     # rFlt1
        [1, 2],                     # rFlt2
        ((1, 1), (1, 2), (2, 2)),   # subsample
        ops)                        # op
    for (mode, bs, ch, nf, rImg1, rImg2,
         rFlt1, rFlt2, subsample, op) in combinations:
        yield (conv_grad, mode, bs, ch, nf,
               rImg1, rImg2, rFlt1, rFlt2,
               subsample, op)
def test_conv_grads():
    """Generate ``conv_grad`` test cases over a grid of parameters.

    Each yielded item is the tuple
    ``(conv_grad, mode, bs, ch, nf, rImg1, rImg2, rFlt1, rFlt2,
    subsample, op)``.

    The list of ops is ``[gemm_op]`` when the active device reports
    ``'major'`` below 3, and ``[gemm_op, dnn_op]`` otherwise.
    """
    # Imported here so the function does not depend on a file-level import.
    import itertools

    if cuda.device_properties(cuda.active_device_number())['major'] < 3:
        ops = [gemm_op]
    else:
        ops = [gemm_op, dnn_op]

    # One entry per former loop level, outermost first; product() iterates
    # the last axis fastest, so the yield order matches the nested loops.
    axes = [
        ('valid', 'full'),          # mode
        [1, 5],                     # bs
        [4],                        # ch
        [3],                        # nf
        [2, 5],                     # rImg1
        [2, 8],                     # rImg2
        [1, 2],                     # rFlt1
        [1, 2],                     # rFlt2
        ((1, 1), (1, 2), (2, 2)),   # subsample
        ops,                        # op
    ]
    for (mode, bs, ch, nf, rImg1, rImg2,
         rFlt1, rFlt2, subsample, op) in itertools.product(*axes):
        yield (conv_grad, mode, bs, ch, nf,
               rImg1, rImg2, rFlt1, rFlt2,
               subsample, op)
示例#5
0
    def setUp(self):
        """Call the parent ``setUp`` and record two device limits.

        After this runs, ``self.max_threads_dim0`` holds the
        ``'maxThreadsDim0'`` entry and ``self.max_grid_size1`` holds the
        ``'maxGridSize1'`` entry of the properties returned by
        ``device_properties`` for the current device number.
        """
        super(TestGpuCumsum, self).setUp()

        cuda_mod = theano.sandbox.cuda
        current_device = cuda_mod.use.device_number
        if current_device is None:
            # ``use`` has no device number yet; call it, then re-read.
            # NOTE(review): the flag combination looks like "pick a GPU
            # but do not move anything onto it" -- confirm.
            cuda_mod.use(
                "gpu",
                force=False,
                default_to_move_computation_to_gpu=False,
                move_shared_float32_to_gpu=False,
                enable_cuda=False,
                test_driver=True)
            current_device = cuda_mod.use.device_number

        ndarray_mod = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray
        limits = ndarray_mod.device_properties(current_device)
        self.max_threads_dim0 = limits['maxThreadsDim0']
        self.max_grid_size1 = limits['maxGridSize1']
示例#6
0
def dnn_available():
    """Return whether cuDNN can be used, caching the answer.

    The outcome is stored on the function object: ``dnn_available.avail``
    holds the result and ``dnn_available.msg`` a human-readable
    explanation.  The probe only runs while ``dnn_available.avail`` is
    ``None``.

    When the active device reports ``'major'`` below 3 the result is
    ``False``.  Otherwise a small C program that calls ``cudnnCreate`` is
    handed to ``GCC_compiler.try_flags`` with ``try_run=True``; cuDNN is
    considered available when both the ``comp`` and ``run`` results are
    truthy.

    Returns:
        The value of ``dnn_available.avail``.
    """
    if dnn_available.avail is None:
        dev = active_device_number()
        # 'major' is presumably the CUDA compute-capability major version
        # -- confirm against device_properties.
        if device_properties(dev)['major'] < 3:
            dnn_available.msg = "Device not supported by cuDNN"
            dnn_available.avail = False
        else:
            # Headers needed by the probe program below.
            preambule = """
#include <stdio.h>
#include <cuda.h>
#include <cudnn.h>
#include <cudnn_helper.h>
            """

            # Probe: create a cuDNN handle; on failure print the cuDNN
            # error string to stderr and exit with status 1.
            body = """
cudnnHandle_t _handle = NULL;
cudnnStatus_t err;
if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
  fprintf(stderr, "could not create cuDNN handle: %s",
          cudnnGetErrorString(err));
  return 1;
}
"""

            # Link against cudnn; look for headers next to this file and
            # in the CUDA root's include dir, libraries in its lib64 dir.
            comp, run, out, err = gof.cmodule.GCC_compiler.try_flags(
                [
                    "-l", "cudnn", "-I" + os.path.dirname(__file__),
                    "-I" + os.path.join(theano.config.cuda.root, 'include'),
                    "-L" + os.path.join(theano.config.cuda.root, 'lib64')
                ],
                preambule=preambule,
                body=body,
                try_run=True,
                output=True)

            dnn_available.avail = comp and run
            if dnn_available.avail:
                dnn_available.msg = "cuDNN should work"
            else:
                # str() guards against ``err`` not being a str (e.g. None
                # or bytes), which would make the concatenation raise
                # TypeError instead of reporting the failure.
                dnn_available.msg = (
                    "Theano is not able to use cuDNN. We got this error: \n" +
                    str(err))
    return dnn_available.avail
示例#7
0
文件: dnn.py 项目: xiaobeileo/Theano
def dnn_available():
    """Report whether cuDNN is usable, memoising the result.

    State lives on the function object: ``dnn_available.avail`` is the
    cached answer (probing happens only while it is ``None``) and
    ``dnn_available.msg`` explains it.

    A device reporting ``'major'`` below 3 is rejected outright.
    Otherwise a tiny C program that calls ``cudnnCreate`` is passed to
    ``GCC_compiler.try_flags`` with ``try_run=True``; both the compile and
    the run result must be truthy for cuDNN to count as available.

    Returns:
        The value of ``dnn_available.avail``.
    """
    # Fast path: a previous call already decided.
    if dnn_available.avail is not None:
        return dnn_available.avail

    device = active_device_number()
    # 'major' is presumably the compute-capability major version --
    # confirm against device_properties.
    if device_properties(device)['major'] < 3:
        dnn_available.msg = "Device not supported by cuDNN"
        dnn_available.avail = False
        return dnn_available.avail

    # Headers for the probe.  The trailing run of spaces is part of the
    # original text and is kept so the generated source is unchanged.
    preambule = (
        "\n"
        "#include <stdio.h>\n"
        "#include <cuda.h>\n"
        "#include <cudnn.h>\n"
        "#include <cudnn_helper.h>\n"
        "            ")

    # Probe body: create a handle, or print the cuDNN error and return 1.
    body = """
cudnnHandle_t _handle = NULL;
cudnnStatus_t err;
if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
  fprintf(stderr, "could not create cuDNN handle: %s",
          cudnnGetErrorString(err));
  return 1;
}
"""

    # Link with cudnn; search this file's directory and the CUDA root.
    flags = ["-l", "cudnn"]
    flags.append("-I" + os.path.dirname(__file__))
    flags.append("-I" + os.path.join(theano.config.cuda.root, 'include'))
    flags.append("-L" + os.path.join(theano.config.cuda.root, 'lib64'))

    compiled, ran, out, err = gof.cmodule.GCC_compiler.try_flags(
        flags,
        preambule=preambule,
        body=body,
        try_run=True,
        output=True)

    dnn_available.avail = compiled and ran
    if not dnn_available.avail:
        dnn_available.msg = (
            "Theano is not able to use cuDNN. We got this error: \n" +
            str(err))
    else:
        dnn_available.msg = "cuDNN should work"
    return dnn_available.avail
示例#8
0
def test_dnn_subsample():
    """Yield the ``_test_subsample`` cases for ``GpuDnnConv``.

    Raises:
        SkipTest: when the active device reports ``'major'`` below 3.
    """
    device_info = cuda.device_properties(cuda.active_device_number())
    if device_info['major'] < 3:
        raise SkipTest('Current GPU too old')

    # Run the shared subsample cases with the 'cudnn' tag included.
    cudnn_mode = theano_mode.including('cudnn')
    for case in _test_subsample(GpuDnnConv, cudnn_mode):
        yield case
示例#9
0
def test_dnn_full():
    """Yield the ``_test_full`` cases for ``GpuDnnConv``.

    Raises:
        SkipTest: when the active device reports ``'major'`` below 3.
    """
    device_info = cuda.device_properties(cuda.active_device_number())
    if device_info['major'] < 3:
        raise SkipTest('Current GPU too old')

    # Run the shared full-mode cases with the "cudnn" tag included.
    cudnn_mode = theano_mode.including("cudnn")
    for case in _test_full(GpuDnnConv, mode=cudnn_mode):
        yield case
示例#10
0
# Module-level setup: build a CudaNdarrayType and look up the properties of
# the device the tests will run on.
#
# The type is constructed from a four-element all-False list (presumably a
# 4-D, non-broadcastable pattern -- confirm against CudaNdarrayType).
cuda_tensor4 = cuda_ndarray.CudaNdarrayType([False] * 4)

# Read the device number recorded on ``use``; None means none is set yet.
device_id = theano.sandbox.cuda.use.device_number
if device_id is None:
    # NOTE(review): creating a float32 shared variable presumably triggers
    # device initialisation as a side effect -- confirm.
    cuda_ndarray.shared_constructor(numpy.zeros(2, dtype='float32'))
# Re-read the device number after the attempt above.
device_id = theano.sandbox.cuda.use.device_number
if device_id is None:
    # Still no device number: call ``use`` explicitly with these flags
    # (presumably "pick a GPU without moving anything onto it" -- confirm).
    cuda.use("gpu",
             force=False,
             default_to_move_computation_to_gpu=False,
             move_shared_float32_to_gpu=False,
             enable_cuda=False,
             test_driver=True)
    device_id = theano.sandbox.cuda.use.device_number
# From here on ``cuda_ndarray`` is rebound to the inner
# theano.sandbox.cuda.cuda_ndarray.cuda_ndarray object, replacing the name
# used for CudaNdarrayType / shared_constructor above.
cuda_ndarray = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray
# Properties of the selected device, kept for use by the tests.
device_prop = cuda_ndarray.device_properties(device_id)


def py_conv_valid_numpy(img, kern):
    assert img.shape[1] == kern.shape[1]
    outshp = (img.shape[0], kern.shape[0],
            img.shape[2] - kern.shape[2] + 1,
            img.shape[3] - kern.shape[3] + 1)
    out = numpy.zeros(outshp, dtype='float32')
    for b in xrange(out.shape[0]):
        for k in xrange(out.shape[1]):
            for rr in xrange(out.shape[2]):
                for cc in xrange(out.shape[3]):
                    #rr, cc is the upper-left corner of img patches
                    imgpatch = img[b, :, rr:rr + kern.shape[2],
                                   cc:cc + kern.shape[3]]