def dnn_available():
    """Check (once) whether cuDNN is usable on the current GPU.

    The result is cached on the function object: ``dnn_available.avail``
    holds the boolean answer and ``dnn_available.msg`` a human-readable
    explanation of it.

    Returns
    -------
    bool
        True when the device has compute capability >= 3 and the cuDNN
        library can be linked against.
    """
    if dnn_available.avail is None:
        dev = active_device_number()
        if device_properties(dev)['major'] < 3:
            # cuDNN requires a device of compute capability 3.0 or newer.
            dnn_available.msg = "Device not supported by cuDNN"
            dnn_available.avail = False
        else:
            dnn_available.avail = theano.gof.cmodule.GCC_compiler.try_flags(
                ["-l", "cudnn"])
            # Bug fix: the "can not find" message used to be set before the
            # link probe ran, so it stayed cached even on success.  Set the
            # message according to the actual outcome instead (matching the
            # later revisions of this function).
            if dnn_available.avail:
                dnn_available.msg = "cuDNN should work"
            else:
                dnn_available.msg = "Can not find the cuDNN library"
    return dnn_available.avail
def setUp(self): super(TestGpuCumsum, self).setUp() # Fetch some useful properties on the device cuda = theano.sandbox.cuda device_id = cuda.use.device_number if device_id is None: cuda.use("gpu", force=False, default_to_move_computation_to_gpu=False, move_shared_float32_to_gpu=False, enable_cuda=False, test_driver=True) device_id = cuda.use.device_number cuda_ndarray = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray prop = cuda_ndarray.device_properties(device_id) self.max_threads_dim0 = prop['maxThreadsDim0'] self.max_grid_size1 = prop['maxGridSize1']
def test_conv_grads():
    """Yield (conv_grad, ...) parameter tuples covering the conv grad grid.

    The cuDNN op is only exercised on devices of compute capability >= 3;
    older devices test the GEMM implementation alone.
    """
    import itertools  # local import: keeps the block self-contained

    if cuda.device_properties(cuda.active_device_number())['major'] < 3:
        ops = [gemm_op]
    else:
        ops = [gemm_op, dnn_op]
    # itertools.product iterates with the rightmost factor varying fastest,
    # which reproduces the original ten-level nested-loop order exactly.
    for (mode, bs, ch, nf, rImg1, rImg2, rFlt1, rFlt2,
         subsample, op) in itertools.product(
            ('valid', 'full'),          # convolution mode
            [1, 5],                     # batch size
            [4],                        # input channels
            [3],                        # number of filters
            [2, 5], [2, 8],             # image rows/cols
            [1, 2], [1, 2],             # filter rows/cols
            ((1, 1), (1, 2), (2, 2)),   # subsample
            ops):
        yield (conv_grad, mode, bs, ch, nf, rImg1, rImg2, rFlt1, rFlt2,
               subsample, op)
def dnn_available():
    """Check (once) whether cuDNN can be compiled, linked and initialized.

    Unlike a pure link check, this compiles and *runs* a tiny program that
    calls ``cudnnCreate``, so it also catches runtime problems (e.g. a
    missing driver).  The result is cached: ``dnn_available.avail`` holds
    the boolean answer, ``dnn_available.msg`` a human-readable explanation.

    Returns
    -------
    bool
        True when the device has compute capability >= 3 and the probe
        program both compiles and runs successfully.
    """
    if dnn_available.avail is None:
        dev = active_device_number()
        if device_properties(dev)['major'] < 3:
            # cuDNN requires a device of compute capability 3.0 or newer.
            dnn_available.msg = "Device not supported by cuDNN"
            dnn_available.avail = False
        else:
            preambule = """
#include <stdio.h>
#include <cuda.h>
#include <cudnn.h>
#include <cudnn_helper.h>
"""
            body = """
cudnnHandle_t _handle = NULL;
cudnnStatus_t err;
if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
  fprintf(stderr, "could not create cuDNN handle: %s",
          cudnnGetErrorString(err));
  return 1;
}
"""
            comp, run, out, err = gof.cmodule.GCC_compiler.try_flags(
                ["-l", "cudnn",
                 "-I" + os.path.dirname(__file__),
                 "-I" + os.path.join(theano.config.cuda.root, 'include'),
                 "-L" + os.path.join(theano.config.cuda.root, 'lib64')],
                preambule=preambule, body=body,
                try_run=True, output=True)
            dnn_available.avail = comp and run
            if dnn_available.avail:
                dnn_available.msg = "cuDNN should work"
            else:
                # Bug fix: ``err`` may be a bytes object (compiler stderr),
                # and concatenating it to a str raises TypeError.  Coerce
                # with str(), matching the later revision of this function.
                dnn_available.msg = (
                    "Theano is not able to use cuDNN. We got this error: \n" +
                    str(err))
    return dnn_available.avail
def dnn_available():
    """Check (once) whether cuDNN can be compiled, linked and initialized.

    Compiles and runs a tiny program that calls ``cudnnCreate``, so both
    link-time and run-time failures are detected.  The result is cached:
    ``dnn_available.avail`` holds the boolean answer and
    ``dnn_available.msg`` a human-readable explanation of it.
    """
    if dnn_available.avail is None:
        dev = active_device_number()
        if device_properties(dev)['major'] < 3:
            # cuDNN requires a device of compute capability 3.0 or newer.
            dnn_available.msg = "Device not supported by cuDNN"
            dnn_available.avail = False
        else:
            # C prologue and body of the probe program handed to the
            # compiler; cudnn_helper.h is expected next to this file.
            preambule = """
#include <stdio.h>
#include <cuda.h>
#include <cudnn.h>
#include <cudnn_helper.h>
"""
            body = """
cudnnHandle_t _handle = NULL;
cudnnStatus_t err;
if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
  fprintf(stderr, "could not create cuDNN handle: %s",
          cudnnGetErrorString(err));
  return 1;
}
"""
            # try_run=True: run the program as well; output=True: capture
            # its stdout/stderr so a failure can be reported to the user.
            comp, run, out, err = gof.cmodule.GCC_compiler.try_flags(
                ["-l", "cudnn",
                 "-I" + os.path.dirname(__file__),
                 "-I" + os.path.join(theano.config.cuda.root, 'include'),
                 "-L" + os.path.join(theano.config.cuda.root, 'lib64')],
                preambule=preambule, body=body,
                try_run=True, output=True)
            # Available only when it both compiled and ran successfully.
            dnn_available.avail = comp and run
            if dnn_available.avail:
                dnn_available.msg = "cuDNN should work"
            else:
                # str() because err may be bytes (compiler stderr).
                dnn_available.msg = (
                    "Theano is not able to use cuDNN. We got this error: \n" +
                    str(err))
    return dnn_available.avail
def test_dnn_subsample():
    """Run the shared subsample test cases against the cuDNN conv op."""
    # Skip outright on devices too old for cuDNN (compute capability < 3).
    if cuda.device_properties(cuda.active_device_number())['major'] < 3:
        raise SkipTest('Current GPU too old')
    dnn_mode = theano_mode.including('cudnn')
    for case in _test_subsample(GpuDnnConv, dnn_mode):
        yield case
def test_dnn_full():
    """Run the shared full-mode conv test cases against the cuDNN conv op."""
    # Skip outright on devices too old for cuDNN (compute capability < 3).
    if cuda.device_properties(cuda.active_device_number())['major'] < 3:
        raise SkipTest('Current GPU too old')
    dnn_mode = theano_mode.including("cudnn")
    for case in _test_full(GpuDnnConv, mode=dnn_mode):
        yield case
# Module-level setup: make sure a CUDA device is initialized before
# querying its properties.
cuda_tensor4 = cuda_ndarray.CudaNdarrayType([False] * 4)
device_id = theano.sandbox.cuda.use.device_number
if device_id is None:
    # Creating a GPU shared variable initializes the device as a side effect.
    cuda_ndarray.shared_constructor(numpy.zeros(2, dtype='float32'))
    device_id = theano.sandbox.cuda.use.device_number
if device_id is None:
    # Still uninitialized: select a GPU explicitly without moving any
    # computation or shared variables onto it.
    cuda.use("gpu",
             force=False,
             default_to_move_computation_to_gpu=False,
             move_shared_float32_to_gpu=False,
             enable_cuda=False,
             test_driver=True)
    device_id = theano.sandbox.cuda.use.device_number
cuda_ndarray = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray
device_prop = cuda_ndarray.device_properties(device_id)


def py_conv_valid_numpy(img, kern):
    # Reference (pure NumPy) "valid" 2D convolution over 4D arrays.
    # Presumably layout is (batch, channel, row, col) for both img and
    # kern -- TODO confirm against callers.
    # NOTE(review): this definition continues past the end of this chunk;
    # only the patch-extraction part is visible here.
    assert img.shape[1] == kern.shape[1]
    outshp = (img.shape[0], kern.shape[0],
              img.shape[2] - kern.shape[2] + 1,
              img.shape[3] - kern.shape[3] + 1)
    out = numpy.zeros(outshp, dtype='float32')
    for b in xrange(out.shape[0]):
        for k in xrange(out.shape[1]):
            for rr in xrange(out.shape[2]):
                for cc in xrange(out.shape[3]):
                    # rr, cc is the upper-left corner of img patches
                    imgpatch = img[b, :, rr:rr + kern.shape[2],
                                   cc:cc + kern.shape[3]]