def cudnn_available():
    """Return True if running on a GPU with cuDNN available."""
    if config['device'] == 'gpu':
        # theano backend
        if config['backend'] == 'theano':
            try:
                if package_installed(name='pygpu'):
                    from theano.gpuarray import dnn
                    from theano.gpuarray.type import list_contexts
                    return dnn.dnn_available(list_contexts()[0])
                else:
                    from theano.sandbox.cuda import dnn
                    return dnn.dnn_available()
            except ImportError:
                return False
        # tensorflow backend
        else:
            # `commands` was removed in Python 3; `subprocess.getstatusoutput`
            # provides the same interface.
            from subprocess import getstatusoutput
            if platform.system() == "Darwin":
                x = getstatusoutput('ls /usr/local/cuda/lib')
                x = x[-1].split('\n')
            elif platform.system() == "Windows":
                raise Exception('No support for Windows')
            else:
                x = getstatusoutput('ldconfig -p')
                x = x[-1].split('=>')
            if builtins.any('libcudnn' in i for i in x):
                return True
            else:
                return False
    return False
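A minimal usage sketch (an illustration only, not part of the original module; it assumes the same `config` dict used above has already been populated):

if cudnn_available():
    print("cuDNN detected; cuDNN-backed ops can be used")
else:
    print("cuDNN not found; falling back to non-cuDNN ops")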
def local_cudnn_maxandargmax(node):
    if not isinstance(node.op, GpuMaxAndArgmax):
        return
    if not dnn_available(node.inputs[0].type.context_name):
        return
    if version(raises=False) < 6000:
        return
    if node.inputs[0].ndim > 8:
        return
    if node.inputs[0].dtype != node.outputs[0].dtype:
        return
    if node.inputs[0].dtype not in ["float16", "float32", "float64"]:
        return

    # order of the axes influences the output indices
    if node.op.axis is not None and tuple(sorted(node.op.axis)) != node.op.axis:
        return

    max, arg = GpuDnnReduction(
        "maximum", node.op.axis, node.outputs[0].dtype, node.outputs[0].dtype, True
    )(node.inputs[0])

    # cudnn can only return int32 indices
    return (
        max,
        as_gpuarray_variable(arg.astype("int64"), node.outputs[1].type.context_name),
    )
def local_gpua_avg_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
    if not dnn_available(ctx_name):
        return
    if not op.ignore_border:
        return
    inp, out_grad, ws, stride, pad = inputs
    nd = op.ndim
    if nd not in (2, 3):
        return
    inp = gpu_contiguous(as_gpuarray_variable(inp, ctx_name))
    out_grad = gpu_contiguous(as_gpuarray_variable(out_grad, ctx_name))
    mode = op.mode

    # the GPU ops expect exactly 2 non-pooling dimensions
    if inp.ndim == nd + 2:
        # We reuse out_grad because cuDNN does not use the value of the `out`
        # argument but still checks its shape for average pooling. This
        # has been observed in v2 and v3 as far as I know.
        return GpuDnnPoolGrad(mode=mode)(inp, out_grad, out_grad, ws, stride, pad)
    else:
        # reshape to 4D or 5D with 2 non-pooling dimensions
        inp_padded = pad_dims(inp, 2, nd)
        out_grad_padded = pad_dims(out_grad, 2, nd)
        ret_padded = GpuDnnPoolGrad(mode=mode)(
            inp_padded, out_grad_padded, out_grad_padded, ws, stride, pad
        )
        return unpad_dims(ret_padded, inp, 2, nd)
def local_gpua_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
    if not dnn_available(ctx_name):
        return
    if not op.ignore_border:
        return
    inp, out, out_grad, ws, stride, pad = inputs
    nd = op.ndim
    if nd not in (2, 3):
        return
    inp = gpu_contiguous(as_gpuarray_variable(inp, ctx_name))
    out = gpu_contiguous(as_gpuarray_variable(out, ctx_name))
    out_grad = gpu_contiguous(as_gpuarray_variable(out_grad, ctx_name))
    mode = op.mode

    # the GPU ops expect exactly 2 non-pooling dimensions
    if inp.ndim == nd + 2:
        return GpuDnnPoolGrad(mode=mode)(inp, out, out_grad, ws, stride, pad)
    else:
        # reshape to 4D or 5D with 2 non-pooling dimensions
        inp_padded = pad_dims(inp, 2, nd)
        out_padded = pad_dims(out, 2, nd)
        out_grad_padded = pad_dims(out_grad, 2, nd)
        ret_padded = GpuDnnPoolGrad(mode=mode)(
            inp_padded, out_padded, out_grad_padded, ws, stride, pad
        )
        return unpad_dims(ret_padded, inp, 2, nd)
def local_softmax_dnn(node):
    if isinstance(node.op, GpuSoftmax):
        if not dnn_available(node.outputs[0].type.context_name):
            return
        ins = node.inputs[0].dimshuffle(0, 1, "x", "x")
        ins = gpu_contiguous(ins)
        out = GpuDnnSoftmax("accurate", "channel")(ins)
        out = as_gpuarray_variable(out.dimshuffle(0, 1), out.type.context_name)
        return [out]
def local_gpua_softmax_dnn_grad(op, ctx_name, inputs, outputs):
    if not dnn_available(ctx_name):
        return
    ins = []
    for n in inputs:
        n = as_gpuarray_variable(n, ctx_name)
        if n.ndim != 2:
            return
        ins.append(n.dimshuffle(0, "x", 1, "x"))

    out = GpuDnnSoftmaxGrad("accurate", "instance")(
        gpu_contiguous(ins[0]), gpu_contiguous(ins[1])
    )
    return [out.dimshuffle(0, 2)]
def apply(self, fgraph):
    """Raise an error if cuDNN can't be used."""
    for c in list_contexts():
        if not dnn_available(c):
            # Raise an AssertionError because we want Theano to fail,
            # not just skip this optimization.
            raise AssertionError(
                "cuDNN optimization was enabled, but Theano was not able "
                "to use it for context " + str(c) + ". We got this error: \n" +
                dnn_available.msg)
def local_gpua_logsoftmax_to_dnn(op, ctx_name, inputs, outputs):
    # Transform the input into the format expected by GpuDnnSoftmax
    inp = inputs[0]
    if inp.ndim != 2:
        return
    if not dnn_available(ctx_name):
        return

    inp = inp.dimshuffle(0, 1, "x", "x")
    inp.tag.context_name = ctx_name

    # Apply GpuDnnSoftmax and return the result
    out = GpuDnnSoftmax("log", "channel")(gpu_contiguous(inp))
    return [out.dimshuffle(0, 1)]
def local_mypool_dnn_alternative(node):
    if not dnn_available():
        return
    if isinstance(node.op, MyPool):
        if not node.op.ignore_border:
            return
        img, = node.inputs
        ds = node.op.ds
        stride = node.op.st
        pad = node.op.padding
        mode = node.op.mode
        if img.owner and isinstance(img.owner.op, HostFromGpu):
            ret = dnn_pool(gpu_contiguous(img.owner.inputs[0]),
                           ds, stride=stride, pad=pad, mode=mode)
            return [host_from_gpu(ret)]
def run_test_case_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1),
                     expect_error=False):
    if not dnn_available(test_ctx_name):
        pytest.skip(dnn_available.msg)
    if fd != (1, 1):
        pytest.skip("Doesn't have CUDNN implementation")
    mode = mode_with_gpu

    if not expect_error:
        self.run_gradinput(
            inputs_shape=i,
            filters_shape=f,
            output_shape=o,
            subsample=s,
            verify_grad=True,
            mode=mode,
            provide_shape=provide_shape,
            border_mode=b,
            filter_flip=flip,
            target_op=GpuDnnConvGradI,
            filter_dilation=fd,
        )
    else:
        with pytest.raises((RuntimeError, ValueError)):
            self.run_gradinput(
                inputs_shape=i,
                filters_shape=f,
                output_shape=o,
                subsample=s,
                verify_grad=False,
                mode=mode,
                provide_shape=provide_shape,
                border_mode=b,
                filter_flip=flip,
                target_op=GpuDnnConvGradI,
                ref=None,
                filter_dilation=fd,
            )
def test_import_without_gpu_or_cudnn_raises(self):
    if theano_backend == 'pygpu':
        from theano.gpuarray import dnn
        if dnn.dnn_present():
            pytest.skip()
    elif theano_backend == 'pygpu_sandbox':
        from theano.sandbox.gpuarray import dnn
        if dnn.dnn_present():
            pytest.skip()
    elif theano_backend == 'cuda_sandbox':
        from theano.sandbox.cuda import dnn
        if dnn.dnn_available():
            pytest.skip()
    else:
        with pytest.raises(ImportError):
            import lasagne.layers.dnn
def run_test_case(self, i, f, s, b, flip, provide_shape, fd=(1, 1)):
    if not dnn_available(test_ctx_name):
        pytest.skip(dnn_available.msg)
    mode = mode_with_gpu
    if fd != (1, 1):
        pytest.skip("Doesn't have CUDNN implementation")

    o = self.get_output_shape(i, f, s, b, fd)
    self.run_fwd(
        inputs_shape=i,
        filters_shape=f,
        subsample=s,
        verify_grad=True,
        mode=mode,
        provide_shape=provide_shape,
        border_mode=b,
        filter_flip=flip,
        target_op=GpuDnnConv,
    )
    self.run_gradweight(
        inputs_shape=i,
        filters_shape=f,
        output_shape=o,
        subsample=s,
        verify_grad=True,
        mode=mode,
        provide_shape=provide_shape,
        border_mode=b,
        filter_flip=flip,
        target_op=GpuDnnConvGradW,
    )
    self.run_gradinput(
        inputs_shape=i,
        filters_shape=f,
        output_shape=o,
        subsample=s,
        verify_grad=True,
        mode=mode,
        provide_shape=provide_shape,
        border_mode=b,
        filter_flip=flip,
        target_op=GpuDnnConvGradI,
    )
def setUp(self):
    """Set up a test image and filter to re-use."""
    skip_if_no_gpu()
    if not dnn_available():
        raise SkipTest('Skipping tests because cuDNN is not available')
    self.orig_floatX = theano.config.floatX
    theano.config.floatX = 'float32'
    self.image = np.random.rand(1, 1, 3, 3).astype(theano.config.floatX)
    self.image_tensor = tensor.tensor4()
    self.input_space = Conv2DSpace((3, 3), 1, axes=('b', 'c', 0, 1))
    self.filters_values = np.ones((1, 1, 2, 2), dtype=theano.config.floatX)
    self.filters = sharedX(self.filters_values, name='filters')
    self.batch_size = 1
    self.cudnn2d = Cudnn2D(self.filters, self.batch_size, self.input_space)
def local_gpua_pool_dnn_alternative(fgraph, op, ctx_name, inputs, outputs):
    if not dnn_available(ctx_name):
        return
    if not op.ignore_border:
        return
    img, ws, stride, pad = inputs
    nd = op.ndim
    if nd not in (2, 3):
        return
    img = gpu_contiguous(as_gpuarray_variable(img, ctx_name))
    mode = op.mode

    # dnn_pool expects exactly 2 non-pooling dimensions
    if img.ndim == nd + 2:
        return dnn_pool(img, ws, stride=stride, pad=pad, mode=mode)
    else:
        # reshape to 4D or 5D with 2 non-pooling dimensions
        img_padded = pad_dims(img, 2, nd)
        ret_padded = dnn_pool(img_padded, ws, stride=stride, pad=pad, mode=mode)
        return unpad_dims(ret_padded, img, 2, nd)
def _params_to_cudnn(self):
    from theano.gpuarray import dnn
    from theano.gpuarray.type import gpuarray_shared_constructor

    assert dnn.dnn_available(None)
    self._rnn_block = dnn.RNNBlock(theano.config.floatX,
                                   self.hidden_dim,
                                   num_layers=1,
                                   input_mode="linear",
                                   rnn_mode=self.rnn_type,
                                   direction_mode="unidirectional")
    param_size = self._rnn_block.get_param_size(
        [self.n_batch, self.input_dim])  # TODO: study about n_batch
    self.params = [gpuarray_shared_constructor(Constant(0.0)(param_size))]
    cs = self._rnn_block.split_params(
        self.params[0], layer=0,
        input_size=[self.n_batch, self.input_dim])  # TODO: multi layer support
    for c, p in zip(cs, self.non_cudnn_params):
        c[:] = p.get_value(borrow=True, return_internal_type=True)
def local_dnn_argmax(op, ctx_name, inputs, outputs):
    if not dnn_available(ctx_name):
        return
    if version(raises=False) < 6000:
        return
    if inputs[0].ndim > 8:
        return
    if inputs[0].dtype not in ["float16", "float32", "float64"]:
        return

    # order of the axes influences the output indices
    if op.axis is not None and tuple(sorted(op.axis)) != op.axis:
        return

    max, arg = GpuDnnReduction(
        "maximum", op.axis, inputs[0].dtype, inputs[0].dtype, True
    )(*inputs)

    return [as_gpuarray_variable(arg.astype("int64"), ctx_name)]
def local_mypool_dnn_grad_stride(node):
    if not dnn_available():
        return
    if isinstance(node.op, MyMaxPoolGrad):
        if not node.op.ignore_border:
            return
        inp, out, inp_grad = node.inputs
        ds = node.op.ds
        st = node.op.st
        pad = node.op.padding
        mode = node.op.mode

        if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or
                (out.owner and isinstance(out.owner.op, HostFromGpu)) or
                (inp_grad.owner and isinstance(inp_grad.owner.op, HostFromGpu))):
            ret = GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp),
                                            gpu_contiguous(out),
                                            gpu_contiguous(inp_grad),
                                            ds, st, pad)
            return [host_from_gpu(ret)]
def init_dev(dev, name=None, preallocate=None):
    global pygpu_activated
    global theano_gpu_is_already_active
    if (
        not theano_gpu_is_already_active
        and os.environ.get("THEANO_GPU_IS_ALREADY_ACTIVE", "") == "Yes"
    ):
        raise RuntimeError(
            "You can't initialize the GPU in a subprocess if the parent process "
            "already did it"
        )
    if not config.cxx:
        raise RuntimeError("The new gpu-backend need a c++ compiler.")
    pygpu_version = pygpu_parse_version(pygpu.__version__)
    if pygpu_version.major != 0 or pygpu_version.minor != 7 or pygpu_version.patch < 0:
        raise ValueError(
            "Your installed version of pygpu(%s) is too old, please upgrade to "
            "0.7.0 or later (but below 0.8.0)" % pygpu_version.fullversion
        )

    # This is for the C headers API, we need to match the exact version.
    gpuarray_version_major_supported = 2
    gpuarray_version_major_detected = pygpu.gpuarray.api_version()[0]
    if gpuarray_version_major_detected != gpuarray_version_major_supported:
        raise ValueError(
            "Your installed version of libgpuarray is not in sync with the current Theano"
            f" version. The installed libgpuarray version supports API version {int(gpuarray_version_major_detected)},"
            f" while current Theano supports API version {int(gpuarray_version_major_supported)}. Change the version of"
            " libgpuarray or Theano to fix this problem.",
        )
    if dev not in init_dev.devmap:
        args = dict()
        if config.gpuarray__cache_path != "":
            args["kernel_cache_path"] = config.gpuarray__cache_path
        if preallocate is None:
            preallocate = config.gpuarray__preallocate
        if preallocate < 0:
            args["max_cache_size"] = 0
        else:
            args["initial_cache_size"] = preallocate
        context = pygpu.init(
            dev,
            sched=config.gpuarray__sched,
            single_stream=config.gpuarray__single_stream,
            **args,
        )
        os.environ["THEANO_GPU_IS_ALREADY_ACTIVE"] = "Yes"
        theano_gpu_is_already_active = True
        context.dev = dev
        init_dev.devmap[dev] = context
        reg_context(name, context)

        MB = 1024 * 1024
        if dev.startswith("cuda"):
            avail = dnn.dnn_available(name)
            # If we try to enable cudnn and there isn't enough GPU
            # memory, there will be an unclear error message. So do
            # not even try a clear error.
            if avail and context.free_gmem < 75 * MB:
                raise RuntimeError(
                    f"Can not enable cuDNN as there is only {int(context.free_gmem / MB)} MB of free GPU memory."
                )
            elif avail:
                context.cudnn_handle = dnn._make_handle(context)
            elif config.dnn__enabled == "True":
                raise RuntimeError(
                    "You enabled cuDNN, but we aren't able to use it: %s"
                    % dnn.dnn_available.msg
                )
            if config.print_active_device:
                if avail:
                    print(
                        f"Using cuDNN version {int(dnn.version())} on context {name}",
                        file=sys.stderr,
                    )
                else:
                    print(
                        f"Can not use cuDNN on context {name}: {dnn.dnn_available.msg}",
                        file=sys.stderr,
                    )
        if preallocate < 0:
            print(f"Disabling allocation cache on {dev}")
        elif preallocate > 0:
            if preallocate <= 1:
                gmem = min(preallocate, 0.95) * context.total_gmem
            else:
                gmem = preallocate * MB
            if gmem > context.free_gmem:
                raise RuntimeError(
                    f"Trying to preallocate {int(gmem / MB)} MB of GPU memory while only"
                    f" {int(context.free_gmem / MB)} MB are available."
                )
            elif gmem > context.free_gmem - 50 * MB:
                warnings.warn(
                    "Preallocating too much memory can prevent cudnn and cublas "
                    "from working properly"
                )
            # This will allocate and immediately free an object of size gmem
            # which will reserve that amount of memory on the GPU.
            pygpu.empty((gmem,), dtype="int8", context=context)
            if config.print_active_device:
                print(
                    f"Preallocating {int(gmem // MB)}/{int(context.total_gmem // MB)} Mb ({gmem / context.total_gmem}) on {dev}",
                    file=sys.stderr,
                )

        # Initialise the blas kernels. We do this after the
        # preallocation to not fragment the heap accidentally.
        tmp = pygpu.empty((2, 2), dtype="float32", context=context)
        if dev.startswith("cuda"):
            # In OpenCL, BLAS isn't always available
            pygpu.blas.gemm(0, tmp, tmp, 0, tmp, overwrite_c=True)
        del tmp
    else:
        context = init_dev.devmap[dev]

    # This will map the context name to the real context object.
    if config.print_active_device:
        try:
            unique_id = "(" + context.unique_id + ")"
        except pygpu.gpuarray.UnsupportedException:
            unique_id = ""
        print(
            f"Mapped name {name} to device {dev}: {context.devname} {unique_id}",
            file=sys.stderr,
        )
    pygpu_activated = True
def local_abstractconv_cudnn_alt(node):
    if not isinstance(
        node.op,
        (AbstractConv2d, AbstractConv2d_gradWeights, AbstractConv2d_gradInputs),
    ):
        return

    if version(raises=False) < 6000 and node.op.filter_dilation != (1, 1):
        return None
    if node.op.unshared:
        return None
    if isinstance(node.op.border_mode, tuple) and any(
        isinstance(p, tuple) for p in node.op.border_mode
    ):
        # Asymmetric padding not yet supported
        return None
    inp1 = node.inputs[0]
    inp2 = node.inputs[1]
    if not dnn_available(inp1.type.context_name):
        return

    op = node.op
    border_mode = node.op.border_mode
    subsample = node.op.subsample
    filter_dilation = node.op.filter_dilation
    num_groups = node.op.num_groups
    precision, _ = get_precision(None, [inp1, inp2])

    if node.op.filter_flip:
        conv_mode = "conv"
    else:
        conv_mode = "cross"

    if isinstance(op, AbstractConv2d):
        if border_mode == "half" or subsample != (1, 1) or num_groups != 1:
            return None
        if border_mode == "full":
            direction_hint = "bprop inputs"
        elif border_mode == "valid" and filter_dilation == (1, 1):
            direction_hint = "bprop weights"
        else:
            return None

        rval = dnn_conv(
            inp1,
            inp2,
            border_mode=border_mode,
            subsample=subsample,
            dilation=filter_dilation,
            direction_hint=direction_hint,
            conv_mode=conv_mode,
            num_groups=num_groups,
        )
    elif isinstance(op, AbstractConv2d_gradWeights):
        if (
            border_mode == "valid"
            and subsample == (1, 1)
            and filter_dilation == (1, 1)
            and num_groups == 1
        ):
            img = gpu_contiguous(inp1)
            topgrad = gpu_contiguous(inp2)
            ctx_name = infer_context_name(img, topgrad)
            img = gpu_contiguous(img.dimshuffle(1, 0, 2, 3))
            topgrad = gpu_contiguous(topgrad.dimshuffle(1, 0, 2, 3))
            ishape = [shape_i_op(i)(img) for i in range(img.ndim)]
            tshape = [shape_i_op(i)(topgrad) for i in range(topgrad.ndim)]
            out_shp = get_conv_output_shape(
                ishape,
                tshape,
                border_mode=border_mode,
                subsample=subsample,
                filter_dilation=filter_dilation,
            )
            out_shp = assert_conv_shape(out_shp)
            out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
            desc = GpuDnnConvDesc(
                border_mode=border_mode,
                subsample=subsample,
                dilation=filter_dilation,
                conv_mode="cross",
                precision=precision,
            )(out.shape)

            conv = GpuDnnConv(algo=None, num_groups=num_groups)(img, topgrad, out, desc)
            if conv_mode == "conv":
                conv = conv[:, :, ::-1, ::-1]

            rval = as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3), ctx_name)
        else:
            return None
    elif isinstance(op, AbstractConv2d_gradInputs):
        if border_mode == "valid" and subsample == (1, 1) and num_groups == 1:
            kerns = gpu_contiguous(inp1.dimshuffle(1, 0, 2, 3))
            topgrad = gpu_contiguous(inp2)
            ctx_name = infer_context_name(kerns, topgrad)
            conv_mode = "cross" if conv_mode == "conv" else "conv"
            desc = GpuDnnConvDesc(
                border_mode="full",
                subsample=subsample,
                dilation=filter_dilation,
                conv_mode=conv_mode,
                precision=precision,
            )(kerns.shape)

            tshape = [shape_i_op(i)(topgrad) for i in range(topgrad.ndim)]
            kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)]
            shape = get_conv_output_shape(
                tshape,
                kshape,
                border_mode="full",
                subsample=subsample,
                filter_dilation=filter_dilation,
            )
            shape = assert_conv_shape(shape)
            out = GpuAllocEmpty(dtype=topgrad.dtype, context_name=ctx_name)(*shape)
            rval = GpuDnnConv(algo=None, num_groups=num_groups)(topgrad, kerns, out, desc)
        else:
            return None

    return [rval]
def local_dnn_reduction(node):
    if not isinstance(node.op, GpuCAReduceCuda):
        return
    if not dnn_available(node.inputs[0].type.context_name):
        return
    if version(raises=False) < 6000:
        return
    if node.inputs[0].ndim > 8:
        return

    acc_dtype = node.op._acc_dtype(node.inputs[0].dtype)

    if node.inputs[0].dtype != node.outputs[0].dtype:
        # We can mix float16 and float32, but not float64.
        if node.inputs[0].dtype == "float64" or node.outputs[0].dtype == "float64":
            return
        if acc_dtype != "float32":
            return

    if node.inputs[0].dtype not in ["float16", "float32", "float64"]:
        return
    if node.inputs[0].dtype == "float64" and acc_dtype != "float64":
        return
    if node.inputs[0].dtype == "float32" and acc_dtype != "float32":
        return
    if node.inputs[0].dtype == "float16" and acc_dtype == "float64":
        return

    def _identity(a):
        return a

    def _square(a):
        return GpuElemwise(theano.scalar.basic.sqr)(a)

    scal = node.op.scalar_op.name
    post = _identity

    if node.op.pre_scalar_op is not None:
        if isinstance(node.op.scalar_op, theano.scalar.basic.Add):
            if isinstance(node.op.pre_scalar_op, theano.scalar.basic.Sqr):
                scal = "norm2"
                post = _square
            elif isinstance(node.op.pre_scalar_op, theano.scalar.basic.Abs):
                scal = "norm1"
            else:
                return
        elif isinstance(node.op.scalar_op, theano.scalar.basic.Maximum) and isinstance(
            node.op.pre_scalar_op, theano.scalar.basic.Abs
        ):
            scal = "absmax"
        else:
            return

    if not cudnn.cudnnReduceTensorOp_t.has_alias(scal):
        return

    with inherit_stack_trace(node.outputs):
        ret = GpuDnnReduction(scal, node.op.axis, acc_dtype, node.op.dtype, False)(
            node.inputs[0]
        )
        return [post(ret)]
def test_dnn_rnn_lstm():
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    utt.seed_rng()

    # test params
    input_dim = 32
    hidden_dim = 16
    batch_size = 2
    depth = 3
    timesteps = 5

    # test code
    X = T.tensor3('X')
    Y = T.tensor3('Y')
    h0 = T.tensor3('h0')
    c0 = T.tensor3('c0')

    rnnb = dnn.RNNBlock(theano.config.floatX, hidden_dim, depth, 'lstm')
    psize = rnnb.get_param_size([batch_size, input_dim])
    params_cudnn = gpuarray_shared_constructor(
        np.zeros((psize,), dtype=theano.config.floatX))

    model = Model()
    last_layer = WrapperLayer(X)
    last_dim = input_dim
    for i in range(depth):
        lstm = LSTM(last_dim, hidden_dim, last_layer,
                    s0=h0[i, :, :], c0=c0[i, :, :])
        model.add_layer(lstm)
        last_layer = lstm
        last_dim = hidden_dim
        layer_params = lstm.get_params()
        dnn_params = rnnb.split_params(params_cudnn, i,
                                       [batch_size, input_dim])
        for j, p in enumerate(dnn_params):
            p[:] = layer_params[j].get_value(borrow=True,
                                             return_internal_type=True)

    def funcs(out, params):
        fn = theano.function([X, h0, c0], out, mode=mode_with_gpu)
        cost = T.mean((Y - out)**2)
        grad = T.grad(cost, [X, h0, c0] + params)
        grad_fn = theano.function([X, Y, h0, c0], grad, mode=mode_with_gpu)
        return fn, grad_fn

    ref_fn, ref_grad_fn = funcs(last_layer.output(), model.get_params())
    cudnn_fn, cudnn_grad_fn = funcs(
        rnnb.apply(params_cudnn, X, h0, c0)[0], [params_cudnn])

    x_val = np.random.random(
        (timesteps, batch_size, input_dim)).astype(theano.config.floatX)
    y_val = np.random.random(
        (timesteps, batch_size, hidden_dim)).astype(theano.config.floatX)
    h0_val = np.random.random(
        (depth, batch_size, hidden_dim)).astype(theano.config.floatX)
    c0_val = np.random.random(
        (depth, batch_size, hidden_dim)).astype(theano.config.floatX)

    ref_out = ref_fn(x_val, h0_val, c0_val)
    cudnn_out = cudnn_fn(x_val, h0_val, c0_val)
    utt.assert_allclose(ref_out, cudnn_out)

    ref_grads = ref_grad_fn(x_val, y_val, h0_val, c0_val)
    cudnn_grads = cudnn_grad_fn(x_val, y_val, h0_val, c0_val)
    utt.assert_allclose(ref_grads[0], cudnn_grads[0])
    utt.assert_allclose(ref_grads[1], cudnn_grads[1])
    utt.assert_allclose(ref_grads[2], cudnn_grads[2])

    ref_grads_params = ref_grads[3:]
    cudnn_grads_params = gpuarray_shared_constructor(cudnn_grads[3])
    for i in range(depth):
        cudnn_grads_layer = rnnb.split_params(cudnn_grads_params, i,
                                              [batch_size, input_dim])
        ref_grads_layer = ref_grads_params[
            i * len(cudnn_grads_layer):(i + 1) * len(cudnn_grads_layer)]
        for j, g in enumerate(cudnn_grads_layer):
            utt.assert_allclose(ref_grads_layer[j], g)
from . import leaf, optimizer, initializer, logger

try:
    import theano
    from theano.gpuarray import ContextNotDefined
    from theano.gpuarray.dnn import dnn_available

    assert dnn_available(None)
    assert theano.config.dnn.enabled != "False"
except (ImportError, AssertionError, ContextNotDefined):
    logger.logger.warning("cuDNN is unavailable")