def local_cudnn_maxandargmax(node):
    # Rewrite GpuMaxAndArgmax as a single cuDNN "maximum" reduction when supported.
    if not isinstance(node.op, GpuMaxAndArgmax):
        return
    if not dnn_available(node.inputs[0].type.context_name):
        return
    if version(raises=False) < 6000:
        return
    if node.inputs[0].ndim > 8:
        return
    if node.inputs[0].dtype != node.outputs[0].dtype:
        return
    if node.inputs[0].dtype not in ["float16", "float32", "float64"]:
        return

    # order of the axes influences the output indices
    if node.op.axis is not None and tuple(sorted(node.op.axis)) != node.op.axis:
        return

    max, arg = GpuDnnReduction(
        "maximum", node.op.axis, node.outputs[0].dtype, node.outputs[0].dtype, True
    )(node.inputs[0])

    # cudnn can only return int32 indices
    return (
        max,
        as_gpuarray_variable(
            arg.astype("int64"), node.outputs[1].type.context_name
        ),
    )
def local_dnn_argmax(op, ctx_name, inputs, outputs):
    # Lift an argmax to a cuDNN "maximum" reduction; only the indices are kept.
    if not dnn_available(ctx_name):
        return
    if version(raises=False) < 6000:
        return
    if inputs[0].ndim > 8:
        return
    if inputs[0].dtype not in ["float16", "float32", "float64"]:
        return

    # order of the axes influences the output indices
    if op.axis is not None and tuple(sorted(op.axis)) != op.axis:
        return

    max, arg = GpuDnnReduction(
        "maximum", op.axis, inputs[0].dtype, inputs[0].dtype, True
    )(*inputs)

    # cudnn can only return int32 indices
    return [as_gpuarray_variable(arg.astype("int64"), ctx_name)]
def local_dnn_reduction(node):
    # Replace GpuCAReduceCuda with GpuDnnReduction when the input dtype,
    # accumulator dtype and scalar op are all supported by cuDNN.
    if not isinstance(node.op, GpuCAReduceCuda):
        return
    if not dnn_available(node.inputs[0].type.context_name):
        return
    if version(raises=False) < 6000:
        return
    if node.inputs[0].ndim > 8:
        return

    acc_dtype = node.op._acc_dtype(node.inputs[0].dtype)

    if node.inputs[0].dtype != node.outputs[0].dtype:
        # We can mix float16 and float32, but not float64.
        if node.inputs[0].dtype == "float64" or node.outputs[0].dtype == "float64":
            return
        if acc_dtype != "float32":
            return

    if node.inputs[0].dtype not in ["float16", "float32", "float64"]:
        return
    if node.inputs[0].dtype == "float64" and acc_dtype != "float64":
        return
    if node.inputs[0].dtype == "float32" and acc_dtype != "float32":
        return
    if node.inputs[0].dtype == "float16" and acc_dtype == "float64":
        return

    def _identity(a):
        return a

    def _square(a):
        return GpuElemwise(theano.scalar.basic.sqr)(a)

    scal = node.op.scalar_op.name
    post = _identity

    if node.op.pre_scalar_op is not None:
        # Fuse pre_scalar_op + reduction into one cuDNN reduction:
        # sum(sqr(x)) -> norm2 (squared afterwards), sum(abs(x)) -> norm1,
        # max(abs(x)) -> absmax.
        if isinstance(node.op.scalar_op, theano.scalar.basic.Add):
            if isinstance(node.op.pre_scalar_op, theano.scalar.basic.Sqr):
                scal = "norm2"
                post = _square
            elif isinstance(node.op.pre_scalar_op, theano.scalar.basic.Abs):
                scal = "norm1"
            else:
                return
        elif isinstance(
            node.op.scalar_op, theano.scalar.basic.Maximum
        ) and isinstance(node.op.pre_scalar_op, theano.scalar.basic.Abs):
            scal = "absmax"
        else:
            return

    if not cudnn.cudnnReduceTensorOp_t.has_alias(scal):
        return

    with inherit_stack_trace(node.outputs):
        ret = GpuDnnReduction(scal, node.op.axis, acc_dtype, node.op.dtype, False)(
            node.inputs[0]
        )
        return [post(ret)]
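# Illustrative sketch (not part of the original functions above): local rewrites
# like these are normally hooked into Theano's gpuarray optimization database
# with the ``register_opt``/``local_optimizer`` decorators; the exact tags
# (e.g. "fast_compile") used here are assumptions for illustration only.
#
#     @register_opt("fast_compile")
#     @local_optimizer([GpuCAReduceCuda])
#     def local_dnn_reduction(node):
#         ...
#
# Once registered, the optimizer is tried node by node during graph
# compilation; returning ``None`` (as the early ``return`` statements above do)
# simply leaves the original node in place.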