Example #1
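This optimizer replaces a GpuMaxAndArgmax node with a single cuDNN GpuDnnReduction call. It bails out (returns None, leaving the graph untouched) unless cuDNN >= v6 is available in the input's GPU context, the input has at most 8 dimensions and a float16/float32/float64 dtype matching the max output, and the reduction axes are in ascending order.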
def local_cudnn_maxandargmax(node):
    if not isinstance(node.op, GpuMaxAndArgmax):
        return

    if not dnn_available(node.inputs[0].type.context_name):
        return

    if version(raises=False) < 6000:
        return

    if node.inputs[0].ndim > 8:
        return

    if node.inputs[0].dtype != node.outputs[0].dtype:
        return

    if node.inputs[0].dtype not in ["float16", "float32", "float64"]:
        return

    # The order of the reduced axes influences the output indices:
    # only handle axis tuples that are already sorted in ascending order.
    if (node.op.axis is not None and
            tuple(sorted(node.op.axis)) != node.op.axis):
        return

    max, arg = GpuDnnReduction("maximum", node.op.axis, node.outputs[0].dtype,
                               node.outputs[0].dtype, True)(node.inputs[0])

    # cudnn can only return int32 indices; cast them to the int64
    # indices the original op promises.
    return (
        max,
        as_gpuarray_variable(arg.astype("int64"),
                             node.outputs[1].type.context_name),
    )
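The function above is only the rewrite body; in Theano it is hooked into the gpuarray optimization database with decorators. A minimal sketch of that wiring, assuming the local_optimizer decorator from theano.gof.opt and the gpuarray register_opt helper (the registration tags shown are illustrative, not copied from Theano's source):

# Sketch only: registering a node-based rewrite like the one above.
from theano.gof.opt import local_optimizer
from theano.gpuarray.opt import register_opt


@register_opt("cudnn", "fast_compile")  # illustrative tags
@local_optimizer([GpuMaxAndArgmax])     # visit only GpuMaxAndArgmax nodes
def local_cudnn_maxandargmax(node):
    # ... body as in Example #1; returning None skips the node, while
    # returning a sequence of variables replaces the node's outputs.
    return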
Example #2
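The same rewrite in a different style: instead of a node, the function receives the op, the GPU context name, and the lists of inputs and outputs directly, and it returns a one-element list because only the argmax output is produced. This is the calling convention used by Theano's op_lifter decorator, which matches a host-side op and lifts it onto the GPU.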
def local_dnn_argmax(op, ctx_name, inputs, outputs):
    if not dnn_available(ctx_name):
        return

    if version(raises=False) < 6000:
        return

    if inputs[0].ndim > 8:
        return

    if inputs[0].dtype not in ["float16", "float32", "float64"]:
        return

    # The order of the reduced axes influences the output indices:
    # only handle axis tuples that are already sorted in ascending order.
    if op.axis is not None and tuple(sorted(op.axis)) != op.axis:
        return

    max, arg = GpuDnnReduction("maximum", op.axis, inputs[0].dtype,
                               inputs[0].dtype, True)(*inputs)

    # cudnn can only return int32 indices; cast to the int64 Theano expects.
    return [as_gpuarray_variable(arg.astype("int64"), ctx_name)]
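Given that signature, the function is presumably wrapped with Theano's op_lifter, which matches a host-side op and hands the rewrite its target GPU context. A sketch of what the decoration could look like (the lifted op Argmax and the tag are assumptions for illustration):

# Sketch (assumptions noted above): op_lifter-style registration.
from theano.gpuarray.opt import op_lifter, register_opt
from theano.tensor.basic import Argmax


@register_opt("fast_compile")  # illustrative tag
@op_lifter([Argmax])           # match the host op and lift it to the GPU
def local_dnn_argmax(op, ctx_name, inputs, outputs):
    # ... body as in Example #2; returning None leaves the host op alone.
    return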
Example #3
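The most general of the three examples: it maps a GpuCAReduceCuda reduction onto GpuDnnReduction. On top of the availability and dtype guards shared with the other examples, it enforces cuDNN's accumulator-dtype rules (float64 must accumulate in float64, float32 in float32, and float16 in anything narrower than float64), and it recognizes fused pre-reduction patterns: sum(sqr(x)) becomes a squared norm2, sum(abs(x)) becomes norm1, and max(abs(x)) becomes absmax.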
def local_dnn_reduction(node):
    if not isinstance(node.op, GpuCAReduceCuda):
        return

    if not dnn_available(node.inputs[0].type.context_name):
        return

    if version(raises=False) < 6000:
        return

    if node.inputs[0].ndim > 8:
        return

    acc_dtype = node.op._acc_dtype(node.inputs[0].dtype)

    if node.inputs[0].dtype != node.outputs[0].dtype:
        # We can mix float16 and float32, but not float64.
        if (node.inputs[0].dtype == "float64" or
                node.outputs[0].dtype == "float64"):
            return
        if acc_dtype != "float32":
            return

    if node.inputs[0].dtype not in ["float16", "float32", "float64"]:
        return

    if node.inputs[0].dtype == "float64" and acc_dtype != "float64":
        return

    if node.inputs[0].dtype == "float32" and acc_dtype != "float32":
        return

    if node.inputs[0].dtype == "float16" and acc_dtype == "float64":
        return

    def _identity(a):
        return a

    def _square(a):
        return GpuElemwise(theano.scalar.basic.sqr)(a)

    # By default, map the scalar op straight to a cuDNN reduction by name.
    scal = node.op.scalar_op.name
    post = _identity

    # A pre_scalar_op means an elemwise op is fused in before the reduction;
    # map the known patterns onto cuDNN's composite reduction ops.
    if node.op.pre_scalar_op is not None:
        if isinstance(node.op.scalar_op, theano.scalar.basic.Add):
            if isinstance(node.op.pre_scalar_op, theano.scalar.basic.Sqr):
                # sum(sqr(x)) == sqr(norm2(x))
                scal = "norm2"
                post = _square
            elif isinstance(node.op.pre_scalar_op, theano.scalar.basic.Abs):
                # sum(abs(x)) == norm1(x)
                scal = "norm1"
            else:
                return
        elif (isinstance(node.op.scalar_op, theano.scalar.basic.Maximum) and
              isinstance(node.op.pre_scalar_op, theano.scalar.basic.Abs)):
            # max(abs(x)) == absmax(x)
            scal = "absmax"
        else:
            return

    # Bail out if this cuDNN build exposes no reduction op under that name.
    if not cudnn.cudnnReduceTensorOp_t.has_alias(scal):
        return

    with inherit_stack_trace(node.outputs):
        ret = GpuDnnReduction(scal, node.op.axis, acc_dtype, node.op.dtype,
                              False)(node.inputs[0])
        return [post(ret)]
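The _square post-processing in Example #3's norm2 branch follows from what an L2 norm is: cuDNN's norm2 reduction computes sqrt(sum(x ** 2)), while the sum-of-squares node being replaced computes sum(sqr(x)), so the reduction's output has to be squared on the way out. A quick NumPy check of that identity (illustration only; NumPy stands in for the GPU ops):

import numpy as np

x = np.array([3.0, 4.0])
norm2 = np.sqrt(np.sum(np.square(x)))  # what a norm2 reduction returns: 5.0
assert norm2 ** 2 == np.sum(np.square(x))  # 25.0 == sum(sqr(x))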