def _get_targets(target_str=None):
    """Resolve a semicolon-separated target list into capability records.

    When *target_str* is None, reads the ``TVM_TEST_TARGETS`` environment
    variable; when the resulting list is empty, falls back to
    ``DEFAULT_TEST_TARGETS``.  Each returned dict carries the full target
    string, its kind, whether this TVM build enables it (``is_enabled``)
    and whether it can actually run on this machine (``is_runnable``).
    If nothing is runnable, retries with "llvm" (with a warning) when llvm
    is enabled, otherwise raises ``TVMError``.
    """
    if target_str is None:
        target_str = os.environ.get("TVM_TEST_TARGETS", "")

    # dict.fromkeys de-duplicates while keeping the order the targets were
    # specified in (a plain set would lose the ordering).
    stripped = (piece.strip() for piece in target_str.split(";"))
    target_names = list(dict.fromkeys(name for name in stripped if name))
    if not target_names:
        target_names = DEFAULT_TEST_TARGETS

    targets = []
    for name in target_names:
        kind = name.split()[0]
        wants_cudnn = (
            kind == "cuda"
            and "cudnn" in tvm.target.Target(name).attrs.get("libs", []))
        if wants_cudnn:
            enabled = tvm.support.libinfo()["USE_CUDNN"].lower() in [
                "on", "true", "1"
            ]
            runnable = enabled and cudnn.exists()
        elif kind == "hexagon":
            enabled = tvm.support.libinfo()["USE_HEXAGON"].lower() in [
                "on", "true", "1"
            ]
            # If Hexagon has compile-time support, we can always fall back
            runnable = enabled and "ANDROID_SERIAL_NUMBER" in os.environ
        else:
            enabled = tvm.runtime.enabled(kind)
            runnable = enabled and tvm.device(kind).exist
        targets.append({
            "target": name,
            "target_kind": kind,
            "is_enabled": enabled,
            "is_runnable": runnable,
        })

    if not any(entry["is_runnable"] for entry in targets):
        if tvm.runtime.enabled("llvm"):
            logging.warning(
                "None of the following targets are supported by this build of TVM: %s."
                " Try setting TVM_TEST_TARGETS to a supported target. Defaulting to llvm.",
                target_str,
            )
            return _get_targets("llvm")
        raise TVMError(
            "None of the following targets are supported by this build of TVM: %s."
            " Try setting TVM_TEST_TARGETS to a supported target."
            " Cannot default to llvm, as it is not enabled." % target_str)
    return targets
def requires_cudnn(*args):
    """Mark a test as requiring the cuDNN library.

    This also marks the test as requiring a cuda gpu.

    Parameters
    ----------
    f : function
        Function to mark
    """
    # Skip outright when the cuDNN library is unavailable; the cuda
    # requirements are appended so the test is also gated on a gpu.
    cudnn_mark = pytest.mark.skipif(
        not cudnn.exists(),
        reason="cuDNN library not enabled, or not installed")
    return _compose(args, [cudnn_mark] + list(requires_cuda()))
def conv3d_cudnn(cfg,
                 data,
                 kernel,
                 strides,
                 padding,
                 dilation,
                 groups,
                 layout="NCDHW",
                 out_dtype="float32"):
    """Conv3D operator for cuda backend.

    Parameters
    ----------
    cfg: ConfigEntity
        The config for this template

    data : tvm.te.Tensor
        5-D with shape [batch, in_channel, in_depth, in_height, in_width]

    kernel : tvm.te.Tensor
        5-D with shape [num_filter, in_channel, filter_depth, filter_height, filter_width]

    strides : int or a list/tuple of three ints
        stride size, or [stride_depth, stride_height, stride_width]

    padding : int or a list/tuple of three ints
        padding size, or [pad_depth, pad_height, pad_width]

    dilation: int or a list/tuple of three ints
        dilation size, or [dilation_depth, dilation_height, dilation_width]

    groups : int
        Number of convolution groups; must be 1 (asserted below).

    layout : str
        layout of data

    out_dtype: str
        The output type. This is used for mixed precision.

    Returns
    -------
    output : tvm.te.Tensor
        5-D with shape [batch, out_channel, out_depth, out_height, out_width]

    Raises
    ------
    ValueError
        If `layout` is neither "NCDHW" nor "NDHWC".
    """
    if layout == "NCDHW":
        tensor_format = 0  # CUDNN_TENSOR_NCHW
        N, _, D, H, W = get_const_tuple(data.shape)
    elif layout == "NDHWC":
        tensor_format = 1  # CUDNN_TENSOR_NHWC
        N, D, H, W, _ = get_const_tuple(data.shape)
    else:
        raise ValueError("Unsupported layout %s in cudnn" % layout)
    CO, CI, KD, KH, KW = get_const_tuple(kernel.shape)

    assert groups == 1, "conv3d_cudnn does not support groups"

    # handle dilation
    stride_d, stride_h, stride_w = ((strides, strides, strides) if isinstance(
        strides, int) else strides)
    pad_d, pad_h, pad_w = (padding, padding,
                           padding) if isinstance(padding, int) else padding
    dilation_d, dilation_h, dilation_w = ((dilation, dilation,
                                           dilation) if isinstance(
                                               dilation, int) else dilation)

    # NOTE(review): these output extents use the raw kernel size, not the
    # dilated extent; they only feed the FLOP estimate below, so the returned
    # tensor is unaffected — confirm before relying on them elsewhere.
    OD = (D + 2 * pad_d - KD) // stride_d + 1
    OH = (H + 2 * pad_h - KH) // stride_h + 1
    OW = (W + 2 * pad_w - KW) // stride_w + 1

    if isinstance(N, int):
        cfg.add_flop(2 * N * OD * OH * OW * CO * CI *
                     ((KD - 1) * dilation_d + 1) *
                     ((KH - 1) * dilation_h + 1) *
                     ((KW - 1) * dilation_w + 1))

    # BUGFIX: `dtype` was previously never assigned in this function, so the
    # `conv_dtype=dtype` argument below raised NameError at runtime.  Compute
    # in the input dtype, mirroring the non-int8 path of conv2d_cudnn.
    dtype = data.dtype

    cfg.define_knob(
        "algo",
        range(cudnn.algo_to_index("fwd",
                                  "CUDNN_CONVOLUTION_FWD_ALGO_COUNT")))
    if cfg.is_fallback:
        if cudnn.exists():
            # Let CUDNN choose the best algo, based on benchmarks run
            # on the local machine.  In the future, this should be
            # based on parameters stored in the Target.
            cfg["algo"] = OtherOptionEntity(-1)
        else:
            cfg["algo"] = OtherOptionEntity(0)

    return cudnn.conv_forward(
        data,
        kernel,
        [pad_d, pad_h, pad_w],
        [stride_d, stride_h, stride_w],
        [dilation_d, dilation_h, dilation_w],
        conv_mode=1,
        tensor_format=tensor_format,
        algo=cfg["algo"].val,
        conv_dtype=dtype,
    )
def conv2d_cudnn(
    cfg, data, kernel, strides, padding, dilation, groups=1, layout="NCHW", out_dtype="float32"
):
    """Compute conv2d using CuDNN library.

    Parameters
    ----------
    cfg : ConfigEntity
        The config for this template.
    data : tvm.te.Tensor
        4-D input; [batch, in_channel, height, width] for "NCHW" or
        [batch, height, width, in_channel] for "NHWC".
    kernel : tvm.te.Tensor
        4-D filter tensor (unpacked as [num_filter, in_channel, kh, kw]).
    strides : int or a list/tuple of two ints
        Stride size, or [stride_height, stride_width].
    dilation : int or a list/tuple of two ints
        Dilation size, or [dilation_height, dilation_width].
    padding : int or str or list/tuple
        Padding spec, resolved via get_pad_tuple; must be symmetric.
    groups : int
        Number of convolution groups, forwarded to cuDNN.
    layout : str
        "NCHW" or "NHWC".
    out_dtype : str
        The output type. This is used for mixed precision.

    Returns
    -------
    output : tvm.te.Tensor

    Raises
    ------
    ValueError
        For an unsupported layout, asymmetric padding, or int8 with NCHW.
    """
    if layout == "NCHW":
        tensor_format = 0  # CUDNN_TENSOR_NCHW
        N, _, H, W = get_const_tuple(data.shape)
    elif layout == "NHWC":
        tensor_format = 1  # CUDNN_TENSOR_NHWC
        N, H, W, _ = get_const_tuple(data.shape)
    else:
        raise ValueError("Unsupported layout %s in cudnn" % layout)
    CO, CI, KH, KW = get_const_tuple(kernel.shape)

    # handle dilation
    stride_h, stride_w = (strides, strides) if isinstance(strides, int) else strides
    dilation_h, dilation_w = (dilation, dilation) if isinstance(dilation, int) else dilation
    KH_dilated = (KH - 1) * dilation_h + 1
    # BUGFIX: the dilated filter width must use dilation_w (was dilation_h),
    # otherwise asymmetric dilations resolve the wrong padding below.
    KW_dilated = (KW - 1) * dilation_w + 1

    pt, pl, pb, pr = get_pad_tuple(padding, (KH_dilated, KW_dilated))
    if (pt != pb) or (pl != pr):
        raise ValueError("Cudnn doesn't support asymmetric padding.")

    OH = (H + pt + pb - KH) // stride_h + 1
    OW = (W + pl + pr - KW) // stride_w + 1

    if isinstance(N, int):
        cfg.add_flop(
            groups
            * 2
            * N
            * OH
            * OW
            * CO
            * CI
            * ((KH - 1) * dilation_h + 1)
            * ((KW - 1) * dilation_w + 1)
        )

    if data.dtype == "int8" or kernel.dtype == "int8":
        if layout == "NCHW":
            raise ValueError("NCHW layout do not support int8 in cudnn")
        dtype = "int32"
    else:
        dtype = data.dtype

    cfg.define_knob("algo", range(cudnn.algo_to_index("fwd", "CUDNN_CONVOLUTION_FWD_ALGO_COUNT")))
    if cfg.is_fallback:
        if cudnn.exists():
            # Let CUDNN choose the best algo, based on benchmarks run
            # on the local machine.  In the future, this should be
            # based on parameters stored in the Target.
            cfg["algo"] = OtherOptionEntity(-1)
        else:
            cfg["algo"] = OtherOptionEntity(0)

    return cudnn.conv_forward(
        data,
        kernel,
        [pt, pl],  # cudnn padding pt, pl on both sides of input
        [stride_h, stride_w],
        [dilation_h, dilation_w],
        conv_mode=1,
        tensor_format=tensor_format,
        algo=cfg["algo"].val,
        conv_dtype=dtype,
        groups=groups,
    )