def _get_targets(target_str=None):
    """Resolve a semicolon-separated target list into capability records.

    When *target_str* is None, reads the ``TVM_TEST_TARGETS`` environment
    variable; when the resulting list is empty, falls back to
    ``DEFAULT_TEST_TARGETS``.  Each returned dict carries the full target
    string, its kind, whether this TVM build enables it (``is_enabled``)
    and whether it can actually run on this machine (``is_runnable``).
    If nothing is runnable, retries with "llvm" (with a warning) when llvm
    is enabled, otherwise raises ``TVMError``.
    """
    if target_str is None:
        target_str = os.environ.get("TVM_TEST_TARGETS", "")

    # dict.fromkeys de-duplicates while keeping the order the targets were
    # specified in (a plain set would lose the ordering).
    stripped = (piece.strip() for piece in target_str.split(";"))
    target_names = list(dict.fromkeys(name for name in stripped if name))
    if not target_names:
        target_names = DEFAULT_TEST_TARGETS

    targets = []
    for name in target_names:
        kind = name.split()[0]
        wants_cudnn = (
            kind == "cuda"
            and "cudnn" in tvm.target.Target(name).attrs.get("libs", []))
        if wants_cudnn:
            enabled = tvm.support.libinfo()["USE_CUDNN"].lower() in [
                "on", "true", "1"
            ]
            runnable = enabled and cudnn.exists()
        elif kind == "hexagon":
            enabled = tvm.support.libinfo()["USE_HEXAGON"].lower() in [
                "on", "true", "1"
            ]
            # If Hexagon has compile-time support, we can always fall back
            runnable = enabled and "ANDROID_SERIAL_NUMBER" in os.environ
        else:
            enabled = tvm.runtime.enabled(kind)
            runnable = enabled and tvm.device(kind).exist
        targets.append({
            "target": name,
            "target_kind": kind,
            "is_enabled": enabled,
            "is_runnable": runnable,
        })

    if not any(entry["is_runnable"] for entry in targets):
        if tvm.runtime.enabled("llvm"):
            logging.warning(
                "None of the following targets are supported by this build of TVM: %s."
                " Try setting TVM_TEST_TARGETS to a supported target. Defaulting to llvm.",
                target_str,
            )
            return _get_targets("llvm")
        raise TVMError(
            "None of the following targets are supported by this build of TVM: %s."
            " Try setting TVM_TEST_TARGETS to a supported target."
            " Cannot default to llvm, as it is not enabled." % target_str)
    return targets
def requires_cudnn(*args):
    """Mark a test as requiring the cuDNN library.

    This also marks the test as requiring a cuda gpu.

    Parameters
    ----------
    f : function
        Function to mark
    """
    # Skip outright when the cuDNN library is unavailable; the cuda
    # requirements are appended so the test is also gated on a gpu.
    cudnn_mark = pytest.mark.skipif(
        not cudnn.exists(),
        reason="cuDNN library not enabled, or not installed")
    return _compose(args, [cudnn_mark] + list(requires_cuda()))
def conv3d_cudnn(cfg,
                 data,
                 kernel,
                 strides,
                 padding,
                 dilation,
                 groups,
                 layout="NCDHW",
                 out_dtype="float32"):
    """Conv3D operator for cuda backend.

    Parameters
    ----------
    cfg: ConfigEntity
        The config for this template

    data : tvm.te.Tensor
        5-D with shape [batch, in_channel, in_depth, in_height, in_width]

    kernel : tvm.te.Tensor
        5-D with shape [num_filter, in_channel, filter_depth, filter_height, filter_width]

    strides : int or a list/tuple of three ints
        stride size, or [stride_depth, stride_height, stride_width]

    padding : int or a list/tuple of three ints
        padding size, or [pad_depth, pad_height, pad_width]

    dilation: int or a list/tuple of three ints
        dilation size, or [dilation_depth, dilation_height, dilation_width]

    groups : int
        Number of convolution groups; must be 1 (asserted below).

    layout : str
        layout of data

    out_dtype: str
        The output type. This is used for mixed precision.

    Returns
    -------
    output : tvm.te.Tensor
        5-D with shape [batch, out_channel, out_depth, out_height, out_width]

    Raises
    ------
    ValueError
        If `layout` is neither "NCDHW" nor "NDHWC".
    """
    if layout == "NCDHW":
        tensor_format = 0  # CUDNN_TENSOR_NCHW
        N, _, D, H, W = get_const_tuple(data.shape)
    elif layout == "NDHWC":
        tensor_format = 1  # CUDNN_TENSOR_NHWC
        N, D, H, W, _ = get_const_tuple(data.shape)
    else:
        raise ValueError("Unsupported layout %s in cudnn" % layout)
    CO, CI, KD, KH, KW = get_const_tuple(kernel.shape)

    assert groups == 1, "conv3d_cudnn does not support groups"

    # handle dilation
    stride_d, stride_h, stride_w = ((strides, strides, strides) if isinstance(
        strides, int) else strides)
    pad_d, pad_h, pad_w = (padding, padding,
                           padding) if isinstance(padding, int) else padding
    dilation_d, dilation_h, dilation_w = ((dilation, dilation,
                                           dilation) if isinstance(
                                               dilation, int) else dilation)

    # NOTE(review): these output extents use the raw kernel size, not the
    # dilated extent; they only feed the FLOP estimate below, so the returned
    # tensor is unaffected — confirm before relying on them elsewhere.
    OD = (D + 2 * pad_d - KD) // stride_d + 1
    OH = (H + 2 * pad_h - KH) // stride_h + 1
    OW = (W + 2 * pad_w - KW) // stride_w + 1

    if isinstance(N, int):
        cfg.add_flop(2 * N * OD * OH * OW * CO * CI *
                     ((KD - 1) * dilation_d + 1) *
                     ((KH - 1) * dilation_h + 1) *
                     ((KW - 1) * dilation_w + 1))

    # BUGFIX: `dtype` was previously never assigned in this function, so the
    # `conv_dtype=dtype` argument below raised NameError at runtime.  Compute
    # in the input dtype, mirroring the non-int8 path of conv2d_cudnn.
    dtype = data.dtype

    cfg.define_knob(
        "algo",
        range(cudnn.algo_to_index("fwd",
                                  "CUDNN_CONVOLUTION_FWD_ALGO_COUNT")))
    if cfg.is_fallback:
        if cudnn.exists():
            # Let CUDNN choose the best algo, based on benchmarks run
            # on the local machine.  In the future, this should be
            # based on parameters stored in the Target.
            cfg["algo"] = OtherOptionEntity(-1)
        else:
            cfg["algo"] = OtherOptionEntity(0)

    return cudnn.conv_forward(
        data,
        kernel,
        [pad_d, pad_h, pad_w],
        [stride_d, stride_h, stride_w],
        [dilation_d, dilation_h, dilation_w],
        conv_mode=1,
        tensor_format=tensor_format,
        algo=cfg["algo"].val,
        conv_dtype=dtype,
    )
def conv2d_cudnn(
    cfg, data, kernel, strides, padding, dilation, groups=1, layout="NCHW", out_dtype="float32"
):
    """Compute conv2d using CuDNN library.

    Parameters
    ----------
    cfg : ConfigEntity
        The config for this template.
    data : tvm.te.Tensor
        4-D input; [batch, in_channel, height, width] for "NCHW" or
        [batch, height, width, in_channel] for "NHWC".
    kernel : tvm.te.Tensor
        4-D filter tensor (unpacked as [num_filter, in_channel, kh, kw]).
    strides : int or a list/tuple of two ints
        Stride size, or [stride_height, stride_width].
    dilation : int or a list/tuple of two ints
        Dilation size, or [dilation_height, dilation_width].
    padding : int or str or list/tuple
        Padding spec, resolved via get_pad_tuple; must be symmetric.
    groups : int
        Number of convolution groups, forwarded to cuDNN.
    layout : str
        "NCHW" or "NHWC".
    out_dtype : str
        The output type. This is used for mixed precision.

    Returns
    -------
    output : tvm.te.Tensor

    Raises
    ------
    ValueError
        For an unsupported layout, asymmetric padding, or int8 with NCHW.
    """
    if layout == "NCHW":
        tensor_format = 0  # CUDNN_TENSOR_NCHW
        N, _, H, W = get_const_tuple(data.shape)
    elif layout == "NHWC":
        tensor_format = 1  # CUDNN_TENSOR_NHWC
        N, H, W, _ = get_const_tuple(data.shape)
    else:
        raise ValueError("Unsupported layout %s in cudnn" % layout)
    CO, CI, KH, KW = get_const_tuple(kernel.shape)

    # handle dilation
    stride_h, stride_w = (strides, strides) if isinstance(strides, int) else strides
    dilation_h, dilation_w = (dilation, dilation) if isinstance(dilation, int) else dilation
    KH_dilated = (KH - 1) * dilation_h + 1
    # BUGFIX: the dilated filter width must use dilation_w (was dilation_h),
    # otherwise asymmetric dilations resolve the wrong padding below.
    KW_dilated = (KW - 1) * dilation_w + 1

    pt, pl, pb, pr = get_pad_tuple(padding, (KH_dilated, KW_dilated))
    if (pt != pb) or (pl != pr):
        raise ValueError("Cudnn doesn't support asymmetric padding.")

    OH = (H + pt + pb - KH) // stride_h + 1
    OW = (W + pl + pr - KW) // stride_w + 1

    if isinstance(N, int):
        cfg.add_flop(
            groups
            * 2
            * N
            * OH
            * OW
            * CO
            * CI
            * ((KH - 1) * dilation_h + 1)
            * ((KW - 1) * dilation_w + 1)
        )

    if data.dtype == "int8" or kernel.dtype == "int8":
        if layout == "NCHW":
            raise ValueError("NCHW layout do not support int8 in cudnn")
        dtype = "int32"
    else:
        dtype = data.dtype

    cfg.define_knob("algo", range(cudnn.algo_to_index("fwd", "CUDNN_CONVOLUTION_FWD_ALGO_COUNT")))
    if cfg.is_fallback:
        if cudnn.exists():
            # Let CUDNN choose the best algo, based on benchmarks run
            # on the local machine.  In the future, this should be
            # based on parameters stored in the Target.
            cfg["algo"] = OtherOptionEntity(-1)
        else:
            cfg["algo"] = OtherOptionEntity(0)

    return cudnn.conv_forward(
        data,
        kernel,
        [pt, pl],  # cudnn padding pt, pl on both sides of input
        [stride_h, stride_w],
        [dilation_h, dilation_w],
        conv_mode=1,
        tensor_format=tensor_format,
        algo=cfg["algo"].val,
        conv_dtype=dtype,
        groups=groups,
    )