Example #1
File: x86.py Project: chenghanpeng/tvm
def batch_matmul_strategy_cpu(attrs, inputs, out_type, target):
    """batch_matmul x86 strategy"""
    strategy = _op.OpStrategy()
    mcpu = Target.current().mcpu

    need_auto_scheduler_layout = is_auto_scheduler_enabled()
    need_meta_schedule_layout = is_meta_schedule_enabled()

    if (not attrs.transpose_a and attrs.transpose_b and target_has_vnni(mcpu)
            and inputs[0].dtype == "uint8" and inputs[1].dtype == "int8"
            and inputs[1].shape[-2] % 16 == 0
            and inputs[1].shape[-1] % 4 == 0):
        strategy.add_implementation(
            wrap_compute_batch_matmul(topi.x86.batch_matmul_vnni_compute,
                                      need_out_dtype=True),
            wrap_topi_schedule(topi.x86.schedule_batch_matmul_vnni),
            name="batch_matmul_vnni.x86",
            plevel=10,
        )
    elif (is_dynamic(out_type) or need_auto_scheduler_layout
          or need_meta_schedule_layout):
        strategy.add_implementation(
            wrap_compute_batch_matmul(
                topi.nn.batch_matmul,
                need_out_dtype=True,
                need_auto_scheduler_layout=need_auto_scheduler_layout,
                need_meta_schedule_layout=need_meta_schedule_layout,
            ),
            wrap_topi_schedule(topi.generic.nn.schedule_batch_matmul),
            name="batch_matmul.generic",
            plevel=10,
        )
    else:
        strategy.add_implementation(
            wrap_compute_batch_matmul(topi.x86.batch_matmul,
                                      need_out_dtype=True),
            wrap_topi_schedule(topi.x86.schedule_batch_matmul),
            name="batch_matmul.x86",
            plevel=10,
        )
    if "cblas" in target.libs:
        strategy.add_implementation(
            wrap_compute_batch_matmul(topi.x86.batch_matmul_cblas),
            wrap_topi_schedule(topi.x86.schedule_batch_matmul_cblas),
            name="batch_matmul_cblas.x86",
            plevel=15,
        )
    if "mkl" in target.libs:
        strategy.add_implementation(
            wrap_compute_batch_matmul(topi.x86.batch_matmul_mkl),
            wrap_topi_schedule(topi.x86.schedule_batch_matmul_mkl),
            name="batch_matmul_mkl.x86",
            plevel=15,
        )
    return strategy
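
A brief usage note: plevel ranks the candidate implementations (a higher value wins among those whose conditions hold), and the cblas/mkl branches above only fire when the corresponding library appears in target.libs. Below is a minimal, hedged sketch of a target that would enable them; the exact target string is an illustrative assumption, not part of the excerpt:

# Hedged sketch: an x86 target with external libraries enabled. The -mcpu and
# -libs values are illustrative assumptions, not taken from the excerpt above.
import tvm

target = tvm.target.Target("llvm -mcpu=skylake-avx512 -libs=cblas,mkl")

# With "cblas"/"mkl" in target.libs, batch_matmul_strategy_cpu also registers
# batch_matmul_cblas.x86 and batch_matmul_mkl.x86 at plevel=15, which outrank
# the plevel=10 implementations added earlier in the function.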
Example #2
File: x86.py Project: chenghanpeng/tvm
def conv3d_strategy_cpu(attrs, inputs, out_type, target):
    """conv3d generic strategy"""
    strategy = _op.OpStrategy()
    layout = attrs.data_layout
    need_auto_scheduler_layout = is_auto_scheduler_enabled()
    need_meta_schedule_layout = is_meta_schedule_enabled()
    if need_auto_scheduler_layout or need_meta_schedule_layout:
        # Use auto-scheduler. We should provide clear compute definition without autotvm templates
        # or packed layouts.
        if layout == "NCDHW":
            strategy.add_implementation(
                wrap_compute_conv3d(topi.nn.conv3d_ncdhw),
                naive_schedule,
                name="conv3d_ncdhw.x86",
            )
        elif layout == "NDHWC":
            strategy.add_implementation(
                wrap_compute_conv3d(
                    topi.nn.conv3d_ndhwc,
                    need_auto_scheduler_layout=need_auto_scheduler_layout,
                    need_meta_schedule_layout=need_meta_schedule_layout,
                ),
                naive_schedule,
                name="conv3d_ndhwc.x86",
            )
        else:
            raise ValueError("Not support this layout {} yet".format(layout))
    else:
        # Use autotvm templates
        if layout == "NCDHW":
            strategy.add_implementation(
                wrap_compute_conv3d(topi.x86.conv3d_ncdhw),
                wrap_topi_schedule(topi.x86.schedule_conv3d_ncdhw),
                name="conv3d_ncdhw.x86",
            )
        elif layout == "NDHWC":
            strategy.add_implementation(
                wrap_compute_conv3d(topi.x86.conv3d_ndhwc),
                wrap_topi_schedule(topi.x86.schedule_conv3d_ndhwc),
                name="conv3d_ndhwc.x86",
            )
        else:
            raise ValueError("Not support this layout {} yet".format(layout))
    return strategy
Example #3
File: x86.py Project: chenghanpeng/tvm
def conv2d_winograd_without_weight_transfrom_strategy_cpu(
        attrs, inputs, out_type, target):
    """conv2d_winograd_without_weight_transfrom cpu strategy"""
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int("groups")
    layout = attrs.data_layout
    strides = attrs.get_int_tuple("strides")
    assert dilation == (1, 1), "Do not support dilate now"
    assert strides == (1, 1), "Do not support strides now"
    assert groups == 1, "Do not support arbitrary group number"
    strategy = _op.OpStrategy()
    need_auto_scheduler_layout = is_auto_scheduler_enabled()
    need_meta_schedule_layout = is_meta_schedule_enabled()
    if layout == "NHWC":
        if need_meta_schedule_layout:
            strategy.add_implementation(
                wrap_compute_conv2d(
                    topi.nn.conv2d_winograd_nhwc_without_weight_transform,
                    need_auto_scheduler_layout=False,
                    need_meta_schedule_layout=True,
                ),
                naive_schedule,
                name="ansor.winograd",
            )
        elif need_auto_scheduler_layout:
            strategy.add_implementation(
                wrap_compute_conv2d(
                    topi.nn.conv2d_winograd_nhwc_without_weight_transform,
                    need_auto_scheduler_layout=True,
                    need_meta_schedule_layout=False,
                ),
                naive_schedule,
                name="ansor.winograd",
            )
        else:
            raise RuntimeError(
                "Neither AutoScheduler nor MetaSchedule is enabled")
    else:
        raise RuntimeError(
            "Unsupported conv2d_winograd_without_weight_transfrom layout {}".
            format(layout))
    return strategy
Example #4
File: mali.py Project: chenghanpeng/tvm
def conv2d_winograd_without_weight_transfrom_strategy_mali(attrs, inputs, out_type, target):
    """conv2d_winograd_without_weight_transfrom mali strategy"""
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int("groups")
    layout = attrs.data_layout
    strides = attrs.get_int_tuple("strides")
    kernel = inputs[1]
    assert dilation == (1, 1), "Do not support dilate now"
    assert strides == (1, 1), "Do not support strides now"
    assert groups == 1, "Do not support arbitrary group number"
    strategy = _op.OpStrategy()
    if layout == "NCHW":
        assert len(kernel.shape) == 5, "Kernel must be packed into 5-dim"
        strategy.add_implementation(
            wrap_compute_conv2d(topi.mali.conv2d_nchw_winograd),
            wrap_topi_schedule(topi.mali.schedule_conv2d_nchw_winograd),
            name="conv2d_nchw_winograd.mali",
        )
    elif layout == "NHWC":
        need_auto_scheduler_layout = is_auto_scheduler_enabled()
        need_meta_schedule_layout = is_meta_schedule_enabled()
        if need_auto_scheduler_layout or need_meta_schedule_layout:
            strategy.add_implementation(
                wrap_compute_conv2d(
                    topi.nn.conv2d_winograd_nhwc_without_weight_transform,
                    need_auto_scheduler_layout=need_auto_scheduler_layout,
                    need_meta_schedule_layout=need_meta_schedule_layout,
                ),
                naive_schedule,  # this implementation should never be picked by autotvm
                name="conv2d_nhwc_winograd_without_weight_transform",
                plevel=15,
            )
        else:
            raise RuntimeError(
                "Winograd conv2d NHWC is not enabled for mali without auto_scheduler."
            )
    else:
        raise RuntimeError(
            "Unsupported conv2d_winograd_without_weight_transfrom layout {}".format(layout)
        )
    return strategy
Example #5
File: mali.py Project: chenghanpeng/tvm
def dense_strategy_mali(attrs, inputs, out_type, target):
    """dense mali strategy"""
    strategy = _op.OpStrategy()
    if is_auto_scheduler_enabled():
        strategy.add_implementation(
            wrap_compute_dense(topi.nn.dense, need_auto_scheduler_layout=True),
            naive_schedule,
            name="dense.mali",
        )
    elif is_meta_schedule_enabled():
        strategy.add_implementation(
            wrap_compute_dense(topi.nn.dense, need_meta_schedule_layout=True),
            naive_schedule,
            name="dense.mali",
        )
    else:
        strategy.add_implementation(
            wrap_compute_dense(topi.mali.dense),
            wrap_topi_schedule(topi.mali.schedule_dense),
            name="dense.mali",
        )
    return strategy
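
Which branch dense_strategy_mali takes depends on whether the auto-scheduler or MetaSchedule integration is active when compilation runs. Below is a hedged sketch of how that is commonly toggled; the config key is an assumption that may differ between TVM versions and is not part of the excerpt:

# Hedged sketch: is_auto_scheduler_enabled() / is_meta_schedule_enabled()
# typically read flags from the current PassContext; the key name below is an
# assumption, not taken from the excerpt above.
import tvm

with tvm.transform.PassContext(
        opt_level=3,
        config={"relay.backend.use_auto_scheduler": True},  # assumed config key
):
    # A relay.build(...) call placed here would see is_auto_scheduler_enabled()
    # return True, so dense_strategy_mali would pick topi.nn.dense with
    # naive_schedule instead of the hand-written mali schedule.
    pass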
Example #6
File: mali.py Project: chenghanpeng/tvm
def conv2d_strategy_mali(attrs, inputs, out_type, target):
    """conv2d mali strategy"""
    strategy = _op.OpStrategy()
    data, kernel = inputs
    dilation_h, dilation_w = attrs.get_int_tuple("dilation")
    stride_h, stride_w = attrs.get_int_tuple("strides")
    groups = attrs.groups
    layout = attrs.data_layout
    kernel_layout = attrs.kernel_layout
    if dilation_h < 1 or dilation_w < 1:
        raise ValueError("dilation should be positive value")

    if groups == 1:
        if layout == "NCHW":
            if kernel_layout == "OIHW":
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.mali.conv2d_nchw_spatial_pack),
                    wrap_topi_schedule(topi.mali.schedule_conv2d_nchw_spatial_pack),
                    name="conv2d_nchw_spatial_pack.mali",
                )
                # check if winograd algorithm is applicable
                _, _, kh, kw = get_const_tuple(kernel.shape)
                if (
                    kh == 3
                    and kw == 3
                    and stride_h == 1
                    and stride_w == 1
                    and dilation_h == 1
                    and dilation_w == 1
                ):
                    strategy.add_implementation(
                        wrap_compute_conv2d(topi.mali.conv2d_nchw_winograd),
                        wrap_topi_schedule(topi.mali.schedule_conv2d_nchw_winograd),
                        name="conv2d_nchw_winograd.mali",
                        plevel=5,
                    )
            elif re.match(r"OIHW\d*o", kernel_layout):
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.mali.conv2d_nchw_spatial_pack),
                    wrap_topi_schedule(topi.mali.schedule_conv2d_nchw_spatial_pack),
                    name="conv2d_nchw_spatial_pack.mali",
                )
            else:
                raise RuntimeError(
                    "Unsupported weight layout {} for conv2d NCHW".format(kernel_layout)
                )
        elif layout == "NHWC":
            assert kernel_layout == "HWIO"
            need_auto_scheduler_layout = is_auto_scheduler_enabled()
            need_meta_schedule_layout = is_meta_schedule_enabled()
            if need_auto_scheduler_layout or need_meta_schedule_layout:
                strategy.add_implementation(
                    wrap_compute_conv2d(
                        topi.nn.conv2d_nhwc,
                        need_auto_scheduler_layout=need_auto_scheduler_layout,
                        need_meta_schedule_layout=need_meta_schedule_layout,
                    ),
                    naive_schedule,
                    name="conv2d_nhwc.mali",
                )
                is_winograd_applicable = False
                if len(kernel.shape) == 4:
                    kernel_h, kernel_w, _, _ = get_const_tuple(kernel.shape)
                    is_winograd_applicable = (
                        "float" in data.dtype
                        and "float" in kernel.dtype
                        and kernel_h == 3
                        and kernel_w == 3
                        and stride_h == 1
                        and stride_w == 1
                        and dilation_h == 1
                        and dilation_w == 1
                    )
                if is_winograd_applicable:
                    if need_meta_schedule_layout:
                        strategy.add_implementation(
                            wrap_compute_conv2d(
                                topi.nn.conv2d_winograd_nhwc,
                                need_auto_scheduler_layout=False,
                                need_meta_schedule_layout=True,
                            ),
                            naive_schedule,  # this implementation should never be picked by autotvm
                            name="conv2d_nhwc.winograd",
                            plevel=15,
                        )
                    elif need_auto_scheduler_layout:
                        strategy.add_implementation(
                            wrap_compute_conv2d(
                                topi.nn.conv2d_winograd_nhwc,
                                need_auto_scheduler_layout=True,
                                need_meta_schedule_layout=False,
                            ),
                            naive_schedule,  # this implementation should never be picked by autotvm
                            name="conv2d_nhwc.winograd",
                            plevel=15,
                        )
                    else:
                        raise RuntimeError("Both AutoScheduler and MetaSchedule are not enabled")
            else:
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.mali.conv2d_nhwc_spatial_pack),
                    wrap_topi_schedule(topi.mali.schedule_conv2d_nhwc_spatial_pack),
                    name="conv2d_nhwc_spatial_pack.mali",
                )

        else:
            raise RuntimeError("Unsupported conv2d layout {} for mali".format(layout))
    elif is_depthwise_conv2d(data.shape, layout, kernel.shape, kernel_layout, groups):
        if layout == "NCHW":
            assert kernel_layout == "OIHW"
            strategy.add_implementation(
                wrap_compute_conv2d(topi.mali.depthwise_conv2d_nchw),
                wrap_topi_schedule(topi.mali.schedule_depthwise_conv2d_nchw),
                name="depthwise_conv2d_nchw.mali",
            )
        elif layout == "NHWC":
            assert kernel_layout == "HWOI"
            if is_auto_scheduler_enabled():
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.nn.depthwise_conv2d_nhwc),
                    naive_schedule,
                    name="depthwise_conv2d_nhwc.mali",
                )
            elif is_meta_schedule_enabled():
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.nn.depthwise_conv2d_nhwc),
                    naive_schedule,
                    name="depthwise_conv2d_nhwc.mali",
                )
            else:
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.mali.depthwise_conv2d_nhwc),
                    wrap_topi_schedule(topi.mali.schedule_depthwise_conv2d_nhwc),
                    name="depthwise_conv2d_nhwc.mali",
                )
        else:
            raise RuntimeError("Unsupported depthwise_conv2d layout {} for mali".format(layout))
    else:  # group_conv2d
        raise RuntimeError("group_conv2d is not supported for mali")
    return strategy
Example #7
File: x86.py Project: chenghanpeng/tvm
def dense_strategy_cpu(attrs, inputs, out_type, target):
    """dense x86 strategy"""
    strategy = _op.OpStrategy()
    same_type = inputs[0].dtype == inputs[1].dtype == out_type.dtype
    dtype = inputs[0].dtype
    u8s8s32 = (
        dtype == "uint8" and inputs[1].dtype == "int8" and out_type.dtype == "int32"
    )
    strategy.add_implementation(
        wrap_compute_dense(topi.x86.dense_nopack),
        wrap_topi_schedule(topi.x86.schedule_dense_nopack),
        name="dense_nopack.x86",
        plevel=5,
    )

    strategy.add_implementation(
        wrap_compute_dense(topi.x86.dense_pack),
        wrap_topi_schedule(topi.x86.schedule_dense_pack),
        name="dense_pack.x86",
        plevel=10,
    )

    need_auto_scheduler_layout = is_auto_scheduler_enabled()
    need_meta_schedule_layout = is_meta_schedule_enabled()

    if need_auto_scheduler_layout or need_meta_schedule_layout:
        strategy.add_implementation(
            wrap_compute_dense(
                topi.nn.dense,
                need_auto_scheduler_layout=need_auto_scheduler_layout,
                need_meta_schedule_layout=need_meta_schedule_layout,
            ),
            naive_schedule,
            name="dense.generic",
            plevel=11,
        )

    if "cblas" in target.libs:
        with SpecializedCondition(same_type
                                  and dtype in ["float32", "float64"]):
            strategy.add_implementation(
                wrap_compute_dense(topi.x86.dense_cblas),
                wrap_topi_schedule(topi.x86.schedule_dense_cblas),
                name="dense_cblas.x86",
                plevel=13,
            )
    if "mkl" in target.libs:
        with SpecializedCondition(
                same_type and dtype in ["float32", "float64"] or u8s8s32):
            strategy.add_implementation(
                wrap_compute_dense(topi.x86.dense_mkl),
                wrap_topi_schedule(topi.x86.schedule_dense_mkl),
                name="dense_mkl.x86",
                plevel=14,
            )
    if "dnnl" in target.libs:
        with SpecializedCondition(same_type and dtype == "float32"):
            strategy.add_implementation(
                wrap_compute_dense(topi.x86.dense_dnnl),
                wrap_topi_schedule(topi.x86.schedule_dense_dnnl),
                name="dense_dnnl.x86",
                plevel=15,
            )
    return strategy
Example #8
File: x86.py Project: chenghanpeng/tvm
def matmul_strategy_cpu(attrs, inputs, out_type, target):
    """matmul x86 strategy"""
    strategy = _op.OpStrategy()

    same_type = inputs[0].dtype == inputs[1].dtype == out_type.dtype
    dtype = inputs[0].dtype
    u8s8s32 = (
        dtype == "uint8" and inputs[1].dtype == "int8" and out_type.dtype == "int32"
    )
    if "cblas" in target.libs:
        length_before = len(
            strategy.specializations) if strategy.specializations else 0
        with SpecializedCondition(same_type
                                  and dtype in ["float32", "float64"]):
            strategy.add_implementation(
                wrap_compute_matmul(topi.x86.matmul_cblas),
                wrap_topi_schedule(topi.x86.schedule_matmul_cblas),
                name="matmul_cblas.x86",
                plevel=13,
            )
        length_after = len(
            strategy.specializations) if strategy.specializations else 0
        if length_before == length_after:
            logger.warning(
                "Currently cblas only supports float32 or float64 data types. Skip."
            )
    if "mkl" in target.libs:
        length_before = len(
            strategy.specializations) if strategy.specializations else 0
        with SpecializedCondition(
                same_type and dtype in ["float32", "float64"] or u8s8s32):
            strategy.add_implementation(
                wrap_compute_matmul(topi.x86.matmul_mkl),
                wrap_topi_schedule(topi.x86.schedule_matmul_mkl),
                name="matmul_mkl.x86",
                plevel=14,
            )
        length_after = len(
            strategy.specializations) if strategy.specializations else 0
        if length_before == length_after:
            logger.warning(
                "Currently mkl only supports float32, float64, or uint8/int8 inputs "
                "with int32 output. Skip.")
    if "dnnl" in target.libs:
        length_before = len(
            strategy.specializations) if strategy.specializations else 0
        with SpecializedCondition(same_type and dtype == "float32"):
            strategy.add_implementation(
                wrap_compute_matmul(topi.x86.matmul_dnnl),
                wrap_topi_schedule(topi.x86.schedule_matmul_dnnl),
                name="matmul_dnnl.x86",
                plevel=15,
            )
        length_after = len(
            strategy.specializations) if strategy.specializations else 0
        if length_before == length_after:
            logger.warning(
                "Currently dnnl only supports float32. Skip."
            )

    need_auto_scheduler_layout = is_auto_scheduler_enabled()
    need_meta_schedule_layout = is_meta_schedule_enabled()
    if need_auto_scheduler_layout or need_meta_schedule_layout:
        strategy.add_implementation(
            wrap_compute_matmul(
                topi.nn.matmul,
                need_auto_scheduler_layout=need_auto_scheduler_layout,
                need_meta_schedule_layout=need_meta_schedule_layout,
            ),
            naive_schedule,
            name="matmul.generic",
            plevel=11,
        )
    else:
        # If no cblas/mkl/dnnl strategy was chosen
        if not strategy.specializations:
            logger.warning(
                "Matmul is not optimized for x86. "
                "Recommend to use cblas/mkl/dnnl for better performance.")
        strategy.add_implementation(
            wrap_compute_matmul(topi.nn.matmul),
            naive_schedule,
            name="matmul.generic",
        )
    return strategy
Example #9
File: x86.py Project: chenghanpeng/tvm
def conv2d_strategy_cpu(attrs, inputs, out_type, target):
    """conv2d x86 strategy"""
    strategy = _op.OpStrategy()
    data, kernel = inputs
    stride_h, stride_w = get_const_tuple(attrs.strides)
    dilation_h, dilation_w = get_const_tuple(attrs.dilation)
    groups = attrs.groups
    layout = attrs.data_layout
    kernel_layout = attrs.kernel_layout
    if dilation_h < 1 or dilation_w < 1:
        raise ValueError("dilation should be positive value")

    need_auto_scheduler_layout = is_auto_scheduler_enabled()
    need_meta_schedule_layout = is_meta_schedule_enabled()

    if groups == 1:
        if layout == "NCHW":
            assert kernel_layout == "OIHW"
            if topi.x86.is_int8_hw_support(data.dtype, kernel.dtype):
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.x86.conv2d_nchw_int8),
                    wrap_topi_schedule(topi.x86.schedule_conv2d_nchw_int8),
                    name="conv2d_nchw_int8.x86",
                )
            elif "dnnl" in target.libs:
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.x86.conv2d_nchw_dnnl),
                    wrap_topi_schedule(topi.x86.schedule_conv2d_nchw_dnnl),
                    name="conv2d_nchw_dnnl.x86",
                )
            else:
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.x86.conv2d_nchw),
                    wrap_topi_schedule(topi.x86.schedule_conv2d_nchw),
                    name="conv2d_nchw.x86",
                )
        elif _NCHWc_matcher.match(layout):  # check if layout is NCHWxc
            assert _OIHWio_matcher.match(
                kernel_layout)  # check if kernel is OIHWio
            return conv2d_NCHWc_strategy_cpu(attrs, inputs, out_type, target)
        elif layout == "NHWC":
            assert kernel_layout == "HWIO"
            if (not need_auto_scheduler_layout) and (
                    not need_meta_schedule_layout):
                logger.warning(
                    "conv2d NHWC layout is not optimized for x86 with autotvm."
                )
            if "dnnl" in target.libs:
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.x86.conv2d_nhwc_dnnl),
                    wrap_topi_schedule(topi.x86.schedule_conv2d_nhwc_dnnl),
                    name="conv2d_nhwc_dnnl.x86",
                )
            else:
                strategy.add_implementation(
                    wrap_compute_conv2d(
                        topi.nn.conv2d_nhwc,
                        need_auto_scheduler_layout=need_auto_scheduler_layout,
                        need_meta_schedule_layout=need_meta_schedule_layout,
                    ),
                    wrap_topi_schedule(topi.x86.schedule_conv2d_nhwc),
                    name="conv2d_nhwc.x86",
                )

            judge_winograd_auto_scheduler = False
            if len(kernel.shape) == 4:
                kernel_h, kernel_w, _, co = get_const_tuple(kernel.shape)
                judge_winograd_auto_scheduler = (
                    "float" in data.dtype and "float" in kernel.dtype
                    and kernel_h == 3 and kernel_w == 3 and stride_h == 1
                    and stride_w == 1 and dilation_h == 1 and dilation_w == 1
                    and 64 < co < 512
                    # The last condition of co is based on our profiling of resnet workloads
                    # on skylake avx512 machines. We found winograd is faster than direct
                    # only when co is within this range
                )

            # register auto-scheduler implementations
            if (need_auto_scheduler_layout or need_meta_schedule_layout
                ) and judge_winograd_auto_scheduler:
                strategy.add_implementation(
                    wrap_compute_conv2d(
                        topi.nn.conv2d_winograd_nhwc,
                        need_auto_scheduler_layout=need_auto_scheduler_layout,
                        need_meta_schedule_layout=need_meta_schedule_layout,
                    ),
                    naive_schedule,  # this implementation should never be picked by autotvm
                    name="conv2d_nhwc.winograd",
                    plevel=15,
                )
        elif layout == "HWCN":
            assert kernel_layout == "HWIO"
            if (not need_auto_scheduler_layout) and (
                    not need_meta_schedule_layout):
                logger.warning(
                    "conv2d HWCN layout is not optimized for x86 with autotvm."
                )
            strategy.add_implementation(
                wrap_compute_conv2d(topi.nn.conv2d_hwcn),
                wrap_topi_schedule(topi.generic.schedule_conv2d_hwcn),
                name="conv2d_hwcn.generic",
            )
        else:
            raise RuntimeError(
                "Unsupported conv2d layout {} for x86".format(layout))
    elif is_depthwise_conv2d(data.shape, layout, kernel.shape, kernel_layout,
                             groups):
        if layout == "NCHW":
            assert kernel_layout == "OIHW"
            channel_multiplier = get_const_tuple(inputs[1].shape)[1]
            if channel_multiplier == 1 and dilation_h == 1 and dilation_w == 1:
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.x86.depthwise_conv2d_nchw),
                    wrap_topi_schedule(
                        topi.x86.schedule_depthwise_conv2d_nchw),
                    name="depthwise_conv2d_nchw.x86",
                )
            else:
                logger.warning("For x86 target, depthwise_conv2d with channel "
                               "multiplier greater than 1 is not optimized")
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.nn.depthwise_conv2d_nchw),
                    wrap_topi_schedule(
                        topi.generic.schedule_depthwise_conv2d_nchw),
                    name="depthwise_conv2d_nchw.generic",
                )
        elif _NCHWc_matcher.match(layout):  # check if layout is NCHWxc
            assert _OIHWio_matcher.match(
                kernel_layout)  # check if kernel is OIHWio
            return depthwise_conv2d_NCHWc_strategy_cpu(attrs, inputs, out_type,
                                                       target)
        elif layout == "NHWC":
            assert kernel_layout == "HWOI"
            if (not need_auto_scheduler_layout) and (
                    not need_meta_schedule_layout):
                logger.warning(
                    "depthwise_conv2d NHWC layout is not optimized for x86 with autotvm."
                )
            strategy.add_implementation(
                wrap_compute_conv2d(topi.nn.depthwise_conv2d_nhwc),
                wrap_topi_schedule(
                    topi.generic.schedule_depthwise_conv2d_nhwc),
                name="depthwise_conv2d_nhwc.generic",
            )
        else:
            raise RuntimeError(
                "Unsupported depthwise_conv2d layout {}".format(layout))
    else:  # group_conv2d
        if layout == "NCHW":
            assert kernel_layout == "OIHW"
            strategy.add_implementation(
                wrap_compute_conv2d(topi.x86.group_conv2d_nchw,
                                    has_groups=True),
                wrap_topi_schedule(topi.x86.schedule_group_conv2d_nchw),
                name="group_conv2d_nchw.x86",
            )
        elif layout == "NHWC":
            assert kernel_layout == "HWIO"
            if (not need_auto_scheduler_layout) and (
                    not need_meta_schedule_layout):
                logger.warning(
                    "group_conv2d is not optimized for x86 with autotvm.")
            strategy.add_implementation(
                wrap_compute_conv2d(topi.nn.group_conv2d_nhwc,
                                    has_groups=True),
                wrap_topi_schedule(topi.generic.schedule_group_conv2d_nhwc),
                name="group_conv2d_nhwc.generic",
            )
        else:
            raise RuntimeError(
                "Unsupported group_conv2d layout {}".format(layout))
    return strategy