Example #1
def test_fold_quantize():
    t = relay.TensorType([1, 2, 3], "int8")

    def before():
        data = tvm.nd.array(np.array([1.0, 2.0, 3.0], dtype="float32"))
        const_fp = relay.const(data, dtype="float32")
        const_i8 = relay.qnn.op.quantize(const_fp,
                                         output_scale=relay.const(0.5),
                                         output_zero_point=relay.const(0))
        x = relay.var("x", t)
        sub = relay.op.subtract(x, const_i8)
        func = relay.Function([x], sub)
        return func

    def expected():
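        # quantize(x) = round(x / 0.5) + 0, so [1.0, 2.0, 3.0] folds to [2, 4, 6]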
        data = tvm.nd.array(np.array([2, 4, 6], dtype="int8"))
        const_i8 = relay.const(data, dtype="int8")
        x = relay.var("x", t)
        sub = relay.op.subtract(x, const_i8)
        func = relay.Function([x], sub)
        return func

    # Nothing changed after applying FoldConstant
    a = run_opt_pass(before(), transform.FoldConstant())
    b = run_opt_pass(before(), transform.InferType())
    tvm.ir.assert_structural_equal(a, b)

    # Fold QNN constants
    a = run_opt_pass(before(), transform.FoldConstant(fold_qnn=True))
    b = run_opt_pass(expected(), transform.InferType())
    tvm.ir.assert_structural_equal(a, b)
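Note: every test in this listing calls a run_opt_pass helper that the scraped snippets do not include. A minimal sketch of it and of the shared imports, modeled on how TVM's Relay test suite typically defines it (treat the exact form as an assumption):

import numpy as np
import tvm
from tvm import relay
from tvm.relay import transform


def run_opt_pass(expr, opt_pass):
    # Wrap the expression in a module, run the given pass(es), and return "main".
    passes = opt_pass if isinstance(opt_pass, list) else [opt_pass]
    mod = tvm.IRModule.from_expr(expr)
    mod = tvm.transform.Sequential(passes)(mod)
    return mod["main"]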
Example #2
def test_fold_requantize():
    def before():
        data = tvm.nd.array(np.array([1, 2, 3], dtype="int8"))
        const_i8 = relay.const(data, dtype="int8")
        op = relay.qnn.op.requantize(
            const_i8,
            input_scale=relay.const(2.0, dtype="float32"),
            input_zero_point=relay.const(1, dtype="int32"),
            output_scale=relay.const(1.0, dtype="float32"),
            output_zero_point=relay.const(1, dtype="int32"),
        )
        x = relay.var("x", relay.TensorType([3], "int8"))
        add = relay.op.add(op, x)
        func = relay.Function([x], add)
        return func

    def expected():
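        # requantize maps q -> (q - 1) * 2.0 / 1.0 + 1, so [1, 2, 3] folds to [1, 3, 5]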
        data = tvm.nd.array(np.array([1, 3, 5], dtype="int8"))
        const_i8 = relay.const(data, dtype="int8")
        x = relay.var("x", relay.TensorType([3], "int8"))
        add = relay.op.add(const_i8, x)
        func = relay.Function([x], add)
        return func

    # Nothing changed after applying FoldConstant
    a = run_opt_pass(before(), transform.FoldConstant())
    b = run_opt_pass(before(), transform.InferType())
    tvm.ir.assert_structural_equal(a, b)

    # Fold QNN constants
    a = run_opt_pass(before(), transform.FoldConstant(fold_qnn=True))
    b = run_opt_pass(expected(), transform.InferType())
    tvm.ir.assert_structural_equal(a, b)
Example #3
def partition_for_dnnl(mod, params=None):
    """Partition the graph greedily offloading supported operators to DNNL.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    mod : Module
        Annotated and partitioned module.
    """

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)
    seq = tvm.transform.Sequential([
        transform.CanonicalizeOps(),
        transform.InferType(),
        transform.SimplifyInference(),
        transform.FoldConstant(),
        transform.FoldScaleAxis(),
        # fold consecutive add ops to simplify pattern `conv2d-bias_add-bn-relu`
        transform.SimplifyExpr(),
        transform.FoldConstant(),
        transform.MergeComposite(pattern_table()),
        transform.AnnotateTarget("dnnl"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ])
    with tvm.transform.PassContext(opt_level=3):
        mod = seq(mod)
    return mod
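A hedged usage sketch for partition_for_dnnl: onnx_model and shape_dict are hypothetical stand-ins for whatever frontend artifacts the model comes from, and a TVM build with the DNNL (oneDNN) codegen enabled is assumed.

# Hypothetical usage; `onnx_model` and `shape_dict` are assumptions, not part of the API above.
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
mod = partition_for_dnnl(mod, params)
lib = relay.build(mod, target="llvm", params=params)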
Example #4
def quantize_model(model, params, input_dtype, input_shape, qeval='power2'):

    skip_conv_layers = [0]
    with relay.quantize.qconfig(store_lowbit_output=False,
                                skip_conv_layers=skip_conv_layers):
        from tvm.relay.quantize.quantize import _bind_params
        graph = _bind_params(model['main'], params)
        mod = relay.Module.from_expr(graph)
        optimize = _transform.Sequential([
            _transform.SimplifyInference(),
            _transform.FoldConstant(),
            _transform.FoldScaleAxis(),
            _transform.CanonicalizeOps(),
            _transform.FoldConstant()
        ])

        with relay.build_config(opt_level=4):
            mod = optimize(mod)
            mod = relay.quantize.annotate()(mod)

            # find scale
            cache_file = '%s_%s_scales.pkl' % (VIDEO_FILE, MODEL_NAME)
            if os.path.exists(cache_file):
                print("Using cached layer statistics...")
                with open(cache_file, 'rb') as f:
                    scales = pickle.load(f)
            else:
                print("Compute layer statistics...")
                scales = calibrate_on_dataset(mod['main'], params, input_dtype,
                                              input_shape)
                with open(cache_file, 'wb') as f:
                    pickle.dump(scales, f)

            if qeval == 'power2':
                scales = list(
                    map(
                        lambda scale: 2**np.ceil(np.log2(scale))
                        if scale > 0 else 1.0, scales))
                weight_scales = 'power2'
            elif qeval == 'max':
                weight_scales = 'max'
            else:
                raise ValueError("Invalid quantization eval: " + qeval)

            mod['main'] = relay.quantize.calibrate(mod['main'],
                                                   weight_scales=weight_scales,
                                                   scales=scales)
            mod = relay.quantize.realize()(mod)
            mod = relay.transform.FoldConstant()(mod)

    return mod
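This snippet targets an older TVM API (relay.Module, relay.build_config) and relies on globals such as VIDEO_FILE, MODEL_NAME, and calibrate_on_dataset defined elsewhere in its script. A hedged usage sketch, assuming model and params come from a frontend importer:

# Hypothetical usage; `model`, `params`, and the input shape are assumptions.
qmod = quantize_model(model, params, input_dtype="float32",
                      input_shape=(1, 3, 224, 224), qeval='power2')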
Example #5
def partition_for_vitis_ai(mod, params=None, dpu=None, **opts):
    """Partition the Relay expression for offloading operators to Vitis AI DPU

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.
    dpu : str
        The DPU identifier (e.g. DPUCZDX8G-zcu104, DPUCADF8H)

    Returns
    -------
    ret : Module
        The annotated and partitioned module.
    """

    if dpu is None:
        raise ValueError(
            "Please pass Vitis AI DPU identifier to the partitioning function")

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    desired_layouts_in_partition = {
        "nn.conv2d": ["NHWC", "default"],
        "nn.upsampling": ["NHWC"],
        "image.resize2d": ["NHWC"],
    }
    desired_layouts_in_main = {
        "nn.conv2d": ["NCHW", "default"],
        "nn.upsampling": ["NCHW"],
        "image.resize2d": ["NCHW"],
    }
    seq = tvm.transform.Sequential([
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout(desired_layouts_in_partition),
        transform.FoldConstant(),
        transform.InferType(),
        VitisAIAnnotationPass("vitis_ai", dpu, params),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
        transform.RemoveUnusedFunctions(),
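        # Convert operators left in the main function back to NCHW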
        transform.ConvertLayout(desired_layouts_in_main),
        transform.FoldConstant(),
    ])

    with tvm.transform.PassContext(opt_level=3):
        return seq(mod)
Example #6
def const_folding_test():
    data_shape = (2, 4, 2, 4)
    data_dtype = 'uint8'
    kernel_shape = (3, 4, 2, 2)
    kernel_dtype = 'uint8'

    golden_weight = np.random.randint(low=0, high=256,
            size=kernel_shape).astype(kernel_dtype)
    data = relay.var("data", shape=data_shape,
            dtype=data_dtype)
    kernel = relay.const(golden_weight)
    qnn_func = get_qnn_func(data,
                            kernel,
                            input_zero_point=8,
                            kernel_zero_point=3,
                            kernel_size=(2, 2),
                            padding=(0, 0),
                            strides=(1, 1),
                            dilation=(1, 1),
                            data_layout="NCHW",
                            kernel_layout="OIHW",
                            out_dtype="int32")
    folded_mod = transform.FoldConstant()(qnn_func)
    folded_func = folded_mod["main"]
    assert "reshape" not in folded_func.astext()
Example #7
    def annotate(func, compiler):
        """
        An annotator for Core ML.
        """
        # Bind free variables to the constant values.
        bind_dict = {}
        for arg in func.params:
            name = arg.name_hint
            if name in params:
                bind_dict[arg] = relay.const(params[name])

        func = relay.bind(func, bind_dict)

        # Annotate the entire graph for Core ML
        mod = tvm.IRModule()
        mod["main"] = func

        seq = tvm.transform.Sequential([
            transform.SimplifyInference(),
            transform.FoldConstant(),
            transform.FoldScaleAxis(),
            transform.AnnotateTarget(compiler),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ])

        with relay.build_config(opt_level=3):
            mod = seq(mod)

        return mod
Example #8
def partition_for_clml(mod, params=None):
    """Partition the graph greedily offloading supported
    operators to CLML Library.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    ret : Module
        Annotated and partitioned module.
    """

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    seq = tvm.transform.Sequential(
        [
            transform.InferType(),
            transform.FoldConstant(),
            transform.MergeComposite(clml_pattern_table()),
            transform.AnnotateTarget("clml", False),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ]
    )

    result_mod = seq(mod)
    return result_mod
Example #9
def test_const_folding():
    data_shape = (2, 4, 2, 4)
    data_dtype = "uint8"
    kernel_shape = (4, 3, 2, 2)
    kernel_dtype = "uint8"

    golden_weight = np.random.randint(low=0, high=255, size=kernel_shape).astype(kernel_dtype)
    data = relay.var("data", shape=data_shape, dtype=data_dtype)
    kernel = relay.const(golden_weight)
    qnn_func = get_qnn_func(
        data,
        kernel,
        input_zero_point=8,
        kernel_zero_point=3,
        kernel_size=(2, 2),
        input_scale=1.0,
        kernel_scale=1.0,
        padding=(0, 0),
        strides=(1, 1),
        dilation=(1, 1),
        data_layout="NCHW",
        kernel_layout="IOHW",
        out_dtype="int32",
        channels=kernel_shape[1],
        groups=1,
    )
    folded_mod = transform.FoldConstant()(qnn_func)
    folded_func = folded_mod["main"]
    assert "reshape" not in folded_func.astext()
Example #10
def test_fold_const():
    c_data = np.array([1, 2, 3]).astype("float32")
    t = relay.TensorType([1, 2, 3], "float32")

    def before():
        c = relay.const(c_data)
        x = relay.var("x", t)
        y = relay.add(c, c)
        y = relay.multiply(y, relay.const(2, "float32"))
        y = relay.add(x, y)
        z = relay.add(y, c)
        return relay.Function([x], z)

    def expected():
        x = relay.var("x", t)
        c_folded = (c_data + c_data) * 2
        y = relay.add(x, relay.const(c_folded))
        z = relay.add(y, relay.const(c_data))
        return relay.Function([x], z)

    def fail(x):
        raise RuntimeError()

    # constant folding should work under any target/build context.
    with tvm.target.build_config(add_lower_pass=[(0, fail)]):
        with tvm.target.create("cuda"):
            zz = run_opt_pass(before(), transform.FoldConstant())
    zexpected = run_opt_pass(expected(), transform.InferType())
    assert relay.analysis.alpha_equal(zz, zexpected)
Example #11
def test_fold_const_with_on_device():
    """Make sure on_device annotations don't get in the way of constant folding"""
    c_data = np.array([1, 2, 3]).astype("float32")
    t = relay.TensorType([1, 2, 3], "float32")

    def before():
        c = relay.const(c_data)
        x = relay.var("x", t)
        y = relay.add(c, c)
        y = relay.multiply(y, relay.const(2, "float32"))
        y = relay.add(x, y)
        z = relay.add(y, c)
        f = relay.Function([x], z)
        return annot_func(f)

    def expected():
        x = relay.var("x", t)
        c_folded = (c_data + c_data) * 2
        y = relay.add(x, relay.const(c_folded))
        z = relay.add(y, relay.const(c_data))
        f = relay.Function([x], z)
        return annot_func(f)

    zz = run_opt_pass(before(), transform.FoldConstant())
    zexpected = run_opt_pass(expected(), transform.InferType())
    tvm.ir.assert_structural_equal(zz, zexpected)
Example #12
def test_fold_let_with_on_device():
    """Make sure on_device annotations don't get in the way of constant folding,
    and inlined constants bring their annotations with them."""
    c_data = np.array(1).astype("float32")
    t = relay.TensorType([1], "float32")

    def before():
        sb = relay.ScopeBuilder()
        x = relay.var("x", t)
        t1 = sb.let("t1", annot_expr(relay.const(c_data)))
        t2 = sb.let("t2", annot_expr(relay.add(t1, t1)))
        t3 = sb.let("t3", annot_expr(relay.add(t2, x)))
        sb.ret(t3)
        f = relay.Function([x], sb.get())
        return annot_func(f)

    def expected():
        sb = relay.ScopeBuilder()
        x = relay.var("x", t)
        c_folded = c_data + c_data
        t3 = sb.let(
            "t3", annot_expr(relay.add(annot_expr(relay.const(c_folded)), x)))
        sb.ret(t3)
        f = relay.Function([x], sb.get())
        return annot_func(f)

    zz = run_opt_pass(before(), transform.FoldConstant())
    zexpected = run_opt_pass(expected(), transform.InferType())
    tvm.ir.assert_structural_equal(zz, zexpected)
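annot_expr and annot_func are helpers from TVM's fold-constant test file that attach on_device annotations; they are not part of the scraped snippets. A minimal sketch of what they could look like, assuming everything is pinned to the CPU (the real helpers may differ):

def annot_expr(e):
    # Sketch: wrap an expression with an on_device annotation (CPU assumed).
    return relay.op.annotation.on_device(e, tvm.cpu(), constrain_result=True)


def annot_func(f):
    # Sketch only: the real helper attaches device information to the function;
    # treating it as the identity keeps this listing self-contained.
    return f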
Example #13
def test_fold_const():
    c_data = np.array([1, 2, 3]).astype("float32")
    t = relay.TensorType([1, 2, 3], "float32")

    def before():
        c = relay.const(c_data)
        x = relay.var("x", t)
        y = relay.add(c, c)
        y = relay.multiply(y, relay.const(2, "float32"))
        y = relay.add(x, y)
        z = relay.add(y, c)
        return relay.Function([x], z)

    def expected():
        x = relay.var("x", t)
        c_folded = (c_data + c_data) * 2
        y = relay.add(x, relay.const(c_folded))
        z = relay.add(y, relay.const(c_data))
        return relay.Function([x], z)

    def FailPass():
        def _transform(m, *args):
            raise RuntimeError()

        return tvm.transform.module_pass(_transform, opt_level=0)

    # constant folding should work under any target/build context.
    with tvm.target.build_config(add_lower_pass=[(0, FailPass())]):
        with tvm.target.create("cuda"):
            zz = run_opt_pass(before(), transform.FoldConstant())
    zexpected = run_opt_pass(expected(), transform.InferType())
    assert tvm.ir.structural_equal(zz, zexpected)
Example #14
def test_fold_const():
    c_data = np.array([1, 2, 3]).astype("float32")
    t = relay.TensorType([1, 2, 3], "float32")

    def before():
        c = relay.const(c_data)
        x = relay.var("x", t)
        y = relay.add(c, c)
        y = relay.multiply(y, relay.const(2, "float32"))
        y = relay.add(x, y)
        z = relay.add(y, c)
        return relay.Function([x], z)

    def expected():
        x = relay.var("x", t)
        c_folded = (c_data + c_data) * 2
        y = relay.add(x, relay.const(c_folded))
        z = relay.add(y, relay.const(c_data))
        return relay.Function([x], z)

    # constant folding should work under any target/build context.
    with tvm.target.create("cuda"):
        zz = run_opt_pass(before(), transform.FoldConstant())
    zexpected = run_opt_pass(expected(), transform.InferType())
    assert tvm.ir.structural_equal(zz, zexpected)
Example #15
    def partition():
        data = relay.var("data", relay.TensorType((1, 16, 224, 224),
                                                  "float32"))
        bn_gamma = relay.var("bn_gamma", relay.TensorType((16, ), "float32"))
        bn_beta = relay.var("bn_beta", relay.TensorType((16, ), "float32"))
        bn_mmean = relay.var("bn_mean", relay.TensorType((16, ), "float32"))
        bn_mvar = relay.var("bn_var", relay.TensorType((16, ), "float32"))

        bn_output = relay.nn.batch_norm(data, bn_gamma, bn_beta, bn_mmean,
                                        bn_mvar)

        func = relay.Function([data, bn_gamma, bn_beta, bn_mmean, bn_mvar],
                              bn_output.astuple())
        mod = tvm.IRModule()
        mod["main"] = func
        op_list = ["nn.batch_norm", "nn.conv2d"]
        mod = WhiteListAnnotator(op_list, "test_compiler")(mod)

        opt_pass = tvm.transform.Sequential([
            transform.InferType(),
            transform.PartitionGraph(),
            transform.SimplifyInference(),
            transform.FoldConstant(),
            transform.AlterOpLayout(),
            transform.Inline(),
        ])

        with relay.build_config(opt_level=3):
            mod = opt_pass(mod)

        return mod
Example #16
def test_const_folding():
    with TempOpAttr("qnn.conv2d", "FTVMQnnLegalize", legalize_qnn_conv2d):

        data_shape = (2, 4, 2, 4)
        data_dtype = 'uint8'
        kernel_shape = (3, 4, 2, 2)
        kernel_dtype = 'uint8'

        golden_weight = np.random.randint(
            low=0, high=255, size=kernel_shape).astype(kernel_dtype)
        data = relay.var("data", shape=data_shape, dtype=data_dtype)
        kernel = relay.const(golden_weight)
        qnn_func = get_qnn_func(data,
                                kernel,
                                input_zero_point=8,
                                kernel_zero_point=3,
                                kernel_size=(2, 2),
                                input_scale=1.0,
                                kernel_scale=1.0,
                                padding=(0, 0),
                                strides=(1, 1),
                                dilation=(1, 1),
                                data_layout="NCHW",
                                kernel_layout="OIHW",
                                out_dtype="int32",
                                groups=1)
        folded_mod = transform.FoldConstant()(qnn_func)
        folded_func = folded_mod["main"]
        assert "reshape" not in folded_func.astext()
Example #17
def get_pass_order(use_patterns):
    """
    Get the pass ordering based on using predicates or patterns.

    Parameters
    ----------
    use_patterns : bool
        True if the passes need to work with op patterns

    Returns
    -------
    ret : Sequential
        Pass object
    """
    return (tvm.transform.Sequential([
        transform.InferType(),
        RemoveDropoutPass(),
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout({
            "nn.conv1d": ["NCW", "default"],
            "nn.conv2d": ["NCHW", "default"],
            "nn.conv3d": ["NCDHW", "default"],
            "nn.conv2d_transpose": ["NCHW", "default"],
        }),
        transform.FoldConstant(),
        transform.MergeComposite(pattern_table()),
        transform.AnnotateTarget("tensorrt"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
        transform.InlineComposites("tensorrt"),
        transform.InferType(),
    ]) if use_patterns else tvm.transform.Sequential([
        transform.InferType(),
        RemoveDropoutPass(),
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout({
            "nn.conv1d": ["NCW", "default"],
            "nn.conv2d": ["NCHW", "default"],
            "nn.conv3d": ["NCDHW", "default"],
            "nn.conv2d_transpose": ["NCHW", "default"],
        }),
        transform.FoldConstant(),
        transform.AnnotateTarget("tensorrt"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
        transform.InferType(),
    ]))
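A hedged usage sketch; mod is assumed to be an already-imported Relay module, and RemoveDropoutPass and pattern_table come from the same TensorRT integration module:

# Hypothetical usage: run the pattern-based pipeline over a module.
seq = get_pass_order(use_patterns=True)
with tvm.transform.PassContext(opt_level=3):
    mod = seq(mod)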
Example #18
def test_partial_constant():
    """Test the subgraph with (const, var, const, var) arguments."""
    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    ishape = (10, 10)

    in_1 = relay.var("in_1", shape=ishape, dtype=dtype)
    in_2 = relay.var("in_2", shape=ishape, dtype=dtype)
    in_3 = relay.var("in_3", shape=ishape, dtype=dtype)
    in_4 = relay.var("in_4", shape=ishape, dtype=dtype)

    add1 = relay.add(in_1, in_2)
    add2 = relay.add(add1, in_3)
    add3 = relay.add(add2, in_3)
    add4 = relay.add(add3, in_4)

    func = relay.Function([in_1, in_2, in_3, in_4], add4)
    ref_mod = tvm.IRModule.from_expr(func)
    ref_mod = relay.transform.InferType()(ref_mod)

    data1 = np.random.uniform(0, 1, ishape).astype(dtype)
    data3 = np.random.uniform(0, 1, ishape).astype(dtype)

    params = {
        "in_1": tvm.nd.array(data1, device=tvm.cpu(0)),
        "in_3": tvm.nd.array(data3, device=tvm.cpu(0)),
    }
    ref_mod["main"] = bind_params_by_name(ref_mod["main"], params)

    opt_pass = tvm.transform.Sequential([
        transform.InferType(),
        transform.SimplifyInference(),
        transform.FoldConstant(),
        transform.FoldScaleAxis(),
        transform.AnnotateTarget("dnnl"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ])

    with tvm.transform.PassContext(opt_level=3,
                                   disabled_pass=["AlterOpLayout"]):
        mod = opt_pass(ref_mod)

    data2 = np.random.uniform(0, 1, ishape).astype(dtype)
    data4 = np.random.uniform(0, 1, ishape).astype(dtype)
    check_result(mod,
                 ref_mod, {
                     "in_2": data2,
                     "in_4": data4
                 }, (10, 10),
                 tol=1e-5)
Example #19
def partition_for_tensorrt(
    mod: tvm.IRModule,
    params: Optional[Dict[str, tvm.nd.NDArray]] = None,
    # CAUTION: Can't use default Target("tensorrt") here since the target kind is only available
    #          if is_tensorrt_compiler_enabled() == True.
    target: Optional[tvm.target.Target] = None,
) -> tvm.IRModule:
    """Partition all functions in mod to greedily offload supported operators to TensorRT.

    Parameters
    ----------
    mod : tvm.IRModule
        The module to partition.
    target : tvm.target.Target
        A target of kind "tensorrt" describing additional partitioning and compilation options.
    params : Optional[Dict[str, tvm.nd.NDArray]]
        Constant input parameters.

    Returns
    -------
    partitioned_mod : tvm.IRModule
        The partitioned module.

    """
    assert is_tensorrt_compiler_enabled(), "Can only partition for TensorRT if it is enabled"
    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)
    if target is None:
        # Use a default target. The get_tensorrt_target() function will similarly create an
        # equivalent default target when compilation continues after partitioning.
        target = tvm.target.Target("tensorrt")

    seq = tvm.transform.Sequential([
        transform.InferType(),
        RemoveDropoutPass(),
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout({
            "nn.conv1d": ["NCW", "default"],
            "nn.conv2d": ["NCHW", "default"],
            "nn.conv3d": ["NCDHW", "default"],
            "nn.conv2d_transpose": ["NCHW", "default"],
        }),
        transform.FoldConstant(),
        transform.MergeComposite(pattern_table()),
        transform.AnnotateTarget("tensorrt"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
        transform.InferType(),
    ])
    with target:
        mod = seq(mod)
        mod = prune_tensorrt_subgraphs(mod)
    return mod
Example #20
def test_fold_qnn_conv2d_qnn_mul():
    def before():
        dtype = "uint8"
        op0 = relay.qnn.op.conv2d(
            relay.const(np.ones((1, 1, 2, 2), dtype=dtype), dtype=dtype),
            relay.const(np.ones((1, 1, 2, 2), dtype=dtype), dtype=dtype),
            input_zero_point=relay.const(0, "int32"),
            kernel_zero_point=relay.const(0, "int32"),
            input_scale=relay.const(1.0, "float32"),
            kernel_scale=relay.const(1.0, "float32"),
            kernel_size=(2, 2),
            channels=1,
        )
        op = relay.qnn.op.mul(
            op0,
            relay.const(np.array([10], dtype="int32"), dtype="int32"),
            relay.const(1.0, dtype="float32"),
            relay.const(0, dtype="int32"),
            relay.const(1.0, dtype="float32"),
            relay.const(0, dtype="int32"),
            relay.const(1.0, dtype="float32"),
            relay.const(0, dtype="int32"),
        )
        func = relay.Function([], op)
        return func

    def expected():
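        # The 2x2 qnn.conv2d over all-ones data with an all-ones kernel yields 4;
        # the subsequent qnn.mul by 10 folds to the constant 40.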
        data = relay.const(np.array([[[[40]]]], dtype="int32"), dtype="int32")
        func = relay.Function([], data)
        return func

    # Nothing changed after applying FoldConstant
    a = run_opt_pass(before(), transform.FoldConstant())
    b = run_opt_pass(before(), transform.InferType())
    tvm.ir.assert_structural_equal(a, b)

    # Fold QNN constants
    a = run_opt_pass(before(), transform.FoldConstant(fold_qnn=True))
    b = run_opt_pass(expected(), transform.InferType())
    tvm.ir.assert_structural_equal(a, b)
Example #21
def preprocess_module(mod):
    """
    Pre-process a module containing functions ready for ACL codegen. For now we enforce OHWI
    kernel layout and fold the transforms away.

    Parameters
    ----------
    mod : Module
        The module to run passes on.

    Returns
    -------
    preprocessed_mod : Module
        The processed module.
    """

    def convert_layout_conv2d(conv2d_function):
        def convert_conv(attrs, inputs, tinfos, desired_layouts):
            new_attrs = dict(attrs)
            data_info = tinfos[0]
            weight_info = tinfos[1]
            desired_data_layout, desired_kernel_layout = map(str, desired_layouts)
            new_attrs["data_layout"] = desired_data_layout
            new_attrs["kernel_layout"] = desired_kernel_layout

            if is_depthwise_conv2d(
                data_info.shape,
                attrs["data_layout"],
                weight_info.shape,
                attrs["kernel_layout"],
                attrs["groups"],
            ):
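                # For depthwise conv2d, rotate the desired kernel layout,
                # e.g. "OHWI" becomes "IHWO"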
                dkl = desired_kernel_layout
                new_attrs["kernel_layout"] = dkl[3] + dkl[1:3] + dkl[0]
            return conv2d_function(*inputs, **new_attrs)

        return convert_conv

    with OpAttrContext(
        "nn.conv2d", "FTVMConvertOpLayout", convert_layout_conv2d(tvm.relay.nn.conv2d)
    ), OpAttrContext(
        "qnn.conv2d", "FTVMConvertOpLayout", convert_layout_conv2d(tvm.relay.qnn.op.conv2d)
    ):
        seq = tvm.transform.Sequential(
            [
                transform.ConvertLayout(
                    {"nn.conv2d": ["NHWC", "OHWI"], "qnn.conv2d": ["NHWC", "OHWI"]}
                ),
                transform.FoldConstant(),
            ]
        )
        preprocessed_mod = seq(mod)
    return preprocessed_mod
Example #22
def test_fold_full():
    c_shape = (8, 9, 10)
    def before():
        dtype = 'float32'
        return relay.full(relay.const(1.0, dtype), c_shape, dtype=dtype)

    def expected():
        # expect no changes
        return before()

    zz = run_opt_pass(before(), transform.FoldConstant())
    zexpected = run_opt_pass(expected(), transform.InferType())
    assert relay.analysis.graph_equal(zz, zexpected)
Example #23
def partition_for_bnns(mod, params=None):
    """Partition the graph greedily offloading supported
    operators to BNNS.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    ret : Module
        Annotated and partitioned module.
    """
    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    seq = tvm.transform.Sequential(
        [
            transform.InferType(),
            transform.FoldConstant(),
            transform.FoldScaleAxis(),
            transform.DynamicToStatic(),
            transform.AlterOpLayout(),
            # TODO(apeskov): workaround. The AlterOpLayout call leads to constant shape
            #   transformations; some expand_dims ops may appear after constants, which
            #   breaks BNNS fusing. So we have to call FoldConstant right before the
            #   BNNS composite passes.
            transform.FoldConstant(),
            transform.MergeComposite(get_pattern_table("bnns")),
            transform.AnnotateTarget("bnns"),
            #   If you don't need per-layer performance statistics, you can
            #   uncomment the next line.
            # transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ]
    )

    return seq(mod)
Example #24
def test_constant():
    """Test the subgraph with (var, const, ...) arguments."""
    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    ishape = (1, 32, 14, 14)
    wshape = (32, 32, 3, 3)

    data = relay.var("data", shape=ishape, dtype=dtype)
    weight = relay.var("weight", shape=wshape, dtype=dtype)
    bn_gamma = relay.var("bn_gamma")
    bn_beta = relay.var("bn_beta")
    bn_mmean = relay.var("bn_mean")
    bn_mvar = relay.var("bn_var")

    layer = relay.nn.conv2d(data=data,
                            weight=weight,
                            kernel_size=(3, 3),
                            padding=(1, 1))
    bn_output = relay.nn.batch_norm(layer, bn_gamma, bn_beta, bn_mmean,
                                    bn_mvar)
    out = bn_output[0]
    out = relay.nn.relu(out)

    func = relay.Function(relay.analysis.free_vars(out), out)
    ref_mod, params = tvm.relay.testing.create_workload(func)
    ref_mod["main"] = bind_params_by_name(ref_mod["main"], params)

    remove_bn_pass = tvm.transform.Sequential([
        transform.InferType(),
        transform.SimplifyInference(),
        transform.FoldConstant(),
        transform.FoldScaleAxis(),
    ])

    dnnl_patterns = get_pattern_table("dnnl")
    composite_partition = tvm.transform.Sequential([
        transform.MergeComposite(dnnl_patterns),
        transform.AnnotateTarget("dnnl"),
        transform.PartitionGraph(),
    ])

    with tvm.transform.PassContext(opt_level=3,
                                   disabled_pass=["AlterOpLayout"]):
        ref_mod = remove_bn_pass(ref_mod)
        mod = composite_partition(ref_mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    check_result(mod, ref_mod, {"data": i_data}, (1, 32, 14, 14), tol=1e-5)
Example #25
def legalize_qnn_for_dnnl(mod):
    """Transform qnn primitives to DNNL compatible form. Eliminate source zero point and apply
    strict sequence of post ops."""
    mod["main"] = rewrite(LegalizeQnnOpForDnnl(), mod["main"])

    seq = tvm.transform.Sequential([
        transform.InferType(),
        # transform.SimplifyInference(),  # TODO: this pass decomposes nn.layer_norm
        # transform.FoldScaleAxis(),  # TODO: fails inside TVM in case of grouped convolutions.
        transform.FoldConstant(),
    ])
    with tvm.transform.PassContext(opt_level=3):
        mod = seq(mod)
    return mod
Example #26
def test_fold_full():
    c_shape = (8, 9, 10)

    def before():
        dtype = "float32"
        return relay.full(relay.const(1.0, dtype), c_shape, dtype=dtype)

    def expected():
        # expect no changes
        return before()

    zz = run_opt_pass(before(), transform.FoldConstant())
    zexpected = run_opt_pass(expected(), transform.InferType())
    assert tvm.ir.structural_equal(zz, zexpected)
Example #27
def test_fold_if():
    cond_data = np.array(1).astype("bool")
    x_data = np.array([[1, 2, 3]]).astype("float32")

    def before():
        a = relay.const(cond_data)
        x = relay.const(x_data)
        y = relay.const(x_data)
        iff = relay.If(a, x + y, x - y)
        return relay.Function([], iff)

    def expected():
        y_data = x_data + x_data
        y = relay.const(y_data)
        return relay.Function([], y)

    zz = run_opt_pass(before(), transform.FoldConstant())
    zexpected = run_opt_pass(expected(), transform.InferType())
    tvm.ir.assert_structural_equal(zz, zexpected)

    cond_data = np.array(0).astype("bool")

    def before():
        a = relay.const(cond_data)
        x = relay.const(x_data)
        y = relay.const(x_data)
        iff = relay.If(a, x + y, x - y)
        return relay.Function([], iff)

    def expected():
        y_data = x_data - x_data
        y = relay.const(y_data)
        return relay.Function([], y)

    zz = run_opt_pass(before(), transform.FoldConstant())
    zexpected = run_opt_pass(expected(), transform.InferType())
    tvm.ir.assert_structural_equal(zz, zexpected)
Example #28
def simplify_model(mod):
    """
    Simplify the execution graph.

    At minimum, merge BatchNorm into convolution. For this purpose, decompose the BN
    primitive into simple operations which can be evaluated as constant expressions and
    then merged into the nearest conv/dense primitive.
    """
    seq = tvm.transform.Sequential([
        transform.InferType(),
        transform.FoldConstant(),
        transform.SimplifyInference(),
        transform.FoldScaleAxis(),
    ])
    return seq(mod)
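A hedged usage sketch: after the pipeline runs, batch_norm should have been decomposed by SimplifyInference and absorbed into neighboring constants by the folding passes.

# Hypothetical usage; `mod` is assumed to contain conv2d + batch_norm chains.
mod = simplify_model(mod)
print(mod["main"])  # batch_norm should no longer appear in the printed IR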
Example #29
def test_concatenate_const():
    def before():
        data = tvm.nd.array(np.array([1.0, 2.0, 3.0]))
        const = relay.const(data)
        concat = relay.op.concatenate([const, const], axis=0)
        func = relay.Function([], concat)
        return func

    def expected():
        data = tvm.nd.array(np.array([1.0, 2.0, 3.0, 1.0, 2.0, 3.0]))
        const = relay.const(data)
        func = relay.Function([], const)
        return func

    zz = run_opt_pass(before(), transform.FoldConstant())
    zexpected = run_opt_pass(expected(), transform.InferType())
    assert tvm.ir.structural_equal(zz, zexpected)
Example #30
def test_fold_concat():
    c_data = np.array([[1, 2, 3]]).astype("float32")

    def before():
        a = relay.const(c_data)
        b = relay.const(c_data)
        y = relay.concatenate((a, b), axis=0)
        return relay.Function([], y)

    def expected():
        y_data = np.concatenate((c_data, c_data), axis=0)
        y = relay.const(y_data)
        return relay.Function([], y)

    zz = run_opt_pass(before(), transform.FoldConstant())
    zexpected = run_opt_pass(expected(), transform.InferType())
    assert tvm.ir.structural_equal(zz, zexpected)