Example #1
def test_aot_codegen_checks_returns():
    """This test checks whether AoT lowering creates calls that check the return value correctly"""
    x = relay.var("x", shape=(1, 10))
    y = relay.var("y", shape=(1, 10))
    z = relay.add(x, y)
    func = relay.Function([x, y], z)

    compiled_test_mods = compile_models(
        models=AOTTestModel(module=IRModule.from_expr(func), inputs=None, outputs=None),
        interface_api="c",
        use_unpacked_api=True,
    )
    source = compiled_test_mods[0].executor_factory.lib.imported_modules[0].get_source()

    main_ir_module = compiled_test_mods[0].executor_factory.lowered_ir_mods.items()[0][1]
    main_func = main_ir_module["__tvm_main__"]

    # Check operator call is wrapped properly
    assert (
        str(main_func.body[1])
        == "tir.tvm_check_return(0, -1, tir.call_extern("
        + '"tvmgen_default_fused_add",'
        + " x_buffer_var, y_buffer_var, output_buffer_var))\n"
    )
    # TODO(Mousius) - Create a better place for C codegen tests
    assert (
        "if (tvmgen_default_fused_add(x_buffer_var, y_buffer_var, output_buffer_var) != 0 ) return -1;"
        in source
    )
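
The `source = ...` pattern above recurs throughout these examples; a minimal helper sketch (hypothetical, not part of the TVM test harness) that captures it:

def get_generated_c_source(compiled_model):
    """Return the generated C source of a compiled AOT test model (sketch)."""
    return compiled_model.executor_factory.lib.imported_modules[0].get_source()

Example #2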
def test_tflite_model_u2_usecase_two_models_with_a_single_external_pool(model_urls, usmp_algo):
    """This checks for inference using a single large enough common pool"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    workspace_memory_pools = WorkspaceMemoryPools(
        [PoolInfo("my_memory_pool", {target: PoolInfo.READ_WRITE_ACCESS})]
    )
    test_runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo},
        prologue=f"""
        #define MAX(A, B) ((A > B) ? A : B)
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool[MAX({_get_workspace_size_define_macro("my_memory_pool", "mod1")},{_get_workspace_size_define_macro("my_memory_pool", "mod2")})];
        """,
    )

    tflite_model_file1 = tf_testing.get_workload_official(
        model_urls[0][0],
        model_urls[0][1],
    )
    mod1, inputs1, params1 = create_relay_module_and_inputs_from_tflite_file(tflite_model_file1)
    output_list1 = generate_ref_data(mod1, inputs1, params1)

    tflite_model_file2 = tf_testing.get_workload_official(
        model_urls[1][0],
        model_urls[1][1],
    )
    mod2, inputs2, params2 = create_relay_module_and_inputs_from_tflite_file(tflite_model_file2)
    output_list2 = generate_ref_data(mod2, inputs2, params2)

    compiled_test_mods = compile_models(
        [
            AOTTestModel(
                name="mod1", module=mod1, inputs=inputs1, outputs=output_list1, params=params1
            ),
            AOTTestModel(
                name="mod2", module=mod2, inputs=inputs2, outputs=output_list2, params=params2
            ),
        ],
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
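
The prologue above relies on `_get_workspace_size_define_macro` to name the pool-size macros emitted by the compiler. A sketch of such a helper, assuming the `TVMGEN_<MODEL>_<POOL>_WORKSPACE_POOL_SIZE` naming convention (the real helper lives in TVM's AOT test utilities):

def _get_workspace_size_define_macro(pool_name: str, model_name: str = "default") -> str:
    """Map a pool name to the compiler-generated pool-size macro (sketch)."""
    return f"TVMGEN_{model_name.upper()}_{pool_name.upper()}_WORKSPACE_POOL_SIZE"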
Example #3
def test_tflite_model_u3_usecase_two_external_pools(model_url, usmp_algo):
    """This checks for inference using two external pools placed in the application"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    workspace_memory_pools = WorkspaceMemoryPools([
        PoolInfo("my_memory_pool_1", {target: PoolInfo.READ_WRITE_ACCESS},
                 size_hint_bytes=2500000),
        PoolInfo("my_memory_pool_2", {target: PoolInfo.READ_WRITE_ACCESS}),
    ])
    test_runner = AOTTestRunner(
        pass_config={
            "tir.usmp.enable": True,
            "tir.usmp.algorithm": usmp_algo
        },
        prologue=f"""
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_2[{_get_workspace_size_define_macro("my_memory_pool_2")}];
        """,
    )

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = _get_relay_module_and_inputs_from_tflite_file(
        tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
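
Every USMP example asserts, via `check_for_no_tvm_backendallocworkspace_calls`, that no generated module falls back to dynamic workspace allocation. A simplified sketch of such a check, assuming only C source modules need inspecting (the real helper in TVM's test utilities is more thorough):

def check_for_no_tvm_backendallocworkspace_calls(lib):
    """Assert that no generated C module calls TVMBackendAllocWorkspace (sketch)."""
    for imported_module in lib.imported_modules:
        if imported_module.type_key == "c":
            assert "TVMBackendAllocWorkspace" not in imported_module.get_source()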
Example #4
def test_conv2d(interface_api, use_unpacked_api, test_runner, groups,
                weight_shape):
    """Test a subgraph with a single conv2d operator."""
    dtype = "float32"
    ishape = (1, 32, 14, 14)
    wshape = (32, weight_shape, 3, 3)
    pass_config = {"tir.usmp.enable": True}
    test_runner = AOTTestRunner(
        makefile=test_runner.makefile,
        prologue=test_runner.prologue,
        epilogue=test_runner.epilogue,
        includes=test_runner.includes,
        parameters=test_runner.parameters,
        pass_config=pass_config,
    )

    data0 = relay.var("data", shape=ishape, dtype=dtype)
    weight0 = relay.var("weight", shape=wshape, dtype=dtype)
    out = relay.nn.conv2d(data0,
                          weight0,
                          kernel_size=(3, 3),
                          padding=(1, 1),
                          groups=groups)
    main_f = relay.Function([data0, weight0], out)
    mod = tvm.IRModule()
    mod["main"] = main_f
    mod = transform.InferType()(mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    w1_data = np.random.uniform(0, 1, wshape).astype(dtype)

    inputs = OrderedDict([("data", i_data), ("weight", w1_data)])

    output_list = generate_ref_data(mod, inputs)
    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
    compiled_test_mods = compile_models(
        models=AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
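
Note that this test exercises both entry points of the AOT test harness: the one-shot compile_and_run, and the split compile_models / run_and_check pair, which leaves room for the TVMBackendAllocWorkspace check between compilation and execution.

Example #5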
def test_byoc_microtvm(merge_compiler_regions):
    """This is a simple test to check BYOC capabilities of AOT - with and without merging compiler regions to test for https://github.com/apache/tvm/issues/9036"""
    use_unpacked_api = False
    interface_api = "packed"
    test_runner = AOTTestRunner(pass_config={"tir.usmp.enable": True})

    x = relay.var("x", shape=(10, 10))
    w0 = relay.var("w0", shape=(10, 10))
    w1 = relay.var("w1", shape=(10, 10))

    # z0 = x + w0
    x_ = compiler_begin(x, "ccompiler")
    w0_ = compiler_begin(w0, "ccompiler")
    z0_ = relay.add(x_, w0_)
    z0 = compiler_end(z0_, "ccompiler")

    # z1 = z0 + w1
    z0__ = compiler_begin(z0, "ccompiler")
    w1_ = compiler_begin(w1, "ccompiler")
    z1_ = relay.add(z0__, w1_)
    z1 = compiler_end(z1_, "ccompiler")

    # z2 = z0 + z1
    z2 = relay.add(z0, z1)

    f = relay.Function([x, w0, w1], z2)
    mod = tvm.IRModule()
    mod["main"] = f

    if merge_compiler_regions:
        mod = transform.MergeCompilerRegions()(mod)

    mod = transform.PartitionGraph("mod_name")(mod)
    mod = transform.InferType()(mod)

    x_data = [("x", np.random.rand(10, 10).astype("float32"))]
    w_data = [("w{}".format(i), np.random.rand(10, 10).astype("float32")) for i in range(2)]

    map_inputs = OrderedDict(x_data + w_data)
    output_list = generate_ref_data(mod, map_inputs)

    compiled_test_mods = compile_models(
        AOTTestModel(name="my_mod", module=mod, inputs=map_inputs, outputs=output_list),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
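
The compiler_begin/compiler_end annotations above mark each add for the external "ccompiler" backend; when enabled, MergeCompilerRegions fuses adjacent annotated regions before PartitionGraph outlines them into external functions.

Example #6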
def test_tflite_model(model_url, usmp_algo, workspace_size):
    """This checks for ML models and the memory used by them when using USMP with different algorithms"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOTTestRunner(pass_config={
        "tir.usmp.enable": True,
        "tir.usmp.algorithm": usmp_algo
    })

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    with open(tflite_model_file, "rb") as f:
        tflite_model_buf = f.read()
    data_shape = (1, 224, 224, 3)
    in_min, in_max = (0, 255)
    data = np.random.randint(in_min,
                             high=in_max,
                             size=data_shape,
                             dtype="uint8")
    mod, params = convert_to_relay(tflite_model_buf, data, "input")
    inputs = {"input": data}
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    # Check the total workspace size of the main function against the expected value
    main_func_metadata = compiled_test_mods[0].executor_factory.function_metadata["__tvm_main__"]
    assert sum(main_func_metadata.workspace_sizes.values()) == workspace_size

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
Example #7
def test_tflite_model_u1_usecase(model_url, usmp_algo, workspace_size):
    """This checks for ML models and the memory used by them when using USMP with different algorithms"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOTTestRunner(pass_config={
        "tir.usmp.enable": True,
        "tir.usmp.algorithm": usmp_algo
    })

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    # Check the workspace size reported in Model Library Format
    mlf_memory_map = mlf._build_function_memory_map(
        compiled_test_mods[0].executor_factory.function_metadata
    )
    assert mlf_memory_map["main"][0]["workspace_size_bytes"] == workspace_size
    # This should match the workspace size that is codegen'd into the entry point.
    allocated_pool_info = list(
        dict(compiled_test_mods[0].executor_factory.executor_codegen_metadata.pool_inputs).values()
    )[0]
    assert allocated_pool_info.allocated_size == workspace_size

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
Example #8
def compile_to_main_func(interface_api="c", use_unpacked_api=True):
    test_runner = create_test_runner()
    compiled_models = compile_models(
        models=AOTTestModel(
            module=mod,
            inputs=input_data,
            outputs=output_data,
        ),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        workspace_byte_alignment=16,
        pass_config=test_runner.pass_config,
    )
    main_ir_module = compiled_models[0].executor_factory.lowered_ir_mods.items()[0][1]
    main_func = main_ir_module["run_model"]
    return main_func
Example #9
def compile_to_main_func(interface_api="c", use_unpacked_api=True):
    test_runner = AOT_DEFAULT_RUNNER
    compiled_models = compile_models(
        models=AOTTestModel(
            module=IRModule.from_expr(func),
            inputs=inputs,
            outputs=output_list,
        ),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        workspace_byte_alignment=16,
        pass_config=test_runner.pass_config,
    )
    main_ir_module = list(compiled_models[0].executor_factory.lowered_ir_mods.values())[0]
    main_func = main_ir_module["run_model"]
    return main_func
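
Note that Examples #8 and #9 fetch the first lowered IRModule with different idioms: `.items()[0][1]` relies on TVM's Map.items() returning a list, while `list(...values())[0]` is the plain-Python equivalent; both select the same module.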
Example #10
def test_tflite_model_u1_usecase(model_url, usmp_algo, workspace_size):
    """This checks for ML models and the memory used by them when using USMP with different algorithms"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOTTestRunner(pass_config={
        "tir.usmp.enable": True,
        "tir.usmp.algorithm": usmp_algo
    })

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = _get_relay_module_and_inputs_from_tflite_file(
        tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    # Check the total workspace size of the main function against the expected value
    main_func_metadata = compiled_test_mods[0].executor_factory.function_metadata["__tvm_main__"]
    assert sum(main_func_metadata.workspace_sizes.values()) == workspace_size

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
Example #11
def test_constants_alignment(constants_byte_alignment):
    """Test that constants_byte_alignment correctly sets constants byte alignment"""

    use_unpacked_api = True
    interface_api = "c"

    mod, params = testing.mobilenet.get_workload(batch_size=1)
    data_shape = [int(x) for x in mod["main"].checked_type.arg_types[0].shape]
    data = np.random.uniform(size=data_shape).astype("float32")
    inputs = {"data": data}
    output_list = generate_ref_data(mod, inputs, params)
    target = f"c -constants-byte-alignment={constants_byte_alignment}"
    compiled_test_mods = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        interface_api,
        use_unpacked_api,
        target=tvm.target.Target(target, host=target),
    )
    source = compiled_test_mods[0].executor_factory.lib.imported_modules[0].get_source()
    assert f'__attribute__((section(".rodata.tvm"), aligned({constants_byte_alignment})))' in source
Example #12
def test_aot_codegen_backend_alloc_workspace_calls():
    """This test checks whether AoT lowering creates TVMBackendAllocWorkspace calls"""

    # The %data and %weight shapes in the following primitive Relay should create
    # small tensors that would get lowered to stack allocations in the CPU PrimFuncs.
    # However, the AoT executor codegen should retain them as
    # TVMBackendAllocWorkspace (TVMBAW) calls.
    relay_mod = tvm.parser.fromtext(
        """
        #[version = "0.0.5"]
        def @main(%data: Tensor[(1, 4, 4, 4), float32], %weight: Tensor[(4, 4, 3, 3), float32], src_layout="OIHW", dst_layout="OIHW4i4o") -> Tensor[(1, 4, 4, 4), float32] {
        %0 = fn (%p02: Tensor[(1, 4, 4, 4), float32], Primitive=1, hash="9332b3872fb5292c", src_layout="NCHW", dst_layout="NCHW4c") -> Tensor[(1, 1, 4, 4, 4), float32] {
            layout_transform(%p02, src_layout="NCHW", dst_layout="NCHW4c") /* ty=Tensor[(1, 1, 4, 4, 4), float32] */
        };
        %1 = fn (%p03: Tensor[(4, 4, 3, 3), float32], Primitive=1, hash="9f0b2b8a24a4dab3", src_layout="OIHW", dst_layout="OIHW4i4o") -> Tensor[(1, 1, 3, 3, 4, 4), float32] {
            layout_transform(%p03, src_layout="OIHW", dst_layout="OIHW4i4o") /* ty=Tensor[(1, 1, 3, 3, 4, 4), float32] */
        };
        %2 = %0(%data) /* ty=Tensor[(1, 1, 4, 4, 4), float32] */;
        %3 = %1(%weight) /* ty=Tensor[(1, 1, 3, 3, 4, 4), float32] */;
        %4 = fn (%p01: Tensor[(1, 1, 4, 4, 4), float32], %p1: Tensor[(1, 1, 3, 3, 4, 4), float32], out_layout="NCHW4c", kernel_layout="OIHW4i4o", Primitive=1, data_layout="NCHW4c") -> Tensor[(1, 1, 4, 4, 4), float32] {
            nn.contrib_conv2d_NCHWc(%p01, %p1, padding=[1, 1, 1, 1], channels=4, kernel_size=[3, 3], data_layout="NCHW4c", kernel_layout="OIHW4i4o", out_layout="NCHW4c") /* ty=Tensor[(1, 1, 4, 4, 4), float32] */
        };
        %5 = %4(%2, %3) /* ty=Tensor[(1, 1, 4, 4, 4), float32] */;
        %6 = fn (%p0: Tensor[(1, 1, 4, 4, 4), float32], Primitive=1, src_layout="NCHW4c", dst_layout="NCHW") -> Tensor[(1, 4, 4, 4), float32] {
            layout_transform(%p0, src_layout="NCHW4c", dst_layout="NCHW") /* ty=Tensor[(1, 4, 4, 4), float32] */
        };
        %6(%5) /* ty=Tensor[(1, 4, 4, 4), float32] */
        }
        """
    )
    compiled_test_mods = compile_models(
        models=AOTTestModel(module=relay_mod, inputs=None, outputs=None),
        interface_api="c",
        use_unpacked_api=True,
    )
    source = compiled_test_mods[0].executor_factory.lib.imported_modules[0].get_source()
    # Three allocates should be created, one for each primitive Relay function
    # call in main for the above Relay snippet.
    assert source.count("TVMBackendAllocWorkspace") == 3
Example #13
def test_output_tensor_names():
    """Test that the output names generated match those in the model"""
    pytest.importorskip("tflite")

    import os
    import tensorflow as tf
    import tflite.Model

    ifm_shape = (1, 299, 299, 3)
    padding = "VALID"
    strides = (1, 1)
    dilation = (1, 1)
    kernel_shape = (3, 2)

    def create_tflite_graph_two_outs():
        """Create a model with 2 output tensors"""

        class Model(tf.Module):
            @tf.function
            def tf_function(self, x):
                # Use tf.nn API to create the model
                tf_strides = [1, strides[0], strides[1], 1]
                op = tf.nn.conv2d(
                    x,
                    filters=tf.constant(
                        np.random.uniform(size=[kernel_shape[0], kernel_shape[1], 3, 3]),
                        dtype=tf.float32,
                    ),
                    strides=tf_strides,
                    padding=padding,
                    dilations=dilation,
                )
                op = tf.nn.relu(op)
                # Second convolution
                op2 = tf.nn.conv2d(
                    x,
                    filters=tf.constant(
                        np.random.uniform(size=(kernel_shape[0], kernel_shape[1], 3, 3)),
                        dtype=tf.float32,
                    ),
                    strides=strides,
                    padding=padding,
                    data_format="NHWC",
                    dilations=dilation,
                )
                op2 = tf.nn.relu(op2)
                return op, op2

        model = Model()
        concrete_func = model.tf_function.get_concrete_function(
            tf.TensorSpec(ifm_shape, dtype=tf.float32)
        )

        # Convert the model
        def representative_dataset():
            for _ in range(100):
                data = np.random.rand(*tuple(ifm_shape))
                yield [data.astype(np.float32)]

        converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.representative_dataset = representative_dataset
        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
        converter.inference_input_type = tf.int8
        converter.inference_output_type = tf.int8
        tflite_model = converter.convert()
        return tflite_model

    tflite_graph = create_tflite_graph_two_outs()
    tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)
    mod, params = relay.frontend.from_tflite(
        tflite_model,
        shape_dict={"input": ifm_shape},
        dtype_dict={"input": "int8"},
    )

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOT_DEFAULT_RUNNER

    in_min, in_max = (-128, 127)
    data = np.random.randint(in_min, high=in_max, size=ifm_shape, dtype="int8")
    inputs = {"x_int8": data}
    output_list = generate_ref_data(mod, inputs, params)
    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        test_runner,
        interface_api,
        use_unpacked_api,
    )

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        interface_api,
        use_unpacked_api,
    )

    # Check that the names of the output tensors occur in the source code
    source = compiled_test_mods[0].executor_factory.lib.get_source()
    for output_name in output_list.keys():
        assert output_name in source
Example #14
def test_packed_global_variables():
    """Check packed global variables in codegen output."""
    dtype = "float32"
    ishape = (1, 32, 14, 14)
    wshape = (32, 32, 3, 3)
    interface_api = "packed"
    use_unpacked_api = False

    data0 = relay.var("data", shape=ishape, dtype=dtype)
    weight0 = relay.var("weight", shape=wshape, dtype=dtype)
    out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=1)
    main_f = relay.Function([data0, weight0], out)
    mod = tvm.IRModule()
    mod["main"] = main_f
    mod = transform.InferType()(mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    w1_data = np.random.uniform(0, 1, wshape).astype(dtype)

    inputs = OrderedDict([("data", i_data), ("weight", w1_data)])

    output_list = generate_ref_data(mod, inputs)
    compiled_models_list = compile_models(
        models=AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        workspace_byte_alignment=8,
        enable_op_fusion=True,
        pass_config=AOT_DEFAULT_RUNNER.pass_config,
        use_runtime_executor=True,
        target=tvm.target.Target("c"),
    )
    compiled_model = compiled_models_list[0]

    tmp_path = utils.tempdir()
    base_path = tmp_path.temp_dir

    model = compiled_model.model
    tar_file = os.path.join(base_path, f"{model.name}.tar")
    export_model_library_format(compiled_model.executor_factory, tar_file)
    t = tarfile.open(tar_file)
    t.extractall(base_path)

    file_list = []
    for path in (pathlib.Path(base_path) / "codegen" / "host" / "src").iterdir():
        if path.is_file():
            file_list.append(path)
    assert len(file_list) > 0

    for path in file_list:
        with open(path, "r") as lib_f:
            lib1 = lib_f.readlines()

        tvmgen_names = []
        tvmgen_funcs = []
        for line in lib1:
            for item in line.split(" "):
                # Collect every name that starts with tvmgen_default; for items
                # that look like calls, also record the function name (the text
                # before the opening parenthesis).
                if item.startswith("tvmgen_default"):
                    tvmgen_names.append(item)
                    tvmgen_funcs += re.findall(r".*(?=\()", item)

        # No function starting with tvmgen_default should also appear as a *_packed global
        for func in tvmgen_funcs:
            assert f"{func}_packed" not in tvmgen_names
Example #15
def test_tflite_model_u4_usecase_two_external_pools(model_url, usmp_algo):
    """This checks for inference with USMP using external pool placed in the application"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    workspace_memory_pools = WorkspaceMemoryPools(
        [
            PoolInfo(
                "my_memory_pool_1", {target: PoolInfo.READ_WRITE_ACCESS}, size_hint_bytes=2500000
            ),
            PoolInfo("my_memory_pool_2", {target: PoolInfo.READ_WRITE_ACCESS}),
        ]
    )

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    input_name, input_data = list(inputs.items())[0]
    input_size_bytes = input_data.size * input_data.itemsize
    test_runner = AOTTestRunner(
        pass_config={
            "tir.usmp.enable": True,
            "tir.usmp.algorithm": usmp_algo,
            "tir.usmp.use_workspace_io": True,
        },
        prologue=f"""
        #include <string.h>
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_2[{_get_workspace_size_define_macro("my_memory_pool_2")}];
        struct {_add_module_prefix("workspace_pools")} {_add_module_prefix("workspace_pools")} = {{
            .my_memory_pool_1 = my_memory_pool_1,
            .my_memory_pool_2 = my_memory_pool_2,
        }};
        struct {_add_module_prefix("inputs")} {_add_module_prefix("inputs")} = {_add_module_prefix("map_inputs")}(&{_add_module_prefix("workspace_pools")});
        memcpy({_add_module_prefix("inputs")}.{input_name}, tvmgen_default_input_data_input, {input_size_bytes});
        struct {_add_module_prefix("outputs")} {_add_module_prefix("outputs")} = {_add_module_prefix("map_outputs")}(&{_add_module_prefix("workspace_pools")});
        """,
    )

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
        use_workspace_io=True,
    )
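
With "tir.usmp.use_workspace_io" enabled, the I/O tensors themselves are placed inside the workspace pools: the prologue maps the input and output structs onto the pools through the generated map_inputs/map_outputs functions and memcpy's the reference input into the pool before inference, which is why run_and_check is invoked with use_workspace_io=True.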