示例#1
0
def test_tflite_model_u3_usecase_two_external_pools(model_url, usmp_algo):
    """This checks for inference using two external pools placed in the application"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    # Two workspace pools: the first carries an explicit size hint, the second
    # is left unconstrained (size_hint_bytes defaults to -1).
    workspace_memory_pools = WorkspaceMemoryPools([
        WorkspacePoolInfo("my_memory_pool_1", [target],
                          PoolInfoProperties(size_hint_bytes=2500000)),
        WorkspacePoolInfo("my_memory_pool_2", [target]),
    ])
    # The prologue statically allocates both pools inside the generated test
    # application, sized via the workspace-size define macros.
    test_runner = AOTTestRunner(
        pass_config={
            "tir.usmp.enable": True,
            "tir.usmp.algorithm": usmp_algo
        },
        prologue=f"""
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_2[{_get_workspace_size_define_macro("my_memory_pool_2")}];
        """,
    )

    # Download the TFLite model, convert it to Relay and build reference data.
    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    # With USMP enabled, no dynamic workspace allocation calls should remain
    # in the generated library.
    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
示例#2
0
def test_multiple_memory_pools():
    """
    The cascader does not support multiple workspace memory
    pools. Check the correct error is thrown.
    """
    np.random.seed(2)
    ifm_shape = (1, 80, 75, 3)

    target, ethosu_target, runtime, executor, pass_config = _get_compilation_config(
        "ethos-u55-256", True, True)
    # Register TWO workspace pools (both named "SRAM") so that the U55
    # cascader's single-pool requirement is violated during the build below.
    workspace_memory_pools = WorkspaceMemoryPools([
        WorkspacePoolInfo(
            "SRAM",
            [target, ethosu_target],
            PoolInfoProperties(
                size_hint_bytes=1,
                read_bandwidth_bytes_per_cycle=16,
                write_bandwidth_bytes_per_cycle=16,
                target_burst_bytes={ethosu_target: 1},
            ),
        ),
        WorkspacePoolInfo(
            "SRAM",
            [target, ethosu_target],
            PoolInfoProperties(
                size_hint_bytes=1,
                read_bandwidth_bytes_per_cycle=16,
                write_bandwidth_bytes_per_cycle=16,
                target_burst_bytes={ethosu_target: 1},
            ),
        ),
    ])

    @tf.function
    def tf_graph(x):
        # A minimal single-op graph is enough to trigger the build path.
        return tf.nn.max_pool(x, (3, 3), (1, 1), "SAME")

    _, tflite_graph = infra.get_tflite_graph(tf_graph, [ifm_shape])
    tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)
    relay_module, params = relay.frontend.from_tflite(tflite_model)
    mod = partition_for_ethosu(relay_module, params)

    # Build must fail with a ValueError complaining about the pool count.
    with pytest.raises(ValueError) as e:
        with tvm.transform.PassContext(opt_level=3, config=pass_config):
            tvm.relay.build(
                mod,
                target,
                executor=executor,
                runtime=runtime,
                workspace_memory_pools=workspace_memory_pools,
                params=params,
            )

    expected_reason = "Exactly one workspace pool needs to be provided for the U55 cascader"
    on_error = "A ValueError was caught but its reason is not the expected one."
    assert expected_reason in str(e.value), on_error
示例#3
0
def test_no_pool_error():
    """Three mutually-conflicting 10-byte buffers cannot fit a 10-byte pool;
    the greedy-by-size algorithm must raise a TVMError saying the available
    pool space was exceeded.
    """
    target = Target("c")
    tiny_workspace_pool = WorkspacePoolInfo(
        "tiny_workspace",
        [target],
        PoolInfoProperties(size_hint_bytes=10),
    )
    bi_a = usmp_utils.BufferInfo(
        name_hint="bi_a", size_bytes=10, pool_candidates=[tiny_workspace_pool]
    )
    bi_b = usmp_utils.BufferInfo(
        name_hint="bi_b", size_bytes=10, pool_candidates=[tiny_workspace_pool]
    )
    bi_c = usmp_utils.BufferInfo(
        name_hint="bi_c", size_bytes=10, pool_candidates=[tiny_workspace_pool]
    )
    # Ring of conflicts: no two buffers may share an offset.
    bi_a.set_conflicts([bi_b])
    bi_b.set_conflicts([bi_c])
    bi_c.set_conflicts([bi_a])
    buffer_info_arr = [bi_a, bi_b, bi_c]
    # Plain string (the original used an f-string with no placeholders).
    fusmp_algo = tvm.get_global_func("tir.usmp.algo.greedy_by_size")
    with pytest.raises(
        tvm.TVMError, match="TVM USMP Error: the space available in the provided pools exceeded"
    ):
        # Return value intentionally discarded; only the raise matters.
        fusmp_algo(buffer_info_arr, 0)
示例#4
0
def _get_ethosu_workspace_size(mod, params, accel_type, pool_size,
                               enable_cascader, enable_striping):
    """Compile *mod* for the given accelerator configuration and return the
    main function's workspace size in bytes, as reported by the model library
    format (MLF) memory map.
    """

    target, ethosu_target, runtime, executor, pass_config = _get_compilation_config(
        accel_type, enable_cascader, enable_striping)

    # Single SRAM pool shared by the host and ethos-u targets, with a size
    # hint of pool_size bytes.
    workspace_memory_pools = WorkspaceMemoryPools([
        WorkspacePoolInfo(
            "SRAM",
            [target, ethosu_target],
            PoolInfoProperties(
                size_hint_bytes=pool_size,
                read_bandwidth_bytes_per_cycle=16,
                write_bandwidth_bytes_per_cycle=16,
                target_burst_bytes={ethosu_target: 1},
            ),
        ),
    ])

    with tvm.transform.PassContext(opt_level=3, config=pass_config):
        lib = tvm.relay.build(
            mod,
            target,
            executor=executor,
            runtime=runtime,
            workspace_memory_pools=workspace_memory_pools,
            params=params,
        )

    mlf_memory_map = mlf._build_function_memory_map(lib.function_metadata)
    return mlf_memory_map["main"][0]["workspace_size_bytes"]
示例#5
0
    def _test():
        """All three 10-byte buffers conflict pairwise, so none may overlap:
        the planner must lay them out back-to-back at offsets 0, 10 and 20.

        Note: closes over ``algorithm`` from the enclosing scope.
        """
        target = Target("c")
        global_workspace_pool = WorkspacePoolInfo(
            "global_workspace",
            [target],
        )
        bi_a = usmp_utils.BufferInfo(
            name_hint="bi_a", size_bytes=10, pool_candidates=[global_workspace_pool]
        )
        bi_b = usmp_utils.BufferInfo(
            name_hint="bi_b", size_bytes=10, pool_candidates=[global_workspace_pool]
        )
        bi_c = usmp_utils.BufferInfo(
            name_hint="bi_c", size_bytes=10, pool_candidates=[global_workspace_pool]
        )
        # Fully-connected conflict graph.
        bi_a.set_conflicts([bi_b, bi_c])
        bi_b.set_conflicts([bi_c, bi_a])
        bi_c.set_conflicts([bi_a, bi_b])

        buffer_info_arr = [bi_a, bi_b, bi_c]
        fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
        buffer_pool_allocations = fusmp_algo(buffer_info_arr, 0)
        assert buffer_pool_allocations[bi_a].byte_offset == 20
        assert buffer_pool_allocations[bi_b].byte_offset == 10
        assert buffer_pool_allocations[bi_c].byte_offset == 0
示例#6
0
def test_create_pool_info():
    """WorkspacePoolInfo exposes its name and honours the size hint property."""
    target = Target("c")

    # Without PoolInfoProperties, the size hint falls back to -1 (no constraint).
    unconstrained = WorkspacePoolInfo(
        "foo_workspace",
        [target],
    )
    assert unconstrained.pool_name == "foo_workspace"
    assert unconstrained.size_hint_bytes == -1

    # An explicit size hint is reported back unchanged.
    constrained = WorkspacePoolInfo(
        "bar_workspace",
        [target],
        PoolInfoProperties(size_hint_bytes=1425),
    )
    assert constrained.pool_name == "bar_workspace"
    assert constrained.size_hint_bytes == 1425
示例#7
0
def test_create_pool_allocation():
    """A PoolAllocation records the pool it belongs to and its byte offset."""
    workspace = WorkspacePoolInfo(
        "foo_workspace",
        [Target("c")],
    )
    allocation = usmp_utils.PoolAllocation(pool_info=workspace, byte_offset=64)
    assert allocation.pool_info == workspace
    assert allocation.byte_offset == 64
示例#8
0
def test_linear(algorithm, workspace_size):
    """
    BufferInfo objects modelling a purely linear op sequence:
    (Op A)
    |
    bi_a
    |
    (Op B)
    |
    bi_b
    |
    .
    .
    .
    (Op F)
    |
    bi_f
    Each buffer therefore conflicts only with its immediate neighbours.
    """
    target = Target("c")
    pool = WorkspacePoolInfo(
        "global_workspace",
        [target],
    )

    # Buffer name -> size in bytes, in op order.
    sizes = {"bi_a": 10, "bi_b": 20, "bi_c": 100, "bi_d": 40, "bi_e": 50, "bi_f": 50}
    buffers = {
        name: usmp_utils.BufferInfo(name_hint=name, size_bytes=size, pool_candidates=[pool])
        for name, size in sizes.items()
    }

    # Wire up neighbour conflicts (predecessor first, then successor).
    names = list(sizes)
    for idx, name in enumerate(names):
        neighbours = []
        if idx > 0:
            neighbours.append(buffers[names[idx - 1]])
        if idx + 1 < len(names):
            neighbours.append(buffers[names[idx + 1]])
        buffers[name].set_conflicts(neighbours)

    fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
    buffer_pool_allocations = fusmp_algo(list(buffers.values()), 0)
    _check_max_workspace_size(buffer_pool_allocations, pool, workspace_size)
示例#9
0
def compare_ethosu_with_reference(
    mod,
    input_data,
    output_data,
    accel_type: str,
    output_tolerance=0,
    print_cmm=False,
    enable_cascader=None,
):
    """Build *mod* for an ethos-u accelerator with USMP enabled and verify the
    generated source against the reference *output_data*.

    Parameters
    ----------
    mod : compiled module input for build_source.
    input_data, output_data : reference inputs and expected outputs.
    accel_type : str
        Accelerator variant string (e.g. containing "u65").
    output_tolerance : tolerance forwarded to build_source.
    print_cmm : bool
        When True, dump the ethos-u command stream payload.
    enable_cascader : bool or None
        Defaults to enabling the cascader except for "u65" variants.
    """
    # Cascader is disabled by default for u65 variants.
    if enable_cascader is None:
        enable_cascader = "u65" not in accel_type
    pool_name = "my_memory_pool"
    host_target = tvm.target.Target("c")
    ethosu_target = tvm.target.Target("ethos-u")
    # Single external pool shared by host and accelerator targets.
    workspace_pools = WorkspaceMemoryPools([
        WorkspacePoolInfo(
            pool_name,
            [host_target, ethosu_target],
            PoolInfoProperties(
                size_hint_bytes=2400000,
                read_bandwidth_bytes_per_cycle=16,
                write_bandwidth_bytes_per_cycle=16,
                target_burst_bytes={ethosu_target: 1},
            ),
        )
    ])
    test_runner = create_test_runner(
        accel_type,
        enable_usmp=True,
        enable_cascader=enable_cascader,
        enable_striping=False,
        workspace_pools=workspace_pools,
    )
    compiled_models = build_source(
        mod,
        input_data,
        output_data,
        test_runner,
        workspace_pools=workspace_pools,
        output_tolerance=output_tolerance,
    )

    # Assumes only two runtime.Modules are created -- i.e. single offload module
    ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[
        0].imported_modules[0]

    # Verify generated C source
    if print_cmm:
        get_artifacts = tvm._ffi.get_global_func(
            "runtime.module.ethos-u.get_artifacts")
        compilation_artifacts = get_artifacts(ethosu_module)
        cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
        print_payload(cmms)

    verify_source(compiled_models, test_runner)
def test_bounded(random_len=150, pools=None):
    """Tests two pools, one is bounded and one is not limited.

    Parameters
    ----------
    random_len : int
        Number of randomly sized buffers to generate.
    pools : list of WorkspacePoolInfo, optional
        Candidate pools; defaults to one 65535-byte pool plus an
        unconstrained "slow" pool.
    """
    # Build the default inside the function rather than as a mutable default
    # argument, so each call gets fresh pool objects (and the pools are not
    # constructed at import time).
    if pools is None:
        pools = [
            WorkspacePoolInfo("default", [], PoolInfoProperties(65535)),
            WorkspacePoolInfo("slow", []),
        ]
    random.seed(0)
    mem_range = [BufferInfo(str(i), random.randrange(1, 65535), pools) for i in range(random_len)]
    # Give every buffer at least one extra random conflict, kept symmetric.
    for mr in mem_range:
        pr = random.choice(mem_range)
        while pr in (*mr.conflicts, mr):
            pr = random.choice(mem_range)

        mr.set_conflicts([*mr.conflicts, pr])
        pr.set_conflicts([*pr.conflicts, mr])

    fusmp_algo = tvm.get_global_func("tir.usmp.algo.hill_climb")
    result_map = fusmp_algo(mem_range, 0)
    _verify_all_conflicts(result_map)
示例#11
0
def test_mobilenet_subgraph(algorithm, fast_memory_size, slow_memory_size):
    """Plan the MobilenetStructure TIR module over a size-hinted fast memory
    pool and an unconstrained slow memory pool, then verify the recorded
    buffer conflicts and the per-pool max workspace sizes.
    """
    target = Target("c")
    fast_memory_pool = WorkspacePoolInfo(
        "fast_memory",
        [target],
        PoolInfoProperties(size_hint_bytes=200704),
    )
    # No size hint: this pool is unconstrained.
    slow_memory_pool = WorkspacePoolInfo(
        "slow_memory",
        [target],
    )
    tir_mod = MobilenetStructure
    tir_mod = _assign_targets_to_primfuncs_irmodule(tir_mod, target)
    tir_mod = _assign_poolinfos_to_allocates_in_irmodule(
        tir_mod, [fast_memory_pool, slow_memory_pool]
    )
    main_func = tir_mod["run_model"]
    buffer_info_analysis = tvm.tir.usmp.analysis.extract_buffer_info(main_func, tir_mod)
    # Sanity-check the memory pressure computed by the analysis.
    assert buffer_info_analysis.memory_pressure == 1117718

    fcreate_array_bi = tvm.get_global_func("tir.usmp.CreateArrayBufferInfo")
    buffer_info_arr = fcreate_array_bi(buffer_info_analysis.buffer_info_stmts)
    fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
    buffer_pool_allocations = fusmp_algo(buffer_info_arr, buffer_info_analysis.memory_pressure)

    # Index the analysis results by buffer name for the conflict checks below.
    buffer_info_map_names = dict()
    for buf_info in buffer_info_arr:
        buffer_info_map_names[buf_info.name_hint] = buf_info

    # check conflicts
    _verify_conflicts("PaddedInput_7", ["sid_9", "sid_8", "Conv2dOutput_7"], buffer_info_map_names)
    _verify_conflicts("tensor_2", ["sid_8"], buffer_info_map_names)
    _verify_conflicts("sid_9", ["PaddedInput_7"], buffer_info_map_names)
    _verify_conflicts(
        "sid_8", ["PaddedInput_7", "Conv2dOutput_7", "tensor_2"], buffer_info_map_names
    )
    _verify_conflicts("Conv2dOutput_7", ["sid_8", "PaddedInput_7"], buffer_info_map_names)

    _check_max_workspace_size(buffer_pool_allocations, slow_memory_pool, slow_memory_size)
    _check_max_workspace_size(buffer_pool_allocations, fast_memory_pool, fast_memory_size)
示例#12
0
def test_create_array_buffer_info():
    """Every BufferInfo produced by CreateArrayBufferInfo must correspond to
    one of the extracted buffer-info statements."""
    target = Target("c")
    pool = WorkspacePoolInfo(
        "global_workspace",
        [target],
    )
    mod = _assign_targets_to_primfuncs_irmodule(LinearStructure, target)
    mod = _assign_poolinfos_to_allocates_in_irmodule(mod, [pool])
    analysis = tvm.tir.usmp.analysis.extract_buffer_info(
        mod["tvmgen_default_run_model"], mod)
    fcreate_array_bi = tvm.get_global_func("tir.usmp.CreateArrayBufferInfo")
    for info in fcreate_array_bi(analysis.buffer_info_stmts):
        assert info in analysis.buffer_info_stmts.keys()
def run_intervals(intervals, tolerance=0):
    """Run the given liveness intervals through both USMP algorithms.

    Parameters
    ----------
    intervals : sequence of (start, stop, size)
        Inclusive liveness range and byte size per buffer.
    tolerance : forwarded to _check_max_workspace_size.

    Returns
    -------
    dict mapping algorithm name -> bool (workspace size within tolerance).
    """
    expected_mem = find_maximum_from_intervals(intervals)
    pools = [WorkspacePoolInfo("default", [])]
    # One BufferInfo per interval, all sharing the single candidate pool.
    buffers = [BufferInfo(str(i), size, pools) for i, (_, _, size) in enumerate(intervals)]

    # Two intervals conflict iff their inclusive [start, stop] ranges overlap:
    # the combined span is then shorter than the sum of the two durations.
    for i, (i_start, i_stop, _) in enumerate(intervals):
        conflicts = set()
        for j, (j_start, j_stop, _) in enumerate(intervals):
            start = min(i_start, j_start)
            stop = max(i_stop, j_stop)
            i_dur = i_stop - i_start + 1
            j_dur = j_stop - j_start + 1

            if i != j and (stop - start + 1 < i_dur + j_dur):
                conflicts.add(buffers[j])

        # Sort by name for a deterministic conflict order.
        buffers[i].set_conflicts(sorted(conflicts, key=lambda c: c.name_hint))

    result = {}
    for alg, params in [
        ("tir.usmp.algo.hill_climb", (expected_mem,)),
        ("tir.usmp.algo.greedy_by_size", (expected_mem,)),
    ]:
        fusmp_algo = tvm.get_global_func(alg)
        print("\n", "started", alg)
        buffer_info_arr = fusmp_algo(buffers, *params)
        print()

        _verify_all_conflicts(buffer_info_arr)
        result[alg], msg = _check_max_workspace_size(
            buffer_info_arr, pools[0], expected_mem, tolerance
        )
        if not result[alg]:
            print(alg, msg)

    return result
示例#14
0
def test_custom_algo():
    """Check that the "tir.usmp.custom_algorithm" pass-context option routes
    planning to a user-registered algorithm, and that an unknown algorithm
    name raises a TVMError.
    """
    target = Target("c")
    global_workspace_pool = WorkspacePoolInfo(
        "global_workspace",
        [target],
    )
    tir_mod = ResnetStructure
    tir_mod = _assign_targets_to_primfuncs_irmodule(tir_mod, target)
    tir_mod = _assign_poolinfos_to_allocates_in_irmodule(tir_mod, [global_workspace_pool])
    tir_mod = tir_mod.with_attr("executor", tvm.relay.backend.Executor("aot"))
    tir_mod = tir_mod.with_attr("runtime", tvm.relay.backend.Runtime("crt"))
    tir_mod["__tvm_main__"] = tir_mod[
        "tvmgen_default_fused_cast_subtract_fixed_point_multiply_add_clip_cast_cast"
    ]

    # Flag flipped by the custom algorithm so we can tell whether it ran.
    algo_called = False

    @tvm.register_func("tir.usmp.algo.trivial")
    def _trivial_algo(buf_infos, mem_pressure):
        """Dummy planner: place buffers back-to-back in their first candidate pool."""
        nonlocal algo_called
        algo_called = True
        out_layout = {}
        offset = 0
        for buf_info in buf_infos:
            pool_info = buf_info.pool_candidates[0]
            out_layout[buf_info] = usmp_utils.PoolAllocation(pool_info, offset)
            offset += buf_info.size_bytes
        return out_layout

    usmp_pass = tvm.get_global_func("tir.transform.UnifiedStaticMemoryPlanner")
    # Without the config option the custom algorithm must NOT be picked up.
    usmp_pass()(tir_mod)
    assert not algo_called

    # With the option set, the registered "trivial" algorithm must run.
    with tvm.transform.PassContext(config={"tir.usmp.custom_algorithm": "trivial"}):
        usmp_pass()(tir_mod)

    assert algo_called

    # An unregistered name must produce a clear error.
    with pytest.raises(
        tvm.TVMError, match="The selected custom USMP algorithm : invalid is not defined"
    ):
        with tvm.transform.PassContext(config={"tir.usmp.custom_algorithm": "invalid"}):
            usmp_pass()(tir_mod)
示例#15
0
def test_create_buffer_info():
    """BufferInfo exposes its constructor arguments; alignment defaults to 1."""
    pool = WorkspacePoolInfo(
        "global_workspace",
        [Target("c")],
    )

    # Keyword construction, relying on the default alignment.
    default_aligned = tvm.tir.usmp.BufferInfo(
        name_hint="buf1", size_bytes=256, pool_candidates=[pool]
    )
    assert default_aligned.name_hint == "buf1"
    assert default_aligned.size_bytes == 256
    assert list(default_aligned.pool_candidates) == [pool]
    assert default_aligned.alignment == 1

    # Positional construction with an explicit 8-byte alignment.
    custom_aligned = tvm.tir.usmp.BufferInfo("buf2", 512, [pool], 8)
    assert custom_aligned.name_hint == "buf2"
    assert custom_aligned.size_bytes == 512
    assert list(custom_aligned.pool_candidates) == [pool]
    assert custom_aligned.alignment == 8
def test_extract_memory_info():
    """
    Test memory pressure value correctly reduces the workspace size.
    """
    initial_pool_size = 2000
    memory_pressure = 500
    # Share one ethos-u Target instance between the pool's target list and the
    # target_burst_bytes key, consistent with the other tests in this file
    # (previously two distinct Target("ethos-u") objects were constructed, so
    # the burst-bytes entry was keyed by a different object than the pool's
    # registered target).
    ethosu_target = tvm.target.Target("ethos-u")
    memory_pool = WorkspacePoolInfo(
        "SRAM",
        [tvm.target.Target("c"), ethosu_target],
        PoolInfoProperties(
            size_hint_bytes=initial_pool_size,
            read_bandwidth_bytes_per_cycle=16,
            write_bandwidth_bytes_per_cycle=16,
            target_burst_bytes={ethosu_target: 1},
        ),
    )

    # The extracted pool size must be reduced by the memory pressure.
    sram = extract_memory_info(memory_pool, memory_pressure)
    assert sram.size == initial_pool_size - memory_pressure
示例#17
0
def test_networks_with_usmp_and_cascader_wo_striping(accel_type, model_url,
                                                     workspace_size):
    """End-to-end run of a TFLite network with USMP and the cascader enabled
    (striping disabled), checking that the allocated size of the single
    external memory pool matches *workspace_size*.
    """
    np.random.seed(23)

    pool_name = "my_memory_pool"
    host_target = tvm.target.Target("c")
    ethosu_target = tvm.target.Target("ethos-u")
    # One external pool shared by host and accelerator targets.
    workspace_pools = WorkspaceMemoryPools([
        WorkspacePoolInfo(
            pool_name,
            [host_target, ethosu_target],
            PoolInfoProperties(
                size_hint_bytes=2400000,
                read_bandwidth_bytes_per_cycle=16,
                write_bandwidth_bytes_per_cycle=16,
                target_burst_bytes={ethosu_target: 1},
            ),
        )
    ])
    tflite_model_buf = infra.get_tflite_model(model_url)
    input_data, output_data = infra.generate_ref_data_tflite(tflite_model_buf)
    mod, params = convert_to_relay(tflite_model_buf)
    mod = partition_for_ethosu(mod, params)
    test_runner = infra.create_test_runner(
        accel_type,
        enable_usmp=True,
        enable_cascader=True,
        enable_striping=False,
        workspace_pools=workspace_pools,
    )
    compiled_models = infra.build_source(mod,
                                         input_data,
                                         output_data,
                                         test_runner,
                                         workspace_pools=workspace_pools)
    infra.verify_source(compiled_models, test_runner)

    # The first (and only) allocated pool must have the expected size.
    allocated_pool_info = list(
        dict(compiled_models[0].executor_factory.executor_codegen_metadata.
             pool_inputs).values())[0]
    assert allocated_pool_info.allocated_size == workspace_size
示例#18
0
def test_fanout(algorithm, workspace_size):
    """
    The test case here represent BufferInfo objects
    that could get generated for a fanout topology
    such as :
    (Op A)
    |
    bi_a ---------
    |            |
    (Op B)     (Op C)
    |            |
    bi_b        bi_c
    |            |
    (Op D)     (Op E)
    |            |
    bi_d        bi_e
    |            |
    (Op F) ------
    |
    bi_f
    |
    (Op G)
    |
    bi_g
    """
    target = Target("c")
    global_workspace_pool = WorkspacePoolInfo(
        "global_workspace",
        targets=[target],
    )
    bi_a = usmp_utils.BufferInfo(
        name_hint="bi_a", size_bytes=10, pool_candidates=[global_workspace_pool]
    )
    bi_b = usmp_utils.BufferInfo(
        name_hint="bi_b", size_bytes=20, pool_candidates=[global_workspace_pool]
    )
    bi_c = usmp_utils.BufferInfo(
        name_hint="bi_c", size_bytes=100, pool_candidates=[global_workspace_pool]
    )
    bi_d = usmp_utils.BufferInfo(
        name_hint="bi_d", size_bytes=40, pool_candidates=[global_workspace_pool]
    )
    bi_e = usmp_utils.BufferInfo(
        name_hint="bi_e", size_bytes=50, pool_candidates=[global_workspace_pool]
    )
    bi_f = usmp_utils.BufferInfo(
        name_hint="bi_f", size_bytes=60, pool_candidates=[global_workspace_pool]
    )
    bi_g = usmp_utils.BufferInfo(
        name_hint="bi_g", size_bytes=70, pool_candidates=[global_workspace_pool]
    )

    # Creating conflicts for the fanout graph shown in the docstring.
    # NOTE(review): bi_f lists ITSELF as a conflict and omits bi_g, even
    # though bi_g lists bi_f — this looks like a typo for bi_g; confirm the
    # intended conflict set before changing it (the parametrized
    # workspace_size values depend on it).
    bi_a.set_conflicts([bi_b, bi_c])
    bi_b.set_conflicts([bi_a, bi_c, bi_e])
    bi_c.set_conflicts([bi_e, bi_a, bi_b, bi_d])
    bi_d.set_conflicts([bi_b, bi_f, bi_c, bi_e])
    bi_e.set_conflicts([bi_c, bi_f, bi_b, bi_d])
    bi_f.set_conflicts([bi_d, bi_e, bi_f])
    bi_g.set_conflicts([bi_f])

    buffer_info_arr = [bi_a, bi_b, bi_c, bi_d, bi_e, bi_f, bi_g]
    fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
    buffer_pool_allocations = fusmp_algo(buffer_info_arr, 0)
    _check_max_workspace_size(buffer_pool_allocations, global_workspace_pool, workspace_size)
示例#19
0
def test_tflite_model_u3_usecase_conv2d_var_cons(usmp_algo):
    """This checks for inference using workspace and constant pools placed in the application"""

    # Small two-conv2d Relay module defined inline; %weight is a parameter so
    # its data can be pinned into the constant pool.
    mod = tvm.parser.fromtext("""\
        #[version = "0.0.5"]
        def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(3, 3, 5, 5), int8]) {
            %1 = nn.conv2d(
                 %data,
                 %weight,
                 padding=[2, 2],
                 channels=3,
                 kernel_size=[5, 5],
                 data_layout="NCHW",
                 kernel_layout="OIHW",
                 out_dtype="int32");
            %2 = cast(nn.max_pool2d(%1, pool_size=[3, 3]), dtype="int8");
            %3 = nn.conv2d(
                 %2,
                 %weight,
                 padding=[2, 2],
                 channels=3,
                 kernel_size=[5, 5],
                 data_layout="NCHW",
                 kernel_layout="OIHW",
                 out_dtype="int32");
            %4 = nn.max_pool2d(%3, pool_size=[3, 3]);
            %4
        }
    """)

    # Derive input/parameter shapes and dtypes from the parsed main function.
    main_func = mod["main"]
    shape_dict = {
        p.name_hint: p.checked_type.concrete_shape
        for p in main_func.params
    }
    type_dict = {p.name_hint: p.checked_type.dtype for p in main_func.params}

    weight_data = np.random.randint(1, 255, shape_dict["weight"]).astype(
        type_dict["weight"])
    input_data = np.ones(shape_dict["data"]).astype(type_dict["data"])
    params = {"weight": weight_data}
    inputs = {"data": input_data}

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    workspace_mem_pools = WorkspaceMemoryPools([
        WorkspacePoolInfo("my_memory_pool_1", [target],
                          PoolInfoProperties(size_hint_bytes=8500000)),
    ])

    constant_mem_pools = ConstantMemoryPools([
        ConstantPoolInfo("my_const_pool_1", [target], []),
    ])

    # Prologue places the workspace pool in .bss.noinit and the constant pool
    # (initialised with the generated constant data) in .rodata.tvm.
    test_runner = AOTTestRunner(
        pass_config={
            "tir.usmp.enable": True,
            "tir.usmp.algorithm": usmp_algo
        },
        prologue=f"""
        __attribute__((section(".bss.noinit"), aligned(TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".rodata.tvm"), aligned(TVM_RUNTIME_CONST_ALLOC_ALIGNMENT_BYTES)))
        static uint8_t my_const_pool_1[{_get_constant_size_define_macro("my_const_pool_1")}] = {{ {_get_constant_data_define_macro("my_const_pool_1")} }};
        """,
    )

    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_mem_pools,
        constant_memory_pools=constant_mem_pools,
        target=target,
    )

    # With USMP enabled, no dynamic workspace allocation calls should remain.
    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
示例#20
0
def test_two_models_with_a_single_external_pool(model_urls, usmp_algo):
    """This checks for inference using a single large enough common pool"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    workspace_memory_pools = WorkspaceMemoryPools(
        [WorkspacePoolInfo("my_memory_pool", [target])])
    # The shared pool is statically sized to the larger of the two models'
    # workspace requirements via the MAX macro over the per-model size macros.
    test_runner = AOTTestRunner(
        pass_config={
            "tir.usmp.enable": True,
            "tir.usmp.algorithm": usmp_algo
        },
        prologue=f"""
        #define MAX(A, B) ((A > B) ? A : B)
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool[MAX({_get_workspace_size_define_macro("my_memory_pool", "mod1")},{_get_workspace_size_define_macro("my_memory_pool", "mod2")})];
        """,
    )

    # First model: download, convert and generate reference outputs.
    tflite_model_file1 = tf_testing.get_workload_official(
        model_urls[0][0],
        model_urls[0][1],
    )
    mod1, inputs1, params1 = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file1)
    output_list1 = generate_ref_data(mod1, inputs1, params1)

    # Second model, same treatment.
    tflite_model_file2 = tf_testing.get_workload_official(
        model_urls[1][0],
        model_urls[1][1],
    )
    mod2, inputs2, params2 = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file2)
    output_list2 = generate_ref_data(mod2, inputs2, params2)

    # Compile both models together so they share the single external pool.
    compiled_test_mods = compile_models(
        [
            AOTTestModel(name="mod1",
                         module=mod1,
                         inputs=inputs1,
                         outputs=output_list1,
                         params=params1),
            AOTTestModel(name="mod2",
                         module=mod2,
                         inputs=inputs2,
                         outputs=output_list2,
                         params=params2),
        ],
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    # With USMP enabled, no dynamic workspace allocation calls should remain.
    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
示例#21
0
def test_resnet_subgraph(algorithm, workspace_size):
    """Plan the ResnetStructure TIR module with the given USMP algorithm and
    verify the recorded buffer conflicts and the resulting max workspace size
    against *workspace_size*.
    """
    target = Target("c")
    global_workspace_pool = WorkspacePoolInfo(
        "global_workspace",
        [target],
    )
    tir_mod = ResnetStructure
    tir_mod = _assign_targets_to_primfuncs_irmodule(tir_mod, target)
    tir_mod = _assign_poolinfos_to_allocates_in_irmodule(tir_mod, [global_workspace_pool])
    main_func = tir_mod["tvmgen_default_run_model"]
    buffer_info_analysis = tvm.tir.usmp.analysis.extract_buffer_info(main_func, tir_mod)
    # Sanity-check the memory pressure computed by the analysis.
    assert buffer_info_analysis.memory_pressure == 7200256

    fcreate_array_bi = tvm.get_global_func("tir.usmp.CreateArrayBufferInfo")
    buffer_info_arr = fcreate_array_bi(buffer_info_analysis.buffer_info_stmts)
    fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
    buffer_pool_allocations = fusmp_algo(buffer_info_arr, buffer_info_analysis.memory_pressure)

    # Index the analysis results by buffer name for the conflict checks below.
    buffer_info_map_names = dict()
    for buf_info in buffer_info_arr:
        buffer_info_map_names[buf_info.name_hint] = buf_info

    # check conflicts
    _verify_conflicts(
        "sid_7",
        [
            "PaddedInput_1",
            "sid_2",
            "Conv2dOutput_1",
            "PaddedInput_2",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "Conv2dOutput_3",
        [
            "PaddedInput_3",
            "sid_6",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "sid_6",
        [
            "Conv2dOutput_2",
            "PaddedInput_2",
            "sid_2",
            "PaddedInput_3",
            "Conv2dOutput_3",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "Conv2dOutput",
        [
            "sid_8",
            "sid_2",
            "PaddedInput",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "PaddedInput_3",
        [
            "sid_6",
            "sid_2",
            "Conv2dOutput_3",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "Conv2dOutput_2",
        [
            "PaddedInput_2",
            "sid_2",
            "sid_6",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "PaddedInput_1",
        [
            "sid_8",
            "sid_2",
            "sid_7",
            "Conv2dOutput_1",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "Conv2dOutput_1",
        [
            "sid_7",
            "PaddedInput_1",
            "sid_2",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "PaddedInput",
        [
            "sid_2",
            "sid_8",
            "Conv2dOutput",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "sid_8",
        [
            "PaddedInput",
            "sid_2",
            "Conv2dOutput",
            "PaddedInput_1",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "sid_2",
        [
            "PaddedInput",
            "sid_8",
            "Conv2dOutput",
            "PaddedInput_1",
            "sid_7",
            "Conv2dOutput_1",
            "PaddedInput_2",
            "Conv2dOutput_2",
            "sid_6",
            "PaddedInput_3",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "PaddedInput_2",
        [
            "sid_7",
            "sid_2",
            "Conv2dOutput_2",
            "sid_6",
        ],
        buffer_info_map_names,
    )

    _check_max_workspace_size(buffer_pool_allocations, global_workspace_pool, workspace_size)
示例#22
0
def test_tflite_model_u4_usecase_two_external_pools(model_url, usmp_algo):
    """This checks for inference with USMP using external pool placed in the application"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    # Two workspace pools: the first size-hinted, the second unconstrained.
    workspace_memory_pools = WorkspaceMemoryPools([
        WorkspacePoolInfo("my_memory_pool_1", [target],
                          PoolInfoProperties(size_hint_bytes=2500000)),
        WorkspacePoolInfo("my_memory_pool_2", [target]),
    ])

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    # Size of the (single) input tensor, needed for the memcpy in the prologue.
    input_name, input_data = list(inputs.items())[0]
    input_size_bytes = input_data.size * input_data.itemsize
    # With tir.usmp.use_workspace_io enabled, I/O tensors live inside the
    # workspace pools; the prologue maps input/output structs from the pools
    # and copies the input data into the mapped input pointer.
    test_runner = AOTTestRunner(
        pass_config={
            "tir.usmp.enable": True,
            "tir.usmp.algorithm": usmp_algo,
            "tir.usmp.use_workspace_io": True,
        },
        prologue=f"""
        #include <string.h>
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_2[{_get_workspace_size_define_macro("my_memory_pool_2")}];
        struct {_add_module_prefix("workspace_pools")} {_add_module_prefix("workspace_pools")} = {{
            .my_memory_pool_1 = my_memory_pool_1,
            .my_memory_pool_2 = my_memory_pool_2,
        }};
        struct {_add_module_prefix("inputs")} {_add_module_prefix("inputs")} = {_add_module_prefix("map_inputs")}(&{_add_module_prefix("workspace_pools")});
        memcpy({_add_module_prefix("inputs")}.{input_name}, tvmgen_default_input_data_input, {input_size_bytes});
        struct {_add_module_prefix("outputs")} {_add_module_prefix("outputs")} = {_add_module_prefix("map_outputs")}(&{_add_module_prefix("workspace_pools")});
        """,
    )

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    # With USMP enabled, no dynamic workspace allocation calls should remain.
    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
        use_workspace_io=True,
    )