def test_tflite_model_u3_usecase_two_external_pools(model_url, usmp_algo):
    """This checks for inference using two external pools placed in the application"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    workspace_memory_pools = WorkspaceMemoryPools(
        [
            WorkspacePoolInfo(
                "my_memory_pool_1", [target], PoolInfoProperties(size_hint_bytes=2500000)
            ),
            WorkspacePoolInfo("my_memory_pool_2", [target]),
        ]
    )
    test_runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo},
        prologue=f"""
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_2[{_get_workspace_size_define_macro("my_memory_pool_2")}];
        """,
    )

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
def test_multiple_memory_pools():
    """
    The cascader does not support multiple workspace memory pools.
    Check the correct error is thrown.
    """
    np.random.seed(2)
    ifm_shape = (1, 80, 75, 3)

    target, ethosu_target, runtime, executor, pass_config = _get_compilation_config(
        "ethos-u55-256", True, True
    )
    workspace_memory_pools = WorkspaceMemoryPools(
        [
            WorkspacePoolInfo(
                "SRAM",
                [target, ethosu_target],
                PoolInfoProperties(
                    size_hint_bytes=1,
                    read_bandwidth_bytes_per_cycle=16,
                    write_bandwidth_bytes_per_cycle=16,
                    target_burst_bytes={ethosu_target: 1},
                ),
            ),
            WorkspacePoolInfo(
                "SRAM",
                [target, ethosu_target],
                PoolInfoProperties(
                    size_hint_bytes=1,
                    read_bandwidth_bytes_per_cycle=16,
                    write_bandwidth_bytes_per_cycle=16,
                    target_burst_bytes={ethosu_target: 1},
                ),
            ),
        ]
    )

    @tf.function
    def tf_graph(x):
        return tf.nn.max_pool(x, (3, 3), (1, 1), "SAME")

    _, tflite_graph = infra.get_tflite_graph(tf_graph, [ifm_shape])
    tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)
    relay_module, params = relay.frontend.from_tflite(tflite_model)
    mod = partition_for_ethosu(relay_module, params)

    with pytest.raises(ValueError) as e:
        with tvm.transform.PassContext(opt_level=3, config=pass_config):
            tvm.relay.build(
                mod,
                target,
                executor=executor,
                runtime=runtime,
                workspace_memory_pools=workspace_memory_pools,
                params=params,
            )

    expected_reason = "Exactly one workspace pool needs to be provided for the U55 cascader"
    on_error = "A ValueError was caught but its reason is not the expected one."
    assert expected_reason in str(e.value), on_error
def test_no_pool_error():
    """Checks that the greedy allocator raises an error when the provided pools are too small."""
    target = Target("c")
    tiny_workspace_pool = WorkspacePoolInfo(
        "tiny_workspace",
        [target],
        PoolInfoProperties(size_hint_bytes=10),
    )
    bi_a = usmp_utils.BufferInfo(
        name_hint="bi_a", size_bytes=10, pool_candidates=[tiny_workspace_pool]
    )
    bi_b = usmp_utils.BufferInfo(
        name_hint="bi_b", size_bytes=10, pool_candidates=[tiny_workspace_pool]
    )
    bi_c = usmp_utils.BufferInfo(
        name_hint="bi_c", size_bytes=10, pool_candidates=[tiny_workspace_pool]
    )
    bi_a.set_conflicts([bi_b])
    bi_b.set_conflicts([bi_c])
    bi_c.set_conflicts([bi_a])
    buffer_info_arr = [bi_a, bi_b, bi_c]
    fusmp_algo = tvm.get_global_func("tir.usmp.algo.greedy_by_size")
    with pytest.raises(
        tvm.TVMError, match="TVM USMP Error: the space available in the provided pools exceeded"
    ):
        fusmp_algo(buffer_info_arr, 0)
def _get_ethosu_workspace_size(
    mod, params, accel_type, pool_size, enable_cascader, enable_striping
):
    """Builds the module for the given accelerator config and returns the main workspace size."""
    target, ethosu_target, runtime, executor, pass_config = _get_compilation_config(
        accel_type, enable_cascader, enable_striping
    )

    workspace_memory_pools = WorkspaceMemoryPools(
        [
            WorkspacePoolInfo(
                "SRAM",
                [target, ethosu_target],
                PoolInfoProperties(
                    size_hint_bytes=pool_size,
                    read_bandwidth_bytes_per_cycle=16,
                    write_bandwidth_bytes_per_cycle=16,
                    target_burst_bytes={ethosu_target: 1},
                ),
            ),
        ]
    )

    with tvm.transform.PassContext(opt_level=3, config=pass_config):
        lib = tvm.relay.build(
            mod,
            target,
            executor=executor,
            runtime=runtime,
            workspace_memory_pools=workspace_memory_pools,
            params=params,
        )

    mlf_memory_map = mlf._build_function_memory_map(lib.function_metadata)
    return mlf_memory_map["main"][0]["workspace_size_bytes"]
def _test():
    target = Target("c")
    global_workspace_pool = WorkspacePoolInfo(
        "global_workspace",
        [target],
    )
    bi_a = usmp_utils.BufferInfo(
        name_hint="bi_a", size_bytes=10, pool_candidates=[global_workspace_pool]
    )
    bi_b = usmp_utils.BufferInfo(
        name_hint="bi_b", size_bytes=10, pool_candidates=[global_workspace_pool]
    )
    bi_c = usmp_utils.BufferInfo(
        name_hint="bi_c", size_bytes=10, pool_candidates=[global_workspace_pool]
    )

    # All three buffers are live at the same time, so none of them may overlap.
    bi_a.set_conflicts([bi_b, bi_c])
    bi_b.set_conflicts([bi_c, bi_a])
    bi_c.set_conflicts([bi_a, bi_b])

    buffer_info_arr = [bi_a, bi_b, bi_c]
    fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
    buffer_pool_allocations = fusmp_algo(buffer_info_arr, 0)

    # Three mutually conflicting 10-byte buffers are packed back to back in the pool.
    assert buffer_pool_allocations[bi_a].byte_offset == 20
    assert buffer_pool_allocations[bi_b].byte_offset == 10
    assert buffer_pool_allocations[bi_c].byte_offset == 0
def test_create_pool_info():
    target = Target("c")
    pool_info = WorkspacePoolInfo(
        "foo_workspace",
        [target],
    )
    assert pool_info.pool_name == "foo_workspace"
    # default pool size constraint
    assert pool_info.size_hint_bytes == -1

    pool_info = WorkspacePoolInfo(
        "bar_workspace",
        [target],
        PoolInfoProperties(size_hint_bytes=1425),
    )
    assert pool_info.pool_name == "bar_workspace"
    assert pool_info.size_hint_bytes == 1425
def test_create_pool_allocation():
    pool_info = WorkspacePoolInfo(
        "foo_workspace",
        [Target("c")],
    )
    pool_allocation = usmp_utils.PoolAllocation(pool_info=pool_info, byte_offset=64)
    assert pool_allocation.pool_info == pool_info
    assert pool_allocation.byte_offset == 64
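# A minimal sketch (illustrative only, not exercised by the tests in this file):
# a PoolAllocation records which pool a buffer was placed in and its byte offset
# from the start of that pool, so resolving the buffer's final address inside an
# application-provided pool is plain base-plus-offset arithmetic.
# `_example_resolve_buffer_address` is a hypothetical helper, not a TVM API.
def _example_resolve_buffer_address(pool_base_address: int, pool_allocation) -> int:
    return pool_base_address + pool_allocation.byte_offset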
def test_linear(algorithm, workspace_size):
    """
    The test case here represents BufferInfo objects
    that could get generated for a linear sequence
    such as :
    (Op A)
       |
     bi_a
       |
    (Op B)
       |
     bi_b
       |
       .
       .
       .
    (Op F)
       |
     bi_f
    """
    target = Target("c")
    global_workspace_pool = WorkspacePoolInfo(
        "global_workspace",
        [target],
    )
    bi_a = usmp_utils.BufferInfo(
        name_hint="bi_a", size_bytes=10, pool_candidates=[global_workspace_pool]
    )
    bi_b = usmp_utils.BufferInfo(
        name_hint="bi_b", size_bytes=20, pool_candidates=[global_workspace_pool]
    )
    bi_c = usmp_utils.BufferInfo(
        name_hint="bi_c", size_bytes=100, pool_candidates=[global_workspace_pool]
    )
    bi_d = usmp_utils.BufferInfo(
        name_hint="bi_d", size_bytes=40, pool_candidates=[global_workspace_pool]
    )
    bi_e = usmp_utils.BufferInfo(
        name_hint="bi_e", size_bytes=50, pool_candidates=[global_workspace_pool]
    )
    bi_f = usmp_utils.BufferInfo(
        name_hint="bi_f", size_bytes=50, pool_candidates=[global_workspace_pool]
    )

    # Creating conflicts for a linear graph
    bi_a.set_conflicts([bi_b])
    bi_b.set_conflicts([bi_a, bi_c])
    bi_c.set_conflicts([bi_b, bi_d])
    bi_d.set_conflicts([bi_c, bi_e])
    bi_e.set_conflicts([bi_d, bi_f])
    bi_f.set_conflicts([bi_e])

    buffer_info_arr = [bi_a, bi_b, bi_c, bi_d, bi_e, bi_f]
    fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
    buffer_pool_allocations = fusmp_algo(buffer_info_arr, 0)
    _check_max_workspace_size(buffer_pool_allocations, global_workspace_pool, workspace_size)
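# A minimal sketch of the quantity a check such as _check_max_workspace_size compares
# (illustrative only; the real helpers used in these tests may assert directly or
# return a (result, message) pair). The peak requirement of a pool is the largest
# byte_offset + size_bytes over all buffers placed in it, which is then measured
# against the expected workspace size. `_example_peak_pool_usage` is hypothetical.
def _example_peak_pool_usage(buffer_pool_allocations, pool_info):
    return max(
        (
            alloc.byte_offset + buf_info.size_bytes
            for buf_info, alloc in buffer_pool_allocations.items()
            if alloc.pool_info == pool_info
        ),
        default=0,
    )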
def compare_ethosu_with_reference(
    mod,
    input_data,
    output_data,
    accel_type: str,
    output_tolerance=0,
    print_cmm=False,
    enable_cascader=None,
):
    if enable_cascader is None:
        enable_cascader = "u65" not in accel_type
    pool_name = "my_memory_pool"
    host_target = tvm.target.Target("c")
    ethosu_target = tvm.target.Target("ethos-u")
    workspace_pools = WorkspaceMemoryPools(
        [
            WorkspacePoolInfo(
                pool_name,
                [host_target, ethosu_target],
                PoolInfoProperties(
                    size_hint_bytes=2400000,
                    read_bandwidth_bytes_per_cycle=16,
                    write_bandwidth_bytes_per_cycle=16,
                    target_burst_bytes={ethosu_target: 1},
                ),
            )
        ]
    )
    test_runner = create_test_runner(
        accel_type,
        enable_usmp=True,
        enable_cascader=enable_cascader,
        enable_striping=False,
        workspace_pools=workspace_pools,
    )
    compiled_models = build_source(
        mod,
        input_data,
        output_data,
        test_runner,
        workspace_pools=workspace_pools,
        output_tolerance=output_tolerance,
    )

    # Assumes only two runtime.Modules are created -- i.e. single offload module
    ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0]

    # Verify generated C source
    if print_cmm:
        get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts")
        compilation_artifacts = get_artifacts(ethosu_module)
        cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
        print_payload(cmms)

    verify_source(compiled_models, test_runner)
def test_bounded(
    random_len=150,
    pools=[
        WorkspacePoolInfo("default", [], PoolInfoProperties(65535)),
        WorkspacePoolInfo("slow", []),
    ],
):
    """Tests two pools, one is bounded and one is not limited"""
    random.seed(0)
    mem_range = [BufferInfo(str(i), random.randrange(1, 65535), pools) for i in range(random_len)]
    for mr in mem_range:
        pr = random.choice(mem_range)
        while pr in (*mr.conflicts, mr):
            pr = random.choice(mem_range)

        mr.set_conflicts([*mr.conflicts, pr])
        pr.set_conflicts([*pr.conflicts, mr])

    fusmp_algo = tvm.get_global_func("tir.usmp.algo.hill_climb")
    result_map = fusmp_algo(mem_range, 0)
    _verify_all_conflicts(result_map)
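# A minimal sketch of the kind of property a helper such as _verify_all_conflicts
# checks (illustrative only; the real helper is defined elsewhere in this file and
# may differ). For every pair of conflicting buffers that ended up in the same pool,
# their [offset, offset + size) ranges must not overlap. `_example_verify_no_overlap`
# is a hypothetical helper written only for illustration.
def _example_verify_no_overlap(buffer_pool_allocations):
    by_name = {
        buf_info.name_hint: (buf_info, alloc)
        for buf_info, alloc in buffer_pool_allocations.items()
    }
    for buf_info, alloc in buffer_pool_allocations.items():
        for conflict in buf_info.conflicts:
            other_buf, other_alloc = by_name[conflict.name_hint]
            if other_alloc.pool_info != alloc.pool_info:
                continue
            a_start, a_end = alloc.byte_offset, alloc.byte_offset + buf_info.size_bytes
            b_start, b_end = other_alloc.byte_offset, other_alloc.byte_offset + other_buf.size_bytes
            assert a_end <= b_start or b_end <= a_start, (
                f"{buf_info.name_hint} overlaps with {conflict.name_hint}"
            )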
def test_mobilenet_subgraph(algorithm, fast_memory_size, slow_memory_size):
    target = Target("c")
    fast_memory_pool = WorkspacePoolInfo(
        "fast_memory",
        [target],
        PoolInfoProperties(size_hint_bytes=200704),
    )
    slow_memory_pool = WorkspacePoolInfo(
        "slow_memory",
        [target],
    )
    tir_mod = MobilenetStructure
    tir_mod = _assign_targets_to_primfuncs_irmodule(tir_mod, target)
    tir_mod = _assign_poolinfos_to_allocates_in_irmodule(
        tir_mod, [fast_memory_pool, slow_memory_pool]
    )
    main_func = tir_mod["run_model"]
    buffer_info_analysis = tvm.tir.usmp.analysis.extract_buffer_info(main_func, tir_mod)
    assert buffer_info_analysis.memory_pressure == 1117718

    fcreate_array_bi = tvm.get_global_func("tir.usmp.CreateArrayBufferInfo")
    buffer_info_arr = fcreate_array_bi(buffer_info_analysis.buffer_info_stmts)
    fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
    buffer_pool_allocations = fusmp_algo(buffer_info_arr, buffer_info_analysis.memory_pressure)

    buffer_info_map_names = dict()
    for buf_info in buffer_info_arr:
        buffer_info_map_names[buf_info.name_hint] = buf_info

    # check conflicts
    _verify_conflicts("PaddedInput_7", ["sid_9", "sid_8", "Conv2dOutput_7"], buffer_info_map_names)
    _verify_conflicts("tensor_2", ["sid_8"], buffer_info_map_names)
    _verify_conflicts("sid_9", ["PaddedInput_7"], buffer_info_map_names)
    _verify_conflicts(
        "sid_8", ["PaddedInput_7", "Conv2dOutput_7", "tensor_2"], buffer_info_map_names
    )
    _verify_conflicts("Conv2dOutput_7", ["sid_8", "PaddedInput_7"], buffer_info_map_names)

    _check_max_workspace_size(buffer_pool_allocations, slow_memory_pool, slow_memory_size)
    _check_max_workspace_size(buffer_pool_allocations, fast_memory_pool, fast_memory_size)
def test_create_array_buffer_info():
    target = Target("c")
    global_ws_pool = WorkspacePoolInfo(
        "global_workspace",
        [target],
    )
    fcreate_array_bi = tvm.get_global_func("tir.usmp.CreateArrayBufferInfo")
    tir_mod = LinearStructure
    tir_mod = _assign_targets_to_primfuncs_irmodule(tir_mod, target)
    tir_mod = _assign_poolinfos_to_allocates_in_irmodule(tir_mod, [global_ws_pool])
    main_func = tir_mod["tvmgen_default_run_model"]
    buffer_info_analysis = tvm.tir.usmp.analysis.extract_buffer_info(main_func, tir_mod)
    buffer_info_array = fcreate_array_bi(buffer_info_analysis.buffer_info_stmts)
    for buffer_info in buffer_info_array:
        assert buffer_info in buffer_info_analysis.buffer_info_stmts.keys()
def run_intervals(intervals, tolerance=0):
    """Helper to run intervals"""
    expected_mem = find_maximum_from_intervals(intervals)
    pools = [WorkspacePoolInfo("default", [])]
    buffers = []
    # populate
    for i, (start, stop, size) in enumerate(intervals):
        buf = BufferInfo(str(i), size, pools)
        # buf.set_pool_candidates( ["default"] )
        buffers.append(buf)

    # intersect
    for i, (i_start, i_stop, _) in enumerate(intervals):
        conflicts = set()
        for j, (j_start, j_stop, _) in enumerate(intervals):
            start = min(i_start, j_start)
            stop = max(i_stop, j_stop)
            i_dur = i_stop - i_start + 1
            j_dur = j_stop - j_start + 1

            if i != j and (stop - start + 1 < i_dur + j_dur):
                conflicts.add(buffers[j])

        buffers[i].set_conflicts([c for c in sorted(conflicts, key=lambda c: c.name_hint)])

    result = {}
    for (alg, params) in [
        ("tir.usmp.algo.hill_climb", (expected_mem,)),
        ("tir.usmp.algo.greedy_by_size", (expected_mem,)),
    ]:
        fusmp_algo = tvm.get_global_func(alg)
        print("\n", "started", alg)
        buffer_info_arr = fusmp_algo(buffers, *params)
        print()

        _verify_all_conflicts(buffer_info_arr)
        result[alg], msg = _check_max_workspace_size(
            buffer_info_arr, pools[0], expected_mem, tolerance
        )
        if not result[alg]:
            print(alg, msg)

    return result
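# A minimal usage sketch for the helper above (illustrative only; the interval
# values below are hypothetical and not taken from an existing test). Each tuple
# is (first_use, last_use, size_in_bytes); overlapping lifetimes become conflicts,
# and both registered algorithms are expected to fit within the analytically
# derived maximum.
def _example_run_intervals():
    example_intervals = [
        (0, 2, 200),  # live during steps 0..2
        (1, 3, 200),  # overlaps the first buffer
        (3, 5, 100),  # overlaps only the second buffer
    ]
    return run_intervals(example_intervals)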
def test_custom_algo():
    target = Target("c")
    global_workspace_pool = WorkspacePoolInfo(
        "global_workspace",
        [target],
    )
    tir_mod = ResnetStructure
    tir_mod = _assign_targets_to_primfuncs_irmodule(tir_mod, target)
    tir_mod = _assign_poolinfos_to_allocates_in_irmodule(tir_mod, [global_workspace_pool])
    tir_mod = tir_mod.with_attr("executor", tvm.relay.backend.Executor("aot"))
    tir_mod = tir_mod.with_attr("runtime", tvm.relay.backend.Runtime("crt"))
    tir_mod["__tvm_main__"] = tir_mod[
        "tvmgen_default_fused_cast_subtract_fixed_point_multiply_add_clip_cast_cast"
    ]

    algo_called = False

    @tvm.register_func("tir.usmp.algo.trivial")
    def _trivial_algo(buf_infos, mem_pressure):
        nonlocal algo_called
        algo_called = True
        out_layout = {}
        offset = 0
        for buf_info in buf_infos:
            pool_info = buf_info.pool_candidates[0]
            out_layout[buf_info] = usmp_utils.PoolAllocation(pool_info, offset)
            offset += buf_info.size_bytes
        return out_layout

    usmp_pass = tvm.get_global_func("tir.transform.UnifiedStaticMemoryPlanner")
    usmp_pass()(tir_mod)
    assert not algo_called

    with tvm.transform.PassContext(config={"tir.usmp.custom_algorithm": "trivial"}):
        usmp_pass()(tir_mod)

    assert algo_called

    with pytest.raises(
        tvm.TVMError, match="The selected custom USMP algorithm : invalid is not defined"
    ):
        with tvm.transform.PassContext(config={"tir.usmp.custom_algorithm": "invalid"}):
            usmp_pass()(tir_mod)
def test_create_buffer_info():
    global_ws_pool = WorkspacePoolInfo(
        "global_workspace",
        [Target("c")],
    )
    buffer_info_obj = tvm.tir.usmp.BufferInfo(
        name_hint="buf1", size_bytes=256, pool_candidates=[global_ws_pool]
    )
    assert buffer_info_obj.name_hint == "buf1"
    assert buffer_info_obj.size_bytes == 256
    assert list(buffer_info_obj.pool_candidates) == [global_ws_pool]
    # default workspace alignment
    assert buffer_info_obj.alignment == 1

    buffer_info_obj = tvm.tir.usmp.BufferInfo("buf2", 512, [global_ws_pool], 8)
    assert buffer_info_obj.name_hint == "buf2"
    assert buffer_info_obj.size_bytes == 512
    assert list(buffer_info_obj.pool_candidates) == [global_ws_pool]
    assert buffer_info_obj.alignment == 8
def test_extract_memory_info():
    """
    Test memory pressure value correctly reduces the workspace size.
    """
    initial_pool_size = 2000
    memory_pressure = 500
    memory_pool = WorkspacePoolInfo(
        "SRAM",
        [tvm.target.Target("c"), tvm.target.Target("ethos-u")],
        PoolInfoProperties(
            size_hint_bytes=initial_pool_size,
            read_bandwidth_bytes_per_cycle=16,
            write_bandwidth_bytes_per_cycle=16,
            target_burst_bytes={tvm.target.Target("ethos-u"): 1},
        ),
    )

    sram = extract_memory_info(memory_pool, memory_pressure)
    assert sram.size == initial_pool_size - memory_pressure
def test_networks_with_usmp_and_cascader_wo_striping(accel_type, model_url, workspace_size):
    np.random.seed(23)

    pool_name = "my_memory_pool"
    host_target = tvm.target.Target("c")
    ethosu_target = tvm.target.Target("ethos-u")
    workspace_pools = WorkspaceMemoryPools(
        [
            WorkspacePoolInfo(
                pool_name,
                [host_target, ethosu_target],
                PoolInfoProperties(
                    size_hint_bytes=2400000,
                    read_bandwidth_bytes_per_cycle=16,
                    write_bandwidth_bytes_per_cycle=16,
                    target_burst_bytes={ethosu_target: 1},
                ),
            )
        ]
    )
    tflite_model_buf = infra.get_tflite_model(model_url)
    input_data, output_data = infra.generate_ref_data_tflite(tflite_model_buf)
    mod, params = convert_to_relay(tflite_model_buf)
    mod = partition_for_ethosu(mod, params)

    test_runner = infra.create_test_runner(
        accel_type,
        enable_usmp=True,
        enable_cascader=True,
        enable_striping=False,
        workspace_pools=workspace_pools,
    )
    compiled_models = infra.build_source(
        mod, input_data, output_data, test_runner, workspace_pools=workspace_pools
    )
    infra.verify_source(compiled_models, test_runner)

    allocated_pool_info = list(
        dict(compiled_models[0].executor_factory.executor_codegen_metadata.pool_inputs).values()
    )[0]
    assert allocated_pool_info.allocated_size == workspace_size
def test_fanout(algorithm, workspace_size):
    """
    The test case here represents BufferInfo objects
    that could get generated for a fanout topology
    such as :
               (Op A)
                  |
                bi_a
                  |
            ---------------
            |             |
         (Op B)        (Op C)
            |             |
          bi_b          bi_c
            |             |
         (Op D)        (Op E)
            |             |
          bi_d          bi_e
            |             |
         (Op F) -----------
            |
          bi_f
            |
         (Op G)
            |
          bi_g
    """
    target = Target("c")
    global_workspace_pool = WorkspacePoolInfo(
        "global_workspace",
        targets=[target],
    )
    bi_a = usmp_utils.BufferInfo(
        name_hint="bi_a", size_bytes=10, pool_candidates=[global_workspace_pool]
    )
    bi_b = usmp_utils.BufferInfo(
        name_hint="bi_b", size_bytes=20, pool_candidates=[global_workspace_pool]
    )
    bi_c = usmp_utils.BufferInfo(
        name_hint="bi_c", size_bytes=100, pool_candidates=[global_workspace_pool]
    )
    bi_d = usmp_utils.BufferInfo(
        name_hint="bi_d", size_bytes=40, pool_candidates=[global_workspace_pool]
    )
    bi_e = usmp_utils.BufferInfo(
        name_hint="bi_e", size_bytes=50, pool_candidates=[global_workspace_pool]
    )
    bi_f = usmp_utils.BufferInfo(
        name_hint="bi_f", size_bytes=60, pool_candidates=[global_workspace_pool]
    )
    bi_g = usmp_utils.BufferInfo(
        name_hint="bi_g", size_bytes=70, pool_candidates=[global_workspace_pool]
    )

    # Creating conflicts for a fanout graph
    bi_a.set_conflicts([bi_b, bi_c])
    bi_b.set_conflicts([bi_a, bi_c, bi_e])
    bi_c.set_conflicts([bi_e, bi_a, bi_b, bi_d])
    bi_d.set_conflicts([bi_b, bi_f, bi_c, bi_e])
    bi_e.set_conflicts([bi_c, bi_f, bi_b, bi_d])
    bi_f.set_conflicts([bi_d, bi_e, bi_f])
    bi_g.set_conflicts([bi_f])

    buffer_info_arr = [bi_a, bi_b, bi_c, bi_d, bi_e, bi_f, bi_g]
    fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
    buffer_pool_allocations = fusmp_algo(buffer_info_arr, 0)
    _check_max_workspace_size(buffer_pool_allocations, global_workspace_pool, workspace_size)
def test_tflite_model_u3_usecase_conv2d_var_cons(usmp_algo):
    """This checks for inference using workspace and constant pools placed in the application"""

    mod = tvm.parser.fromtext(
        """\
        #[version = "0.0.5"]
        def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(3, 3, 5, 5), int8]) {
            %1 = nn.conv2d(%data, %weight, padding=[2, 2], channels=3, kernel_size=[5, 5],
                 data_layout="NCHW", kernel_layout="OIHW", out_dtype="int32");
            %2 = cast(nn.max_pool2d(%1, pool_size=[3, 3]), dtype="int8");
            %3 = nn.conv2d(%2, %weight, padding=[2, 2], channels=3, kernel_size=[5, 5],
                 data_layout="NCHW", kernel_layout="OIHW", out_dtype="int32");
            %4 = nn.max_pool2d(%3, pool_size=[3, 3]);
            %4
        }
        """
    )

    main_func = mod["main"]
    shape_dict = {p.name_hint: p.checked_type.concrete_shape for p in main_func.params}
    type_dict = {p.name_hint: p.checked_type.dtype for p in main_func.params}

    weight_data = np.random.randint(1, 255, shape_dict["weight"]).astype(type_dict["weight"])
    input_data = np.ones(shape_dict["data"]).astype(type_dict["data"])
    params = {"weight": weight_data}
    inputs = {"data": input_data}

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    workspace_mem_pools = WorkspaceMemoryPools(
        [
            WorkspacePoolInfo(
                "my_memory_pool_1", [target], PoolInfoProperties(size_hint_bytes=8500000)
            ),
        ]
    )
    constant_mem_pools = ConstantMemoryPools(
        [
            ConstantPoolInfo("my_const_pool_1", [target], []),
        ]
    )

    test_runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo},
        prologue=f"""
        __attribute__((section(".bss.noinit"), aligned(TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".rodata.tvm"), aligned(TVM_RUNTIME_CONST_ALLOC_ALIGNMENT_BYTES)))
        static uint8_t my_const_pool_1[{_get_constant_size_define_macro("my_const_pool_1")}] = {{
            {_get_constant_data_define_macro("my_const_pool_1")}
        }};
        """,
    )

    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_mem_pools,
        constant_memory_pools=constant_mem_pools,
        target=target,
    )

    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
def test_two_models_with_a_single_external_pool(model_urls, usmp_algo):
    """This checks for inference using a single large enough common pool"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    workspace_memory_pools = WorkspaceMemoryPools([WorkspacePoolInfo("my_memory_pool", [target])])

    test_runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo},
        prologue=f"""
        #define MAX(A, B) ((A > B) ? A : B)
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool[MAX({_get_workspace_size_define_macro("my_memory_pool", "mod1")},{_get_workspace_size_define_macro("my_memory_pool", "mod2")})];
        """,
    )

    tflite_model_file1 = tf_testing.get_workload_official(
        model_urls[0][0],
        model_urls[0][1],
    )
    mod1, inputs1, params1 = create_relay_module_and_inputs_from_tflite_file(tflite_model_file1)
    output_list1 = generate_ref_data(mod1, inputs1, params1)

    tflite_model_file2 = tf_testing.get_workload_official(
        model_urls[1][0],
        model_urls[1][1],
    )
    mod2, inputs2, params2 = create_relay_module_and_inputs_from_tflite_file(tflite_model_file2)
    output_list2 = generate_ref_data(mod2, inputs2, params2)

    compiled_test_mods = compile_models(
        [
            AOTTestModel(
                name="mod1", module=mod1, inputs=inputs1, outputs=output_list1, params=params1
            ),
            AOTTestModel(
                name="mod2", module=mod2, inputs=inputs2, outputs=output_list2, params=params2
            ),
        ],
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
def test_resnet_subgraph(algorithm, workspace_size):
    target = Target("c")
    global_workspace_pool = WorkspacePoolInfo(
        "global_workspace",
        [target],
    )
    tir_mod = ResnetStructure
    tir_mod = _assign_targets_to_primfuncs_irmodule(tir_mod, target)
    tir_mod = _assign_poolinfos_to_allocates_in_irmodule(tir_mod, [global_workspace_pool])
    main_func = tir_mod["tvmgen_default_run_model"]
    buffer_info_analysis = tvm.tir.usmp.analysis.extract_buffer_info(main_func, tir_mod)
    assert buffer_info_analysis.memory_pressure == 7200256

    fcreate_array_bi = tvm.get_global_func("tir.usmp.CreateArrayBufferInfo")
    buffer_info_arr = fcreate_array_bi(buffer_info_analysis.buffer_info_stmts)
    fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
    buffer_pool_allocations = fusmp_algo(buffer_info_arr, buffer_info_analysis.memory_pressure)

    buffer_info_map_names = dict()
    for buf_info in buffer_info_arr:
        buffer_info_map_names[buf_info.name_hint] = buf_info

    # check conflicts
    _verify_conflicts(
        "sid_7",
        [
            "PaddedInput_1",
            "sid_2",
            "Conv2dOutput_1",
            "PaddedInput_2",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "Conv2dOutput_3",
        [
            "PaddedInput_3",
            "sid_6",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "sid_6",
        [
            "Conv2dOutput_2",
            "PaddedInput_2",
            "sid_2",
            "PaddedInput_3",
            "Conv2dOutput_3",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "Conv2dOutput",
        [
            "sid_8",
            "sid_2",
            "PaddedInput",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "PaddedInput_3",
        [
            "sid_6",
            "sid_2",
            "Conv2dOutput_3",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "Conv2dOutput_2",
        [
            "PaddedInput_2",
            "sid_2",
            "sid_6",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "PaddedInput_1",
        [
            "sid_8",
            "sid_2",
            "sid_7",
            "Conv2dOutput_1",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "Conv2dOutput_1",
        [
            "sid_7",
            "PaddedInput_1",
            "sid_2",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "PaddedInput",
        [
            "sid_2",
            "sid_8",
            "Conv2dOutput",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "sid_8",
        [
            "PaddedInput",
            "sid_2",
            "Conv2dOutput",
            "PaddedInput_1",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "sid_2",
        [
            "PaddedInput",
            "sid_8",
            "Conv2dOutput",
            "PaddedInput_1",
            "sid_7",
            "Conv2dOutput_1",
            "PaddedInput_2",
            "Conv2dOutput_2",
            "sid_6",
            "PaddedInput_3",
        ],
        buffer_info_map_names,
    )
    _verify_conflicts(
        "PaddedInput_2",
        [
            "sid_7",
            "sid_2",
            "Conv2dOutput_2",
            "sid_6",
        ],
        buffer_info_map_names,
    )

    _check_max_workspace_size(buffer_pool_allocations, global_workspace_pool, workspace_size)
def test_tflite_model_u4_usecase_two_external_pools(model_url, usmp_algo):
    """This checks for inference with USMP using external pools placed in the application"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    workspace_memory_pools = WorkspaceMemoryPools(
        [
            WorkspacePoolInfo(
                "my_memory_pool_1", [target], PoolInfoProperties(size_hint_bytes=2500000)
            ),
            WorkspacePoolInfo("my_memory_pool_2", [target]),
        ]
    )

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    input_name, input_data = list(inputs.items())[0]
    input_size_bytes = input_data.size * input_data.itemsize
    test_runner = AOTTestRunner(
        pass_config={
            "tir.usmp.enable": True,
            "tir.usmp.algorithm": usmp_algo,
            "tir.usmp.use_workspace_io": True,
        },
        prologue=f"""
        #include <string.h>
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_2[{_get_workspace_size_define_macro("my_memory_pool_2")}];
        struct {_add_module_prefix("workspace_pools")} {_add_module_prefix("workspace_pools")} = {{
            .my_memory_pool_1 = my_memory_pool_1,
            .my_memory_pool_2 = my_memory_pool_2,
        }};
        struct {_add_module_prefix("inputs")} {_add_module_prefix("inputs")} = {_add_module_prefix("map_inputs")}(&{_add_module_prefix("workspace_pools")});
        memcpy({_add_module_prefix("inputs")}.{input_name}, tvmgen_default_input_data_input, {input_size_bytes});
        struct {_add_module_prefix("outputs")} {_add_module_prefix("outputs")} = {_add_module_prefix("map_outputs")}(&{_add_module_prefix("workspace_pools")});
        """,
    )

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
        use_workspace_io=True,
    )