def test_tflite_model_u3_usecase_two_external_pools(model_url, usmp_algo): """This checks for inference using two external pools placed in the application""" pytest.importorskip("tflite") import tvm.relay.testing.tf as tf_testing # pylint: disable=import-outside-toplevel use_unpacked_api = True interface_api = "c" target = tvm.target.Target("c") workspace_memory_pools = WorkspaceMemoryPools([ PoolInfo("my_memory_pool_1", {target: PoolInfo.READ_WRITE_ACCESS}, size_hint_bytes=2500000), PoolInfo("my_memory_pool_2", {target: PoolInfo.READ_WRITE_ACCESS}), ]) test_runner = AOTTestRunner( pass_config={ "tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo }, prologue=f""" __attribute__((section(".data.tvm"), aligned(16))) static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}]; __attribute__((section(".data.tvm"), aligned(16))) static uint8_t my_memory_pool_2[{_get_workspace_size_define_macro("my_memory_pool_2")}]; """, ) tflite_model_file = tf_testing.get_workload_official( model_url[0], model_url[1], ) mod, inputs, params = create_relay_module_and_inputs_from_tflite_file( tflite_model_file) output_list = generate_ref_data(mod, inputs, params) compiled_test_mods = compile_models( AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), interface_api=interface_api, use_unpacked_api=use_unpacked_api, pass_config=test_runner.pass_config, workspace_memory_pools=workspace_memory_pools, target=target, ) for compiled_model in compiled_test_mods: _check_for_no_tvm_backendallocworkspace_calls( compiled_model.executor_factory.lib) run_and_check( models=compiled_test_mods, runner=test_runner, interface_api=interface_api, )
def test_tflite_model_u1_usecase(model_url, usmp_algo, workspace_size, constant_size): """ This checks for ML models and the memory used by them when using USMP with different algorithms """ pytest.importorskip("tflite") import tvm.relay.testing.tf as tf_testing # pylint: disable=import-outside-toplevel use_unpacked_api = True interface_api = "c" test_runner = AOTTestRunner(pass_config={ "tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo }) tflite_model_file = tf_testing.get_workload_official( model_url[0], model_url[1], ) mod, inputs, params = create_relay_module_and_inputs_from_tflite_file( tflite_model_file) output_list = generate_ref_data(mod, inputs, params) compiled_test_mods = compile_models( AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), interface_api=interface_api, use_unpacked_api=use_unpacked_api, pass_config=test_runner.pass_config, ) for compiled_model in compiled_test_mods: _check_for_no_tvm_backendallocworkspace_calls( compiled_model.executor_factory.lib) # Checking the workspace size reported in model library format mlf_memory_map = mlf._build_function_memory_map( compiled_test_mods[0].executor_factory.function_metadata) assert mlf_memory_map["main"][0]["workspace_size_bytes"] == workspace_size assert mlf_memory_map["main"][0]["constants_size_bytes"] == constant_size # That should match to workspace size that will be codegen'd to the entry point. allocated_pool_info_size = sum([ _.allocated_size for _ in list( dict(compiled_test_mods[0].executor_factory. executor_codegen_metadata.pool_inputs).values()) ]) assert allocated_pool_info_size == workspace_size + constant_size run_and_check( models=compiled_test_mods, runner=test_runner, interface_api=interface_api, )
def test_conv2d(interface_api, use_unpacked_api, test_runner, groups, weight_shape): """Test a subgraph with a single conv2d operator.""" dtype = "float32" ishape = (1, 32, 14, 14) wshape = (32, weight_shape, 3, 3) pass_config = {"tir.usmp.enable": True} test_runner = AOTTestRunner( makefile=test_runner.makefile, prologue=test_runner.prologue, epilogue=test_runner.epilogue, includes=test_runner.includes, parameters=test_runner.parameters, pass_config=pass_config, ) data0 = relay.var("data", shape=ishape, dtype=dtype) weight0 = relay.var("weight", shape=wshape, dtype=dtype) out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=groups) main_f = relay.Function([data0, weight0], out) mod = tvm.IRModule() mod["main"] = main_f mod = transform.InferType()(mod) i_data = np.random.uniform(0, 1, ishape).astype(dtype) w1_data = np.random.uniform(0, 1, wshape).astype(dtype) inputs = OrderedDict([("data", i_data), ("weight", w1_data)]) output_list = generate_ref_data(mod, inputs) compile_and_run( AOTTestModel(module=mod, inputs=inputs, outputs=output_list), test_runner, interface_api, use_unpacked_api, ) compiled_test_mods = compile_models( models=AOTTestModel(module=mod, inputs=inputs, outputs=output_list), interface_api=interface_api, use_unpacked_api=use_unpacked_api, pass_config=test_runner.pass_config, ) for compiled_model in compiled_test_mods: _check_for_no_tvm_backendallocworkspace_calls( compiled_model.executor_factory.lib) run_and_check( models=compiled_test_mods, runner=test_runner, interface_api=interface_api, )
def verify_source(models: List[AOTCompiledTestModel], test_runner): """ This method verifies the generated source from an NPU module by building it and running on an FVP. """ interface_api = "c" run_and_check( models, test_runner, interface_api, workspace_byte_alignment=16, data_linkage=AOTDataLinkage(section="ethosu_scratch", alignment=16), )
def verify_source( models: List[AOTCompiledTestModel], accel="ethos-u55-256", enable_usmp=True, enable_cascader=False, ): """ This method verifies the generated source from an NPU module by building it and running on an FVP. """ interface_api = "c" test_runner = create_test_runner(accel, enable_usmp, enable_cascader) run_and_check( models, test_runner, interface_api, workspace_byte_alignment=16, data_linkage=AOTDataLinkage(section="ethosu_scratch", alignment=16), )
def test_relay_conv2d_cmsisnn_depthwise_int8( padding, strides, dilation, relu_type, input_zero_point, input_scale, kernel_scale, depth_multiplier, ): """Tests QNN Depthwise int8 op via CMSIS-NN""" interface_api = "c" use_unpacked_api = True test_runner = AOT_USMP_CORSTONE300_RUNNER dtype = "int8" in_min, in_max = get_range_for_dtype_str(dtype) ifm_shape = (1, 24, 24, 1) groups = ifm_shape[3] weight_format = "HWIO" (kernel_h, kernel_w) = (3, 3) kernel_shape = (kernel_h, kernel_w, ifm_shape[3], depth_multiplier) out_channels = ifm_shape[3] * depth_multiplier enable_bias = True ks_len = len(kernel_scale) kernel_zero_point = 0 kernel_scale = [kernel_scale[i % ks_len] for i in range(out_channels)] output_scale, output_zero_point = get_conv2d_qnn_params( kernel_shape, input_scale, input_zero_point, kernel_scale, kernel_zero_point, dtype, dtype, dtype, True, ) model, params = make_model( ifm_shape, kernel_shape, input_zero_point, input_scale, kernel_zero_point, kernel_scale, output_zero_point, output_scale, padding, strides, dilation, groups, dtype, dtype, out_channels, weight_format, enable_bias, relu_type, ) orig_mod = make_module(model) cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params) # validate pattern matching assert_partitioned_function(orig_mod, cmsisnn_mod) # generate reference output rng = np.random.default_rng(12345) inputs = {"input": rng.integers(in_min, high=in_max, size=ifm_shape, dtype=dtype)} output_list = generate_ref_data(orig_mod["main"], inputs, params) # validate presence of depthwise convolution compiled_models = compile_models( AOTTestModel( module=cmsisnn_mod, inputs=inputs, outputs=output_list, params=params, output_tolerance=1, ), interface_api, use_unpacked_api, pass_config=test_runner.pass_config, ) cmsisnn_tir_mod = None for target, mod in compiled_models[0].executor_factory.lowered_ir_mods.items(): if target.kind.name == "cmsis-nn": cmsisnn_tir_mod = mod cmsisnn_func = cmsisnn_tir_mod["tvmgen_default_cmsis_nn_main_0"] call_extern = None # This happens when context buffer is init in case depthM != 1 if isinstance(cmsisnn_func.body, tvm.tir.stmt.Evaluate): call_extern = cmsisnn_func.body.value else: call_extern = cmsisnn_func.body.body.value assert ( call_extern.args[0].value == "arm_depthwise_conv_wrapper_s8" ), "Relay Conv2D should be mapped to CMSIS-NN Depthwise Convolution." # validate the output run_and_check( models=compiled_models, runner=test_runner, interface_api=interface_api, )
def test_reshape_removal(padding): """Tests reshape is removed from the network""" interface_api = "c" use_unpacked_api = True test_runner = AOT_USMP_CORSTONE300_RUNNER in_shape = (1, 28, 28, 12) pool_size = (3, 3) strides = (2, 2) relu_type = "NONE" zero_point, scale = (-34, 0.0256) max_pool = make_model( pool_op=relay.nn.max_pool2d, shape=in_shape, pool_size=pool_size, strides=strides, padding=padding, scale=scale, zero_point=zero_point, relu_type=relu_type, ) new_shape = (1, 28, 28, 3) if padding == "VALID" else (1, 30, 30, 3) reshape = relay.reshape(max_pool, newshape=new_shape) model = make_model( pool_op=relay.nn.avg_pool2d, shape=new_shape, pool_size=pool_size, strides=strides, padding=padding, scale=scale, zero_point=zero_point, relu_type=relu_type, input_op=reshape, ) orig_mod = make_module(model) cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod) # validate pattern matching assert_partitioned_function(orig_mod, cmsisnn_mod) # generate reference output rng = np.random.default_rng(12345) in_min, in_max = get_range_for_dtype_str("int8") inputs = { "input": rng.integers(in_min, high=in_max, size=in_shape, dtype="int8") } output_list = generate_ref_data(orig_mod["main"], inputs, params=None) # validate presence of depthwise convolution compiled_models = compile_models( AOTTestModel( module=cmsisnn_mod, inputs=inputs, outputs=output_list, params=None, output_tolerance=1, ), interface_api, use_unpacked_api, pass_config=test_runner.pass_config, ) main_mod = None for target, mod in compiled_models[ 0].executor_factory.lowered_ir_mods.items(): if target.kind.name == "c": main_mod = mod # when padding="SAME", extra padding is introduced which causes Reshape to be fused with the # Pad. RemoveReshapes pass cannot remove a fused Reshape. Whereas padding="VALID" doesn't need # an extra Pad layer. In this case, the pass removes the Reshape from the graph. reshapes_present = any( ["reshape" in gv.name_hint for gv in main_mod.get_global_vars()]) check_reshapes = reshapes_present if padding == "SAME" else not reshapes_present expected_reshapes = "a" if padding == "SAME" else "No" assert check_reshapes, "Expeting {} reshape layer(s).".format( expected_reshapes) # validate the output run_and_check( models=compiled_models, runner=test_runner, interface_api=interface_api, )
def test_tflite_model_u4_usecase_two_external_pools(model_url, usmp_algo): """This checks for inference with USMP using external pool placed in the application""" pytest.importorskip("tflite") import tvm.relay.testing.tf as tf_testing # pylint: disable=import-outside-toplevel use_unpacked_api = True interface_api = "c" target = tvm.target.Target("c") workspace_memory_pools = WorkspaceMemoryPools([ PoolInfo("my_memory_pool_1", {target: PoolInfo.READ_WRITE_ACCESS}, size_hint_bytes=2500000), PoolInfo("my_memory_pool_2", {target: PoolInfo.READ_WRITE_ACCESS}), ]) tflite_model_file = tf_testing.get_workload_official( model_url[0], model_url[1], ) mod, inputs, params = create_relay_module_and_inputs_from_tflite_file( tflite_model_file) output_list = generate_ref_data(mod, inputs, params) input_name, input_data = list(inputs.items())[0] input_size_bytes = input_data.size * input_data.itemsize test_runner = AOTTestRunner( pass_config={ "tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo, "tir.usmp.use_workspace_io": True, }, prologue=f""" #include <string.h> __attribute__((section(".data.tvm"), aligned(16))) static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}]; __attribute__((section(".data.tvm"), aligned(16))) static uint8_t my_memory_pool_2[{_get_workspace_size_define_macro("my_memory_pool_2")}]; struct {_add_module_prefix("workspace_pools")} {_add_module_prefix("workspace_pools")} = {{ .my_memory_pool_1 = my_memory_pool_1, .my_memory_pool_2 = my_memory_pool_2, }}; struct {_add_module_prefix("inputs")} {_add_module_prefix("inputs")} = {_add_module_prefix("map_inputs")}(&{_add_module_prefix("workspace_pools")}); memcpy({_add_module_prefix("inputs")}.{input_name}, tvmgen_default_input_data_input, {input_size_bytes}); struct {_add_module_prefix("outputs")} {_add_module_prefix("outputs")} = {_add_module_prefix("map_outputs")}(&{_add_module_prefix("workspace_pools")}); """, ) compiled_test_mods = compile_models( AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), interface_api=interface_api, use_unpacked_api=use_unpacked_api, pass_config=test_runner.pass_config, workspace_memory_pools=workspace_memory_pools, target=target, ) for compiled_model in compiled_test_mods: _check_for_no_tvm_backendallocworkspace_calls( compiled_model.executor_factory.lib) run_and_check( models=compiled_test_mods, runner=test_runner, interface_api=interface_api, use_workspace_io=True, )
def test_two_models_with_a_single_external_pool(model_urls, usmp_algo): """This checks for inference using a single large enough common pool""" pytest.importorskip("tflite") import tvm.relay.testing.tf as tf_testing # pylint: disable=import-outside-toplevel use_unpacked_api = True interface_api = "c" target = tvm.target.Target("c") workspace_memory_pools = WorkspaceMemoryPools( [PoolInfo("my_memory_pool", {target: PoolInfo.READ_WRITE_ACCESS})]) test_runner = AOTTestRunner( pass_config={ "tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo }, prologue=f""" #define MAX(A, B) ((A > B) ? A : B) __attribute__((section(".data.tvm"), aligned(16))) static uint8_t my_memory_pool[MAX({_get_workspace_size_define_macro("my_memory_pool", "mod1")},{_get_workspace_size_define_macro("my_memory_pool", "mod2")})]; """, ) tflite_model_file1 = tf_testing.get_workload_official( model_urls[0][0], model_urls[0][1], ) mod1, inputs1, params1 = create_relay_module_and_inputs_from_tflite_file( tflite_model_file1) output_list1 = generate_ref_data(mod1, inputs1, params1) tflite_model_file2 = tf_testing.get_workload_official( model_urls[1][0], model_urls[1][1], ) mod2, inputs2, params2 = create_relay_module_and_inputs_from_tflite_file( tflite_model_file2) output_list2 = generate_ref_data(mod2, inputs2, params2) compiled_test_mods = compile_models( [ AOTTestModel(name="mod1", module=mod1, inputs=inputs1, outputs=output_list1, params=params1), AOTTestModel(name="mod2", module=mod2, inputs=inputs2, outputs=output_list2, params=params2), ], interface_api=interface_api, use_unpacked_api=use_unpacked_api, pass_config=test_runner.pass_config, workspace_memory_pools=workspace_memory_pools, target=target, ) for compiled_model in compiled_test_mods: _check_for_no_tvm_backendallocworkspace_calls( compiled_model.executor_factory.lib) run_and_check( models=compiled_test_mods, runner=test_runner, interface_api=interface_api, )
def test_byoc_microtvm(merge_compiler_regions): """ This is a simple test to check BYOC capabilities of AOT with and without merging compiler regions to test for https://github.com/apache/tvm/issues/9036 """ use_unpacked_api = False interface_api = "packed" test_runner = AOTTestRunner(pass_config={"tir.usmp.enable": True}) input_x = relay.var("x", shape=(10, 10)) input_w0 = relay.var("w0", shape=(10, 10)) input_w1 = relay.var("w1", shape=(10, 10)) # z0 = x + w0 marked_input_x = compiler_begin(input_x, "ccompiler") marked_input_w0 = compiler_begin(input_w0, "ccompiler") add_x_and_w0 = relay.add(marked_input_x, marked_input_w0) end_inner_add = compiler_end(add_x_and_w0, "ccompiler") # z1 = z0 + w1 marked_inner_add = compiler_begin(end_inner_add, "ccompiler") marked_w1 = compiler_begin(input_w1, "ccompiler") add_nested_and_w1 = relay.add(marked_inner_add, marked_w1) end_outer_add = compiler_end(add_nested_and_w1, "ccompiler") # z2 = z0 + z1 final_add = relay.add(end_inner_add, end_outer_add) relay_func = relay.Function([input_x, input_w0, input_w1], final_add) mod = tvm.IRModule() mod["main"] = relay_func if merge_compiler_regions: mod = transform.MergeCompilerRegions()(mod) mod = transform.PartitionGraph("mod_name")(mod) mod = transform.InferType()(mod) x_data = [("x", np.random.rand(10, 10).astype("float32"))] w_data = [("w{}".format(i), np.random.rand(10, 10).astype("float32")) for i in range(2)] map_inputs = OrderedDict(x_data + w_data) output_list = generate_ref_data(mod, map_inputs) compiled_test_mods = compile_models( AOTTestModel(name="my_mod", module=mod, inputs=map_inputs, outputs=output_list), interface_api=interface_api, use_unpacked_api=use_unpacked_api, pass_config=test_runner.pass_config, ) for compiled_model in compiled_test_mods: _check_for_no_tvm_backendallocworkspace_calls( compiled_model.executor_factory.lib) run_and_check( models=compiled_test_mods, runner=test_runner, interface_api=interface_api, )
def test_tflite_model_u3_usecase_var_cons_ext_pools(model_url, usmp_algo): """This checks for inference using one external workspace and one external constant pools placed in the application""" pytest.importorskip("tflite") import tvm.relay.testing.tf as tf_testing # pylint: disable=import-outside-toplevel use_unpacked_api = True interface_api = "c" target = tvm.target.Target("c") workspace_mem_pools = WorkspaceMemoryPools([ WorkspacePoolInfo("my_memory_pool_1", [target], PoolInfoProperties(size_hint_bytes=8500000)), ]) constant_mem_pools = ConstantMemoryPools([ ConstantPoolInfo("my_const_pool_1", [target], []), ]) test_runner = AOTTestRunner( pass_config={ "tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo }, prologue=f""" __attribute__((section(".bss.noinit"), aligned(TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES))) static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}]; __attribute__((section(".rodata.tvm"), aligned(TVM_RUNTIME_CONST_ALLOC_ALIGNMENT_BYTES))) static uint8_t my_const_pool_1[{_get_constant_size_define_macro("my_const_pool_1")}] = {{ {_get_constant_data_define_macro("my_const_pool_1")} }}; """, ) tflite_model_file = tf_testing.get_workload_official( model_url[0], model_url[1], ) mod, inputs, params = create_relay_module_and_inputs_from_tflite_file( tflite_model_file) output_list = generate_ref_data(mod, inputs, params) compiled_test_mods = compile_models( AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), interface_api=interface_api, use_unpacked_api=use_unpacked_api, pass_config=test_runner.pass_config, workspace_memory_pools=workspace_mem_pools, constant_memory_pools=constant_mem_pools, target=target, ) for compiled_model in compiled_test_mods: _check_for_no_tvm_backendallocworkspace_calls( compiled_model.executor_factory.lib) run_and_check( models=compiled_test_mods, runner=test_runner, interface_api=interface_api, )
def test_tflite_model_u3_usecase_conv2d_var_cons(usmp_algo): """This checks for inference using workspace and constant pools placed in the application""" mod = tvm.parser.fromtext("""\ #[version = "0.0.5"] def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(3, 3, 5, 5), int8]) { %1 = nn.conv2d( %data, %weight, padding=[2, 2], channels=3, kernel_size=[5, 5], data_layout="NCHW", kernel_layout="OIHW", out_dtype="int32"); %2 = cast(nn.max_pool2d(%1, pool_size=[3, 3]), dtype="int8"); %3 = nn.conv2d( %2, %weight, padding=[2, 2], channels=3, kernel_size=[5, 5], data_layout="NCHW", kernel_layout="OIHW", out_dtype="int32"); %4 = nn.max_pool2d(%3, pool_size=[3, 3]); %4 } """) main_func = mod["main"] shape_dict = { p.name_hint: p.checked_type.concrete_shape for p in main_func.params } type_dict = {p.name_hint: p.checked_type.dtype for p in main_func.params} weight_data = np.random.randint(1, 255, shape_dict["weight"]).astype( type_dict["weight"]) input_data = np.ones(shape_dict["data"]).astype(type_dict["data"]) params = {"weight": weight_data} inputs = {"data": input_data} use_unpacked_api = True interface_api = "c" target = tvm.target.Target("c") workspace_mem_pools = WorkspaceMemoryPools([ WorkspacePoolInfo("my_memory_pool_1", [target], PoolInfoProperties(size_hint_bytes=8500000)), ]) constant_mem_pools = ConstantMemoryPools([ ConstantPoolInfo("my_const_pool_1", [target], []), ]) test_runner = AOTTestRunner( pass_config={ "tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo }, prologue=f""" __attribute__((section(".bss.noinit"), aligned(TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES))) static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}]; __attribute__((section(".rodata.tvm"), aligned(TVM_RUNTIME_CONST_ALLOC_ALIGNMENT_BYTES))) static uint8_t my_const_pool_1[{_get_constant_size_define_macro("my_const_pool_1")}] = {{ {_get_constant_data_define_macro("my_const_pool_1")} }}; """, ) output_list = generate_ref_data(mod, inputs, params) compiled_test_mods = compile_models( AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), interface_api=interface_api, use_unpacked_api=use_unpacked_api, pass_config=test_runner.pass_config, workspace_memory_pools=workspace_mem_pools, constant_memory_pools=constant_mem_pools, target=target, ) for compiled_model in compiled_test_mods: _check_for_no_tvm_backendallocworkspace_calls( compiled_model.executor_factory.lib) run_and_check( models=compiled_test_mods, runner=test_runner, interface_api=interface_api, )