def test_aot_codegen_checks_returns():
    """Check that AoT lowering wraps operator calls in a return-value check.

    A two-input ``add`` is compiled with the "c" interface API and the
    unpacked API. The test then inspects the second statement of the lowered
    ``__tvm_main__`` body and the C source of the first imported module.
    """
    x = relay.var("x", shape=(1, 10))
    y = relay.var("y", shape=(1, 10))
    z = relay.add(x, y)
    func = relay.Function([x, y], z)

    compiled_test_mods = compile_models(
        models=AOTTestModel(module=IRModule.from_expr(func), inputs=None, outputs=None),
        interface_api="c",
        use_unpacked_api=True,
    )
    executor_factory = compiled_test_mods[0].executor_factory
    source = executor_factory.lib.imported_modules[0].get_source()

    # Materialise the items before indexing: a mapping view is not
    # subscriptable, whereas a list always is.
    main_ir_module = list(executor_factory.lowered_ir_mods.items())[0][1]
    main_func = main_ir_module["__tvm_main__"]

    # Check operator call is wrapped properly
    assert (
        str(main_func.body[1])
        == "tir.tvm_check_return(0, -1, tir.call_extern("
        + '"tvmgen_default_fused_add",'
        + " x_buffer_var, y_buffer_var, output_buffer_var))\n"
    )
    # TODO(Mousius) - Create a better place for C codegen tests
    assert (
        "if (tvmgen_default_fused_add(x_buffer_var, y_buffer_var, output_buffer_var) != 0 ) return -1;"
        in source
    )
def test_tflite_model_u3_usecase_two_external_pools(model_url, usmp_algo):
    """Run inference with two workspace pools that the application defines.

    Both pools are declared as static buffers in the runner prologue and are
    handed to the compiler through ``WorkspaceMemoryPools``.
    """
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    c_target = tvm.target.Target("c")
    pools = WorkspaceMemoryPools(
        [
            PoolInfo(
                "my_memory_pool_1",
                {c_target: PoolInfo.READ_WRITE_ACCESS},
                size_hint_bytes=2500000,
            ),
            PoolInfo("my_memory_pool_2", {c_target: PoolInfo.READ_WRITE_ACCESS}),
        ]
    )

    # Size macros used to dimension the two application-side buffers.
    pool_1_size = _get_workspace_size_define_macro("my_memory_pool_1")
    pool_2_size = _get_workspace_size_define_macro("my_memory_pool_2")
    runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo},
        prologue=f"""
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_1[{pool_1_size}];
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_2[{pool_2_size}];
        """,
    )

    model_file = tf_testing.get_workload_official(model_url[0], model_url[1])
    relay_mod, model_inputs, model_params = create_relay_module_and_inputs_from_tflite_file(
        model_file
    )
    reference_outputs = generate_ref_data(relay_mod, model_inputs, model_params)

    compiled = compile_models(
        AOTTestModel(
            module=relay_mod,
            inputs=model_inputs,
            outputs=reference_outputs,
            params=model_params,
        ),
        interface_api="c",
        use_unpacked_api=True,
        pass_config=runner.pass_config,
        workspace_memory_pools=pools,
        target=c_target,
    )

    for entry in compiled:
        _check_for_no_tvm_backendallocworkspace_calls(entry.executor_factory.lib)

    run_and_check(models=compiled, runner=runner, interface_api="c")
def test_tflite_model_u1_usecase(model_url, usmp_algo, workspace_size, constant_size):
    """Check the memory used by ML models when USMP runs with different algorithms.

    Parameters
    ----------
    model_url
        Indexable whose first two elements are passed to
        ``tf_testing.get_workload_official``.
    usmp_algo
        Value stored under the "tir.usmp.algorithm" pass-config key.
    workspace_size
        Expected "workspace_size_bytes" of the "main" entry in the MLF memory map.
    constant_size
        Expected "constants_size_bytes" of the "main" entry in the MLF memory map.
    """
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo}
    )

    tflite_model_file = tf_testing.get_workload_official(model_url[0], model_url[1])
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    # Checking the workspace size reported in model library format
    executor_factory = compiled_test_mods[0].executor_factory
    mlf_memory_map = mlf._build_function_memory_map(executor_factory.function_metadata)
    assert mlf_memory_map["main"][0]["workspace_size_bytes"] == workspace_size
    assert mlf_memory_map["main"][0]["constants_size_bytes"] == constant_size

    # That should match to workspace size that will be codegen'd to the entry point.
    # Sum straight from the mapping's values; no intermediate lists are needed.
    pool_inputs = dict(executor_factory.executor_codegen_metadata.pool_inputs)
    allocated_pool_info_size = sum(pool.allocated_size for pool in pool_inputs.values())
    assert allocated_pool_info_size == workspace_size + constant_size

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
def test_conv2d(interface_api, use_unpacked_api, test_runner, groups, weight_shape):
    """Exercise a subgraph made of one conv2d operator with USMP enabled.

    The incoming runner is copied field by field into a new ``AOTTestRunner``
    whose pass config enables USMP. The model is first built and run through
    ``compile_and_run``, then compiled again so the generated library can be
    checked for the absence of TVMBackendAllocWorkspace calls before running.
    """
    dtype = "float32"
    data_shape = (1, 32, 14, 14)
    kernel_shape = (32, weight_shape, 3, 3)

    usmp_runner = AOTTestRunner(
        makefile=test_runner.makefile,
        prologue=test_runner.prologue,
        epilogue=test_runner.epilogue,
        includes=test_runner.includes,
        parameters=test_runner.parameters,
        pass_config={"tir.usmp.enable": True},
    )

    data_var = relay.var("data", shape=data_shape, dtype=dtype)
    weight_var = relay.var("weight", shape=kernel_shape, dtype=dtype)
    conv = relay.nn.conv2d(
        data_var, weight_var, kernel_size=(3, 3), padding=(1, 1), groups=groups
    )

    mod = tvm.IRModule()
    mod["main"] = relay.Function([data_var, weight_var], conv)
    mod = transform.InferType()(mod)

    inputs = OrderedDict()
    inputs["data"] = np.random.uniform(0, 1, data_shape).astype(dtype)
    inputs["weight"] = np.random.uniform(0, 1, kernel_shape).astype(dtype)
    reference_outputs = generate_ref_data(mod, inputs)

    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=reference_outputs),
        usmp_runner,
        interface_api,
        use_unpacked_api,
    )

    compiled = compile_models(
        models=AOTTestModel(module=mod, inputs=inputs, outputs=reference_outputs),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=usmp_runner.pass_config,
    )

    for entry in compiled:
        _check_for_no_tvm_backendallocworkspace_calls(entry.executor_factory.lib)

    run_and_check(models=compiled, runner=usmp_runner, interface_api=interface_api)
def compile_to_main_func(interface_api="c", use_unpacked_api=True):
    """Compile the model and return its lowered ``__tvm_main__`` function.

    ``func``, ``inputs`` and ``output_list`` are not parameters; they are
    resolved from the surrounding scope. The pass config comes from
    ``AOT_DEFAULT_RUNNER``.
    """
    model = AOTTestModel(
        module=IRModule.from_expr(func),
        inputs=inputs,
        outputs=output_list,
    )
    compiled = compile_models(
        models=model,
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        workspace_byte_alignment=16,
        pass_config=AOT_DEFAULT_RUNNER.pass_config,
    )
    # The first lowered IRModule is the one holding the main function.
    lowered_modules = list(compiled[0].executor_factory.lowered_ir_mods.values())
    return lowered_modules[0]["__tvm_main__"]
def compile_to_main_func(interface_api="c", use_unpacked_api=True):
    """Compile the model and return its lowered ``__tvm_main__`` function.

    ``mod``, ``input_data`` and ``output_data`` are not parameters; they are
    resolved from the surrounding scope. The pass config comes from the
    runner returned by ``create_test_runner()``.

    Parameters
    ----------
    interface_api
        Forwarded to ``compile_models`` as ``interface_api``.
    use_unpacked_api
        Forwarded to ``compile_models`` as ``use_unpacked_api``.

    Returns
    -------
    The ``"__tvm_main__"`` entry of the first lowered IRModule.
    """
    test_runner = create_test_runner()
    compiled_models = compile_models(
        models=AOTTestModel(
            module=mod,
            inputs=input_data,
            outputs=output_data,
        ),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        workspace_byte_alignment=16,
        pass_config=test_runner.pass_config,
    )
    # Materialise the items before indexing: a mapping view is not
    # subscriptable, whereas a list always is.
    lowered_ir_mods = compiled_models[0].executor_factory.lowered_ir_mods
    main_ir_module = list(lowered_ir_mods.items())[0][1]
    main_func = main_ir_module["__tvm_main__"]
    return main_func
def test_aot_codegen_backend_alloc_workspace_calls():
    """Verify that AoT lowering emits TVMBackendAllocWorkspace calls."""

    # The tensors declared for %data and %weight below are small enough that
    # the CPU PrimFuncs would normally turn them into stack allocations.
    # The AoT executor codegen is nevertheless expected to keep them as TVMBAW calls.

    # pylint: disable=line-too-long
    relay_text = """
        #[version = "0.0.5"]
        def @main(%data: Tensor[(1, 4, 4, 4), float32], %weight: Tensor[(4, 4, 3, 3), float32], src_layout="OIHW", dst_layout="OIHW4i4o") -> Tensor[(1, 4, 4, 4), float32] {
        %0 = fn (%p02: Tensor[(1, 4, 4, 4), float32], Primitive=1, hash="9332b3872fb5292c", src_layout="NCHW", dst_layout="NCHW4c") -> Tensor[(1, 1, 4, 4, 4), float32] {
            layout_transform(%p02, src_layout="NCHW", dst_layout="NCHW4c") /* ty=Tensor[(1, 1, 4, 4, 4), float32] */
        };
        %1 = fn (%p03: Tensor[(4, 4, 3, 3), float32], Primitive=1, hash="9f0b2b8a24a4dab3", src_layout="OIHW", dst_layout="OIHW4i4o") -> Tensor[(1, 1, 3, 3, 4, 4), float32] {
            layout_transform(%p03, src_layout="OIHW", dst_layout="OIHW4i4o") /* ty=Tensor[(1, 1, 3, 3, 4, 4), float32] */
        };
        %2 = %0(%data) /* ty=Tensor[(1, 1, 4, 4, 4), float32] */;
        %3 = %1(%weight) /* ty=Tensor[(1, 1, 3, 3, 4, 4), float32] */;
        %4 = fn (%p01: Tensor[(1, 1, 4, 4, 4), float32], %p1: Tensor[(1, 1, 3, 3, 4, 4), float32], out_layout="NCHW4c", kernel_layout="OIHW4i4o", Primitive=1, data_layout="NCHW4c") -> Tensor[(1, 1, 4, 4, 4), float32] {
            nn.contrib_conv2d_NCHWc(%p01, %p1, padding=[1, 1, 1, 1], channels=4, kernel_size=[3, 3], data_layout="NCHW4c", kernel_layout="OIHW4i4o", out_layout="NCHW4c") /* ty=Tensor[(1, 1, 4, 4, 4), float32] */
        };
        %5 = %4(%2, %3) /* ty=Tensor[(1, 1, 4, 4, 4), float32] */;
        %6 = fn (%p0: Tensor[(1, 1, 4, 4, 4), float32], Primitive=1, src_layout="NCHW4c", dst_layout="NCHW") -> Tensor[(1, 4, 4, 4), float32] {
            layout_transform(%p0, src_layout="NCHW4c", dst_layout="NCHW") /* ty=Tensor[(1, 4, 4, 4), float32]
 */
        };
        %6(%5) /* ty=Tensor[(1, 4, 4, 4), float32] */
        }
        """
    # pylint: enable=line-too-long
    parsed_mod = tvm.parser.fromtext(relay_text)

    compiled = compile_models(
        models=AOTTestModel(module=parsed_mod, inputs=None, outputs=None),
        interface_api="c",
        use_unpacked_api=True,
    )
    generated_c = compiled[0].executor_factory.lib.imported_modules[0].get_source()

    # The relay snippet above should lead to exactly three allocate calls
    # for the primitive relay function calls made from main.
    alloc_calls = generated_c.count("TVMBackendAllocWorkspace")
    assert alloc_calls == 3
def build_source(
    module,
    inputs,
    outputs,
    test_runner,
    output_tolerance=0,
    workspace_pools=None,
):
    """Compile one model through ``compile_models`` and return its result.

    The model is wrapped in an ``AOTTestModel`` with no extra memory. It is
    compiled with the "c" interface API, the unpacked API, a workspace
    alignment of 16 bytes, and the pass config of ``test_runner``.
    """
    test_model = AOTTestModel(
        module=module,
        inputs=inputs,
        outputs=outputs,
        output_tolerance=output_tolerance,
        extra_memory_in_bytes=0,
    )
    compile_options = {
        "interface_api": "c",
        "use_unpacked_api": True,
        "workspace_memory_pools": workspace_pools,
        "workspace_byte_alignment": 16,
        "pass_config": test_runner.pass_config,
    }
    return compile_models(models=test_model, **compile_options)
def build_source(
    module,
    inputs,
    outputs,
    accel="ethos-u55-256",
    output_tolerance=0,
    enable_usmp=True,
    enable_cascader=False,
):
    """Compile one model with a runner built from the accelerator settings.

    ``create_test_runner(accel, enable_usmp, enable_cascader)`` supplies the
    pass config. The model is compiled with the "c" interface API, the
    unpacked API and a workspace alignment of 16 bytes. The result of
    ``compile_models`` is returned unchanged.
    """
    runner = create_test_runner(accel, enable_usmp, enable_cascader)
    test_model = AOTTestModel(
        module=module,
        inputs=inputs,
        outputs=outputs,
        output_tolerance=output_tolerance,
        extra_memory_in_bytes=0,
    )
    compiled = compile_models(
        models=test_model,
        interface_api="c",
        use_unpacked_api=True,
        workspace_byte_alignment=16,
        pass_config=runner.pass_config,
    )
    return compiled
def test_constants_alignment(constants_byte_alignment):
    """Check that the constants-byte-alignment target option reaches the C source.

    The MobileNet workload is compiled for a "c" target carrying
    ``-constants-byte-alignment``. The generated source must contain a
    ``.rodata.tvm`` section attribute aligned to that same value.
    """
    mod, params = testing.mobilenet.get_workload(batch_size=1)

    input_type = mod["main"].checked_type.arg_types[0]
    data_shape = [int(dim) for dim in input_type.shape]
    inputs = {"data": np.random.uniform(size=data_shape).astype("float32")}
    reference_outputs = generate_ref_data(mod, inputs, params)

    target_str = f"c -constants-byte-alignment={constants_byte_alignment}"
    compiled = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=reference_outputs, params=params),
        "c",
        True,
        target=tvm.target.Target(target_str, host=target_str),
    )

    generated_c = compiled[0].executor_factory.lib.imported_modules[0].get_source()
    expected_attribute = (
        f'__attribute__((section(".rodata.tvm"), aligned({constants_byte_alignment})))'
    )
    assert expected_attribute in generated_c
def test_reshape_removal(padding):
    """Tests reshape is removed from the network.

    The graph is max_pool2d -> reshape -> avg_pool2d, partitioned for
    CMSIS-NN. After compilation the lowered module of the "c" target is
    searched for global vars whose name contains "reshape". One is expected
    when ``padding == "SAME"`` and none otherwise.
    """
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_USMP_CORSTONE300_RUNNER

    in_shape = (1, 28, 28, 12)
    pool_size = (3, 3)
    strides = (2, 2)
    relu_type = "NONE"
    zero_point, scale = (-34, 0.0256)

    max_pool = make_model(
        pool_op=relay.nn.max_pool2d,
        shape=in_shape,
        pool_size=pool_size,
        strides=strides,
        padding=padding,
        scale=scale,
        zero_point=zero_point,
        relu_type=relu_type,
    )
    new_shape = (1, 28, 28, 3) if padding == "VALID" else (1, 30, 30, 3)
    reshape = relay.reshape(max_pool, newshape=new_shape)

    model = make_model(
        pool_op=relay.nn.avg_pool2d,
        shape=new_shape,
        pool_size=pool_size,
        strides=strides,
        padding=padding,
        scale=scale,
        zero_point=zero_point,
        relu_type=relu_type,
        input_op=reshape,
    )
    orig_mod = make_module(model)

    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # generate reference output
    rng = np.random.default_rng(12345)
    in_min, in_max = get_range_for_dtype_str("int8")
    inputs = {"input": rng.integers(in_min, high=in_max, size=in_shape, dtype="int8")}
    output_list = generate_ref_data(orig_mod["main"], inputs, params=None)

    # compile the partitioned module so the lowered IR can be inspected
    compiled_models = compile_models(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            params=None,
            output_tolerance=1,
        ),
        interface_api,
        use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    main_mod = None
    for target, mod in compiled_models[0].executor_factory.lowered_ir_mods.items():
        if target.kind.name == "c":
            main_mod = mod
    # Fail with a clear message rather than an attribute error on None below.
    assert main_mod is not None, 'No lowered IRModule found for the "c" target.'

    # when padding="SAME", extra padding is introduced which causes Reshape to be fused with the
    # Pad. RemoveReshapes pass cannot remove a fused Reshape. Whereas padding="VALID" doesn't need
    # an extra Pad layer. In this case, the pass removes the Reshape from the graph.
    reshapes_present = any("reshape" in gv.name_hint for gv in main_mod.get_global_vars())
    check_reshapes = reshapes_present if padding == "SAME" else not reshapes_present
    expected_reshapes = "a" if padding == "SAME" else "No"
    assert check_reshapes, f"Expecting {expected_reshapes} reshape layer(s)."

    # validate the output
    run_and_check(
        models=compiled_models,
        runner=test_runner,
        interface_api=interface_api,
    )
def test_relay_conv2d_cmsisnn_depthwise_int8(
    padding,
    strides,
    dilation,
    relu_type,
    input_zero_point,
    input_scale,
    kernel_scale,
    depth_multiplier,
):
    """Tests QNN Depthwise int8 op via CMSIS-NN.

    After partitioning and compiling, the lowered "cmsis-nn" module must
    contain ``tvmgen_default_cmsis_nn_main_0``, and its extern call must
    target ``arm_depthwise_conv_wrapper_s8``.
    """
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_USMP_CORSTONE300_RUNNER

    dtype = "int8"
    in_min, in_max = get_range_for_dtype_str(dtype)

    ifm_shape = (1, 24, 24, 1)
    groups = ifm_shape[3]
    weight_format = "HWIO"
    (kernel_h, kernel_w) = (3, 3)
    kernel_shape = (kernel_h, kernel_w, ifm_shape[3], depth_multiplier)
    out_channels = ifm_shape[3] * depth_multiplier
    enable_bias = True
    ks_len = len(kernel_scale)
    kernel_zero_point = 0
    # Repeat the given scales cyclically so there is one per output channel.
    kernel_scale = [kernel_scale[i % ks_len] for i in range(out_channels)]

    output_scale, output_zero_point = get_conv2d_qnn_params(
        kernel_shape,
        input_scale,
        input_zero_point,
        kernel_scale,
        kernel_zero_point,
        dtype,
        dtype,
        dtype,
        True,
    )

    model, params = make_model(
        ifm_shape,
        kernel_shape,
        input_zero_point,
        input_scale,
        kernel_zero_point,
        kernel_scale,
        output_zero_point,
        output_scale,
        padding,
        strides,
        dilation,
        groups,
        dtype,
        dtype,
        out_channels,
        weight_format,
        enable_bias,
        relu_type,
    )
    orig_mod = make_module(model)
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # generate reference output
    rng = np.random.default_rng(12345)
    inputs = {"input": rng.integers(in_min, high=in_max, size=ifm_shape, dtype=dtype)}
    output_list = generate_ref_data(orig_mod["main"], inputs, params)

    # validate presence of depthwise convolution
    compiled_models = compile_models(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            params=params,
            output_tolerance=1,
        ),
        interface_api,
        use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    cmsisnn_tir_mod = None
    for target, mod in compiled_models[0].executor_factory.lowered_ir_mods.items():
        if target.kind.name == "cmsis-nn":
            cmsisnn_tir_mod = mod
    # Fail with a clear message rather than a TypeError when subscripting None.
    assert cmsisnn_tir_mod is not None, 'No lowered IRModule found for the "cmsis-nn" target.'

    cmsisnn_func = cmsisnn_tir_mod["tvmgen_default_cmsis_nn_main_0"]
    # This happens when context buffer is init in case depthM != 1
    if isinstance(cmsisnn_func.body, tvm.tir.stmt.Evaluate):
        call_extern = cmsisnn_func.body.value
    else:
        call_extern = cmsisnn_func.body.body.value
    assert (
        call_extern.args[0].value == "arm_depthwise_conv_wrapper_s8"
    ), "Relay Conv2D should be mapped to CMSIS-NN Depthwise Convolution."

    # validate the output
    run_and_check(
        models=compiled_models,
        runner=test_runner,
        interface_api=interface_api,
    )
def test_conv2d_number_primfunc_args(
    padding,
    enable_bias,
    input_zero_point,
    input_scale,
    kernel_scale,
    out_channels,
):
    """Tests number of arguments in Conv2D primfunc.

    The lowered ``tvmgen_default_cmsis_nn_main_0`` function is expected to
    take 6 parameters when ``enable_bias`` is true and 5 otherwise.
    """
    interface_api = "c"
    use_unpacked_api = True

    ifm_shape = (1, 64, 100, 4)
    kernel_size = (3, 3)
    strides = (1, 1)
    dilation = (1, 1)
    dtype = "int8"
    groups = 1
    weight_format = "HWIO"
    kernel_h = kernel_size[0]
    kernel_w = kernel_size[1]
    kernel_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels)
    kernel_zero_point = 0
    in_min, in_max = get_range_for_dtype_str(dtype)
    relu_type = "RELU"

    output_scale, output_zero_point = get_conv2d_qnn_params(
        kernel_shape,
        input_scale,
        input_zero_point,
        kernel_scale,
        kernel_zero_point,
        dtype,
        dtype,
        dtype,
    )

    model, params = make_model(
        ifm_shape,
        kernel_shape,
        input_zero_point,
        input_scale,
        kernel_zero_point,
        kernel_scale,
        output_zero_point,
        output_scale,
        padding,
        strides,
        dilation,
        groups,
        dtype,
        dtype,
        out_channels,
        weight_format,
        enable_bias,
        relu_type,
    )
    orig_mod = make_module(model)
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # compile the model
    rng = np.random.default_rng(12345)
    inputs = {"input": rng.integers(in_min, high=in_max, size=ifm_shape, dtype=dtype)}
    output_list = generate_ref_data(orig_mod["main"], inputs, params)

    compiled_models = compile_models(
        AOTTestModel(module=cmsisnn_mod, inputs=inputs, outputs=output_list, params=params),
        interface_api,
        use_unpacked_api,
    )

    # validate number of TIR primfunc args
    expected_num_params = 6 if enable_bias else 5
    cmsisnn_tir_mod = None
    for target, mod in compiled_models[0].executor_factory.lowered_ir_mods.items():
        if target.kind.name == "cmsis-nn":
            cmsisnn_tir_mod = mod
    # Fail with a clear message rather than a TypeError when subscripting None.
    assert cmsisnn_tir_mod is not None, 'No lowered IRModule found for the "cmsis-nn" target.'

    cmsisnn_func = cmsisnn_tir_mod["tvmgen_default_cmsis_nn_main_0"]
    assert (
        len(cmsisnn_func.params) == expected_num_params
    ), "Generated unexpected number of function arguments."
def test_output_tensor_names():
    """Test that the output names generated match those in the model.

    A TensorFlow model with two convolution outputs is converted to an int8
    TFLite model, imported into Relay, then built and run with the AOT
    executor. Every key of the reference output dictionary must appear in the
    source returned by the compiled library.
    """
    pytest.importorskip("tflite")

    import tensorflow as tf
    import tflite.Model

    ifm_shape = (1, 299, 299, 3)
    padding = "VALID"
    strides = (1, 1)
    dilation = (1, 1)
    kernel_shape = (3, 2)

    def create_tflite_graph_two_outs():
        """Create a model with 2 output tensors"""

        class Model(tf.Module):
            @tf.function
            def tf_function(self, x):
                # Use tf.nn API to create the model
                tf_strides = [1, strides[0], strides[1], 1]
                filter_shape = [kernel_shape[0], kernel_shape[1], 3, 3]
                filter1 = tf.constant(
                    np.arange(np.prod(filter_shape)).reshape(filter_shape),
                    dtype=tf.float32,
                )
                op = tf.nn.conv2d(
                    x,
                    filters=filter1,
                    strides=tf_strides,
                    padding=padding,
                    dilations=dilation,
                )
                op = tf.nn.relu(op)

                # Second convolution
                filter2 = tf.constant(
                    1000 + np.arange(np.prod(filter_shape)).reshape(filter_shape),
                    dtype=tf.float32,
                )
                op2 = tf.nn.conv2d(
                    x,
                    filters=filter2,
                    strides=strides,
                    padding=padding,
                    data_format="NHWC",
                    dilations=dilation,
                )
                op2 = tf.nn.relu(op2)
                return op, op2

        model = Model()
        concrete_func = model.tf_function.get_concrete_function(
            tf.TensorSpec(ifm_shape, dtype=tf.float32)
        )

        # Convert the model
        def representative_dataset():
            for _ in range(100):
                data = np.random.rand(*tuple(ifm_shape))
                yield [data.astype(np.float32)]

        converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.representative_dataset = representative_dataset
        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
        converter.inference_input_type = tf.int8
        converter.inference_output_type = tf.int8
        tflite_model = converter.convert()
        return tflite_model

    tflite_graph = create_tflite_graph_two_outs()
    tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)
    mod, params = relay.frontend.from_tflite(
        tflite_model,
        shape_dict={"input": ifm_shape},
        dtype_dict={"input": "int8"},
    )

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOT_DEFAULT_RUNNER

    in_min, in_max = (-128, 127)
    data = np.random.randint(in_min, high=in_max, size=ifm_shape, dtype="int8")
    # Use whatever name the importer gave the single model input.
    input_name = mod["main"].params[0].name_hint
    inputs = {input_name: data}
    output_list = generate_ref_data(mod, inputs, params)
    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        test_runner,
        interface_api,
        use_unpacked_api,
    )

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        interface_api,
        use_unpacked_api,
    )

    # Check that the names of the output tensors occur in the source code
    source = compiled_test_mods[0].executor_factory.lib.get_source()
    for output_name in output_list:
        assert output_name in source
def test_packed_global_variables():
    """Check packed global variables in codegen output.

    A conv2d model is compiled with the "packed" interface API and exported
    in Model Library Format. The archive is extracted and every file under
    ``codegen/host/src`` is scanned. For each function name found among the
    ``tvmgen_default*`` tokens of a file, ``<name>_packed`` must not appear
    among those tokens.
    """
    dtype = "float32"
    ishape = (1, 32, 14, 14)
    wshape = (32, 32, 3, 3)
    interface_api = "packed"
    use_unpacked_api = False

    data0 = relay.var("data", shape=ishape, dtype=dtype)
    weight0 = relay.var("weight", shape=wshape, dtype=dtype)
    out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=1)
    main_f = relay.Function([data0, weight0], out)
    mod = tvm.IRModule()
    mod["main"] = main_f
    mod = transform.InferType()(mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    w1_data = np.random.uniform(0, 1, wshape).astype(dtype)

    inputs = OrderedDict([("data", i_data), ("weight", w1_data)])

    output_list = generate_ref_data(mod, inputs)
    compiled_models_list = compile_models(
        models=AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        workspace_byte_alignment=8,
        enable_op_fusion=True,
        pass_config=AOT_DEFAULT_RUNNER.pass_config,
        use_runtime_executor=True,
        target=tvm.target.Target("c"),
    )
    compiled_model = compiled_models_list[0]

    tmp_path = utils.tempdir()
    base_path = tmp_path.temp_dir

    model = compiled_model.model
    tar_file = os.path.join(base_path, f"{model.name}.tar")
    export_model_library_format(compiled_model.executor_factory, tar_file)
    # The context manager closes the archive even if extraction fails.
    with tarfile.open(tar_file) as archive:
        archive.extractall(base_path)

    src_dir = pathlib.Path(base_path) / "codegen" / "host" / "src"
    file_list = [path for path in src_dir.iterdir() if path.is_file()]
    assert len(file_list) > 0

    for path in file_list:
        with open(path, "r") as lib_f:
            lib1 = lib_f.readlines()

        tvmgen_names = []
        tvmgen_funcs = []
        for line in lib1:
            for item in line.split(" "):
                # Find all names starting with tvmgen_default
                if item.startswith("tvmgen_default"):
                    # Collect any name starting with tvmgen_default
                    tvmgen_names.append(item)
                    # Collect all functions starting with tvmgen_default
                    tvmgen_funcs += re.findall(r"(?<=).*(?=\()", item)

        # Check if any function name has a packed variable name in all
        # items that start with tvmgen_default
        for func in tvmgen_funcs:
            assert f"{func}_packed" not in tvmgen_names
def test_tflite_model_u3_usecase_var_cons_ext_pools(model_url, usmp_algo):
    """Run inference with one workspace pool and one constant pool.

    Both pools are defined by the application through the runner prologue.
    """
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    c_target = tvm.target.Target("c")
    workspace_pools = WorkspaceMemoryPools(
        [
            WorkspacePoolInfo(
                "my_memory_pool_1",
                [c_target],
                PoolInfoProperties(size_hint_bytes=8500000),
            ),
        ]
    )
    constant_pools = ConstantMemoryPools(
        [
            ConstantPoolInfo("my_const_pool_1", [c_target], []),
        ]
    )

    # Macros used by the prologue to size and initialise the two buffers.
    workspace_size_macro = _get_workspace_size_define_macro("my_memory_pool_1")
    constant_size_macro = _get_constant_size_define_macro("my_const_pool_1")
    constant_data_macro = _get_constant_data_define_macro("my_const_pool_1")
    runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo},
        prologue=f"""
        __attribute__((section(".bss.noinit"), aligned(TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES)))
        static uint8_t my_memory_pool_1[{workspace_size_macro}];
        __attribute__((section(".rodata.tvm"), aligned(TVM_RUNTIME_CONST_ALLOC_ALIGNMENT_BYTES)))
        static uint8_t my_const_pool_1[{constant_size_macro}] = {{ {constant_data_macro} }};
        """,
    )

    model_file = tf_testing.get_workload_official(model_url[0], model_url[1])
    relay_mod, model_inputs, model_params = create_relay_module_and_inputs_from_tflite_file(
        model_file
    )
    reference_outputs = generate_ref_data(relay_mod, model_inputs, model_params)

    compiled = compile_models(
        AOTTestModel(
            module=relay_mod,
            inputs=model_inputs,
            outputs=reference_outputs,
            params=model_params,
        ),
        interface_api="c",
        use_unpacked_api=True,
        pass_config=runner.pass_config,
        workspace_memory_pools=workspace_pools,
        constant_memory_pools=constant_pools,
        target=c_target,
    )

    for entry in compiled:
        _check_for_no_tvm_backendallocworkspace_calls(entry.executor_factory.lib)

    run_and_check(models=compiled, runner=runner, interface_api="c")
def test_byoc_microtvm(merge_compiler_regions):
    """Smoke-test the BYOC path of the AOT executor.

    The graph is run with and without merging compiler regions, as a
    regression check for https://github.com/apache/tvm/issues/9036
    """
    runner = AOTTestRunner(pass_config={"tir.usmp.enable": True})

    x_var = relay.var("x", shape=(10, 10))
    w0_var = relay.var("w0", shape=(10, 10))
    w1_var = relay.var("w1", shape=(10, 10))

    # z0 = x + w0
    x_begin = compiler_begin(x_var, "ccompiler")
    w0_begin = compiler_begin(w0_var, "ccompiler")
    z0 = compiler_end(relay.add(x_begin, w0_begin), "ccompiler")

    # z1 = z0 + w1
    z0_begin = compiler_begin(z0, "ccompiler")
    w1_begin = compiler_begin(w1_var, "ccompiler")
    z1 = compiler_end(relay.add(z0_begin, w1_begin), "ccompiler")

    # z2 = z0 + z1
    z2 = relay.add(z0, z1)

    mod = tvm.IRModule()
    mod["main"] = relay.Function([x_var, w0_var, w1_var], z2)

    if merge_compiler_regions:
        mod = transform.MergeCompilerRegions()(mod)

    for relay_pass in (transform.PartitionGraph("mod_name"), transform.InferType()):
        mod = relay_pass(mod)

    x_entries = [("x", np.random.rand(10, 10).astype("float32"))]
    w_entries = [(f"w{idx}", np.random.rand(10, 10).astype("float32")) for idx in range(2)]
    named_inputs = OrderedDict([*x_entries, *w_entries])
    reference_outputs = generate_ref_data(mod, named_inputs)

    compiled = compile_models(
        AOTTestModel(name="my_mod", module=mod, inputs=named_inputs, outputs=reference_outputs),
        interface_api="packed",
        use_unpacked_api=False,
        pass_config=runner.pass_config,
    )

    for entry in compiled:
        _check_for_no_tvm_backendallocworkspace_calls(entry.executor_factory.lib)

    run_and_check(models=compiled, runner=runner, interface_api="packed")
def test_tflite_model_u3_usecase_conv2d_var_cons(usmp_algo):
    """Run inference with workspace and constant pools held by the application.

    The model is a small hand-written Relay program (two conv2d / max_pool2d
    pairs that share one weight tensor) rather than a downloaded network.
    """
    relay_text = """\
        #[version = "0.0.5"]
        def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(3, 3, 5, 5), int8]) {
            %1 = nn.conv2d(
                 %data,
                 %weight,
                 padding=[2, 2],
                 channels=3,
                 kernel_size=[5, 5],
                 data_layout="NCHW",
                 kernel_layout="OIHW",
                 out_dtype="int32");
            %2 = cast(nn.max_pool2d(%1, pool_size=[3, 3]), dtype="int8");
            %3 = nn.conv2d(
                 %2,
                 %weight,
                 padding=[2, 2],
                 channels=3,
                 kernel_size=[5, 5],
                 data_layout="NCHW",
                 kernel_layout="OIHW",
                 out_dtype="int32");
            %4 = nn.max_pool2d(%3, pool_size=[3, 3]);
            %4
        }
        """
    mod = tvm.parser.fromtext(relay_text)

    # Collect the shape and dtype of every parameter of main, keyed by name.
    shapes = {}
    dtypes = {}
    for param in mod["main"].params:
        shapes[param.name_hint] = param.checked_type.concrete_shape
    for param in mod["main"].params:
        dtypes[param.name_hint] = param.checked_type.dtype

    weights = np.random.randint(1, 255, shapes["weight"]).astype(dtypes["weight"])
    data = np.ones(shapes["data"]).astype(dtypes["data"])
    params = {"weight": weights}
    inputs = {"data": data}

    c_target = tvm.target.Target("c")
    workspace_pools = WorkspaceMemoryPools(
        [
            WorkspacePoolInfo(
                "my_memory_pool_1",
                [c_target],
                PoolInfoProperties(size_hint_bytes=8500000),
            ),
        ]
    )
    constant_pools = ConstantMemoryPools(
        [
            ConstantPoolInfo("my_const_pool_1", [c_target], []),
        ]
    )

    # Macros used by the prologue to size and initialise the two buffers.
    workspace_size_macro = _get_workspace_size_define_macro("my_memory_pool_1")
    constant_size_macro = _get_constant_size_define_macro("my_const_pool_1")
    constant_data_macro = _get_constant_data_define_macro("my_const_pool_1")
    runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo},
        prologue=f"""
        __attribute__((section(".bss.noinit"), aligned(TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES)))
        static uint8_t my_memory_pool_1[{workspace_size_macro}];
        __attribute__((section(".rodata.tvm"), aligned(TVM_RUNTIME_CONST_ALLOC_ALIGNMENT_BYTES)))
        static uint8_t my_const_pool_1[{constant_size_macro}] = {{ {constant_data_macro} }};
        """,
    )

    reference_outputs = generate_ref_data(mod, inputs, params)

    compiled = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=reference_outputs, params=params),
        interface_api="c",
        use_unpacked_api=True,
        pass_config=runner.pass_config,
        workspace_memory_pools=workspace_pools,
        constant_memory_pools=constant_pools,
        target=c_target,
    )

    for entry in compiled:
        _check_for_no_tvm_backendallocworkspace_calls(entry.executor_factory.lib)

    run_and_check(models=compiled, runner=runner, interface_api="c")
def test_two_models_with_a_single_external_pool(model_urls, usmp_algo):
    """Run two models that share one application-provided workspace pool.

    The prologue sizes the pool with a MAX macro over the workspace-size
    macros of "mod1" and "mod2".
    """
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    c_target = tvm.target.Target("c")
    shared_pool = WorkspaceMemoryPools(
        [PoolInfo("my_memory_pool", {c_target: PoolInfo.READ_WRITE_ACCESS})]
    )

    mod1_size = _get_workspace_size_define_macro("my_memory_pool", "mod1")
    mod2_size = _get_workspace_size_define_macro("my_memory_pool", "mod2")
    runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo},
        prologue=f"""
        #define MAX(A, B) ((A > B) ? A : B)
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool[MAX({mod1_size},{mod2_size})];
        """,
    )

    first_file = tf_testing.get_workload_official(model_urls[0][0], model_urls[0][1])
    mod1, inputs1, params1 = create_relay_module_and_inputs_from_tflite_file(first_file)
    outputs1 = generate_ref_data(mod1, inputs1, params1)

    second_file = tf_testing.get_workload_official(model_urls[1][0], model_urls[1][1])
    mod2, inputs2, params2 = create_relay_module_and_inputs_from_tflite_file(second_file)
    outputs2 = generate_ref_data(mod2, inputs2, params2)

    test_models = [
        AOTTestModel(
            name="mod1", module=mod1, inputs=inputs1, outputs=outputs1, params=params1
        ),
        AOTTestModel(
            name="mod2", module=mod2, inputs=inputs2, outputs=outputs2, params=params2
        ),
    ]
    compiled = compile_models(
        test_models,
        interface_api="c",
        use_unpacked_api=True,
        pass_config=runner.pass_config,
        workspace_memory_pools=shared_pool,
        target=c_target,
    )

    for entry in compiled:
        _check_for_no_tvm_backendallocworkspace_calls(entry.executor_factory.lib)

    run_and_check(models=compiled, runner=runner, interface_api="c")
def test_tflite_model_u4_usecase_two_external_pools(model_url, usmp_algo):
    """Run USMP inference with external pools and workspace-mapped I/O.

    "tir.usmp.use_workspace_io" is enabled. The prologue therefore declares
    the two pools, builds the workspace-pools struct, maps the input and
    output structs from it, and copies the first model input into place.
    """
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    c_target = tvm.target.Target("c")
    pools = WorkspaceMemoryPools(
        [
            PoolInfo(
                "my_memory_pool_1",
                {c_target: PoolInfo.READ_WRITE_ACCESS},
                size_hint_bytes=2500000,
            ),
            PoolInfo("my_memory_pool_2", {c_target: PoolInfo.READ_WRITE_ACCESS}),
        ]
    )

    model_file = tf_testing.get_workload_official(model_url[0], model_url[1])
    relay_mod, model_inputs, model_params = create_relay_module_and_inputs_from_tflite_file(
        model_file
    )
    reference_outputs = generate_ref_data(relay_mod, model_inputs, model_params)

    # Only the first input is copied into the workspace by the prologue.
    first_name, first_value = list(model_inputs.items())[0]
    first_nbytes = first_value.size * first_value.itemsize

    runner = AOTTestRunner(
        pass_config={
            "tir.usmp.enable": True,
            "tir.usmp.algorithm": usmp_algo,
            "tir.usmp.use_workspace_io": True,
        },
        prologue=f"""
        #include <string.h>
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_2[{_get_workspace_size_define_macro("my_memory_pool_2")}];
        struct {_add_module_prefix("workspace_pools")} {_add_module_prefix("workspace_pools")} = {{
            .my_memory_pool_1 = my_memory_pool_1,
            .my_memory_pool_2 = my_memory_pool_2,
        }};
        struct {_add_module_prefix("inputs")} {_add_module_prefix("inputs")} = {_add_module_prefix("map_inputs")}(&{_add_module_prefix("workspace_pools")});
        memcpy({_add_module_prefix("inputs")}.{first_name}, tvmgen_default_input_data_input, {first_nbytes});
        struct {_add_module_prefix("outputs")} {_add_module_prefix("outputs")} = {_add_module_prefix("map_outputs")}(&{_add_module_prefix("workspace_pools")});
        """,
    )

    compiled = compile_models(
        AOTTestModel(
            module=relay_mod,
            inputs=model_inputs,
            outputs=reference_outputs,
            params=model_params,
        ),
        interface_api="c",
        use_unpacked_api=True,
        pass_config=runner.pass_config,
        workspace_memory_pools=pools,
        target=c_target,
    )

    for entry in compiled:
        _check_for_no_tvm_backendallocworkspace_calls(entry.executor_factory.lib)

    run_and_check(
        models=compiled,
        runner=runner,
        interface_api="c",
        use_workspace_io=True,
    )