def test_mobilenet(enable_usmp, target_kind):
    """Build MobileNet with the AOT executor, export it as a shared library,
    reload it and check that the first output equals the reference output."""
    ir_mod, params = testing.mobilenet.get_workload(batch_size=1)

    # The input shape is read from the type of the first argument of "main".
    first_arg_type = ir_mod["main"].checked_type.arg_types[0]
    data_shape = [int(dim) for dim in first_arg_type.shape]
    inputs = {"data": np.random.uniform(size=data_shape).astype("float32")}
    ref_outputs = generate_ref_data(ir_mod, inputs, params)

    pass_config = {
        "tir.disable_vectorize": True,
        "tir.usmp.enable": enable_usmp,
    }
    with tvm.transform.PassContext(opt_level=3, config=pass_config):
        mod = tvm.relay.build(
            ir_mod,
            params=params,
            target=target_kind,
            executor=backend.Executor("aot", {"interface-api": "packed"}),
        )

    # Round-trip through a shared library on disk before running.
    temp_dir = tvm.contrib.utils.TempDirectory()
    test_so_path = temp_dir / "test.so"
    mod.export_library(test_so_path, cc="gcc", options=["-std=c11"])
    loaded_mod = tvm.runtime.load_module(test_so_path)

    runner = tvm.runtime.executor.AotModule(loaded_mod["default"](tvm.cpu(0)))
    runner.set_input(**inputs)
    runner.run()

    actual = runner.get_output(0).asnumpy()
    expected = list(ref_outputs.values())[0]
    assert (actual == expected).all()
def test_byoc_microtvm_multiple_subgraphs(merge_compiler_regions):
    """This is a test case to check BYOC capabilities of AOT with multiple sub graphs

    When ``merge_compiler_regions`` is truthy, ``MergeCompilerRegions`` is
    applied to the annotated module before it is partitioned.
    """
    use_unpacked_api = False
    interface_api = "packed"
    test_runner = AOT_DEFAULT_RUNNER
    num_weights = 8

    x = relay.var("x", shape=(10, 10))
    weights = [relay.var(f"w{i}", shape=(10, 10)) for i in range(num_weights)]

    # C compiler
    z0 = relay.add(x, weights[0])
    p0 = relay.subtract(z0, weights[1])
    q0 = relay.multiply(p0, weights[2])

    z1 = relay.add(x, weights[3])
    p1 = relay.subtract(z1, weights[4])
    q1 = relay.multiply(p1, weights[5])

    # Other parts on TVM
    z2 = relay.add(x, weights[6])
    q2 = relay.subtract(z2, weights[7])

    r = relay.concatenate((q0, q1, q2), axis=0)
    f = relay.Function([x, *weights], r)

    mod = tvm.IRModule()
    ann = byoc.CcompilerAnnotator()
    mod["main"] = ann.visit(f)

    if merge_compiler_regions:
        mod = transform.MergeCompilerRegions()(mod)

    mod = tvm.relay.transform.PartitionGraph("mod_name")(mod)
    mod = tvm.relay.transform.InferType()(mod)

    # Random data is drawn for "x" first and then for w0..w7, in that order.
    x_data = np.random.rand(10, 10).astype("float32")
    w_data = [np.random.rand(10, 10).astype("float32") for _ in range(num_weights)]

    map_inputs = OrderedDict(
        [("x", x_data)] + [(f"w{i}", w) for i, w in enumerate(w_data)]
    )
    output_list = generate_ref_data(mod, map_inputs)

    compile_and_run(
        AOTTestModel(name="my_mod", module=mod, inputs=map_inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
def create_conv2d(groups=1, test_runner=AOT_DEFAULT_RUNNER, weight_shape=32):
    """Create a single-conv2d Relay module together with its test data.

    A new ``AOTTestRunner`` is built from the fields of ``test_runner`` with
    the pass config replaced by ``{"tir.usmp.enable": True}``.

    Returns
    -------
    tuple
        ``(mod, inputs, output_list, test_runner)`` where ``test_runner`` is
        the newly built runner.
    """
    dtype = "float32"
    ishape = (1, 32, 14, 14)
    wshape = (32, weight_shape, 3, 3)

    usmp_runner = AOTTestRunner(
        makefile=test_runner.makefile,
        prologue=test_runner.prologue,
        epilogue=test_runner.epilogue,
        includes=test_runner.includes,
        parameters=test_runner.parameters,
        pass_config={"tir.usmp.enable": True},
    )

    data_var = relay.var("data", shape=ishape, dtype=dtype)
    weight_var = relay.var("weight", shape=wshape, dtype=dtype)
    conv = relay.nn.conv2d(
        data_var, weight_var, kernel_size=(3, 3), padding=(1, 1), groups=groups
    )

    mod = tvm.IRModule()
    mod["main"] = relay.Function([data_var, weight_var], conv)
    mod = transform.InferType()(mod)

    # Data is drawn before the weights, matching the input ordering.
    inputs = OrderedDict(
        [
            ("data", np.random.uniform(0, 1, ishape).astype(dtype)),
            ("weight", np.random.uniform(0, 1, wshape).astype(dtype)),
        ]
    )
    output_list = generate_ref_data(mod, inputs)
    return mod, inputs, output_list, usmp_runner
def test_mobilenet():
    """Full network test with Mobilenet"""
    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOT_DEFAULT_RUNNER

    mod, params = testing.mobilenet.get_workload(batch_size=1)

    # The backend is registered before any target naming it is constructed.
    uma_backend = VanillaAcceleratorBackend()
    uma_backend.register()
    target = tvm.target.Target("vanilla_accelerator", host=tvm.target.Target("c"))
    target_c = tvm.target.Target("c")

    first_arg_type = mod["main"].checked_type.arg_types[0]
    data_shape = [int(dim) for dim in first_arg_type.shape]
    input_list = {"data": np.random.uniform(size=data_shape).astype("float32")}

    # Reference outputs come from the module before it is partitioned.
    output_list = generate_ref_data(mod, input_list, params)
    mod = uma_backend.partition(mod)

    compile_and_run(
        AOTTestModel(module=mod, inputs=input_list, outputs=output_list, params=params),
        test_runner,
        interface_api,
        use_unpacked_api,
        workspace_byte_alignment=1,
        debug_calculated_workspaces=False,
        target=[target_c, target],
    )
def test_mobilenet(debug_calculated_workspaces, workspace_byte_alignment):
    """Compile and run MobileNet with the given workspace alignment and
    workspace-debug settings, reserving 1 MiB of extra memory."""
    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOT_DEFAULT_RUNNER

    # TODO(@Mousius) - Enable memory planning to take into account debug information
    debugging_memory_overhead = 1024 * 1024

    mod, params = testing.mobilenet.get_workload(batch_size=1)
    first_arg_type = mod["main"].checked_type.arg_types[0]
    data_shape = [int(dim) for dim in first_arg_type.shape]
    inputs = {"data": np.random.uniform(size=data_shape).astype("float32")}
    output_list = generate_ref_data(mod, inputs, params)

    model = AOTTestModel(
        module=mod,
        inputs=inputs,
        outputs=output_list,
        params=params,
        extra_memory_in_bytes=debugging_memory_overhead,
    )
    compile_and_run(
        model,
        test_runner,
        interface_api,
        use_unpacked_api,
        workspace_byte_alignment=workspace_byte_alignment,
        debug_calculated_workspaces=debug_calculated_workspaces,
    )
def test_dense(dim_m, dim_k, dim_n):
    """Test a subgraph with a single dense operator."""
    ishape = (dim_m, dim_k)
    wshape = (dim_n, dim_k)

    input0 = relay.var("input", relay.TensorType(ishape, "int8"))
    flattened = relay.op.nn.batch_flatten(input0)
    # Weights are drawn before the input data.
    weight_values = np.random.randint(low=-10, high=10, size=wshape, dtype="int8")
    weight0 = relay.const(weight_values)
    out = relay.op.nn.dense(flattened, weight0, out_dtype="int32")
    mod = tvm.IRModule.from_expr(relay.Function([input0], out))

    # NOTE: randint's upper bound is exclusive, so 127 is never drawn.
    input_values = np.random.randint(low=-128, high=127, size=ishape, dtype="int8")
    inputs = {"input": input_values}
    output_list = generate_ref_data(mod, inputs)

    cortex_m7_opts = {
        "-keys": "arm_cpu",
        "-mcpu": "cortex-m7",
    }
    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        runner=AOT_CORSTONE300_RUNNER,
        interface_api="c",
        use_unpacked_api=True,
        target_opts=cortex_m7_opts,
    )
def test_add_name_mangling_with_params(interface_api, use_unpacked_api, test_runner):
    """Checks name mangling works with parameters"""
    shape = (1, 10)
    input_x = relay.var("x", shape=shape)
    input_y = relay.var("y", shape=shape)
    relay_func = relay.Function([input_x, input_y], relay.add(input_x, input_y))

    # "x" is bound as a parameter (all ones); "y" is the run-time input.
    params = {"x": np.ones(shape).astype("float32")}
    inputs = {"y": np.random.uniform(size=shape).astype("float32")}
    output_list = generate_ref_data(relay_func, inputs, params)

    model = AOTTestModel(
        name="my_mod",
        module=relay_func,
        inputs=inputs,
        outputs=output_list,
        params=params,
    )
    compile_and_run(model, test_runner, interface_api, use_unpacked_api)
def test_add_with_params(interface_api, use_unpacked_api, test_runner):
    """Tests compilation of add with parameters"""
    shape = (1, 10)
    input_x = relay.var("x", shape=shape)
    input_y = relay.var("y", shape=shape)
    func = relay.Function([input_x, input_y], relay.add(input_x, input_y))

    # "x" is bound as a parameter (all ones); "y" is the run-time input.
    params = {"x": np.ones(shape).astype("float32")}
    inputs = {"y": np.random.uniform(size=shape).astype("float32")}
    output_list = generate_ref_data(func, inputs, params)

    model = AOTTestModel(
        module=IRModule.from_expr(func),
        inputs=inputs,
        outputs=output_list,
        params=params,
    )
    compile_and_run(model, test_runner, interface_api, use_unpacked_api)
def test_op_int8(zero_point, scale, compiler_cpu, cpu_flags):
    """Tests int8 QNN Softmax for CMSIS-NN"""
    interface_api = "c"
    use_unpacked_api = True
    dtype = "int8"
    shape = [1, 16, 16, 3]

    orig_mod = make_module(make_model(shape, dtype, dtype, zero_point, scale))
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # validate the output
    in_min, in_max = get_range_for_dtype_str(dtype)
    np.random.seed(0)
    inputs = {"in0": np.random.randint(in_min, high=in_max, size=shape, dtype=dtype)}
    params = {}
    # The reference comes from the unpartitioned "main" function.
    output_list = generate_ref_data(orig_mod["main"], inputs, params)

    model = AOTTestModel(
        module=cmsisnn_mod, inputs=inputs, outputs=output_list, params=params
    )
    compile_and_run(
        model,
        create_test_runner(compiler_cpu, cpu_flags),
        interface_api,
        use_unpacked_api,
    )
def test_ethosu_left_shift_binary_elemwise(
    accel_type,
    ifm_shape,
    ifm2_shape,
):
    """Compare an int32 ``left_shift`` partitioned for Ethos-U against the
    reference output of the unpartitioned module."""
    np.random.seed(0)
    dtype = "int32"

    def create_model():
        lhs = relay.var("ifm", shape=ifm_shape, dtype=dtype)
        rhs = relay.var("ifm2", shape=ifm2_shape, dtype=dtype)
        shifted = relay.left_shift(lhs, rhs)
        return tvm.IRModule.from_expr(relay.Function([lhs, rhs], shifted))

    cpu_mod = create_model()

    # Generate reference data
    in_min, in_max = util.get_range_for_dtype_str(dtype)
    ifm_values = np.random.randint(in_min, high=in_max, size=ifm_shape, dtype=dtype)
    # Shift amounts are drawn from [0, 32).
    ifm2_values = np.random.randint(0, high=32, size=ifm2_shape, dtype=dtype)
    input_data = {"ifm": ifm_values, "ifm2": ifm2_values}
    output_data = generate_ref_data(cpu_mod, input_data)

    ethosu_mod = partition_for_ethosu(cpu_mod)
    infra.compare_ethosu_with_reference(ethosu_mod, input_data, output_data, accel_type)
def test_add_name_mangling_with_params(interface_api, use_unpacked_api, test_runner):
    """Compile and run ``x + y`` under the module name "my_mod", with "x"
    supplied as a parameter and "y" as an input."""
    shape = (1, 10)
    x = relay.var("x", shape=shape)
    y = relay.var("y", shape=shape)
    func = relay.Function([x, y], relay.add(x, y))

    params = {"x": np.ones(shape).astype("float32")}
    inputs = {"y": np.random.uniform(size=shape).astype("float32")}
    output_list = generate_ref_data(func, inputs, params)

    model = AOTTestModel(
        name="my_mod",
        module=func,
        inputs=inputs,
        outputs=output_list,
        params=params,
    )
    compile_and_run(model, test_runner, interface_api, use_unpacked_api)
def test_op_int8(zero_point, scale):
    """Partition an int8 model for CMSIS-NN and check its AOT output on the
    USMP Corstone-300 runner against the unpartitioned reference."""
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_USMP_CORSTONE300_RUNNER
    dtype = "int8"
    shape = [1, 16, 16, 3]

    orig_mod = make_module(make_model(shape, dtype, dtype, zero_point, scale))
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # validate the output
    in_min, in_max = get_range_for_dtype_str(dtype)
    np.random.seed(0)
    inputs = {"in0": np.random.randint(in_min, high=in_max, size=shape, dtype=dtype)}
    params = {}
    output_list = generate_ref_data(orig_mod["main"], inputs, params)

    model = AOTTestModel(
        module=cmsisnn_mod, inputs=inputs, outputs=output_list, params=params
    )
    compile_and_run(model, test_runner, interface_api, use_unpacked_api)
def test_elementwise_add_from_constant_scalar(accel_type, dtype, constant):
    """Compare a QNN add of an input tensor and a constant scalar, partitioned
    for Ethos-U, against the reference output of the unpartitioned module."""
    np.random.seed(0)
    ifm_shape = (1, 4, 4, 8)

    def create_relay_graph():
        inp = relay.var("input", shape=ifm_shape, dtype=dtype)
        scalar = relay.const(constant, dtype=dtype)
        # Three (scale, zero point) pairs of (1.0, 0), built in argument order.
        quant_args = []
        for _ in range(3):
            quant_args.append(relay.const(1.0, dtype="float32"))
            quant_args.append(relay.const(0, dtype="int32"))
        add = relay.qnn.op.add(inp, scalar, *quant_args)
        return tvm.IRModule.from_expr(
            relay.Function(relay.analysis.free_vars(add), add)
        )

    cpu_mod = create_relay_graph()
    ethosu_mod = partition_for_ethosu(cpu_mod)

    # Generate reference data
    dtype_info = np.iinfo(dtype)
    input_data = {
        "input": np.random.randint(
            low=dtype_info.min, high=dtype_info.max, size=ifm_shape, dtype=dtype
        ),
    }
    output_data = generate_ref_data(cpu_mod, input_data)

    infra.compare_ethosu_with_reference(ethosu_mod, input_data, output_data, accel_type)
def test_name_sanitiser_name_clash():
    """Test that 2 input tensors with names that clash once sanitized, generates an error"""
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_DEFAULT_RUNNER
    dtype = "float32"

    x = relay.var("input::-1", shape=(10, 5), dtype=dtype)
    # Next 2 input tensor names will clash once sanitized.
    y = relay.var("input::-2", shape=(10, 5), dtype=dtype)
    t = relay.var("input:--2", shape=(), dtype=dtype)
    z = relay.add(relay.transpose(relay.add(x, y)), t)
    func = relay.Function([x, y, t], z)

    # Check result.
    inputs = {
        "input::-1": np.random.rand(10, 5).astype(dtype),
        "input::-2": np.random.rand(10, 5).astype(dtype),
        "input:--2": np.random.uniform(size=()).astype(dtype),
    }
    output_list = generate_ref_data(func, inputs)

    with pytest.raises(TVMError, match="Sanitized input tensor name clash"):
        model = AOTTestModel(
            module=IRModule.from_expr(func), inputs=inputs, outputs=output_list
        )
        compile_and_run(
            model,
            test_runner,
            interface_api,
            use_unpacked_api,
            enable_op_fusion=False,
        )
def test_deprecated_target_arguments(capsys):
    """Tests we can still use relay.build with -executor, -runtime and -link-params"""
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_DEFAULT_RUNNER
    shape = (1, 10)

    x = relay.var("x", shape=shape)
    y = relay.var("y", shape=shape)
    func = relay.Function([x, y], relay.add(x, y))

    params = {"x": np.ones(shape).astype("float32")}
    inputs = {"y": np.random.uniform(size=shape).astype("float32")}
    output_list = generate_ref_data(func, inputs, params)

    # Executor, runtime and link-params are all given as target-string flags.
    deprecated_target = (
        "c -executor=aot --link-params -runtime=c -interface-api=c --unpacked-api"
    )
    model = AOTTestModel(
        module=IRModule.from_expr(func),
        inputs=inputs,
        outputs=output_list,
        params=params,
    )
    compile_and_run(
        model,
        test_runner,
        interface_api,
        use_unpacked_api,
        use_runtime_executor=False,
        target=deprecated_target,
    )
def test_quant_mobilenet_tfl():
    """Since in AOT we pass directly the output buffer from the user,
    in quantized networks sharing the output buffers is not possible.
    This is because the output data type is int8 and the intermediate
    buffer are int32 or int16. We use mobilenet quantized to stress this
    situation and verify that the output buffer sharing is disabled in AOT."""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOT_DEFAULT_RUNNER

    model_archive_url = (
        "https://storage.googleapis.com/download.tensorflow.org/"
        "models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz"
    )
    tflite_model_file = tf_testing.get_workload_official(
        model_archive_url,
        "mobilenet_v1_1.0_224_quant.tflite",
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file
    )
    output_list = generate_ref_data(mod, inputs, params)

    model = AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params)
    compile_and_run(model, test_runner, interface_api, use_unpacked_api)
def test_transpose(interface_api, use_unpacked_api, test_runner):
    """Test that non-inplaceable operations (e.g., transpose) do not happen in-place."""
    dtype = "float32"

    x = relay.var("x", shape=(10, 5), dtype=dtype)
    y = relay.var("y", shape=(10, 5), dtype=dtype)
    # The scalar variable is named "z" in the Relay function.
    t = relay.var("z", shape=(), dtype=dtype)
    result = relay.add(relay.transpose(relay.add(x, y)), t)
    func = relay.Function([x, y, t], result)

    # Check result.
    inputs = {
        "x": np.random.rand(10, 5).astype(dtype),
        "y": np.random.rand(10, 5).astype(dtype),
        "z": np.random.uniform(size=()).astype(dtype),
    }
    output_list = generate_ref_data(func, inputs)

    model = AOTTestModel(
        module=IRModule.from_expr(func), inputs=inputs, outputs=output_list
    )
    compile_and_run(
        model,
        test_runner,
        interface_api,
        use_unpacked_api,
        enable_op_fusion=False,
    )
def test_ethosu_requantize(accel_type, ifm_shape, ifm_scale, ifm_zp, ofm_scale, ofm_zp):
    """Compare an int8 QNN requantize partitioned for Ethos-U against the
    reference output of the unpartitioned module."""
    np.random.seed(0)
    dtype = "int8"

    def create_model():
        ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
        input_scale = relay.const(ifm_scale, dtype="float32")
        input_zero_point = relay.const(ifm_zp, dtype="int32")
        output_scale = relay.const(ofm_scale, dtype="float32")
        output_zero_point = relay.const(ofm_zp, dtype="int32")
        requantize = relay.qnn.op.requantize(
            ifm,
            input_scale,
            input_zero_point,
            output_scale,
            output_zero_point,
        )
        return tvm.IRModule.from_expr(relay.Function([ifm], requantize))

    cpu_mod = create_model()

    # NOTE: randint's upper bound is exclusive, so 127 is never drawn.
    ifm_values = np.random.randint(-128, high=127, size=ifm_shape, dtype=dtype)
    input_data = {"ifm": ifm_values}
    output_data = generate_ref_data(cpu_mod, input_data)

    ethosu_mod = partition_for_ethosu(cpu_mod)
    infra.compare_ethosu_with_reference(ethosu_mod, input_data, output_data, accel_type)
def fixture_non_device_api_main_func():
    """Test function generator which does not generate C Device API calls

    Returns a callable ``compile_to_main_func(interface_api, use_unpacked_api)``
    that compiles an elementwise-multiply model and returns the
    ``__tvm_main__`` function of the first lowered IR module.
    """
    x = relay.var("x", shape=(10, 10))
    y = relay.var("y", shape=(1, 10))
    func = relay.Function([x, y], relay.multiply(x, y))

    inputs = OrderedDict(
        [
            ("x", np.random.rand(10, 10).astype("float32")),
            ("y", np.random.rand(1, 10).astype("float32")),
        ]
    )
    output_list = generate_ref_data(func, inputs)

    def compile_to_main_func(interface_api="c", use_unpacked_api=True):
        test_runner = AOT_DEFAULT_RUNNER
        model = AOTTestModel(
            module=IRModule.from_expr(func),
            inputs=inputs,
            outputs=output_list,
        )
        compiled_models = compile_models(
            models=model,
            interface_api=interface_api,
            use_unpacked_api=use_unpacked_api,
            workspace_byte_alignment=16,
            pass_config=test_runner.pass_config,
        )
        lowered_ir_mods = compiled_models[0].executor_factory.lowered_ir_mods
        main_ir_module = list(lowered_ir_mods.values())[0]
        return main_ir_module["__tvm_main__"]

    return compile_to_main_func
def test_maxpool_1d(data_shape_nwc, pool_size, strides, padding):
    """Test a subgraph with a single maxpool_1d operator."""
    ishape = data_shape_nwc

    input0 = relay.var("input", relay.TensorType(ishape, "int8"))
    pooled = relay.op.nn.max_pool1d(
        input0, pool_size, layout="NWC", strides=strides, padding=padding
    )
    mod = tvm.IRModule.from_expr(relay.Function([input0], pooled))

    # NOTE: randint's upper bound is exclusive, so 127 is never drawn.
    input_values = np.random.randint(low=-128, high=127, size=ishape, dtype="int8")
    inputs = {"input": input_values}
    output_list = generate_ref_data(mod, inputs)

    cortex_m7_opts = {
        "-keys": "arm_cpu",
        "-mcpu": "cortex-m7",
    }
    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        runner=AOT_CORSTONE300_RUNNER,
        interface_api="c",
        use_unpacked_api=True,
        target_opts=cortex_m7_opts,
    )
def test_conv2d(interface_api, use_unpacked_api, test_runner, groups, weight_shape):
    """Test a subgraph with a single conv2d operator."""
    dtype = "float32"
    ishape = (1, 32, 14, 14)
    wshape = (32, weight_shape, 3, 3)

    data_var = relay.var("data", shape=ishape, dtype=dtype)
    weight_var = relay.var("weight", shape=wshape, dtype=dtype)
    conv = relay.nn.conv2d(
        data_var, weight_var, kernel_size=(3, 3), padding=(1, 1), groups=groups
    )

    mod = tvm.IRModule()
    mod["main"] = relay.Function([data_var, weight_var], conv)
    mod = transform.InferType()(mod)

    # Data is drawn before the weights, matching the input ordering.
    inputs = OrderedDict(
        [
            ("data", np.random.uniform(0, 1, ishape).astype(dtype)),
            ("weight", np.random.uniform(0, 1, wshape).astype(dtype)),
        ]
    )
    output_list = generate_ref_data(mod, inputs)

    model = AOTTestModel(module=mod, inputs=inputs, outputs=output_list)
    compile_and_run(model, test_runner, interface_api, use_unpacked_api)
def test_pool(
    self,
    pool_type,
    shape,
    dtype,
    pool_size,
    strides,
    padding,
    dilation,
    layout,
    ceil_mode,
    schedule_name,
):
    """Test a subgraph with a single pooling operator selected by ``pool_type``.

    The same graph is built twice: once as the reference module and once as
    the module that is compiled with ``schedule_name``.
    """
    ishape = shape
    pool_op = getattr(relay.op.nn, pool_type)

    def build_pool_module():
        # Each call creates its own "input" variable and pooling expression.
        input_var = relay.var("input", relay.TensorType(ishape, dtype))
        pooled = pool_op(
            input_var,
            pool_size=pool_size,
            strides=strides,
            dilation=dilation,
            padding=padding,
            layout=layout,
            out_layout="",
            ceil_mode=ceil_mode,
        )
        return tvm.IRModule.from_expr(relay.Function([input_var], pooled))

    ref_mod = build_pool_module()
    mod = build_pool_module()

    inputs = {
        "input": np.random.randint(low=-128, high=127, size=ishape, dtype=dtype)
    }
    output_list = generate_ref_data(ref_mod, inputs)

    cortex_m7_opts = {
        "-keys": "arm_cpu",
        "-mcpu": "cortex-m7",
    }
    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        runner=AOT_CORSTONE300_RUNNER,
        interface_api="c",
        use_unpacked_api=True,
        target_opts=cortex_m7_opts,
        schedule_name=schedule_name,
    )
def test_conv_with_params(interface_api, use_unpacked_api, test_runner):
    """Tests compilation of convolution with parameters"""
    mod = get_conv2d_relay_module()
    main_func = mod["main"]

    # Shapes and dtypes are looked up per parameter name of "main".
    shape_dict = {}
    type_dict = {}
    for param in main_func.params:
        shape_dict[param.name_hint] = param.checked_type.concrete_shape
        type_dict[param.name_hint] = param.checked_type.dtype

    # Both the weights (bound as a parameter) and the data are all ones.
    params = {"weight": np.ones(shape_dict["weight"]).astype(type_dict["weight"])}
    inputs = {"data": np.ones(shape_dict["data"]).astype(type_dict["data"])}
    output_list = generate_ref_data(mod, inputs, params)

    model = AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params)
    compile_and_run(model, test_runner, interface_api, use_unpacked_api)
def test_multiple_models(interface_api, use_unpacked_api, test_runner):
    """Compiles multiple models to ensure both can be compiled into one output"""
    # Identity model without params
    x = relay.var("x", "float32")
    mod1 = relay.Function([x], x)
    inputs1 = {"x": np.array(1.0, "float32")}
    output_list1 = generate_ref_data(mod1, inputs1)
    params1 = None

    # Convolution model
    mod2 = get_conv2d_relay_module()
    main_func = mod2["main"]
    shape_dict = {}
    type_dict = {}
    for param in main_func.params:
        shape_dict[param.name_hint] = param.checked_type.concrete_shape
        type_dict[param.name_hint] = param.checked_type.dtype

    params2 = {"weight": np.ones(shape_dict["weight"]).astype(type_dict["weight"])}
    inputs2 = {"data": np.ones(shape_dict["data"]).astype(type_dict["data"])}
    output_list2 = generate_ref_data(mod2, inputs2, params2)

    models = [
        AOTTestModel(
            name="mod1",
            module=mod1,
            inputs=inputs1,
            outputs=output_list1,
            params=params1,
        ),
        AOTTestModel(
            name="mod2",
            module=mod2,
            inputs=inputs2,
            outputs=output_list2,
            params=params2,
        ),
    ]
    compile_and_run(models, test_runner, interface_api, use_unpacked_api)
def test_conv2d(data_shape_nhwc, kernel_size, num_filter, strides, padding, dilation, dtype):
    """Test a subgraph with a single conv2d operator.

    The reference module uses an HWIO kernel; the compiled module uses the
    same weights with axis 2 moved to the end and an HWOI kernel layout.
    """
    ishape = data_shape_nhwc
    wshape = (*kernel_size, data_shape_nhwc[-1], num_filter)
    weight_data = np.random.randint(low=-10, high=10, size=wshape, dtype=dtype)

    def build_module(weights, kernel_layout):
        # Each call creates its own "input" variable and conv2d expression.
        input_var = relay.var("input", relay.TensorType(ishape, dtype))
        conv = relay.op.nn.conv2d(
            input_var,
            relay.const(weights),
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            dilation=(dilation, dilation),
            data_layout="NHWC",
            kernel_layout=kernel_layout,
            out_dtype="int32",
            out_layout="NHWC",
        )
        return tvm.IRModule.from_expr(relay.Function([input_var], conv))

    ref_mod = build_module(weight_data, "HWIO")
    mod = build_module(np.moveaxis(weight_data, 2, -1), "HWOI")

    inputs = {
        "input": np.random.randint(low=-128, high=127, size=ishape, dtype=dtype)
    }
    output_list = generate_ref_data(ref_mod, inputs)

    cortex_m7_opts = {
        "-keys": "arm_cpu",
        "-mcpu": "cortex-m7",
    }
    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        runner=AOT_CORSTONE300_RUNNER,
        interface_api="c",
        use_unpacked_api=True,
        target_opts=cortex_m7_opts,
    )
def test_tflite_model_u3_usecase_two_external_pools(model_url, usmp_algo):
    """This checks for inference using two external pools placed in the application"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    # Only the first pool is given a size hint.
    first_pool = PoolInfo(
        "my_memory_pool_1", {target: PoolInfo.READ_WRITE_ACCESS}, size_hint_bytes=2500000
    )
    second_pool = PoolInfo("my_memory_pool_2", {target: PoolInfo.READ_WRITE_ACCESS})
    workspace_memory_pools = WorkspaceMemoryPools([first_pool, second_pool])

    usmp_pass_config = {"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo}
    # The prologue declares both pool buffers in the application code.
    test_runner = AOTTestRunner(
        pass_config=usmp_pass_config,
        prologue=f""" __attribute__((section(".data.tvm"), aligned(16))) static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}]; __attribute__((section(".data.tvm"), aligned(16))) static uint8_t my_memory_pool_2[{_get_workspace_size_define_macro("my_memory_pool_2")}]; """,
    )

    tflite_model_file = tf_testing.get_workload_official(model_url[0], model_url[1])
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file
    )
    output_list = generate_ref_data(mod, inputs, params)

    model = AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params)
    compiled_test_mods = compile_models(
        model,
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
def test_op_int8(
    op,
    relu_type,
    input_0_scale,
    input_0_zero_point,
    input_1_scale,
    input_1_zero_point,
    compiler_cpu,
    cpu_flags,
):
    """Tests QNN binary operator for CMSIS-NN"""
    interface_api = "c"
    use_unpacked_api = True
    dtype = "int8"
    shape = [1, 16, 16, 3]

    lhs = generate_variable("input_0")
    rhs = generate_variable("input_1")
    model = make_model(
        op,
        lhs,
        rhs,
        input_0_scale,
        input_0_zero_point,
        input_1_scale,
        input_1_zero_point,
        relu_type,
    )
    orig_mod = make_module(model)
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # validate the output
    in_min, in_max = get_range_for_dtype_str(dtype)
    inputs = {
        name: np.random.randint(in_min, high=in_max, size=shape, dtype=dtype)
        for name in ("input_0", "input_1")
    }
    output_list = generate_ref_data(orig_mod["main"], inputs)

    aot_model = AOTTestModel(
        module=cmsisnn_mod,
        inputs=inputs,
        outputs=output_list,
        output_tolerance=1,
    )
    compile_and_run(
        aot_model,
        create_test_runner(compiler_cpu, cpu_flags),
        interface_api,
        use_unpacked_api,
    )
def test_tflite_model_u1_usecase(model_url, usmp_algo, workspace_size, constant_size):
    """
    This checks for ML models and the memory used by them
    when using USMP with different algorithms

    ``model_url`` is indexed as ``(model_url[0], model_url[1])`` and passed to
    ``get_workload_official``. ``workspace_size`` and ``constant_size`` are
    the expected byte sizes reported for "main" in the function memory map.
    """
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo}
    )

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file
    )
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    executor_factory = compiled_test_mods[0].executor_factory

    # Checking the workspace size reported in model library format
    mlf_memory_map = mlf._build_function_memory_map(executor_factory.function_metadata)
    main_entry = mlf_memory_map["main"][0]
    assert main_entry["workspace_size_bytes"] == workspace_size
    assert main_entry["constants_size_bytes"] == constant_size

    # That should match to workspace size that will be codegen'd to the entry point.
    pool_inputs = dict(executor_factory.executor_codegen_metadata.pool_inputs)
    allocated_pool_info_size = sum(
        pool_info.allocated_size for pool_info in pool_inputs.values()
    )
    assert allocated_pool_info_size == workspace_size + constant_size

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
def test_conv2d(interface_api, use_unpacked_api, test_runner, groups, weight_shape):
    """Test a subgraph with a single conv2d operator.

    The module, inputs, reference outputs and a USMP-enabled runner come from
    ``create_conv2d``. The model is first compiled and run in one step, then
    compiled again separately so the generated library can be checked before
    it is run.
    """
    mod, inputs, output_list, test_runner = create_conv2d(
        groups=groups, test_runner=test_runner, weight_shape=weight_shape
    )

    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )

    compiled_test_mods = compile_models(
        models=AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
def test_same_input_to_binary_op(op, relu_type):
    """Tests QNN binary operator for CMSIS-NN where both inputs are the same"""
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_USMP_CORSTONE300_RUNNER

    dtype = "int8"
    shape = [1, 16, 16, 3]
    input_scale = 0.256
    input_zero_point = 33

    # The same variable is passed as both operands.
    input_ = generate_variable("input")
    model = make_model(
        op,
        input_,
        input_,
        input_scale,
        input_zero_point,
        input_scale,
        input_zero_point,
        relu_type,
    )
    orig_mod = make_module(model)
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # Check if the number of internal function parameter is 1
    cmsisnn_global_func = cmsisnn_mod["tvmgen_default_cmsis_nn_main_0"]
    func_body = cmsisnn_global_func.body
    assert (
        isinstance(func_body, tvm.relay.expr.Call) and len(func_body.args) == 1
    ), "Composite function for the binary op should have only 1 parameter."

    # validate the output
    in_min, in_max = get_range_for_dtype_str(dtype)
    inputs = {
        "input": np.random.randint(in_min, high=in_max, size=shape, dtype=dtype),
    }
    output_list = generate_ref_data(orig_mod["main"], inputs)

    aot_model = AOTTestModel(
        module=cmsisnn_mod,
        inputs=inputs,
        outputs=output_list,
        output_tolerance=1,
    )
    compile_and_run(aot_model, test_runner, interface_api, use_unpacked_api)