def device_api_main_func():
    """Return a callable that compiles a max-pool network and yields its main function.

    A small TensorFlow max-pool model is quantized to int8 through the TFLite
    converter, imported into Relay and partitioned for Ethos-U.  The returned
    ``compile_to_main_func`` closure compiles that partitioned module and
    returns the ``"__tvm_main__"`` function from the lowered IR modules.
    """
    # Ideally we should have a sample Target registered here
    # but we're going to re-use this for now
    pytest.importorskip("ethosu.vela")

    import tensorflow as tf
    import tflite.Model
    from tests.python.contrib.test_ethosu.infra import (
        create_test_runner,
        generate_ref_data_tflite,
    )
    from tvm.relay.op.contrib.ethosu import partition_for_ethosu

    tf.config.run_functions_eagerly(True)

    # Single source of truth for the input tensor shape used throughout.
    ifm_shape = (1, 3, 4, 3)

    class Model(tf.Module):
        @tf.function
        def tf_function(self, x):
            return tf.nn.max_pool(x, [1, 2], [1, 2], "SAME")

    def calibration_batches():
        # Random float32 samples handed to the converter as its
        # representative dataset.
        for _ in range(100):
            sample = np.random.rand(*ifm_shape)
            yield [sample.astype(np.float32)]

    input_spec = tf.TensorSpec(list(ifm_shape), dtype=tf.float32)
    concrete_func = Model().tf_function.get_concrete_function(input_spec)

    converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.representative_dataset = calibration_batches
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.int8
    converter.inference_output_type = tf.int8
    tflite_graph = converter.convert()

    parsed_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)
    relay_module, params = relay.frontend.from_tflite(
        parsed_model,
        shape_dict={"x": list(ifm_shape)},
        dtype_dict={"x": "int8"},
    )
    mod = partition_for_ethosu(relay_module, params)

    # Generate reference data
    input_data, output_data = generate_ref_data_tflite(tflite_graph)

    def compile_to_main_func(interface_api="c", use_unpacked_api=True):
        """Compile the partitioned module and return its ``__tvm_main__`` function."""
        runner = create_test_runner()
        aot_model = AOTTestModel(
            module=mod,
            inputs=input_data,
            outputs=output_data,
        )
        compiled_models = compile_models(
            models=aot_model,
            interface_api=interface_api,
            use_unpacked_api=use_unpacked_api,
            workspace_byte_alignment=16,
            pass_config=runner.pass_config,
        )
        lowered_ir_mods = compiled_models[0].executor_factory.lowered_ir_mods
        # Take the module of the first (target, module) entry.
        main_ir_module = lowered_ir_mods.items()[0][1]
        return main_ir_module["__tvm_main__"]

    return compile_to_main_func
def test_mean(accel_type, ifm_shape, axis, keep_dims, use_same_quantization):
    """Build a mean-reduction network, offload it to Ethos-U and verify the result.

    When ``use_same_quantization`` is true the graph is written directly in
    Relay (cast -> mean -> requantize); otherwise it is produced by quantizing
    a TensorFlow ``reduce_mean`` model through the TFLite converter.  ``axis``
    and ``keep_dims`` are forwarded to the mean operator in both cases, and
    ``accel_type`` is forwarded to the build and verification helpers.
    """
    dtype = "int8"

    def create_mod_from_tflite():
        """Return (module, inputs, reference outputs) from a quantized TFLite model."""

        class Model(tf.Module):
            @tf.function
            def tf_function(self, x):
                op = tf.math.reduce_mean(x, axis=axis, keepdims=keep_dims)
                return op

        input_spec = tf.TensorSpec(ifm_shape, dtype=tf.float32)
        concrete_func = Model().tf_function.get_concrete_function(input_spec)

        # Convert the model
        def calibration_batches():
            for _ in range(100):
                sample = np.random.rand(*tuple(ifm_shape))
                yield [sample.astype(np.float32)]

        converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.representative_dataset = calibration_batches
        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
        converter.inference_input_type = tf.int8
        converter.inference_output_type = tf.int8
        tflite_graph = converter.convert()

        parsed_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)
        tflite_mod, _ = relay.frontend.from_tflite(
            parsed_model,
            shape_dict={"ifm": ifm_shape},
            dtype_dict={"ifm": dtype},
        )
        ref_inputs, ref_outputs = infra.generate_ref_data_tflite(tflite_graph)
        return tflite_mod, ref_inputs, ref_outputs

    def create_mod_from_relay():
        """Return (module, inputs, reference outputs) from a hand-built Relay graph."""
        ifm = relay.var("input", shape=ifm_shape, dtype=dtype)
        widened = relay.cast(ifm, dtype="int32")
        reduced = relay.mean(widened, axis=axis, keepdims=keep_dims)
        # Input and output share scale 1.0 and zero point 0.
        requantize = relay.qnn.op.requantize(
            reduced,
            input_scale=relay.const(1.0, dtype="float32"),
            input_zero_point=relay.const(0, dtype="int32"),
            output_scale=relay.const(1.0, dtype="float32"),
            output_zero_point=relay.const(0, dtype="int32"),
        )

        func = relay.Function(relay.analysis.free_vars(requantize), requantize)
        relay_mod = tvm.IRModule.from_expr(func)

        random_ifm = np.random.randint(low=-127, high=128, size=ifm_shape, dtype=dtype)
        ref_inputs = {"input": random_ifm}
        ref_outputs = generate_ref_data(relay_mod, ref_inputs)
        return relay_mod, ref_inputs, ref_outputs

    if use_same_quantization:
        mod, input_data, output_data = create_mod_from_relay()
    else:
        mod, input_data, output_data = create_mod_from_tflite()
    mod = partition_for_ethosu(mod)

    # TODO(lhutton1) For now output is not bit exact with TFLite.
    # This is because TFLite reference kernels are not being used.
    # For this, TFLite will need upgrading to 2.6.
    compiled_models = infra.build_source(
        mod, input_data, output_data, accel_type, output_tolerance=1
    )

    # Assumes only two runtime.Modules are created -- i.e. single offload module
    host_lib = compiled_models[0].executor_factory.lib
    ethosu_module = host_lib.imported_modules[0].imported_modules[0]

    # Verify generated C source
    get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts")
    compilation_artifacts = get_artifacts(ethosu_module)
    cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
    infra.print_payload(cmms)
    infra.verify_source(compiled_models, accel_type)
def test_ethosu_conv2d_single(
    ifm_shape,
    kernel_shape,
    strides,
    dilation,
    padding,
    accel_type,
    activation,
):
    """Offload a single quantized TFLite conv2d to Ethos-U and verify the result.

    A TensorFlow ``conv2d`` (followed by ReLU when ``activation`` is truthy)
    is quantized to int8 through the TFLite converter, imported into Relay,
    partitioned for Ethos-U, built, and checked against reference data
    generated from the TFLite graph.
    """
    dtype = "int8"

    def create_tflite_graph_single():
        """Return the serialized, int8-quantized TFLite model for the convolution."""

        class Model(tf.Module):
            @tf.function
            def tf_function(self, x):
                # Use tf.nn API to create the model
                tf_strides = [1, strides[0], strides[1], 1]
                # Random weights with 3 input and 3 output channels.
                weight_shape = [kernel_shape[0], kernel_shape[1], 3, 3]
                weights = tf.constant(
                    np.random.uniform(size=weight_shape),
                    dtype=tf.float32,
                )
                op = tf.nn.conv2d(
                    x,
                    filters=weights,
                    strides=tf_strides,
                    padding=padding,
                    dilations=dilation,
                )
                if activation:
                    op = tf.nn.relu(op)
                return op

        input_spec = tf.TensorSpec(ifm_shape, dtype=tf.float32)
        concrete_func = Model().tf_function.get_concrete_function(input_spec)

        # Convert the model
        def calibration_batches():
            for _ in range(100):
                sample = np.random.rand(*tuple(ifm_shape))
                yield [sample.astype(np.float32)]

        converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.representative_dataset = calibration_batches
        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
        converter.inference_input_type = tf.int8
        converter.inference_output_type = tf.int8
        return converter.convert()

    tflite_graph = create_tflite_graph_single()
    parsed_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)

    relay_module, params = relay.frontend.from_tflite(
        parsed_model,
        shape_dict={"input": ifm_shape},
        dtype_dict={"input": dtype},
    )
    mod = partition_for_ethosu(relay_module, params)

    # Generate reference data
    input_data, output_data = infra.generate_ref_data_tflite(tflite_graph)

    compiled_models = infra.build_source(
        mod,
        input_data,
        output_data,
        accel_type,
    )

    # Assumes only two runtime.Modules are created -- i.e. single offload module
    host_lib = compiled_models[0].executor_factory.lib
    ethosu_module = host_lib.imported_modules[0].imported_modules[0]

    # Verify generated C source
    get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts")
    compilation_artifacts = get_artifacts(ethosu_module)
    cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
    infra.print_payload(cmms)
    infra.verify_source(compiled_models, accel_type)
def test_ethosu_conv2d(accel_type):
    """Offload hand-built QNN conv2d graphs to Ethos-U and verify the result.

    Three Relay graphs are exercised in turn: a single convolution, two
    chained convolutions, and two chained convolutions where the first has a
    "CLIP" activation.  Each graph is partitioned for Ethos-U, built against
    reference data produced from the unpartitioned module, and verified.
    """

    def create_graph_single(input_tensor_name, input_tensor_shape, input_tensor_dtype):
        """Return a module with one 3x3, 32-output-channel convolution and its params."""
        conv = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        conv.ifm.shape = input_tensor_shape
        conv.kernel.shape = (3, 3, conv.ifm.shape[3], 32)
        conv.kernel.sc = relay.const(np.random.rand(32) * 2, "float32")
        conv.strides = (1, 1)
        conv.pad = "VALID"
        conv.update_output_qnn_params(
            input_tensor_dtype, input_tensor_dtype, input_tensor_dtype
        )
        ifm_var = relay.var(input_tensor_name, shape=conv.ifm.shape, dtype=conv.ifm.dtype)
        conv_expr, _ = relay_ir_builder.create_qnn_conv2d(conv, ifm_var)
        conv.ofm.shape = get_shape_expr(ifm_var, conv_expr)

        graph = tvm.IRModule()
        graph["main"] = relay.Function([ifm_var], conv_expr)
        return graph, [conv]

    def create_graph_double(input_tensor_name, input_tensor_shape, input_tensor_dtype):
        """Return a module with a 7x7 stride-2 conv feeding a 5x5 conv, and their params."""
        first = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        first.ifm.shape = input_tensor_shape
        first.kernel.shape = (7, 7, first.ifm.shape[3], 8)
        first.strides = (2, 2)
        first.pad = "VALID"
        first.update_output_qnn_params(
            input_tensor_dtype, input_tensor_dtype, input_tensor_dtype
        )
        ifm_var = relay.var(input_tensor_name, shape=first.ifm.shape, dtype=first.ifm.dtype)
        first_expr, _ = relay_ir_builder.create_qnn_conv2d(first, ifm_var)
        first.ofm.shape = get_shape_expr(ifm_var, first_expr)

        # The second convolution consumes the first one's output.
        second = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        second.ifm.shape = first.ofm.shape
        second.kernel.shape = (5, 5, second.ifm.shape[3], 16)
        second.strides = (1, 1)
        second.pad = "SAME"
        second.update_output_qnn_params()
        second_expr, _ = relay_ir_builder.create_qnn_conv2d(second, first_expr)
        second.ofm.shape = get_shape_expr(ifm_var, second_expr)

        graph = tvm.IRModule()
        graph["main"] = relay.Function([ifm_var], second_expr)
        return graph, [second, first]

    def create_graph_activation(input_tensor_name, input_tensor_shape, input_tensor_dtype):
        """Return the two-conv module with a "CLIP" activation on the first conv."""
        first = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        first.ifm.shape = input_tensor_shape
        first.kernel.shape = (7, 7, first.ifm.shape[3], 8)
        first.strides = (2, 2)
        first.pad = "VALID"
        first.activation = "CLIP"
        first.clip_min = 90
        first.clip_max = 110
        first.update_output_qnn_params(
            input_tensor_dtype, input_tensor_dtype, input_tensor_dtype
        )
        ifm_var = relay.var(input_tensor_name, shape=first.ifm.shape, dtype=first.ifm.dtype)
        first_expr, _ = relay_ir_builder.create_qnn_conv2d(first, ifm_var)
        first.ofm.shape = get_shape_expr(ifm_var, first_expr)

        # The second convolution consumes the first one's output.
        second = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        second.ifm.shape = first.ofm.shape
        second.kernel.shape = (5, 5, second.ifm.shape[3], 16)
        second.strides = (1, 1)
        second.pad = "SAME"
        second.update_output_qnn_params()
        second_expr, _ = relay_ir_builder.create_qnn_conv2d(second, first_expr)
        second.ofm.shape = get_shape_expr(ifm_var, second_expr)

        graph = tvm.IRModule()
        graph["main"] = relay.Function([ifm_var], second_expr)
        return graph, [second, first]

    # Each entry pairs a graph builder with its (name, shape, dtype) arguments.
    test_cases = [
        (create_graph_single, ["input", (1, 300, 300, 3), "int8"]),
        (create_graph_double, ["input", (1, 128, 256, 4), "int8"]),
        (create_graph_activation, ["input", (1, 64, 100, 4), "int8"]),
    ]

    # Seed once, before any graph is built, so the random draws are repeatable.
    np.random.seed(42)
    for build_graph, graph_args in test_cases:
        relay_module, _ = build_graph(*graph_args)
        input_tensor, input_shape, input_dtype = graph_args
        mod = partition_for_ethosu(relay_module)

        # Generate reference data
        in_min, in_max = util.get_range_for_dtype_str(input_dtype)
        random_ifm = np.random.randint(
            in_min, high=in_max, size=input_shape, dtype=input_dtype
        )
        input_data = {input_tensor: random_ifm}
        output_data = generate_ref_data(relay_module, input_data)

        compiled_models = infra.build_source(
            mod, input_data, output_data, accel_type, output_tolerance=1
        )

        # Assumes only two runtime.Modules are created -- i.e. single offload module
        imported_modules = compiled_models[0].executor_factory.lib.imported_modules
        assert len(imported_modules) == 2
        ethosu_module = imported_modules[0]

        # Verify generated C source
        get_cs = tvm._ffi.get_global_func("runtime.module.ethosu.getcs")
        command_stream_hex = get_cs(ethosu_module)
        cmms = bytes.fromhex(command_stream_hex)
        infra.print_payload(cmms)
        infra.verify_source(compiled_models, accel_type)