def update_output_qnn_params(
    self, input_dtype="uint8", kernel_dtype="uint8", output_dtype="uint8"
):
    """Derive the output (ofm) scale and zero point from the ifm/kernel quantization.

    The real-valued extremes of the input and of the kernel are multiplied
    together with the kernel's H * W * I element count to bound the
    accumulated output; that bound is then spread over the quantized range
    of ``output_dtype``.  The results are stored in ``self.ofm.sc`` (float32
    constant) and ``self.ofm.zp`` (int32 constant).

    Parameters
    ----------
    input_dtype : str
        Dtype string whose maximum quantized value is used for the input
        bound.
    kernel_dtype : str
        Dtype string whose maximum quantized value is used for the kernel
        bound.
    output_dtype : str
        Dtype string whose quantized range the output bound is mapped onto.
    """
    ifm_scale = self.ifm.sc.data.asnumpy()
    ifm_zero_point = self.ifm.zp.data.asnumpy()
    _, dtype_max = get_range_for_dtype_str(input_dtype)
    input_max = ifm_scale * (dtype_max - ifm_zero_point)
    # NOTE(review): the lower bound is the real value of quantized 0, not of
    # the dtype minimum -- for signed dtypes the true minimum is lower.
    # Left unchanged so existing generated parameters stay the same.
    input_min = -ifm_scale * ifm_zero_point

    kernel_scale = self.kernel.sc.data.asnumpy()
    kernel_zero_point = self.kernel.zp.data.asnumpy()
    _, dtype_max = get_range_for_dtype_str(kernel_dtype)
    # The kernel scale may hold several values, hence the reductions.
    kernel_max = np.max(kernel_scale * (dtype_max - kernel_zero_point))
    kernel_min = np.min(-kernel_scale * kernel_zero_point)

    kernel_h = self.kernel.get_dim_size("H")
    kernel_w = self.kernel.get_dim_size("W")
    channels = self.kernel.get_dim_size("I")

    # Every combination of kernel/input extremes; the multiplication order is
    # kept as-is so floating point results are unchanged.
    output_limits = [
        kernel_bound * kernel_h * kernel_w * channels * input_bound
        for kernel_bound, input_bound in (
            (kernel_max, input_max),
            (kernel_min, input_max),
            (kernel_min, input_min),
            (kernel_max, input_min),
        )
    ]
    output_max = max(output_limits)
    output_min = min(output_limits)

    # Use the *output* dtype here: previously ``output_dtype`` was ignored and
    # the input dtype's range was used for the output quantization.
    dtype_min, dtype_max = get_range_for_dtype_str(output_dtype)
    self.ofm.sc = relay.const((output_max - output_min) / (dtype_max - dtype_min), "float32")
    # Read the scale back from the constant so the division uses the float32
    # value that is actually stored.
    self.ofm.zp = relay.const(-int(output_min / self.ofm.sc.data.asnumpy()), "int32")
def __init__(self, dtype):
    """Set up ifm, ofm and kernel tensors with random quantization parameters.

    Parameters
    ----------
    dtype : str
        Dtype string assigned to the ifm, kernel and ofm tensors; it also
        selects the range the random zero points are drawn from.
    """

    def _randomize_quantization(tensor):
        # Zero point is drawn from the tensor dtype's range, scale from [0, 2).
        low, high = get_range_for_dtype_str(tensor.dtype)
        tensor.zp = relay.const(np.random.randint(low, high), "int32")
        tensor.sc = relay.const(np.random.random() * 2, "float32")

    self.ifm = TensorType()
    self.ofm = TensorType()
    self.kernel = TensorType()

    # default values
    # The ifm -> kernel -> ofm order fixes the sequence of random draws.
    self.ifm.dtype = dtype
    self.ifm.layout = "NHWC"
    _randomize_quantization(self.ifm)

    self.kernel.dtype = dtype
    self.kernel.layout = "HWIO"
    _randomize_quantization(self.kernel)

    self.ofm.layout = "NHWC"
    self.ofm.dtype = dtype
    _randomize_quantization(self.ofm)

    self.dilation = (1, 1)
    self.strides = None
    self.pad = None
    self.activation = "NONE"
    self.clip_min = 0
    self.clip_max = 0
def test_ethosu_left_shift_binary_elemwise(
    accel_type,
    ifm_shape,
    ifm2_shape,
):
    """Compare an offloaded int32 left shift against reference output."""
    np.random.seed(0)
    dtype = "int32"

    def create_model():
        # A single relay.left_shift over two int32 inputs.
        lhs = relay.var("ifm", shape=ifm_shape, dtype=dtype)
        shift = relay.var("ifm2", shape=ifm2_shape, dtype=dtype)
        func = relay.Function([lhs, shift], relay.left_shift(lhs, shift))
        return tvm.IRModule.from_expr(func)

    cpu_mod = create_model()

    # Generate reference data
    value_low, value_high = util.get_range_for_dtype_str(dtype)
    ifm_values = np.random.randint(value_low, high=value_high, size=ifm_shape, dtype=dtype)
    # Shift amounts are drawn from [0, 32).
    shift_values = np.random.randint(0, high=32, size=ifm2_shape, dtype=dtype)
    input_data = {"ifm": ifm_values, "ifm2": shift_values}
    output_data = generate_ref_data(cpu_mod, input_data)

    ethosu_mod = partition_for_ethosu(cpu_mod)
    infra.compare_ethosu_with_reference(ethosu_mod, input_data, output_data, accel_type)
def test_forward_mobilenet_v1(accel_type):
    """Test the Mobilenet V1 TF Lite model."""
    np.random.seed(23)

    # Fetch the quantized model and read the flatbuffer into memory.
    model_path = tf_testing.get_workload_official(
        "https://storage.googleapis.com/download.tensorflow.org/"
        "models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz",
        "mobilenet_v1_1.0_224_quant.tflite",
    )
    with open(model_path, "rb") as model_file:
        model_bytes = model_file.read()

    input_tensor = "input"
    input_dtype = "uint8"
    input_shape = (1, 224, 224, 3)

    # Random input drawn from the input dtype's range.
    value_low, value_high = util.get_range_for_dtype_str(input_dtype)
    random_input = np.random.randint(
        value_low, high=value_high, size=input_shape, dtype=input_dtype
    )

    relay_mod, params = convert_to_relay(model_bytes)
    input_data = {input_tensor: random_input}
    output_data = generate_ref_data(relay_mod, input_data)

    partitioned_mod = partition_for_ethosu(relay_mod, params)
    compiled_models = infra.build_source(
        partitioned_mod, input_data, output_data, accel_type, output_tolerance=10
    )
    infra.verify_source(compiled_models, accel_type)
def test_ethosu_section_name():
    """Check that generated Ethos-U data is placed in the ``.rodata.tvm`` section.

    Builds a single offloaded int8 conv2d and asserts that the generated C
    source declares both the command stream data and the weights with the
    expected section attribute.
    """
    # Seed the global generator so the random quantization parameters and
    # input data -- and hence any failure -- are reproducible.
    np.random.seed(0)

    def create_graph_single(input_tensor_name, input_tensor_shape, input_tensor_dtype):
        c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        c1_params.ifm.shape = input_tensor_shape
        c1_params.kernel.shape = (3, 3, c1_params.ifm.shape[3], 32)
        # One random kernel scale per output channel.
        c1_params.kernel.sc = relay.const(np.random.rand(32) * 2, "float32")
        c1_params.strides = (1, 1)
        c1_params.pad = "VALID"
        c1_params.update_output_qnn_params(
            input_tensor_dtype, input_tensor_dtype, input_tensor_dtype
        )
        input0 = relay.var(input_tensor_name, shape=c1_params.ifm.shape, dtype=c1_params.ifm.dtype)
        c1, _ = relay_ir_builder.create_qnn_conv2d(c1_params, input0)
        c1_params.ofm.shape = get_shape_expr(input0, c1)

        f = relay.Function([input0], c1)
        mod = tvm.IRModule()
        mod["main"] = f
        return mod, [c1_params]

    accel_type = "ethos-u55-256"
    input_shape = (1, 300, 300, 3)
    input_dtype = "int8"
    relay_module, _ = create_graph_single("input", input_shape, input_dtype)
    mod = partition_for_ethosu(relay_module)

    # Generate reference data
    in_min, in_max = util.get_range_for_dtype_str(input_dtype)
    input_data = {
        "input": np.random.randint(in_min, high=in_max, size=input_shape, dtype=input_dtype)
    }
    output_data = generate_ref_data(relay_module, input_data)

    compiled_models = infra.build_source(
        mod, input_data, output_data, accel_type, output_tolerance=1
    )

    # Assumes only two runtime.Modules are created -- i.e. single offload module
    ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0]

    # Verify generated C source
    source = ethosu_module.get_source()
    assert (
        '__attribute__((section(".rodata.tvm"), aligned(16))) static int8_t tvmgen_default_ethos_u_main_0_cms_data_data'
        in source
    )
    assert (
        '__attribute__((section(".rodata.tvm"), aligned(16))) static int8_t tvmgen_default_ethos_u_main_0_weights'
        in source
    )
def test_ethosu_right_shift_binary_elemwise(
    ifm_shape, ifm2_shape, reversed_operands, accel_type, ofm_dtype
):
    """Compare the Ethos-U "SHR" binary elementwise op with a rounding right shift.

    The reference output is computed in NumPy as ``(lhs + (1 << (rhs - 1))) >> rhs``
    per element, after broadcasting the second operand and swapping the
    operands when ``reversed_operands`` is set.
    """
    np.random.seed(0)
    dtype = "int32"

    def create_model():
        ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype)
        ifm2 = relay.var("ifm2", shape=ifm2_shape, dtype=dtype)
        shr_op = infra.make_ethosu_binary_elementwise(
            ifm, ifm2, ifm_shape[3], ifm2_shape[3], "SHR", ofm_dtype, reversed_operands
        )
        return tvm.IRModule.from_expr(relay.Function([ifm, ifm2], shr_op))

    def generate_output_data(input_data):
        lhs = input_data["ifm"]
        rhs = input_data["ifm2"]
        if reversed_operands:
            lhs = np.broadcast_to(lhs, ifm2_shape)
            lhs, rhs = rhs, lhs
        else:
            rhs = np.broadcast_to(rhs, ifm_shape)

        def rounding_right_shift(lhs, rhs):
            # Add half of the divisor before shifting so the result rounds.
            r = 1 << (rhs - 1)
            return (lhs + r) >> rhs

        return [
            np.array(
                [rounding_right_shift(x[0], x[1]) for x in zip(lhs.flat, rhs.flat)]
            ).astype(ofm_dtype)
        ]

    cpu_mod = create_model()

    # Generate reference data
    # The half-open ranges [18, 19) and [1, 2) pin every ifm element to 18 and
    # every shift amount to 1.  (The dtype range lookup that used to precede
    # this was dead code: its result was overwritten immediately.)
    in_min, in_max = 18, 19
    lhs = np.random.randint(in_min, high=in_max, size=ifm_shape, dtype=dtype)
    rhs = np.random.randint(1, high=2, size=ifm2_shape, dtype=dtype)
    input_data = {
        "ifm": lhs,
        "ifm2": rhs,
    }
    output_data = {"output": generate_output_data(input_data)[0]}
    ethosu_mod = infra.create_ethosu_partition(cpu_mod)

    infra.compare_ethosu_with_reference(ethosu_mod, input_data, output_data, accel_type)
def test_ethosu_left_shift_binary_elemwise(
    accel_type,
    ifm_shape,
    ifm2_shape,
):
    """Build an offloaded int32 left shift and verify the generated source.

    The Relay module is partitioned for Ethos-U, compiled against reference
    data produced from the unpartitioned module, and the command stream of
    the offloaded module is printed before the source is verified.
    """
    # Inputs span the whole int32 range; seed the generator so a failing
    # input set can be reproduced.
    np.random.seed(0)
    dtype = "int32"

    def create_model():
        ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype)
        ifm2 = relay.var("ifm2", shape=ifm2_shape, dtype=dtype)
        c1 = relay.left_shift(ifm, ifm2)
        f = relay.Function([ifm, ifm2], c1)
        mod = tvm.IRModule()
        mod["main"] = f
        return mod

    relay_mod = create_model()
    mod = partition_for_ethosu(relay_mod)

    # Generate reference data
    in_min, in_max = util.get_range_for_dtype_str(dtype)
    input_data = {
        "ifm": np.random.randint(in_min, high=in_max, size=ifm_shape, dtype=dtype),
        # Shift amounts are drawn from [0, 32).
        "ifm2": np.random.randint(0, high=32, size=ifm2_shape, dtype=dtype),
    }
    output_data = generate_ref_data(relay_mod, input_data)

    compiled_models = infra.build_source(
        mod,
        input_data,
        output_data,
        accel_type,
    )

    # Assumes only two runtime.Modules are created -- i.e. single offload module
    imported_modules = compiled_models[0].executor_factory.lib.imported_modules
    assert len(imported_modules) == 2
    ethosu_module = imported_modules[0]

    # Verify generated C source
    get_cs = tvm._ffi.get_global_func("runtime.module.ethos-u.getcs")
    cmms = get_cs(ethosu_module)
    cmms = bytes.fromhex(cmms)

    infra.print_payload(cmms)
    infra.verify_source(compiled_models, accel_type)
def test_ethosu_conv2d(accel_type):
    """Build and verify three QNN conv2d graphs offloaded to Ethos-U.

    The graphs are: a single 3x3 convolution, a 7x7 convolution followed by a
    5x5 one, and the same pair with a CLIP activation on the first
    convolution.  Each is partitioned, compiled against reference data, and
    its command stream is printed before the source is verified.
    """

    def _conv_params(dtype, ifm_shape, kernel_hw, ofm_channels, strides, pad):
        # Parameter object with shapes, strides and padding filled in.
        params = relay_ir_builder.QnnConv2DParams(dtype)
        params.ifm.shape = ifm_shape
        params.kernel.shape = (*kernel_hw, params.ifm.shape[3], ofm_channels)
        params.strides = strides
        params.pad = pad
        return params

    def _main_module(inputs, body):
        # Wrap an expression as the "main" function of a fresh IRModule.
        func = relay.Function(inputs, body)
        module = tvm.IRModule()
        module["main"] = func
        return module

    def _two_conv_graph(tensor_name, tensor_shape, tensor_dtype, first_conv_overrides):
        # 7x7/stride-2 VALID conv feeding a 5x5/stride-1 SAME conv.
        first = _conv_params(tensor_dtype, tensor_shape, (7, 7), 8, (2, 2), "VALID")
        for attr_name, attr_value in first_conv_overrides.items():
            setattr(first, attr_name, attr_value)
        first.update_output_qnn_params(tensor_dtype, tensor_dtype, tensor_dtype)
        data = relay.var(tensor_name, shape=first.ifm.shape, dtype=first.ifm.dtype)
        conv1, _ = relay_ir_builder.create_qnn_conv2d(first, data)
        first.ofm.shape = get_shape_expr(data, conv1)

        second = _conv_params(tensor_dtype, first.ofm.shape, (5, 5), 16, (1, 1), "SAME")
        # NOTE(review): called without arguments, so the method's default
        # dtypes apply rather than tensor_dtype -- confirm this is intended.
        second.update_output_qnn_params()
        conv2, _ = relay_ir_builder.create_qnn_conv2d(second, conv1)
        second.ofm.shape = get_shape_expr(data, conv2)

        return _main_module([data], conv2), [second, first]

    def create_graph_single(tensor_name, tensor_shape, tensor_dtype):
        params = _conv_params(tensor_dtype, tensor_shape, (3, 3), 32, (1, 1), "VALID")
        # One random kernel scale per output channel.
        params.kernel.sc = relay.const(np.random.rand(32) * 2, "float32")
        params.update_output_qnn_params(tensor_dtype, tensor_dtype, tensor_dtype)
        data = relay.var(tensor_name, shape=params.ifm.shape, dtype=params.ifm.dtype)
        conv, _ = relay_ir_builder.create_qnn_conv2d(params, data)
        params.ofm.shape = get_shape_expr(data, conv)
        return _main_module([data], conv), [params]

    def create_graph_double(tensor_name, tensor_shape, tensor_dtype):
        return _two_conv_graph(tensor_name, tensor_shape, tensor_dtype, {})

    def create_graph_activation(tensor_name, tensor_shape, tensor_dtype):
        clip_settings = {"activation": "CLIP", "clip_min": 90, "clip_max": 110}
        return _two_conv_graph(tensor_name, tensor_shape, tensor_dtype, clip_settings)

    test_cases = [
        (create_graph_single, ["input", (1, 300, 300, 3), "int8"]),
        (create_graph_double, ["input", (1, 128, 256, 4), "int8"]),
        (create_graph_activation, ["input", (1, 64, 100, 4), "int8"]),
    ]
    np.random.seed(42)
    for build_graph, graph_args in test_cases:
        relay_module, _ = build_graph(*graph_args)
        input_tensor, input_shape, input_dtype = graph_args
        mod = partition_for_ethosu(relay_module)

        # Generate reference data
        value_low, value_high = util.get_range_for_dtype_str(input_dtype)
        random_input = np.random.randint(
            value_low, high=value_high, size=input_shape, dtype=input_dtype
        )
        input_data = {input_tensor: random_input}
        output_data = generate_ref_data(relay_module, input_data)

        compiled_models = infra.build_source(
            mod, input_data, output_data, accel_type, output_tolerance=1
        )

        # Assumes only two runtime.Modules are created -- i.e. single offload module
        host_lib = compiled_models[0].executor_factory.lib
        ethosu_module = host_lib.imported_modules[0].imported_modules[0]

        # Verify generated C source
        get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts")
        compilation_artifacts = get_artifacts(ethosu_module)
        command_stream = bytes.fromhex(compilation_artifacts[0].command_stream)
        infra.print_payload(command_stream)
        infra.verify_source(compiled_models, accel_type)
def test_ethosu_right_shift_binary_elemwise(
    ifm_shape, ifm2_shape, reversed_operands, accel_type, ofm_dtype
):
    """Build a hand-partitioned "SHR" elementwise op and verify the generated source.

    The Ethos-U function takes one flat tensor holding both operands; "main"
    concatenates the two flattened inputs and calls it.  The reference output
    is computed in NumPy as ``(lhs + (1 << (rhs - 1))) >> rhs`` per element.
    """
    # Seeded for consistency with the other tests that draw from np.random.
    np.random.seed(0)
    dtype = "int32"

    def create_model():
        ifm_count = int(np.prod(ifm_shape))
        ifm2_count = int(np.prod(ifm2_shape))

        # Create a "partitioned" Relay function
        ifms = relay.var("ifms", shape=[ifm_count + ifm2_count], dtype=dtype)
        split = relay.split(ifms, [ifm_count])
        ifm = relay.reshape(split[0], newshape=ifm_shape)
        ifm2 = relay.reshape(split[1], newshape=ifm2_shape)
        shr_op = infra.make_ethosu_binary_elementwise(
            ifm, ifm2, ifm_shape[3], ifm2_shape[3], "SHR", ofm_dtype, reversed_operands
        )

        glb_ethosu = relay.GlobalVar("tvmgen_default_ethos_u_main_0")
        func = (
            relay.Function([ifms], shr_op)
            .with_attr("Inline", 1)
            .with_attr("Compiler", "ethos-u")
            .with_attr("global_symbol", "tvmgen_default_ethos_u_main_0")
            .with_attr("Primitive", 1)
        )
        mod = tvm.IRModule()
        mod[glb_ethosu] = func
        mod = relay.transform.InferType()(mod)

        # Main
        ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype)
        ifm2 = relay.var("ifm2", shape=ifm2_shape, dtype=dtype)
        call = relay.Call(
            glb_ethosu,
            [
                relay.concatenate(
                    data=(
                        relay.reshape(ifm, newshape=ifm_count),
                        relay.reshape(ifm2, newshape=ifm2_count),
                    ),
                    axis=0,
                )
            ],
        )
        mod["main"] = relay.Function([ifm, ifm2], call)
        mod = relay.transform.InferType()(mod)
        return mod

    mod = create_model()

    # Generate reference data
    # The half-open ranges [18, 19) and [1, 2) pin every ifm element to 18 and
    # every shift amount to 1.  (The dtype range lookup that used to precede
    # this was dead code: its result was overwritten immediately.)
    in_min, in_max = 18, 19
    lhs = np.random.randint(in_min, high=in_max, size=ifm_shape, dtype=dtype)
    rhs = np.random.randint(1, high=2, size=ifm2_shape, dtype=dtype)
    input_data = {
        "ifm": lhs,
        "ifm2": rhs,
    }

    if reversed_operands:
        lhs = np.broadcast_to(lhs, ifm2_shape)
        lhs, rhs = rhs, lhs
    else:
        rhs = np.broadcast_to(rhs, ifm_shape)

    def rounding_right_shift(lhs, rhs):
        # Add half of the divisor before shifting so the result rounds.
        r = 1 << (rhs - 1)
        return (lhs + r) >> rhs

    output_data = np.array(
        [rounding_right_shift(x[0], x[1]) for x in zip(lhs.flat, rhs.flat)]
    ).astype(ofm_dtype)

    compiled_model = infra.build_source(mod, input_data, [output_data], accel_type)

    imported_modules = compiled_model[0].executor_factory.lib.imported_modules
    assert len(imported_modules) == 2
    ethosu_module = imported_modules[0]

    # Verify generated C source
    get_cs = tvm._ffi.get_global_func("runtime.module.ethos-u.getcs")
    cmms = get_cs(ethosu_module)
    cmms = bytes.fromhex(cmms)

    infra.print_payload(cmms)
    infra.verify_source(compiled_model, accel_type)