def test_conv2d_inline_reshape(trial):
    def _get_func(ifm_shape, reshaped, ifm_layout):
        ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
        ifm_reshaped = relay.reshape(ifm, reshaped)
        conv = make_ethosu_conv2d(
            ifm_reshaped,
            reshaped[3],
            16,
            (3, 3),
            (1, 1),
            (1, 1),
            (1, 1),
            activation="NONE",
            ifm_layout=ifm_layout,
        )
        func = relay.Function(relay.analysis.free_vars(conv), conv)
        func = run_opt_pass(func, relay.transform.InferType())
        return func

    reference_mod = trial[0]
    params = trial[1:]
    func = _get_func(*params)
    mod, _ = lower_to_tir(func, cascader=total_cascader((1, 4, 6, 16)))
    script = mod.script(show_meta=True)
    mod = tvm.script.from_source(script)
    tvm.ir.assert_structural_equal(mod["main"], reference_mod["main"], True)


def test_weight_stream():
    def _cascader(cached_func, const_dict, sch):
        weight = cached_func.inputs[1]
        scale_bias = cached_func.inputs[2]
        out = cached_func.outputs[0]
        conv_compute = Convolution2DCompute.from_output(out)
        co = conv_compute.split(sch, 3, 10)
        cache_weight = sch.cache_read(weight, "global", [conv_compute.conv2d])
        cache_scale_bias = sch.cache_read(scale_bias, "global", [conv_compute.conv2d])
        sch[cache_weight].compute_at(sch[out], co)
        sch[cache_scale_bias].compute_at(sch[out], co)

    def _get_func():
        ifm = relay.var("ifm", shape=(1, 16, 16, 32), dtype="int8")
        conv = make_ethosu_conv2d(
            ifm,
            32,
            16,
            (1, 1),
            (0, 0),
            (1, 1),
            (1, 1),
        )
        func = relay.Function(relay.analysis.free_vars(conv), conv)
        func = run_opt_pass(func, relay.transform.InferType())
        return func

    func = _get_func()
    mod, _ = lower_to_tir(func, cascader=_cascader)
    script = mod.script(show_meta=True)
    test_mod = tvm.script.from_source(script)
    reference_mod = WeightStream
    tvm.ir.assert_structural_equal(test_mod["main"], reference_mod["main"], True)


def test_mixed_read():
    def _planner(cached_func, const_dict, sch):
        weight = cached_func.inputs[4]
        scale_bias = cached_func.inputs[5]
        out = cached_func.outputs[0]
        conv_compute = Convolution2DCompute.from_output(out)
        co = conv_compute.split(sch, 3, 2)
        cache_weight = sch.cache_read(weight, "global", [conv_compute.conv2d])
        cache_scale_bias = sch.cache_read(scale_bias, "global", [conv_compute.conv2d])
        sch[cache_weight].compute_at(sch[out], co)
        sch[cache_scale_bias].compute_at(sch[out], co)

    def _get_func():
        ifm = relay.var("ifm", shape=(1, 16, 16, 32), dtype="int8")
        conv1 = make_ethosu_conv2d(
            ifm,
            32,
            16,
            (1, 1),
            (0, 0),
            (1, 1),
            (1, 1),
        )
        conv2 = make_ethosu_conv2d(
            conv1,
            16,
            8,
            (1, 1),
            (0, 0),
            (1, 1),
            (1, 1),
        )
        func = relay.Function(relay.analysis.free_vars(conv2), conv2)
        func = run_opt_pass(func, relay.transform.InferType())
        return func

    func = _get_func()
    mod, consts = lower_to_tir(func, cascader=_planner)
    script = mod.script(show_meta=True)
    test_mod = tvm.script.from_source(script)
    reference_mod = MixedRead
    tvm.ir.assert_structural_equal(test_mod["main"], reference_mod["main"], True)

    reference_const_sizes = [592, 160, 80, 32, 80, 32, 80, 32, 80, 32]
    test_const_sizes = [value.size for value in list(consts.values())]
    assert reference_const_sizes == test_const_sizes


def _compile(ext_func):
    """
    This is the main wrapper that accepts an external relay function
    and runs all the passes to lower it down to a command stream.

    Parameters
    ----------
    ext_func : tvm.relay.function.Function
        The partitioned relay function

    Returns
    -------
    cs : str
        A hex string of the bytes of the command stream
    encoded_constants : str
        A hex string of the bytes that include the concatenated
        encoded weights, encoded biases and scales.
    scratch_size : int
        The size of the scratch buffer needed.
    """
    mod = tvm.IRModule()
    mod["main"] = ext_func
    mod = LegalizeEthosU()(mod)
    mod = relay.transform.InferType()(mod)
    # We are currently using the copy_constants scheduler. In the long run,
    # this should be a single intelligent and composite scheduler
    # that can perform scheduling based on user inputs such as
    # scratch memory size.
    tir_mod, params = lower_to_tir(mod["main"], copy_constants())
    cmms, encoded_constants, scratch_size = tir_to_cs_translator.translate(tir_mod, params)
    return cmms, encoded_constants, scratch_size


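# For reference, a minimal usage sketch of `_compile` (illustrative, not part
# of the test suite). It assumes the module has already been partitioned for
# the microNPU with `tvm.relay.op.contrib.ethosu.partition_for_ethosu`; the
# global function name looked up below is hypothetical.
def _example_compile_usage(mod, params):
    from tvm.relay.op.contrib.ethosu import partition_for_ethosu

    # Offload the supported subgraphs to the NPU.
    partitioned = partition_for_ethosu(mod, params)
    # Look up the partitioned external function (name is hypothetical).
    ext_func = partitioned["tvmgen_default_ethos_u_main_0"]
    # Lower it all the way down to a command stream.
    cmms, encoded_constants, scratch_size = _compile(ext_func)
    return cmms, encoded_constants, scratch_size

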
def test_pooling_single(
    ifm_shape,
    ofm_channels,
    ifm_layout,
    ofm_layout,
    pooling_type,
    activation,
    rounding_mode,
    upscale,
):
    pool_shape = (3, 2)
    strides = (1, 2)

    # When strides are not (1, 1) it is possible to create invalid padding
    # configurations. Although a pooling operation with invalid padding can
    # still be constructed, the compiler will account for this and adjust the
    # padding, leading to a mismatch between the expected and actual result.
    # Therefore, hardcoded padding values are used for each case.
    padding = (1, 1, 1, 0) if upscale == "NONE" else (0, 0, 0, 0)

    ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
    pooling = make_ethosu_pooling(
        ifm,
        pooling_type,
        pool_shape,
        ofm_channels,
        strides,
        padding,
        activation,
        ifm_layout,
        ofm_layout,
        rounding_mode,
        upscale,
    )
    func = relay.Function(relay.analysis.free_vars(pooling), pooling)
    func = run_opt_pass(func, relay.transform.InferType())
    mod, _ = lower_to_tir(func)

    data = []

    def _visit(stmt):
        if isinstance(stmt, tvm.tir.Call):
            data.append(get_pooling_args(stmt))

    tvm.tir.stmt_functor.post_order_visit(mod["main"].body, _visit)

    serial_pooling = _create_serial_pooling(
        ifm_shape,
        ofm_channels,
        ifm_layout,
        ofm_layout,
        pool_shape,
        pooling_type,
        strides,
        padding,
        activation,
        rounding_mode,
        upscale,
    )
    assert data[0] == ["ethosu_pooling"] + list(serial_pooling)


def test_lower_to_tir_arg_count(relay_function, arg_count):
    mod = tvm.IRModule()
    mod["main"] = relay_function()
    mod = relay.transform.InferType()(mod)
    tir_mod = lower_to_tir(mod["main"])[0]
    primfunc = tir_mod["main"]
    assert len(primfunc.params) == arg_count


def test_concat():
    def _get_func():
        ifm1 = relay.var("ifm1", shape=(1, 8, 12, 16), dtype="int8")
        ifm2 = relay.var("ifm2", shape=(1, 8, 10, 16), dtype="int8")
        conv1 = make_ethosu_conv2d(ifm1, 16, 16, (3, 3), (1, 1), (1, 1), (1, 1))
        conv2 = make_ethosu_conv2d(ifm2, 16, 16, (3, 3), (1, 1), (1, 1), (1, 1))
        conc1 = relay.concatenate((conv1, conv2), axis=2)
        conv3 = make_ethosu_conv2d(conc1, 16, 16, (3, 3), (1, 1), (1, 1), (1, 1))
        conv4 = make_ethosu_conv2d(conv2, 16, 16, (3, 3), (1, 1), (1, 1), (1, 1))
        conc2 = relay.concatenate((conv3, conv4), axis=2)
        func = relay.Function(relay.analysis.free_vars(conc2), conc2)
        func = run_opt_pass(func, relay.transform.InferType())
        return func

    func = _get_func()
    mod, _ = lower_to_tir(func)
    script = mod.script(show_meta=True)
    test_mod = tvm.script.from_source(script)
    reference_mod = ReferenceModule
    tvm.ir.assert_structural_equal(test_mod["main"], reference_mod["main"], True)


def test_lower_to_tir():
    data = relay.var("data", shape=(1, 1, 1, 1024), dtype="uint8")
    weight = relay.var("weight", shape=(1, 1, 1024, 1001), dtype="int8")
    p2 = relay.var("p2", shape=(1, 1, 1, 1), dtype="int32")
    conv = relay.nn.conv2d(
        data,
        weight,
        kernel_size=(1, 1),
        data_layout="NHWC",
        kernel_layout="HWIO",
        out_dtype="int32",
    )
    tile = relay.tile(p2, reps=(1, 1, 1, 1001))
    subtract = relay.subtract(conv, tile)
    func = subtract
    expr = relay.Function(relay.analysis.free_vars(func), func)
    mod = tvm.IRModule.from_expr(expr)
    mod = relay.transform.InferType()(mod)
    lower_to_tir(mod["main"])


def test_conv2d_big_pad():
    def _get_func():
        ifm_shape = (1, 2, 2, 8)
        ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
        conv = make_ethosu_conv2d(
            ifm, ifm_shape[3], 16, (1, 1), (7, 7), (1, 1), (1, 1), "NHWC"
        )
        func = relay.Function(relay.analysis.free_vars(conv), conv)
        func = run_opt_pass(func, relay.transform.InferType())
        return func

    func = _get_func()
    mod, _ = lower_to_tir(func, cascader=total_cascader((1, 4, 4, 16)))


def test_weight_stream_only():
    def _planner(cached_func, const_dict, sch):
        weights = cached_func.inputs[1]
        bias = cached_func.inputs[2]
        out = cached_func.outputs[0]
        conv_compute = Convolution2DCompute.from_output(out)
        co = conv_compute.split(sch, 3, 2)
        cache_weights = sch.cache_read(weights, "global", [conv_compute.conv2d])
        cache_bias = sch.cache_read(bias, "global", [conv_compute.conv2d])
        sch[cache_weights].compute_at(sch[out], co)
        sch[cache_bias].compute_at(sch[out], co)

    def _get_func():
        ifm = relay.var("ifm", shape=(1, 16, 16, 32), dtype="int8")
        conv = make_ethosu_conv2d(
            ifm,
            32,
            8,
            (1, 1),
            (0, 0),
            (1, 1),
            (1, 1),
        )
        func = relay.Function(relay.analysis.free_vars(conv), conv)
        func = run_opt_pass(func, relay.transform.InferType())
        return func

    func = _get_func()
    mod, consts = lower_to_tir(func, cascader=_planner)
    script = mod.script(show_meta=True)
    test_mod = tvm.script.from_source(script)
    reference_mod = WeightStreamOnly
    tvm.ir.assert_structural_equal(test_mod["main"], reference_mod["main"], True)

    reference_const_sizes = {2: 128, 3: 32, 4: 112, 5: 32, 6: 112, 7: 32, 8: 112, 9: 32}
    test_const_sizes = {}
    for key, value in consts.items():
        test_const_sizes[key] = len(value)
    assert reference_const_sizes == test_const_sizes


def test_schedule_diamond_graph():
    ifm_a = relay.var("IFM_A", shape=(1, 56, 56, 96), dtype="int8")
    conv_a = make_ethosu_conv2d(ifm_a, 96, 24, (1, 1), (0, 0), (1, 1), (1, 1))
    conv_b = make_ethosu_conv2d(conv_a, 24, 24, (1, 1), (0, 0), (1, 1), (1, 1))
    add = make_ethosu_binary_elementwise(conv_a, conv_b, 24, 24, "ADD", "int8")

    func = relay.Function(relay.analysis.free_vars(add), add)
    func = run_opt_pass(func, relay.transform.InferType())

    test_mod, _ = lower_to_tir(func, copy_constants())
    reference_mod = DiamondGraphTir
    tvm.ir.assert_structural_equal(test_mod["main"], reference_mod["main"], True)


def test_pooling_single(
    ifm_shape,
    ofm_channels,
    ifm_layout,
    ofm_layout,
    pooling_type,
    activation,
    rounding_mode,
):
    pool_shape = (3, 2)
    strides = (1, 2)
    padding = (1, 1, 1, 0)
    ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
    pooling = make_ethosu_pooling(
        ifm,
        pooling_type,
        pool_shape,
        ofm_channels,
        strides,
        padding,
        activation,
        ifm_layout,
        ofm_layout,
        rounding_mode,
    )
    func = relay.Function(relay.analysis.free_vars(pooling), pooling)
    func = run_opt_pass(func, relay.transform.InferType())
    mod, _ = lower_to_tir(func)

    data = []

    def _visit(stmt):
        if isinstance(stmt, tvm.tir.Call):
            data.append(get_pooling_args(stmt))

    tvm.tir.stmt_functor.post_order_visit(mod["main"].body, _visit)

    serial_pooling = _create_serial_pooling(
        ifm_shape,
        ofm_channels,
        ifm_layout,
        ofm_layout,
        pool_shape,
        pooling_type,
        strides,
        padding,
        activation,
        rounding_mode,
    )
    assert data[0] == ["ethosu_pooling"] + list(serial_pooling)


def test_conv2d_double_cascade(trial):
    def _get_func(
        ifm_shape,
        ifm_channels,
        mid_channels,
        ofm_channels,
        kernel_shape,
        padding,
        strides,
        dilation,
        layout,
    ):
        ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
        conv1 = make_ethosu_conv2d(
            ifm,
            ifm_channels,
            mid_channels,
            kernel_shape,
            padding,
            strides,
            dilation,
            "NONE",
            layout,
            layout,
        )
        conv2 = make_ethosu_conv2d(
            conv1,
            mid_channels,
            ofm_channels,
            kernel_shape,
            padding,
            strides,
            dilation,
            "NONE",
            layout,
            layout,
        )
        func = relay.Function(relay.analysis.free_vars(conv2), conv2)
        func = run_opt_pass(func, relay.transform.InferType())
        return func

    reference_mod = trial[0]
    params = trial[1:]
    func = _get_func(*params[:-1])
    mod, _ = lower_to_tir(func, cascader=total_cascader(params[-1]))
    script = mod.script(show_meta=True)
    mod = tvm.script.from_source(script)
    tvm.ir.assert_structural_equal(mod["main"], reference_mod["main"], True)


def test_constant_as_input():
    """Test to check that constants specified as inputs aren't
    interpreted as an encoded constant."""

    def get_graph():
        dtype = "uint8"
        ifm = relay.var("ifm", shape=(1, 16, 16, 32), dtype=dtype)
        conv1 = make_ethosu_conv2d(
            ifm,
            32,
            16,
            (1, 1),
            (0, 0),
            (1, 1),
            (1, 1),
        )
        scalar = relay.const(np.ones((1, 1, 1, 1), dtype=dtype), dtype=dtype)
        add1 = make_ethosu_binary_elementwise(
            conv1, scalar, ifm_channels=32, ifm2_channels=1, operator_type="ADD", ofm_dtype=dtype
        )
        func = relay.Function(relay.analysis.free_vars(add1), add1)
        func = run_opt_pass(func, relay.transform.InferType())
        return func

    tir_mod, params = lower_to_tir(get_graph(), copy_constants())

    # Check that the tile address for the scalar constant input hasn't been
    # overwritten.
    extern_calls = tir_mod["main"].body.body.body.body.body
    binary_elementwise = extern_calls[-1].value
    args = binary_elementwise.args

    reason = "Tile address overwritten"
    assert args[26] == 0, reason
    assert args[27] == 0, reason
    assert args[28] == 0, reason

    # More generally, check that it compiles successfully to make sure
    # nothing else was overwritten.
    # With target hooks, the TIR module needs a target attached
    # and to be lowered via the MakeUnpackedAPI pass.
    tir_mod["main"] = tir_mod["main"].with_attr("target", tvm.target.Target("ethos-u"))
    tir_mod = tvm.tir.transform.MakeUnpackedAPI()(tir_mod)
    tir_to_cs_translator.translate(tir_mod, params)


def test_conv2d_inline_copy(trial):
    def _get_func(ifm_shape, lower, upper, ofm_channels=16):
        ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
        sliced = relay.strided_slice(ifm, lower, upper)
        conv = make_ethosu_conv2d(
            sliced, upper[3] - lower[3], ofm_channels, (3, 3), (1, 1), (1, 1), (1, 1)
        )
        func = relay.Function(relay.analysis.free_vars(conv), conv)
        func = run_opt_pass(func, relay.transform.InferType())
        return func

    reference_mod = trial[0]
    params = trial[1:]
    func = _get_func(*params)
    mod, _ = lower_to_tir(func)
    script = mod.script(show_meta=True)
    mod = tvm.script.from_source(script)
    tvm.ir.assert_structural_equal(mod["main"], reference_mod["main"], True)


def test_direct_read_only():
    def _get_func():
        ifm = relay.var("ifm", shape=(1, 16, 16, 32), dtype="int8")
        conv1 = make_ethosu_conv2d(
            ifm,
            32,
            16,
            (1, 1),
            (0, 0),
            (1, 1),
            (1, 1),
        )
        conv2 = make_ethosu_conv2d(
            conv1,
            16,
            8,
            (1, 1),
            (0, 0),
            (1, 1),
            (1, 1),
        )
        func = relay.Function(relay.analysis.free_vars(conv2), conv2)
        func = run_opt_pass(func, relay.transform.InferType())
        return func

    func = _get_func()
    mod, consts = lower_to_tir(func)
    script = mod.script(show_meta=True)
    test_mod = tvm.script.from_source(script)
    reference_mod = DirectReadOnly
    tvm.ir.assert_structural_equal(test_mod["main"], reference_mod["main"], True)

    reference_const_sizes = {1: 592, 2: 160, 3: 160, 4: 80}
    test_const_sizes = {}
    for key, value in consts.items():
        test_const_sizes[key] = len(value)
    assert reference_const_sizes == test_const_sizes


def relay_to_tir_func(ext_func: relay.Function) -> tvm.tir.PrimFunc:
    """
    This is the hook for python-based lowering of a relay function
    that gets offloaded to the microNPU.

    Parameters
    ----------
    ext_func : relay.Function
        This is the partitioned relay function

    Returns
    -------
    primfunc : tir.PrimFunc
        This returns the scheduled PrimFunc
    """
    assert len(ext_func.params) == 1
    input_size = util.calculate_size_bytes(ext_func.params[0])
    output_size = util.calculate_size_bytes(ext_func.body)
    mod = tvm.IRModule()
    mod["main"] = ext_func
    mod = LegalizeEthosU()(mod)
    mod = LUTsOptimizer()(mod)
    mod = LayoutOptimizer()(mod)
    mod = relay.transform.InferType()(mod)
    # We are currently using the copy_constants scheduler. In the long run,
    # this should be a single intelligent and composite scheduler
    # that can perform scheduling based on user inputs such as
    # scratch memory size.
    tir_mod, const_dict = lower_to_tir(mod["main"], copy_constants())

    for idx in const_dict.keys():
        const_dict[idx] = tvm.nd.array(const_dict[idx])

    primfunc = tir_mod["main"]
    primfunc = primfunc.with_attr("global_symbol", ext_func.attrs["global_symbol"])
    primfunc = primfunc.with_attr("ethos-u.constants", const_dict)
    primfunc = primfunc.with_attr("ethos-u.input_size", input_size)
    primfunc = primfunc.with_attr("ethos-u.output_size", output_size)
    return primfunc


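# A hedged sketch of driving `relay_to_tir_func` directly on a single-input
# function built with the same helpers used by the tests in this document.
# The "global_symbol" value is hypothetical; in the compiler this attribute
# is set during partitioning before the hook is invoked.
def _example_relay_to_tir_usage():
    ifm = relay.var("ifm", shape=(1, 16, 16, 32), dtype="int8")
    conv = make_ethosu_conv2d(ifm, 32, 16, (1, 1), (0, 0), (1, 1), (1, 1))
    func = relay.Function(relay.analysis.free_vars(conv), conv)
    func = run_opt_pass(func, relay.transform.InferType())
    # The hook asserts a single input and reads the "global_symbol" attribute.
    func = func.with_attr("global_symbol", "tvmgen_default_ethos_u_main_0")
    return relay_to_tir_func(func)

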
def test_copy():
    def _get_func():
        data = relay.var("data", shape=(1, 16, 16, 32), dtype="int8")
        conv = make_ethosu_conv2d(
            data,
            32,
            8,
            (1, 1),
            (0, 0),
            (1, 1),
            (1, 1),
        )
        func = relay.Function(relay.analysis.free_vars(conv), conv)
        func = run_opt_pass(func, relay.transform.InferType())
        return func

    func = _get_func()
    mod, _ = lower_to_tir(func, cascader=copy_constants())
    script = mod.script(show_meta=True)
    test_mod = tvm.script.from_source(script)
    reference_mod = ReferenceModule
    tvm.ir.assert_structural_equal(test_mod["main"], reference_mod["main"], True)


def test_depthwise_conv2d_single(trial):
    def _get_func(
        ifm_shape,
        channels,
        kernel_shape,
        padding,
        strides,
        dilation,
        activation,
        ifm_layout,
        ofm_layout,
    ):
        ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
        depthwise = make_ethosu_depthwise_conv2d(
            ifm,
            channels,
            kernel_shape,
            padding,
            strides,
            dilation,
            activation,
            ifm_layout,
            ofm_layout,
        )
        func = relay.Function(relay.analysis.free_vars(depthwise), depthwise)
        func = run_opt_pass(func, relay.transform.InferType())
        return func

    func = _get_func(*trial)
    mod, _ = lower_to_tir(func)
    data = []

    def _visit(stmt):
        if isinstance(stmt, tvm.tir.Call):
            data.append(get_convolutional_args(stmt, remove_constants=True))

    tvm.tir.stmt_functor.post_order_visit(mod["main"].body, _visit)

    (
        ifm_shape,
        channels,
        kernel_shape,
        padding,
        strides,
        dilation,
        activation,
        ifm_layout,
        ofm_layout,
    ) = trial
    dilated_kernel_h = (kernel_shape[0] - 1) * dilation[0] + 1
    dilated_kernel_w = (kernel_shape[1] - 1) * dilation[1] + 1
    if ifm_layout == "NHWC":
        ifm_stride_c = 1
        ifm_stride_w = ifm_shape[3]
        ifm_stride_h = ifm_shape[2] * ifm_shape[3]
        ofm_height = (ifm_shape[1] - dilated_kernel_h + padding[0] + padding[0]) // strides[0] + 1
        ofm_width = (ifm_shape[2] - dilated_kernel_w + padding[1] + padding[1]) // strides[1] + 1
    else:
        ifm_stride_w = 16
        ifm_stride_c = 16 * ifm_shape[3]
        ifm_stride_h = 16 * ifm_shape[2] * ifm_shape[3]
        ofm_height = (ifm_shape[1] - dilated_kernel_h + padding[0] + padding[0]) // strides[0] + 1
        ofm_width = (ifm_shape[3] - dilated_kernel_w + padding[1] + padding[1]) // strides[1] + 1

    if ofm_layout == "NHWC":
        ofm_stride_c = 1
        ofm_stride_w = channels if ofm_width > 1 else 1
        ofm_stride_h = channels * ofm_width if ofm_height > 1 else 1
    else:
        ofm_stride_w = 16
        ofm_stride_c = 16 * ofm_width
        ofm_stride_h = 16 * ofm_width * ((channels - 1) // 16 + 1)

    answer = [
        "int8",
        ifm_shape[1],
        ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3],
        channels,
        ifm_shape[1],
        0,
        ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3],
        0,
        0,
        0,
        0,
        0.6,
        11,
        ifm_layout,
        ifm_stride_h,
        ifm_stride_w,
        ifm_stride_c,
        "int8",
        ofm_height,
        ofm_width,
        channels,
        ofm_height,
        0,
        ofm_width,
        0,
        0,
        0,
        0,
        0.26,
        15,
        ofm_layout,
        ofm_stride_h,
        ofm_stride_w,
        ofm_stride_c,
        kernel_shape[1],
        kernel_shape[0],
        strides[1],
        strides[0],
        dilation[1],
        dilation[0],
        13,
        padding[0],
        padding[1],
        padding[0],
        padding[1],
        activation,
        15 if activation == "CLIP" else 0,
        105 if activation == "CLIP" else 0,
        "NONE",
    ]
    assert data[0] == answer, data[0]


def test_identity(ifm_shape):
    ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
    identity = make_ethosu_identity(ifm)

    func = relay.Function(relay.analysis.free_vars(identity), identity)
    func = run_opt_pass(func, relay.transform.InferType())
    mod, _ = lower_to_tir(func)
    data = []

    def _visit(stmt):
        if isinstance(stmt, tvm.tir.Call):
            data.append(get_pooling_args(stmt))

    # Construct the ifm shape that the initial ifm shape gets legalized into
    ref_ifm_shape = ifm_shape

    if len(ref_ifm_shape) < 4:
        ref_ifm_shape = [1] + ref_ifm_shape

    while len(ref_ifm_shape) < 4:
        ref_ifm_shape.append(1)

    tvm.tir.stmt_functor.post_order_visit(mod["main"].body, _visit)
    ifm_stride_c = 1
    ifm_stride_w = ref_ifm_shape[3]
    ifm_stride_h = ref_ifm_shape[2] * ref_ifm_shape[3]

    ofm_height = ref_ifm_shape[1]
    ofm_width = ref_ifm_shape[2]
    ofm_channels = ref_ifm_shape[3]
    ofm_stride_c = 1
    ofm_stride_w = ofm_channels if ofm_width > 1 else 1
    ofm_stride_h = ofm_channels * ofm_width if ofm_height > 1 else 1

    # The identity operator TIR gets converted into serial pooling
    serial_pooling = spec.SerialPooling(
        ifm=spec.SerialFeatureMap(
            data_type="int8",
            height=ref_ifm_shape[1],
            width=ref_ifm_shape[2],
            channels=ofm_channels,
            tile_height_0=ref_ifm_shape[1],
            tile_height_1=0,
            tile_width_0=ref_ifm_shape[2],
            tile_address_0=0,
            tile_address_1=0,
            tile_address_2=0,
            tile_address_3=0,
            scale=1.0,
            zero_point=0,
            layout="NHWC",
            stride_h=ifm_stride_h,
            stride_w=ifm_stride_w,
            stride_c=ifm_stride_c,
        ),
        ofm=spec.SerialFeatureMap(
            data_type="int8",
            height=ofm_height,
            width=ofm_width,
            channels=ofm_channels,
            tile_height_0=ofm_height,
            tile_height_1=0,
            tile_width_0=ofm_width,
            tile_address_0=0,
            tile_address_1=0,
            tile_address_2=0,
            tile_address_3=0,
            scale=1.0,
            zero_point=0,
            layout="NHWC",
            stride_h=ofm_stride_h,
            stride_w=ofm_stride_w,
            stride_c=ofm_stride_c,
        ),
        pooling_type="AVG",
        pool_shape=spec.SerialKernel(1, 1, 1, 1, 1, 1),
        padding=spec.SerialPadding(0, 0, 0, 0),
        activation=spec.SerialActivation(op="NONE", clip_min=0, clip_max=0),
        upscale="NONE",
    )

    assert data[0] == ["ethosu_identity"] + list(serial_pooling)


def test_binary_elementwise_single(
    ifm_shape,
    ifm2_shape,
    ifm_channels,
    ifm2_channels,
    ifm_layout,
    ofm_layout,
    operator_type,
    activation,
):
    dtype = "int8"
    ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype)
    ifm2 = relay.var("ifm2", shape=ifm2_shape, dtype=dtype)
    binary_elementwise = make_ethosu_binary_elementwise(
        ifm,
        ifm2,
        ifm_channels,
        ifm2_channels,
        operator_type,
        dtype,
        False,
        activation,
        ifm_layout,
        ifm_layout,
        ofm_layout,
    )
    func = relay.Function(relay.analysis.free_vars(binary_elementwise), binary_elementwise)
    func = run_opt_pass(func, relay.transform.InferType())
    mod, _ = lower_to_tir(func)

    data = []

    def _visit(stmt):
        if isinstance(stmt, tvm.tir.Call):
            data.append(get_binary_elementwise_args(stmt))

    tvm.tir.stmt_functor.post_order_visit(mod["main"].body, _visit)

    if ifm_layout == "NHWC":
        ifm_stride_c = 1
        ifm_stride_w = ifm_shape[3] if ifm_shape[2] != 1 else 1
        ifm_stride_h = ifm_shape[2] * ifm_shape[3] if ifm_shape[1] != 1 else 1

        ifm2_stride_c = 1
        ifm2_stride_w = ifm2_shape[3] if ifm2_shape[2] != 1 else 1
        ifm2_stride_h = ifm2_shape[2] * ifm2_shape[3] if ifm2_shape[1] != 1 else 1

        ofm_height = ifm_shape[1]
        ofm_width = ifm_shape[2]
    else:
        ifm_stride_w = 16
        ifm_stride_c = 16 * ifm_shape[3]
        ifm_stride_h = 16 * ifm_shape[2] * ifm_shape[3]

        ifm2_stride_w = 16
        ifm2_stride_c = 16 * ifm2_shape[3]
        ifm2_stride_h = 16 * ifm2_shape[2] * ifm2_shape[3]

        ofm_height = ifm_shape[1]
        ofm_width = ifm_shape[3]

    if ofm_layout == "NHWC":
        ofm_stride_c = 1
        ofm_stride_w = ifm_channels if ofm_width > 1 else 1
        ofm_stride_h = ifm_channels * ofm_width if ofm_height > 1 else 1
    else:
        ofm_stride_w = 16
        ofm_stride_c = 16 * ofm_width
        ofm_stride_h = 16 * ofm_width * ((ifm_channels - 1) // 16 + 1)

    serial_binary_elementwise = spec.SerialBinaryElementwise(
        ifm=spec.SerialFeatureMap(
            data_type=dtype,
            height=ifm_shape[1],
            width=ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3],
            channels=ifm_channels,
            tile_height_0=ifm_shape[1],
            tile_height_1=0,
            tile_width_0=ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3],
            tile_address_0=0,
            tile_address_1=0,
            tile_address_2=0,
            tile_address_3=0,
            scale=1.0,
            zero_point=0,
            layout=ifm_layout,
            stride_h=ifm_stride_h,
            stride_w=ifm_stride_w,
            stride_c=ifm_stride_c,
        ),
        ifm2=spec.SerialFeatureMap(
            data_type=dtype,
            height=ifm2_shape[1],
            width=ifm2_shape[2] if ifm_layout == "NHWC" else ifm2_shape[3],
            channels=ifm2_channels,
            tile_height_0=ifm2_shape[1],
            tile_height_1=0,
            tile_width_0=ifm2_shape[2] if ifm_layout == "NHWC" else ifm2_shape[3],
            tile_address_0=0,
            tile_address_1=0,
            tile_address_2=0,
            tile_address_3=0,
            scale=1.0,
            zero_point=0,
            layout=ifm_layout,
            stride_h=ifm2_stride_h,
            stride_w=ifm2_stride_w,
            stride_c=ifm2_stride_c,
        ),
        ofm=spec.SerialFeatureMap(
            data_type=dtype,
            height=ofm_height,
            width=ofm_width,
            channels=ifm_channels,
            tile_height_0=ofm_height,
            tile_height_1=0,
            tile_width_0=ofm_width,
            tile_address_0=0,
            tile_address_1=0,
            tile_address_2=0,
            tile_address_3=0,
            scale=1.0,
            zero_point=0,
            layout=ofm_layout,
            stride_h=ofm_stride_h,
            stride_w=ofm_stride_w,
            stride_c=ofm_stride_c,
        ),
        operator_type=operator_type,
        reversed_operands=False,
        activation=spec.SerialActivation(
            op=activation,
            clip_min=10 if activation == "CLIP" else 0,
            clip_max=100 if activation == "CLIP" else 0,
        ),
    )
    assert data[0] == ["ethosu_binary_elementwise"] + list(serial_binary_elementwise)


def test_correct_stride_with_multiple_pooling():
    """Testing a specific case of two pooling operations with NHWC inputs/outputs
    but a NHCWB16 intermediate tensor. This led to elements being accessed in the
    wrong order by the NPU due to incorrect stride values being calculated."""
    ifm_shape = (1, 4, 4, 8)
    ofm_channels = 8
    pooling_type = "MAX"
    pool_shape = (1, 1)
    strides = (1, 1)
    padding = (0, 0, 0, 0)

    ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
    op = make_ethosu_pooling(
        ifm,
        pooling_type,
        pool_shape,
        ofm_channels,
        strides,
        padding,
        ifm_layout="NHWC",
        ofm_layout="NHCWB16",
    )
    op = make_ethosu_pooling(
        op,
        pooling_type,
        pool_shape,
        ofm_channels,
        strides,
        padding,
        ifm_layout="NHCWB16",
        ofm_layout="NHWC",
    )
    func = relay.Function(relay.analysis.free_vars(op), op)
    func = run_opt_pass(func, relay.transform.InferType())
    mod, _ = lower_to_tir(func)

    data = []

    def _visit(stmt):
        if isinstance(stmt, tvm.tir.Call):
            data.append(get_pooling_args(stmt))

    tvm.tir.stmt_functor.post_order_visit(mod["main"].body, _visit)

    serial_pooling_1 = _create_serial_pooling(
        [1, 4, 4, 8],
        8,
        "NHWC",
        "NHCWB16",
        pool_shape,
        pooling_type,
        strides,
        padding,
    )
    serial_pooling_2 = _create_serial_pooling(
        [1, 4, 1, 4, 16],
        8,
        "NHCWB16",
        "NHWC",
        pool_shape,
        pooling_type,
        strides,
        padding,
    )

    assert data[0] == ["ethosu_pooling"] + list(serial_pooling_1)
    assert data[1] == ["ethosu_pooling"] + list(serial_pooling_2)


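# A worked check of the NHCWB16 stride arithmetic for the intermediate tensor
# above, using the same formulas as the NHCWB16 branch of test_pooling_single
# (an illustrative sketch, not a parametrized test).
def _example_nhcwb16_strides():
    # (1, 4, 1, 4, 16) is the 5-D NHCWB16 form of the (1, 4, 4, 8) NHWC tensor.
    ifm_shape = (1, 4, 1, 4, 16)
    stride_w = 16
    stride_c = 16 * ifm_shape[3]                 # 16 * 4 = 64
    stride_h = 16 * ifm_shape[2] * ifm_shape[3]  # 16 * 1 * 4 = 64
    assert (stride_h, stride_w, stride_c) == (64, 16, 64)

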
def test_mixed_read():
    def _planner(te_graph, const_dict, sch):
        weight = te_graph.inputs[4]
        scale_bias = te_graph.inputs[5]
        out = te_graph.outputs[0]
        conv_compute = Convolution2DCompute.from_output(out)
        co = conv_compute.split(sch, 3, 2)
        cache_weight = sch.cache_read(weight, "global", [conv_compute.conv2d])
        cache_scale_bias = sch.cache_read(scale_bias, "global", [conv_compute.conv2d])
        sch[cache_weight].compute_at(sch[out], co)
        sch[cache_scale_bias].compute_at(sch[out], co)

    def _get_func():
        ifm = relay.var("ifm", shape=(1, 16, 16, 32), dtype="int8")
        conv1 = make_ethosu_conv2d(
            ifm,
            32,
            16,
            (1, 1),
            (0, 0),
            (1, 1),
            (1, 1),
        )
        conv2 = make_ethosu_conv2d(
            conv1,
            16,
            8,
            (1, 1),
            (0, 0),
            (1, 1),
            (1, 1),
        )
        func = relay.Function(relay.analysis.free_vars(conv2), conv2)
        func = run_opt_pass(func, relay.transform.InferType())
        return func

    func = _get_func()
    mod, consts = lower_to_tir(func, cascader=_planner)
    script = mod.script(show_meta=True)
    test_mod = tvm.script.from_source(script)
    reference_mod = MixedRead()
    tvm.ir.assert_structural_equal(test_mod["main"], reference_mod["main"], True)

    reference_const_sizes = {
        1: 592,
        2: 160,
        4: 80,
        5: 32,
        6: 80,
        7: 32,
        8: 80,
        9: 32,
        10: 80,
        11: 32,
    }
    test_const_sizes = {}
    for key, value in consts.items():
        test_const_sizes[key] = len(value)
    assert reference_const_sizes == test_const_sizes


def test_pooling_single(
    ifm_shape,
    ofm_channels,
    ifm_layout,
    ofm_layout,
    pooling_type,
    activation,
):
    pool_shape = (3, 2)
    strides = (1, 2)
    padding = (1, 1, 1, 0)
    ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
    pooling = make_ethosu_pooling(
        ifm,
        pooling_type,
        pool_shape,
        ofm_channels,
        strides,
        padding,
        activation,
        ifm_layout,
        ofm_layout,
    )
    func = relay.Function(relay.analysis.free_vars(pooling), pooling)
    func = run_opt_pass(func, relay.transform.InferType())
    mod, _ = lower_to_tir(func)

    data = []

    def _visit(stmt):
        if isinstance(stmt, tvm.tir.Call):
            data.append(get_pooling_args(stmt))

    tvm.tir.stmt_functor.post_order_visit(mod["main"].body, _visit)

    if ifm_layout == "NHWC":
        ifm_stride_c = 1
        ifm_stride_w = ifm_shape[3]
        ifm_stride_h = ifm_shape[2] * ifm_shape[3]
        ofm_height = (ifm_shape[1] - pool_shape[0] + padding[0] + padding[0]) // strides[0] + 1
        ofm_width = (ifm_shape[2] - pool_shape[1] + padding[1] + padding[1]) // strides[1] + 1
    else:
        ifm_stride_w = 16
        ifm_stride_c = 16 * ifm_shape[3]
        ifm_stride_h = 16 * ifm_shape[2] * ifm_shape[3]
        ofm_height = (ifm_shape[1] - pool_shape[0] + padding[0] + padding[0]) // strides[0] + 1
        ofm_width = (ifm_shape[3] - pool_shape[1] + padding[1] + padding[1]) // strides[1] + 1

    if ofm_layout == "NHWC":
        ofm_stride_c = 1
        ofm_stride_w = ofm_channels if ofm_width > 1 else 1
        ofm_stride_h = ofm_channels * ofm_width if ofm_height > 1 else 1
    else:
        ofm_stride_w = 16
        ofm_stride_c = 16 * ofm_width
        ofm_stride_h = 16 * ofm_width * ((ofm_channels - 1) // 16 + 1)

    serial_pooling = spec.SerialPooling(
        ifm=spec.SerialFeatureMap(
            data_type="int8",
            height=ifm_shape[1],
            width=ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3],
            channels=ofm_channels,
            tile_height_0=ifm_shape[1],
            tile_height_1=0,
            tile_width_0=ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3],
            tile_address_0=0,
            tile_address_1=0,
            tile_address_2=0,
            tile_address_3=0,
            scale=1.0,
            zero_point=0,
            layout=ifm_layout,
            stride_h=ifm_stride_h,
            stride_w=ifm_stride_w,
            stride_c=ifm_stride_c,
        ),
        ofm=spec.SerialFeatureMap(
            data_type="int8",
            height=ofm_height,
            width=ofm_width,
            channels=ofm_channels,
            tile_height_0=ofm_height,
            tile_height_1=0,
            tile_width_0=ofm_width,
            tile_address_0=0,
            tile_address_1=0,
            tile_address_2=0,
            tile_address_3=0,
            scale=1.0,
            zero_point=0,
            layout=ofm_layout,
            stride_h=ofm_stride_h,
            stride_w=ofm_stride_w,
            stride_c=ofm_stride_c,
        ),
        pooling_type=pooling_type,
        pool_shape=spec.SerialKernel(
            width=pool_shape[1],
            height=pool_shape[0],
            stride_w=strides[1],
            stride_h=strides[0],
            dilation_w=1,
            dilation_h=1,
        ),
        padding=spec.SerialPadding(
            top=padding[0], left=padding[1], bottom=padding[2], right=padding[3]
        ),
        activation=spec.SerialActivation(
            op=activation,
            clip_min=10 if activation == "CLIP" else 0,
            clip_max=100 if activation == "CLIP" else 0,
        ),
        upscale="NONE",
    )
    assert data[0] == ["ethosu_pooling"] + list(serial_pooling)


def test_conv2d_single(trial):
    def _get_func(
        ifm_shape,
        ifm_channels,
        ofm_channels,
        kernel_shape,
        padding,
        strides,
        dilation,
        activation,
        ifm_layout,
        ofm_layout,
    ):
        ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
        conv = make_ethosu_conv2d(
            ifm,
            ifm_channels,
            ofm_channels,
            kernel_shape,
            padding,
            strides,
            dilation,
            activation,
            ifm_layout,
            ofm_layout,
        )
        func = relay.Function(relay.analysis.free_vars(conv), conv)
        func = run_opt_pass(func, relay.transform.InferType())
        return func

    # TODO(@mbaret) Fix the tests for these known failures
    # These are anticipated to actually be correct, just a testing issue to do with
    # equivalent convolutions.
    known_failures = [
        [(1, 3, 12, 9, 16), 182, 67, (2, 3), (1, 3), (2, 2), (1, 1), "CLIP", "NHCWB16", "NHCWB16"],
        [(1, 2, 12, 9, 16), 182, 67, (1, 3), (6, 3), (2, 2), (1, 1), "CLIP", "NHCWB16", "NHCWB16"],
    ]
    func = _get_func(*trial)
    mod, _ = lower_to_tir(func)
    data = []

    def _visit(stmt):
        if isinstance(stmt, tvm.tir.Call):
            data.append(get_convolutional_args(stmt, remove_constants=True))

    tvm.tir.stmt_functor.post_order_visit(mod["main"].body, _visit)

    (
        ifm_shape,
        ifm_channels,
        ofm_channels,
        kernel_shape,
        padding,
        strides,
        dilation,
        activation,
        ifm_layout,
        ofm_layout,
    ) = trial
    dilated_kernel_h = (kernel_shape[0] - 1) * dilation[0] + 1
    dilated_kernel_w = (kernel_shape[1] - 1) * dilation[1] + 1
    if ifm_layout == "NHWC":
        ifm_stride_c = 1
        ifm_stride_w = ifm_shape[3]
        ifm_stride_h = ifm_shape[2] * ifm_shape[3]
        ofm_height = (ifm_shape[1] - dilated_kernel_h + padding[0] + padding[0]) // strides[0] + 1
        ofm_width = (ifm_shape[2] - dilated_kernel_w + padding[1] + padding[1]) // strides[1] + 1
    else:
        ifm_stride_w = 16
        ifm_stride_c = 16 * ifm_shape[3]
        ifm_stride_h = 16 * ifm_shape[2] * ifm_shape[3]
        ofm_height = (ifm_shape[1] - dilated_kernel_h + padding[0] + padding[0]) // strides[0] + 1
        ofm_width = (ifm_shape[3] - dilated_kernel_w + padding[1] + padding[1]) // strides[1] + 1

    if ofm_layout == "NHWC":
        ofm_stride_c = 1
        ofm_stride_w = ofm_channels if ofm_width > 1 else 1
        ofm_stride_h = ofm_channels * ofm_width if ofm_height > 1 else 1
    else:
        ofm_stride_w = 16
        ofm_stride_c = 16 * ofm_width
        ofm_stride_h = 16 * ofm_width * ((ofm_channels - 1) // 16 + 1)

    answer = [
        "int8",
        ifm_shape[1],
        ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3],
        ifm_channels,
        ifm_shape[1],
        0,
        ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3],
        0,
        0,
        0,
        0,
        0.5,
        10,
        ifm_layout,
        ifm_stride_h,
        ifm_stride_w,
        ifm_stride_c,
        "int8",
        ofm_height,
        ofm_width,
        ofm_channels,
        ofm_height,
        0,
        ofm_width,
        0,
        0,
        0,
        0,
        0.25,
        14,
        ofm_layout,
        ofm_stride_h,
        ofm_stride_w,
        ofm_stride_c,
        kernel_shape[1],
        kernel_shape[0],
        strides[1],
        strides[0],
        dilation[1],
        dilation[0],
        12,
        padding[0],
        padding[1],
        padding[0],
        padding[1],
        activation,
        10 if activation == "CLIP" else 0,
        100 if activation == "CLIP" else 0,
        "NONE",
    ]
    assert data[0] == answer, data[0]