Пример #1
0
def test_avgpool_1d(data_shape_ncw, pool_size, strides, padding):
    """Test a subgraph with a single avgpool_1d operator."""

    ishape = data_shape_ncw

    input0 = relay.var("input", relay.TensorType(ishape, "int32"))
    out0 = relay.op.nn.avg_pool1d(input0, pool_size, layout="NCW", strides=strides, padding=padding)
    ref_mod = tvm.IRModule.from_expr(relay.Function([input0], out0))

    input1 = relay.var("input", relay.TensorType(ishape, "int16"))
    out1 = relay.op.nn.avg_pool1d(input1, pool_size, layout="NCW", strides=strides, padding=padding)
    mod = tvm.IRModule.from_expr(relay.Function([input1], out1))

    input_data = np.random.randint(low=-10, high=10, size=ishape, dtype="int32")
    inputs = {"input": input_data}
    output_list = generate_ref_data(ref_mod, inputs)

    compile_and_run(
        AOTTestModel(
            module=mod, inputs={"input": input_data.astype(dtype="int16")}, outputs=output_list
        ),
        runner=AOT_CORSTONE300_RUNNER,
        interface_api="c",
        use_unpacked_api=True,
        target_opts={
            "-keys": "arm_cpu",
            "-mcpu": "cortex-m7",
        },
    )
Пример #2
0
def test_dense(M, K, N):
    """Test a subgraph with a single dense operator."""
    ishape = (M, K)
    wshape = (N, K)

    input0 = relay.var("input", relay.TensorType(ishape, "int8"))
    dense_f = relay.op.nn.batch_flatten(input0)
    weight0 = relay.const(
        np.random.randint(low=-10, high=10, size=wshape, dtype="int8"))
    out = relay.op.nn.dense(dense_f, weight0, out_dtype="int32")

    mod = tvm.IRModule.from_expr(relay.Function([input0], out))
    inputs = {
        "input": np.random.randint(low=-128,
                                   high=127,
                                   size=ishape,
                                   dtype="int8")
    }
    output_list = generate_ref_data(mod, inputs)

    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        runner=AOT_CORSTONE300_RUNNER,
        interface_api="c",
        use_unpacked_api=True,
        target_opts={
            "-keys": "arm_cpu",
            "-mcpu": "cortex-m7",
        },
    )
Пример #3
0
def test_maxpool_1d(data_shape_nwc, pool_size, strides, padding):
    """Test a subgraph with a single maxpool_1d operator."""
    ishape = data_shape_nwc

    input0 = relay.var("input", relay.TensorType(ishape, "int8"))
    out = relay.op.nn.max_pool1d(input0,
                                 pool_size,
                                 layout="NWC",
                                 strides=strides,
                                 padding=padding)

    mod = tvm.IRModule.from_expr(relay.Function([input0], out))
    inputs = {
        "input": np.random.randint(low=-128,
                                   high=127,
                                   size=ishape,
                                   dtype="int8")
    }
    output_list = generate_ref_data(mod, inputs)

    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        runner=AOT_CORSTONE300_RUNNER,
        interface_api="c",
        use_unpacked_api=True,
        target_opts={
            "-keys": "arm_cpu",
            "-mcpu": "cortex-m7",
        },
    )
Пример #4
0
def test_ethosu_requantize(accel_type, ifm_shape, ifm_scale, ifm_zp, ofm_scale,
                           ofm_zp):
    np.random.seed(0)
    dtype = "int8"

    def create_model():
        ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
        requantize = relay.qnn.op.requantize(
            ifm,
            relay.const(ifm_scale, dtype="float32"),
            relay.const(ifm_zp, dtype="int32"),
            relay.const(ofm_scale, dtype="float32"),
            relay.const(ofm_zp, dtype="int32"),
        )
        return tvm.IRModule.from_expr(relay.Function([ifm], requantize))

    cpu_mod = create_model()
    input_data = {
        "ifm": np.random.randint(-128, high=127, size=ifm_shape, dtype=dtype)
    }
    output_data = generate_ref_data(cpu_mod, input_data)
    ethosu_mod = partition_for_ethosu(cpu_mod)

    _compare_ethosu_with_reference(ethosu_mod, input_data, output_data,
                                   accel_type)
Пример #5
0
def test_relay_reshape_codegen(ifm_shape, new_shape, accel_type):
    # Create a "partitioned" Relay graph
    ifm0 = relay.var("ifm0", shape=ifm_shape, dtype="int8")
    reshape = relay.op.reshape(ifm0, newshape=new_shape)
    mod = infra.make_partitioned_function(reshape)

    data = np.random.randint(-128, high=127, size=ifm_shape, dtype="int8")

    # Generate a reference output using Relay reshape that doesn't get offloaded
    ref_mod = tvm.IRModule()
    ref_mod["main"] = relay.Function([ifm0], reshape)
    ref_mod = relay.transform.InferType()(ref_mod)

    out_data = generate_ref_data(ref_mod, {"ifm0": data})

    compiled_model = infra.build_source(
        mod,
        {"ifm": data},
        out_data,
        accel_type,
    )

    imported_modules = compiled_model[0].executor_factory.lib.imported_modules
    assert len(imported_modules) == 2
    ethosu_module = imported_modules[0]

    # Verify generated C source
    get_cs = tvm._ffi.get_global_func("runtime.module.ethos-u.getcs")
    cmms = get_cs(ethosu_module)
    cmms = bytes.fromhex(cmms)

    infra.print_payload(cmms)
    infra.verify_source(compiled_model, accel_type)
Пример #6
0
def test_forward_mobilenet_v1(accel_type):
    """Test the Mobilenet V1 TF Lite model."""
    np.random.seed(23)
    tflite_model_file = tf_testing.get_workload_official(
        "https://storage.googleapis.com/download.tensorflow.org/"
        "models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz",
        "mobilenet_v1_1.0_224_quant.tflite",
    )
    with open(tflite_model_file, "rb") as f:
        tflite_model_buf = f.read()
    input_tensor = "input"
    input_dtype = "uint8"
    input_shape = (1, 224, 224, 3)
    in_min, in_max = util.get_range_for_dtype_str(input_dtype)
    input_data = np.random.randint(in_min, high=in_max, size=input_shape, dtype=input_dtype)

    relay_mod, params = convert_to_relay(tflite_model_buf)
    input_data = {input_tensor: input_data}
    output_data = generate_ref_data(relay_mod, input_data)

    mod = partition_for_ethosu(relay_mod, params)
    compiled_models = infra.build_source(
        mod, input_data, output_data, accel_type, output_tolerance=10
    )
    infra.verify_source(compiled_models, accel_type)
Пример #7
0
def test_ethosu_left_shift_binary_elemwise(
    accel_type,
    ifm_shape,
    ifm2_shape,
):
    np.random.seed(0)
    dtype = "int32"

    def create_model():
        ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype)
        ifm2 = relay.var("ifm2", shape=ifm2_shape, dtype=dtype)
        c1 = relay.left_shift(ifm, ifm2)
        return tvm.IRModule.from_expr(relay.Function([ifm, ifm2], c1))

    cpu_mod = create_model()

    # Generate reference data
    in_min, in_max = util.get_range_for_dtype_str(dtype)
    input_data = {
        "ifm": np.random.randint(in_min,
                                 high=in_max,
                                 size=ifm_shape,
                                 dtype=dtype),
        "ifm2": np.random.randint(0, high=32, size=ifm2_shape, dtype=dtype),
    }
    output_data = generate_ref_data(cpu_mod, input_data)
    ethosu_mod = partition_for_ethosu(cpu_mod)

    _compare_ethosu_with_reference(ethosu_mod,
                                   input_data,
                                   output_data,
                                   accel_type,
                                   output_tolerance=0)
Пример #8
0
def test_op_int8(zero_point, scale):
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_USMP_CORSTONE300_RUNNER

    dtype = "int8"
    shape = [1, 16, 16, 3]
    model = make_model(shape, dtype, dtype, zero_point, scale)
    orig_mod = make_module(model)

    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # validate the output
    in_min, in_max = get_range_for_dtype_str(dtype)
    np.random.seed(0)
    input_data = np.random.randint(in_min,
                                   high=in_max,
                                   size=shape,
                                   dtype=dtype)
    inputs = {"in0": input_data}
    params = {}
    output_list = generate_ref_data(orig_mod["main"], inputs, params)
    compile_and_run(
        AOTTestModel(module=cmsisnn_mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Пример #9
0
def test_cnn_small():
    # download the model
    base_url = "https://github.com/ARM-software/ML-zoo/raw/master/models/keyword_spotting/cnn_small/tflite_int8"
    file_to_download = "cnn_s_quantized.tflite"
    model_file = download_testdata("{}/{}".format(base_url, file_to_download), file_to_download)

    with open(model_file, "rb") as f:
        tflite_model_buf = f.read()

    input_shape = (1, 490)
    in_min, in_max = get_range_for_dtype_str("int8")
    input_data = np.random.randint(in_min, high=in_max, size=input_shape).astype(np.float32)

    orig_mod, params = convert_to_relay(tflite_model_buf, input_data, "input")
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)

    # validate CMSIS-NN output against CPU output
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_CORSTONE300_RUNNER
    inputs = {"input": input_data}
    params = {}
    output_list = generate_ref_data(orig_mod["main"], inputs, params)
    compile_and_run(
        AOTTestModel(module=cmsisnn_mod, inputs=inputs, outputs=output_list, params=params),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Пример #10
0
def test_elementwise_add_from_constant_scalar(accel_type, dtype):
    ifm_shape = (1, 4, 4, 8)

    def create_relay_graph():
        inp = relay.var("input", shape=ifm_shape, dtype=dtype)
        scalar = relay.const(np.ones((1, 1, 1, 1), dtype=dtype), dtype=dtype)
        add = relay.qnn.op.add(
            inp,
            scalar,
            relay.const(1.0, dtype="float32"),
            relay.const(0, dtype="int32"),
            relay.const(1.0, dtype="float32"),
            relay.const(0, dtype="int32"),
            relay.const(1.0, dtype="float32"),
            relay.const(0, dtype="int32"),
        )
        return tvm.IRModule.from_expr(relay.Function(relay.analysis.free_vars(add), add))

    cpu_mod = create_relay_graph()
    ethosu_mod = partition_for_ethosu(cpu_mod)

    # Generate reference data
    input_data = {
        "input": np.random.randint(
            low=np.iinfo(dtype).min, high=np.iinfo(dtype).max, size=ifm_shape, dtype=dtype
        ),
    }
    output_data = generate_ref_data(cpu_mod, input_data)

    _compare_ethosu_with_reference(
        ethosu_mod, input_data, output_data, accel_type, output_tolerance=0
    )
Пример #11
0
def test_conv2d(data_shape_nhwc, kernel_size, num_filter, strides, padding,
                dilation, dtype):
    """Test a subgraph with a single conv2d operator."""
    ishape = data_shape_nhwc
    wshape = (*kernel_size, data_shape_nhwc[-1], num_filter)

    weight_data = np.random.randint(low=-10, high=10, size=wshape, dtype=dtype)

    input0 = relay.var("input", relay.TensorType(ishape, dtype))
    weight0 = relay.const(weight_data)
    out0 = relay.op.nn.conv2d(
        input0,
        weight0,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        dilation=(dilation, dilation),
        data_layout="NHWC",
        kernel_layout="HWIO",
        out_dtype="int32",
        out_layout="NHWC",
    )
    ref_mod = tvm.IRModule.from_expr(relay.Function([input0], out0))

    input1 = relay.var("input", relay.TensorType(ishape, dtype))
    weight1 = relay.const(np.moveaxis(weight_data, 2, -1))
    out1 = relay.op.nn.conv2d(
        input1,
        weight1,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        dilation=(dilation, dilation),
        data_layout="NHWC",
        kernel_layout="HWOI",
        out_dtype="int32",
        out_layout="NHWC",
    )
    mod = tvm.IRModule.from_expr(relay.Function([input1], out1))

    inputs = {
        "input": np.random.randint(low=-128,
                                   high=127,
                                   size=ishape,
                                   dtype=dtype)
    }
    output_list = generate_ref_data(ref_mod, inputs)

    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        runner=AOT_CORSTONE300_RUNNER,
        interface_api="c",
        use_unpacked_api=True,
        target_opts={
            "-keys": "arm_cpu",
            "-mcpu": "cortex-m7",
        },
    )
Пример #12
0
def test_ethosu_section_name():
    def create_graph_single(input_tensor_name, input_tensor_shape,
                            input_tensor_dtype):
        c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        c1_params.ifm.shape = input_tensor_shape
        c1_params.kernel.shape = (3, 3, c1_params.ifm.shape[3], 32)
        c1_params.kernel.sc = relay.const(np.random.rand(32) * 2, "float32")
        c1_params.strides = (1, 1)
        c1_params.pad = "VALID"
        c1_params.update_output_qnn_params(input_tensor_dtype,
                                           input_tensor_dtype,
                                           input_tensor_dtype)
        input0 = relay.var(input_tensor_name,
                           shape=c1_params.ifm.shape,
                           dtype=c1_params.ifm.dtype)
        c1, new_params = relay_ir_builder.create_qnn_conv2d(c1_params, input0)
        c1_params.ofm.shape = get_shape_expr(input0, c1)

        f = relay.Function([input0], c1)
        mod = tvm.IRModule()
        mod["main"] = f
        return mod, [c1_params]

    accel_type = "ethos-u55-256"
    relay_module, _ = create_graph_single("input", (1, 300, 300, 3), "int8")
    input_dtype = "int8"
    mod = partition_for_ethosu(relay_module)

    # Generate reference data
    in_min, in_max = util.get_range_for_dtype_str(input_dtype)
    input_data = {
        "input":
        np.random.randint(in_min,
                          high=in_max,
                          size=(1, 300, 300, 3),
                          dtype=input_dtype)
    }
    output_data = generate_ref_data(relay_module, input_data)

    compiled_models = infra.build_source(mod,
                                         input_data,
                                         output_data,
                                         accel_type,
                                         output_tolerance=1)

    # Assumes only two runtime.Modules are created -- i.e. single offload module
    ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[
        0].imported_modules[0]

    # Verify generated C source
    source = ethosu_module.get_source()
    assert (
        '__attribute__((section(".rodata.tvm"), aligned(16))) static int8_t tvmgen_default_ethos_u_main_0_cms_data_data'
        in source)
    assert (
        '__attribute__((section(".rodata.tvm"), aligned(16))) static int8_t tvmgen_default_ethos_u_main_0_weights'
        in source)
Пример #13
0
def test_op_int8(op, input_0_scale, input_0_zero_point, input_1_scale, input_1_zero_point):
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_CORSTONE300_RUNNER

    dtype = "int8"
    shape = [1, 16, 16, 3]
    model = make_model(
        op,
        shape,
        dtype,
        dtype,
        input_0_scale,
        input_0_zero_point,
        input_1_scale,
        input_1_zero_point,
    )
    orig_mod = make_module(model)

    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod)

    # validate pattern matching
    attrs = [
        cmsisnn_mod[var.name_hint].attrs
        for var in cmsisnn_mod.get_global_vars()
        if cmsisnn_mod[var.name_hint].attrs
    ]
    assert any(attrs), "At least one function with external attributes was expected."

    compilers = [
        key == "Compiler" and value == "cmsis-nn" for attr in attrs for key, value in attr.items()
    ]
    assert any(compilers), "Module does not contain function for cmsisnn target."

    assert count_num_calls(orig_mod) == count_num_calls(
        cmsisnn_mod
    ), "Number of calls changed during partitioning"

    # validate the output
    in_min, in_max = get_range_for_dtype_str(dtype)
    inputs = {
        "input_0": np.random.randint(in_min, high=in_max, size=shape, dtype=dtype),
        "input_1": np.random.randint(in_min, high=in_max, size=shape, dtype=dtype),
    }
    output_list = generate_ref_data(orig_mod["main"], inputs)
    compile_and_run(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            output_tolerance=1,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Пример #14
0
def test_op_int8(
    in_shape,
    pool_size,
    strides,
    padding,
    relu_type,
    pool_type,
    zero_point,
    scale,
):
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_USMP_CORSTONE300_RUNNER

    dtype = "int8"

    model = make_model(
        pool_type,
        in_shape,
        pool_size,
        strides,
        padding,
        dtype,
        scale,
        zero_point,
        relu_type,
    )
    orig_mod = make_module(model)

    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # validate the output
    in_min, in_max = get_range_for_dtype_str(dtype)
    np.random.seed(0)
    inputs = {
        "input":
        np.random.randint(in_min, high=in_max, size=in_shape, dtype="int8"),
    }
    output_list = generate_ref_data(orig_mod["main"], inputs)
    compile_and_run(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            params=None,
            output_tolerance=1,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Пример #15
0
def test_relay_strided_slice_codegen(ifm_shape, begin, end, accel_type):
    def create_model():
        ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
        strided_slice = relay.op.strided_slice(ifm, begin, end)
        return tvm.IRModule.from_expr(relay.Function([ifm], strided_slice))

    cpu_mod = create_model()
    input_data = {"ifm": np.random.randint(-128, high=127, size=ifm_shape, dtype="int8")}
    output_data = generate_ref_data(cpu_mod, input_data)
    ethosu_mod = _create_ethosu_partition(cpu_mod)

    _compare_ethosu_with_reference(ethosu_mod, input_data, output_data, accel_type)
Пример #16
0
def test_constant_input_int8(op, input_0, input_1):
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_USMP_CORSTONE300_RUNNER

    dtype = "int8"
    shape = [1, 16, 16, 3]
    input_0_scale = 0.256
    input_0_zero_point = 33
    input_1_scale = 0.128
    input_1_zero_point = -24
    model = make_model(
        op,
        input_0,
        input_1,
        input_0_scale,
        input_0_zero_point,
        input_1_scale,
        input_1_zero_point,
    )
    orig_mod = make_module(model)

    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # validate the output
    in_min, in_max = get_range_for_dtype_str(dtype)
    inputs = {}
    if isinstance(input_0, tvm.relay.expr.Var):
        inputs.update({
            "input_0":
            np.random.randint(in_min, high=in_max, size=shape, dtype=dtype)
        })
    if isinstance(input_1, tvm.relay.expr.Var):
        inputs.update({
            "input_1":
            np.random.randint(in_min, high=in_max, size=shape, dtype=dtype)
        })
    output_list = generate_ref_data(orig_mod["main"], inputs)
    compile_and_run(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            output_tolerance=1,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Пример #17
0
def test_binary_add_from_constant_scalar(accel_type):
    dtype = "uint8"
    ifm_shape = (1, 4, 4, 8)

    def create_relay_graph():
        inp = relay.var("input", shape=ifm_shape, dtype=dtype)
        scalar = relay.const(np.ones((1, 1, 1, 1), dtype=dtype), dtype=dtype)
        add = relay.qnn.op.add(
            inp,
            scalar,
            relay.const(1.0, dtype="float32"),
            relay.const(0, dtype="int32"),
            relay.const(1.0, dtype="float32"),
            relay.const(0, dtype="int32"),
            relay.const(1.0, dtype="float32"),
            relay.const(0, dtype="int32"),
        )
        func = relay.Function(relay.analysis.free_vars(add), add)
        return tvm.IRModule.from_expr(func)

    mod = create_relay_graph()
    partitioned_mod = partition_for_ethosu(mod)

    # Generate reference data
    input_data = {
        "input": np.random.randint(low=0,
                                   high=255,
                                   size=ifm_shape,
                                   dtype=dtype)
    }
    output_data = generate_ref_data(mod, input_data)

    compiled_models = infra.build_source(
        partitioned_mod,
        input_data,
        output_data,
        accel_type,
        output_tolerance=0,
    )

    # Assumes only two runtime.Modules are created -- i.e. single offload module
    imported_modules = compiled_models[0].executor_factory.lib.imported_modules
    assert len(imported_modules) == 2
    ethosu_module = imported_modules[0]

    # Verify generated C source
    get_cs = tvm._ffi.get_global_func("runtime.module.ethos-u.getcs")
    cmms = get_cs(ethosu_module)
    cmms = bytes.fromhex(cmms)

    infra.print_payload(cmms)
    infra.verify_source(compiled_models, accel_type)
Пример #18
0
def test_op_int8(op, relu_type, input_0_scale, input_0_zero_point,
                 input_1_scale, input_1_zero_point):
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_USMP_CORSTONE300_RUNNER

    dtype = "int8"
    shape = [1, 16, 16, 3]
    model = make_model(
        op,
        generate_variable("input_0"),
        generate_variable("input_1"),
        input_0_scale,
        input_0_zero_point,
        input_1_scale,
        input_1_zero_point,
        relu_type,
    )
    orig_mod = make_module(model)

    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # validate the output
    in_min, in_max = get_range_for_dtype_str(dtype)
    inputs = {
        "input_0": np.random.randint(in_min,
                                     high=in_max,
                                     size=shape,
                                     dtype=dtype),
        "input_1": np.random.randint(in_min,
                                     high=in_max,
                                     size=shape,
                                     dtype=dtype),
    }
    output_list = generate_ref_data(orig_mod["main"], inputs)
    compile_and_run(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            output_tolerance=1,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Пример #19
0
def test_ethosu_left_shift_binary_elemwise(
    accel_type,
    ifm_shape,
    ifm2_shape,
):
    dtype = "int32"

    def create_model():
        ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype)
        ifm2 = relay.var("ifm2", shape=ifm2_shape, dtype=dtype)
        c1 = relay.left_shift(ifm, ifm2)
        f = relay.Function([ifm, ifm2], c1)
        mod = tvm.IRModule()
        mod["main"] = f
        return mod

    relay_mod = create_model()
    mod = partition_for_ethosu(relay_mod)

    # Generate reference data
    in_min, in_max = util.get_range_for_dtype_str(dtype)
    input_data = {
        "ifm": np.random.randint(in_min,
                                 high=in_max,
                                 size=ifm_shape,
                                 dtype=dtype),
        "ifm2": np.random.randint(0, high=32, size=ifm2_shape, dtype=dtype),
    }
    output_data = generate_ref_data(relay_mod, input_data)

    compiled_models = infra.build_source(
        mod,
        input_data,
        output_data,
        accel_type,
    )

    # Assumes only two runtime.Modules are created -- i.e. single offload module
    imported_modules = compiled_models[0].executor_factory.lib.imported_modules
    assert len(imported_modules) == 2
    ethosu_module = imported_modules[0]

    # Verify generated C source
    get_cs = tvm._ffi.get_global_func("runtime.module.ethos-u.getcs")
    cmms = get_cs(ethosu_module)
    cmms = bytes.fromhex(cmms)

    infra.print_payload(cmms)
    infra.verify_source(compiled_models, accel_type)
Пример #20
0
def test_relay_reshape_codegen(ifm_shape, new_shape, accel_type):
    np.random.seed(0)

    def create_model():
        ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
        reshape = relay.op.reshape(ifm, newshape=new_shape)
        return tvm.IRModule.from_expr(relay.Function([ifm], reshape))

    cpu_mod = create_model()
    input_data = {
        "ifm": np.random.randint(-128, high=127, size=ifm_shape, dtype="int8")
    }
    output_data = generate_ref_data(cpu_mod, input_data)
    ethosu_mod = _create_ethosu_partition(cpu_mod)

    _compare_ethosu_with_reference(ethosu_mod, input_data, output_data,
                                   accel_type)
Пример #21
0
    def create_mod_from_relay():
        ifm = relay.var("input", shape=ifm_shape, dtype=dtype)
        cast = relay.cast(ifm, dtype="int32")
        mean = relay.mean(cast, axis=axis, keepdims=keep_dims)
        requantize = relay.qnn.op.requantize(
            mean,
            input_scale=relay.const(1.0, dtype="float32"),
            input_zero_point=relay.const(0, dtype="int32"),
            output_scale=relay.const(1.0, dtype="float32"),
            output_zero_point=relay.const(0, dtype="int32"),
        )

        func = relay.Function(relay.analysis.free_vars(requantize), requantize)
        mod = tvm.IRModule.from_expr(func)

        input_data = {"input": np.random.randint(low=-127, high=128, size=ifm_shape, dtype=dtype)}
        output_data = generate_ref_data(mod, input_data)
        return mod, input_data, output_data
Пример #22
0
def test_networks_without_usmp(accel_type, model_url, workspace_size,
                               tolerance):
    np.random.seed(23)
    tflite_model_file = tf_testing.get_workload_official(
        model_url[0], model_url[1])
    mod, input_data, params = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file)
    output_data = generate_ref_data(mod, input_data, params)
    mod = partition_for_ethosu(mod, params)
    compiled_models = infra.build_source(mod,
                                         input_data,
                                         output_data,
                                         accel_type,
                                         output_tolerance=tolerance,
                                         enable_usmp=False)
    mlf_memory_map = mlf._build_function_memory_map(
        compiled_models[0].executor_factory.function_metadata)
    assert mlf_memory_map["main"][0]["workspace_size_bytes"] == workspace_size
    infra.verify_source(compiled_models, accel_type, enable_usmp=False)
Пример #23
0
def test_networks_with_usmp(accel_type, model_url, workspace_size, tolerance):
    np.random.seed(23)
    tflite_model_file = tf_testing.get_workload_official(
        model_url[0], model_url[1])
    mod, input_data, params = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file)
    output_data = generate_ref_data(mod, input_data, params)
    mod = partition_for_ethosu(mod, params)
    compiled_models = infra.build_source(mod,
                                         input_data,
                                         output_data,
                                         accel_type,
                                         output_tolerance=tolerance,
                                         enable_usmp=True)
    allocated_pool_info = list(
        dict(compiled_models[0].executor_factory.executor_codegen_metadata.
             pool_inputs).values())[0]
    assert allocated_pool_info.allocated_size == workspace_size
    infra.verify_source(compiled_models, accel_type, enable_usmp=True)
Пример #24
0
def test_empty_function():
    ORIGINAL_MODEL = """
#[version = "0.0.5"]
def @main(%data : Tensor[(16, 29), int8]) -> Tensor[(16, 29), int8] {
    add(%data, %data)
}
"""
    CMSISNN_MODEL = """
#[version = "0.0.5"]
def @tvmgen_default_cmsis_nn_main_1(%i1: Tensor[(16, 29), int8], Inline=1, Compiler="cmsis-nn", global_symbol="tvmgen_default_cmsis_nn_main_1", Primitive=1) -> Tensor[(16, 29), int8] {
  add(%i1, %i1)
}
def @main(%data : Tensor[(16, 29), int8]) -> Tensor[(16, 29), int8] {
  %1 = @tvmgen_default_cmsis_nn_main_1(%data) /* ty=Tensor[(16, 29), int8] */;
  %1
}
"""
    orig_mod = tvm.parser.fromtext(ORIGINAL_MODEL)
    cmsisnn_mod = tvm.parser.fromtext(CMSISNN_MODEL)
    params = {}

    # validate the output
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_USMP_CORSTONE300_RUNNER
    dtype = "int8"
    in_min, in_max = get_range_for_dtype_str(dtype)
    rng = np.random.default_rng(12345)
    inputs = {"data": rng.integers(in_min, high=in_max, size=(16, 29), dtype=dtype)}
    outputs = generate_ref_data(orig_mod["main"], inputs, params)
    compile_and_run(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=outputs,
            params=params,
            output_tolerance=0,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Пример #25
0
def test_cnn_small():
    # download the model
    base_url = "https://github.com/ARM-software/ML-zoo/raw/48a22ee22325d15d2371a6df24eb7d67e21dcc97/models/keyword_spotting/cnn_small/tflite_int8"
    file_to_download = "cnn_s_quantized.tflite"
    file_saved = "cnn_s_quantized_15Dec2021.tflite"
    model_file = download_testdata("{}/{}".format(base_url, file_to_download), file_saved)

    with open(model_file, "rb") as f:
        tflite_model_buf = f.read()

    input_shape = (1, 490)
    dtype = "int8"
    in_min, in_max = get_range_for_dtype_str(dtype)
    rng = np.random.default_rng(12345)
    input_data = rng.integers(in_min, high=in_max, size=input_shape, dtype=dtype)

    orig_mod, params = convert_to_relay(tflite_model_buf, input_data, "input")
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)

    # validate CMSIS-NN output against CPU output
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_CORSTONE300_RUNNER
    inputs = {"input": input_data}
    params = {}
    output_list = generate_ref_data(orig_mod["main"], inputs, params)
    compile_and_run(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            params=params,
            output_tolerance=1,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Пример #26
0
def test_cnn_small():
    # download the model
    base_url = "https://github.com/ARM-software/ML-zoo/raw/ee35139af86bdace5e502b09fe8b9da9cb1f06bb/models/keyword_spotting/cnn_small/tflite_int8"
    file_to_download = "cnn_s_quantized.tflite"
    model_file = download_testdata("{}/{}".format(base_url, file_to_download),
                                   file_to_download)

    with open(model_file, "rb") as f:
        tflite_model_buf = f.read()

    input_shape = (1, 490)
    rng = np.random.default_rng(12345)
    input_data = rng.random(input_shape, dtype=np.float32)

    orig_mod, params = convert_to_relay(tflite_model_buf, input_data, "input")
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)

    # validate CMSIS-NN output against CPU output
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_CORSTONE300_RUNNER
    inputs = {"input": input_data}
    params = {}
    output_list = generate_ref_data(orig_mod["main"], inputs, params)
    compile_and_run(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            params=params,
            output_tolerance=1,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Пример #27
0
def test_depthwise_int8(
    ifm_shape,
    kernel_size,
    padding,
    strides,
    dilation,
    enable_bias,
    relu_type,
    input_zero_point,
    input_scale,
    kernel_scale,
    out_channels,
    depth_multiplier,
):
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_CORSTONE300_RUNNER

    dtype = "int8"
    groups = 1
    weight_format = "HWIO"
    kernel_h = kernel_size[0]
    kernel_w = kernel_size[1]
    kernel_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels)
    kernel_zero_point = 0
    in_min, in_max = get_range_for_dtype_str(dtype)

    groups = ifm_shape[3]
    weight_format = "HWOI"
    kernel_shape = (kernel_h, kernel_w, ifm_shape[3], depth_multiplier)
    out_channels = ifm_shape[3] * depth_multiplier
    ks_len = len(kernel_scale)
    kernel_scale = [kernel_scale[i % ks_len] for i in range(out_channels)]

    output_scale, output_zero_point = get_conv2d_qnn_params(
        kernel_shape,
        input_scale,
        input_zero_point,
        kernel_scale,
        kernel_zero_point,
        dtype,
        dtype,
        dtype,
        True,
    )

    model, params = make_model(
        ifm_shape,
        kernel_shape,
        input_zero_point,
        input_scale,
        kernel_zero_point,
        kernel_scale,
        output_zero_point,
        output_scale,
        padding,
        strides,
        dilation,
        groups,
        dtype,
        dtype,
        out_channels,
        weight_format,
        enable_bias,
        relu_type,
    )
    orig_mod = make_module(model)
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)

    # validate pattern matching
    attrs = [
        cmsisnn_mod[var.name_hint].attrs
        for var in cmsisnn_mod.get_global_vars()
        if cmsisnn_mod[var.name_hint].attrs
    ]
    assert any(attrs), "At least one function with external attributes was expected."

    compilers = [
        key == "Compiler" and value == "cmsis-nn" for attr in attrs for key, value in attr.items()
    ]
    assert any(compilers), "Module does not contain function for cmsis-nn target."

    assert count_num_calls(orig_mod) == count_num_calls(
        cmsisnn_mod
    ), "Number of calls changed during partitioning"

    # validate the output
    rng = np.random.default_rng(12345)
    inputs = {"input": rng.integers(in_min, high=in_max, size=ifm_shape, dtype=dtype)}
    output_list = generate_ref_data(orig_mod["main"], inputs, params)
    compile_and_run(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            params=params,
            output_tolerance=1,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Пример #28
0
def test_depthwise_int8(
    ifm_shape,
    kernel_size,
    padding,
    strides,
    dilation,
    enable_bias,
    relu_type,
    input_zero_point,
    input_scale,
    kernel_scale,
    out_channels,
    depth_multiplier,
):
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_CORSTONE300_RUNNER

    dtype = "int8"
    groups = 1
    weight_format = "HWIO"
    kernel_h = kernel_size[0]
    kernel_w = kernel_size[1]
    kernel_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels)
    kernel_zero_point = 0
    in_min, in_max = get_range_for_dtype_str(dtype)

    groups = ifm_shape[3]
    weight_format = "HWOI"
    kernel_shape = (kernel_h, kernel_w, ifm_shape[3], depth_multiplier)
    out_channels = ifm_shape[3] * depth_multiplier
    ks_len = len(kernel_scale)
    kernel_scale = [kernel_scale[i % ks_len] for i in range(out_channels)]

    output_scale, output_zero_point = get_conv2d_qnn_params(
        kernel_shape,
        input_scale,
        input_zero_point,
        kernel_scale,
        kernel_zero_point,
        dtype,
        dtype,
        dtype,
        True,
    )

    model, params = make_model(
        ifm_shape,
        kernel_shape,
        input_zero_point,
        input_scale,
        kernel_zero_point,
        kernel_scale,
        output_zero_point,
        output_scale,
        padding,
        strides,
        dilation,
        groups,
        dtype,
        dtype,
        out_channels,
        weight_format,
        enable_bias,
        relu_type,
    )
    orig_mod = make_module(model)
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # validate the output
    rng = np.random.default_rng(12345)
    inputs = {"input": rng.integers(in_min, high=in_max, size=ifm_shape, dtype=dtype)}
    output_list = generate_ref_data(orig_mod["main"], inputs, params)
    compile_and_run(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            params=params,
            output_tolerance=1,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Пример #29
0
def test_op_int8(
    in_shape,
    enable_bias,
    input_zero_point,
    input_scale,
    kernel_scale,
    out_channels,
    relu_type,
):
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_USMP_CORSTONE300_RUNNER

    dtype = "int8"
    kernel_zero_point = 0
    kernel_shape = [out_channels, in_shape[1]]
    conv2d_kernel_shape = (1, 1, kernel_shape[0], kernel_shape[1])
    in_min, in_max = get_range_for_dtype_str(dtype)

    output_scale, output_zero_point = get_conv2d_qnn_params(
        conv2d_kernel_shape,
        input_scale,
        input_zero_point,
        kernel_scale,
        kernel_zero_point,
        dtype,
    )

    model, params = make_model(
        in_shape,
        kernel_shape,
        input_zero_point,
        kernel_zero_point,
        input_scale,
        kernel_scale,
        output_zero_point,
        output_scale,
        dtype,
        dtype,
        out_channels,
        enable_bias,
    )
    orig_mod = make_module(model)
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # validate the output
    rng = np.random.default_rng(12345)
    inputs = {
        "input": rng.integers(in_min, high=in_max, size=in_shape, dtype=dtype)
    }
    output_list = generate_ref_data(orig_mod["main"], inputs, params)
    compile_and_run(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            params=params,
            output_tolerance=1,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Пример #30
0
def test_ethosu_conv2d(accel_type):
    def create_graph_single(input_tensor_name, input_tensor_shape,
                            input_tensor_dtype):
        c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        c1_params.ifm.shape = input_tensor_shape
        c1_params.kernel.shape = (3, 3, c1_params.ifm.shape[3], 32)
        c1_params.kernel.sc = relay.const(np.random.rand(32) * 2, "float32")
        c1_params.strides = (1, 1)
        c1_params.pad = "VALID"
        c1_params.update_output_qnn_params(input_tensor_dtype,
                                           input_tensor_dtype,
                                           input_tensor_dtype)
        input0 = relay.var(input_tensor_name,
                           shape=c1_params.ifm.shape,
                           dtype=c1_params.ifm.dtype)
        c1, new_params = relay_ir_builder.create_qnn_conv2d(c1_params, input0)
        c1_params.ofm.shape = get_shape_expr(input0, c1)

        f = relay.Function([input0], c1)
        mod = tvm.IRModule()
        mod["main"] = f
        return mod, [c1_params]

    def create_graph_double(input_tensor_name, input_tensor_shape,
                            input_tensor_dtype):
        c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        c1_params.ifm.shape = input_tensor_shape
        c1_params.kernel.shape = (7, 7, c1_params.ifm.shape[3], 8)
        c1_params.strides = (2, 2)
        c1_params.pad = "VALID"
        c1_params.update_output_qnn_params(input_tensor_dtype,
                                           input_tensor_dtype,
                                           input_tensor_dtype)
        input0 = relay.var(input_tensor_name,
                           shape=c1_params.ifm.shape,
                           dtype=c1_params.ifm.dtype)
        c1, new_params = relay_ir_builder.create_qnn_conv2d(c1_params, input0)
        c1_params.ofm.shape = get_shape_expr(input0, c1)

        c2_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        c2_params.ifm.shape = c1_params.ofm.shape
        c2_params.kernel.shape = (5, 5, c2_params.ifm.shape[3], 16)
        c2_params.strides = (1, 1)
        c2_params.pad = "SAME"
        c2_params.update_output_qnn_params()
        c2, new_params = relay_ir_builder.create_qnn_conv2d(c2_params, c1)
        c2_params.ofm.shape = get_shape_expr(input0, c2)

        f = relay.Function([input0], c2)
        mod = tvm.IRModule()
        mod["main"] = f
        return mod, [c2_params, c1_params]

    def create_graph_activation(input_tensor_name, input_tensor_shape,
                                input_tensor_dtype):
        c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        c1_params.ifm.shape = input_tensor_shape
        c1_params.kernel.shape = (7, 7, c1_params.ifm.shape[3], 8)
        c1_params.strides = (2, 2)
        c1_params.pad = "VALID"
        c1_params.activation = "CLIP"
        c1_params.clip_min = 90
        c1_params.clip_max = 110
        c1_params.update_output_qnn_params(input_tensor_dtype,
                                           input_tensor_dtype,
                                           input_tensor_dtype)
        input0 = relay.var(input_tensor_name,
                           shape=c1_params.ifm.shape,
                           dtype=c1_params.ifm.dtype)
        c1, new_params = relay_ir_builder.create_qnn_conv2d(c1_params, input0)
        c1_params.ofm.shape = get_shape_expr(input0, c1)

        c2_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        c2_params.ifm.shape = c1_params.ofm.shape
        c2_params.kernel.shape = (5, 5, c2_params.ifm.shape[3], 16)
        c2_params.strides = (1, 1)
        c2_params.pad = "SAME"
        c2_params.update_output_qnn_params()
        c2, new_params = relay_ir_builder.create_qnn_conv2d(c2_params, c1)
        c2_params.ofm.shape = get_shape_expr(input0, c2)

        f = relay.Function([input0], c2)
        mod = tvm.IRModule()
        mod["main"] = f
        return mod, [c2_params, c1_params]

    test_cases = [
        (create_graph_single, ["input", (1, 300, 300, 3), "int8"]),
        (create_graph_double, ["input", (1, 128, 256, 4), "int8"]),
        (create_graph_activation, ["input", (1, 64, 100, 4), "int8"]),
    ]
    np.random.seed(42)
    for test_case in test_cases:
        relay_module, conv_params = test_case[0](*test_case[1])
        input_tensor, input_shape, input_dtype = test_case[1]
        mod = partition_for_ethosu(relay_module)

        # Generate reference data
        in_min, in_max = util.get_range_for_dtype_str(input_dtype)
        input_data = {
            input_tensor:
            np.random.randint(in_min,
                              high=in_max,
                              size=input_shape,
                              dtype=input_dtype)
        }
        output_data = generate_ref_data(relay_module, input_data)

        compiled_models = infra.build_source(mod,
                                             input_data,
                                             output_data,
                                             accel_type,
                                             output_tolerance=1)

        # Assumes only two runtime.Modules are created -- i.e. single offload module
        ethosu_module = (compiled_models[0].executor_factory.lib.
                         imported_modules[0].imported_modules[0])

        # Verify generated C source
        get_artifacts = tvm._ffi.get_global_func(
            "runtime.module.ethos-u.get_artifacts")
        compilation_artifacts = get_artifacts(ethosu_module)
        cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
        infra.print_payload(cmms)
        infra.verify_source(compiled_models, accel_type)