예제 #1
0
def device_api_main_func():
    # Ideally we should have a sample Target registered here
    # but we're going to re-use this for now
    pytest.importorskip("ethosu.vela")
    import tensorflow as tf
    import tflite.Model

    from tests.python.contrib.test_ethosu.infra import create_test_runner, generate_ref_data_tflite
    from tvm.relay.op.contrib.ethosu import partition_for_ethosu

    tf.config.run_functions_eagerly(True)

    class Model(tf.Module):
        @tf.function
        def tf_function(self, x):
            return tf.nn.max_pool(x, [1, 2], [1, 2], "SAME")

    def representative_dataset():
        for _ in range(100):
            data = np.random.rand(1, 3, 4, 3)
            yield [data.astype(np.float32)]

    model = Model()
    concrete_func = model.tf_function.get_concrete_function(
        tf.TensorSpec([1, 3, 4, 3], dtype=tf.float32))

    converter = tf.lite.TFLiteConverter.from_concrete_functions(
        [concrete_func])
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.representative_dataset = representative_dataset
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.int8
    converter.inference_output_type = tf.int8

    tflite_graph = converter.convert()
    tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)

    relay_module, params = relay.frontend.from_tflite(
        tflite_model,
        shape_dict={"x": [1, 3, 4, 3]},
        dtype_dict={"x": "int8"},
    )
    mod = partition_for_ethosu(relay_module, params)

    # Generate reference data
    input_data, output_data = generate_ref_data_tflite(tflite_graph)

    def compile_to_main_func(interface_api="c", use_unpacked_api=True):
        test_runner = create_test_runner()
        compiled_models = compile_models(
            models=AOTTestModel(
                module=mod,
                inputs=input_data,
                outputs=output_data,
            ),
            interface_api=interface_api,
            use_unpacked_api=use_unpacked_api,
            workspace_byte_alignment=16,
            pass_config=test_runner.pass_config,
        )
        main_ir_module = compiled_models[
            0].executor_factory.lowered_ir_mods.items()[0][1]
        main_func = main_ir_module["__tvm_main__"]
        return main_func

    return compile_to_main_func
예제 #2
0
def test_mean(accel_type, ifm_shape, axis, keep_dims, use_same_quantization):
    dtype = "int8"

    def create_mod_from_tflite():
        class Model(tf.Module):
            @tf.function
            def tf_function(self, x):
                op = tf.math.reduce_mean(x, axis=axis, keepdims=keep_dims)
                return op

        model = Model()
        concrete_func = model.tf_function.get_concrete_function(
            tf.TensorSpec(ifm_shape, dtype=tf.float32))

        # Convert the model
        def representative_dataset():
            for _ in range(100):
                data = np.random.rand(*tuple(ifm_shape))
                yield [data.astype(np.float32)]

        converter = tf.lite.TFLiteConverter.from_concrete_functions(
            [concrete_func])
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.representative_dataset = representative_dataset
        converter.target_spec.supported_ops = [
            tf.lite.OpsSet.TFLITE_BUILTINS_INT8
        ]
        converter.inference_input_type = tf.int8
        converter.inference_output_type = tf.int8
        tflite_graph = converter.convert()
        tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)

        mod, _ = relay.frontend.from_tflite(
            tflite_model,
            shape_dict={"ifm": ifm_shape},
            dtype_dict={"ifm": dtype},
        )
        input_data, output_data = infra.generate_ref_data_tflite(tflite_graph)
        return mod, input_data, output_data

    def create_mod_from_relay():
        ifm = relay.var("input", shape=ifm_shape, dtype=dtype)
        cast = relay.cast(ifm, dtype="int32")
        mean = relay.mean(cast, axis=axis, keepdims=keep_dims)
        requantize = relay.qnn.op.requantize(
            mean,
            input_scale=relay.const(1.0, dtype="float32"),
            input_zero_point=relay.const(0, dtype="int32"),
            output_scale=relay.const(1.0, dtype="float32"),
            output_zero_point=relay.const(0, dtype="int32"),
        )

        func = relay.Function(relay.analysis.free_vars(requantize), requantize)
        mod = tvm.IRModule.from_expr(func)

        input_data = {
            "input":
            np.random.randint(low=-127, high=128, size=ifm_shape, dtype=dtype)
        }
        output_data = generate_ref_data(mod, input_data)
        return mod, input_data, output_data

    mod, input_data, output_data = (create_mod_from_relay()
                                    if use_same_quantization else
                                    create_mod_from_tflite())
    mod = partition_for_ethosu(mod)

    # TODO(lhutton1) For now output is not bit exact with TFLite.
    # This is because TFLite reference kernels are not being used.
    # For this, TFLite will need upgrading to 2.6.
    compiled_models = infra.build_source(mod,
                                         input_data,
                                         output_data,
                                         accel_type,
                                         output_tolerance=1)

    # Assumes only two runtime.Modules are created -- i.e. single offload module
    ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[
        0].imported_modules[0]

    # Verify generated C source
    get_artifacts = tvm._ffi.get_global_func(
        "runtime.module.ethos-u.get_artifacts")
    compilation_artifacts = get_artifacts(ethosu_module)
    cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
    infra.print_payload(cmms)
    infra.verify_source(compiled_models, accel_type)
예제 #3
0
def test_ethosu_conv2d_single(
    ifm_shape,
    kernel_shape,
    strides,
    dilation,
    padding,
    accel_type,
    activation,
):
    dtype = "int8"

    def create_tflite_graph_single():
        class Model(tf.Module):
            @tf.function
            def tf_function(self, x):
                # Use tf.nn API to create the model
                tf_strides = [1, strides[0], strides[1], 1]
                op = tf.nn.conv2d(
                    x,
                    filters=tf.constant(
                        np.random.uniform(
                            size=[kernel_shape[0], kernel_shape[1], 3, 3]),
                        dtype=tf.float32,
                    ),
                    strides=tf_strides,
                    padding=padding,
                    dilations=dilation,
                )
                if activation:
                    op = tf.nn.relu(op)
                return op

        model = Model()
        concrete_func = model.tf_function.get_concrete_function(
            tf.TensorSpec(ifm_shape, dtype=tf.float32))

        # Convert the model
        def representative_dataset():
            for _ in range(100):
                data = np.random.rand(*tuple(ifm_shape))
                yield [data.astype(np.float32)]

        converter = tf.lite.TFLiteConverter.from_concrete_functions(
            [concrete_func])
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.representative_dataset = representative_dataset
        converter.target_spec.supported_ops = [
            tf.lite.OpsSet.TFLITE_BUILTINS_INT8
        ]
        converter.inference_input_type = tf.int8
        converter.inference_output_type = tf.int8
        tflite_model = converter.convert()
        return tflite_model

    tflite_graph = create_tflite_graph_single()
    tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)

    relay_module, params = relay.frontend.from_tflite(
        tflite_model,
        shape_dict={"input": ifm_shape},
        dtype_dict={"input": dtype},
    )
    mod = partition_for_ethosu(relay_module, params)

    # Generate reference data
    input_data, output_data = infra.generate_ref_data_tflite(tflite_graph)

    compiled_models = infra.build_source(
        mod,
        input_data,
        output_data,
        accel_type,
    )

    # Assumes only two runtime.Modules are created -- i.e. single offload module
    ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[
        0].imported_modules[0]

    # Verify generated C source
    get_artifacts = tvm._ffi.get_global_func(
        "runtime.module.ethos-u.get_artifacts")
    compilation_artifacts = get_artifacts(ethosu_module)
    cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
    infra.print_payload(cmms)
    infra.verify_source(compiled_models, accel_type)
예제 #4
0
def test_ethosu_conv2d(accel_type):
    def create_graph_single(input_tensor_name, input_tensor_shape,
                            input_tensor_dtype):
        c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        c1_params.ifm.shape = input_tensor_shape
        c1_params.kernel.shape = (3, 3, c1_params.ifm.shape[3], 32)
        c1_params.kernel.sc = relay.const(np.random.rand(32) * 2, "float32")
        c1_params.strides = (1, 1)
        c1_params.pad = "VALID"
        c1_params.update_output_qnn_params(input_tensor_dtype,
                                           input_tensor_dtype,
                                           input_tensor_dtype)
        input0 = relay.var(input_tensor_name,
                           shape=c1_params.ifm.shape,
                           dtype=c1_params.ifm.dtype)
        c1, new_params = relay_ir_builder.create_qnn_conv2d(c1_params, input0)
        c1_params.ofm.shape = get_shape_expr(input0, c1)

        f = relay.Function([input0], c1)
        mod = tvm.IRModule()
        mod["main"] = f
        return mod, [c1_params]

    def create_graph_double(input_tensor_name, input_tensor_shape,
                            input_tensor_dtype):
        c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        c1_params.ifm.shape = input_tensor_shape
        c1_params.kernel.shape = (7, 7, c1_params.ifm.shape[3], 8)
        c1_params.strides = (2, 2)
        c1_params.pad = "VALID"
        c1_params.update_output_qnn_params(input_tensor_dtype,
                                           input_tensor_dtype,
                                           input_tensor_dtype)
        input0 = relay.var(input_tensor_name,
                           shape=c1_params.ifm.shape,
                           dtype=c1_params.ifm.dtype)
        c1, new_params = relay_ir_builder.create_qnn_conv2d(c1_params, input0)
        c1_params.ofm.shape = get_shape_expr(input0, c1)

        c2_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        c2_params.ifm.shape = c1_params.ofm.shape
        c2_params.kernel.shape = (5, 5, c2_params.ifm.shape[3], 16)
        c2_params.strides = (1, 1)
        c2_params.pad = "SAME"
        c2_params.update_output_qnn_params()
        c2, new_params = relay_ir_builder.create_qnn_conv2d(c2_params, c1)
        c2_params.ofm.shape = get_shape_expr(input0, c2)

        f = relay.Function([input0], c2)
        mod = tvm.IRModule()
        mod["main"] = f
        return mod, [c2_params, c1_params]

    def create_graph_activation(input_tensor_name, input_tensor_shape,
                                input_tensor_dtype):
        c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        c1_params.ifm.shape = input_tensor_shape
        c1_params.kernel.shape = (7, 7, c1_params.ifm.shape[3], 8)
        c1_params.strides = (2, 2)
        c1_params.pad = "VALID"
        c1_params.activation = "CLIP"
        c1_params.clip_min = 90
        c1_params.clip_max = 110
        c1_params.update_output_qnn_params(input_tensor_dtype,
                                           input_tensor_dtype,
                                           input_tensor_dtype)
        input0 = relay.var(input_tensor_name,
                           shape=c1_params.ifm.shape,
                           dtype=c1_params.ifm.dtype)
        c1, new_params = relay_ir_builder.create_qnn_conv2d(c1_params, input0)
        c1_params.ofm.shape = get_shape_expr(input0, c1)

        c2_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        c2_params.ifm.shape = c1_params.ofm.shape
        c2_params.kernel.shape = (5, 5, c2_params.ifm.shape[3], 16)
        c2_params.strides = (1, 1)
        c2_params.pad = "SAME"
        c2_params.update_output_qnn_params()
        c2, new_params = relay_ir_builder.create_qnn_conv2d(c2_params, c1)
        c2_params.ofm.shape = get_shape_expr(input0, c2)

        f = relay.Function([input0], c2)
        mod = tvm.IRModule()
        mod["main"] = f
        return mod, [c2_params, c1_params]

    test_cases = [
        (create_graph_single, ["input", (1, 300, 300, 3), "int8"]),
        (create_graph_double, ["input", (1, 128, 256, 4), "int8"]),
        (create_graph_activation, ["input", (1, 64, 100, 4), "int8"]),
    ]
    np.random.seed(42)
    for test_case in test_cases:
        relay_module, conv_params = test_case[0](*test_case[1])
        input_tensor, input_shape, input_dtype = test_case[1]
        mod = partition_for_ethosu(relay_module)

        # Generate reference data
        in_min, in_max = util.get_range_for_dtype_str(input_dtype)
        input_data = {
            input_tensor:
            np.random.randint(in_min,
                              high=in_max,
                              size=input_shape,
                              dtype=input_dtype)
        }
        output_data = generate_ref_data(relay_module, input_data)

        compiled_models = infra.build_source(mod,
                                             input_data,
                                             output_data,
                                             accel_type,
                                             output_tolerance=1)

        # Assumes only two runtime.Modules are created -- i.e. single offload module
        imported_modules = compiled_models[
            0].executor_factory.lib.imported_modules
        assert len(imported_modules) == 2
        ethosu_module = imported_modules[0]

        # Verify generated C source
        get_cs = tvm._ffi.get_global_func("runtime.module.ethosu.getcs")
        cmms = get_cs(ethosu_module)
        cmms = bytes.fromhex(cmms)
        infra.print_payload(cmms)
        infra.verify_source(compiled_models, accel_type)