Example #1
    def verify(test_vec, mock_enc_w):
        ethosu_conv2d_calls = extract_ethosu_conv2d_extern_calls(
            test_vec["tir_module"])
        buffer_info = tirtocs.extract_buffer_info(test_vec["tir_module"],
                                                  test_vec["param_dict"])
        for ethosu_conv2d_call in ethosu_conv2d_calls:
            npu_op, w_zero_point = tirtocs.translate_ethosu_conv2d(
                ethosu_conv2d_call)
            weights = buffer_info[npu_op.weights[0].address.buffer_var][0]

            assert mock_enc_w.call_args[1]["accelerator"] == accel
            assert (mock_enc_w.call_args[1]["weights_volume"].flatten() ==
                    weights.astype(np.int64) - w_zero_point).all()
            assert mock_enc_w.call_args[1]["dilation_xy"] == (
                npu_op.kernel.dilation_x,
                npu_op.kernel.dilation_y,
            )
            assert mock_enc_w.call_args[1]["dilation_xy"] == (
                npu_op.kernel.dilation_x,
                npu_op.kernel.dilation_y,
            )
            assert mock_enc_w.call_args[1]["ifm_bitdepth"] == npu_op.ifm.data_type.size_in_bits()
            assert mock_enc_w.call_args[1]["block_traversal"] == test_vec["block_traversal"]
Example #2
    def create_mock(test_vec):
        with patch("ethosu.vela.api.npu_encode_weights") as mock_enc_w:
            with patch("ethosu.vela.api.npu_find_block_configs") as mock_blk_cfg:
                mock_blk_cfg.return_value = [vapi.NpuShape3D(8, 8, 8)]
                ethosu_conv2d_calls = extract_ethosu_conv2d_extern_calls(test_vec["tir_module"])
                buffer_info = tirtocs.extract_buffer_info(
                    test_vec["tir_module"], test_vec["param_dict"]
                )
                for ethosu_conv2d_call in ethosu_conv2d_calls:
                    npu_op, _ = tirtocs.translate_ethosu_conv2d(ethosu_conv2d_call)
                    weights = buffer_info[npu_op.weights[0].address.buffer_var][0]
                    vela_api.encode_weights(ethosu_conv2d_call, weights, accel)
                return mock_enc_w
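A minimal sketch of how create_mock pairs with the verify helper from Example #1, assuming both live in the same enclosing test function together with test_cases and accel (all names taken from the examples above):

    # Sketch only: test_cases, accel, create_mock and verify are assumed to exist in scope.
    for test_vec in test_cases:
        mock_enc_w = create_mock(test_vec)  # replay the TIR module with npu_encode_weights patched
        verify(test_vec, mock_enc_w)        # check the arguments recorded on the mock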
def test_assign_addresses():
    test_cases = [
        {
            # Stimulus
            "tir_module": WeightStreamOnly,
            "param_dict": {
                2: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [144], "uint8"),
                3: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [20], "uint8"),
                4: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [144], "uint8"),
                5: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [20], "uint8"),
                6: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [144], "uint8"),
                7: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [20], "uint8"),
                8: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [144], "uint8"),
                9: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [20], "uint8"),
            },
        },
        {
            # Stimulus
            "tir_module": MixedRead,
            "param_dict": {
                1: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [592], "uint8"),
                3: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [160], "uint8"),
                4: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [80], "uint8"),
                5: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [20], "uint8"),
                6: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [80], "uint8"),
                7: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [20], "uint8"),
                8: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [80], "uint8"),
                9: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [20], "uint8"),
                10: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [80], "uint8"),
                11: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [20], "uint8"),
            },
        },
    ]

    def extract_extern_calls(mod):
        """This function will obtain all ethosu_conv2d
        calls from a NPU TIR module
        Parameters
        ----------
        mod : tvm.IRModule
            This is a NPU TIR Module

        Returns
        -------
        list
            of tvm.tir.Call objects
            that are tir extern calls
            for ethosu_conv2d
        """
        # There should only be a single function
        assert len(mod.functions.items()) == 1
        primfunc = mod.functions.items()[0][1]

        extern_calls = list()

        def populate_extern_calls(stmt):
            if isinstance(stmt, tvm.tir.Call) and stmt.op.name == "tir.call_extern":
                extern_calls.append(stmt)

        stmt_functor.post_order_visit(primfunc.body, populate_extern_calls)
        return extern_calls

    def collect_tir_buffer_info(npu_ops):
        """This is run prior to address assigning to collect tir buffer information
        for verification later on"""
        _npu_op_tir_buffers = dict()
        for npu_op in npu_ops:
            if isinstance(npu_op, vapi.NpuDmaOperation):
                _npu_op_tir_buffers[npu_op] = (npu_op.src.address, npu_op.dest.address)
            elif issubclass(type(npu_op), vapi.NpuBlockOperation):
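                # NpuBlockOperation is the common base class of the compute operations
                # (e.g. conv2d, depthwise conv2d, pooling, elementwise), whereas DMA
                # transfers are handled by the branch above.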
                _npu_op_tir_buffers[npu_op] = (
                    npu_op.ifm.tiles.addresses[0],
                    npu_op.ofm.tiles.addresses[0],
                    npu_op.weights,
                    npu_op.biases,
                )
        return _npu_op_tir_buffers

    def _check_buffer(address, region, length, buffer_var):
        """Checks whether the buffer information is valid with
        original tir buffers.
        - If its constant, this will check
          the slice in the constant tensor has the values.
        - If its scratch, this will check
          the slice is within scratch and does not have conflicts
          with other scratch tensors.
        - If its input/output, this will check the
          address is zero
        """
        inverse_region_map = {
            0: tir_to_cs_translator.BufferType.constant,
            1: tir_to_cs_translator.BufferType.scratch,
            3: tir_to_cs_translator.BufferType.input,
            4: tir_to_cs_translator.BufferType.output,
        }
        buffer_type = inverse_region_map[region]
        if buffer_type == tir_to_cs_translator.BufferType.constant:
            ref = buffer_info[buffer_var].values
            assert (constant_tensor[address : address + length] == ref).all()
            # Every buffer is adjusted to align to 16 bytes
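            # (e.g. a 20-byte constant is padded to util.round_up(20, 16) == 32 bytes)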
            length = util.round_up(length, 16)
            # Mark these constants as read at least once
            constant_tensor_read_mask[address : address + length] = np.ones(length, dtype="uint8")
        elif buffer_type == tir_to_cs_translator.BufferType.scratch:
            shape = list(buffer_info[buffer_var].shape)
            assert length == np.prod(shape)
            assert address < scratch_size
            # Every buffer is adjusted to align to 16 bytes
            length = util.round_up(length, 16)
            assert address + length <= scratch_size
            # The scratch area should not be used by any other buffer
            assert not scratch_allocation_mask[address : address + length].any()
            # The scratch area is marked as used
            scratch_allocation_mask[address : address + length] = np.ones(length, dtype="uint8")
        elif buffer_type == tir_to_cs_translator.BufferType.input:
            assert address == 0
        else:
            assert buffer_type == tir_to_cs_translator.BufferType.output
            assert address == 0

    def verify(npu_ops):
        """This wrapper verifies the allocated addresses matches with original tir buffers"""
        checked_buffers = set()

        def check_buffer(address, region, length, buffer_var):
            if buffer_var not in checked_buffers:
                _check_buffer(address, region, length, buffer_var)
                checked_buffers.add(buffer_var)

        for npu_op in npu_ops:
            if isinstance(npu_op, vapi.NpuDmaOperation):
                src_tir_buffer_var = npu_op_tir_buffers[npu_op][0].buffer_var
                check_buffer(
                    npu_op.src.address, npu_op.src.region, npu_op.src.length, src_tir_buffer_var
                )
                dest_tir_buffer_var = npu_op_tir_buffers[npu_op][1].buffer_var
                check_buffer(
                    npu_op.dest.address,
                    npu_op.dest.region,
                    npu_op.dest.length,
                    dest_tir_buffer_var,
                )
            elif issubclass(type(npu_op), vapi.NpuBlockOperation):
                ifm_tir_buffer_var = npu_op_tir_buffers[npu_op][0].buffer_var
                ifm_length = (
                    npu_op.ifm.shape.height * npu_op.ifm.shape.width * npu_op.ifm.shape.depth
                )
                check_buffer(
                    npu_op.ifm.tiles.addresses[0],
                    npu_op.ifm.region,
                    ifm_length,
                    ifm_tir_buffer_var,
                )
                ofm_tir_buffer_var = npu_op_tir_buffers[npu_op][1].buffer_var
                ofm_length = (
                    npu_op.ofm.shape.height * npu_op.ofm.shape.width * npu_op.ofm.shape.depth
                )
                check_buffer(
                    npu_op.ofm.tiles.addresses[0],
                    npu_op.ofm.region,
                    ofm_length,
                    ofm_tir_buffer_var,
                )
                for idx, weight in enumerate(npu_op_tir_buffers[npu_op][2]):
                    assert isinstance(weight, vapi.NpuAddressRange)
                    check_buffer(
                        npu_op.weights[idx].address,
                        npu_op.weights[idx].region,
                        npu_op.weights[idx].length,
                        weight.address.buffer_var,
                    )
                for idx, bias in enumerate(npu_op_tir_buffers[npu_op][3]):
                    assert isinstance(bias, vapi.NpuAddressRange)
                    check_buffer(
                        npu_op.biases[idx].address,
                        npu_op.biases[idx].region,
                        npu_op.biases[idx].length,
                        bias.address.buffer_var,
                    )

    for test_case in test_cases:
        buffer_info = tir_to_cs_translator.extract_buffer_info(
            test_case["tir_module"], test_case["param_dict"]
        )
        extern_calls = extract_extern_calls(test_case["tir_module"])
        _npu_ops = list()
        for extern_call in extern_calls:
            _npu_ops.append(tir_to_cs_translator.translate_ethosu_tir_extern_call(extern_call))
        npu_op_tir_buffers = collect_tir_buffer_info(_npu_ops)
        _npu_ops, constant_tensor, scratch_size = tir_to_cs_translator.assign_addresses(
            buffer_info, _npu_ops
        )
        scratch_allocation_mask = np.zeros(scratch_size, dtype="uint8")
        constant_tensor_read_mask = np.zeros(constant_tensor.size, dtype="uint8")
        verify(_npu_ops)
        # This will be only 1 if all allocated scratch is used.
        assert np.prod(scratch_allocation_mask) == 1
        # This will only be 1 if all constant tensors are read at least once.
        assert np.prod(constant_tensor_read_mask) == 1
def test_buffer_info_extraction():
    test_cases = [
        {
            # Stimulus
            "tir_module": SingleEthosUConv2D,
            "param_dict": {
                1: np.random.randint(
                    np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 3, 16], "uint8"
                ),
                2: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [16], "int32"),
            },
            # Reference Outputs
            "constants": {
                "placeholder_4": 1,
                "placeholder_5": 2,
            },
            "data_buffers": {
                "placeholder_3": (
                    [1, 8, 8, 3],
                    "uint8",
                    tir_to_cs_translator.BufferType.input_or_output,
                ),
                "ethosu_conv2d_1": (
                    [1, 8, 8, 16],
                    "uint8",
                    tir_to_cs_translator.BufferType.input_or_output,
                ),
            },
        },
        {
            "tir_module": MultiEthosUConv2D,
            "param_dict": {
                1: np.random.randint(
                    np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 3, 32], "uint8"
                ),
                2: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [32], "int32"),
                3: np.random.randint(
                    np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 32, 8], "uint8"
                ),
                4: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [8], "int32"),
            },
            # Reference Outputs
            "constants": {
                "placeholder_5": 4,
                "placeholder_7": 1,
                "placeholder_8": 2,
                "placeholder_9": 3,
            },
            "data_buffers": {
                "placeholder_6": (
                    [1, 8, 8, 3],
                    "uint8",
                    tir_to_cs_translator.BufferType.input_or_output,
                ),
                "ethosu_conv2d_1": (
                    [1, 8, 8, 8],
                    "uint8",
                    tir_to_cs_translator.BufferType.input_or_output,
                ),
                "ethosu_conv2d_2": ([1024], "uint8", tir_to_cs_translator.BufferType.scratch),
                "ethosu_conv2d_3": ([2048], "uint8", tir_to_cs_translator.BufferType.scratch),
            },
        },
    ]
    for test_case in test_cases:
        buffer_info = tir_to_cs_translator.extract_buffer_info(
            test_case["tir_module"], test_case["param_dict"]
        )
        for buffer_var, info in buffer_info.items():
            buffer_name = buffer_var.name
            if buffer_name in test_case["constants"].keys():
                assert (
                    info.values == test_case["param_dict"][test_case["constants"][buffer_name]]
                ).all()
                assert (
                    info.dtype == test_case["param_dict"][test_case["constants"][buffer_name]].dtype
                )
                assert info.btype == tir_to_cs_translator.BufferType.constant
            else:
                assert list(info.shape) == test_case["data_buffers"][buffer_name][0]
                assert info.dtype == test_case["data_buffers"][buffer_name][1]
                assert info.btype == test_case["data_buffers"][buffer_name][2]