Пример #1
0
def translate(tir_module, params):
    """Compile a TIR module for the NPU into a driver command stream.

    The accelerator configuration is not an argument of this function; it
    is obtained from ``vela_api.get_accelerator_config()``.

    Parameters
    ----------
    tir_module : tvm.IRModule
        The TIR module containing ethosu extern calls
    params : dict
        A dictionary mapping TIR primfunc argument ordering
        idx to constant NDArray

    Returns
    -------
    cs : str
        A hex string of the bytes of the command stream
    encoded_constants : str
        A hex string of the bytes that includes the concatenated
        encoded weights, encoded biases and scales.
    base_addresses : List[util.BaseAddress]
        Base addresses to be used by the driver. A "dynamic_allocation"
        entry is appended when the scratch analysis reports a non-zero
        dynamic allocation size.
    """
    # Regions 0, 3 and 4 are already used for input, output and constants
    # respectively (see _get_regions()), which leaves regions 5, 2 and 1
    # as the candidates for scratch memory.
    scratch_candidates = [5, 2, 1]
    scratch_analysis = analyze_scratch_memory_acesses(tir_module,
                                                      scratch_candidates)
    (scratch_region_map, dynamic_allocation_size,
     dynamic_allocation_region) = scratch_analysis

    buffer_info = extract_buffer_info(tir_module, params)
    translated_ops = [
        translate_ethosu_tir_call_extern(extern_call)
        for extern_call in extract_call_extern_list(tir_module)
    ]
    addressed_ops, constant_data = assign_addresses(buffer_info,
                                                    translated_ops,
                                                    scratch_region_map)
    base_addresses = extract_param_base_addresses(tir_module, buffer_info,
                                                  scratch_region_map)

    if dynamic_allocation_size:
        # Not tied to any primfunc parameter: flagged as a runtime allocation.
        dynamic_base_address = util.BaseAddress(
            name="dynamic_allocation",
            primfunc_param_idx=None,
            region=dynamic_allocation_region,
            size=dynamic_allocation_size,
            is_runtime_allocation=True,
        )
        base_addresses.append(dynamic_base_address)

    accel_config = vela_api.get_accelerator_config()
    register_commands = vapi.npu_generate_register_command_stream(
        addressed_ops, accel_config)
    driver_payload = vapi.npu_create_driver_payload(register_commands,
                                                    accel_config)
    command_stream_hex = driver_payload.hex()
    return command_stream_hex, constant_data, base_addresses
Пример #2
0
def test_create_driver_payload(accelerator: NpuAccelerator):
    """Check the payload built by npu_create_driver_payload.

    A seeded random register command stream with known first and last
    words is wrapped into a driver payload; the test then checks the total
    payload length and that both sentinel words appear, byte-wise
    little-endian, right after the header and at the very end.
    """
    header_size = 32  # expected driver header size in bytes
    num_commands = 793
    first_command = 0xFEDCBA98
    last_command = 0xA0B1C2D3

    # Fixed seed keeps the generated command stream reproducible.
    random.seed(0)
    register_command_stream = random.choices(range(1 << 32), k=num_commands)
    register_command_stream[0] = first_command
    register_command_stream[-1] = last_command

    payload = npu_create_driver_payload(register_command_stream, accelerator)

    # Every register command occupies 4 bytes after the header.
    assert len(payload) == header_size + 4 * num_commands

    # The first register command sits directly after the header.
    first_word = payload[header_size:header_size + 4]
    assert list(first_word) == list(first_command.to_bytes(4, "little"))

    # The last register command closes the payload.
    last_word = payload[-4:]
    assert list(last_word) == list(last_command.to_bytes(4, "little"))
Пример #3
0
def translate(tir_module, params):
    """Compile a TIR module for the NPU into a driver command stream.

    The accelerator configuration is not an argument of this function; it
    is obtained from ``vela_api.get_accelerator_config()``.

    Parameters
    ----------
    tir_module : tvm.IRModule
        The TIR module containing ethosu extern calls
    params : dict
        A dictionary mapping TIR primfunc argument ordering
        idx to constant NDArray

    Returns
    -------
    cs : str
        A hex string of the bytes of the command stream
    encoded_constants : str
        A hex string of the bytes that includes the concatenated
        encoded weights, encoded biases and scales.
    base_addresses : List[util.BaseAddress]
        Base addresses to be used by the driver. A "scratch" entry is
        appended when the assigned scratch size is greater than zero.
    """
    buffer_info = extract_buffer_info(tir_module, params)
    translated_ops = [
        translate_ethosu_tir_call_extern(extern_call)
        for extern_call in extract_call_extern_list(tir_module)
    ]
    addressed_ops, constant_data, scratch_size = assign_addresses(
        buffer_info, translated_ops
    )
    base_addresses = extract_param_base_addresses(tir_module, buffer_info)

    if scratch_size > 0:
        # Scratch memory is only registered when some is actually required.
        scratch_base_address = util.BaseAddress(
            "scratch",
            None,
            _REGION_MAP[BufferType.scratch],
            scratch_size,
            True,
        )
        base_addresses.append(scratch_base_address)

    accel_config = vela_api.get_accelerator_config()
    register_commands = vapi.npu_generate_register_command_stream(
        addressed_ops, accel_config
    )
    driver_payload = vapi.npu_create_driver_payload(register_commands, accel_config)
    command_stream_hex = driver_payload.hex()
    return command_stream_hex, constant_data, base_addresses
Пример #4
0
def translate(tir_module, params):
    """Compile a TIR module for the NPU into a driver command stream.

    The accelerator configuration is not an argument of this function; it
    is obtained from ``vela_api.get_accelerator_config()``.

    Parameters
    ----------
    tir_module : tvm.IRModule
        The TIR module containing ethosu extern calls
    params : dict
        A dictionary mapping TIR primfunc argument ordering
        idx to constant NDArray

    Returns
    -------
    cs : str
        A hex string of the bytes of the command stream
    encoded_constants : str
        A hex string of the bytes of the constant tensor (the concatenated
        encoded weights, encoded biases and scales), or an empty string
        when there is no constant tensor.
    scratch_size : int
        The size of the scratch buffer needed.
    """
    buffer_info = extract_buffer_info(tir_module, params)
    translated_ops = [
        translate_ethosu_tir_call_extern(extern_call)
        for extern_call in extract_call_extern_list(tir_module)
    ]
    addressed_ops, constant_tensor, scratch_size = assign_addresses(
        buffer_info, translated_ops)

    accel_config = vela_api.get_accelerator_config()
    register_commands = vapi.npu_generate_register_command_stream(
        addressed_ops, accel_config)
    driver_payload = vapi.npu_create_driver_payload(register_commands,
                                                    accel_config)

    # A missing constant tensor is reported as an empty hex string.
    if constant_tensor is None:
        encoded_constants = ""
    else:
        encoded_constants = constant_tensor.tobytes().hex()
    return driver_payload.hex(), encoded_constants, scratch_size