示例#1
0
def pack_biases(
    biases: np.ndarray,
    ifm_scale: float,
    ifm_dtype: np.dtype,
    weight_scales: np.ndarray,
    ofm_scale: float,
    is_activation_tanh_or_sigmoid: bool = False,
) -> np.ndarray:
    """
    Pack bias values together with their output scale parameters.

    The NPU requires each bias value to be packed with its output
    scale parameters in an 80-bit (10-byte) format, as produced by the
    npu_encode_bias API. This function encodes every bias that way and
    returns the concatenated result as the binary artifact the NPU
    uses during execution.

    Parameters
    ----------
    biases : numpy.ndarray
        The bias values.
    ifm_scale : float
        The quantization scale parameter of the input feature map.
    ifm_dtype : numpy.dtype
        The data type of the input feature map data. Must be one of
        uint8, int8 or int16.
    weight_scales : numpy.ndarray
        The quantization scale parameter(s) of the weights. A
        single-element array is repeated once per bias; otherwise one
        entry per channel is expected (per-channel quantization).
    ofm_scale : float
        The quantization scale parameter of the output feature map.
    is_activation_tanh_or_sigmoid : bool
        Indicates whether the fused activation function is tanh or sigmoid.

    Returns
    -------
    scale_bias : numpy.ndarray
        uint8 array of shape (-1, 10): one 10-byte packed scale/bias
        row per encoded bias, as the hardware requires them.
    """
    # The BYOC infra should not partition anything else.
    assert ifm_dtype in (np.uint8, np.int8, np.int16)

    # A single (per-tensor) scale is repeated so that every bias has an entry.
    channel_scales = (
        [weight_scales] * biases.size if weight_scales.size == 1 else weight_scales
    )

    hw_scales = _calculate_hw_bias_scales(
        ifm_scale,
        channel_scales,
        ofm_scale,
        ifm_dtype,
        is_activation_tanh_or_sigmoid,
    )
    assert len(hw_scales) == biases.size

    biases_i64 = biases.astype("int64")
    encoded = bytearray()
    for position, scale_params in enumerate(hw_scales):
        encoded.extend(vapi.npu_encode_bias(biases_i64[position], *scale_params))

    # Each encoded bias occupies 10 bytes, hence one row of 10 per bias.
    return np.frombuffer(encoded, dtype=np.uint8).reshape((-1, 10))
def test_encode_bias():
    """Check npu_encode_bias returns a 10-byte bytearray for random in-range inputs."""
    bias_bits = 40
    scale_bits = 32
    shift_bits = 6

    # Bias spans the range of a signed 40-bit integer; scale and shift
    # span the ranges of unsigned 32-bit and 6-bit integers respectively.
    min_bias = -(1 << (bias_bits - 1))
    max_bias = (1 << (bias_bits - 1)) - 1
    min_scale, max_scale = 0, (1 << scale_bits) - 1
    min_shift, max_shift = 0, (1 << shift_bits) - 1

    trials = 30
    for _ in range(trials):
        bias = np.int64(random.randint(min_bias, max_bias))
        scale = int(random.randint(min_scale, max_scale))
        shift = int(random.randint(min_shift, max_shift))

        encoded = npu_encode_bias(bias, scale, shift)

        assert isinstance(encoded, bytearray)
        assert len(encoded) == 10