def pack_biases(
    biases: np.ndarray,
    ifm_scale: float,
    ifm_dtype: np.dtype,
    weight_scales: np.ndarray,
    ofm_scale: float,
    is_activation_tanh_or_sigmoid: bool = False,
) -> np.ndarray:
    """Pack bias values together with their output scale parameters.

    The NPU requires each bias value to be packed with the output scale
    parameters in an 80-bit format (as produced by the npu_encode_bias API).
    This function encodes every bias that way and concatenates the results
    into the binary artifact the NPU uses during execution.

    Parameters
    ----------
    biases : numpy.ndarray
        The values of biases.
    ifm_scale : float
        The quantization scale parameter of the input feature map.
    ifm_dtype : numpy.dtype
        The data type of the input feature map data. Must be one of
        uint8, int8 or int16.
    weight_scales : numpy.ndarray
        The quantization scale parameter(s) of the weights. When it holds
        a single element, it is repeated once per bias value; otherwise it
        is passed on unchanged (per-channel quantization).
    ofm_scale : float
        The quantization scale parameter of the output feature map.
    is_activation_tanh_or_sigmoid : bool
        Indicates whether the fused activation function is tanh or sigmoid.

    Returns
    -------
    scale_bias : numpy.ndarray
        Packed scales/biases as uint8 rows of 10 bytes (80 bits) each.

    Raises
    ------
    AssertionError
        If ``ifm_dtype`` is not supported, or if the number of computed
        hardware bias scales differs from ``biases.size``.
    """
    # The BYOC infra should not partition anything else.
    assert ifm_dtype in (np.uint8, np.int8, np.int16)

    # A single weight scale is shared by all biases: repeat it per bias.
    per_bias_weight_scales = (
        [weight_scales] * biases.size if weight_scales.size == 1 else weight_scales
    )

    hw_bias_scales = _calculate_hw_bias_scales(
        ifm_scale,
        per_bias_weight_scales,
        ofm_scale,
        ifm_dtype,
        is_activation_tanh_or_sigmoid,
    )
    assert len(hw_bias_scales) == biases.size

    bias_values = biases.astype("int64")
    encoded = bytearray()
    for position, scale_params in enumerate(hw_bias_scales):
        # Each entry of hw_bias_scales is unpacked into the remaining
        # positional arguments of the encoder.
        encoded.extend(vapi.npu_encode_bias(bias_values[position], *scale_params))

    # View the byte stream as one 10-byte (80-bit) row per encoded bias.
    return np.frombuffer(encoded, dtype=np.uint8).reshape((-1, 10))
def test_encode_bias():
    """Check that npu_encode_bias returns a 10-byte bytearray.

    Thirty random (bias, scale, shift) triples are drawn from a signed
    40-bit range for the bias, an unsigned 32-bit range for the scale and
    an unsigned 6-bit range for the shift; each encoded result must be a
    ``bytearray`` of length 10.
    """
    bias_bits, scale_bits, shift_bits = 40, 32, 6

    # Inclusive (low, high) bounds, as expected by random.randint.
    bias_bounds = (-(1 << (bias_bits - 1)), (1 << (bias_bits - 1)) - 1)
    scale_bounds = (0, (1 << scale_bits) - 1)
    shift_bounds = (0, (1 << shift_bits) - 1)

    num_trials = 30
    for _ in range(num_trials):
        bias = np.int64(random.randint(*bias_bounds))
        scale = int(random.randint(*scale_bounds))
        shift = int(random.randint(*shift_bounds))

        encoded = npu_encode_bias(bias, scale, shift)

        assert isinstance(encoded, bytearray)
        assert len(encoded) == 10