示例#1
0
def test_get_quantized_range(num_bits, signed, restrict, expected_q_min,
                             expected_q_max):
    """Check the (min, max) pair returned by ``qu.get_quantized_range``.

    ``restrict`` is forwarded as the ``signed_restrict_qrange`` keyword; the
    returned bounds are compared against the expected values one at a time so
    a failure points at the specific bound that is wrong.
    """
    actual_range = qu.get_quantized_range(
        num_bits, signed=signed, signed_restrict_qrange=restrict)
    actual_min, actual_max = actual_range
    assert actual_min == expected_q_min
    assert actual_max == expected_q_max
示例#2
0
def attach_quant_metadata(t, num_bits, quant_mode, stats=None, clip_mode=ClipMode.NONE, per_channel=False,
                          num_stds=None, scale_approx_mult_bits=None):
    """Compute linear-quantization parameters for ``t`` and attach them to it.

    The scale and zero-point come from ``_get_quant_params_from_tensor`` when
    ``stats`` is None, otherwise from ``_get_quant_params_from_stats_dict``.
    The quantized value range is derived from ``num_bits`` and ``quant_mode``.
    All four values are stored on the tensor as a ``TensorQuantMetadata``
    under the ``quant_metadata`` attribute.

    Args:
        t: Object to annotate; it must accept a new ``quant_metadata`` attribute.
        num_bits: Bit width forwarded to the parameter helpers and the range
            computation.
        quant_mode: A ``LinearQuantMode`` member.
        stats: Optional statistics; when given, parameters are taken from it
            instead of from ``t``.
        clip_mode: Forwarded to the parameter helpers.
        per_channel: Forwarded only on the tensor-based path (``stats`` is None).
        num_stds: Forwarded to the parameter helpers.
        scale_approx_mult_bits: Forwarded to the parameter helpers.

    Returns:
        The same ``t`` object, with ``quant_metadata`` set.
    """
    if stats is None:
        scale, zp = _get_quant_params_from_tensor(t, num_bits, quant_mode, clip_mode, per_channel, num_stds,
                                                  scale_approx_mult_bits)
    else:
        scale, zp = _get_quant_params_from_stats_dict(stats, num_bits, quant_mode, clip_mode, num_stds,
                                                      scale_approx_mult_bits)
    signed = quant_mode != LinearQuantMode.ASYMMETRIC_UNSIGNED
    restrict = quant_mode == LinearQuantMode.SYMMETRIC_RESTRICTED
    # The restricted-range flag must reach the range computation; otherwise the
    # SYMMETRIC_RESTRICTED mode would record the unrestricted signed bounds.
    min_q_val, max_q_val = q_utils.get_quantized_range(num_bits, signed,
                                                       signed_restrict_qrange=restrict)
    t.quant_metadata = TensorQuantMetadata(scale, zp, min_q_val, max_q_val)
    return t
示例#3
0
def _fake_quant_tensor(tensor, n_bits, mode, per_channel):
    """Quantize ``tensor`` and immediately dequantize it, both in place.

    The quantized range comes from ``n_bits`` and whether ``mode`` is signed
    (anything other than ``ASYMMETRIC_UNSIGNED``). The scale and zero-point
    are computed from the tensor itself.
    """
    # NOTE(review): no restricted-range flag is passed here, so
    # get_quantized_range uses its default for it - confirm this is intended
    # for LinearQuantMode.SYMMETRIC_RESTRICTED.
    is_signed = mode != LinearQuantMode.ASYMMETRIC_UNSIGNED
    range_lo, range_hi = q_utils.get_quantized_range(n_bits, is_signed)
    scale, zero_point = _get_quant_params_from_tensor(
        tensor, n_bits, mode, per_channel=per_channel)
    q_utils.linear_quantize_clamp(
        tensor, scale, zero_point, range_lo, range_hi, inplace=True)
    q_utils.linear_dequantize(tensor, scale, zero_point, inplace=True)