Пример #1
0
def create_int8_bias_tensor_fill(tensor, out_blob_name, x_q_param, w_q_param):
    """
    Similar to create_int8_given_tensor_fill, but for bias blobs to be stored
    as int32.

    The bias is quantized with the product of the two supplied scales:
    each value is divided by ``x_q_param.scale * w_q_param.scale``, rounded
    with ``np.around`` and cast to ``np.int32``.

    Args:
        tensor: bias values to quantize (presumably a numpy array -- confirm
            against callers).
        out_blob_name: output blob name handed to the created operator.
        x_q_param: object whose ``scale`` attribute is read.
        w_q_param: object whose ``scale`` attribute is read.

    Returns:
        The created "Int8GivenIntTensorFill" operator, carrying "values" and
        "shape" arguments, after add_quantization_param_args_ has been called
        on it with ``QuantizationParam(scale, 0)``.
    """
    scale = x_q_param.scale * w_q_param.scale
    quantized_tensor = np.around(tensor / scale).astype(np.int32)
    # A bare ``quantized_tensor.reshape(-1)`` used to sit here.  reshape()
    # returns a new array rather than working in place, so the discarded call
    # did nothing.  It is dropped instead of assigned: the original shape must
    # survive because it is emitted as the "shape" argument below.
    op = core.CreateOperator("Int8GivenIntTensorFill", [], out_blob_name)
    op.arg.extend([
        utils.MakeArgument("values", quantized_tensor),
        utils.MakeArgument("shape", quantized_tensor.shape),
    ])
    # Second argument is 0 here (presumably the zero point -- confirm against
    # QuantizationParam's definition).
    q_param = hardcode_scale_zp.QuantizationParam(scale, 0)
    add_quantization_param_args_(op, q_param)
    return op
Пример #2
0
def choose_quantization_params(tensor_min,
                               tensor_max,
                               preserve_sparsity=False):
    """
    Pick quantization parameters for the value range [tensor_min, tensor_max].

    When ``preserve_sparsity`` is set and the range has a negative minimum
    and a positive maximum, the range is first widened to
    ``max_scale * [-128, 127]`` and the returned parameter is rebuilt from
    the chosen scale with 128 as its second argument.  Otherwise the result
    of ``hardcode_scale_zp.choose_quantization_params`` is returned as is.

    Args:
        tensor_min: lower bound of the values to quantize.
        tensor_max: upper bound of the values to quantize.
        preserve_sparsity: enables the symmetric-range handling above.

    Returns:
        The quantization parameter object produced via ``hardcode_scale_zp``.
    """
    lo, hi = tensor_min, tensor_max

    if lo < 0 and hi > 0 and preserve_sparsity:
        q_low = -(255 // 2 + 1)
        q_high = 255 // 2
        # Largest per-step size needed to cover either side of zero.
        widest_step = max(abs(lo / q_low), abs(hi / q_high))
        lo, hi = widest_step * q_low, widest_step * q_high

    params = hardcode_scale_zp.choose_quantization_params(lo, hi)

    # The test is repeated on the (possibly rewritten) bounds on purpose: the
    # rewritten values are what decide whether the override applies.
    if not (lo < 0 and hi > 0 and preserve_sparsity):
        return params
    return hardcode_scale_zp.QuantizationParam(params.scale, 128)
Пример #3
0
def add_quantization_param_args(op, tensor, preserve_sparsity=False):
    """
    Derive quantization parameters from ``tensor`` and attach them to ``op``.

    The value range is taken from ``tensor.min()`` / ``tensor.max()``; an
    empty tensor (``tensor.size == 0``) is treated as the range [0, 0].  When
    ``preserve_sparsity`` is set and the range has a negative minimum and a
    positive maximum, the range is widened to ``max_scale * [-128, 127]``
    and the parameter is rebuilt from the chosen scale with 128 as its
    second argument.

    Args:
        op: operator passed on to add_quantization_param_args_.
        tensor: object exposing ``size``, ``min()`` and ``max()``
            (presumably a numpy array -- confirm against callers).
        preserve_sparsity: enables the symmetric-range handling above.

    Returns:
        The quantization parameter object that was passed to
        add_quantization_param_args_.
    """
    if tensor.size == 0:
        range_lo = range_hi = 0
    else:
        range_lo, range_hi = tensor.min(), tensor.max()

    if range_lo < 0 and range_hi > 0 and preserve_sparsity:
        q_low = -(255 // 2 + 1)
        q_high = 255 // 2
        # Largest per-step size needed to cover either side of zero.
        step = max(abs(range_lo / q_low), abs(range_hi / q_high))
        range_lo, range_hi = step * q_low, step * q_high

    chosen = hardcode_scale_zp.choose_quantization_params(range_lo, range_hi)

    # Re-checked against the (possibly rewritten) bounds, as those decide
    # whether the override applies.
    if range_lo < 0 and range_hi > 0 and preserve_sparsity:
        chosen = hardcode_scale_zp.QuantizationParam(chosen.scale, 128)

    add_quantization_param_args_(op, chosen)
    return chosen