Example #1
# ONNX symbolic that lowers torch.fake_quantize_per_tensor_affine to a
# QuantizeLinear/DequantizeLinear pair. The imports below are assumed,
# matching torch/onnx/symbolic_opset10.py in the PyTorch release this
# snippet was taken from.
import torch
from torch.onnx import symbolic_helper as sym_help


def fake_quantize_per_tensor_affine(g,
                                    inputs,
                                    scale,
                                    zero_point,
                                    quant_min=-128,
                                    quant_max=127):
    if (quant_min, quant_max) not in [(0, 255), (-128, 127)]:
        raise RuntimeError(
            "For (quant_min, quant_max), ONNX allows only (0, 255) and (-128, 127). "
            "Got ({}, {})".format(quant_min, quant_max))
    scale = sym_help._maybe_get_scalar(scale)
    if scale is None:
        sym_help._onnx_opset_unsupported_detailed(
            "fake_quantize_per_tensor_affine", 10, 13,
            "Non-constant scale not supported")
    scale = scale.float().data  # Avoid exporter generating double type
    # ONNX QuantizeLinear expects a uint8 zero_point for the (0, 255) range
    # and an int8 zero_point for (-128, 127).
    if quant_min == 0:
        zero_point = g.op("Cast",
                          zero_point,
                          to_i=torch.onnx.TensorProtoDataType.UINT8)
    else:
        zero_point = g.op("Cast",
                          zero_point,
                          to_i=torch.onnx.TensorProtoDataType.INT8)
    return g.op("DequantizeLinear",
                g.op("QuantizeLinear", inputs, scale, zero_point), scale,
                zero_point)
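A minimal export sketch, not part of the original snippet (the module and file names are illustrative), showing how this symbolic is reached: exporting at opset 10 a model that calls torch.fake_quantize_per_tensor_affine.

import torch

class FQ(torch.nn.Module):
    def forward(self, x):
        # (0, 255) range: the symbolic above casts zero_point to UINT8.
        return torch.fake_quantize_per_tensor_affine(x, 0.1, 0, 0, 255)

torch.onnx.export(FQ(), torch.randn(1, 3, 4, 4), "fq.onnx", opset_version=10)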
Example #2
# ONNX symbolic for batch normalization, as found in
# torch/onnx/symbolic_opset14.py around PyTorch 1.11. Imports are assumed,
# and args_have_same_dtype is reproduced here as a sketch so the snippet is
# self-contained: it checks that all graph values share one scalar type.
import torch
from torch.onnx import symbolic_helper as sym_help


def args_have_same_dtype(args):
    assert args
    base_dtype = args[0].type().scalarType()
    return all(elem.type().scalarType() == base_dtype for elem in args)


def batch_norm(g, input, weight, bias, running_mean, running_var, training,
               momentum, eps, cudnn_enabled):

    if torch.is_autocast_enabled() and \
            not args_have_same_dtype([input, weight, bias, running_mean, running_var]) and \
            sym_help._export_onnx_opset_version < 15:
        return sym_help._onnx_opset_unsupported_detailed(
            "BatchNormalization", 14, 15,
            "All input tensors must have the same `dtype`."
            " Turn off Autocast or export using opset version 15.")

    sym_help.check_training_mode(training, "batch_norm")
    weight, bias, running_mean, running_var = sym_help._batchnorm_helper(
        g, input, weight, bias, running_mean, running_var)
    out = g.op("BatchNormalization",
               input,
               weight,
               bias,
               running_mean,
               running_var,
               epsilon_f=eps,
               momentum_f=1 - momentum,
               training_mode_i=0 if not training else 1,
               outputs=1 if not training else 3)
    if not training:
        return out
    else:
        res, new_running_mean, new_running_var = out
        new_running_mean.setType(running_mean.type())
        new_running_var.setType(running_var.type())
        return res
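A hedged usage sketch (the file name is illustrative): exporting any module containing a BatchNorm layer at opset 14 dispatches to this symbolic.

import torch

model = torch.nn.BatchNorm2d(3).eval()  # eval mode => single output
torch.onnx.export(model, torch.randn(1, 3, 8, 8), "bn.onnx", opset_version=14)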
Example #3
# A newer revision of the same symbolic, as in torch/onnx/symbolic_opset10.py
# of recent PyTorch releases. The imports below are assumed from that file.
import torch._C._onnx as _C_onnx
from torch.onnx import errors, symbolic_helper


def fake_quantize_per_tensor_affine(g,
                                    inputs,
                                    scale,
                                    zero_point,
                                    quant_min=-128,
                                    quant_max=127):
    # NOTE: (0, 127) is a special case. PyTorch restricts activations to be in the range (0, 127).
    #   https://github.com/pytorch/pytorch/blob/b34b192d6b97325c9f78e5995c48c8498ede34bd/torch/ao/quantization/observer.py#L1422
    if (quant_min, quant_max) == (0, 127):
        symbolic_helper._onnx_opset_unsupported_detailed(
            "fake_quantize_per_tensor_affine",
            10,
            13,
            "Quantize range (0, 127) not supported, requires opset 13 Clip",
            inputs,
        )
    if (quant_min, quant_max) not in [(0, 255), (-128, 127)]:
        raise errors.SymbolicValueError(
            f"For (quant_min, quant_max), ONNX allows only (0, 255) and (-128, 127). "
            f"Got ({quant_min}, {quant_max})",
            inputs,
        )
    scale = symbolic_helper._maybe_get_scalar(scale)
    if scale is None:
        symbolic_helper._onnx_opset_unsupported_detailed(
            "fake_quantize_per_tensor_affine",
            10,
            13,
            "Non-constant scale not supported",
            inputs,
        )
    scale = scale.float().data  # Avoid exporter generating double type
    # ONNX QuantizeLinear expects a uint8 zero_point for the (0, 255) range
    # and an int8 zero_point for (-128, 127).
    if quant_min == 0:
        zero_point = g.op("Cast",
                          zero_point,
                          to_i=_C_onnx.TensorProtoDataType.UINT8)
    else:
        zero_point = g.op("Cast",
                          zero_point,
                          to_i=_C_onnx.TensorProtoDataType.INT8)
    return g.op(
        "DequantizeLinear",
        g.op("QuantizeLinear", inputs, scale, zero_point),
        scale,
        zero_point,
    )
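A sketch of the signed-range path (module and file names are illustrative). Exporting at opset 10 routes through this symbolic; a (0, 127) range would raise here, since its Clip-based lowering requires opset 13, as the error message above states.

import torch

class FQSigned(torch.nn.Module):
    def forward(self, x):
        # (-128, 127) range: the symbolic casts zero_point to INT8.
        return torch.fake_quantize_per_tensor_affine(x, 0.05, 0, -128, 127)

torch.onnx.export(FQSigned(), torch.randn(2, 3), "fq_int8.onnx",
                  opset_version=10)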