def fake_quantize_per_tensor_affine(g, inputs, scale, zero_point, quant_min=-128, quant_max=127):
    """Export ``fake_quantize_per_tensor_affine`` as QuantizeLinear + DequantizeLinear.

    ONNX QuantizeLinear/DequantizeLinear only support uint8 (0, 255) and
    int8 (-128, 127) quantization ranges, so anything else is rejected.

    Args:
        g: ONNX graph context used to emit ops.
        inputs: Tensor to fake-quantize.
        scale: Quantization scale; must be a constant at export time.
        zero_point: Quantization zero point; cast to UINT8/INT8 below.
        quant_min, quant_max: Quantization range bounds.

    Raises:
        RuntimeError: If (quant_min, quant_max) is not an ONNX-supported range.
    """
    # NOTE: (0, 127) is how PyTorch observers restrict activation ranges; it
    # cannot be expressed before opset 13 (it needs a Clip), so report it as
    # an opset limitation instead of falling through to the generic
    # invalid-range error below.
    if (quant_min, quant_max) == (0, 127):
        sym_help._onnx_opset_unsupported_detailed(
            "fake_quantize_per_tensor_affine", 10, 13,
            "Quantize range (0, 127) not supported, requires opset 13 Clip")
    if (quant_min, quant_max) not in [(0, 255), (-128, 127)]:
        raise RuntimeError(
            "For (quant_min, quant_max), ONNX allows only (0, 255) and (-128, 127). "
            "Got ({}, {})".format(quant_min, quant_max))
    scale = sym_help._maybe_get_scalar(scale)
    if scale is None:
        # Non-constant scale only became expressible at opset 13.
        sym_help._onnx_opset_unsupported_detailed(
            "fake_quantize_per_tensor_affine", 10, 13,
            "Non-constant scale not supported")
    scale = scale.float().data  # Avoid exporter generating double type
    # quant_min == 0 implies the unsigned (0, 255) range; otherwise int8.
    if quant_min == 0:
        zero_point = g.op("Cast", zero_point, to_i=torch.onnx.TensorProtoDataType.UINT8)
    else:
        zero_point = g.op("Cast", zero_point, to_i=torch.onnx.TensorProtoDataType.INT8)
    return g.op(
        "DequantizeLinear",
        g.op("QuantizeLinear", inputs, scale, zero_point),
        scale,
        zero_point)
def batch_norm(g, input, weight, bias, running_mean, running_var, training, momentum, eps, cudnn_enabled):
    """Export ``batch_norm`` as an ONNX BatchNormalization node.

    In training mode the ONNX op produces three outputs (result plus updated
    running stats); in eval mode only the normalized result is emitted.
    """
    # Mixed input dtypes under autocast are only representable from opset 15.
    mixed_dtype_autocast = (
        torch.is_autocast_enabled()
        and not args_have_same_dtype([input, weight, bias, running_mean, running_var])
        and sym_help._export_onnx_opset_version < 15
    )
    if mixed_dtype_autocast:
        return sym_help._onnx_opset_unsupported_detailed(
            "BatchNormalization", 14, 15,
            "All input tensors must have the same `dtype`."
            " Turn off Autocast or export using opset version 15.")

    sym_help.check_training_mode(training, "batch_norm")
    weight, bias, running_mean, running_var = sym_help._batchnorm_helper(
        g, input, weight, bias, running_mean, running_var)
    bn_out = g.op(
        "BatchNormalization", input, weight, bias, running_mean, running_var,
        epsilon_f=eps,
        # ONNX momentum is the complement of PyTorch's.
        momentum_f=1 - momentum,
        training_mode_i=1 if training else 0,
        outputs=3 if training else 1,
    )
    if not training:
        return bn_out
    normalized, updated_mean, updated_var = bn_out
    # Carry the original running-stat types over to the new stat outputs.
    updated_mean.setType(running_mean.type())
    updated_var.setType(running_var.type())
    return normalized
def fake_quantize_per_tensor_affine(g, inputs, scale, zero_point, quant_min=-128, quant_max=127):
    """Export ``fake_quantize_per_tensor_affine`` via QuantizeLinear/DequantizeLinear.

    Only the ONNX-supported uint8 (0, 255) and int8 (-128, 127) quantization
    ranges are accepted; other ranges raise or are flagged as opset-limited.
    """
    quant_range = (quant_min, quant_max)
    # NOTE: (0, 127) is a special case. PyTorch restricts activations to be in the range (0, 127).
    # https://github.com/pytorch/pytorch/blob/b34b192d6b97325c9f78e5995c48c8498ede34bd/torch/ao/quantization/observer.py#L1422
    if quant_range == (0, 127):
        symbolic_helper._onnx_opset_unsupported_detailed(
            "fake_quantize_per_tensor_affine",
            10,
            13,
            "Quantize range (0, 127) not supported, requires opset 13 Clip",
            inputs,
        )
    if quant_range not in [(0, 255), (-128, 127)]:
        raise errors.SymbolicValueError(
            f"For (quant_min, quant_max), ONNX allows only (0, 255) and (-128, 127). "
            f"Got ({quant_min}, {quant_max})",
            inputs,
        )
    scale = symbolic_helper._maybe_get_scalar(scale)
    if scale is None:
        # A dynamic scale tensor cannot be expressed before opset 13.
        symbolic_helper._onnx_opset_unsupported_detailed(
            "fake_quantize_per_tensor_affine",
            10,
            13,
            "Non-constant scale not supported",
            inputs,
        )
    scale = scale.float().data  # Avoid exporter generating double type
    # quant_min == 0 selects the unsigned (0, 255) range; otherwise int8.
    target_dtype = (
        _C_onnx.TensorProtoDataType.UINT8
        if quant_min == 0
        else _C_onnx.TensorProtoDataType.INT8
    )
    zero_point = g.op("Cast", zero_point, to_i=target_dtype)
    quantized = g.op("QuantizeLinear", inputs, scale, zero_point)
    return g.op(
        "DequantizeLinear",
        quantized,
        scale,
        zero_point,
    )