def test_forward_per_channel_half_precision_numerics(self):
    scale = torch.randn(5).abs()
    zero = torch.randn(5).to(dtype=torch.int)
    axis = 1
    mini = 0
    maxi = 255

    for i in range(20):
        X1 = torch.randn(4, 5).to(torch.float16)
        Y1 = torch.fake_quantize_per_channel_affine(X1, scale, zero, axis, mini, maxi)
        Y1r = _fake_quantize_per_channel_affine_reference(X1, scale, zero, axis, mini, maxi)
        self.assertTrue(torch.allclose(Y1, Y1r, rtol=tolerance, atol=tolerance))

    # to force overflow
    X2 = torch.randn(4, 5).to(torch.float16)
    X2[0, 0] = 2**15 + .01
    Y2 = torch.fake_quantize_per_channel_affine(X2, scale, zero, axis, mini, maxi)
    Y2r = _fake_quantize_per_channel_affine_reference(X2, scale, zero, axis, mini, maxi)
    self.assertTrue(torch.allclose(Y2, Y2r, rtol=tolerance, atol=tolerance))

    scale = torch.zeros(5) + 10

    # to force underflow
    X3 = torch.randn(4, 5).to(torch.float16)
    X3[0, 0] = 2**-24
    Y3 = torch.fake_quantize_per_channel_affine(X3, scale, zero, axis, mini, maxi)
    Y3r = _fake_quantize_per_channel_affine_reference(X3, scale, zero, axis, mini, maxi)
    self.assertTrue(torch.allclose(Y3, Y3r, rtol=tolerance, atol=tolerance))
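# The `_fake_quantize_per_channel_affine_reference` helper used by the tests above is not
# shown in these excerpts. A minimal sketch of the arithmetic such a reference is expected
# to implement (broadcast per-channel qparams along `axis`, quantize, clamp, dequantize);
# the function name and broadcasting approach here are assumptions, not the original helper:
def _fake_quantize_per_channel_affine_reference_sketch(X, scale, zero_point, axis, quant_min, quant_max):
    shape = [1] * X.dim()
    shape[axis] = -1
    scale = scale.reshape(shape).to(torch.float32)
    zero_point = zero_point.reshape(shape).to(torch.float32)
    # q = clamp(round(x / s) + z, qmin, qmax); fake-quantized value = (q - z) * s
    Xq = torch.clamp(torch.round(X.to(torch.float32) / scale + zero_point), quant_min, quant_max)
    return ((Xq - zero_point) * scale).to(X.dtype)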
def fake_quantize_per_channel(input, scale_inv, zero_point, axis, quant_min, quant_max, method, inplace):
    if method == -1:
        # torch >= 1.10 expects an int32 zero_point; older versions expect int64.
        major, minor = (int(v) for v in torch.__version__.split('.')[:2])
        if (major, minor) >= (1, 10):
            zero_point = zero_point.to(torch.int32)
        else:
            zero_point = zero_point.to(torch.long)
        return torch.fake_quantize_per_channel_affine(input, 1.0 / scale_inv, zero_point, axis,
                                                      quant_min, quant_max)
    else:
        device_id = 1 if input.device == torch.device("cpu") else 0
        input_split = torch.split(input, 1, dim=axis)
        input_cat = []
        if support_onnx_export():
            for i in range(len(input_split)):
                input_cat.append(
                    torch.ops.vai.fix_neuron(input_split[i], quant_min, quant_max, scale_inv[i],
                                             zero_point[i], method, device_id, inplace))
            output = torch.cat(input_cat, axis)
            return output
        else:
            for i in range(len(input_split)):
                nndct_kernels.FixNeuronV2(input_split[i], input_split[i], quant_min, quant_max,
                                          scale_inv, zero_point, method, device_id)
                input_cat.append(input_split[i])
            output = torch.cat(input_cat, axis)
            return output
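# Minimal sketch (shapes and values assumed, not from the source) of the relation used in the
# `method == -1` branch above: the wrapper carries the *inverse* scale, so the effective
# per-channel scale passed to the native op is 1.0 / scale_inv.
x = torch.randn(2, 4)
scale_inv = torch.full((4,), 8.0)           # per-channel inverse scales along axis 1 -> scale 0.125
zp = torch.zeros(4, dtype=torch.int32)      # int32 zero_point for torch >= 1.10; older torch wants int64
y = torch.fake_quantize_per_channel_affine(x, 1.0 / scale_inv, zp, 1, -128, 127)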
def _fb_fake_quant(self, inputs, amax):
    """Native pytorch fake quantization."""
    logging.log_first_n(logging.WARNING, "Use Pytorch's native experimental fake quantization.", 1)
    bound = (1 << (self._num_bits - 1 + int(self._unsigned))) - 1
    # To be consistent with ONNX, the full range is used, e.g. [-128, 127] for int8.
    if amax.numel() == 1:
        outputs = torch.fake_quantize_per_tensor_affine(
            inputs, amax.item() / bound, 0,
            -bound - 1 if not self._unsigned else 0, bound)
    else:
        amax_squeeze = amax.squeeze().detach()
        if len(amax_squeeze.shape) != 1:
            raise TypeError("Pytorch's native quantization doesn't support multiple axes")
        quant_dim = list(amax.shape).index(list(amax_squeeze.shape)[0])
        scale = amax_squeeze / bound
        outputs = torch.fake_quantize_per_channel_affine(
            inputs, scale.data, torch.zeros_like(scale, dtype=torch.long).data, quant_dim,
            -bound - 1 if not self._unsigned else 0, bound)
    return outputs
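# Minimal sketch (all values assumed) of the per-channel branch above: signed 8-bit range
# [-128, 127], zero_point fixed at 0, and scale = amax / bound taken per output channel.
num_bits, unsigned = 8, False
bound = (1 << (num_bits - 1 + int(unsigned))) - 1   # 127 for signed int8
w = torch.randn(16, 3, 3, 3)                        # e.g. a conv weight, channels on dim 0
amax = w.abs().amax(dim=(1, 2, 3))                  # per-channel absolute maximum
scale = amax / bound
zp = torch.zeros_like(scale, dtype=torch.int32)     # int32 zero_point for torch >= 1.10
w_fq = torch.fake_quantize_per_channel_affine(w, scale, zp, 0, -bound - 1, bound)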
def _test_backward_per_channel_cachemask_impl(self, device):
    torch_types = (torch.qint8, torch.quint8)
    float_types = (torch.float32, torch.float16, torch.float64)
    for torch_type, float_type in itertools.product(torch_types, float_types):
        X = torch.randn(1, 2, 4, 4, dtype=float_type).to(device)
        # pick the scale + zp so that some values get clipped
        axis = 1
        obs = torch.quantization.PerChannelMinMaxObserver(axis, torch_type).to(device)
        obs(X * 0.75)
        scale, zero_point = obs.calculate_qparams()
        # TODO(future PR): fix the wrong dtype in obs.calculate_qparams and remove the cast
        zero_point = zero_point.to(torch.int64)
        quant_min, quant_max = obs._calculate_qmin_qmax()

        X.requires_grad_()
        Y_prime = torch.fake_quantize_per_channel_affine(
            X, scale, zero_point, axis, quant_min, quant_max)
        dout = torch.rand_like(X, dtype=float_type).to(device)
        dX = _fake_quantize_per_channel_affine_grad_reference(
            dout, X, scale, zero_point, axis, quant_min, quant_max)
        Y_prime.backward(dout)
        np.testing.assert_allclose(
            dX.cpu().detach().numpy(), X.grad.cpu().detach().numpy(),
            rtol=tolerance, atol=tolerance)
        assert X.grad.dtype == float_type
def forward(self, X):
    self.activation_post_process(X.detach())
    _scale, _zero_point = self.calculate_qparams()
    _scale = _scale.to(self.scale.device)
    _zero_point = _zero_point.to(self.zero_point.device)

    if self.static_enabled[0] == 1:
        self.scale.data.copy_(_scale)
        self.zero_point.data.copy_(_zero_point)

    if self.fake_quant_enabled[0] == 1:
        if self.learning_enabled[0] == 1:
            # keep the learned zero point inside the quantized range (in-place clamp)
            self.zero_point.data.clamp_(self.quant_min, self.quant_max)
            if self.qscheme in (torch.per_channel_symmetric, torch.per_channel_affine):
                X = _LearnableFakeQuantizePerChannelOp.apply(
                    X, self.scale, self.zero_point, self.ch_axis,
                    self.quant_min, self.quant_max, self.grad_factor)
            else:
                X = _LearnableFakeQuantizePerTensorOp.apply(
                    X, self.scale, self.zero_point,
                    self.quant_min, self.quant_max, self.grad_factor)
        else:
            if self.qscheme == torch.per_channel_symmetric or self.qscheme == torch.per_channel_affine:
                X = torch.fake_quantize_per_channel_affine(
                    X, self.scale, self.zero_point, self.ch_axis,
                    self.quant_min, self.quant_max)
            else:
                X = torch.fake_quantize_per_tensor_affine(
                    X, float(self.scale.item()), int(self.zero_point.item()),
                    self.quant_min, self.quant_max)

    return X
def forward(ctx, X, scale, zero_point, ch_axis, q_min, q_max, grad_factor):
    ctx.save_for_backward(X, scale, zero_point)
    scale_vec = scale.detach().type(torch.float32)
    zp_vec = ((zero_point.detach() + 0.5).clamp(q_min, q_max)).type(torch.int64)
    X_fq = torch.fake_quantize_per_channel_affine(
        X, scale_vec, zp_vec, ch_axis, q_min, q_max)
    ctx.other = q_min, q_max, X_fq, ch_axis, grad_factor
    return X_fq
def forward(self, X):
    if self.static_enabled[0] == 1:
        self.activation_post_process(X.detach())
        _scale, _zero_point = self.calculate_qparams()
        _scale = _scale.to(self.scale.device)
        _zero_point = _zero_point.to(self.zero_point.device)

    if self.init and self.wt:
        assert self.static_enabled[0] == 1
        self.scale = Parameter(torch.FloatTensor([1.] * len(_scale)))
        self.zero_point = Parameter(torch.FloatTensor([0.] * len(_zero_point)))
        self.to(_zero_point.device)
        self.scale.requires_grad = False
        self.zero_point.requires_grad = False
        self.init = False

    if self.static_enabled[0] == 1:
        self.scale.data.copy_(_scale)
        self.zero_point.data.copy_(_zero_point)

    if self.fake_quant_enabled[0] == 1:
        if self.learning_enabled[0] == 1:
            if self.use_grad_scaling:
                if self.wt:
                    grad_factor = 1.0 / (X.numel() * self.quant_max)**0.5
                else:
                    grad_factor = 1.0 / (X[0].numel() * self.quant_max)**0.5
            else:
                grad_factor = 1.0
            if self.qscheme in (torch.per_channel_symmetric, torch.per_channel_affine):
                X = _LearnableFakeQuantizePerChannelOp.apply(
                    X, self.scale, self.zero_point, self.ch_axis,
                    self.quant_min, self.quant_max, grad_factor)
            else:
                X = _LearnableFakeQuantizePerTensorOp.apply(
                    X, self.scale, self.zero_point,
                    self.quant_min, self.quant_max, grad_factor)
        else:
            if self.qscheme == torch.per_channel_symmetric or self.qscheme == torch.per_channel_affine:
                zero_point = torch.LongTensor(
                    [i.round() for i in self.zero_point]).to(self.zero_point.device)
                X = torch.fake_quantize_per_channel_affine(
                    X, self.scale, zero_point, self.ch_axis,
                    self.quant_min, self.quant_max)
            else:
                X = torch.fake_quantize_per_tensor_affine(
                    X, float(self.scale.item()), int(self.zero_point.item()),
                    self.quant_min, self.quant_max)

    return X
def forward(self, X):
    if self.observer_enabled:
        self.activation_post_process(X.detach())
        self.scale, self.zero_point = self.calculate_qparams()
    if self.fake_quant_enabled:
        if self.qscheme == torch.per_channel_symmetric or self.qscheme == torch.per_channel_affine:
            X = torch.fake_quantize_per_channel_affine(
                X, self.scale, self.zero_point, self.ch_axis,
                self.quant_min, self.quant_max)
        else:
            X = torch.fake_quantize_per_tensor_affine(
                X, float(self.scale), int(self.zero_point),
                self.quant_min, self.quant_max)
    return X
def _test_numerical_consistency(self, test_type):
    r"""Comparing numerical consistency between quantize/dequantize op and the fake quantize op across devices and dtypes
    """
    torch.random.manual_seed(NP_RANDOM_SEED)
    torch_types = [torch.qint8, torch.quint8]
    float_types = [torch.float, torch.float16, torch.float64]
    zero_types = [torch.long]
    devices = [torch.device('cpu'), torch.device('cuda')] if torch.cuda.is_available() else [torch.device('cpu')]
    axis = 1
    for i in range(20):
        for torch_type, float_type, device, zero_type in itertools.product(
                torch_types, float_types, devices, zero_types):
            X = torch.randn(3, 3, device=device).to(float_type)
            scales = (10 * torch.randn(3, device=device)).abs()
            scale = scales.mean().to(float).item()
            zeros = (10 * torch.randn(3, device=device)).abs().to(dtype=zero_type)
            zero = zeros.max().view(1).item()
            quant_min = torch.iinfo(torch_type).min
            quant_max = torch.iinfo(torch_type).max

            test_was_run = False
            if test_type == "per_tensor":
                test_was_run = True
                Y = torch.dequantize(
                    torch.quantize_per_tensor(
                        X.to('cpu').to(torch.float), scale, zero,
                        torch_type)).to(device).to(float_type)
                Y_prime = torch.fake_quantize_per_tensor_affine(
                    X, scale, zero, quant_min, quant_max)
                self.assertEqual(
                    Y, Y_prime,
                    "Difference found between dequant+quant_per_tensor and fake_quantize_per_tensor")

            if test_type == "per_channel":
                test_was_run = True
                Y = torch.dequantize(
                    torch.quantize_per_channel(
                        X.to('cpu').to(torch.float), scales.to('cpu'), zeros.to('cpu'),
                        axis, torch_type)).to(device).to(float_type)
                Y_prime = torch.fake_quantize_per_channel_affine(
                    X, scales, zeros, axis, quant_min, quant_max)
                self.assertEqual(
                    Y, Y_prime,
                    "Difference found between dequant+quant_per_channel and fake_quantize_per_channel")
            self.assertTrue(test_was_run)
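# Tiny worked example (values assumed) of the arithmetic both paths above are compared on:
# q = clamp(round(x / s) + z, qmin, qmax); fake_quant(x) = (q - z) * s
x, s, z, qmin, qmax = 0.37, 0.05, 3, 0, 255
q = min(max(round(x / s) + z, qmin), qmax)   # round(7.4) + 3 = 10
fq = (q - z) * s                             # (10 - 3) * 0.05 = 0.35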
def fake_quant(self, X):
    if self.qscheme == torch.per_channel_symmetric:
        zero_point = torch.LongTensor(
            [i.round() for i in self.zero_point]).to(self.zero_point.device)
        X = torch.fake_quantize_per_channel_affine(
            X, self.scale, zero_point, self.ch_axis, self.quant_min, self.quant_max)
    else:
        X = torch.fake_quantize_per_tensor_affine(
            X, float(self.scale.item()), int(self.zero_point.item()),
            self.quant_min, self.quant_max)
    return X
def test_forward_per_channel(self, device, X):
    r"""Tests the forward path of the FakeQuantizePerChannelAffine op.
    """
    np.random.seed(NP_RANDOM_SEED)
    X, (scale, zero_point, axis, torch_type) = X
    quant_min = torch.iinfo(torch_type).min
    quant_max = torch.iinfo(torch_type).max

    X = to_tensor(X, device)
    scale = to_tensor(scale, device)
    zero_point = torch.tensor(zero_point).to(dtype=torch.int64, device=device)
    Y = _fake_quantize_per_channel_affine_reference(
        X.cpu(), scale.cpu(), zero_point.cpu(), axis, quant_min, quant_max)
    Y_prime = torch.fake_quantize_per_channel_affine(
        X, scale, zero_point, axis, quant_min, quant_max)
    np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance)
def test_numerical_consistency_per_channel(self, device, X):
    r"""Comparing numerical consistency between CPU quantize/dequantize op and the CPU fake quantize op
    """
    np.random.seed(NP_RANDOM_SEED)
    X, (scale, zero_point, axis, torch_type) = X
    quant_min = torch.iinfo(torch_type).min
    quant_max = torch.iinfo(torch_type).max

    X = to_tensor(X, device)
    scale = to_tensor(scale, device)
    zero_point = torch.tensor(zero_point).to(dtype=torch.int64, device=device)
    # quantize_per_channel and dequantize are only implemented on the CPU
    Y = torch.dequantize(torch.quantize_per_channel(
        X.cpu(), scale.cpu(), zero_point.cpu(), axis, torch_type))
    Y_prime = torch.fake_quantize_per_channel_affine(
        X, scale, zero_point, axis, quant_min, quant_max)
    np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance)
def test_backward_per_channel(self, device, X):
    r"""Tests the backward method.
    """
    np.random.seed(NP_RANDOM_SEED)
    X, (scale, zero_point, axis, torch_type) = X
    quant_min = torch.iinfo(torch_type).min
    quant_max = torch.iinfo(torch_type).max

    X = to_tensor(X, device)
    scale = to_tensor(scale, device)
    zero_point = torch.tensor(zero_point).to(dtype=torch.int64, device=device)
    X.requires_grad_()
    Y_prime = torch.fake_quantize_per_channel_affine(
        X, scale, zero_point, axis, quant_min, quant_max)
    dout = torch.rand(X.shape, dtype=torch.float).to(device)
    dX = _fake_quantize_per_channel_affine_grad_reference(
        dout, X, scale, zero_point, axis, quant_min, quant_max)
    Y_prime.backward(dout)
    np.testing.assert_allclose(
        dX.cpu().detach().numpy(), X.grad.cpu().detach().numpy(),
        rtol=tolerance, atol=tolerance)
def forward(self, X):
    if self.observer_enabled[0] == 1:
        self.activation_post_process(X.detach())
        _scale, _zero_point = self.calculate_qparams()
        _scale, _zero_point = _scale.to(self.scale.device), _zero_point.to(self.zero_point.device)
        self.scale.resize_(_scale.shape)
        self.scale.copy_(_scale)
        self.zero_point.resize_(_zero_point.shape)
        self.zero_point.copy_(_zero_point)

    if self.fake_quant_enabled[0] == 1:
        if self.qscheme == torch.per_channel_symmetric or self.qscheme == torch.per_channel_affine:
            X = torch.fake_quantize_per_channel_affine(
                X, self.scale, self.zero_point, self.ch_axis,
                self.quant_min, self.quant_max)
        else:
            X = torch.fake_quantize_per_tensor_affine(
                X, float(self.scale), int(self.zero_point),
                self.quant_min, self.quant_max)
    return X
def _test_forward_per_channel_cachemask_impl(self, device):
    torch_types = (torch.qint8, torch.quint8)
    float_types = (torch.float32, torch.float16, torch.float64)
    for torch_type, float_type in itertools.product(torch_types, float_types):
        X = torch.randn(1, 2, 4, 4, dtype=float_type).to(device)
        # pick the scale + zp so that some values get clipped
        axis = 1
        obs = torch.quantization.PerChannelMinMaxObserver(axis, torch_type).to(device)
        obs(X * 0.75)
        scale, zero_point = obs.calculate_qparams()
        # TODO(future PR): fix the wrong dtype in obs.calculate_qparams and remove the cast
        zero_point = zero_point.to(torch.int32)
        quant_min, quant_max = obs._calculate_qmin_qmax()

        Y = _fake_quantize_per_channel_affine_reference(
            X.cpu(), scale.cpu(), zero_point.cpu(), axis, quant_min, quant_max)
        Y_prime = torch.fake_quantize_per_channel_affine(
            X, scale, zero_point, axis, quant_min, quant_max)
        np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance)
        self.assertTrue(Y.dtype == float_type)
def fake_quantize(self, input, inplace):
    if self._round_method == "half_even":
        if NndctOption.nndct_tensorrt_quant_algo.value and self._symmetric_mode == "symmetric":
            return fake_quantize_per_channel_tensorrt(
                input, self._float_max, self._quant_min, self._quant_max, self._axis)
        else:
            # torch >= 1.10 expects an int32 zero_point; older versions expect int64.
            major, minor = (int(v) for v in torch.__version__.split('.')[:2])
            if (major, minor) >= (1, 10):
                self._zero_point = self._zero_point.to(torch.int32)
            else:
                self._zero_point = self._zero_point.to(torch.long)
            return torch.fake_quantize_per_channel_affine(
                input, self._scale, self._zero_point, self._axis,
                self._quant_min, self._quant_max)
    else:
        if self._round_method == "half_up":
            method = 2
        elif self._round_method == "half_down":
            method = 6
        elif self._round_method == "std_round":
            method = 3
        return fake_quantize_per_channel(
            input, 1.0 / self._scale, self._zero_point, self._axis,
            self._quant_min, self._quant_max, method, inplace)
def pointwise_ops(self):
    a = torch.randn(4)
    b = torch.randn(4)
    t = torch.tensor([-1, -2, 3], dtype=torch.int8)
    r = torch.tensor([0, 1, 10, 0], dtype=torch.int8)
    t = torch.tensor([-1, -2, 3], dtype=torch.int8)
    s = torch.tensor([4, 0, 1, 0], dtype=torch.int8)
    f = torch.zeros(3)
    g = torch.tensor([-1, 0, 1])
    w = torch.tensor([0.3810, 1.2774, -0.2972, -0.3719, 0.4637])
    return (
        torch.abs(torch.tensor([-1, -2, 3])),
        torch.absolute(torch.tensor([-1, -2, 3])),
        torch.acos(a),
        torch.arccos(a),
        torch.acosh(a.uniform_(1.0, 2.0)),
        torch.add(a, 20),
        torch.add(a, torch.randn(4, 1), alpha=10),
        torch.addcdiv(torch.randn(1, 3), torch.randn(3, 1), torch.randn(1, 3), value=0.1),
        torch.addcmul(torch.randn(1, 3), torch.randn(3, 1), torch.randn(1, 3), value=0.1),
        torch.angle(a),
        torch.asin(a),
        torch.arcsin(a),
        torch.asinh(a),
        torch.arcsinh(a),
        torch.atan(a),
        torch.arctan(a),
        torch.atanh(a.uniform_(-1.0, 1.0)),
        torch.arctanh(a.uniform_(-1.0, 1.0)),
        torch.atan2(a, a),
        torch.bitwise_not(t),
        torch.bitwise_and(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
        torch.bitwise_or(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
        torch.bitwise_xor(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
        torch.ceil(a),
        torch.clamp(a, min=-0.5, max=0.5),
        torch.clamp(a, min=0.5),
        torch.clamp(a, max=0.5),
        torch.clip(a, min=-0.5, max=0.5),
        torch.conj(a),
        torch.copysign(a, 1),
        torch.copysign(a, b),
        torch.cos(a),
        torch.cosh(a),
        torch.deg2rad(torch.tensor([[180.0, -180.0], [360.0, -360.0], [90.0, -90.0]])),
        torch.div(a, b),
        torch.divide(a, b, rounding_mode="trunc"),
        torch.divide(a, b, rounding_mode="floor"),
        torch.digamma(torch.tensor([1.0, 0.5])),
        torch.erf(torch.tensor([0.0, -1.0, 10.0])),
        torch.erfc(torch.tensor([0.0, -1.0, 10.0])),
        torch.erfinv(torch.tensor([0.0, 0.5, -1.0])),
        torch.exp(torch.tensor([0.0, math.log(2.0)])),
        torch.exp2(torch.tensor([0.0, math.log(2.0), 3.0, 4.0])),
        torch.expm1(torch.tensor([0.0, math.log(2.0)])),
        torch.fake_quantize_per_channel_affine(
            torch.randn(2, 2, 2),
            (torch.randn(2) + 1) * 0.05,
            torch.zeros(2),
            1,
            0,
            255,
        ),
        torch.fake_quantize_per_tensor_affine(a, 0.1, 0, 0, 255),
        torch.float_power(torch.randint(10, (4, )), 2),
        torch.float_power(torch.arange(1, 5), torch.tensor([2, -3, 4, -5])),
        torch.floor(a),
        # torch.floor_divide(torch.tensor([4.0, 3.0]), torch.tensor([2.0, 2.0])),
        # torch.floor_divide(torch.tensor([4.0, 3.0]), 1.4),
        torch.fmod(torch.tensor([-3, -2, -1, 1, 2, 3]), 2),
        torch.fmod(torch.tensor([1, 2, 3, 4, 5]), 1.5),
        torch.frac(torch.tensor([1.0, 2.5, -3.2])),
        torch.randn(4, dtype=torch.cfloat).imag,
        torch.ldexp(torch.tensor([1.0]), torch.tensor([1])),
        torch.ldexp(torch.tensor([1.0]), torch.tensor([1, 2, 3, 4])),
        torch.lerp(torch.arange(1.0, 5.0), torch.empty(4).fill_(10), 0.5),
        torch.lerp(
            torch.arange(1.0, 5.0),
            torch.empty(4).fill_(10),
            torch.full_like(torch.arange(1.0, 5.0), 0.5),
        ),
        torch.lgamma(torch.arange(0.5, 2, 0.5)),
        torch.log(torch.arange(5) + 10),
        torch.log10(torch.rand(5)),
        torch.log1p(torch.randn(5)),
        torch.log2(torch.rand(5)),
        torch.logaddexp(torch.tensor([-1.0]), torch.tensor([-1, -2, -3])),
        torch.logaddexp(torch.tensor([-100.0, -200.0, -300.0]), torch.tensor([-1, -2, -3])),
        torch.logaddexp(torch.tensor([1.0, 2000.0, 30000.0]), torch.tensor([-1, -2, -3])),
        torch.logaddexp2(torch.tensor([-1.0]), torch.tensor([-1, -2, -3])),
        torch.logaddexp2(torch.tensor([-100.0, -200.0, -300.0]), torch.tensor([-1, -2, -3])),
        torch.logaddexp2(torch.tensor([1.0, 2000.0, 30000.0]), torch.tensor([-1, -2, -3])),
        torch.logical_and(r, s),
        torch.logical_and(r.double(), s.double()),
        torch.logical_and(r.double(), s),
        torch.logical_and(r, s, out=torch.empty(4, dtype=torch.bool)),
        torch.logical_not(torch.tensor([0, 1, -10], dtype=torch.int8)),
        torch.logical_not(torch.tensor([0.0, 1.5, -10.0], dtype=torch.double)),
        torch.logical_not(
            torch.tensor([0.0, 1.0, -10.0], dtype=torch.double),
            out=torch.empty(3, dtype=torch.int16),
        ),
        torch.logical_or(r, s),
        torch.logical_or(r.double(), s.double()),
        torch.logical_or(r.double(), s),
        torch.logical_or(r, s, out=torch.empty(4, dtype=torch.bool)),
        torch.logical_xor(r, s),
        torch.logical_xor(r.double(), s.double()),
        torch.logical_xor(r.double(), s),
        torch.logical_xor(r, s, out=torch.empty(4, dtype=torch.bool)),
        torch.logit(torch.rand(5), eps=1e-6),
        torch.hypot(torch.tensor([4.0]), torch.tensor([3.0, 4.0, 5.0])),
        torch.i0(torch.arange(5, dtype=torch.float32)),
        torch.igamma(a, b),
        torch.igammac(a, b),
        torch.mul(torch.randn(3), 100),
        torch.multiply(torch.randn(4, 1), torch.randn(1, 4)),
        torch.mvlgamma(torch.empty(2, 3).uniform_(1.0, 2.0), 2),
        torch.tensor([float("nan"), float("inf"), -float("inf"), 3.14]),
        torch.nan_to_num(w),
        torch.nan_to_num(w, nan=2.0),
        torch.nan_to_num(w, nan=2.0, posinf=1.0),
        torch.neg(torch.randn(5)),
        # torch.nextafter(torch.tensor([1, 2]), torch.tensor([2, 1])) == torch.tensor([eps + 1, 2 - eps]),
        torch.polygamma(1, torch.tensor([1.0, 0.5])),
        torch.polygamma(2, torch.tensor([1.0, 0.5])),
        torch.polygamma(3, torch.tensor([1.0, 0.5])),
        torch.polygamma(4, torch.tensor([1.0, 0.5])),
        torch.pow(a, 2),
        torch.pow(torch.arange(1.0, 5.0), torch.arange(1.0, 5.0)),
        torch.rad2deg(torch.tensor([[3.142, -3.142], [6.283, -6.283], [1.570, -1.570]])),
        torch.randn(4, dtype=torch.cfloat).real,
        torch.reciprocal(a),
        torch.remainder(torch.tensor([-3.0, -2.0]), 2),
        torch.remainder(torch.tensor([1, 2, 3, 4, 5]), 1.5),
        torch.round(a),
        torch.rsqrt(a),
        torch.sigmoid(a),
        torch.sign(torch.tensor([0.7, -1.2, 0.0, 2.3])),
        torch.sgn(a),
        torch.signbit(torch.tensor([0.7, -1.2, 0.0, 2.3])),
        torch.sin(a),
        torch.sinc(a),
        torch.sinh(a),
        torch.sqrt(a),
        torch.square(a),
        torch.sub(torch.tensor((1, 2)), torch.tensor((0, 1)), alpha=2),
        torch.tan(a),
        torch.tanh(a),
        torch.trunc(a),
        torch.xlogy(f, g),
        torch.xlogy(f, g),
        torch.xlogy(f, 4),
        torch.xlogy(2, g),
    )
torch.erfinv(torch.tensor([0, 0.5, -1.]))

# exp
torch.exp(torch.tensor([0, math.log(2.)]))

# exp2
torch.exp2(torch.tensor([0, math.log2(2.), 3, 4]))

# expm1
torch.expm1(torch.tensor([0, math.log(2.)]))

# fake_quantize_per_channel_affine
x = torch.randn(2, 2, 2)
scales = (torch.randn(2) + 1) * 0.05
zero_points = torch.zeros(2).to(torch.long)
torch.fake_quantize_per_channel_affine(x, scales, zero_points, 1, 0, 255)

# fake_quantize_per_tensor_affine
torch.fake_quantize_per_tensor_affine(a, 0.1, 0, 0, 255)

# float_power
torch.float_power(torch.randint(10, (4, )), 2)
torch.float_power(torch.arange(1, 5), torch.tensor([2, -3, 4, -5]))

# floor
torch.floor(a)

# floor_divide
torch.floor_divide(torch.tensor([4., 3.]), torch.tensor([2., 2.]))
torch.floor_divide(torch.tensor([4., 3.]), 1.4)
def fakeQuantizePerChannelOriginalKernel(input, scale, zero_point, axis: int, quant_min: int, quant_max: int):
    return torch.fake_quantize_per_channel_affine(input, scale, zero_point, axis, quant_min, quant_max)
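# Hypothetical usage sketch for the wrapper above (tensor shapes and qparams are assumed,
# not taken from the source): per-channel fake quantization of a conv weight along dim 0.
weight = torch.randn(8, 4, 3, 3)                  # 8 output channels
scale = torch.rand(8) * 0.1 + 0.01                # one positive scale per channel
zero_point = torch.zeros(8, dtype=torch.int32)    # symmetric: zero offset; int32 for torch >= 1.10
weight_fq = fakeQuantizePerChannelOriginalKernel(weight, scale, zero_point, 0, -128, 127)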