def test_qtensor_copy(self):
    scale = 0.5
    zero_point = 10
    val = 100
    numel = 10
    # copy from same scale and zero_point
    q = torch._empty_affine_quantized([numel], scale=scale,
                                      zero_point=zero_point, dtype=torch.quint8)
    q2 = torch._empty_affine_quantized([numel], scale=scale,
                                       zero_point=zero_point, dtype=torch.quint8)
    q.copy_(q2)
    self.assertEqual(q.int_repr(), q2.int_repr())
    self.assertEqual(q.q_scale(), q2.q_scale())
    self.assertEqual(q.q_zero_point(), q2.q_zero_point())
    # copying from different scale and zero_point
    scale = 3.2
    zero_point = 5
    q = torch._empty_affine_quantized([numel], scale=scale,
                                      zero_point=zero_point, dtype=torch.quint8)
    # check original scale and zero_point are set correctly
    self.assertEqual(q.q_scale(), scale)
    self.assertEqual(q.q_zero_point(), zero_point)
    q.copy_(q2)
    # check scale and zero_point have been copied
    self.assertEqual(q, q2)
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, groups=1,
             bias=True, padding_mode='zeros'):
    super(Conv2d, self).__init__()
    if padding_mode != 'zeros':
        raise NotImplementedError(
            "Currently only zero-padding is supported by quantized conv")
    if in_channels % groups != 0:
        raise ValueError('in_channels must be divisible by groups')
    if out_channels % groups != 0:
        raise ValueError('out_channels must be divisible by groups')
    self.in_channels = in_channels
    self.out_channels = out_channels
    # Convert to a pair so an int kernel_size can be indexed below.
    self.kernel_size = _pair(kernel_size)
    self.stride = _pair(stride)
    self.padding = _pair(padding)
    self.dilation = _pair(dilation)
    self.transposed = False
    self.output_padding = 0
    self.groups = groups
    self.padding_mode = padding_mode
    qweight = torch._empty_affine_quantized(
        [out_channels, self.kernel_size[0], self.kernel_size[1],
         in_channels // self.groups],
        scale=1, zero_point=0, dtype=torch.qint8)
    self.set_weight(qweight)
    self.bias = torch._empty_affine_quantized([out_channels],
                                              scale=1, zero_point=0,
                                              dtype=torch.qint32)
    self.scale = 1.0
    self.zero_point = 0
def __init__(self, in_features, out_features, bias_=True):
    super(Linear, self).__init__()
    # We don't muck around with buffers or attributes or anything here
    # to keep the module simple. *everything* is simply a Python attribute.
    # Serialization logic is explicitly handled in the below serialization and
    # deserialization modules
    self.in_features = in_features
    self.out_features = out_features
    if bias_:
        self.bias = torch._empty_affine_quantized([out_features],
                                                  scale=1, zero_point=0,
                                                  dtype=torch.qint32)
    else:
        self.bias = None
    qweight = torch._empty_affine_quantized([out_features, in_features],
                                            scale=1, zero_point=0,
                                            dtype=torch.qint8)
    self.set_weight(qweight)
    self.weight_scale = 1.0
    self.scale = 1.0
    self.zero_point = 0
def test_qtensor_copy(self):
    scale = 0.5
    zero_point = 10
    numel = 10
    for device in get_supported_device_types():
        for dtype in [torch.qint8, torch.quint8, torch.qint32]:
            # copy from same scale and zero_point
            q = torch._empty_affine_quantized([numel], scale=scale,
                                              zero_point=zero_point,
                                              device=device, dtype=dtype)
            q2 = torch._empty_affine_quantized([numel], scale=scale,
                                               zero_point=zero_point,
                                               device=device, dtype=dtype)
            q.copy_(q2)
            self.assertEqual(q.int_repr(), q2.int_repr())
            self.assertEqual(q.q_scale(), q2.q_scale())
            self.assertEqual(q.q_zero_point(), q2.q_zero_point())
            # copying from different scale and zero_point
            scale = 3.2
            zero_point = 5
            q = torch._empty_affine_quantized([numel], scale=scale,
                                              zero_point=zero_point,
                                              device=device, dtype=dtype)
            # check original scale and zero_point are set correctly
            self.assertEqual(q.q_scale(), scale)
            self.assertEqual(q.q_zero_point(), zero_point)
            q.copy_(q2)
            # check scale and zero_point have been copied
            self.assertEqual(q, q2)
    # can't copy from quantized tensor to non-quantized tensor
    r = torch.empty([numel], dtype=torch.float)
    q = torch._empty_affine_quantized([numel], scale=scale,
                                      zero_point=zero_point, dtype=torch.quint8)
    with self.assertRaisesRegex(RuntimeError, "please use dequantize"):
        r.copy_(q)
def test_qtensor_copy(self):
    scale = 0.5
    zero_point = 10
    val = 100
    numel = 10
    # copy from same scale and zero_point
    q = torch._empty_affine_quantized([numel], scale=scale,
                                      zero_point=zero_point, dtype=torch.quint8)
    q2 = torch._empty_affine_quantized([numel], scale=scale,
                                       zero_point=zero_point, dtype=torch.quint8)
    q.copy_(q2)
    self.assertEqual(q.int_repr(), q2.int_repr())
    self.assertEqual(q.q_scale(), q2.q_scale())
    self.assertEqual(q.q_zero_point(), q2.q_zero_point())
    # copying from different scale and zero_point
    scale = 3.2
    zero_point = 5
    q = torch._empty_affine_quantized([numel], scale=scale,
                                      zero_point=zero_point, dtype=torch.quint8)
    # check original scale and zero_point are set correctly
    self.assertEqual(q.q_scale(), scale)
    self.assertEqual(q.q_zero_point(), zero_point)
    q.copy_(q2)
    # check scale and zero_point have been copied
    self.assertEqual(q, q2)
    # deep copy
    scale, zero_point, dtype = 1.0, 2, torch.uint8
    q_int = torch.randint(0, 100, [3, 5], dtype=dtype)
    scale, zero_point = 2.0, 3
    q = torch._make_per_tensor_quantized_tensor(q_int, scale=scale,
                                                zero_point=zero_point)
    qc = deepcopy(q)
    self.assertEqual(qc, q)
    # can't copy from quantized tensor to non-quantized tensor
    r = torch.empty([numel], dtype=torch.float)
    q = torch._empty_affine_quantized([numel], scale=scale,
                                      zero_point=zero_point, dtype=torch.quint8)
    with self.assertRaisesRegex(RuntimeError, "please use dequantize"):
        r.copy_(q)
def test_qtensor_creation(self):
    scale = 0.5
    zero_point = 10
    val = 100
    numel = 10
    q = torch._empty_affine_quantized([numel], scale=scale,
                                      zero_point=zero_point, dtype=torch.quint8)
    self.assertEqual(scale, q.q_scale())
    self.assertEqual(zero_point, q.q_zero_point())

    # create Tensor from uint8_t Tensor, scale and zero_point
    int_tensor = torch.randint(0, 100, size=(10,), dtype=torch.uint8)
    q = torch._make_per_tensor_quantized_tensor(int_tensor, scale, zero_point)
    self.assertEqual(int_tensor, q.int_repr())
    self.assertEqual(scale, q.q_scale())
    self.assertEqual(zero_point, q.q_zero_point())

    # create via empty_like
    q = torch._empty_affine_quantized([numel], scale=scale,
                                      zero_point=zero_point, dtype=torch.quint8)
    q_el = torch.empty_like(q)
    self.assertEqual(q.q_scale(), q_el.q_scale())
    self.assertEqual(q.q_zero_point(), q_el.q_zero_point())
    self.assertEqual(q.dtype, q_el.dtype)

    # create via empty_like but change the dtype (currently not supported)
    with self.assertRaises(RuntimeError):
        torch.empty_like(q, dtype=torch.qint8)
def test_qtensor_view(self):
    scale, zero_point, dtype = 1.0, 2, torch.quint8
    q = torch._empty_affine_quantized(1, 2, 3, scale=scale,
                                      zero_point=zero_point, dtype=dtype)
    q2 = q.view(1, 3, 2)
    self.assertEqual(q.numel(), q2.numel())
    # testing -1
    self.assertEqual(q, q2.view(1, -1, 3))

    a = torch._empty_affine_quantized([1, 2, 3, 4], scale=scale,
                                      zero_point=zero_point, dtype=dtype)
    b = a.transpose(1, 2)  # swaps 2nd and 3rd dimension
    c = a.view(1, 3, 2, 4)  # does not change tensor layout
    self.assertEqual(b.size(), c.size())
    self.assertEqual(b.q_scale(), c.q_scale())
    self.assertEqual(b.q_zero_point(), c.q_zero_point())
    self.assertNotEqual(b.int_repr(), c.int_repr())

    # a case where we can't view a non-contiguous Tensor
    a = torch._empty_affine_quantized([1, 2, 3, 4], scale=scale,
                                      zero_point=zero_point, dtype=dtype)
    b = a.transpose(1, 2)  # swaps 2nd and 3rd dimension
    err_str = "view size is not compatible with input tensor's size and stride*"
    with self.assertRaisesRegex(RuntimeError, err_str):
        b.view(1, 4, 2, 3)
    # view on contiguous tensor is fine
    b.contiguous().view(1, 4, 2, 3)
def test_qtensor_reshape(self):
    scale, zero_point, dtype = 1.0, 2, torch.quint8
    q = torch._empty_affine_quantized([3, 5], scale=scale,
                                      zero_point=zero_point, dtype=dtype)
    q2 = q.reshape([15])
    self.assertEqual(q.numel(), q2.numel())
    self.assertEqual(q2.size(), [15])
    # testing -1
    self.assertEqual(q, q2.reshape([3, -1]))

    a = torch._empty_affine_quantized([1, 2, 3, 4], scale=scale,
                                      zero_point=zero_point, dtype=dtype)
    b = a.transpose(1, 2)  # swaps 2nd and 3rd dimension
    c = a.reshape(1, 3, 2, 4)  # does not change tensor layout
    self.assertEqual(b.size(), c.size())
    self.assertEqual(b.q_scale(), c.q_scale())
    self.assertEqual(b.q_zero_point(), c.q_zero_point())
    # TODO: fix flaky test
    # self.assertNotEqual(b.int_repr(), c.int_repr())

    # we can use reshape for non-contiguous Tensor
    a = torch._empty_affine_quantized([1, 2, 3, 4], scale=scale,
                                      zero_point=zero_point, dtype=dtype)
    b = a.transpose(1, 2)  # swaps 2nd and 3rd dimension
    c = b.reshape(1, 4, 2, 3)
    self.assertEqual(b, c.reshape(1, 3, 2, 4))
def test_qadd_relu_different_qparams(self):
    add_relu = torch.ops.quantized.add_relu
    add = torch.ops.quantized.add
    add_out = torch.ops.quantized.add_out
    add_relu_out = torch.ops.quantized.add_relu_out

    A = torch.arange(-25, 25, dtype=torch.float)
    B = torch.arange(-25, 25, dtype=torch.float)
    scale_A = 3.0
    zero_point_A = 7
    scale_B = 5.0
    zero_point_B = 127

    scale_C = 0.5
    zero_point_C = 5

    qA = torch.quantize_linear(A, scale=scale_A, zero_point=zero_point_A,
                               dtype=torch.quint8)
    qB = torch.quantize_linear(B, scale=scale_B, zero_point=zero_point_B,
                               dtype=torch.quint8)

    # Add ground truth
    C = (qA.dequantize() + qB.dequantize()).numpy()
    qC = _quantize(C, scale_C, zero_point_C)
    qC_hat = add(qA, qB, scale=scale_C, zero_point=zero_point_C)
    np.testing.assert_equal(qC, qC_hat.int_repr(),
                            "Quantized addition failed.")
    qC_out_hat = torch._empty_affine_quantized(qC.shape,
                                               scale=scale_C,
                                               zero_point=zero_point_C,
                                               dtype=torch.quint8)
    add_out(qA, qB, out=qC_out_hat)
    self.assertEqual(qC_hat, qC_out_hat, message="Add.out failed")

    # Add + ReLU ground truth
    Crelu = C.copy()
    Crelu[C < 0] = 0
    qCrelu = _quantize(Crelu, scale_C, zero_point_C)
    qCrelu_hat = add_relu(qA, qB, scale=scale_C, zero_point=zero_point_C)
    np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                            "Quantized addition with ReLU failed.")
    qCrelu_out_hat = torch._empty_affine_quantized(qCrelu.shape,
                                                   scale=scale_C,
                                                   zero_point=zero_point_C,
                                                   dtype=torch.quint8)
    add_relu_out(qA, qB, out=qCrelu_out_hat)
    self.assertEqual(qCrelu_hat, qCrelu_out_hat, message="AddReLU.out failed")
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, groups=1,
             bias=True, padding_mode='zeros'):
    if padding_mode != 'zeros':
        raise NotImplementedError(
            "Currently only zero-padding is supported!")
    stride = _pair(stride)
    padding = _pair(padding)
    dilation = _pair(dilation)
    kernel_size = _pair(kernel_size)
    transposed = False
    output_padding = _pair(0)
    super(Conv2d, self).__init__(in_channels=in_channels,
                                 out_channels=out_channels,
                                 kernel_size=kernel_size,
                                 stride=stride,
                                 padding=padding,
                                 dilation=dilation,
                                 transposed=transposed,
                                 output_padding=output_padding,
                                 groups=groups,
                                 bias=True,
                                 padding_mode=padding_mode)
    del self.weight
    del self.bias

    qweight = torch._empty_affine_quantized(
        [out_channels, kernel_size[0], kernel_size[1],
         in_channels // self.groups],
        scale=1, zero_point=0, dtype=torch.qint8)
    qbias = torch._empty_affine_quantized([out_channels],
                                          scale=1, zero_point=0,
                                          dtype=torch.qint32)
    self.register_buffer(
        '_packed_weight',
        torch.ops.quantized.fbgemm_conv_prepack(qweight.permute([0, 2, 3, 1]),
                                                self.stride,
                                                self.padding,
                                                self.dilation,
                                                self.groups))
    self.register_buffer('bias', qbias)
    self.register_buffer('scale', torch.tensor([1.0], dtype=torch.double))
    self.register_buffer('zero_point', torch.tensor([0], dtype=torch.long))
def test_qmul_relu_same_qparams(self):
    mul_relu = torch.ops.quantized.mul_relu
    mul = torch.ops.quantized.mul
    mul_out = torch.ops.quantized.mul_out
    mul_relu_out = torch.ops.quantized.mul_relu_out

    A = torch.arange(-25, 25, dtype=torch.float)
    B = torch.arange(-25, 25, dtype=torch.float)
    scale = 2.0
    zero_point = 127
    qA = torch.quantize_linear(A, scale=scale, zero_point=zero_point,
                               dtype=torch.quint8)
    qB = torch.quantize_linear(B, scale=scale, zero_point=zero_point,
                               dtype=torch.quint8)

    # mul ground truth
    C = (qA.dequantize() * qB.dequantize()).numpy()
    qC = _quantize(C, scale, zero_point)
    qC_hat = mul(qA, qB, scale=scale, zero_point=zero_point)
    np.testing.assert_equal(qC, qC_hat.int_repr(),
                            "Quantized multiplication failed.")
    qC_out_hat = torch._empty_affine_quantized(qC.shape,
                                               scale=scale,
                                               zero_point=zero_point,
                                               dtype=torch.quint8)
    mul_out(qA, qB, out=qC_out_hat)
    self.assertEqual(qC_hat, qC_out_hat, message="mul.out failed")

    # mul + ReLU ground truth
    Crelu = C.copy()
    Crelu[C < 0] = 0
    qCrelu = _quantize(Crelu, scale, zero_point)
    qCrelu_hat = mul_relu(qA, qB, scale=scale, zero_point=zero_point)
    np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                            "Quantized multiplication with ReLU failed.")
    qCrelu_out_hat = torch._empty_affine_quantized(qCrelu.shape,
                                                   scale=scale,
                                                   zero_point=zero_point,
                                                   dtype=torch.quint8)
    mul_relu_out(qA, qB, out=qCrelu_out_hat)
    self.assertEqual(qCrelu_hat, qCrelu_out_hat, message="mulReLU.out failed")

    # Scalar multiplication
    mul = torch.ops.quantized.mul_scalar
    for b in B:
        C_ref = qA.dequantize().numpy() * b.item()
        qC = _quantize(C_ref, scale, zero_point)
        dqC = _dequantize(qC, scale, zero_point)
        qC_hat = mul(qA, b.item(), scale, zero_point)
        dqC_hat = qC_hat.dequantize()
        self.assertEqual(dqC, dqC_hat)
def __init__(self, in_features, out_features, bias_=True, dtype=torch.qint8):
    super(Linear, self).__init__()
    # We don't muck around with buffers or attributes or anything here
    # to keep the module simple. *everything* is simply a Python attribute.
    # Serialization logic is explicitly handled in the below serialization and
    # deserialization modules
    self.in_features = in_features
    self.out_features = out_features
    bias = None
    if bias_:
        bias = torch.zeros(out_features, dtype=torch.float)

    if dtype == torch.qint8:
        qweight = torch._empty_affine_quantized(
            [out_features, in_features], scale=1, zero_point=0,
            dtype=torch.qint8)
    elif dtype == torch.float16:
        qweight = torch.zeros([out_features, in_features], dtype=torch.float)
    else:
        raise RuntimeError('Unsupported dtype specified for quantized Linear!')

    self._packed_params = LinearPackedParams(dtype)
    self._packed_params.set_weight_bias(qweight, bias)
    self.scale = 1.0
    self.zero_point = 0
def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'): super(Conv2d, self).__init__() if padding_mode != 'zeros': raise NotImplementedError( "Currently only zero-padding is supported by quantized conv") if in_channels % groups != 0: raise ValueError('in_channels must be divisible by groups') if out_channels % groups != 0: raise ValueError('out_channels must be divisible by groups') self.in_channels = in_channels self.out_channels = out_channels self.kernel_size = _pair(kernel_size) self.stride = _pair(stride) self.padding = _pair(padding) self.dilation = _pair(dilation) self.transposed = False self.output_padding = 0 self.groups = groups self.padding_mode = padding_mode # Initialize as NCHW. set_weight will internally transpose to # NHWC qweight = torch._empty_affine_quantized( [out_channels, in_channels // self.groups, self.kernel_size[0], self.kernel_size[1]], scale=1, zero_point=0, dtype=torch.qint8) bias_float = None if bias: bias_float = torch.zeros(out_channels, dtype=torch.float) self.set_weight_bias(qweight, bias_float) self.scale = 1.0 self.zero_point = 0
def elu(input, alpha=1., inplace=False, scale=None, zero_point=None):
    # type: (Tensor, Optional[float], bool, Optional[float], Optional[int]) -> Tensor
    r"""
    Applies the quantized ELU function element-wise:

    .. math::
        \text{ELU}(x) = \max(0,x) + \min(0, \alpha * (\exp(x) - 1))

    Args:
        input: quantized input
        alpha: the :math:`\alpha` value for the ELU formulation. Default: 1.0
        inplace: Inplace modification of the input tensor
        scale, zero_point: Scale and zero point of the output tensor.
    """
    if not input.is_quantized:
        raise ValueError("Input to 'quantized.elu' must be quantized!")
    if (scale is not None) != (zero_point is not None):
        raise ValueError(
            "Either both or none of (scale, zero_point) must be specified!")

    if scale is not None and zero_point is not None:
        assert not inplace, "Cannot rescale with `inplace`"
        output = torch._empty_affine_quantized(
            input.shape, scale=scale, zero_point=int(zero_point),
            dtype=input.dtype)
        torch._C._nn.elu(input, alpha, out=output)
        return output
    elif inplace:
        return torch._C._nn.elu_(input, alpha)
    else:
        return torch._C._nn.elu(input, alpha)
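# Hedged usage sketch (not part of the source above): exercising the `elu`
# defined above on a per-tensor quantized input. Per its signature here,
# passing `scale`/`zero_point` requantizes the output; omitting them keeps the
# input's qparams. The concrete numbers below are illustrative only.
import torch

x = torch.randn(8)
qx = torch.quantize_per_tensor(x, scale=0.05, zero_point=128, dtype=torch.quint8)
# requantize the result into its own output scale / zero point
qy = elu(qx, alpha=1.0, scale=0.02, zero_point=100)
print(qy.q_scale(), qy.q_zero_point(), qy.dequantize())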
def __init__(self, in_channels, out_channels, kernel_size, stride,
             padding, dilation, transposed, output_padding,
             groups, bias, padding_mode='zeros'):
    super(_ConvNd, self).__init__()
    if padding_mode != 'zeros':
        raise NotImplementedError(
            "Currently only zero-padding is supported by quantized conv")
    if in_channels % groups != 0:
        raise ValueError('in_channels must be divisible by groups')
    if out_channels % groups != 0:
        raise ValueError('out_channels must be divisible by groups')
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    self.dilation = dilation
    self.transposed = transposed
    self.output_padding = output_padding
    self.groups = groups
    self.padding_mode = padding_mode
    # Initialize as NCHW. set_weight will internally transpose to NHWC.
    qweight = torch._empty_affine_quantized(
        [out_channels, in_channels // self.groups] + list(kernel_size),
        scale=1, zero_point=0, dtype=torch.qint8)
    bias_float = (
        torch.zeros(out_channels, dtype=torch.float) if bias else None)
    self.set_weight_bias(qweight, bias_float)
    self.scale = 1.0
    self.zero_point = 0
def get_size_of_node(fx_module: GraphModule, node: Node) -> size_bytes:
    """Given a node with node.dtype and node.shape, return its total size and its output size.

    total_size = weights + bias + output_size
    """
    # Total num of elements
    total_num_of_elems = 0
    # For a module, consider all parameters
    if node.op == "call_module":
        submodule_dict = dict(fx_module.named_modules())
        submodule = submodule_dict[node.target]
        parameters = submodule.named_parameters()
        # Parameters are named tuples
        for name, p in parameters:
            total_num_of_elems += p.numel()
    # Don't forget the output size
    # node.shape is the shape of this node's output
    tensor_meta = get_tensor_meta(node)
    output_elem = tensor_meta.shape.numel()
    total_num_of_elems += output_elem
    # Assume for now if it's quantized then it's qint8 or quint8
    if tensor_meta.is_quantized:
        size_per_elem_bytes = torch._empty_affine_quantized(
            [], dtype=tensor_meta.dtype).element_size()
    else:
        size_per_elem_bytes = torch.tensor(
            [], dtype=tensor_meta.dtype).element_size()
    total_size = size_per_elem_bytes * total_num_of_elems
    output_size = size_per_elem_bytes * output_elem
    return size_bytes(output_size, total_size)
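# Hedged illustration (not part of the source above): the per-element byte
# count that get_size_of_node reads off an empty (quantized) tensor. An empty
# quantized tensor reports element_size() just like a regular one.
import torch

print(torch._empty_affine_quantized([], dtype=torch.quint8).element_size())  # 1 byte per element
print(torch._empty_affine_quantized([], dtype=torch.qint32).element_size())  # 4 bytes per element
print(torch.tensor([], dtype=torch.float32).element_size())                  # 4 bytes per element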
def __init__(self):
    super(LinearPackedParams, self).__init__()
    wq = torch._empty_affine_quantized([1, 1], scale=1.0, zero_point=0,
                                       dtype=torch.qint8)
    self.set_weight_bias(wq, None)
def _rebuild_qtensor(storage, storage_offset, size, stride, quantizer_params,
                     requires_grad, backward_hooks):
    qscheme = quantizer_params[0]
    if qscheme == torch.per_tensor_affine:
        _, scale, zero_point = quantizer_params
        tensor = torch._empty_affine_quantized(size, scale=scale,
                                               zero_point=zero_point,
                                               dtype=storage.dtype)
    elif qscheme == torch.per_channel_affine:
        _, scales, zero_points, axis = quantizer_params
        scales = torch.tensor(scales, dtype=torch.float64)
        zero_points = torch.tensor(zero_points, dtype=torch.int64)
        tensor = torch._empty_per_channel_affine_quantized(
            size, scales=scales, zero_points=zero_points, axis=axis,
            dtype=storage.dtype)
    else:
        raise RuntimeError(
            "Can't deserialize quantized tensor with qscheme {}".format(qscheme))
    tensor.set_(storage, storage_offset, size, stride)
    tensor.requires_grad = requires_grad
    # NB: This line exists only for backwards compatibility; the
    # general expectation is that backward_hooks is an empty
    # OrderedDict. See Note [Don't serialize hooks]
    tensor._backward_hooks = backward_hooks
    return tensor
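# Hedged usage sketch (not part of the source above): saving and re-loading a
# per-tensor quantized tensor goes through this rebuild path during
# deserialization; a round trip looks roughly like this.
import io
import torch

qx = torch.quantize_per_tensor(torch.randn(3), scale=0.1, zero_point=10,
                               dtype=torch.quint8)
buf = io.BytesIO()
torch.save(qx, buf)
buf.seek(0)
qx2 = torch.load(buf)
# scale, zero_point and the integer representation survive the round trip
print(qx2.q_scale(), qx2.q_zero_point(), torch.equal(qx.int_repr(), qx2.int_repr()))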
def hardswish(input, scale, zero_point):
    # type: (Tensor, float, int) -> Tensor
    r"""Applies the quantized version of the hardswish function, element-wise,
    as described in the paper:

    `Searching for MobileNetV3`_.

    .. math::
        \text{Hardswish}(x) = \begin{cases}
            0 & \text{if~} x \le -3, \\
            x & \text{if~} x \ge +3, \\
            x \cdot (x + 3) / 6 & \text{otherwise}
        \end{cases}

    Args:
        input: quantized input
        scale, zero_point: Scale and zero point of the output tensor.

    See :class:`~torch.nn.Hardswish` for more details.

    .. _`Searching for MobileNetV3`:
        https://arxiv.org/abs/1905.02244
    """
    if not input.is_quantized:
        raise ValueError("Input to 'quantized.hardswish' must be quantized!")
    output = torch._empty_affine_quantized(input.shape, scale=scale,
                                           zero_point=int(zero_point),
                                           dtype=input.dtype)
    torch._C._nn.hardswish(input, out=output)
    return output
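# Hedged usage sketch (not part of the source above): unlike elu/leaky_relu,
# the hardswish defined above always requantizes, so the output scale and
# zero point are required. The values below are illustrative only.
import torch

qx = torch.quantize_per_tensor(torch.linspace(-5, 5, steps=11),
                               scale=0.1, zero_point=64, dtype=torch.quint8)
qy = hardswish(qx, scale=0.1, zero_point=64)
print(qy.dequantize())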
def test_qtensor_fill(self):
    numel = 10
    scale = 0.5
    zero_point = 10

    ones = torch.ones(numel).to(torch.float)

    types = [torch.qint8, torch.quint8, torch.qint32]
    fills = [-1, 1, 2**32]  # negative, positive, overflow

    # `fill_` uses `copy_(float)`, which doesn't support CUDA
    device = 'cpu'
    ones = ones.to(device)

    for qtype, fill_with in itertools.product(types, fills):
        q_filled = torch._empty_affine_quantized(
            [numel], scale=scale, zero_point=zero_point, device=device,
            dtype=qtype)
        q_filled.fill_(fill_with)
        int_repr = torch.quantize_per_tensor(ones * fill_with, scale,
                                             zero_point, qtype)
        fill_with = int_repr.dequantize()
        int_repr = int_repr.int_repr()

        self.assertEqual(q_filled.int_repr(), int_repr)
        self.assertEqual(q_filled.dequantize(), fill_with)
        # Make sure the scale and zero_point don't change
        self.assertEqual(q_filled.q_scale(), scale)
        self.assertEqual(q_filled.q_zero_point(), zero_point)
def leaky_relu(input, negative_slope=0.01, inplace=False, scale=None, zero_point=None):
    # type: (Tensor, float, bool, Optional[float], Optional[int]) -> Tensor
    r"""
    Quantized version of::

        leaky_relu(input, negative_slope=0.01, inplace=False, scale, zero_point) -> Tensor

    Applies element-wise,
    :math:`\text{LeakyReLU}(x) = \max(0, x) + \text{negative\_slope} * \min(0, x)`

    Args:
        input: Quantized input
        negative_slope: The slope of the negative input
        inplace: Inplace modification of the input tensor
        scale, zero_point: Scale and zero point of the output tensor.

    See :class:`~torch.nn.LeakyReLU` for more details.
    """
    if scale is not None and zero_point is not None:
        assert not inplace, "Cannot rescale with `inplace`"
        output = torch._empty_affine_quantized(
            input.shape, scale=scale, zero_point=int(zero_point),
            dtype=input.dtype)
        torch._C._nn.leaky_relu(input, negative_slope, out=output)
        return output
    if inplace:
        result = torch._C._nn.leaky_relu_(input, negative_slope)
    else:
        result = torch._C._nn.leaky_relu(input, negative_slope)
    return result
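# Hedged usage sketch (not part of the source above): the leaky_relu defined
# above has two call modes, keeping the input's qparams or requantizing into
# an explicit scale/zero_point. Numbers below are illustrative only.
import torch

qx = torch.quantize_per_tensor(torch.randn(6), scale=0.1, zero_point=128,
                               dtype=torch.quint8)
qy_same = leaky_relu(qx, negative_slope=0.01)      # keeps qx's scale/zero_point
qy_new = leaky_relu(qx, negative_slope=0.01,
                    scale=0.05, zero_point=64)     # requantized output
print(qy_same.q_scale(), qy_new.q_scale())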
def test_qtensor_sub_byte(self):
    num_elements = 10
    scale = 1.0
    zero_point = 2
    for dtype in [torch.quint4x2]:
        r = torch.ones((5, 2), dtype=torch.float)
        qr = torch.quantize_per_tensor(r, scale, zero_point, dtype)
        self.assertEqual(qr.q_scale(), scale)
        self.assertEqual(qr.q_zero_point(), zero_point)
        self.assertTrue(qr.is_quantized)
        self.assertFalse(r.is_quantized)
        self.assertEqual(qr.storage().size(), 5)

        int_repr = qr.int_repr()
        for num in int_repr[0:5]:
            self.assertEqual(num, 51)  # Packed entries, each of value 3, i.e. 00110011

        # Test tensor creation
        q = torch._empty_affine_quantized([num_elements], scale=scale,
                                          zero_point=zero_point,
                                          dtype=torch.quint4x2)
        self.assertEqual(q.storage().size(), 5)

        # Test save/load
        with tempfile.NamedTemporaryFile() as f:
            torch.save(qr, f)
            f.seek(0)
            loaded_q = torch.load(f)
            loaded_int_repr = loaded_q.int_repr()[0:5]
            self.assertEqual(int_repr[0:5], loaded_int_repr)
def __init__(self, in_features, out_features, row_block_size, col_block_size,
             bias=True, dtype=torch.qint8):
    super().__init__()

    if dtype != torch.qint8:
        raise NotImplementedError(
            "Only QINT8 is supported for Sparse Quantized Linear")

    self.in_features = in_features
    self.out_features = out_features

    if bias:
        bias = torch.zeros(self.out_features, dtype=torch.float)
    else:
        bias = None

    qweight = torch._empty_affine_quantized([out_features, in_features],
                                            scale=1, zero_point=0,
                                            dtype=torch.qint8)
    self._packed_params = LinearPackedParams(row_block_size=row_block_size,
                                             col_block_size=col_block_size,
                                             dtype=dtype)
    self._packed_params.set_weight_bias(qweight, bias,
                                        row_block_size, col_block_size)
    self.scale = 1.0
    self.zero_point = 0
def test_clone(self):
    numel = 10
    scale = 0.5
    zero_point = 10

    options = itertools.product(
        get_supported_device_types(),
        [torch.qint8, torch.quint8, torch.qint32])

    for device, dtype in options:
        per_tensor_quantized = torch._empty_affine_quantized(
            [numel], scale=scale, zero_point=zero_point,
            device=device, dtype=dtype)
        per_channel_quantized = torch._empty_per_channel_affine_quantized(
            [numel],
            scales=torch.tensor([scale]),
            zero_points=torch.tensor([zero_point]),
            axis=0,
            device=device,
            dtype=dtype)
        qtensors = [per_tensor_quantized, per_channel_quantized]

        for q in qtensors:
            q2 = q.clone()
            # Check to make sure the scale and zero_point have been copied.
            self.assertEqual(q, q2)
def test_qtensor_clone(self):
    numel = 10
    scale = 0.5
    zero_point = 10
    q2 = torch._empty_affine_quantized([numel], scale=scale,
                                       zero_point=zero_point,
                                       dtype=torch.quint8)
    q = q2.clone()
    # Check to make sure the scale and zero_point have been copied.
    self.assertEqual(q, q2)
def __init__(self):
    super(ConvPackedParams, self).__init__()
    wq = torch._empty_affine_quantized([1, 1, 1, 1], scale=1.0, zero_point=0,
                                       dtype=torch.qint8)
    self.stride = [1, 1]
    self.padding = [0, 0]
    self.dilation = [1, 1]
    self.groups = 1
    self.set_weight_bias(wq, None)
def __init__(self, dtype=torch.qint8):
    super().__init__()
    self.dtype = dtype
    if self.dtype == torch.qint8:
        wq = torch._empty_affine_quantized([1, 1], scale=1.0, zero_point=0,
                                           dtype=torch.qint8)
    elif self.dtype == torch.float16:
        wq = torch.zeros([1, 1], dtype=torch.float)
    else:
        # Guard against `wq` being undefined below for an unsupported dtype.
        raise RuntimeError('Unsupported dtype for LinearPackedParams!')
    self.set_weight_bias(wq, None)
def test_cat(self, X, num, dim, relu):
    tensors_q = []
    tensors_ref = []
    X, (scale, zero_point, torch_type) = X
    assume(dim < X.ndim)
    X = torch.from_numpy(X)
    new_shape = np.array(X.shape)
    new_shape[dim] = 0
    for idx in range(num):
        tensors_q.append(torch.quantize_linear(X, scale, zero_point,
                                               torch_type))
        tensors_ref.append(X)
        new_shape[dim] += tensors_ref[-1].shape[dim]

    cat_ref = torch.cat(tensors_ref, dim=dim)
    cat_ref = torch.quantize_linear(cat_ref, scale, zero_point, torch_type)
    cat_ref = cat_ref.dequantize()

    if relu:
        cat_ref = F.relu(cat_ref)
        q_cat_op = torch.ops.quantized.cat_relu
        q_cat_out_op = torch.ops.quantized.cat_relu_out
    else:
        q_cat_op = torch.ops.quantized.cat
        q_cat_out_op = torch.ops.quantized.cat_out

    cat_q = q_cat_op(tensors_q, dim=dim, scale=scale,
                     zero_point=zero_point)
    cat_q = cat_q.dequantize()
    np.testing.assert_equal(cat_ref.numpy(), cat_q.numpy())

    cat_q_out = torch._empty_affine_quantized(
        list(new_shape), scale=scale,
        zero_point=zero_point, dtype=torch_type)
    q_cat_out_op(tensors_q, dim=dim, out=cat_q_out)
    cat_q_out = cat_q_out.dequantize()
    np.testing.assert_equal(cat_ref.numpy(), cat_q_out.numpy())

    # Test the cat on per-channel quantized tensor.
    ch_axis = 1
    scales = torch.from_numpy(np.array([1.0] * X.shape[ch_axis]))
    scales = scales.to(torch.float64)
    zero_points = torch.from_numpy(np.array([0] * X.shape[ch_axis]))
    zero_points = zero_points.to(torch.long)
    tensors_q[0] = torch.quantize_linear_per_channel(
        X, scales, zero_points, axis=[ch_axis], dtype=torch_type)
    with self.assertRaisesRegex(RuntimeError, "supported.*cat"):
        cat_q = q_cat_op(tensors_q, dim=ch_axis, scale=scale,
                         zero_point=zero_point)
def __init__(self, in_features, out_features):
    super(Linear, self).__init__()
    qweight = torch._empty_affine_quantized(
        [out_features, in_features], scale=1, zero_point=0,
        dtype=torch.qint8)
    self._packed_weight = torch.ops.quantized.linear_prepack(qweight)
def _init(self, in_channels, out_channels, kernel_size, stride,
          padding, dilation, transposed, output_padding,
          groups, bias, padding_mode='zeros',
          device=None, dtype=None) -> None:
    factory_kwargs = {'device': device, 'dtype': dtype}
    super(_ConvNd, self).__init__()

    if in_channels % groups != 0:
        raise ValueError('in_channels must be divisible by groups')
    if out_channels % groups != 0:
        raise ValueError('out_channels must be divisible by groups')
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    self.dilation = dilation
    self.transposed = transposed
    self.output_padding = output_padding
    self.groups = groups
    if padding_mode not in _SUPPORTED_PADDING:
        raise ValueError(
            "'padding_mode' {} is not supported by quantized convolution".format(padding_mode))
    self.padding_mode = padding_mode
    # Initialize as NCHW. set_weight will internally transpose to NHWC.
    if self.transposed:
        weight_shape = [in_channels, out_channels // self.groups]
    else:
        weight_shape = [out_channels, in_channels // self.groups]
    qweight = torch._empty_affine_quantized(
        weight_shape + list(kernel_size),
        scale=1, zero_point=0, dtype=torch.qint8,
        **{k: v for k, v in factory_kwargs.items() if k != 'dtype'})
    bias_float = (
        torch.zeros(out_channels, dtype=torch.float,
                    **{k: v for k, v in factory_kwargs.items()
                       if k != 'dtype'}) if bias else None)

    self.set_weight_bias(qweight, bias_float)
    self.scale = 1.0
    self.zero_point = 0