def test_equal(self, X, X2, X_per_channel, X2_per_channel):
    """Compare ``Tensor.equal`` on quantized tensors with a reference predicate.

    ``X`` and ``X2`` are each an ``(array, (scale, zero_point, torch_type))``
    pair.  An array is quantized per channel along its last dimension (the
    single scale / zero point is repeated for every channel) when the
    matching ``*_per_channel`` flag is truthy, and per tensor otherwise.
    """
    X, X_params = X
    scale, zero_point, torch_type = X_params
    X2, X2_params = X2
    scale2, zero_point2, torch_type2 = X2_params

    def quantize(array, q_scale, q_zero_point, q_dtype, per_channel):
        """Return ``(float_tensor, quantized_tensor, scheme_name)``."""
        tensor = torch.from_numpy(array)
        if not per_channel:
            quantized = torch.quantize_linear(
                tensor, scale=q_scale, zero_point=q_zero_point, dtype=q_dtype)
            return tensor, quantized, 'per_tensor'
        channels = tensor.shape[-1]
        quantized = torch.quantize_linear_per_channel(
            tensor,
            scales=torch.tensor([q_scale] * channels),
            zero_points=torch.tensor([q_zero_point] * channels),
            dtype=q_dtype,
            axis=[tensor.ndim - 1])
        return tensor, quantized, 'per_channel'

    X, qX, X_scheme = quantize(
        X, scale, zero_point, torch_type, X_per_channel)
    X2, qX2, X2_scheme = quantize(
        X2, scale2, zero_point2, torch_type2, X2_per_channel)

    def equal_ref(lhs, lhs_params, lhs_scheme, rhs, rhs_params, rhs_scheme):
        """Reference: same scheme, same params, same shape, same values."""
        if (lhs_scheme != rhs_scheme
                or lhs_params != rhs_params
                or lhs.shape != rhs.shape):
            return False
        return not (lhs != rhs).any()

    # A tensor compared with itself.
    self.assertEqual(
        qX.equal(qX),
        equal_ref(X, X_params, X_scheme, X, X_params, X_scheme))
    # Two independently generated tensors.
    self.assertEqual(
        qX.equal(qX2),
        equal_ref(X, X_params, X_scheme, X2, X2_params, X2_scheme))
def test_qtensor_per_channel_affine(self):
    """Check per-channel affine quantization of a 3x2 tensor along axis 1.

    The integer representation is compared with an element-wise reference
    (``quantize_c``), and the dequantized tensor is compared with the
    original input using ``atol = 2 / min(scales)``.
    """
    r = torch.rand(3, 2, dtype=torch.float) * 2 - 4
    scales = torch.tensor([2.0, 3.0], dtype=torch.double)
    zero_points = torch.tensor([5, 10], dtype=torch.long)
    axis = [1]

    def quantize_c(data, scales, zero_points):
        """Reference: round(x / scale) + zero_point, clipped to [0, 255]."""
        quant_min, quant_max = 0, 255
        res = torch.empty((3, 2))
        for row in range(3):
            for col in range(2):
                shifted = np.round(data[row][col] / scales[col]) + zero_points[col]
                res[row][col] = np.clip(shifted, quant_min, quant_max)
        return res

    qr = torch.quantize_linear_per_channel(
        r, scales, zero_points, axis, torch.quint8)
    rqr = qr.dequantize()

    expected_int_repr = quantize_c(r, scales, zero_points)
    self.assertTrue(np.allclose(qr.int_repr(), expected_int_repr))

    tolerance = 2 / np.min(scales.numpy())
    self.assertTrue(np.allclose(r.numpy(), rqr.numpy(), atol=tolerance))
def test_cat(self, X, num, dim, relu):
    """Check quantized ``cat`` / ``cat_relu`` and their ``_out`` variants.

    ``X`` is an ``(array, (scale, zero_point, torch_type))`` pair.  The same
    tensor is concatenated ``num`` times along ``dim``; the reference is the
    float concatenation, quantized and dequantized again (followed by ReLU
    when ``relu`` is truthy).  Finally the op is expected to raise a
    ``RuntimeError`` when one of the inputs is per-channel quantized.
    """
    X, (scale, zero_point, torch_type) = X
    assume(dim < X.ndim)
    X = torch.from_numpy(X)

    tensors_q = [
        torch.quantize_linear(X, scale, zero_point, torch_type)
        for _ in range(num)
    ]
    tensors_ref = [X] * num

    # Shape of the concatenated result: ``num`` copies stacked along ``dim``.
    new_shape = np.array(X.shape)
    new_shape[dim] = num * X.shape[dim]

    cat_ref = torch.cat(tensors_ref, dim=dim)
    cat_ref = torch.quantize_linear(
        cat_ref, scale, zero_point, torch_type).dequantize()

    if relu:
        cat_ref = F.relu(cat_ref)
        q_cat_op = torch.ops.quantized.cat_relu
        q_cat_out_op = torch.ops.quantized.cat_relu_out
    else:
        q_cat_op = torch.ops.quantized.cat
        q_cat_out_op = torch.ops.quantized.cat_out

    # Functional variant.
    cat_q = q_cat_op(tensors_q, dim=dim, scale=scale, zero_point=zero_point)
    cat_q = cat_q.dequantize()
    np.testing.assert_equal(cat_ref.numpy(), cat_q.numpy())

    # Out variant writing into a preallocated quantized tensor.
    cat_q_out = torch._empty_affine_quantized(
        list(new_shape), scale=scale, zero_point=zero_point,
        dtype=torch_type)
    q_cat_out_op(tensors_q, dim=dim, out=cat_q_out)
    cat_q_out = cat_q_out.dequantize()
    np.testing.assert_equal(cat_ref.numpy(), cat_q_out.numpy())

    # Test the cat on per-channel quantized tensor.
    ch_axis = 1
    num_channels = X.shape[ch_axis]
    scales = torch.from_numpy(np.array([1.0] * num_channels)).to(torch.float64)
    zero_points = torch.from_numpy(np.array([0] * num_channels)).to(torch.long)
    tensors_q[0] = torch.quantize_linear_per_channel(
        X, scales, zero_points, axis=[ch_axis], dtype=torch_type)
    with self.assertRaisesRegex(RuntimeError, "supported.*cat"):
        cat_q = q_cat_op(
            tensors_q, dim=ch_axis, scale=scale, zero_point=zero_point)
def test_qconv_unpack(self, X, strideH, strideW, padH, padW, channelwise):
    """Check that prepacking then unpacking conv weights is lossless.

    ``X`` is ``(inputs, filters, bias, groups)`` where the first three are
    ``(array, (scale, zero_point, qtype))`` pairs.  Only the filters and
    ``groups`` are used beyond unpacking.  The filters are quantized per
    channel along axis 0 when ``channelwise`` is truthy, per tensor otherwise.
    """
    inputs, filters, bias, groups = X
    # inputs and bias are unpacked only for their structure; values unused.
    _inputs, (_inputs_scale, _inputs_zero_point, _inputs_qtype) = inputs
    filters, (filters_scale, filters_zero_point, filters_qtype) = filters
    _bias, (_bias_scale, _bias_zero_point, _bias_qtype) = bias

    qconv_prepack = torch.ops.quantized.fbgemm_conv_prepack
    qconv_unpack = torch.ops.quantized.fbgemm_conv_unpack

    # Orig tensor is assumed to be in K(C/G)RS format
    W = torch.from_numpy(filters).to(torch.float)
    # K(C/G)RS -> KRS(C/G)
    W_KRSC = W.permute([0, 2, 3, 1]).contiguous()

    if channelwise:
        # Repeat the single scale / zero point for every output channel.
        output_channels = filters.shape[0]
        channel_scales = torch.tensor(
            [filters_scale] * output_channels).to(torch.double)
        channel_zero_points = torch.tensor(
            [filters_zero_point] * output_channels).to(torch.long)
        W_q = torch.quantize_linear_per_channel(
            W_KRSC,
            scales=channel_scales,
            zero_points=channel_zero_points,
            axis=[0],
            dtype=filters_qtype)
    else:
        W_q = torch.quantize_linear(
            W_KRSC,
            scale=filters_scale,
            zero_point=filters_zero_point,
            dtype=filters_qtype)

    # Pack weights using weight packing operator
    strides = [strideH, strideW]
    paddings = [padH, padW]
    dilations = [1, 1]
    W_packed = qconv_prepack(W_q, strides, paddings, dilations, groups)
    # Unpack weights using weight unpacking operator (used for serialization)
    W_unpacked = qconv_unpack(W_packed)

    # Integer values must survive the round trip exactly.
    np.testing.assert_equal(
        W_q.int_repr().numpy(), W_unpacked.int_repr().numpy())

    # Quantization parameters must survive as well.
    if channelwise:
        expected_scales = np.float32(W_q.q_per_channel_scales().numpy())
        actual_scales = np.float32(W_unpacked.q_per_channel_scales().numpy())
        np.testing.assert_array_almost_equal(
            expected_scales, actual_scales, decimal=4)
        np.testing.assert_equal(
            W_q.q_per_channel_zero_points().numpy(),
            W_unpacked.q_per_channel_zero_points().numpy())
    else:
        np.testing.assert_equal(
            np.float32(W_q.q_scale()), np.float32(W_unpacked.q_scale()))
        np.testing.assert_equal(
            W_q.q_zero_point(), W_unpacked.q_zero_point())
def test_qtensor_per_channel_permute(self):
    """Check axis reordering and memory-format changes on a per-channel qtensor.

    ``transpose`` is expected to raise ``RuntimeError``, while converting to
    ``torch.channels_last`` must keep the integer values, the per-channel
    parameters and the dequantized values unchanged.
    """
    r = torch.rand(20, 10, 2, 2, dtype=torch.float) * 4 - 2
    scales = torch.rand(10) * 0.02 + 0.01
    zero_points = torch.round(torch.rand(10) * 2 - 1).to(torch.long)
    qr = torch.quantize_linear_per_channel(
        r, scales, zero_points, [1], torch.qint8)

    # we can't reorder the axis
    with self.assertRaises(RuntimeError):
        qr.transpose(0, 1)

    # but we can change memory format
    qlast = qr.contiguous(memory_format=torch.channels_last)

    # Original strides are in descending order; channels_last strides are not.
    qr_strides = qr.stride()
    qlast_strides = qlast.stride()
    self.assertEqual(qr_strides, sorted(qr_strides, reverse=True))
    self.assertNotEqual(qlast_strides, sorted(qlast_strides, reverse=True))

    # Values and quantization parameters are unaffected by the new layout.
    self.assertEqual(qr.int_repr(), qlast.int_repr())
    self.assertEqual(scales, qlast.q_per_channel_scales())
    self.assertEqual(zero_points, qlast.q_per_channel_zero_points())
    self.assertEqual((1,), qlast.q_per_channel_axis())
    self.assertEqual(qlast.dequantize(), qr.dequantize())
def test_cat(self, X, num, axis, relu):
    """Check quantized ``cat`` / ``cat_relu`` against a float reference.

    ``X`` is an ``(array, (scale, zero_point, torch_type))`` pair.  The same
    tensor is concatenated ``num`` times along ``axis``; the reference is the
    float concatenation, quantized and dequantized again (followed by ReLU
    when ``relu`` is truthy).  Finally the op is expected to raise a
    ``RuntimeError`` when one of the inputs is per-channel quantized.
    """
    X, (scale, zero_point, torch_type) = X
    assume(axis < X.ndim)
    X = torch.from_numpy(X)

    tensors_q = [
        torch.quantize_linear(X, scale, zero_point, torch_type)
        for _ in range(num)
    ]
    tensors_ref = [X] * num

    cat_ref = torch.cat(tensors_ref, axis=axis)
    cat_ref = torch.quantize_linear(cat_ref, scale, zero_point, torch_type)
    cat_ref = cat_ref.dequantize()

    if relu:
        cat_ref = F.relu(cat_ref)
        q_cat_op = torch.ops.quantized.cat_relu
    else:
        q_cat_op = torch.ops.quantized.cat

    cat_q = q_cat_op(tensors_q, axis=axis, scale=scale,
                     zero_point=zero_point)
    cat_q = cat_q.dequantize()
    np.testing.assert_equal(cat_ref.numpy(), cat_q.numpy())

    # Test the cat on per-channel quantized tensor.
    ch_axis = 1
    num_channels = X.shape[ch_axis]
    # Build the per-channel parameters with explicit dtypes.  Going through
    # ``np.array([0] * n)`` would inherit NumPy's platform-dependent default
    # integer type, so the dtype is pinned here to keep the RuntimeError
    # check below from being triggered by an unrelated dtype mismatch.
    scales = torch.ones(num_channels, dtype=torch.float64)
    zero_points = torch.zeros(num_channels, dtype=torch.long)
    tensors_q[0] = torch.quantize_linear_per_channel(
        X, scales, zero_points, axis=[ch_axis], dtype=torch_type)
    with self.assertRaisesRegex(RuntimeError, "supported.*cat"):
        cat_q = q_cat_op(tensors_q, axis=axis, scale=scale,
                         zero_point=zero_point)
def test_qconv(
        self,
        batch_size,
        input_channels_per_group,
        height,
        width,
        output_channels_per_group,
        groups,
        kernel_h,
        kernel_w,
        stride_h,
        stride_w,
        pad_h,
        pad_w,
        dilation,
        X_scale,
        X_zero_point,
        W_scale,
        W_zero_point,
        Y_scale,
        Y_zero_point,
        use_bias,
        use_relu,
        use_channelwise
):
    """Compare the quantized conv2d op with a float ``torch.nn.Conv2d``.

    Random integer inputs, weights and bias are generated, dequantized with
    the given scales / zero points and run through a float ``Conv2d``
    (followed by ReLU when ``use_relu`` is truthy).  That result is
    quantized with ``Y_scale`` / ``Y_zero_point`` and compared, with
    ``decimal=0``, against the output of the quantized operator.
    ``W_scale`` and ``W_zero_point`` are sequences; only their first entry
    is used unless ``use_channelwise`` is truthy.
    """
    qconv = torch.ops.quantized.fbgemm_conv2d
    if use_relu:
        qconv = torch.ops.quantized.fbgemm_conv2d_relu
    qconv_prepack = torch.ops.quantized.fbgemm_conv_prepack

    input_channels = input_channels_per_group * groups  # C
    output_channels = output_channels_per_group * groups  # K

    # Repeat, then truncate, so that W_scale and W_zero_point hold exactly
    # one entry per output channel.
    W_scale = (W_scale * output_channels)[:output_channels]
    W_zero_point = (W_zero_point * output_channels)[:output_channels]

    strides = [stride_h, stride_w]
    paddings = [pad_h, pad_w]
    dilations = [dilation, dilation]

    # For testing, we use small values for weights and for activations so
    # that no overflow occurs in vpmaddubsw instruction. If the overflow
    # occurs in qconv implementation and if there is no overflow in
    # reference we can't exactly match the results with reference.
    # Please see the comment in qconv implementation file
    # (aten/src/ATen/native/quantized/cpu/qconv.cpp) for more details.
    W_value_min, W_value_max = -5, 5
    X_value_min, X_value_max = 0, 4

    # the operator expects them in the format
    # (output_channels, input_channels/groups, kernel_h, kernel_w)
    weight_shape = (
        output_channels, input_channels // groups, kernel_h, kernel_w)
    W_init = torch.from_numpy(
        np.random.randint(W_value_min, W_value_max, weight_shape))
    b_init = torch.from_numpy(np.random.randint(0, 10, (output_channels,)))
    X_init = torch.from_numpy(
        np.random.randint(
            X_value_min, X_value_max,
            (batch_size, input_channels, height, width)))

    # Dequantize the integer inputs into the float reference domain.
    X = X_scale * (X_init - X_zero_point).to(dtype=torch.float)
    b_float = (b_init - 0).to(dtype=torch.float)  # bias zero point is 0
    if use_channelwise:
        W_scales_tensor = torch.tensor(W_scale, dtype=torch.float)
        W_zero_points_tensor = torch.tensor(W_zero_point, dtype=torch.float)
        centered = (W_init.to(dtype=torch.float)
                    - W_zero_points_tensor.reshape(-1, 1, 1, 1))
        W = (W_scales_tensor.reshape(-1, 1, 1, 1)
             * centered.to(dtype=torch.float))
        b = X_scale * W_scales_tensor * b_float
    else:
        W = W_scale[0] * (W_init - W_zero_point[0]).to(dtype=torch.float)
        b = X_scale * W_scale[0] * b_float

    # Existing floating point conv operator
    conv_op = torch.nn.Conv2d(
        input_channels,
        output_channels,
        (kernel_h, kernel_w),
        (stride_h, stride_w),
        (pad_h, pad_w),
        (dilation, dilation),
        groups)
    # assign weights
    conv_op.weight = torch.nn.Parameter(W, requires_grad=False)
    if use_bias:
        conv_op.bias = torch.nn.Parameter(b, requires_grad=False)
    else:
        conv_op.bias = None

    result_ref = conv_op(X)
    if use_relu:
        result_ref = torch.nn.ReLU()(result_ref)

    # quantize reference results for comparison
    result_ref_q = torch.quantize_linear(
        result_ref, scale=Y_scale, zero_point=Y_zero_point,
        dtype=torch.quint8)

    # reformat X_init and W_init in the required format by qconv operator
    # NCHW -> NHWC
    X_NHWC = X.permute([0, 2, 3, 1]).contiguous()
    # K(C/G)RS -> KRS(C/G)
    W_KRSC = W.permute([0, 2, 3, 1]).contiguous()

    X_q = torch.quantize_linear(
        X_NHWC, scale=X_scale, zero_point=X_zero_point, dtype=torch.quint8)

    b_q = None
    if use_channelwise:
        W_q = torch.quantize_linear_per_channel(
            W_KRSC,
            W_scales_tensor.to(dtype=torch.double),
            W_zero_points_tensor.to(dtype=torch.long),
            [0],
            dtype=torch.qint8)
        if use_bias:
            b_q = torch.quantize_linear_per_channel(
                b,
                X_scale * W_scales_tensor.to(dtype=torch.double),
                torch.zeros(output_channels, dtype=torch.long),
                [0],
                dtype=torch.qint32)
    else:
        W_q = torch.quantize_linear(
            W_KRSC, scale=W_scale[0], zero_point=W_zero_point[0],
            dtype=torch.qint8)
        if use_bias:
            b_q = torch.quantize_linear(
                b, scale=X_scale * W_scale[0], zero_point=0,
                dtype=torch.qint32)

    W_prepack = qconv_prepack(W_q, strides, paddings, dilations, groups)

    Y_q = qconv(
        X_q,
        W_prepack,
        b_q,
        strides,
        paddings,
        dilations,
        groups,
        Y_scale,
        Y_zero_point,
    )
    # Back to NCHW format
    Y_q = Y_q.permute([0, 3, 1, 2]).contiguous()

    # Make sure the results match
    # assert_array_almost_equal compares using the following formula:
    #     abs(desired-actual) < 1.5 * 10**(-decimal)
    # (https://docs.scipy.org/doc/numpy/reference/generated/numpy.testing.assert_almost_equal.html)
    # We use decimal = 0 to ignore off-by-1 differences between reference
    # and test. Off-by-1 differences arise due to the order of round and
    # zero_point addition operation, i.e., if addition followed by round is
    # used by reference and round followed by addition is used by test, the
    # results may differ by 1.
    # For example, the result of round(2.5) + 1 is 3 while round(2.5 + 1)
    # is 4 assuming the rounding mode is round-to-nearest, ties-to-even.
    np.testing.assert_array_almost_equal(
        result_ref_q.int_repr().numpy(), Y_q.int_repr().numpy(), decimal=0)