Example #1
    def test_equal(self, X, X2, X_per_channel, X2_per_channel):
        X, X_params = X
        (scale, zero_point, torch_type) = X_params
        X2, X2_params = X2
        (scale2, zero_point2, torch_type2) = X2_params

        X = torch.from_numpy(X)
        if X_per_channel:
            X_scheme = 'per_channel'
            channels = X.shape[-1]
            qX = torch.quantize_linear_per_channel(
                X,
                scales=torch.tensor([scale] * channels),
                zero_points=torch.tensor([zero_point] * channels),
                dtype=torch_type,
                axis=[X.ndim - 1])
        else:
            X_scheme = 'per_tensor'
            qX = torch.quantize_linear(X,
                                       scale=scale,
                                       zero_point=zero_point,
                                       dtype=torch_type)
        X2 = torch.from_numpy(X2)
        if X2_per_channel:
            X2_scheme = 'per_channel'
            channels = X2.shape[-1]
            qX2 = torch.quantize_linear_per_channel(
                X2,
                scales=torch.tensor([scale2] * channels),
                zero_points=torch.tensor([zero_point2] * channels),
                dtype=torch_type2,
                axis=[X2.ndim - 1])
        else:
            X2_scheme = 'per_tensor'
            qX2 = torch.quantize_linear(X2,
                                        scale=scale2,
                                        zero_point=zero_point2,
                                        dtype=torch_type2)

        def equal_ref(X, params, X_scheme, X2, params2, X2_scheme):
            if X_scheme != X2_scheme:
                return False
            if params != params2:
                return False
            if X.shape != X2.shape:
                return False
            if (X != X2).any():
                return False
            return True

        self.assertEqual(
            qX.equal(qX),
            equal_ref(X, X_params, X_scheme, X, X_params, X_scheme))
        self.assertEqual(
            qX.equal(qX2),
            equal_ref(X, X_params, X_scheme, X2, X2_params, X2_scheme))
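
Note: torch.quantize_linear and torch.quantize_linear_per_channel are early API
names that were later renamed to torch.quantize_per_tensor and
torch.quantize_per_channel. As a minimal sketch of the equality semantics this
test encodes in equal_ref, assuming a recent PyTorch build with the newer names:

import torch

x = torch.randn(2, 3)
qx = torch.quantize_per_tensor(x, scale=0.1, zero_point=0, dtype=torch.quint8)
qy = torch.quantize_per_tensor(x, scale=0.2, zero_point=0, dtype=torch.quint8)

# equal() compares the quantization scheme, the (scale, zero_point, dtype)
# parameters, the shape, and the values, mirroring equal_ref above.
print(qx.equal(qx))  # True
print(qx.equal(qy))  # False: scales differ even though the float input matched
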
Example #2
    def test_qtensor_per_channel_affine(self):
        r = torch.rand(3, 2, dtype=torch.float) * 2 - 4
        scales = torch.tensor([2.0, 3.0], dtype=torch.double)
        zero_points = torch.tensor([5, 10], dtype=torch.long)
        axis = [1]

        def quantize_c(data, scales, zero_points):
            res = torch.empty((3, 2))
            quant_min, quant_max = 0, 255
            for i in range(3):
                for j in range(2):
                    res[i][j] = np.clip(
                        np.round(data[i][j] / scales[j]) + zero_points[j],
                        quant_min, quant_max)
            return res

        qr = torch.quantize_linear_per_channel(r, scales, zero_points, axis,
                                               torch.quint8)
        rqr = qr.dequantize()
        self.assertTrue(
            np.allclose(qr.int_repr(), quantize_c(r, scales, zero_points)))
        self.assertTrue(
            np.allclose(r.numpy(),
                        rqr.numpy(),
                        atol=2 / np.min(scales.numpy())))
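
The reference quantize_c above implements the standard affine mapping
q = clip(round(x / scale) + zero_point, quant_min, quant_max), and dequantize
inverts it as x_hat = (q - zero_point) * scale, so the per-element round-trip
error is at most scale / 2 when no clipping occurs. A minimal NumPy sketch of
that round trip for a single channel:

import numpy as np

x = np.array([-3.1, 0.0, 1.7], dtype=np.float32)
scale, zero_point = 2.0, 5
q = np.clip(np.round(x / scale) + zero_point, 0, 255)  # quantize
x_hat = (q - zero_point) * scale                       # dequantize
assert np.allclose(x, x_hat, atol=scale / 2)           # error bounded by scale / 2
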
Example #3
    def test_cat(self, X, num, dim, relu):
        tensors_q = []
        tensors_ref = []
        X, (scale, zero_point, torch_type) = X
        assume(dim < X.ndim)
        X = torch.from_numpy(X)
        new_shape = np.array(X.shape)
        new_shape[dim] = 0
        for idx in range(num):
            tensors_q.append(
                torch.quantize_linear(X, scale, zero_point, torch_type))
            tensors_ref.append(X)
            new_shape[dim] += tensors_ref[-1].shape[dim]

        cat_ref = torch.cat(tensors_ref, dim=dim)
        cat_ref = torch.quantize_linear(cat_ref, scale, zero_point, torch_type)
        cat_ref = cat_ref.dequantize()

        if relu:
            cat_ref = F.relu(cat_ref)
            q_cat_op = torch.ops.quantized.cat_relu
            q_cat_out_op = torch.ops.quantized.cat_relu_out
        else:
            q_cat_op = torch.ops.quantized.cat
            q_cat_out_op = torch.ops.quantized.cat_out

        cat_q = q_cat_op(tensors_q,
                         dim=dim,
                         scale=scale,
                         zero_point=zero_point)
        cat_q = cat_q.dequantize()
        np.testing.assert_equal(cat_ref.numpy(), cat_q.numpy())

        cat_q_out = torch._empty_affine_quantized(list(new_shape),
                                                  scale=scale,
                                                  zero_point=zero_point,
                                                  dtype=torch_type)
        q_cat_out_op(tensors_q, dim=dim, out=cat_q_out)
        cat_q_out = cat_q_out.dequantize()
        np.testing.assert_equal(cat_ref.numpy(), cat_q_out.numpy())

        # Test cat on a per-channel quantized tensor.
        ch_axis = 1
        scales = torch.from_numpy(np.array([1.0] * X.shape[ch_axis]))
        scales = scales.to(torch.float64)
        zero_points = torch.from_numpy(np.array([0] * X.shape[ch_axis]))
        zero_points = zero_points.to(torch.long)
        tensors_q[0] = torch.quantize_linear_per_channel(X,
                                                         scales,
                                                         zero_points,
                                                         axis=[ch_axis],
                                                         dtype=torch_type)
        with self.assertRaisesRegex(RuntimeError, "supported.*cat"):
            cat_q = q_cat_op(tensors_q,
                             dim=ch_axis,
                             scale=scale,
                             zero_point=zero_point)
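
The reference recipe used above, dequantize -> cat -> requantize, is the
general way to reason about quantized concatenation. A minimal per-tensor
sketch, assuming a recent PyTorch build where torch.cat accepts per-tensor
quantized inputs and torch.quantize_per_tensor is the newer name of
torch.quantize_linear:

import torch

xs = [torch.rand(2, 3) for _ in range(2)]
qs = [torch.quantize_per_tensor(x, 0.1, 0, torch.quint8) for x in xs]
ref = torch.quantize_per_tensor(torch.cat(xs, dim=0), 0.1, 0, torch.quint8)
out = torch.cat(qs, dim=0)  # the output reuses the qparams of qs[0]
print(torch.allclose(ref.dequantize(), out.dequantize()))  # expected: True
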
Example #4
    def test_qconv_unpack(self, X, strideH, strideW, padH, padW, channelwise):
        (inputs, filters, bias, groups) = X
        inputs, (inputs_scale, inputs_zero_point, inputs_qtype) = inputs
        filters, (filters_scale, filters_zero_point, filters_qtype) = filters
        bias, (bias_scale, bias_zero_point, bias_qtype) = bias

        if channelwise:
            output_channels = filters.shape[0]
            filters_scale = torch.tensor([filters_scale] * output_channels).to(torch.double)
            filters_zero_point = torch.tensor([filters_zero_point] * output_channels).to(torch.long)

        qconv_prepack = torch.ops.quantized.fbgemm_conv_prepack
        qconv_unpack = torch.ops.quantized.fbgemm_conv_unpack

        # Orig tensor is assumed to be in K(C/G)RS format
        W = torch.from_numpy(filters).to(torch.float)
        # K(C/G)RS -> KRS(C/G)
        W_KRSC = W.permute([0, 2, 3, 1]).contiguous()
        if channelwise:
            W_q = torch.quantize_linear_per_channel(W_KRSC,
                                                    scales=filters_scale,
                                                    zero_points=filters_zero_point,
                                                    axis=[0],
                                                    dtype=filters_qtype)
        else:
            W_q = torch.quantize_linear(W_KRSC, scale=filters_scale, zero_point=filters_zero_point, dtype=filters_qtype)

        # Pack weights using weight packing operator
        strides = [strideH, strideW]
        paddings = [padH, padW]
        dilations = [1, 1]
        W_packed = qconv_prepack(W_q, strides, paddings, dilations, groups)
        # Unpack the weights using the weight unpacking operator (used for serialization)
        W_unpacked = qconv_unpack(W_packed)

        # Assert equal
        np.testing.assert_equal(W_q.int_repr().numpy(), W_unpacked.int_repr().numpy())
        if channelwise:
            np.testing.assert_array_almost_equal(np.float32(W_q.q_per_channel_scales().numpy()),
                                                 np.float32(W_unpacked.q_per_channel_scales().numpy()),
                                                 decimal=4)
            np.testing.assert_equal(W_q.q_per_channel_zero_points().numpy(), W_unpacked.q_per_channel_zero_points().numpy())
        else:
            np.testing.assert_equal(np.float32(W_q.q_scale()), np.float32(W_unpacked.q_scale()))
            np.testing.assert_equal(W_q.q_zero_point(), W_unpacked.q_zero_point())
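
The K(C/G)RS -> KRS(C/G) step above is a plain axis reorder followed by a copy
to make the result contiguous; a small sketch showing that permute([0, 2, 3, 1])
moves the per-group channel dimension last (the shapes here are illustrative):

import torch

w = torch.randn(8, 3, 5, 5)                    # K, C/G, R, S
w_krsc = w.permute([0, 2, 3, 1]).contiguous()
print(w_krsc.shape)                            # torch.Size([8, 5, 5, 3]) == K, R, S, C/G
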
Example #5
    def test_qtensor_per_channel_permute(self):
        r = torch.rand(20, 10, 2, 2, dtype=torch.float) * 4 - 2
        scales = torch.rand(10) * 0.02 + 0.01
        zero_points = torch.round(torch.rand(10) * 2 - 1).to(torch.long)
        qr = torch.quantize_linear_per_channel(r, scales, zero_points, [1], torch.qint8)

        # we can't reorder the axes of a per-channel quantized tensor
        with self.assertRaises(RuntimeError):
            qr.transpose(0, 1)

        # but we can change the memory format
        qlast = qr.contiguous(memory_format=torch.channels_last)
        self.assertEqual(qr.stride(), list(reversed(sorted(qr.stride()))))
        self.assertNotEqual(qlast.stride(), list(reversed(sorted(qlast.stride()))))
        self.assertEqual(qr.int_repr(), qlast.int_repr())
        self.assertEqual(scales, qlast.q_per_channel_scales())
        self.assertEqual(zero_points, qlast.q_per_channel_zero_points())
        self.assertEqual((1,), qlast.q_per_channel_axis())
        self.assertEqual(qlast.dequantize(), qr.dequantize())
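
channels_last only changes the strides (NCHW values laid out in NHWC order);
the logical shape, the values, and the per-channel axis are unchanged, which is
why qlast above still reports axis 1. A small sketch of the stride change on a
plain float tensor of the same shape:

import torch

t = torch.empty(20, 10, 2, 2)
print(t.stride())  # (40, 4, 2, 1): contiguous NCHW
print(t.contiguous(memory_format=torch.channels_last).stride())  # (40, 1, 20, 10)
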
Example #6
    def test_cat(self, X, num, axis, relu):
        tensors_q = []
        tensors_ref = []
        X, (scale, zero_point, torch_type) = X
        assume(axis < X.ndim)
        X = torch.from_numpy(X)
        for idx in range(num):
            tensors_q.append(
                torch.quantize_linear(X, scale, zero_point, torch_type))
            tensors_ref.append(X)

        cat_ref = torch.cat(tensors_ref, axis=axis)
        cat_ref = torch.quantize_linear(cat_ref, scale, zero_point, torch_type)
        cat_ref = cat_ref.dequantize()

        if relu:
            cat_ref = F.relu(cat_ref)
            q_cat_op = torch.ops.quantized.cat_relu
        else:
            q_cat_op = torch.ops.quantized.cat
        cat_q = q_cat_op(tensors_q,
                         axis=axis,
                         scale=scale,
                         zero_point=zero_point)
        cat_q = cat_q.dequantize()

        np.testing.assert_equal(cat_ref.numpy(), cat_q.numpy())

        # Test cat on a per-channel quantized tensor.
        ch_axis = 1
        scales = torch.from_numpy(np.array([1.0] * X.shape[ch_axis]))
        zero_points = torch.from_numpy(np.array([0] * X.shape[ch_axis]))
        tensors_q[0] = torch.quantize_linear_per_channel(X,
                                                         scales,
                                                         zero_points,
                                                         axis=[ch_axis],
                                                         dtype=torch_type)
        with self.assertRaisesRegex(RuntimeError, "supported.*cat"):
            cat_q = q_cat_op(tensors_q,
                             axis=axis,
                             scale=scale,
                             zero_point=zero_point)
Example #7
    def test_qconv(
            self,
            batch_size,
            input_channels_per_group,
            height,
            width,
            output_channels_per_group,
            groups,
            kernel_h,
            kernel_w,
            stride_h,
            stride_w,
            pad_h,
            pad_w,
            dilation,
            X_scale,
            X_zero_point,
            W_scale,
            W_zero_point,
            Y_scale,
            Y_zero_point,
            use_bias,
            use_relu,
            use_channelwise
    ):

        qconv = torch.ops.quantized.fbgemm_conv2d
        if use_relu:
            qconv = torch.ops.quantized.fbgemm_conv2d_relu
        qconv_prepack = torch.ops.quantized.fbgemm_conv_prepack

        # C: total number of input channels
        input_channels = input_channels_per_group * groups
        # K: total number of output channels
        output_channels = output_channels_per_group * groups

        dilation_h = dilation_w = dilation

        # Replicate, then truncate, W_scale and W_zero_point so that each has
        # exactly output_channels elements
        W_scale = W_scale * output_channels
        W_zero_point = W_zero_point * output_channels
        W_scale = W_scale[:output_channels]
        W_zero_point = W_zero_point[:output_channels]

        # For testing, we use small values for the weights and the activations so
        # that no overflow occurs in the vpmaddubsw instruction. If overflow occurs
        # in the qconv implementation but not in the reference, we can't exactly
        # match the results with the reference. Please see the comment in the qconv
        # implementation file (aten/src/ATen/native/quantized/cpu/qconv.cpp) for
        # more details.
        W_value_min = -5
        W_value_max = 5

        # the operator expects the weights in the format (output_channels, input_channels/groups, kernel_h, kernel_w)
        W_init = torch.from_numpy(
            np.random.randint(
                W_value_min,
                W_value_max,
                (output_channels, int(input_channels / groups), kernel_h, kernel_w)),
        )

        b_init = torch.from_numpy(np.random.randint(0, 10, (output_channels,)))

        stride = [stride_h, stride_w]
        pad = [pad_h, pad_w]
        dilation = [dilation_h, dilation_w]

        X_value_min = 0
        X_value_max = 4
        X_init = torch.from_numpy(np.random.randint(
            X_value_min, X_value_max, (batch_size, input_channels, height, width)))

        X = X_scale * (X_init - X_zero_point).to(dtype=torch.float)

        if use_channelwise:
            W_scales_tensor = torch.tensor(W_scale, dtype=torch.float)
            W_zero_points_tensor = torch.tensor(W_zero_point, dtype=torch.float)
            W = W_scales_tensor.reshape(-1, 1, 1, 1) * (W_init.to(dtype=torch.float) -
                                                        W_zero_points_tensor.reshape(-1, 1, 1, 1)).to(dtype=torch.float)
            b = X_scale * W_scales_tensor * (b_init - 0).to(dtype=torch.float)
        else:
            W = W_scale[0] * (W_init - W_zero_point[0]).to(dtype=torch.float)
            b = X_scale * W_scale[0] * (b_init - 0).to(dtype=torch.float)

        # Existing floating point conv operator
        conv_op = torch.nn.Conv2d(input_channels,
                                  output_channels,
                                  (kernel_h, kernel_w),
                                  (stride_h, stride_w),
                                  (pad_h, pad_w),
                                  (dilation_h, dilation_w),
                                  groups)

        # assign weights
        conv_op.weight = torch.nn.Parameter(W, requires_grad=False)

        conv_op.bias = torch.nn.Parameter(b, requires_grad=False) if use_bias else None

        result_ref = conv_op(X)
        if use_relu:
            relu = torch.nn.ReLU()
            result_ref = relu(result_ref)
        # quantize reference results for comparison
        result_ref_q = torch.quantize_linear(result_ref, scale=Y_scale, zero_point=Y_zero_point, dtype=torch.quint8)

        # reformat X and W into the layouts required by the qconv operator
        # NCHW -> NHWC
        X_NHWC = X.permute([0, 2, 3, 1]).contiguous()
        # K(C/G)RS -> KRS(C/G)
        W_KRSC = W.permute([0, 2, 3, 1]).contiguous()

        X_q = torch.quantize_linear(X_NHWC, scale=X_scale, zero_point=X_zero_point, dtype=torch.quint8)
        if use_channelwise:
            W_q = torch.quantize_linear_per_channel(W_KRSC,
                                                    W_scales_tensor.to(dtype=torch.double),
                                                    W_zero_points_tensor.to(dtype=torch.long),
                                                    [0],
                                                    dtype=torch.qint8)
            b_q = torch.quantize_linear_per_channel(b,
                                                    X_scale * W_scales_tensor.to(dtype=torch.double),
                                                    torch.zeros(output_channels, dtype=torch.long),
                                                    [0],
                                                    dtype=torch.qint32) if use_bias else None
        else:
            W_q = torch.quantize_linear(W_KRSC, scale=W_scale[0], zero_point=W_zero_point[0], dtype=torch.qint8)
            b_q = torch.quantize_linear(b, scale=X_scale * W_scale[0], zero_point=0, dtype=torch.qint32) if use_bias else None

        W_prepack = qconv_prepack(W_q, stride, pad, dilation, groups)

        Y_q = qconv(
            X_q,
            W_prepack,
            b_q,
            stride,
            pad,
            dilation,
            groups,
            Y_scale,
            Y_zero_point,
        )

        # Back to NCHW format
        Y_q = Y_q.permute([0, 3, 1, 2]).contiguous()

        # Make sure the results match
        # assert_array_almost_equal compares using the following formula:
        #     abs(desired-actual) < 1.5 * 10**(-decimal)
        # (https://docs.scipy.org/doc/numpy/reference/generated/numpy.testing.assert_almost_equal.html)

        # We use decimal = 0 to ignore off-by-1 differences between the reference
        # and the test. Off-by-1 differences arise from the order of the round and
        # zero_point-addition operations: if the reference adds the zero_point and
        # then rounds while the test rounds and then adds, the results may differ
        # by 1.

        # For example, the result of round(2.5) + 1 is 3 while round(2.5 + 1) is 4
        # assuming the rounding mode is round-to-nearest, ties-to-even.
        np.testing.assert_array_almost_equal(result_ref_q.int_repr().numpy(), Y_q.int_repr().numpy(), decimal=0)
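
A quick numeric check of the rounding argument in the comment above (both NumPy
and PyTorch round to nearest with ties to even):

import numpy as np

print(np.round(2.5) + 1)  # 3.0: the tie at 2.5 rounds to the even value 2
print(np.round(2.5 + 1))  # 4.0: the tie at 3.5 rounds to the even value 4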