Example #1
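# Common imports assumed by all of the examples below (a sketch; the helper
# modules live in PyTorch's test harness and their exact paths vary by version):
import unittest

import numpy as np
import torch
import torch.nn.functional as F
from torch.nn.modules.utils import _pair

from hypothesis import assume, given
from hypothesis import strategies as st

# Helpers such as hu (hypothesis_utils), qtensor, array_shapes, qparams,
# TestCase, _quantize, _dequantize, _requantize, qlinear_ref and
# avoid_vpmaddubsw_overflow_linear also come from that harness; minimal
# sketches of some of them are given between the examples.
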
class TestQuantizedConv(unittest.TestCase):
    """Tests the correctness of quantized convolution op."""
    @given(
        batch_size=st.integers(1, 3),
        input_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]),
        height=st.integers(10, 16),
        width=st.integers(7, 14),
        output_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]),
        groups=st.integers(1, 3),
        kernel_h=st.integers(1, 7),
        kernel_w=st.integers(1, 7),
        stride_h=st.integers(1, 2),
        stride_w=st.integers(1, 2),
        pad_h=st.integers(0, 2),
        pad_w=st.integers(0, 2),
        dilation=st.integers(1, 1),
        use_bias=st.booleans(),
    )
    def test_qconv(
            self,
            batch_size,
            input_channels_per_group,
            height,
            width,
            output_channels_per_group,
            groups,
            kernel_h,
            kernel_w,
            stride_h,
            stride_w,
            pad_h,
            pad_w,
            dilation,
            use_bias
    ):

        qconv = torch.ops.quantized.fbgemm_conv2d
        qconv_prepack = torch.ops.quantized.fbgemm_conv_prepack

        # C
        input_channels = input_channels_per_group * groups
        # K
        output_channels = output_channels_per_group * groups

        dilation_h = dilation_w = dilation

        # For testing we use small values for weights and activations so that
        # no overflow occurs in the vpmaddubsw instruction. If qconv overflows
        # while the reference implementation does not, the results cannot match
        # exactly. See the comment in the qconv implementation file
        # (aten/src/ATen/native/quantized/cpu/qconv.cpp) for details.
        W_value_min = -5
        W_value_max = 5

        # The operator expects weights in the format
        # (output_channels, input_channels / groups, kernel_h, kernel_w).
        W_init = torch.from_numpy(
            np.random.randint(
                W_value_min,
                W_value_max,
                (output_channels, input_channels // groups, kernel_h, kernel_w))
        )

        b_init = torch.from_numpy(np.random.randint(0, 10, (output_channels,)))

        # Existing floating point conv operator
        conv_op = torch.nn.Conv2d(
            input_channels,
            output_channels,
            (kernel_h, kernel_w),
            (stride_h, stride_w),
            (pad_h, pad_w),
            (dilation_h, dilation_w),
            groups,
        )

        # assign the weights
        conv_op.weight = torch.nn.Parameter(
            W_init.to(dtype=torch.float), requires_grad=False
        )
        conv_op.bias = torch.nn.Parameter(
            b_init.to(dtype=torch.float), requires_grad=False
        ) if use_bias else None

        X_value_min = 0
        X_value_max = 4
        X_init = torch.from_numpy(np.random.randint(
            X_value_min, X_value_max, (batch_size, input_channels, height, width)))

        # run on an input tensor
        result_ref = conv_op(X_init.to(dtype=torch.float))

        # Reformat X_init and W_init into the layouts the conv operator requires
        # NCHW -> NHWC
        X_NHWC = X_init.permute([0, 2, 3, 1]).contiguous()
        # K(C/G)RS -> KRS(C/G)
        W_KRSC = W_init.permute([0, 2, 3, 1]).contiguous()

        X_scale = 1.5
        # Currently only a zero point of 0 is supported.
        X_zero_point = 0
        X = X_scale * (X_NHWC - X_zero_point).to(dtype=torch.float)

        W_scale = 2.5
        W_zero_point = 0
        W = W_scale * (W_KRSC - W_zero_point).to(dtype=torch.float)

        b = X_scale * W_scale * (b_init - 0).to(dtype=torch.float)

        X_q = torch.quantize_linear(X, scale=X_scale, zero_point=X_zero_point, dtype=torch.quint8)
        W_q = torch.quantize_linear(W, scale=W_scale, zero_point=W_zero_point, dtype=torch.qint8)
        b_q = torch.quantize_linear(b, scale=X_scale * W_scale, zero_point=0, dtype=torch.qint32) if use_bias else None

        W_prepack = qconv_prepack(W_q, groups)
        Y_scale = 7.3
        Y_zero_point = 5

        Y_q = qconv(
            X_q,
            W_prepack,
            b_q,
            [stride_h, stride_w],  # stride
            [pad_h, pad_w],  # padding
            [dilation_h, dilation_w],  # dilation
            groups,  # groups
            Y_scale,
            Y_zero_point,
        )

        result_NHWK = result_ref.permute([0, 2, 3, 1])
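        # The reference output is requantized with the combined multiplier
        # (X_scale * W_scale) / Y_scale and offset Y_zero_point, the standard
        # requantization scheme for quantized conv/linear outputs.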
        result_q = _requantize(
            result_NHWK.numpy(), X_scale * W_scale / Y_scale, Y_zero_point
        )

        # Make sure the results match
        np.testing.assert_equal(result_q, Y_q.int_repr().numpy())

    """Tests the correctness of the quantized::fbgemm_qconv_unpack op."""
    @given(Q=qtensor(shapes=array_shapes(4, 4,), dtypes=((torch.qint8, np.int8, 0),)))
    def test_qconv_unpack(self, Q):
        W, (W_scale, W_zp), (qmin, qmax), (torch_type, np_type) = Q
        qconv_prepack = torch.ops.quantized.fbgemm_conv_prepack
        qconv_unpack = torch.ops.quantized.fbgemm_conv_unpack

        # Orig tensor is assumed to be in K(C/G)RS format
        W = torch.from_numpy(W)
        # K(C/G)RS -> KRS(C/G)
        W_KRSC = W.permute([0, 2, 3, 1]).contiguous()
        W_q = torch.quantize_linear(W_KRSC, scale=W_scale, zero_point=W_zp, dtype=torch_type)

        # Pack weights using weight packing operator
        W_packed = qconv_prepack(W_q, 1)
        # Unpack weights using the weight unpacking operator (used for serialization)
        W_unpacked = qconv_unpack(W_packed)

        # Assert equal
        np.testing.assert_equal(W_q.int_repr().numpy(), W_unpacked.int_repr().numpy())
        np.testing.assert_equal(W_q.q_scale(), W_unpacked.q_scale())
        np.testing.assert_equal(W_q.q_zero_point(), W_unpacked.q_zero_point())
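
The _requantize helper above is not defined in the snippet; a minimal sketch of
what it is assumed to do (saturating affine requantization of an accumulator):

def _requantize(x, multiplier, zero_point, qmin=0, qmax=255, qtype=np.uint8):
    """Requantize a numpy accumulator: q = clamp(round(x * multiplier) + zp)."""
    qx = (x * multiplier).round() + zero_point
    return np.clip(qx, qmin, qmax).astype(qtype)
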
Example #2
class TestQuantizedOps(TestCase):
    """Computes the output shape given pooling parameters."""
    def _pool_output_shape(self, input_size, kernel_size, padding, stride,
                           dilation, ceiling_mode=False):
        output_size = (
            (input_size + 2 * padding - dilation * (kernel_size - 1) - 1
             + (stride - 1 if ceiling_mode else 0)) // stride + 1)
        if (padding > 0 and
                ((output_size - 1) * stride >= input_size + padding)):
            output_size += 1
        return output_size

    """Tests the correctness of the quantized::relu op."""
    @given(Q=qtensor(shapes=array_shapes(1, 5, 1, 5)))
    def test_qrelu(self, Q):
        X, (scale, zero_point), (qmin, qmax), (torch_type, np_type) = Q
        relu = torch.ops.quantized.relu

        Y = X.copy()
        X = torch.from_numpy(X)

        qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)
        qY_hat = relu(qX)

        Y[Y < 0] = 0
        qY = torch.quantize_linear(torch.from_numpy(Y), scale=scale, zero_point=zero_point, dtype=torch_type)
        self.assertEqual(qY.int_repr(), qY_hat.int_repr())

    """Tests the correctness of the add and add_relu op."""
    def test_qadd_relu_same_qparams(self):
        add_relu = torch.ops.quantized.add_relu
        add = torch.ops.quantized.add

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale = 2.0
        zero_point = 127
        qA = torch.quantize_linear(A, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)

        # Add ground truth
        C = (qA.dequantize() + qB.dequantize()).numpy()
        qC = _quantize(C, scale, zero_point)
        qC_hat = add(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized addition failed.")

        # Add + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale, zero_point)
        qCrelu_hat = add_relu(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized addition with ReLU failed.")

    """Tests the correctness of the add and add_relu op."""
    def test_qadd_relu_different_qparams(self):
        add_relu = torch.ops.quantized.add_relu
        add = torch.ops.quantized.add

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale_A = 3.0
        zero_point_A = 7
        scale_B = 5.0
        zero_point_B = 127

        scale_C = 0.5
        zero_point_C = 5

        qA = torch.quantize_linear(A, scale=scale_A, zero_point=zero_point_A,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale_B, zero_point=zero_point_B,
                                   dtype=torch.quint8)

        # Add ground truth
        C = (qA.dequantize() + qB.dequantize()).numpy()
        qC = _quantize(C, scale_C, zero_point_C)
        qC_hat = add(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized addition failed.")

        # Add + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale_C, zero_point_C)
        qCrelu_hat = add_relu(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized addition with ReLU failed.")

    """Tests max pool operation on quantized tensors."""
    @given(Q=qtensor(shapes=array_shapes(min_dims=3, max_dims=4,
                                         min_side=1, max_side=10)),
           kernel=st.sampled_from((3, 5, 7)),
           stride=st.integers(1, 2),
           dilation=st.integers(1, 2),
           padding=st.integers(0, 2))
    def test_max_pool2d(self, Q, kernel, stride, dilation, padding):
        import torch.nn.functional as F
        X, (scale, zero_point), (qmin, qmax), (torch_type, np_type) = Q

        # Check constraints
        assume(kernel // 2 >= padding)  # Kernel cannot be overhanging!
        iH, iW = X.shape[-2:]
        oH = self._pool_output_shape(iH, kernel, padding, stride, dilation)
        assume(oH > 0)
        oW = self._pool_output_shape(iW, kernel, padding, stride, dilation)
        assume(oW > 0)

        k = (kernel, kernel)
        s = (stride, stride)
        d = (dilation, dilation)
        p = (padding, padding)

        q_max_pool = torch.ops.quantized.max_pool2d

        a = torch.from_numpy(X)
        qa = torch.quantize_linear(a, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        a_hat = qa.dequantize()
        a_pool = F.max_pool2d(a_hat, kernel_size=k, stride=s, padding=p,
                              dilation=d)

        qa_pool_hat = q_max_pool(qa, kernel_size=k, stride=s, padding=p,
                                 dilation=d)
        a_pool_hat = qa_pool_hat.dequantize()

        np.testing.assert_equal(a_pool.numpy(), a_pool_hat.numpy())
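
The _quantize helper used above (and its inverse _dequantize, used in later
examples) is also a harness function; a minimal sketch consistent with how the
tests call it, assuming plain affine quantization:

def _quantize(x, scale, zero_point, qmin=0, qmax=255, qtype=np.uint8):
    """Affine-quantize a float numpy array: q = clamp(round(x / scale) + zp)."""
    qx = np.round(x / scale) + zero_point
    return np.clip(qx, qmin, qmax).astype(qtype)

def _dequantize(qx, scale, zero_point):
    """Inverse affine mapping: x = scale * (q - zp)."""
    return (qx.astype(np.float32) - zero_point) * scale
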
Example #3
class TestQuantizedOps(TestCase):
    """Computes the output shape given pooling parameters."""
    def _pool_output_shape(self, input_size, kernel_size, padding, stride,
                           dilation, ceiling_mode=False):
        if stride is None:
            stride = kernel_size
        output_size = (
            (input_size + 2 * padding - dilation * (kernel_size - 1) - 1
             + (stride - 1 if ceiling_mode else 0)) // stride + 1)
        if (padding > 0 and
                ((output_size - 1) * stride >= input_size + padding)):
            output_size += 1
        return output_size
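    # Worked example: input_size=10, kernel_size=3, padding=1, stride=2,
    # dilation=1 gives (10 + 2*1 - 1*(3-1) - 1) // 2 + 1 = 9 // 2 + 1 = 5,
    # matching torch.nn.functional.max_pool2d's output-shape formula.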

    """Tests the correctness of the quantized::relu op."""
    @given(X=hu.tensor(shapes=hu.array_shapes(1, 5, 1, 5),
                       qparams=hu.qparams()))
    def test_qrelu(self, X):
        X, (scale, zero_point, torch_type) = X

        Y = X.copy()
        Y[Y < 0] = 0
        qY = torch.quantize_linear(torch.from_numpy(Y), scale=scale,
                                   zero_point=zero_point, dtype=torch_type)
        X = torch.from_numpy(X)
        qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        ops_under_test = {
            'ops.quantized': torch.ops.quantized.relu,
            'native': torch.relu,
            'nn.functional': torch.nn.functional.relu
        }

        for name, op in ops_under_test.items():
            qY_hat = op(qX)
            self.assertEqual(qY, qY_hat, message="{} relu failed".format(name))

    """Tests the correctness of the add and add_relu op."""
    def test_qadd_relu_same_qparams(self):
        add_relu = torch.ops.quantized.add_relu
        add = torch.ops.quantized.add

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale = 2.0
        zero_point = 127
        qA = torch.quantize_linear(A, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)

        # Add ground truth
        C = (qA.dequantize() + qB.dequantize()).numpy()
        qC = _quantize(C, scale, zero_point)
        qC_hat = add(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized addition failed.")

        # Add + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale, zero_point)
        qCrelu_hat = add_relu(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized addition with ReLU failed.")

    """Tests the correctness of the add and add_relu op."""
    def test_qadd_relu_different_qparams(self):
        add_relu = torch.ops.quantized.add_relu
        add = torch.ops.quantized.add

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale_A = 3.0
        zero_point_A = 7
        scale_B = 5.0
        zero_point_B = 127

        scale_C = 0.5
        zero_point_C = 5

        qA = torch.quantize_linear(A, scale=scale_A, zero_point=zero_point_A,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale_B, zero_point=zero_point_B,
                                   dtype=torch.quint8)

        # Add ground truth
        C = (qA.dequantize() + qB.dequantize()).numpy()
        qC = _quantize(C, scale_C, zero_point_C)
        qC_hat = add(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized addition failed.")

        # Add + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale_C, zero_point_C)
        qCrelu_hat = add_relu(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized addition with ReLU failed.")

    """Tests max pool operation on quantized tensors."""
    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),
           kernel=st.sampled_from((3, 5, 7)),
           stride=st.sampled_from((None, 1, 2)),
           dilation=st.integers(1, 2),
           padding=st.integers(0, 2))
    def test_max_pool2d(self, X, kernel, stride, dilation, padding):
        X, (scale, zero_point, torch_type) = X
        # Check constraints
        assume(kernel // 2 >= padding)  # Kernel cannot be overhanging!
        iH, iW = X.shape[-2:]
        oH = self._pool_output_shape(iH, kernel, padding, stride, dilation)
        assume(oH > 0)
        oW = self._pool_output_shape(iW, kernel, padding, stride, dilation)
        assume(oW > 0)

        a = torch.from_numpy(X)
        a_pool = torch.nn.functional.max_pool2d(a, kernel_size=kernel,
                                                stride=stride,
                                                padding=padding, dilation=dilation)
        a_ref = torch.quantize_linear(a_pool, scale=scale,
                                      zero_point=zero_point, dtype=torch_type)
        a_ref = a_ref.dequantize()
        qa = torch.quantize_linear(a, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        ops_under_test = {
            "torch": torch.max_pool2d,
            "nn.functional": torch.nn.functional.max_pool2d,
            "nn.quantized.functional": torch.nn.quantized.functional.max_pool2d
        }

        for name, op in ops_under_test.items():
            a_hat = op(qa, kernel_size=kernel, stride=stride, padding=padding,
                       dilation=dilation)
            self.assertEqual(a_ref, a_hat.dequantize(),
                             message="{} results are off".format(name))
        # Test ops.quantized.max_pool2d separately, because it does not accept None for stride.
        a_hat = torch.ops.quantized.max_pool2d(
            qa, kernel_size=_pair(kernel),
            stride=_pair(kernel if stride is None else stride),
            padding=_pair(padding), dilation=_pair(dilation))
        self.assertEqual(a_ref, a_hat.dequantize(),
                         message="ops.quantized.max_pool2d results are off")

    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),
           output_size_h=st.integers(1, 10),
           output_size_w=st.integers(1, 10))
    def test_adaptive_avg_pool2d(self, X, output_size_h, output_size_w):
        X, (scale, zero_point, torch_type) = X

        H, W = X.shape[-2:]
        assume(output_size_h <= H)
        assume(output_size_w <= W)
        if output_size_h == output_size_w:
            output_size = output_size_h
        else:
            output_size = (output_size_h, output_size_w)

        X = torch.from_numpy(X)
        qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        # Run reference on int_repr + round to avoid double rounding error.
        X_ref = torch.nn.functional.adaptive_avg_pool2d(
            qX.int_repr().to(torch.float), output_size).round()

        ops_under_test = {
            "nn.functional": torch.nn.functional.adaptive_avg_pool2d,
            "nn.quantized.functional":
                torch.nn.quantized.functional.adaptive_avg_pool2d
        }

        error_message = r"Results are off for {}:\n\tExpected:\n{}\n\tGot:\n{}"

        for name, op in ops_under_test.items():
            qX_hat = op(qX, output_size=output_size)
            qX_repr = qX_hat.int_repr()
            self.assertEqual(X_ref, qX_repr,
                             message=error_message.format(name, X_ref, qX_repr))


    """Tests quantize concatenation (both fused and not)."""
    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),
           num=st.integers(1, 4),
           axis=st.integers(1, 4),
           relu=st.booleans())
    def test_cat(self, X, num, axis, relu):
        tensors_q = []
        tensors_ref = []
        X, (scale, zero_point, torch_type) = X
        assume(axis < X.ndim)
        X = torch.from_numpy(X)
        new_shape = np.array(X.shape)
        new_shape[axis] = 0
        for _ in range(num):
            tensors_q.append(torch.quantize_linear(X, scale, zero_point,
                                                   torch_type))
            tensors_ref.append(X)
            new_shape[axis] += tensors_ref[-1].shape[axis]

        cat_ref = torch.cat(tensors_ref, axis=axis)
        cat_ref = torch.quantize_linear(cat_ref, scale, zero_point, torch_type)
        cat_ref = cat_ref.dequantize()

        if relu:
            cat_ref = F.relu(cat_ref)
            q_cat_op = torch.ops.quantized.cat_relu
            q_cat_out_op = torch.ops.quantized.cat_relu_out
        else:
            q_cat_op = torch.ops.quantized.cat
            q_cat_out_op = torch.ops.quantized.cat_out

        cat_q = q_cat_op(tensors_q, axis=axis, scale=scale,
                         zero_point=zero_point)
        cat_q = cat_q.dequantize()
        np.testing.assert_equal(cat_ref.numpy(), cat_q.numpy())

        cat_q_out = torch._empty_affine_quantized(
            list(new_shape), scale=scale,
            zero_point=zero_point, dtype=torch_type)
        q_cat_out_op(tensors_q, axis=axis, out=cat_q_out)
        cat_q_out = cat_q_out.dequantize()
        np.testing.assert_equal(cat_ref.numpy(), cat_q_out.numpy())

        # Test the cat on per-channel quantized tensor.
        ch_axis = 1
        scales = torch.from_numpy(np.array([1.0] * X.shape[ch_axis]))
        scales = scales.to(torch.float64)
        zero_points = torch.from_numpy(np.array([0] * X.shape[ch_axis]))
        zero_points = zero_points.to(torch.long)
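        # Replacing one input with a per-channel quantized tensor should make
        # the op raise: quantized cat only supports per-tensor affine inputs.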
        tensors_q[0] = torch.quantize_linear_per_channel(
            X, scales, zero_points, axis=[ch_axis], dtype=torch_type)
        with self.assertRaisesRegex(RuntimeError, "supported.*cat"):
            cat_q = q_cat_op(tensors_q, axis=axis, scale=scale,
                             zero_point=zero_point)
Example #4
class TestQuantizedLinear(unittest.TestCase):
    """Tests the correctness of the quantized::fbgemm_linear op."""

    def test_qlinear(self):
        qlinear_prepack = torch.ops.quantized.fbgemm_linear_prepack
        qlinear = torch.ops.quantized.fbgemm_linear

        batch_size = 4
        input_channels = 16
        output_channels = 8

        X_scale = 1.5
        X_zp = 5
        X_value_min = 0
        X_value_max = 225
        X_q0 = np.round(
            np.random.rand(batch_size, input_channels) * (X_value_max - X_value_min)
            + X_value_min
        ).astype(np.uint8)

        W_scale = 0.4
        W_zp = 2
        W_value_min = -128
        W_value_max = 127
        W_q0 = np.round(
            np.random.rand(output_channels, input_channels)
            * (W_value_max - W_value_min)
            + W_value_min
        ).astype(np.int8)

        b_value_min = -10
        b_value_max = 10
        b_q0 = np.round(
            np.random.rand(output_channels) * (b_value_max - b_value_min) + b_value_min
        ).astype(np.int32)

        avoid_vpmaddubsw_overflow_linear(
            batch_size,
            input_channels,
            output_channels,
            X_q0,
            X_value_min,
            X_value_max,
            W_q0,
            W_value_min,
            W_value_max,
        )
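        # (avoid_vpmaddubsw_overflow_linear is a harness helper; it is assumed
        # to adjust X_q0/W_q0 in place so that every pairwise product sum fits
        # in int16, the accumulator width of the vpmaddubsw instruction.)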

        X = torch.from_numpy(_dequantize(X_q0, X_scale, X_zp)).to(dtype=torch.float)
        W = torch.from_numpy(_dequantize(W_q0, W_scale, W_zp)).to(dtype=torch.float)
        b = torch.from_numpy(_dequantize(b_q0, X_scale * W_scale, 0)).to(dtype=torch.float)

        X_q = torch.quantize_linear(X, scale=X_scale, zero_point=X_zp, dtype=torch.quint8)
        W_q = torch.quantize_linear(W, scale=W_scale, zero_point=W_zp, dtype=torch.qint8)
        b_q = torch.quantize_linear(b, scale=X_scale * W_scale, zero_point=0, dtype=torch.qint32)

        # To judge saturation, compare the largest possible accumulator value,
        # X_scale * W_scale * input_channels * X_value_max * W_value_max,
        # against Y_scale * 255 (the largest representable uint8 output).
        Y_scale = 125.1234
        Y_zp = 5

        # Reference quantized Linear operator
        Y_q_ref = qlinear_ref(X_q0, X_scale, X_zp, W_q0, W_scale, W_zp, b_q0, Y_scale, Y_zp)

        # Weight prepacking operator for quantized Linear
        W_prepack = qlinear_prepack(W_q)
        # Quantized Linear operator with prepacked weight
        Y_q = qlinear(X_q, W_prepack, b_q, Y_scale, Y_zp)

        # Assert equal
        np.testing.assert_equal(Y_q_ref, Y_q.int_repr().numpy())

        # Reference quantized result from PyTorch Linear operator
        W_fp32 = W_q.dequantize().to(dtype=torch.float)
        X_fp32 = X_q.dequantize().to(dtype=torch.float)
        b_fp32 = b_q.dequantize().to(dtype=torch.float)
        Y_fp32_ref = F.linear(X_fp32, W_fp32, b_fp32)
        Y_q_ref2 = torch.quantize_linear(Y_fp32_ref, Y_scale, Y_zp, torch.quint8)

        # Assert equal
        np.testing.assert_equal(Y_q_ref2.int_repr().numpy(), Y_q.int_repr().numpy())


    """Tests the correctness of the quantized::fbgemm_linear_relu op."""
    def test_qlinear_relu(self):
        qlinear_prepack = torch.ops.quantized.fbgemm_linear_prepack
        qlinear_relu = torch.ops.quantized.fbgemm_linear_relu

        batch_size = 4
        input_channels = 16
        output_channels = 8

        X_scale = 1.5
        X_zp = 5
        X_value_min = 0
        X_value_max = 225
        X_q0 = np.round(
            np.random.rand(batch_size, input_channels) * (X_value_max - X_value_min)
            + X_value_min
        ).astype(np.uint8)

        W_scale = 0.4
        W_zp = 2
        W_value_min = -128
        W_value_max = 127
        W_q0 = np.round(
            np.random.rand(output_channels, input_channels)
            * (W_value_max - W_value_min)
            + W_value_min
        ).astype(np.int8)

        b_value_min = -10
        b_value_max = 10
        b_q0 = np.round(
            np.random.rand(output_channels) * (b_value_max - b_value_min) + b_value_min
        ).astype(np.int32)

        avoid_vpmaddubsw_overflow_linear(
            batch_size,
            input_channels,
            output_channels,
            X_q0,
            X_value_min,
            X_value_max,
            W_q0,
            W_value_min,
            W_value_max,
        )

        X = torch.from_numpy(_dequantize(X_q0, X_scale, X_zp)).to(dtype=torch.float)
        W = torch.from_numpy(_dequantize(W_q0, W_scale, W_zp)).to(dtype=torch.float)
        b = torch.from_numpy(_dequantize(b_q0, X_scale * W_scale, 0)).to(dtype=torch.float)

        X_q = torch.quantize_linear(X, scale=X_scale, zero_point=X_zp, dtype=torch.quint8)
        W_q = torch.quantize_linear(W, scale=W_scale, zero_point=W_zp, dtype=torch.qint8)
        b_q = torch.quantize_linear(b, scale=X_scale * W_scale, zero_point=0, dtype=torch.qint32)

        # To judge saturation, compare the largest possible accumulator value,
        # X_scale * W_scale * input_channels * X_value_max * W_value_max,
        # against Y_scale * 255 (the largest representable uint8 output).
        Y_scale = 125.1234
        Y_zp = 5

        # Reference quantized Linear operator
        Y_q_ref = qlinear_ref(X_q0, X_scale, X_zp, W_q0, W_scale, W_zp, b_q0, Y_scale, Y_zp)
        Y_q_ref[Y_q_ref < Y_zp] = Y_zp

        # Weight prepacking operator for quantized Linear
        W_prepack = qlinear_prepack(W_q)
        # Quantized Linear operator with prepacked weight
        Y_q = qlinear_relu(X_q, W_prepack, b_q, Y_scale, Y_zp)

        # Assert equal
        np.testing.assert_equal(Y_q_ref, Y_q.int_repr().numpy())

        # Reference quantized result from PyTorch Linear operator
        W_fp32 = W_q.dequantize().to(dtype=torch.float)
        X_fp32 = X_q.dequantize().to(dtype=torch.float)
        b_fp32 = b_q.dequantize().to(dtype=torch.float)
        Y_fp32_ref = F.linear(X_fp32, W_fp32, b_fp32)
        Y_fp32_ref[Y_fp32_ref < 0.0] = 0.0
        Y_q_ref2 = torch.quantize_linear(Y_fp32_ref, Y_scale, Y_zp, torch.quint8)

        # Assert equal
        np.testing.assert_equal(Y_q_ref2.int_repr().numpy(), Y_q.int_repr().numpy())

    """Tests the correctness of the quantized::fbgemm_linear_unpack op."""
    @given(Q=qtensor(shapes=array_shapes(2, 2,), dtypes=((torch.qint8, np.int8, None),)))
    def test_qlinear_unpack(self, Q):
        W, (W_scale, W_zp), (qmin, qmax), (torch_type, np_type) = Q
        qlinear_prepack = torch.ops.quantized.fbgemm_linear_prepack
        qlinear_unpack = torch.ops.quantized.fbgemm_linear_unpack

        W = torch.from_numpy(W)
        W_q = torch.quantize_linear(W, scale=W_scale, zero_point=W_zp, dtype=torch_type)

        # Weight prepacking operator for quantized Linear
        W_prepack = qlinear_prepack(W_q)
        # Weight unpack operator for quantized Linear (Used for serialization)
        W_q_origin = qlinear_unpack(W_prepack)

        # Assert equal
        np.testing.assert_equal(W_q.int_repr(), W_q_origin.int_repr().numpy())
        np.testing.assert_equal(W_q.q_scale(), W_q_origin.q_scale())
        np.testing.assert_equal(W_q.q_zero_point(), W_q_origin.q_zero_point())
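
The qlinear_ref reference used above computes the ground truth entirely in
integer arithmetic. A minimal sketch with the usual zero-point correction terms
(an assumption mirroring, not copied from, the harness helper; it reuses the
_requantize sketch from Example #1):

def qlinear_ref(X_q, X_scale, X_zp, W_q, W_scale, W_zp, b_q, Y_scale, Y_zp):
    X_q = np.reshape(X_q, (-1, X_q.shape[-1]))  # flatten leading dims
    row_offsets = X_q.astype(np.int32).sum(axis=1, keepdims=True)    # (N, 1)
    col_offsets = W_q.astype(np.int32).sum(axis=1, keepdims=True).T  # (1, M)
    K = X_q.shape[1]
    # sum_k (Xq - Xzp)(Wq - Wzp)
    #   = Xq.Wq^T - Wzp*row_offsets - Xzp*col_offsets + K*Xzp*Wzp
    acc = (np.matmul(X_q.astype(np.int32), W_q.astype(np.int32).T)
           - W_zp * row_offsets
           - X_zp * col_offsets
           + K * X_zp * W_zp)
    if b_q is not None:
        acc += b_q
    # Requantize the int32 accumulator to uint8 with the combined multiplier.
    return _requantize(acc, X_scale * W_scale / Y_scale, Y_zp)
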
Example #5
class TestQuantizedLinear(unittest.TestCase):
    """Tests the correctness of the quantized linear and linear_relu op."""
    @given(batch_size=st.integers(1, 4),
           input_channels=st.integers(16, 32),
           output_channels=st.integers(4, 8),
           use_bias=st.booleans(),
           use_relu=st.booleans(),
           use_multi_dim_input=st.booleans())
    def test_qlinear(self, batch_size, input_channels, output_channels,
                     use_bias, use_relu, use_multi_dim_input):
        qlinear_prepack = torch.ops.quantized.fbgemm_linear_prepack
        if use_relu:
            qlinear = torch.ops.quantized.fbgemm_linear_relu
        else:
            qlinear = torch.ops.quantized.fbgemm_linear

        if use_multi_dim_input:
            batch_size *= 3  # Test the multi-dim input tensor

        X_scale = 1.5
        X_zp = 5
        X_value_min = 0
        X_value_max = 225
        X_q0 = np.round(
            np.random.rand(batch_size, input_channels) *
            (X_value_max - X_value_min) + X_value_min).astype(np.uint8)

        W_scale = 0.4
        W_zp = 2
        W_value_min = -128
        W_value_max = 127
        W_q0 = np.round(
            np.random.rand(output_channels, input_channels) *
            (W_value_max - W_value_min) + W_value_min).astype(np.int8)

        b_value_min = -10
        b_value_max = 10
        b_q0 = np.round(
            np.random.rand(output_channels) * (b_value_max - b_value_min) +
            b_value_min).astype(np.int32) if use_bias else None

        avoid_vpmaddubsw_overflow_linear(
            batch_size,
            input_channels,
            output_channels,
            X_q0,
            X_value_min,
            X_value_max,
            W_q0,
            W_value_min,
            W_value_max,
        )

        X = torch.from_numpy(_dequantize(X_q0, X_scale,
                                         X_zp)).to(dtype=torch.float)
        W = torch.from_numpy(_dequantize(W_q0, W_scale,
                                         W_zp)).to(dtype=torch.float)
        b = torch.from_numpy(_dequantize(b_q0, X_scale * W_scale, 0)).to(
            dtype=torch.float) if use_bias else None

        X_q = torch.quantize_linear(X,
                                    scale=X_scale,
                                    zero_point=X_zp,
                                    dtype=torch.quint8)
        W_q = torch.quantize_linear(W,
                                    scale=W_scale,
                                    zero_point=W_zp,
                                    dtype=torch.qint8)
        b_q = torch.quantize_linear(
            b, scale=X_scale * W_scale, zero_point=0,
            dtype=torch.qint32) if use_bias else None

        # To judge saturation, compare the largest possible accumulator value,
        # X_scale * W_scale * input_channels * X_value_max * W_value_max,
        # against Y_scale * 255 (the largest representable uint8 output).
        Y_scale = 125.1234
        Y_zp = 5

        # Reference quantized Linear operator
        Y_q_ref = qlinear_ref(X_q0, X_scale, X_zp, W_q0, W_scale, W_zp, b_q0,
                              Y_scale, Y_zp)
        if use_relu:
            Y_q_ref[Y_q_ref < Y_zp] = Y_zp
        if use_multi_dim_input:
            Y_q_ref = np.reshape(Y_q_ref, (3, batch_size // 3, output_channels))

        # Weight prepacking operator for quantized Linear
        W_prepack = qlinear_prepack(W_q)

        if use_multi_dim_input:
            X_q = X_q.view(3, batch_size // 3, input_channels)

        # Quantized Linear operator with prepacked weight
        Y_q = qlinear(X_q, W_prepack, b_q, Y_scale, Y_zp)

        # Assert equal
        np.testing.assert_equal(Y_q_ref, Y_q.int_repr().numpy())

        # Reference quantized result from PyTorch Linear operator
        W_fp32 = W_q.dequantize().to(dtype=torch.float)
        X_fp32 = X_q.dequantize().to(dtype=torch.float)
        b_fp32 = b_q.dequantize().to(dtype=torch.float) if use_bias else None
        Y_fp32_ref = F.linear(X_fp32, W_fp32, b_fp32)
        if use_relu:
            Y_fp32_ref[Y_fp32_ref < 0.0] = 0.0
        Y_q_ref2 = torch.quantize_linear(Y_fp32_ref, Y_scale, Y_zp,
                                         torch.quint8)

        # Assert equal
        np.testing.assert_equal(Y_q_ref2.int_repr().numpy(),
                                Y_q.int_repr().numpy())

    """Tests the correctness of the quantized::fbgemm_linear_unpack op."""

    @given(W=hu.tensor(shapes=hu.array_shapes(
        2,
        2,
    ),
                       qparams=hu.qparams(dtypes=torch.qint8)))
    def test_qlinear_unpack(self, W):
        W, (W_scale, W_zp, torch_type) = W
        qlinear_prepack = torch.ops.quantized.fbgemm_linear_prepack
        qlinear_unpack = torch.ops.quantized.fbgemm_linear_unpack

        W = torch.from_numpy(W)
        W_q = torch.quantize_linear(W,
                                    scale=W_scale,
                                    zero_point=W_zp,
                                    dtype=torch_type)

        # Weight prepacking operator for quantized Linear
        W_prepack = qlinear_prepack(W_q)
        # Weight unpack operator for quantized Linear (Used for serialization)
        W_q_origin = qlinear_unpack(W_prepack)

        # Assert equal
        np.testing.assert_equal(W_q.int_repr(), W_q_origin.int_repr().numpy())
        np.testing.assert_equal(W_q.q_scale(), W_q_origin.q_scale())
        np.testing.assert_equal(W_q.q_zero_point(), W_q_origin.q_zero_point())
Example #6
class TestQNNPackOps(TestCase):
    """Tests the correctness of the quantized::qnnpack_relu op."""
    @given(X=hu.tensor(shapes=hu.array_shapes(1, 5, 1, 5),
                       qparams=hu.qparams(dtypes=torch.quint8,
                                          zero_point_min=0,
                                          zero_point_max=0)))
    def test_qnnpack_relu(self, X):
        X, (scale, zero_point, torch_type) = X
        relu = torch.ops.quantized.qnnpack_relu

        X = torch.from_numpy(X)
        Y = X.clone()

        qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point, dtype=torch_type)
        qY_hat = relu(qX)

        Y[Y < 0] = 0
        qY = torch.quantize_linear(Y, scale=scale, zero_point=zero_point, dtype=torch_type)
        self.assertEqual(qY, qY_hat)

    """Tests the correctness of the quantized::qnnpack_linear op."""
    @given(output_channels=st.sampled_from([2, 4, 5, 8, 16, 32]),
           X=hu.tensor(shapes=hu.array_shapes(2, 3, 8, 15),
                       qparams=hu.qparams(dtypes=torch.quint8)))
    def test_qnnpack_linear(self, output_channels, X):
        X, (X_scale, X_zp, torch_type) = X
        qmin = torch.iinfo(torch_type).min
        qmax = torch.iinfo(torch_type).max

        input_channels = X.shape[-1]

        # Flatten all leading dimensions into the batch ("rows") dimension.
        input_rows = int(np.prod(X.shape[:-1]))

        qnnpack_linear = torch.ops.quantized.qnnpack_linear

        # Map X onto the quantized range [qmin, qmax].
        X_q0 = np.round(X * (qmax - qmin) + qmin).astype(np.uint8)

        W_scale = 0.4
        W_zp = 0
        W_value_min = 0
        W_value_max = 255
        W_q0 = np.round(
            np.random.rand(output_channels, input_channels)
            * (W_value_max - W_value_min)
            + W_value_min
        ).astype(np.uint8)

        b_value_min = -10
        b_value_max = 10
        b_q0 = np.round(
            np.random.rand(output_channels) * (b_value_max - b_value_min) + b_value_min
        ).astype(np.int32)

        # Override the sampled X quantization params with fixed values.
        X_scale = 10
        X_zp = 0
        X = torch.from_numpy(_dequantize(X_q0, X_scale, X_zp)).to(dtype=torch.float)
        W = torch.from_numpy(_dequantize(W_q0, W_scale, W_zp)).to(dtype=torch.float)
        b = torch.from_numpy(_dequantize(b_q0, X_scale * W_scale, 0)).to(dtype=torch.float)

        X_q = torch.quantize_linear(X, scale=X_scale, zero_point=X_zp, dtype=torch.quint8)
        W_q = torch.quantize_linear(W, scale=W_scale, zero_point=W_zp, dtype=torch.quint8)
        b_q = torch.quantize_linear(b, scale=X_scale * W_scale, zero_point=0, dtype=torch.qint32)

        Y_scale = 5.4  # This makes sure that the max output value does not exceed 255.
        Y_zp = 0

        # Reference quantized Linear operator
        Y_q_ref = qlinear_ref(X_q0, X_scale, X_zp, W_q0, W_scale, W_zp, b_q0, Y_scale, Y_zp)
        Y_q_ref_float = _dequantize(Y_q_ref, Y_scale, Y_zp)

        # Quantized linear operator
        Y_q = qnnpack_linear(X_q, W_q, b_q, Y_scale, Y_zp)

        # Assert equal
        np.testing.assert_array_almost_equal(Y_q_ref_float, Y_q.dequantize().numpy(), decimal=4)

        # Reference quantized result from PyTorch Linear operator

        W_fp32 = W_q.dequantize().to(dtype=torch.float)
        X_fp32 = X_q.dequantize().to(dtype=torch.float)
        b_fp32 = b_q.dequantize().to(dtype=torch.float)
        Y_fp32_ref = F.linear(X_fp32, W_fp32, b_fp32)
        Y_fp32_ref = Y_fp32_ref.view(-1, output_channels)
        Y_q_ref2 = torch.quantize_linear(Y_fp32_ref, Y_scale, Y_zp, torch.quint8)

        # Assert equal
        np.testing.assert_array_almost_equal(Y_q_ref2.dequantize().numpy(), Y_q.dequantize().numpy(), decimal=4)
Example #7
class TestQuantizedOps(TestCase):
    """Computes the output shape given pooling parameters."""
    def _pool_output_shape(self, input_size, kernel_size, padding, stride,
                           dilation, ceiling_mode=False):
        if stride is None:
            stride = kernel_size
        output_size = (
            (input_size + 2 * padding - dilation * (kernel_size - 1) - 1
             + (stride - 1 if ceiling_mode else 0)) // stride + 1)
        if (padding > 0 and
                ((output_size - 1) * stride >= input_size + padding)):
            output_size += 1
        return output_size

    """Tests the correctness of the quantized::relu op."""
    @given(qparams=hu.qparams())
    def test_qrelu(self, qparams):
        X = np.array([[-3, -2, 1, 2],
                      [0, 0, 0, 0],
                      [-5, -4, -3, -2],
                      [1, 2, 3, 4]], dtype=np.float32)
        scale, zero_point, torch_type = qparams

        Y = X.copy()
        Y[Y < 0] = 0
        qY = torch.quantize_linear(torch.from_numpy(Y), scale=scale,
                                   zero_point=zero_point, dtype=torch_type)
        X = torch.from_numpy(X)
        qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        ops_under_test = {
            'native': torch.relu,
            'nn.functional': torch.nn.functional.relu,
        }

        for name, op in ops_under_test.items():
            qY_hat = op(qX)
            self.assertEqual(qY, qY_hat, message="{} relu failed".format(name))

        ops_under_test_inplace = {
            'inplace native': torch.relu_,
            'inplace nn.functional': torch.nn.functional.relu_,
        }

        for name, op_ in ops_under_test_inplace.items():
            qY_hat = qX.clone()
            op_(qY_hat)
            self.assertEqual(qY, qY_hat, message="{} relu failed".format(name))

    """Tests the correctness of the quantized::relu op."""
    @given(X=hu.tensor(shapes=hu.array_shapes(1, 5, 1, 5),
                       qparams=hu.qparams()))
    def test_qrelu6(self, X):
        X, (scale, zero_point, torch_type) = X

        Y = X.copy()
        Y[Y < 0] = 0
        Y[Y > 6.0] = 6.0
        qY = torch.quantize_linear(torch.from_numpy(Y), scale=scale,
                                   zero_point=zero_point, dtype=torch_type)
        X = torch.from_numpy(X)
        qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        ops_under_test = {
            'ops.quantized': torch.ops.quantized.relu6,
            'module': torch.nn.quantized.ReLU6(),
        }

        for name, op in ops_under_test.items():
            qY_hat = op(qX)
            self.assertEqual(qY, qY_hat, message="{} relu6 failed".format(name))

    """Tests the correctness of the scalar addition."""
    @given(A=hu.tensor(shapes=hu.array_shapes(1, 4, 1, 5),
                       elements=st.floats(-1e6, 1e6, allow_nan=False),
                       qparams=hu.qparams()),
           b=st.floats(-1e6, 1e6, allow_nan=False, allow_infinity=False))
    def test_qadd_scalar_relu(self, A, b):
        import copy
        add_scalar = torch.ops.quantized.add_scalar
        add_scalar_relu = torch.ops.quantized.add_scalar_relu

        A, (scale, zero_point, dtype) = A
        A = A.astype(np.float32)
        qA = torch.quantize_linear(torch.from_numpy(A), scale, zero_point, dtype)

        C = qA.dequantize() + b
        C_relu = copy.deepcopy(C)
        C_relu[C_relu < 0] = 0

        C_ref = torch.quantize_linear(C, scale, zero_point, dtype)
        C_relu_ref = torch.quantize_linear(C_relu, scale, zero_point, dtype)

        C_hat = add_scalar(qA, b, scale=scale, zero_point=zero_point)
        C_relu_hat = add_scalar_relu(qA, b, scale=scale, zero_point=zero_point)

        self.assertEqual(C_ref, C_hat,
                         message="Scalar add results don't match: "
                                 "{} vs {}".format(C_ref, C_hat))
        self.assertEqual(C_relu_ref, C_relu_hat,
                         message="Scalar add relu results don't match: "
                                 "{} vs {}".format(C_relu_ref, C_relu_hat))

    """Tests the correctness of the add and add_relu op."""
    def test_qadd_relu_same_qparams(self):
        add_relu = torch.ops.quantized.add_relu
        add = torch.ops.quantized.add
        add_out = torch.ops.quantized.add_out
        add_relu_out = torch.ops.quantized.add_relu_out

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale = 2.0
        zero_point = 127
        qA = torch.quantize_linear(A, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)

        # Add ground truth
        C = (qA.dequantize() + qB.dequantize()).numpy()
        qC = _quantize(C, scale, zero_point)
        qC_hat = add(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized addition failed.")
        qC_out_hat = torch._empty_affine_quantized(qC.shape,
                                                   scale=scale,
                                                   zero_point=zero_point,
                                                   dtype=torch.quint8)
        add_out(qA, qB, out=qC_out_hat)
        self.assertEqual(qC_hat, qC_out_hat, message="Add.out failed")

        # Add + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale, zero_point)
        qCrelu_hat = add_relu(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized addition with ReLU failed.")
        qCrelu_out_hat = torch._empty_affine_quantized(qCrelu.shape,
                                                       scale=scale,
                                                       zero_point=zero_point,
                                                       dtype=torch.quint8)
        add_relu_out(qA, qB, out=qCrelu_out_hat)
        self.assertEqual(qCrelu_hat, qCrelu_out_hat,
                         message="AddReLU.out failed")


    """Tests the correctness of the add and add_relu op."""
    def test_qadd_relu_different_qparams(self):
        add_relu = torch.ops.quantized.add_relu
        add = torch.ops.quantized.add
        add_out = torch.ops.quantized.add_out
        add_relu_out = torch.ops.quantized.add_relu_out

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale_A = 3.0
        zero_point_A = 7
        scale_B = 5.0
        zero_point_B = 127

        scale_C = 0.5
        zero_point_C = 5

        qA = torch.quantize_linear(A, scale=scale_A, zero_point=zero_point_A,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale_B, zero_point=zero_point_B,
                                   dtype=torch.quint8)

        # Add ground truth
        C = (qA.dequantize() + qB.dequantize()).numpy()
        qC = _quantize(C, scale_C, zero_point_C)
        qC_hat = add(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized addition failed.")
        qC_out_hat = torch._empty_affine_quantized(qC.shape,
                                                   scale=scale_C,
                                                   zero_point=zero_point_C,
                                                   dtype=torch.quint8)
        add_out(qA, qB, out=qC_out_hat)
        self.assertEqual(qC_hat, qC_out_hat, message="Add.out failed")

        # Add + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale_C, zero_point_C)
        qCrelu_hat = add_relu(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized addition with ReLU failed.")
        qCrelu_out_hat = torch._empty_affine_quantized(qCrelu.shape,
                                                       scale=scale_C,
                                                       zero_point=zero_point_C,
                                                       dtype=torch.quint8)
        add_relu_out(qA, qB, out=qCrelu_out_hat)
        self.assertEqual(qCrelu_hat, qCrelu_out_hat,
                         message="AddReLU.out failed")

    """Tests the correctness of the mul and mul_relu op."""
    def test_qmul_relu_same_qparams(self):
        mul_relu = torch.ops.quantized.mul_relu
        mul = torch.ops.quantized.mul
        mul_out = torch.ops.quantized.mul_out
        mul_relu_out = torch.ops.quantized.mul_relu_out

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale = 2.0
        zero_point = 127
        qA = torch.quantize_linear(A, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)

        # Mul ground truth
        C = (qA.dequantize() * qB.dequantize()).numpy()
        qC = _quantize(C, scale, zero_point)
        qC_hat = mul(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized multiplication failed.")
        qC_out_hat = torch._empty_affine_quantized(qC.shape,
                                                   scale=scale,
                                                   zero_point=zero_point,
                                                   dtype=torch.quint8)
        mul_out(qA, qB, out=qC_out_hat)
        self.assertEqual(qC_hat, qC_out_hat, message="mul.out failed")

        # mul + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale, zero_point)
        qCrelu_hat = mul_relu(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized multiplication with ReLU failed.")
        qCrelu_out_hat = torch._empty_affine_quantized(qCrelu.shape,
                                                       scale=scale,
                                                       zero_point=zero_point,
                                                       dtype=torch.quint8)
        mul_relu_out(qA, qB, out=qCrelu_out_hat)
        self.assertEqual(qCrelu_hat, qCrelu_out_hat,
                         message="mulReLU.out failed")

        # Scalar multiplication
        mul = torch.ops.quantized.mul_scalar
        for b in B:
            C_ref = qA.dequantize().numpy() * b.item()
            qC = _quantize(C_ref, scale, zero_point)
            dqC = _dequantize(qC, scale, zero_point)
            qC_hat = mul(qA, b.item(), scale, zero_point)
            dqC_hat = qC_hat.dequantize()
            self.assertEqual(dqC, dqC_hat)

    """Tests the correctness of the mul and mul_relu op."""
    def test_qmul_relu_different_qparams(self):
        mul_relu = torch.ops.quantized.mul_relu
        mul = torch.ops.quantized.mul
        mul_out = torch.ops.quantized.mul_out
        mul_relu_out = torch.ops.quantized.mul_relu_out

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale_A = 3.0
        zero_point_A = 7
        scale_B = 5.0
        zero_point_B = 127

        scale_C = 0.5
        zero_point_C = 5

        qA = torch.quantize_linear(A, scale=scale_A, zero_point=zero_point_A,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale_B, zero_point=zero_point_B,
                                   dtype=torch.quint8)

        # mul ground truth
        C = (qA.dequantize() * qB.dequantize()).numpy()
        qC = _quantize(C, scale_C, zero_point_C)
        qC_hat = mul(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized multiplication failed.")
        qC_out_hat = torch._empty_affine_quantized(qC.shape,
                                                   scale=scale_C,
                                                   zero_point=zero_point_C,
                                                   dtype=torch.quint8)
        mul_out(qA, qB, out=qC_out_hat)
        self.assertEqual(qC_hat, qC_out_hat, message="mul.out failed")

        # mul + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale_C, zero_point_C)
        qCrelu_hat = mul_relu(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized multiplication with ReLU failed.")
        qCrelu_out_hat = torch._empty_affine_quantized(qCrelu.shape,
                                                       scale=scale_C,
                                                       zero_point=zero_point_C,
                                                       dtype=torch.quint8)
        mul_relu_out(qA, qB, out=qCrelu_out_hat)
        self.assertEqual(qCrelu_hat, qCrelu_out_hat,
                         message="mulReLU.out failed")

    """Tests max pool operation on quantized tensors."""
    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),
           kernel=st.sampled_from((3, 5, 7)),
           stride=st.sampled_from((None, 1, 2)),
           dilation=st.integers(1, 2),
           padding=st.integers(0, 2))
    def test_max_pool2d(self, X, kernel, stride, dilation, padding):
        X, (scale, zero_point, torch_type) = X
        # Check constraints
        assume(kernel // 2 >= padding)  # Kernel cannot be overhanging!
        iH, iW = X.shape[-2:]
        oH = self._pool_output_shape(iH, kernel, padding, stride, dilation)
        assume(oH > 0)
        oW = self._pool_output_shape(iW, kernel, padding, stride, dilation)
        assume(oW > 0)

        a = torch.from_numpy(X)
        a_pool = torch.nn.functional.max_pool2d(a, kernel_size=kernel,
                                                stride=stride,
                                                padding=padding, dilation=dilation)
        a_ref = torch.quantize_linear(a_pool, scale=scale,
                                      zero_point=zero_point, dtype=torch_type)
        a_ref = a_ref.dequantize()
        qa = torch.quantize_linear(a, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        ops_under_test = {
            "torch": torch.max_pool2d,
            "nn.functional": torch.nn.functional.max_pool2d,
            "nn.quantized.functional": torch.nn.quantized.functional.max_pool2d
        }

        for name, op in ops_under_test.items():
            a_hat = op(qa, kernel_size=kernel, stride=stride, padding=padding,
                       dilation=dilation)
            self.assertEqual(a_ref, a_hat.dequantize(),
                             message="{} results are off".format(name))
        # Test ops.quantized.max_pool2d separately, because it does not accept None for stride.
        a_hat = torch.ops.quantized.max_pool2d(
            qa, kernel_size=_pair(kernel),
            stride=_pair(kernel if stride is None else stride),
            padding=_pair(padding), dilation=_pair(dilation))
        self.assertEqual(a_ref, a_hat.dequantize(),
                         message="ops.quantized.max_pool2d results are off")

    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),
           output_size_h=st.integers(1, 10),
           output_size_w=st.integers(1, 10))
    def test_adaptive_avg_pool2d(self, X, output_size_h, output_size_w):
        X, (scale, zero_point, torch_type) = X

        H, W = X.shape[-2:]
        assume(output_size_h <= H)
        assume(output_size_w <= W)
        if output_size_h == output_size_w:
            output_size = output_size_h
        else:
            output_size = (output_size_h, output_size_w)

        X = torch.from_numpy(X)
        qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        # Run reference on int_repr + round to avoid double rounding error.
        X_ref = torch.nn.functional.adaptive_avg_pool2d(
            qX.int_repr().to(torch.float), output_size).round()

        ops_under_test = {
            "nn.functional": torch.nn.functional.adaptive_avg_pool2d,
            "nn.quantized.functional":
                torch.nn.quantized.functional.adaptive_avg_pool2d
        }

        error_message = r"Results are off for {}:\n\tExpected:\n{}\n\tGot:\n{}"

        for name, op in ops_under_test.items():
            qX_hat = op(qX, output_size=output_size)
            self.assertEqual(X_ref, qX_hat.int_repr(), prec=1.0,
                             message=error_message.format(name, X_ref, qX_hat))
            self.assertEqual(scale, qX_hat.q_scale(),
                             message=error_message.format(name + '.scale', scale, qX_hat.q_scale()))
            self.assertEqual(zero_point, qX_hat.q_zero_point(),
                             message=error_message.format(name + '.zero_point', zero_point,
                                                          qX_hat.q_zero_point()))

    """Tests quantize concatenation (both fused and not)."""
    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),
           num=st.integers(1, 4),
           dim=st.integers(1, 4),
           relu=st.booleans())
    def test_cat(self, X, num, dim, relu):
        tensors_q = []
        tensors_ref = []
        X, (scale, zero_point, torch_type) = X
        assume(dim < X.ndim)
        X = torch.from_numpy(X)
        new_shape = np.array(X.shape)
        new_shape[dim] = 0
        for idx in range(num):
            tensors_q.append(torch.quantize_linear(X, scale, zero_point,
                                                   torch_type))
            tensors_ref.append(X)
            new_shape[dim] += tensors_ref[-1].shape[dim]

        cat_ref = torch.cat(tensors_ref, dim=dim)
        cat_ref = torch.quantize_linear(cat_ref, scale, zero_point, torch_type)
        cat_ref = cat_ref.dequantize()

        if relu:
            cat_ref = F.relu(cat_ref)
            q_cat_op = torch.ops.quantized.cat_relu
            q_cat_out_op = torch.ops.quantized.cat_relu_out
        else:
            q_cat_op = torch.ops.quantized.cat
            q_cat_out_op = torch.ops.quantized.cat_out

        cat_q = q_cat_op(tensors_q, dim=dim, scale=scale,
                         zero_point=zero_point)
        cat_q = cat_q.dequantize()
        np.testing.assert_equal(cat_ref.numpy(), cat_q.numpy())

        cat_q_out = torch._empty_affine_quantized(
            list(new_shape), scale=scale,
            zero_point=zero_point, dtype=torch_type)
        q_cat_out_op(tensors_q, dim=dim, out=cat_q_out)
        cat_q_out = cat_q_out.dequantize()
        np.testing.assert_equal(cat_ref.numpy(), cat_q_out.numpy())

        # Test the cat on per-channel quantized tensor.
        ch_axis = 1
        scales = torch.from_numpy(np.array([1.0] * X.shape[ch_axis]))
        scales = scales.to(torch.float64)
        zero_points = torch.from_numpy(np.array([0] * X.shape[ch_axis]))
        zero_points = zero_points.to(torch.long)
        tensors_q[0] = torch.quantize_linear_per_channel(
            X, scales, zero_points, axis=[ch_axis], dtype=torch_type)
        with self.assertRaisesRegex(RuntimeError, "supported.*cat"):
            cat_q = q_cat_op(tensors_q, dim=ch_axis, scale=scale,
                             zero_point=zero_point)

    """Tests the correctness of the quantized equal op."""
    @given(X=hu.tensor(shapes=hu.array_shapes(1, 5, 1, 5),
                       qparams=hu.qparams()),
           X2=hu.tensor(shapes=hu.array_shapes(1, 5, 1, 5),
                        qparams=hu.qparams()),
           X_per_channel=st.booleans(),
           X2_per_channel=st.booleans())
    def test_equal(self, X, X2, X_per_channel, X2_per_channel):
        X, X_params = X
        (scale, zero_point, torch_type) = X_params
        X2, X2_params = X2
        (scale2, zero_point2, torch_type2) = X2_params

        X = torch.from_numpy(X)
        if X_per_channel:
            X_scheme = 'per_channel'
            channels = X.shape[-1]
            qX = torch.quantize_linear_per_channel(
                X,
                scales=torch.tensor([scale] * channels),
                zero_points=torch.tensor([zero_point] * channels),
                dtype=torch_type,
                axis=[X.ndim - 1])
        else:
            X_scheme = 'per_tensor'
            qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                                       dtype=torch_type)
        X2 = torch.from_numpy(X2)
        if X2_per_channel:
            X2_scheme = 'per_channel'
            channels = X2.shape[-1]
            qX2 = torch.quantize_linear_per_channel(
                X2,
                scales=torch.tensor([scale2] * channels),
                zero_points=torch.tensor([zero_point2] * channels),
                dtype=torch_type2,
                axis=[X2.ndim - 1])
        else:
            X2_scheme = 'per_tensor'
            qX2 = torch.quantize_linear(X2, scale=scale2, zero_point=zero_point2,
                                        dtype=torch_type2)

        def equal_ref(X, params, X_scheme, X2, params2, X2_scheme):
            if X_scheme != X2_scheme:
                return False
            if params != params2:
                return False
            if X.shape != X2.shape:
                return False
            if (X != X2).any():
                return False
            return True

        self.assertEqual(qX.equal(qX), equal_ref(X, X_params, X_scheme, X, X_params, X_scheme))
        self.assertEqual(qX.equal(qX2), equal_ref(X, X_params, X_scheme, X2, X2_params, X2_scheme))
Example no. 8
class TestFakeQuantizePerTensorAffine(unittest.TestCase):
    # NOTE: Tests in this class are decorated with no_deadline
    # to prevent spurious failures due to cuda runtime initialization.

    def to_tensor(self, X, device):
        return torch.tensor(X).to(device=torch.device(device),
                                  dtype=torch.float32)

    @no_deadline
    @given(device=st.sampled_from(
        ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(
               1,
               5,
           ),
                       qparams=hu.qparams(dtypes=torch.quint8)))
    def test_forward(self, device, X):
        r"""Tests the forward path of the FakeQuantizePerTensorAffine op.
        """
        np.random.seed(NP_RANDOM_SEED)
        X, (scale, zero_point, torch_type) = X
        quant_min = torch.iinfo(torch_type).min
        quant_max = torch.iinfo(torch_type).max

        X = torch.tensor(X).to(dtype=torch.float, device=device)
        Y = _fake_quantize_per_tensor_affine_reference(X.cpu(), scale,
                                                       zero_point, quant_min,
                                                       quant_max)
        Y_prime = torch.fake_quantize_per_tensor_affine(
            X, scale, zero_point, quant_min, quant_max)
        np.testing.assert_allclose(Y,
                                   Y_prime.cpu(),
                                   rtol=tolerance,
                                   atol=tolerance)
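        # For context, a minimal sketch of the affine fake-quantization the
        # reference helper above is assumed to compute (the helper itself is
        # defined elsewhere in this file and may differ in detail):
        #
        #   def _fake_quantize_per_tensor_affine_reference(X, scale,
        #           zero_point, quant_min, quant_max):
        #       q = torch.round(X / scale) + zero_point
        #       q = torch.clamp(q, quant_min, quant_max)
        #       return (q - zero_point) * scale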

    @no_deadline
    @given(device=st.sampled_from(
        ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(
               1,
               5,
           ),
                       qparams=hu.qparams(dtypes=torch.quint8)))
    def test_backward(self, device, X):
        r"""Tests the backward method. Note that this runs the reference quantization
        and thus the errors might be originating there.
        """
        np.random.seed(NP_RANDOM_SEED)
        X, (scale, zero_point, torch_type) = X
        quant_min = torch.iinfo(torch_type).min
        quant_max = torch.iinfo(torch_type).max

        X = torch.tensor(X).to(dtype=torch.float, device=device)
        X.requires_grad_()
        Y = _fake_quantize_per_tensor_affine_reference(X.cpu(), scale,
                                                       zero_point, quant_min,
                                                       quant_max)
        Y_prime = torch.fake_quantize_per_tensor_affine(
            X, scale, zero_point, quant_min, quant_max)
        dout = torch.rand(X.shape, dtype=torch.float).to(device)
        dX = _fake_quantize_per_tensor_affine_grad_reference(
            dout, X, scale, zero_point, quant_min, quant_max)
        Y_prime.backward(dout)
        np.testing.assert_allclose(dX.cpu(),
                                   X.grad.cpu().detach().numpy(),
                                   rtol=tolerance,
                                   atol=tolerance)
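        # The gradient reference used above is assumed to implement the
        # straight-through estimator: upstream gradients pass through where
        # the (rounded) input falls inside [quant_min, quant_max] and are
        # zeroed where it clips. A sketch under that assumption:
        #
        #   def _fake_quantize_per_tensor_affine_grad_reference(dout, X,
        #           scale, zero_point, quant_min, quant_max):
        #       q = torch.round(X / scale) + zero_point
        #       mask = (q >= quant_min) & (q <= quant_max)
        #       return dout * mask.to(dout.dtype)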

    @no_deadline
    @given(device=st.sampled_from(
        ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(
               1,
               5,
           ),
                       qparams=hu.qparams(dtypes=torch.quint8)))
    def test_numerical_consistency(self, device, X):
        r"""Comparing numerical consistency between CPU quantize/dequantize op and the CPU fake quantize op
        """
        np.random.seed(NP_RANDOM_SEED)
        X, (scale, zero_point, torch_type) = X
        quant_min = torch.iinfo(torch_type).min
        quant_max = torch.iinfo(torch_type).max

        X = torch.tensor(X).to(dtype=torch.float, device=device)
        # quantize_linear and dequantize are only implemented on CPU
        Y = torch.dequantize(
            torch.quantize_linear(X.cpu(), scale, zero_point, torch_type))
        Y_prime = torch.fake_quantize_per_tensor_affine(
            X, scale, zero_point, quant_min, quant_max)
        np.testing.assert_allclose(Y,
                                   Y_prime.cpu(),
                                   rtol=tolerance,
                                   atol=tolerance)

    @no_deadline
    @given(device=st.sampled_from(
        ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(
               1,
               5,
           ),
                       qparams=hu.qparams(dtypes=torch.quint8)))
    def test_fq_module(self, device, X):
        np.random.seed(NP_RANDOM_SEED)
        X, (scale, zero_point, torch_type) = X
        quant_min = torch.iinfo(torch_type).min
        quant_max = torch.iinfo(torch_type).max

        X = torch.tensor(X).to(dtype=torch.float, device=device)
        X.requires_grad_()
        fq_module = FakeQuantize(torch_type, torch.per_tensor_affine,
                                 quant_min, quant_max)
        Y_prime = fq_module(X)
        assert fq_module.scale is not None
        assert fq_module.zero_point is not None
        Y = _fake_quantize_per_tensor_affine_reference(X, fq_module.scale,
                                                       fq_module.zero_point,
                                                       quant_min, quant_max)
        np.testing.assert_allclose(Y.cpu().detach().numpy(),
                                   Y_prime.cpu().detach().numpy(),
                                   rtol=tolerance,
                                   atol=tolerance)

        # Test backward
        dout = torch.rand(X.shape, dtype=torch.float, device=device)
        Y_prime.backward(dout)
        dX = _fake_quantize_per_tensor_affine_grad_reference(
            dout, X, fq_module.scale, fq_module.zero_point, quant_min,
            quant_max)
        np.testing.assert_allclose(dX.cpu(),
                                   X.grad.cpu().detach().numpy(),
                                   rtol=tolerance,
                                   atol=tolerance)
Example no. 9
class TestQuantizedOps(TestCase):
    """Computes the output shape given pooling parameters."""
    def _pool_output_shape(self,
                           input_size,
                           kernel_size,
                           padding,
                           stride,
                           dilation,
                           ceiling_mode=False):
        output_size = ((input_size + 2 * padding - dilation *
                        (kernel_size - 1) - 1 +
                        (stride - 1 if ceiling_mode else 0)) // stride + 1)
        if (padding > 0
                and ((output_size - 1) * stride >= input_size + padding)):
            output_size += 1
        return output_size
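        # Worked example: input_size=10, kernel_size=3, padding=1, stride=2,
        # dilation=1, ceiling_mode=False gives
        #   (10 + 2*1 - 1*(3 - 1) - 1) // 2 + 1 = 9 // 2 + 1 = 5,
        # which matches torch.nn.functional.max_pool2d on a 10-wide input
        # with the same arguments.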

    """Tests the correctness of the quantized::relu op."""

    @given(X=hu.tensor(shapes=hu.array_shapes(1, 5, 1, 5),
                       qparams=hu.qparams()))
    def test_qrelu(self, X):
        X, (scale, zero_point, torch_type) = X

        Y = X.copy()
        Y[Y < 0] = 0
        qY = torch.quantize_linear(torch.from_numpy(Y),
                                   scale=scale,
                                   zero_point=zero_point,
                                   dtype=torch_type)
        X = torch.from_numpy(X)
        qX = torch.quantize_linear(X,
                                   scale=scale,
                                   zero_point=zero_point,
                                   dtype=torch_type)

        ops_under_test = {
            'ops.quantized': torch.ops.quantized.relu,
            'native': torch.relu,
            'nn.functional': torch.nn.functional.relu
        }

        for name, op in ops_under_test.items():
            qY_hat = op(qX)
            self.assertEqual(qY, qY_hat, "{} relu failed".format(name))

    """Tests the correctness of the add and add_relu op."""

    def test_qadd_relu_same_qparams(self):
        add_relu = torch.ops.quantized.add_relu
        add = torch.ops.quantized.add

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale = 2.0
        zero_point = 127
        qA = torch.quantize_linear(A,
                                   scale=scale,
                                   zero_point=zero_point,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B,
                                   scale=scale,
                                   zero_point=zero_point,
                                   dtype=torch.quint8)

        # Add ground truth
        C = (qA.dequantize() + qB.dequantize()).numpy()
        qC = _quantize(C, scale, zero_point)
        qC_hat = add(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized addition failed.")

        # Add + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale, zero_point)
        qCrelu_hat = add_relu(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized addition with ReLU failed.")

    """Tests the correctness of the add and add_relu op."""

    def test_qadd_relu_different_qparams(self):
        add_relu = torch.ops.quantized.add_relu
        add = torch.ops.quantized.add

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale_A = 3.0
        zero_point_A = 7
        scale_B = 5.0
        zero_point_B = 127

        scale_C = 0.5
        zero_point_C = 5

        qA = torch.quantize_linear(A,
                                   scale=scale_A,
                                   zero_point=zero_point_A,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B,
                                   scale=scale_B,
                                   zero_point=zero_point_B,
                                   dtype=torch.quint8)

        # Add ground truth
        C = (qA.dequantize() + qB.dequantize()).numpy()
        qC = _quantize(C, scale_C, zero_point_C)
        qC_hat = add(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized addition failed.")

        # Add + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale_C, zero_point_C)
        qCrelu_hat = add_relu(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized addition with ReLU failed.")

    """Tests max pool operation on quantized tensors."""

    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3,
                                              max_dims=4,
                                              min_side=1,
                                              max_side=10),
                       qparams=hu.qparams()),
           kernel=st.sampled_from((3, 5, 7)),
           stride=st.integers(1, 2),
           dilation=st.integers(1, 2),
           padding=st.integers(0, 2))
    def test_max_pool2d(self, X, kernel, stride, dilation, padding):
        X, (scale, zero_point, torch_type) = X
        # Check constraints
        assume(kernel // 2 >= padding)  # Kernel cannot be overhanging!
        iH, iW = X.shape[-2:]
        oH = self._pool_output_shape(iH, kernel, padding, stride, dilation)
        assume(oH > 0)
        oW = self._pool_output_shape(iW, kernel, padding, stride, dilation)
        assume(oW > 0)

        k = (kernel, kernel)
        s = (stride, stride)
        d = (dilation, dilation)
        p = (padding, padding)

        q_max_pool = torch.ops.quantized.max_pool2d

        a = torch.from_numpy(X)
        qa = torch.quantize_linear(a,
                                   scale=scale,
                                   zero_point=zero_point,
                                   dtype=torch_type)

        a_hat = qa.dequantize()
        a_pool = F.max_pool2d(a_hat,
                              kernel_size=k,
                              stride=s,
                              padding=p,
                              dilation=d)

        qa_pool_hat = q_max_pool(qa,
                                 kernel_size=k,
                                 stride=s,
                                 padding=p,
                                 dilation=d)
        a_pool_hat = qa_pool_hat.dequantize()
        np.testing.assert_equal(a_pool.numpy(), a_pool_hat.numpy())
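        # The comparison can be exact because max is order-preserving:
        # dequantization is an increasing affine map, so max-pooling the
        # dequantized tensor picks the same elements as max-pooling the
        # quantized values directly.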

    """Tests quantize concatenation (both fused and not)."""

    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3,
                                              max_dims=4,
                                              min_side=1,
                                              max_side=10),
                       qparams=hu.qparams()),
           num=st.integers(1, 4),
           axis=st.integers(1, 4),
           relu=st.booleans())
    def test_cat(self, X, num, axis, relu):
        tensors_q = []
        tensors_ref = []
        X, (scale, zero_point, torch_type) = X
        assume(axis < X.ndim)
        X = torch.from_numpy(X)
        for idx in range(num):
            tensors_q.append(
                torch.quantize_linear(X, scale, zero_point, torch_type))
            tensors_ref.append(X)

        cat_ref = torch.cat(tensors_ref, axis=axis)
        cat_ref = torch.quantize_linear(cat_ref, scale, zero_point, torch_type)
        cat_ref = cat_ref.dequantize()

        if relu:
            cat_ref = F.relu(cat_ref)
            q_cat_op = torch.ops.quantized.cat_relu
        else:
            q_cat_op = torch.ops.quantized.cat
        cat_q = q_cat_op(tensors_q,
                         axis=axis,
                         scale=scale,
                         zero_point=zero_point)
        cat_q = cat_q.dequantize()

        np.testing.assert_equal(cat_ref.numpy(), cat_q.numpy())

        # Test the cat on per-channel quantized tensor.
        ch_axis = 1
        scales = torch.from_numpy(np.array([1.0] * X.shape[ch_axis]))
        zero_points = torch.from_numpy(np.array([0] * X.shape[ch_axis]))
        tensors_q[0] = torch.quantize_linear_per_channel(X,
                                                         scales,
                                                         zero_points,
                                                         axis=[ch_axis],
                                                         dtype=torch_type)
        with self.assertRaisesRegex(RuntimeError, "supported.*cat"):
            cat_q = q_cat_op(tensors_q,
                             axis=axis,
                             scale=scale,
                             zero_point=zero_point)
Example no. 10
class TestFakeQuantizePerTensor(TestCase):
    # NOTE: Tests in this class are decorated with no_deadline
    # to prevent spurious failures due to cuda runtime initialization.

    @no_deadline
    @given(device=st.sampled_from(
        ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(
               1,
               5,
           ),
                       qparams=hu.qparams(dtypes=torch.quint8)))
    def test_forward_per_tensor(self, device, X):
        r"""Tests the forward path of the FakeQuantizePerTensorAffine op.
        """
        np.random.seed(NP_RANDOM_SEED)
        X, (scale, zero_point, torch_type) = X
        quant_min = torch.iinfo(torch_type).min
        quant_max = torch.iinfo(torch_type).max

        X = to_tensor(X, device)
        Y = _fake_quantize_per_tensor_affine_reference(X.cpu(), scale,
                                                       zero_point, quant_min,
                                                       quant_max)
        Y_prime = torch.fake_quantize_per_tensor_affine(
            X, scale, zero_point, quant_min, quant_max)
        np.testing.assert_allclose(Y,
                                   Y_prime.cpu(),
                                   rtol=tolerance,
                                   atol=tolerance)

    @no_deadline
    @given(device=st.sampled_from(
        ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(
               1,
               5,
           ),
                       qparams=hu.qparams(dtypes=torch.quint8)))
    def test_backward_per_tensor(self, device, X):
        r"""Tests the backward method.
        """
        np.random.seed(NP_RANDOM_SEED)
        X, (scale, zero_point, torch_type) = X
        quant_min = torch.iinfo(torch_type).min
        quant_max = torch.iinfo(torch_type).max

        X = to_tensor(X, device)
        X.requires_grad_()
        Y = _fake_quantize_per_tensor_affine_reference(X.cpu(), scale,
                                                       zero_point, quant_min,
                                                       quant_max)
        Y_prime = torch.fake_quantize_per_tensor_affine(
            X, scale, zero_point, quant_min, quant_max)
        dout = torch.rand(X.shape, dtype=torch.float).to(device)
        dX = _fake_quantize_per_tensor_affine_grad_reference(
            dout, X, scale, zero_point, quant_min, quant_max)
        Y_prime.backward(dout)
        np.testing.assert_allclose(dX.cpu(),
                                   X.grad.cpu().detach().numpy(),
                                   rtol=tolerance,
                                   atol=tolerance)

    @no_deadline
    @given(device=st.sampled_from(
        ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(
               1,
               5,
           ),
                       qparams=hu.qparams(dtypes=torch.quint8)))
    def test_numerical_consistency_per_tensor(self, device, X):
        r"""Comparing numerical consistency between CPU quantize/dequantize op and the CPU fake quantize op
        """
        np.random.seed(NP_RANDOM_SEED)
        X, (scale, zero_point, torch_type) = X
        quant_min = torch.iinfo(torch_type).min
        quant_max = torch.iinfo(torch_type).max

        X = to_tensor(X, device)
        # quantize_per_tensor and dequantize are only implemented on CPU
        Y = torch.dequantize(
            torch.quantize_per_tensor(X.cpu(), scale, zero_point, torch_type))
        Y_prime = torch.fake_quantize_per_tensor_affine(
            X, scale, zero_point, quant_min, quant_max)
        np.testing.assert_allclose(Y,
                                   Y_prime.cpu(),
                                   rtol=tolerance,
                                   atol=tolerance)

    @no_deadline
    @given(
        device=st.sampled_from(
            ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
        X=hu.tensor(shapes=hu.array_shapes(
            1,
            5,
        ),
                    qparams=hu.qparams(dtypes=[torch.quint8])),
    )
    def test_fq_module(self, device, X):
        np.random.seed(NP_RANDOM_SEED)
        X, (scale, zero_point, torch_type) = X
        quant_min = torch.iinfo(torch_type).min
        quant_max = torch.iinfo(torch_type).max

        X = to_tensor(X, device)
        X.requires_grad_()
        fq_module = torch.quantization.default_fake_quant().to(device)
        Y_prime = fq_module(X)
        assert fq_module.scale is not None
        assert fq_module.zero_point is not None
        Y = _fake_quantize_per_tensor_affine_reference(X, fq_module.scale,
                                                       fq_module.zero_point,
                                                       quant_min, quant_max)
        np.testing.assert_allclose(Y.cpu().detach().numpy(),
                                   Y_prime.cpu().detach().numpy(),
                                   rtol=tolerance,
                                   atol=tolerance)

        # Test backward
        dout = torch.rand(X.shape, dtype=torch.float, device=device)
        Y_prime.backward(dout)
        dX = _fake_quantize_per_tensor_affine_grad_reference(
            dout, X, fq_module.scale, fq_module.zero_point, quant_min,
            quant_max)
        np.testing.assert_allclose(dX.cpu().numpy(),
                                   X.grad.cpu().detach().numpy(),
                                   rtol=tolerance,
                                   atol=tolerance)

    def test_fq_serializable(self):
        observer = default_observer
        quant_min = 0
        quant_max = 255
        fq_module = FakeQuantize(observer, quant_min, quant_max)
        X = torch.tensor([-5, -3.5, -2, 0, 3, 5, 7], dtype=torch.float32)
        y_ref = fq_module(X)
        state_dict = fq_module.state_dict()
        self.assertEqual(state_dict['scale'], 0.094488)
        self.assertEqual(state_dict['zero_point'], 53)
        b = io.BytesIO()
        torch.save(state_dict, b)
        b.seek(0)
        loaded_dict = torch.load(b)
        loaded_fq_module = FakeQuantize(observer, quant_min, quant_max)
        loaded_fq_module.load_state_dict(loaded_dict)
        for key in state_dict:
            self.assertEqual(state_dict[key],
                             loaded_fq_module.state_dict()[key])

        self.assertEqual(loaded_fq_module.calculate_qparams(),
                         fq_module.calculate_qparams())

    def test_fake_quant_control(self):
        torch.manual_seed(42)
        X = torch.rand(20, 10, dtype=torch.float32)
        fq_module = torch.quantization.default_fake_quant()
        # Output of fake quant is not identical to input
        Y = fq_module(X)
        self.assertNotEqual(Y, X)
        torch.quantization.disable_fake_quant(fq_module)
        X = torch.rand(20, 10, dtype=torch.float32)
        Y = fq_module(X)
        # Fake quant is disabled, output is identical to input
        self.assertEqual(Y, X)
        scale = fq_module.scale
        zero_point = fq_module.zero_point
        torch.quantization.disable_observer(fq_module)
        torch.quantization.enable_fake_quant(fq_module)
        X = 10.0 * torch.rand(20, 10, dtype=torch.float32) - 5.0
        Y = fq_module(X)
        self.assertNotEqual(Y, X)
        # Observer is disabled, scale and zero-point do not change
        self.assertEqual(fq_module.scale, scale)
        self.assertEqual(fq_module.zero_point, zero_point)
        torch.quantization.enable_observer(fq_module)
        Y = fq_module(X)
        self.assertNotEqual(Y, X)
        # Observer is enabled, scale and zero-point are different
        self.assertNotEqual(fq_module.scale, scale)
        self.assertNotEqual(fq_module.zero_point, zero_point)
Example no. 11
class TestFakeQuantizePerChannel(TestCase):
    # NOTE: Tests in this class are decorated with no_deadline
    # to prevent spurious failures due to cuda runtime initialization.

    @no_deadline
    @given(device=st.sampled_from(
        ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.per_channel_tensor(shapes=hu.array_shapes(
               1,
               5,
           ),
                                   qparams=hu.qparams(dtypes=torch.quint8)))
    def test_forward_per_channel(self, device, X):
        r"""Tests the forward path of the FakeQuantizePerTensorAffine op.
        """
        np.random.seed(NP_RANDOM_SEED)
        X, (scale, zero_point, axis, torch_type) = X
        quant_min = torch.iinfo(torch_type).min
        quant_max = torch.iinfo(torch_type).max

        X = to_tensor(X, device)
        scale = to_tensor(scale, device)
        zero_point = torch.tensor(zero_point).to(dtype=torch.int64,
                                                 device=device)
        Y = _fake_quantize_per_channel_affine_reference(
            X.cpu(), scale.cpu(), zero_point.cpu(), axis, quant_min, quant_max)
        Y_prime = torch.fake_quantize_per_channel_affine(
            X, scale, zero_point, axis, quant_min, quant_max)
        np.testing.assert_allclose(Y,
                                   Y_prime.cpu(),
                                   rtol=tolerance,
                                   atol=tolerance)
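        # A sketch of the per-channel reference, assuming it applies the
        # per-tensor formula independently along `axis` (the actual helper
        # is defined elsewhere in this file):
        #
        #   def _fake_quantize_per_channel_affine_reference(X, scale,
        #           zero_point, axis, quant_min, quant_max):
        #       shape = [1] * X.dim()
        #       shape[axis] = -1
        #       s, zp = scale.reshape(shape), zero_point.reshape(shape)
        #       q = torch.clamp(torch.round(X / s) + zp, quant_min, quant_max)
        #       return (q - zp) * s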

    @no_deadline
    @given(device=st.sampled_from(
        ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.per_channel_tensor(shapes=hu.array_shapes(
               1,
               5,
           ),
                                   qparams=hu.qparams(dtypes=torch.quint8)))
    def test_backward_per_channel(self, device, X):
        r"""Tests the backward method.
        """
        np.random.seed(NP_RANDOM_SEED)
        X, (scale, zero_point, axis, torch_type) = X
        quant_min = torch.iinfo(torch_type).min
        quant_max = torch.iinfo(torch_type).max

        X = to_tensor(X, device)
        scale = to_tensor(scale, device)
        zero_point = torch.tensor(zero_point).to(dtype=torch.int64,
                                                 device=device)
        X.requires_grad_()
        Y_prime = torch.fake_quantize_per_channel_affine(
            X, scale, zero_point, axis, quant_min, quant_max)
        dout = torch.rand(X.shape, dtype=torch.float).to(device)
        dX = _fake_quantize_per_channel_affine_grad_reference(
            dout, X, scale, zero_point, axis, quant_min, quant_max)
        Y_prime.backward(dout)
        np.testing.assert_allclose(dX.cpu().detach().numpy(),
                                   X.grad.cpu().detach().numpy(),
                                   rtol=tolerance,
                                   atol=tolerance)

    @no_deadline
    @given(device=st.sampled_from(
        ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.per_channel_tensor(shapes=hu.array_shapes(
               1,
               5,
           ),
                                   qparams=hu.qparams(dtypes=torch.quint8)))
    @unittest.skip("temporarily disable the test")
    def test_numerical_consistency_per_channel(self, device, X):
        r"""Comparing numerical consistency between CPU quantize/dequantize op and the CPU fake quantize op
        """
        np.random.seed(NP_RANDOM_SEED)
        X, (scale, zero_point, axis, torch_type) = X
        quant_min = torch.iinfo(torch_type).min
        quant_max = torch.iinfo(torch_type).max

        X = to_tensor(X, device)
        scale = to_tensor(scale, device)
        zero_point = torch.tensor(zero_point).to(dtype=torch.int64,
                                                 device=device)
        # quantize_per_channel and dequantize are only implemented on CPU
        Y = torch.dequantize(
            torch.quantize_per_channel(X.cpu(), scale.cpu(), zero_point.cpu(),
                                       axis, torch_type))
        Y_prime = torch.fake_quantize_per_channel_affine(
            X, scale, zero_point, axis, quant_min, quant_max)
        np.testing.assert_allclose(Y,
                                   Y_prime.cpu(),
                                   rtol=tolerance,
                                   atol=tolerance)

    @no_deadline
    @given(device=st.sampled_from(
        ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.per_channel_tensor(shapes=hu.array_shapes(
               2,
               5,
           ),
                                   qparams=hu.qparams(dtypes=torch.qint8)))
    def test_fq_module(self, device, X):
        np.random.seed(NP_RANDOM_SEED)
        X, (scale, zero_point, axis, torch_type) = X
        quant_min = torch.iinfo(torch_type).min
        quant_max = torch.iinfo(torch_type).max

        X = to_tensor(X, device)
        X.requires_grad_()
        fq_module = FakeQuantize(default_per_channel_weight_observer,
                                 quant_min,
                                 quant_max,
                                 ch_axis=axis).to(device)
        Y_prime = fq_module(X)
        assert fq_module.scale is not None
        assert fq_module.zero_point is not None
        Y = _fake_quantize_per_channel_affine_reference(
            X, fq_module.scale, fq_module.zero_point, axis, quant_min,
            quant_max)
        np.testing.assert_allclose(Y.cpu().detach().numpy(),
                                   Y_prime.cpu().detach().numpy(),
                                   rtol=tolerance,
                                   atol=tolerance)

        # Test backward
        dout = torch.rand(X.shape, dtype=torch.float, device=device)
        Y_prime.backward(dout)
        dX = _fake_quantize_per_channel_affine_grad_reference(
            dout, X, fq_module.scale, fq_module.zero_point, axis, quant_min,
            quant_max)
        np.testing.assert_allclose(dX.cpu().numpy(),
                                   X.grad.cpu().detach().numpy(),
                                   rtol=tolerance,
                                   atol=tolerance)

    def test_fq_serializable(self):
        observer = default_per_channel_weight_observer
        quant_min = -128
        quant_max = 127
        fq_module = FakeQuantize(observer, quant_min, quant_max)
        X = torch.tensor(
            [[-5, -3.5, -2, 0, 3, 5, 7], [1, 3, 2, 5, 6.5, 8, 10]],
            dtype=torch.float32)
        y_ref = fq_module(X)
        state_dict = fq_module.state_dict()
        self.assertEqual(state_dict['scale'], [0.054902, 0.078431])
        self.assertEqual(state_dict['zero_point'], [0, 0])
        b = io.BytesIO()
        torch.save(state_dict, b)
        b.seek(0)
        loaded_dict = torch.load(b)
        for key in state_dict:
            self.assertEqual(state_dict[key], loaded_dict[key])