Example No. 1
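Note: these examples are excerpts from a larger Hypothesis-driven test module, so the imports and small helpers they rely on are not shown. Below is a minimal set of imports that would make Example No. 1 self-contained, together with a sketch of the `_conv_output_shape` helper it calls; the exact module paths (in particular `hypothesis_utils` and `torch.nn.quantized.functional`) are assumptions inferred from how the names are used.

import numpy as np
import torch
import torch.nn.quantized.functional as qF  # assumed source of `qF`

from hypothesis import assume, given
from hypothesis import strategies as st
import hypothesis_utils as hu  # assumed local test-helper module providing tensor_conv2d/qparams

from unittest import TestCase  # the original may use the project's own TestCase


def _conv_output_shape(input_size, kernel_size, padding, stride, dilation):
    # Standard output-size formula for one spatial dimension of a 2D convolution;
    # assumed to match the helper used by the test.
    return (input_size + 2 * padding - dilation * (kernel_size - 1) - 1) // stride + 1
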
class FunctionalAPITest(TestCase):
    @given(X=hu.tensor_conv2d(min_batch=1,
                              max_batch=3,
                              min_in_channels=1,
                              max_in_channels=7,
                              min_out_channels=1,
                              max_out_channels=7,
                              H_range=(6, 12),
                              W_range=(6, 12),
                              kH_range=(3, 5),
                              kW_range=(3, 5),
                              max_groups=4,
                              qparams=[
                                  hu.qparams(dtypes=torch.quint8,
                                             zero_point_min=0,
                                             zero_point_max=0),
                                  hu.qparams(dtypes=torch.qint8,
                                             zero_point_min=0,
                                             zero_point_max=0),
                                  hu.qparams(dtypes=torch.qint32,
                                             zero_point_min=0,
                                             zero_point_max=0)
                              ]),
           padH=st.integers(1, 3),
           padW=st.integers(1, 3),
           sH=st.integers(1, 3),
           sW=st.integers(1, 3),
           dH=st.integers(1, 2),
           dW=st.integers(1, 2),
           prepacked=st.booleans())
    def test_conv_api(self, X, padH, padW, sH, sW, dH, dW, prepacked):
        """Tests the correctness of the conv functional.

        Correctness is defined as behaving like the `quantized._ops`
        implementation: the same results, or the same error for unsupported
        configurations.
        """
        # Random inputs
        # X, (scale, zero_point, torch_type) = X
        (inputs, filters, bias, groups) = X
        inputs, (inputs_scale, inputs_zero_point, inputs_qtype) = inputs
        filters, (filters_scale, filters_zero_point, filters_qtype) = filters
        bias, (bias_scale, bias_zero_point, bias_qtype) = bias

        scale, zero_point = inputs_scale, inputs_zero_point
        torch_type = inputs_qtype

        iC, oC = inputs.shape[1], filters.shape[0]

        iH, iW = inputs.shape[2:]
        kH, kW = filters.shape[2:]
        assume(kH // 2 >= padH)
        assume(kW // 2 >= padW)
        oH = _conv_output_shape(iH, kH, padH, sH, dH)
        assume(oH > 0)
        oW = _conv_output_shape(iW, kW, padW, sW, dW)
        assume(oW > 0)

        inputs = torch.from_numpy(inputs).to(torch.float)
        filters = torch.from_numpy(filters).to(torch.float)
        bias = torch.from_numpy(bias).to(torch.float)

        kernel_size = (kH, kW)
        stride = (sH, sW)
        i_padding = (padH, padW)
        dilation = (dH, dW)

        # Quantized inputs
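        # NCHW -> NHWC: the fbgemm-based quantized conv expects channels-last activations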
        i_NHWC = inputs.permute([0, 2, 3, 1]).contiguous()
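        # K(C/G)RS -> KRS(C/G): channels-last weight layout expected by the prepack op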
        w_RSCK = filters.permute([0, 2, 3, 1]).contiguous()

        q_inputs = torch.quantize_linear(i_NHWC, inputs_scale,
                                         inputs_zero_point, inputs_qtype)
        q_filters = torch.quantize_linear(w_RSCK, filters_scale,
                                          filters_zero_point, filters_qtype)
        q_filters_ref = torch.ops.quantized.fbgemm_conv_prepack(
            q_filters, groups)
        q_bias = torch.quantize_linear(bias, bias_scale, bias_zero_point,
                                       bias_qtype)

        # Reference op
        ref_op = torch.ops.quantized.fbgemm_conv2d

        # Results check
        try:
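            # If the reference op rejects this configuration, qF.conv2d below must
            # raise the same error; otherwise the two results are compared.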
            ref_result = ref_op(q_inputs, q_filters_ref, q_bias, stride,
                                i_padding, dilation, groups, scale, zero_point)
        except RuntimeError as e:
            e_msg = str(e).split("\n")[0].split("(")[0].strip()
            np.testing.assert_raises_regex(type(e),
                                           e_msg,
                                           qF.conv2d,
                                           q_inputs,
                                           q_filters_ref,
                                           bias=q_bias,
                                           scale=scale,
                                           zero_point=zero_point,
                                           stride=stride,
                                           padding=i_padding,
                                           dilation=dilation,
                                           groups=groups,
                                           prepacked=True,
                                           dtype=torch_type)
        else:
            if prepacked:
                q_filters = torch.ops.quantized.fbgemm_conv_prepack(
                    q_filters, groups)
            q_result = qF.conv2d(q_inputs,
                                 q_filters,
                                 bias=q_bias,
                                 scale=scale,
                                 zero_point=zero_point,
                                 stride=stride,
                                 padding=i_padding,
                                 dilation=dilation,
                                 groups=groups,
                                 prepacked=prepacked,
                                 dtype=torch_type)

            np.testing.assert_equal(ref_result.int_repr().numpy(),
                                    q_result.int_repr().numpy())
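Because the test above is Hypothesis-driven, each invocation exercises many randomly drawn conv configurations; with the imports sketched above in place it can be selected on its own with, e.g., python -m pytest -k test_conv_api (the test runner and file layout are assumptions, not part of the original).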
Example No. 2
class TestQuantizedConv(unittest.TestCase):
    """Tests the correctness of quantized convolution op."""
    @given(batch_size=st.integers(1, 3),
           input_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]),
           height=st.integers(10, 16),
           width=st.integers(7, 14),
           output_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]),
           groups=st.integers(1, 3),
           kernel_h=st.integers(1, 7),
           kernel_w=st.integers(1, 7),
           stride_h=st.integers(1, 2),
           stride_w=st.integers(1, 2),
           pad_h=st.integers(0, 2),
           pad_w=st.integers(0, 2),
           dilation=st.integers(1, 1),
           X_scale=st.floats(0.2, 1.6),
           X_zero_point=st.integers(0, 4),
           W_scale=st.floats(0.2, 1.6),
           W_zero_point=st.integers(-5, 5),
           Y_scale=st.floats(0.2, 1.6),
           Y_zero_point=st.integers(0, 4),
           use_bias=st.booleans(),
           use_relu=st.booleans())
    def test_qconv(
            self,
            batch_size,
            input_channels_per_group,
            height,
            width,
            output_channels_per_group,
            groups,
            kernel_h,
            kernel_w,
            stride_h,
            stride_w,
            pad_h,
            pad_w,
            dilation,
            X_scale,
            X_zero_point,
            W_scale,
            W_zero_point,
            Y_scale,
            Y_zero_point,
            use_bias,
            use_relu
    ):

        qconv = torch.ops.quantized.fbgemm_conv2d
        if use_relu:
            qconv = torch.ops.quantized.fbgemm_conv2d_relu
        qconv_prepack = torch.ops.quantized.fbgemm_conv_prepack

        # C: total number of input channels
        input_channels = input_channels_per_group * groups
        # K: total number of output channels
        output_channels = output_channels_per_group * groups

        dilation_h = dilation_w = dilation

        # For testing we use small values for the weights and activations so that
        # no overflow occurs in the vpmaddubsw instruction. If overflow occurs in
        # the qconv implementation but not in the reference, the results cannot
        # match the reference exactly. See the comment in the qconv implementation
        # (aten/src/ATen/native/quantized/cpu/qconv.cpp) for details.
        W_value_min = -5
        W_value_max = 5
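        # (vpmaddubsw multiplies unsigned 8-bit activations by signed 8-bit weights and
        #  sums adjacent products into saturating int16 lanes; with |W| <= 5 and X <= 3
        #  each pairwise sum stays at most a few tens, far below the int16 limit of 32767.)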

        # The operator expects weights in the format
        # (output_channels, input_channels / groups, kernel_h, kernel_w).
        W_init = torch.from_numpy(
            np.random.randint(
                W_value_min,
                W_value_max,
                (output_channels, int(input_channels / groups), kernel_h, kernel_w)),
        )


        b_init = torch.from_numpy(np.random.randint(0, 10, (output_channels,)))

        stride = [stride_h, stride_w]
        pad = [pad_h, pad_w]
        dilation = [dilation_h, dilation_w]

        X_value_min = 0
        X_value_max = 4
        X_init = torch.from_numpy(np.random.randint(
            X_value_min, X_value_max, (batch_size, input_channels, height, width)))

        X = X_scale * (X_init - X_zero_point).to(dtype=torch.float)

        W = W_scale * (W_init - W_zero_point).to(dtype=torch.float)

        b = X_scale * W_scale * (b_init - 0).to(dtype=torch.float)
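        # X, W, and b are constructed so that quantizing them with the chosen scales and
        # zero points is exact (no rounding error), which lets the float reference conv
        # match the quantized op up to the final requantization step.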

        # Existing floating point conv operator
        conv_op = torch.nn.Conv2d(input_channels,
                                  output_channels,
                                  (kernel_h, kernel_w),
                                  (stride_h, stride_w),
                                  (pad_h, pad_w),
                                  (dilation_h, dilation_w),
                                  groups)

        # assign weights
        conv_op.weight = torch.nn.Parameter(W, requires_grad=False)

        conv_op.bias = torch.nn.Parameter(b, requires_grad=False) if use_bias else None

        result_ref = conv_op(X)
        if use_relu:
            relu = torch.nn.ReLU()
            result_ref = relu(result_ref)
        # quantize the reference results for comparison
        result_ref_q = torch.quantize_linear(result_ref, scale=Y_scale, zero_point=Y_zero_point, dtype=torch.quint8)

        # Reformat X and W into the layout required by the qconv operator
        # NCHW -> NHWC
        X_NHWC = X.permute([0, 2, 3, 1]).contiguous()
        # K(C/G)RS -> KRS(C/G)
        W_KRSC = W.permute([0, 2, 3, 1]).contiguous()

        X_q = torch.quantize_linear(X_NHWC, scale=X_scale, zero_point=X_zero_point, dtype=torch.quint8)
        W_q = torch.quantize_linear(W_KRSC, scale=W_scale, zero_point=W_zero_point, dtype=torch.qint8)
        b_q = torch.quantize_linear(b, scale=X_scale * W_scale, zero_point=0, dtype=torch.qint32) if use_bias else None
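        # The bias is quantized to qint32 with scale = X_scale * W_scale and zero point 0,
        # i.e. on the same grid as the int32 accumulator used inside the quantized conv.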

        W_prepack = qconv_prepack(W_q, stride, pad, dilation, groups)

        Y_q = qconv(
            X_q,
            W_prepack,
            b_q,
            stride,
            pad,
            dilation,
            groups,
            Y_scale,
            Y_zero_point,
        )

        # Back to NCHW format
        Y_q = Y_q.permute([0, 3, 1, 2]).contiguous()


        # Make sure the results match
        # assert_array_almost_equal compares using the following formula:
        #     abs(desired-actual) < 1.5 * 10**(-decimal)
        # (https://docs.scipy.org/doc/numpy/reference/generated/numpy.testing.assert_almost_equal.html)

        # We use decimal = 0 to ignore off-by-1 differences between reference and
        # test. Off-by-1 differences arise due to the order of round and
        # zero_point addition operation, i.e., if addition followed by round is
        # used by reference and round followed by addition is used by test, the
        # results may differ by 1.

        # For example, the result of round(2.5) + 1 is 3 while round(2.5 + 1) is 4
        # assuming the rounding mode is round-to-nearest, ties-to-even.
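        # (NumPy's np.round follows the same ties-to-even rule: np.round(2.5) == 2.0,
        #  while np.round(3.5) == 4.0.)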
        np.testing.assert_array_almost_equal(result_ref_q.int_repr().numpy(), Y_q.int_repr().numpy(), decimal=0)

    """Tests the correctness of the quantized::fbgemm_qconv_unpack op."""
    @given(X=hu.tensor_conv2d(min_batch=1, max_batch=3,
                              min_in_channels=1, max_in_channels=7,
                              min_out_channels=1, max_out_channels=7,
                              H_range=(6, 12), W_range=(6, 12),
                              kH_range=(3, 5), kW_range=(3, 5),
                              max_groups=4,
                              qparams=[hu.qparams(dtypes=torch.quint8,
                                                  zero_point_min=0,
                                                  zero_point_max=0),
                                       hu.qparams(dtypes=torch.qint8,
                                                  zero_point_min=0,
                                                  zero_point_max=0),
                                       hu.qparams(dtypes=torch.qint32,
                                                  zero_point_min=0,
                                                  zero_point_max=0)]),
           strideH=st.integers(1, 3), strideW=st.integers(1, 3),
           padH=st.integers(1, 2), padW=st.integers(1, 2))
    def test_qconv_unpack(self, X, strideH, strideW, padH, padW):
        """Tests the correctness of the quantized::fbgemm_qconv_unpack op."""
        (inputs, filters, bias, groups) = X
        inputs, (inputs_scale, inputs_zero_point, inputs_qtype) = inputs
        filters, (filters_scale, filters_zero_point, filters_qtype) = filters
        bias, (bias_scale, bias_zero_point, bias_qtype) = bias

        qconv_prepack = torch.ops.quantized.fbgemm_conv_prepack
        qconv_unpack = torch.ops.quantized.fbgemm_conv_unpack

        # The original tensor is assumed to be in K(C/G)RS format
        W = torch.from_numpy(filters).to(torch.float)
        # K(C/G)RS -> KRS(C/G)
        W_KRSC = W.permute([0, 2, 3, 1]).contiguous()
        W_q = torch.quantize_linear(W_KRSC, scale=filters_scale, zero_point=filters_zero_point, dtype=filters_qtype)

        # Pack weights using weight packing operator
        strides = [strideH, strideW]
        paddings = [padH, padW]
        dilations = [1, 1]
        W_packed = qconv_prepack(W_q, strides, paddings, dilations, groups)
        # Unpack the weights using the weight unpacking operator (used for serialization)
        W_unpacked = qconv_unpack(W_packed)

        # Assert equal
        np.testing.assert_equal(W_q.int_repr().numpy(), W_unpacked.int_repr().numpy())
        np.testing.assert_equal(W_q.q_scale(), W_unpacked.q_scale())
        np.testing.assert_equal(W_q.q_zero_point(), W_unpacked.q_zero_point())
Example No. 3
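Example No. 3 also relies on a module-level `_requantize` helper that is not shown here. A minimal sketch of what such a helper is assumed to do (rescale the float reference into the output quantization grid, shift by the output zero point, and clamp to the uint8 range):

import numpy as np

def _requantize(x, multiplier, zero_point, qmin=0, qmax=255, qtype=np.uint8):
    # Rescale, shift by the output zero point, and clamp into the quantized
    # dtype's representable range.
    qx = np.round(x * multiplier) + zero_point
    return np.clip(qx, qmin, qmax).astype(qtype)
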
class TestQuantizedConv(unittest.TestCase):
    """Tests the correctness of quantized convolution op."""
    @given(batch_size=st.integers(1, 3),
           input_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]),
           height=st.integers(10, 16),
           width=st.integers(7, 14),
           output_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]),
           groups=st.integers(1, 3),
           kernel_h=st.integers(1, 7),
           kernel_w=st.integers(1, 7),
           stride_h=st.integers(1, 2),
           stride_w=st.integers(1, 2),
           pad_h=st.integers(0, 2),
           pad_w=st.integers(0, 2),
           dilation=st.integers(1, 1),
           use_bias=st.booleans(),
           use_relu=st.booleans())
    def test_qconv(self, batch_size, input_channels_per_group, height, width,
                   output_channels_per_group, groups, kernel_h, kernel_w,
                   stride_h, stride_w, pad_h, pad_w, dilation, use_bias,
                   use_relu):

        qconv = torch.ops.quantized.fbgemm_conv2d
        if use_relu:
            qconv = torch.ops.quantized.fbgemm_conv2d_relu
        qconv_prepack = torch.ops.quantized.fbgemm_conv_prepack

        # C: total number of input channels
        input_channels = input_channels_per_group * groups
        # K: total number of output channels
        output_channels = output_channels_per_group * groups

        dilation_h = dilation_w = dilation

        # For testing we use small values for the weights and activations so that
        # no overflow occurs in the vpmaddubsw instruction. If overflow occurs in
        # the qconv implementation but not in the reference, the results cannot
        # match the reference exactly. See the comment in the qconv implementation
        # (aten/src/ATen/native/quantized/cpu/qconv.cpp) for details.
        W_value_min = -5
        W_value_max = 5

        # The operator expects weights in the format
        # (output_channels, input_channels / groups, kernel_h, kernel_w).
        W_init = torch.from_numpy(
            np.random.randint(W_value_min, W_value_max,
                              (output_channels, int(input_channels / groups),
                               kernel_h, kernel_w)), )

        b_init = torch.from_numpy(np.random.randint(0, 10,
                                                    (output_channels, )))

        # Existing floating point conv operator
        conv_op = torch.nn.Conv2d(
            input_channels,
            output_channels,
            (kernel_h, kernel_w),
            (stride_h, stride_w),
            (pad_h, pad_w),
            (dilation_h, dilation_w),
            groups,
        )

        # assign the weights
        conv_op.weight = torch.nn.Parameter(W_init.to(dtype=torch.float),
                                            requires_grad=False)
        conv_op.bias = torch.nn.Parameter(
            b_init.to(dtype=torch.float),
            requires_grad=False) if use_bias else None

        X_value_min = 0
        X_value_max = 4
        X_init = torch.from_numpy(
            np.random.randint(X_value_min, X_value_max,
                              (batch_size, input_channels, height, width)))

        # run on an input tensor
        result_ref = conv_op(X_init.to(dtype=torch.float))

        # Reformat X_init and W_init into the layout required by the qconv operator
        # NCHW -> NHWC
        X_NHWC = X_init.permute([0, 2, 3, 1]).contiguous()
        # K(C/G)RS -> KRS(C/G)
        W_KRSC = W_init.permute([0, 2, 3, 1]).contiguous()

        X_scale = 1.5
        # Currently only a zero point of 0 is supported.
        X_zero_point = 0
        X = X_scale * (X_NHWC - X_zero_point).to(dtype=torch.float)

        W_scale = 2.5
        W_zero_point = 0
        W = W_scale * (W_KRSC - W_zero_point).to(dtype=torch.float)

        b = X_scale * W_scale * (b_init - 0).to(dtype=torch.float)

        X_q = torch.quantize_linear(X,
                                    scale=X_scale,
                                    zero_point=X_zero_point,
                                    dtype=torch.quint8)
        W_q = torch.quantize_linear(W,
                                    scale=W_scale,
                                    zero_point=W_zero_point,
                                    dtype=torch.qint8)
        b_q = torch.quantize_linear(
            b, scale=X_scale * W_scale, zero_point=0,
            dtype=torch.qint32) if use_bias else None

        W_prepack = qconv_prepack(W_q, [stride_h, stride_w], [pad_h, pad_w],
                                  [dilation_h, dilation_w], groups)
        Y_scale = 7.3
        Y_zero_point = 5

        Y_q = qconv(
            X_q,
            W_prepack,
            b_q,
            [stride_h, stride_w],  # stride
            [pad_h, pad_w],  # padding
            [dilation_h, dilation_w],  # dilation
            groups,  # groups
            Y_scale,
            Y_zero_point,
        )

        result_NHWK = result_ref.permute([0, 2, 3, 1])
        result_q = _requantize(result_NHWK.numpy(),
                               X_scale * W_scale / Y_scale, Y_zero_point)
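        # In the quantized domain ReLU clamps at the zero point, because Y_zero_point
        # is the quantized representation of real-valued 0.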
        if use_relu:
            result_q[result_q < Y_zero_point] = Y_zero_point

        # Make sure the results match
        np.testing.assert_equal(result_q, Y_q.int_repr().numpy())

    """Tests the correctness of the quantized::fbgemm_qconv_unpack op."""

    @given(X=hu.tensor_conv2d(min_batch=1,
                              max_batch=3,
                              min_in_channels=1,
                              max_in_channels=7,
                              min_out_channels=1,
                              max_out_channels=7,
                              H_range=(6, 12),
                              W_range=(6, 12),
                              kH_range=(3, 5),
                              kW_range=(3, 5),
                              max_groups=4,
                              qparams=[
                                  hu.qparams(dtypes=torch.quint8,
                                             zero_point_min=0,
                                             zero_point_max=0),
                                  hu.qparams(dtypes=torch.qint8,
                                             zero_point_min=0,
                                             zero_point_max=0),
                                  hu.qparams(dtypes=torch.qint32,
                                             zero_point_min=0,
                                             zero_point_max=0)
                              ]),
           strideH=st.integers(1, 3),
           strideW=st.integers(1, 3),
           padH=st.integers(1, 2),
           padW=st.integers(1, 2))
    def test_qconv_unpack(self, X, strideH, strideW, padH, padW):
        """Tests the correctness of the quantized::fbgemm_qconv_unpack op."""
        (inputs, filters, bias, groups) = X
        inputs, (inputs_scale, inputs_zero_point, inputs_qtype) = inputs
        filters, (filters_scale, filters_zero_point, filters_qtype) = filters
        bias, (bias_scale, bias_zero_point, bias_qtype) = bias

        qconv_prepack = torch.ops.quantized.fbgemm_conv_prepack
        qconv_unpack = torch.ops.quantized.fbgemm_conv_unpack

        # The original tensor is assumed to be in K(C/G)RS format
        W = torch.from_numpy(filters).to(torch.float)
        # K(C/G)RS -> KRS(C/G)
        W_KRSC = W.permute([0, 2, 3, 1]).contiguous()
        W_q = torch.quantize_linear(W_KRSC,
                                    scale=filters_scale,
                                    zero_point=filters_zero_point,
                                    dtype=filters_qtype)

        # Pack weights using weight packing operator
        strides = [strideH, strideW]
        paddings = [padH, padW]
        dilations = [1, 1]
        W_packed = qconv_prepack(W_q, strides, paddings, dilations, groups)
        # Unpack the weights using the weight unpacking operator (used for serialization)
        W_unpacked = qconv_unpack(W_packed)

        # Assert equal
        np.testing.assert_equal(W_q.int_repr().numpy(),
                                W_unpacked.int_repr().numpy())
        np.testing.assert_equal(W_q.q_scale(), W_unpacked.q_scale())
        np.testing.assert_equal(W_q.q_zero_point(), W_unpacked.q_zero_point())