Example #1
    def test_qbatch_norm(self):
        bn_module = {
            # TODO: quantized batchnorm 1d module is missing
            # 1 : torch.nn.BatchNorm1d,
            2: torch.nn.BatchNorm2d,
            3: torch.nn.BatchNorm3d,
        }

        class M(torch.nn.Module):
            def __init__(self, dim):
                super(M, self).__init__()
                self.bn = bn_module[dim](3).to(torch.float)

            def forward(self, x):
                return self.bn(x)

        options = itertools.product(self.static_quant_types, [2, 3])
        quantized_nodes = {
            # 1: ns.call_module(nnq.BatchNorm1d),
            2: ns.call_module(nnq.BatchNorm2d),
            3: ns.call_module(nnq.BatchNorm3d),
        }
        for quant_type, dim in options:
            model = self.checkGraphModeFxOp(M(dim), self.img_data_dict[dim],
                                            quant_type, quantized_nodes[dim])
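The helper checkGraphModeFxOp used throughout these examples comes from the surrounding test harness and is not shown here. A minimal sketch of the prepare, calibrate, convert, verify flow it presumably wraps (static path only; the helper name and body below are an assumption), using the same two-argument prepare_fx/convert_fx entry points that appear in the later examples:

    # Hypothetical sketch (assumption): what checkGraphModeFxOp presumably does
    # for the static case: insert observers, calibrate, convert, check nodes.
    def _quantize_and_check(self, model, data, expected_node):
        from torch.quantization import default_qconfig
        from torch.quantization.quantize_fx import prepare_fx, convert_fx
        model = model.eval()
        prepared = prepare_fx(model, {"": default_qconfig})
        prepared(*data)  # calibration pass
        quantized = convert_fx(prepared)
        self.checkGraphModuleNodes(quantized, expected_node_list=[expected_node])
        return quantized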
Example #2
    def test_addmm(self):
        class M(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.weight = torch.randn(5, 5)
                self.bias = torch.randn(5)

            def forward(self, x):
                return torch.addmm(self.bias, x, self.weight)

        m = M().eval()
        prepared = prepare_fx(
            m, {"": self.qconfig}, backend_config_dict=self.trt_backend_config_dict)
        node_occurrence = {
            # weight
            ns.call_module(torch.ao.quantization.MinMaxObserver): 1,
            # activation
            ns.call_module(torch.ao.quantization.HistogramObserver): 2,
        }
        self.checkGraphModuleNodes(prepared, expected_node_occurrence=node_occurrence)
        quantized = _convert_fx_do_not_use(
            prepared, is_reference=True, backend_config_dict=self.trt_backend_config_dict)
        node_occurrence = {
            # input activation, output activation and weight
            ns.call_function(torch.quantize_per_tensor): 3,
            ns.call_function(torch.addmm): 1,
            ns.call_method("dequantize"): 3,
        }
        self.checkGraphModuleNodes(quantized, expected_node_occurrence=node_occurrence)
Example #3
    def test_ops(self):
        class M(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.conv = torch.nn.Conv2d(3, 3, 3)
                self.linear = torch.nn.Linear(5, 5)
                self.relu = torch.nn.ReLU()

            def forward(self, x):
                x = self.conv(x)
                x = self.linear(x)
                x = x + 3
                x = self.relu(x)
                x = x + 6
                return x

        m = M().eval()
        m = prepare_fx(m, {"": default_qconfig})
        m = _convert_fx_do_not_use(m, is_reference=True)
        expected_occurrence = {
            ns.call_function(torch.quantize_per_tensor): 5,
            ns.call_method("dequantize"): 5,
            ns.call_module(torch.nn.quantized._reference.Linear): 1,
            ns.call_module(torch.nn.quantized._reference.Conv2d): 1,
        }
        self.checkGraphModuleNodes(
            m, expected_node_occurrence=expected_occurrence)
Example #4
    def test_qconfig_none(self):
        class M(torch.nn.Module):
            def __init__(self):
                super(M, self).__init__()
                self.conv1 = nn.Conv2d(1, 1, 1)
                self.conv2 = nn.Conv2d(1, 1, 1)

            def forward(self, x):
                x = self.conv1(x)
                x = self.conv2(x)
                return x

        m = M().eval()
        m = symbolic_trace(m)
        qconfig_dict = {'': default_qconfig, 'conv2': None}
        m = prepare_static_fx(m, qconfig_dict)
        data = torch.randn(1, 1, 1, 1)
        m(data)
        m = convert_static_fx(m)
        m(data)
        # first conv is quantized, second conv is not quantized
        node_list = [
            ns.call_function(torch.quantize_per_tensor),
            ns.call_module(nnq.Conv2d),
            ns.call_method('dequantize'),
            ns.call_module(nn.Conv2d),
        ]
        self.checkGraphModuleNodes(m, expected_node_list=node_list)
Example #5
    def test_unsupported_qconfig(self):
        """ Check that we won't quantize the model if the qconfig is not supported
        """
        class LinearModule(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.linear = torch.nn.Linear(5, 10)

            def forward(self, x):
                return self.linear(x)

        linear_module_input = torch.rand(8, 5)

        m = LinearModule().eval()
        trt_unsupported_qconfig = default_qconfig
        prepared = prepare_fx(m, {"": trt_unsupported_qconfig},
                              backend_config_dict=self.trt_backend_config_dict)
        # calibration
        prepared(linear_module_input)
        quantized = _convert_fx_do_not_use(prepared, is_reference=True)
        node_occurrence = {
            ns.call_function(torch.quantize_per_tensor): 0,
            ns.call_method("dequantize"): 0,
            ns.call_module(torch.nn.Linear): 1,
            ns.call_module(torch.nn.quantized._reference.Linear): 0,
        }
        # check model is not quantized
        self.checkGraphModuleNodes(quantized,
                                   expected_node_occurrence=node_occurrence)
Example #6
    def test_quantized_conv(self):
        conv_module = {
            1: torch.nn.Conv1d,
            2: torch.nn.Conv2d,
            3: torch.nn.Conv3d
        }

        class Conv(torch.nn.Module):
            def __init__(self, dim):
                super(Conv, self).__init__()
                self.conv = conv_module[dim](3, 3, 3).float()

            def forward(self, x):
                return self.conv(x)

        options = itertools.product([1, 2, 3], self.static_quant_types)
        quantized_nodes = {
            # dim
            1: ns.call_module(nnq.Conv1d),
            2: ns.call_module(nnq.Conv2d),
            3: ns.call_module(nnq.Conv3d),
        }
        for dim, quant_type in options:
            model = self.checkGraphModeFxOp(Conv(dim), self.img_data_dict[dim],
                                            quant_type, quantized_nodes[dim])
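The fixtures self.static_quant_types and self.img_data_dict referenced here and in several other examples are defined in the shared test setup, which is not shown. A plausible shape for them, stated purely as an assumption so the snippets can be read on their own:

    # Assumed fixtures (not part of the original snippets): quantization types
    # to iterate over, and per-dimension sample inputs for the Conv/BN tests.
    static_quant_types = [QuantType.STATIC, QuantType.QAT]
    img_data_dict = {
        1: (torch.rand(2, 3, 10, dtype=torch.float),),
        2: (torch.rand(1, 3, 10, 10, dtype=torch.float),),
        3: (torch.rand(1, 3, 5, 5, 5, dtype=torch.float),),
    }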
Example #7
 def test_standalone_module_float_interface(self):
     float_interface_config = {
         "input_quantized_idxs": [],  # float input
         "output_quantized_idxs": [],  # float output
     }
     interface_config = float_interface_config
     # observers for the input and output of the first conv; observers for the
     # standalone module will be inserted in the standalone module itself
     prepare_count_check = {
         ns.call_module(torch.ao.quantization.HistogramObserver): 2
     }
     # for input and output of conv in the standalone module
     standalone_prepare_count_check = {
         ns.call_module(torch.ao.quantization.HistogramObserver): 2
     }
     convert_count_check = {
         # input and output of reference conv
         ns.call_function(torch.quantize_per_tensor) : 2,
         ns.call_module(nnqr.Conv2d) : 1,
         ns.call_method("dequantize") : 2,
     }
     standalone_convert_count_check = {
         # standalone module will take float as input and output
         # so we'll see quantize and dequantize in the module
         ns.call_function(torch.quantize_per_tensor) : 2,
         ns.call_module(nnqr.Conv2d): 1,
         ns.call_method("dequantize") : 2,
     }
     self._test_standalone_module(
         interface_config,
         prepare_count_check,
         standalone_prepare_count_check,
         convert_count_check,
         standalone_convert_count_check)
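Example #8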
    def test_embedding(self):
        class M(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.emb = torch.nn.Embedding(num_embeddings=10,
                                              embedding_dim=12)

            def forward(self, indices):
                return self.emb(indices)

        model = M().eval()
        indices = torch.randint(low=0, high=10, size=(20, ))

        quantized_node = ns.call_module(nnq.Embedding)
        configs = [
            (float_qparams_weight_only_qconfig, ns.call_module(nnq.Embedding)),
            (None, ns.call_module(nn.Embedding)),
            (default_qconfig, ns.call_module(nn.Embedding)),
        ]

        for qconfig, node in configs:
            qconfig_dict = {"": qconfig}
            m = prepare_fx(model, qconfig_dict)
            m = convert_fx(m)
            self._compare_script_and_mobile(m, input=indices)
Example #9
    def _get_conv_linear_test_cases(self):
        ''' Returns a list of test cases, with format:
        is_dynamic, ModuleClass, module_constructor_inputs,
        inputs, quantized_node, weight_prepack_node
        '''
        class Conv(torch.nn.Module):
            def __init__(self, weight):
                super().__init__()
                self.weight = torch.nn.Parameter(weight)
                self.stride = (1, 1)
                self.padding = (0, 0)
                self.dilation = (1, 1)
                self.groups = 1

            def forward(self, x):
                return F.conv2d(x, self.weight, None, self.stride, self.padding, self.dilation, self.groups)

        conv_input = torch.rand(1, 3, 224, 224)
        conv_weight = torch.rand(3, 3, 3, 3)

        class Linear(torch.nn.Module):
            def __init__(self, weight):
                super().__init__()
                self.weight = torch.nn.Parameter(weight)

            def forward(self, x):
                return F.linear(x, self.weight)

        linear_input = torch.rand(8, 5)
        linear_weight = torch.rand(10, 5)

        class LinearModule(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.linear = torch.nn.Linear(5, 10)

            def forward(self, x):
                return self.linear(x)

        linear_module_input = torch.rand(8, 5)

        tests = [
            (False, Conv, (conv_weight,), (conv_input,),
             ns.call_function(torch.ops.quantized.conv2d),
             ns.call_function(torch.ops.quantized.conv2d_prepack)),
            (True, Linear, (linear_weight,), (linear_input,),
             ns.call_function(torch.ops.quantized.linear_dynamic),
             ns.call_function(torch.ops.quantized.linear_prepack)),
            (False, Linear, (linear_weight,), (linear_input,),
             ns.call_function(torch.ops.quantized.linear),
             ns.call_function(torch.ops.quantized.linear_prepack)),
            (True, LinearModule, (), (linear_module_input,),
             ns.call_module(nnqd.Linear),
             None),
            (False, LinearModule, (), (linear_module_input,),
             ns.call_module(nnq.Linear),
             None),
        ]
        return tests
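The list returned above is presumably consumed by a driver that unpacks each tuple and feeds it to the shared graph-mode check helper. A hypothetical loop, modeled on the unpacking shown in Examples #11 and #27 (prepack-node checks are omitted here):

    # Hypothetical driver (assumption): run every tuple returned by
    # _get_conv_linear_test_cases through the shared helper.
    def test_conv_linear(self):
        for (is_dynamic, ModuleClass, module_constructor_inputs,
             inputs, quantized_node, weight_prepack_node) in \
                self._get_conv_linear_test_cases():
            quant_type = QuantType.DYNAMIC if is_dynamic else QuantType.STATIC
            self.checkGraphModeFxOp(
                ModuleClass(*module_constructor_inputs), inputs,
                quant_type, quantized_node)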
Example #10
    def test_input_weight_equalization_branching(self):
        """ Tests that graphs containing branches are prepared correctly.
        Specifically, equalization observers should not be inserted in front of
        a branch when both initial layers in the branch are going to be
        quantized.
        """

        # Tests that we do not add an equalization observer because both
        # initial nodes in the branch contain layers that need to be equalized.
        # Note that this should print out 2 warning messages for not being able
        # to equalize layers linear1 and linear2 because they are part of a branch
        class TestBranchingWithoutEqualizationModel(nn.Module):
            def __init__(self) -> None:
                super().__init__()
                self.linear1 = nn.Linear(5, 5)
                self.linear2 = nn.Linear(5, 5)

            def forward(self, x):
                y = self.linear1(x)
                z = self.linear2(x)
                return torch.add(y, z)

        no_eq_branching_node_occurrence = {
            ns.call_module(_InputEqualizationObserver): 0,
            ns.call_module(MinMaxObserver): 3,
        }

        m = TestBranchingWithoutEqualizationModel().eval()
        prepared = prepare_fx(
            m,
            specific_qconfig_dict,
            equalization_qconfig_dict=default_equalization_qconfig_dict)
        self.checkGraphModuleNodes(
            prepared, expected_node_occurrence=no_eq_branching_node_occurrence)

        # Tests that we will add an equalization observer because there is only
        # one initial node in the branch that needs to be equalized
        class TestBranchingWithEqualizationModel(nn.Module):
            def __init__(self) -> None:
                super().__init__()
                self.linear1 = nn.Linear(5, 5)

            def forward(self, x):
                y = self.linear1(x)
                z = torch.add(x, 5)
                return torch.add(y, z)

        eq_branching_node_occurrence = {
            ns.call_module(_InputEqualizationObserver): 1,
            ns.call_module(MinMaxObserver): 2,
        }

        m = TestBranchingWithEqualizationModel().eval()
        prepared = prepare_fx(
            m,
            specific_qconfig_dict,
            equalization_qconfig_dict=default_equalization_qconfig_dict)
        self.checkGraphModuleNodes(
            prepared, expected_node_occurrence=eq_branching_node_occurrence)
Example #11
    def test_functional(self):
        """ Test quantizing functional conv and linear
        """
        class Conv(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.stride = (1, 1)
                self.padding = (0, 0)
                self.dilation = (1, 1)
                self.groups = 1

            def forward(self, x, weight):
                return F.conv2d(x, weight, None, self.stride, self.padding,
                                self.dilation, self.groups)

        conv_input = torch.rand(1, 3, 224, 224)
        conv_weight = torch.rand(3, 3, 3, 3)

        class Linear(torch.nn.Module):
            def __init__(self):
                super().__init__()

            def forward(self, x, weight):
                return F.linear(x, weight)

        linear_input = torch.rand(8, 5)
        linear_weight = torch.rand(10, 5)

        class LinearModule(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.linear = torch.nn.Linear(5, 10)

            def forward(self, x):
                return self.linear(x)

        linear_module_input = torch.rand(8, 5)

        tests = [
            (False, Conv, (conv_input, conv_weight),
             ns.call_function(torch.ops.quantized.conv2d)),
            (True, Linear, (linear_input, linear_weight),
             ns.call_function(torch.ops.quantized.linear_dynamic)),
            (False, Linear, (linear_input, linear_weight),
             ns.call_function(torch.ops.quantized.linear)),
            (True, LinearModule, (linear_module_input, ),
             ns.call_module(nnqd.Linear)),
            (False, LinearModule, (linear_module_input, ),
             ns.call_module(nnq.Linear)),
        ]

        for is_dynamic, M, inputs, quantized_node in tests:
            quant_type = QuantType.DYNAMIC if is_dynamic else QuantType.STATIC
            self.checkGraphModeFxOp(M(), inputs, quant_type, quantized_node)
Example #12
    def test_quantized_conv_relu(self):
        """tests for conv1d_relu/conv2d_relu/conv3d_relu"""
        conv_module = {
            1: torch.nn.Conv1d,
            2: torch.nn.Conv2d,
            3: torch.nn.Conv3d
        }

        class ConvNdRelu(torch.nn.Module):
            def __init__(self, dim, inplace):
                super(ConvNdRelu, self).__init__()
                self.conv = conv_module[dim](3, 3, 3).float()
                self.relu = torch.nn.ReLU(inplace)

            def forward(self, x):
                return self.relu(self.conv(x))

        class ConvNdFunctionalRelu(torch.nn.Module):
            def __init__(self, dim):
                super(ConvNdFunctionalRelu, self).__init__()
                self.conv = conv_module[dim](3, 3, 3).float()

            def forward(self, x):
                return F.relu(self.conv(x))

        class ConvNdInplaceFunctionalRelu(torch.nn.Module):
            def __init__(self, dim):
                super(ConvNdInplaceFunctionalRelu, self).__init__()
                self.conv = conv_module[dim](3, 3, 3).float()

            def forward(self, x):
                return F.relu(self.conv(x), True)

        options = itertools.product([1, 2, 3], self.static_quant_types)
        quantized_nodes = {
            # dim
            1: ns.call_module(nniq.ConvReLU1d),
            2: ns.call_module(nniq.ConvReLU2d),
            3: ns.call_module(nniq.ConvReLU3d),
        }
        for dim, quant_type in options:
            for orig_m in [
                    ConvNdRelu(dim, True),
                    ConvNdRelu(dim, False),
                    ConvNdFunctionalRelu(dim),
                    ConvNdInplaceFunctionalRelu(dim)
            ]:
                conv_name = "conv{}d".format(dim)
                m = self.checkGraphModeFxOp(orig_m, self.img_data_dict[dim],
                                            quant_type, quantized_nodes[dim])
Example #13
    def _test_activation_impl(self, float_module, float_op, quantized_module,
                              quantized_op):
        ''' Test for an activation op (with inplace options); float_op can be a
        torch op or a functional op
        '''
        class M(torch.nn.Module):
            def __init__(self, is_module, inplace):
                super(M, self).__init__()
                self.is_module = is_module
                self.inplace = inplace
                if self.is_module:
                    self.op = float_module(self.inplace)
                else:
                    self.op = float_op

            def forward(self, input):
                if self.is_module:
                    return self.op(input)
                else:
                    return self.op(input, self.inplace)

        options = itertools.product([True, False], [True, False],
                                    self.static_quant_types)
        quantized_nodes = {
            # is_module
            True: ns.call_module(quantized_module),
            False: ns.call_function(quantized_op),
        }

        for is_module, is_inplace, quant_type in options:
            self.checkGraphModeFxOp(M(is_module, is_inplace), self.img_data_2d,
                                    quant_type, quantized_nodes[is_module])
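A call into this helper might look like the following; the concrete activation chosen here (Hardswish) is an illustrative assumption, not taken from the original:

    # Hypothetical caller (assumption): an activation that exists in module,
    # functional and quantized forms.
    def test_hardswish(self):
        self._test_activation_impl(
            nn.Hardswish, F.hardswish,
            nnq.Hardswish, torch.ops.quantized.hardswish)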
Example #14
    def _test_norm_impl(
            self, float_module, float_op, op_args, data, quantized_module, quantized_op,
            skip_op_arg_for_functional=False):
        ''' Test for a normalization op; float_op can be a torch op or a functional
        op, and op_args is a list of positional arguments for the module/op
        '''
        class M(torch.nn.Module):
            def __init__(self, is_module):
                super(M, self).__init__()
                self.is_module = is_module
                if self.is_module:
                    self.op = float_module(*op_args)
                else:
                    self.op = float_op

            def forward(self, input):
                if self.is_module:
                    return self.op(input)
                else:
                    args = [input]
                    if not skip_op_arg_for_functional:
                        args += op_args
                    return self.op(*args)

        options = itertools.product([True, False], self.static_quant_types)
        quantized_nodes = {
            # is_module
            True: ns.call_module(quantized_module),
            False: ns.call_function(quantized_op),
        }

        for is_module, quant_type in options:
            self.checkGraphModeFxOp(
                M(is_module), data, quant_type, quantized_nodes[is_module])
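Similarly, a hypothetical call into the norm helper; LayerNorm and the arguments below are chosen purely for illustration and are an assumption:

    # Hypothetical caller (assumption): LayerNorm in module and functional form.
    def test_layer_norm(self):
        data = (torch.rand(1, 2, 5, 5, dtype=torch.float),)
        self._test_norm_impl(
            nn.LayerNorm, F.layer_norm, [[2, 5, 5]], data,
            nnq.LayerNorm, torch.ops.quantized.layer_norm)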
Example #15
    def test_match_activations_fun(self):
        class M(nn.Module):
            def __init__(self):
                super().__init__()
                self.w1 = nn.Parameter(torch.empty(4, 4))
                self.b1 = nn.Parameter(torch.zeros(4))
                self.w2 = nn.Parameter(torch.empty(4, 4))
                self.b2 = nn.Parameter(torch.zeros(4))
                torch.nn.init.kaiming_uniform_(self.w1, a=math.sqrt(5))
                torch.nn.init.kaiming_uniform_(self.w2, a=math.sqrt(5))

            def forward(self, x):
                x = F.linear(x, self.w1, self.b1)
                x = F.linear(x, self.w2, self.b2)
                x = F.relu(x)
                return x

        m = M().eval()
        expected_occurrence = {
            ns.call_module(OutputLogger): 2,
        }
        self._test_match_activations(
            m, (torch.randn(4, 4), ),
            prepared_expected_node_occurrence=expected_occurrence,
            results_len=2)
Example #16
    def test_linear_fp16(self):
        class M(nn.Module):
            def __init__(self):
                super().__init__()
                self.w1 = nn.Parameter(torch.Tensor(4, 4))
                self.b1 = nn.Parameter(torch.zeros(4))
                torch.nn.init.kaiming_uniform_(self.w1, a=math.sqrt(5))

            def forward(self, x):
                x = F.linear(x, self.w1, self.b1)
                x = F.relu(x)
                return x

        qconfig_dict = {'': torch.quantization.float16_static_qconfig}

        m = M().eval()
        self._test_extract_weights(m, results_len=1, qconfig_dict=qconfig_dict)

        m = M().eval()
        expected_occurrence = {
            ns.call_module(OutputLogger): 1,
        }
        self._test_match_activations(
            m, (torch.randn(4, 4), ),
            prepared_expected_node_occurrence=expected_occurrence,
            results_len=1,
            qconfig_dict=qconfig_dict)
Example #17
    def test_clamp(self):
        class M(torch.nn.Module):
            def __init__(self):
                super(M, self).__init__()
                self.conv = torch.nn.Conv2d(2, 2, 2).float()
                self.relu6 = torch.nn.ReLU6()
                self.relu6_ = torch.nn.ReLU6(True)
                self.hardtanh = torch.nn.Hardtanh()
                self.hardtanh_ = torch.nn.Hardtanh(inplace=True)

            def forward(self, x):
                x = self.conv(x)
                x = self.relu6(x)
                self.relu6_(x)
                x = F.relu6(x)
                x = torch.clamp(x, -3, 3)
                x = x.clamp(-2.5, 2.5)
                # x = x.clamp_(-2, 2)  # Enable when quantized `clamp_` is ready
                x = self.hardtanh(x)
                self.hardtanh_(x)
                x = F.hardtanh(x)
                F.hardtanh_(x)
                return x

        data = (torch.rand((1, 2, 5, 5), dtype=torch.float),)
        # list of nodes that should occur, in order
        node_list = [
            ns.call_function(torch.quantize_per_tensor),
            ns.call_module(nnq.Conv2d),
            ns.call_function(F.hardtanh_),
            ns.call_method('dequantize')
        ]
        for quant_type in self.static_quant_types:
            m = self.checkGraphModeFxOp(
                M(), data, quant_type, expected_node_list=node_list)
Example #18
    def test_qbatch_norm_relu(self):
        bn_module = {2: torch.nn.BatchNorm2d, 3: torch.nn.BatchNorm3d}

        class BNRelu(torch.nn.Module):
            def __init__(self, dim, inplace):
                super(BNRelu, self).__init__()
                self.bn = bn_module[dim](3).to(torch.float)
                self.relu = torch.nn.ReLU(inplace=inplace)

            def forward(self, x):
                return self.relu(self.bn(x))

        class BNFuncRelu(torch.nn.Module):
            def __init__(self, dim):
                super(BNFuncRelu, self).__init__()
                self.bn = bn_module[dim](3).to(torch.float)

            def forward(self, x):
                return F.relu(self.bn(x), False)

        class BNFuncInplaceRelu(torch.nn.Module):
            def __init__(self, dim):
                super(BNFuncInplaceRelu, self).__init__()
                self.bn = bn_module[dim](3).to(torch.float)

            def forward(self, x):
                return F.relu(self.bn(x), True)

        options = itertools.product(self.static_quant_types, [2, 3])
        quantized_nodes = {
            2: ns.call_module(nniq.BNReLU2d),
            3: ns.call_module(nniq.BNReLU3d),
        }
        for quant_type, dim in options:
            for instance in [
                    BNRelu(dim, True),
                    BNRelu(dim, False),
                    BNFuncRelu(dim),
                    BNFuncInplaceRelu(dim)
            ]:
                self.checkGraphModeFxOp(instance, self.img_data_dict[dim],
                                        quant_type, quantized_nodes[dim])
Example #19
 def test_fp32_input_fp32_output(self):
     prepare_custom_config_dict = {}
     prepare_count_check = {
         ns.call_module(torch.ao.quantization.MinMaxObserver): 3,
     }
     convert_count_check = {
         ns.call_function(torch.quantize_per_tensor): 3,
         ns.call_method('dequantize'): 3,
     }
     self._test_quantized_inputs_outputs(
         prepare_custom_config_dict, prepare_count_check, convert_count_check)
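The helper _test_quantized_inputs_outputs and the model it builds are defined elsewhere; judging from the conv1/conv2 references in the comments of Example #25, the model presumably stacks two convolutions, roughly like this sketch (an assumption):

    # Assumed model shape (not shown in the original): two stacked convs,
    # matching the "conv1, conv2" references in the convert count comments.
    class TwoConvs(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.conv1 = torch.nn.Conv2d(1, 1, 1)
            self.conv2 = torch.nn.Conv2d(1, 1, 1)

        def forward(self, x):
            return self.conv2(self.conv1(x))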
Example #20
    def test_input_weight_equalization_prepare(self):
        """ Tests that graphs created after prepare_fx is as expected
        """

        linear_node_occurrence = {
            ns.call_module(_InputEqualizationObserver): 1,
            ns.call_module(MinMaxObserver): 2,
        }

        linear2_node_occurrence = {
            ns.call_module(_InputEqualizationObserver): 2,
            ns.call_module(MinMaxObserver): 3,
        }

        functionalLinear_node_occurrence = {
            ns.call_module(_InputEqualizationObserver): 2,
            ns.call_module(_WeightEqualizationObserver): 2,
            ns.call_module(MinMaxObserver): 5,
        }

        functionalLinear2Add_node_occurrence = {
            ns.call_module(_InputEqualizationObserver): 2,
            ns.call_module(_WeightEqualizationObserver): 2,
            ns.call_module(MinMaxObserver): 6,
        }

        tests = [
            (SingleLayerLinearModel, linear_node_occurrence),
            (TwoLayerLinearModel, linear2_node_occurrence),
            (TwoLayerFunctionalLinearModel, functionalLinear_node_occurrence),
            (FunctionalLinearAddModel, functionalLinear2Add_node_occurrence)
        ]

        for (M, node_occurrence) in tests:
            m = M().eval()
            prepared = prepare_fx(
                m,
                qconfig_dict,
                equalization_qconfig_dict=default_equalization_qconfig_dict)
            self.checkGraphModuleNodes(
                prepared, expected_node_occurrence=node_occurrence)
Example #21
    def test_conv_add(self):
        class M(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.conv = torch.nn.Conv2d(3, 3, 3)

            def forward(self, x, y):
                return self.conv(x) + y

        weighted_op_qint8_dtype_config = {
            # optional, input activation dtype
            "input_dtype": torch.qint8,
            # optional, weight dtype
            "weight_dtype": torch.qint8,
            # optional, bias dtype
            "bias_dtype": torch.float,
            # optional, output activation dtype
            "output_dtype": torch.qint8
        }

        conv_add_config = {
            "pattern": (operator.add, torch.nn.Conv2d, MatchAllNode),
            "observation_type":
            ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
            "dtype_configs": [
                weighted_op_qint8_dtype_config,
            ],
            "root_module":
            torch.nn.Conv2d,
            "reference_quantized_module_for_root":
            torch.nn.quantized._reference.Conv2d,
        }

        m = M().eval()
        modified_backend_config_dict = copy.deepcopy(
            self.trt_backend_config_dict)
        modified_backend_config_dict["configs"].insert(0, conv_add_config)
        m = prepare_fx(m, {"": self.qconfig},
                       backend_config_dict=modified_backend_config_dict)
        node_occurrence = {
            ns.call_module(torch.ao.quantization.HistogramObserver): 3,
        }
        self.checkGraphModuleNodes(m, expected_node_occurrence=node_occurrence)
        m = _convert_fx_do_not_use(
            m,
            is_reference=True,
            backend_config_dict=modified_backend_config_dict)
        node_occurrence = {
            ns.call_function(torch.quantize_per_tensor): 3,
            ns.call_method("dequantize"): 3,
        }
        self.checkGraphModuleNodes(m, expected_node_occurrence=node_occurrence)
Example #22
 def test_match_activations_mod(self):
     m = nn.Sequential(
         torch.quantization.QuantStub(),
         nn.Conv2d(1, 1, 1),
         nn.Conv2d(1, 1, 1),
     ).eval()
     expected_occurrence = {
         ns.call_module(OutputLogger): 2,
     }
     self._test_match_activations(
         m, (torch.randn(2, 1, 2, 2), ),
         prepared_expected_node_occurrence=expected_occurrence,
         results_len=2)
Example #23
    def test_match_activations_fun(self):
        class M(nn.Module):
            def __init__(self):
                super().__init__()
                self.w1 = nn.Parameter(torch.Tensor(4, 4))
                self.b1 = nn.Parameter(torch.zeros(4))
                self.w2 = nn.Parameter(torch.Tensor(4, 4))
                self.b2 = nn.Parameter(torch.zeros(4))
                torch.nn.init.kaiming_uniform_(self.w1, a=math.sqrt(5))
                torch.nn.init.kaiming_uniform_(self.w2, a=math.sqrt(5))

            def forward(self, x):
                x = F.linear(x, self.w1, self.b1)
                x = F.linear(x, self.w2, self.b2)
                x = F.relu(x)
                return x

        m = M().eval()
        mp = prepare_fx(m, {'': torch.quantization.default_qconfig})
        mp(torch.randn(4, 4))
        # TODO(future PR): prevent the need for copying here, we can copy the
        # modules but should reuse the underlying tensors
        mp_copy = copy.deepcopy(mp)
        mq = convert_fx(mp_copy)

        mp_ns, mq_ns = prepare_model_outputs('fp32_prepared', mp, 'int8', mq,
                                             OutputLogger)

        expected_occurrence = {
            ns.call_module(OutputLogger): 2,
        }
        self.checkGraphModuleNodes(
            mp_ns, expected_node_occurrence=expected_occurrence)
        self.checkGraphModuleNodes(
            mq_ns, expected_node_occurrence=expected_occurrence)

        # TODO(before land): test both scripted and non-scripted
        mp_ns = torch.jit.script(mp_ns)
        mq_ns = torch.jit.script(mq_ns)

        # calibrate
        input_fp32 = torch.randn(4, 4)
        mp_ns(input_fp32)
        mq_ns(input_fp32)

        # check activation result correctness
        act_compare_dict = get_matching_activations('fp32_prepared', mp_ns,
                                                    'int8', mq_ns,
                                                    OutputLogger)
        self.assertTrue(len(act_compare_dict) == 2)
        self.assert_ns_logger_act_compare_dict_valid(act_compare_dict)
Example #24
 def test_linear_fp16_shadow_activations(self):
     for should_log_inputs in (True, False):
         qconfig_dict = {'': torch.quantization.float16_static_qconfig}
         m = LinearReluFunctional().eval()
         num_loggers = 4 if should_log_inputs else 2
         expected_occurrence = {
             ns.call_module(OutputLogger): num_loggers,
         }
         res2 = self._test_match_shadow_activations(
             m, (torch.randn(4, 4), ),
             prepared_expected_node_occurrence=expected_occurrence,
             results_len=1,
             qconfig_dict=qconfig_dict,
             should_log_inputs=should_log_inputs)
Example #25
 def test_quantized_input_fp32_output(self):
     prepare_custom_config_dict = {
         'input_quantized_idxs': [0]}
     prepare_count_check = {
         ns.call_module(torch.ao.quantization.MinMaxObserver): 2,
     }
     convert_count_check = {
         # output of conv1, conv2
         ns.call_function(torch.quantize_per_tensor): 2,
         # input of ref conv1, input of ref conv2, final output
         ns.call_method('dequantize'): 3,
     }
     self._test_quantized_inputs_outputs(
         prepare_custom_config_dict, prepare_count_check, convert_count_check)
Example #26
 def _test_match_activations_fun_impl(self, prepare_fn=prepare_fx):
     m = LinearReluLinearFunctional().eval()
     qconfig_dict = None
     if prepare_fn == prepare_qat_fx:
         qconfig_dict = {
             '': torch.quantization.get_default_qat_qconfig('fbgemm')
         }
     expected_occurrence = {
         ns.call_module(OutputLogger): 2,
     }
     self._test_match_activations(
         m, (torch.randn(4, 4), ),
         prepared_expected_node_occurrence=expected_occurrence,
         results_len=2,
         prepare_fn=prepare_fn,
         qconfig_dict=qconfig_dict)
Example #27
    def test_dynamic_quant_fp16(self):
        class Linear(torch.nn.Module):
            def __init__(self, weight):
                super().__init__()
                self.weight = torch.nn.Parameter(weight)

            def forward(self, x):
                return F.linear(x, self.weight)

        linear_input = torch.rand(8, 5)
        linear_weight = torch.rand(10, 5)

        class LinearModule(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.linear = torch.nn.Linear(5, 10)

            def forward(self, x):
                return self.linear(x)

        linear_module_input = torch.rand(8, 5)

        tests = [
            (Linear, (linear_weight,), (linear_input,),
             ns.call_function(torch.ops.quantized.linear_dynamic),
             ns.call_function(torch.ops.quantized.linear_prepack_fp16)),
            (LinearModule, (), (linear_module_input,),
             ns.call_module(nnqd.Linear),
             None),
        ]
        for (ModuleClass, module_constructor_inputs,
             inputs, quantized_node, weight_prepack_node) in tests:
            for debug in [True, False]:
                node_occurrence = dict()
                if weight_prepack_node:
                    if debug:
                        node_occurrence[weight_prepack_node] = 1
                    else:
                        node_occurrence[weight_prepack_node] = 0
                m = ModuleClass(*module_constructor_inputs).eval()
                m = symbolic_trace(m)
                qconfig_dict = {"": float16_dynamic_qconfig}
                m = quantize_dynamic_fx(m, qconfig_dict, debug=debug)
                self.checkGraphModuleNodes(m, expected_node_occurrence=node_occurrence)
Example #28
 def _test_match_activations_mod_impl(self, prepare_fn=prepare_fx):
     m = nn.Sequential(
         torch.quantization.QuantStub(),
         nn.Conv2d(1, 1, 1),
         nn.Conv2d(1, 1, 1),
     ).eval()
     qconfig_dict = None
     if prepare_fn == prepare_qat_fx:
         qconfig_dict = {
             '': torch.quantization.get_default_qat_qconfig('fbgemm')
         }
     expected_occurrence = {
         ns.call_module(OutputLogger): 2,
     }
     self._test_match_activations(
         m, (torch.randn(2, 1, 2, 2), ),
         prepared_expected_node_occurrence=expected_occurrence,
         results_len=2,
         qconfig_dict=qconfig_dict,
         prepare_fn=prepare_fn)
Example #29
    def test_match_activations_mod(self):
        m = nn.Sequential(
            torch.quantization.QuantStub(),
            nn.Conv2d(1, 1, 1),
            nn.Conv2d(1, 1, 1),
        ).eval()
        mp = prepare_fx(m, {'': torch.quantization.default_qconfig})
        mp(torch.randn(2, 1, 2, 2))
        # TODO(future PR): prevent the need for copying here, we can copy the
        # modules but should reuse the underlying tensors
        mp_copy = copy.deepcopy(mp)
        mq = convert_fx(mp_copy)

        mp_ns, mq_ns = prepare_model_outputs('fp32_prepared', mp, 'int8', mq,
                                             OutputLogger)

        expected_occurrence = {
            ns.call_module(OutputLogger): 2,
        }
        self.checkGraphModuleNodes(
            mp_ns, expected_node_occurrence=expected_occurrence)
        self.checkGraphModuleNodes(
            mq_ns, expected_node_occurrence=expected_occurrence)

        # TODO(before land): test both scripted and non-scripted
        mp_ns = torch.jit.script(mp_ns)
        mq_ns = torch.jit.script(mq_ns)

        # calibrate
        input_fp32 = torch.randn(2, 1, 2, 2)
        mp_ns(input_fp32)
        mq_ns(input_fp32)

        # check activation result correctness
        act_compare_dict = get_matching_activations('fp32_prepared', mp_ns,
                                                    'int8', mq_ns,
                                                    OutputLogger)
        self.assertTrue(len(act_compare_dict) == 2)
        self.assert_ns_logger_act_compare_dict_valid(act_compare_dict)
Example #30
    def test_selective_equalization(self):
        """ Tests that we are able to run numeric suite on the equalized model
        and construct a valid equalization_qconfig_dict equalizing only the top
        4 layers with the highest quantization errors.
        """

        torch.manual_seed(1)

        class M(nn.Module):
            def __init__(self):
                super().__init__()
                self.bot = torch.nn.Sequential(torch.nn.Linear(5, 5))
                self.top = torch.nn.Sequential(torch.nn.Linear(5, 5))

            def forward(self, x):
                x = self.bot(x)
                x = torch.add(x, 5)
                x = self.top(x)
                return x

        float_model = M().eval()
        # Hard coded so that the top layer has a higher quantization error
        x = torch.tensor([[0.0642, 0.7824, 0.4255, 0.7106, 0.5957],
                          [0.8373, 0.8851, 0.8229, 0.0212, 0.8987],
                          [0.9077, 0.7538, 0.4530, 0.5772, 0.1376],
                          [0.0690, 0.9002, 0.7998, 0.2768, 0.8985],
                          [0.0282, 0.5068, 0.6725, 0.1829, 0.5480]])

        # Quantize the float model
        prepared_model = prepare_fx(copy.deepcopy(float_model), specific_qconfig_dict)
        prepared_model(x)
        quantized_model = convert_fx(copy.deepcopy(prepared_model))

        # Get the SQNR between the float and quantized model
        layer_to_sqnr_dict = get_layer_sqnr_dict(copy.deepcopy(prepared_model), quantized_model, x)

        # Construct the equalization_qconfig_dict equalizing layers with the highest
        # quantization errors
        selective_equalization_qconfig_dict = get_equalization_qconfig_dict(layer_to_sqnr_dict, 1)

        # Create the selectively equalized model
        prepared_model = prepare_fx(
            copy.deepcopy(float_model),
            specific_qconfig_dict,
            equalization_qconfig_dict=selective_equalization_qconfig_dict,
        )
        prepared_model(x)
        equalized_model = convert_fx(prepared_model)

        node_list = [
            ns.call_function(torch.quantize_per_tensor),
            ns.call_module(nnq.Linear),
            ns.call_method('dequantize'),
            ns.call_function(torch.add),
            ns.call_function(torch.mul),
            ns.call_function(torch.quantize_per_tensor),
            ns.call_module(nnq.Linear),
            ns.call_method('dequantize')
        ]

        # Check the order of nodes in the graph
        self.checkGraphModuleNodes(equalized_model, expected_node_list=node_list)