示例#1
0
    def dynamic_quant_test(
        self,
        model_fp32_path,
        data_reader,
        activation_type,
        weight_type,
        extra_options=None,
    ):
        """Dynamically quantize a Gemm model and check the resulting graph.

        Args:
            model_fp32_path: Path of the float32 model to quantize.
            data_reader: Reader that is rewound before the correctness check;
                the check itself is fed a freshly generated random input.
            activation_type: ``QuantType`` selecting the expected activation
                tensor type and the ``u8``/``s8`` tag of the output file name.
                This helper does not forward it to ``quantize_dynamic``.
            weight_type: ``QuantType`` forwarded to ``quantize_dynamic``.
            extra_options: Optional dict forwarded to ``quantize_dynamic``.
                ``None`` (the default) is replaced by a new empty dict.
        """
        # A ``{}`` default would be a single dict shared by every call; build
        # a fresh one per call instead.
        if extra_options is None:
            extra_options = {}

        activation_is_u8 = activation_type == QuantType.QUInt8
        activation_proto_qtype = TensorProto.UINT8 if activation_is_u8 else TensorProto.INT8
        activation_type_str = "u8" if activation_is_u8 else "s8"
        weight_type_str = "u8" if weight_type == QuantType.QUInt8 else "s8"
        model_int8_path = "gemm_fp32.quant_dynamic_{}{}.onnx".format(
            activation_type_str, weight_type_str)

        quantize_dynamic(
            model_fp32_path,
            model_int8_path,
            weight_type=weight_type,
            extra_options=extra_options,
        )

        # Expected op-type counts in the quantized graph.
        quant_nodes = {"MatMulInteger": 2}
        check_op_type_count(self, model_int8_path, **quant_nodes)

        # Expected tensor type per node I/O slot; the triple format is defined
        # by check_qtype_by_node_type (presumably kind, index, type -- confirm).
        qnode_io_qtypes = {"MatMulInteger": [["i", 2, activation_proto_qtype]]}
        check_qtype_by_node_type(self, model_int8_path, qnode_io_qtypes)

        data_reader.rewind()
        check_model_correctness(
            self,
            model_fp32_path,
            model_int8_path,
            {"input": np.random.rand(5, 10).astype(np.float32)},
        )
示例#2
0
    def test_quantize_batch_size_1(self):
        """Dynamically quantize the model built with batch size 1 and verify it."""
        batch, hidden_size, sequence_length = 1, 4, 4

        fp32_model = "test_embed_layer_norm_unit_test_batch1.onnx"
        uint8_model = "test_embed_layer_norm_unit_test_batch1_uint8.onnx"

        self.construct_model(batch, hidden_size, sequence_length, fp32_model)

        # Both integer inputs share the same [batch, sequence_length] shape.
        input_shapes = {
            name: [batch, sequence_length]
            for name in ("input_ids", "segment_ids")
        }
        feeds = self.input_feeds_int32(1, input_shapes)

        quantize_dynamic(fp32_model, uint8_model)

        # Quantization should not have any DequantizeLinear nodes:
        check_op_type_count(
            self,
            uint8_model,
            DequantizeLinear=0,
            QEmbedLayerNormalization=1,
        )

        feeds.rewind()
        check_model_correctness(self, fp32_model, uint8_model, feeds.get_next())
示例#3
0
    def test_quantize_resize(self):
        """Statically quantize the conv/resize model in QOperator and QDQ formats."""
        np.random.seed(1)

        fp32_model = 'resize_fp32.onnx'
        qop_model = 'resize_uint8.onnx'
        qdq_model = 'resize_uint8_qdq.onnx'

        resize_attrs = {
            'coordinate_transformation_mode': 'asymmetric',
            'mode': 'nearest',
            'nearest_mode': 'floor',
        }
        self.construct_model_conv_resize(
            fp32_model,
            [1, 2, 26, 42],
            [3, 2, 3, 3],
            [1, 3, 24, 40],
            [1, 3, 48, 80],
            resize_attrs,
            [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0],
            [1.0, 1.0, 2.0, 2.0],
            None,
        )

        def resize_not_fed_by_conv_output(node):
            # True for every node except a "resize_node" whose first input is
            # still the float 'conv_output' tensor.
            return not (node.name == "resize_node" and node.input[0] == 'conv_output')

        # --- QOperator format ---
        feeds = self.input_feeds(1, {'input': [1, 2, 26, 42]})
        quantize_static(fp32_model, qop_model, feeds)

        # The resize node must consume a quantized tensor; its input name
        # reveals that.
        check_op_nodes(self, qop_model, resize_not_fed_by_conv_output)
        check_op_type_count(
            self,
            qop_model,
            QLinearConv=1,
            QuantizeLinear=1,
            DequantizeLinear=2,
            Resize=1,
        )
        feeds.rewind()
        check_model_correctness(self, fp32_model, qop_model, feeds.get_next())

        # --- QDQ format ---
        feeds.rewind()
        quantize_static(fp32_model, qdq_model, feeds, quant_format=QuantFormat.QDQ)
        check_op_type_count(
            self,
            qdq_model,
            Conv=1,
            QuantizeLinear=2,
            DequantizeLinear=3,
            Resize=1,
        )
        feeds.rewind()
        check_model_correctness(self, fp32_model, qdq_model, feeds.get_next())
    def test_quantize_avgpool(self):
        """Statically quantize the conv/average-pool model in QOperator and QDQ formats."""
        np.random.seed(1)

        fp32_model = 'avgpool_fp32.onnx'
        qop_model = 'avgpool_uint8.onnx'
        qdq_model = 'avgpool_uint8_qdq.onnx'

        pool_attrs = {'kernel_shape': [3, 3]}
        self.construct_model_conv_avgpool(
            fp32_model,
            [1, 2, 26, 42],
            [3, 2, 3, 3],
            [1, 3, 24, 40],
            pool_attrs,
            [1, 3, 22, 38],
        )

        # --- QOperator format ---
        feeds = self.input_feeds(1, {'input': [1, 2, 26, 42]})
        quantize_static(fp32_model, qop_model, feeds)
        check_op_type_count(
            self,
            qop_model,
            QLinearConv=1,
            QuantizeLinear=1,
            DequantizeLinear=2,
            QLinearAveragePool=1,
        )
        feeds.rewind()
        check_model_correctness(self, fp32_model, qop_model, feeds.get_next())

        # --- QDQ format ---
        feeds.rewind()
        quantize_static(fp32_model, qdq_model, feeds, quant_format=QuantFormat.QDQ)
        check_op_type_count(
            self,
            qdq_model,
            Conv=1,
            QuantizeLinear=3,
            DequantizeLinear=4,
            AveragePool=1,
        )
        feeds.rewind()
        check_model_correctness(self, fp32_model, qdq_model, feeds.get_next())
示例#5
0
 def verify_quantize_with_pad_mode(self,
                                   pad_mode,
                                   constant_value=None,
                                   quantize_mode='static'):
     """Build the conv/pad model, quantize it and verify the quantized graph.

     Args:
         pad_mode: Pad mode handed to ``construct_model_conv_pad``; ``None``
             is tagged as ``'none'`` in the generated file names.
         constant_value: Optional value forwarded to
             ``construct_model_conv_pad``; when given, ``'_value'`` is
             appended to the file-name tag.
         quantize_mode: ``'static'`` passes the data reader to
             ``self.quantize_model``; any other value passes ``None``.
     """
     np.random.seed(108)

     mode_tag = 'none' if pad_mode is None else pad_mode
     value_tag = '_value' if constant_value is not None else ''
     fp32_model = 'qop_pad_{}_fp32_{}{}.onnx'.format(quantize_mode, mode_tag, value_tag)
     i8_model = 'qop_pad_{}_i8_{}{}.onnx'.format(quantize_mode, mode_tag, value_tag)

     feeds = self.input_feeds(1, {'input': [1, 8, 33, 33]})
     self.construct_model_conv_pad(
         fp32_model,
         [1, 8, 33, 33],
         [16, 8, 3, 3],
         [1, 16, 31, 31],
         pad_mode,
         [0, 0, 1, 2, 0, 0, 3, 4],
         constant_value=constant_value,
     )

     is_static = quantize_mode == 'static'
     self.quantize_model(fp32_model, i8_model, feeds if is_static else None)
     feeds.rewind()

     if is_static:
         # DequantizeLinear=2 means there is one DequantizeLinear node after
         # both the conv and the pad, i.e. the pad node runs with quantized
         # semantics.
         expected_counts = {'DequantizeLinear': 2, 'QuantizeLinear': 1}
     else:
         # In dynamic quantization the pad operator is in fact not quantized,
         # because its input is fp32.
         expected_counts = {'DynamicQuantizeLinear': 1}

     check_op_type_count(self, i8_model, **expected_counts)
     check_model_correctness(self, fp32_model, i8_model, feeds.get_next())
示例#6
0
 def dynamic_quant_conv(self, model_fp32_path, model_int8_path):
     """Dynamically quantize the model and verify node counts and outputs.

     Args:
         model_fp32_path: Path of the float32 model to quantize.
         model_int8_path: Path the quantized model is written to.
     """
     quantize_dynamic(model_fp32_path, model_int8_path)

     # Expected op-type counts in the quantized graph.
     check_op_type_count(self, model_int8_path, ConvInteger=2)

     # Compare both models on one random float32 input of shape (4, 2, 8, 8).
     random_feed = {'input': np.random.rand(4, 2, 8, 8).astype(np.float32)}
     check_model_correctness(self, model_fp32_path, model_int8_path, random_feed)
示例#7
0
    def dynamic_quant_test(self,
                           model_fp32_path,
                           data_reader,
                           activation_type,
                           weight_type,
                           extra_options=None):
        """Dynamically quantize a Gemm model and check the resulting graph.

        Args:
            model_fp32_path: Path of the float32 model to quantize.
            data_reader: Reader that is rewound before the correctness check;
                the check itself is fed a freshly generated random input.
            activation_type: ``QuantType`` forwarded to ``quantize_dynamic``;
                also selects the expected activation tensor type and the
                ``u8``/``s8`` tag of the output file name.
            weight_type: ``QuantType`` forwarded to ``quantize_dynamic``.
            extra_options: Optional dict forwarded to ``quantize_dynamic``.
                ``None`` (the default) is replaced by a new empty dict.
        """
        # A ``{}`` default would be a single dict shared by every call; build
        # a fresh one per call instead.
        if extra_options is None:
            extra_options = {}

        activation_is_u8 = activation_type == QuantType.QUInt8
        activation_proto_qtype = TensorProto.UINT8 if activation_is_u8 else TensorProto.INT8
        activation_type_str = 'u8' if activation_is_u8 else 's8'
        weight_type_str = 'u8' if weight_type == QuantType.QUInt8 else 's8'
        model_int8_path = 'gemm_fp32.quant_dynamic_{}{}.onnx'.format(
            activation_type_str, weight_type_str)

        quantize_dynamic(model_fp32_path,
                         model_int8_path,
                         activation_type=activation_type,
                         weight_type=weight_type,
                         extra_options=extra_options)

        # Expected op-type counts in the quantized graph.
        quant_nodes = {'MatMulInteger': 2}
        check_op_type_count(self, model_int8_path, **quant_nodes)

        # Expected tensor type per node I/O slot; the triple format is defined
        # by check_qtype_by_node_type (presumably kind, index, type -- confirm).
        qnode_io_qtypes = {'MatMulInteger': [['i', 2, activation_proto_qtype]]}
        check_qtype_by_node_type(self, model_int8_path, qnode_io_qtypes)

        data_reader.rewind()
        check_model_correctness(
            self, model_fp32_path, model_int8_path,
            {'input': np.random.rand(5, 10).astype(np.float32)})
示例#8
0
    def verify(self, per_channel):
        """Quantize the conv/clip model in QDQ and QOperator formats and verify both.

        Args:
            per_channel: Passed to ``quantize_static`` as both ``per_channel``
                and ``reduce_range``; also embedded in the model file names.
        """
        np.random.seed(1)

        fp32_model = 'conv_clip_fp32.{}.onnx'.format(per_channel)
        qdq_model = 'conv_clip_quant_qdq.{}.onnx'.format(per_channel)
        qop_model = 'conv_clip_quant_qop.{}.onnx'.format(per_channel)

        feeds = self.input_feeds(1, {'input': [1, 8, 33, 33]})
        self.construct_model_conv_clip(fp32_model, [1, 8, 33, 33], [16, 8, 3, 3], [15376])

        # --- QDQ format ---
        quantize_static(
            fp32_model,
            qdq_model,
            feeds,
            quant_format=QuantFormat.QDQ,
            per_channel=per_channel,
            reduce_range=per_channel,
        )
        feeds.rewind()

        # Topological-order check of the op types in the QDQ graph.
        expected_qdq_order = [
            'DequantizeLinear',
            'QuantizeLinear',
            'DequantizeLinear',
            'Conv',
            'QuantizeLinear',
            'DequantizeLinear',
            'Reshape',
            'QuantizeLinear',
            'DequantizeLinear',
        ]
        check_op_type_order(self, qdq_model, expected_qdq_order)
        check_model_correctness(self, fp32_model, qdq_model, feeds.get_next())

        # --- QOperator format ---
        feeds.rewind()
        quantize_static(
            fp32_model,
            qop_model,
            feeds,
            quant_format=QuantFormat.QOperator,
            per_channel=per_channel,
            reduce_range=per_channel,
        )
        feeds.rewind()
        check_op_type_count(
            self,
            qop_model,
            QLinearConv=1,
            QuantizeLinear=1,
            DequantizeLinear=1,
        )
        check_model_correctness(self, fp32_model, qop_model, feeds.get_next())
示例#9
0
    def verify_quantize_conv(self, has_bias, per_channel):
        """Quantize the conv model in QDQ and QOperator formats and verify both.

        Args:
            has_bias: Forwarded to ``construct_model_conv``; the QDQ graph is
                expected to hold 4 DequantizeLinear nodes when truthy, else 3.
            per_channel: Passed to ``quantize_static`` as both ``per_channel``
                and ``reduce_range``.
        """
        np.random.seed(1)

        fp32_model = 'conv_fp32.{}.{}.onnx'.format(has_bias, per_channel)
        qdq_model = 'conv_quant_qdq.{}.{}.onnx'.format(has_bias, per_channel)
        qop_model = 'conv_quant_qop.{}.{}.onnx'.format(has_bias, per_channel)

        feeds = self.input_feeds(1, {'input': [1, 8, 33, 33]})
        self.construct_model_conv(
            fp32_model,
            [1, 8, 33, 33],
            [16, 8, 3, 3],
            [1, 16, 31, 31],
            has_bias,
        )

        # --- QDQ format ---
        quantize_static(
            fp32_model,
            qdq_model,
            feeds,
            quant_format=QuantFormat.QDQ,
            per_channel=per_channel,
            reduce_range=per_channel,
        )
        feeds.rewind()
        if has_bias:
            expected_dq_count = 4
        else:
            expected_dq_count = 3
        check_op_type_count(
            self,
            qdq_model,
            Conv=1,
            QuantizeLinear=2,
            DequantizeLinear=expected_dq_count,
        )
        check_model_correctness(self, fp32_model, qdq_model, feeds.get_next())

        # --- QOperator format ---
        feeds.rewind()
        quantize_static(
            fp32_model,
            qop_model,
            feeds,
            quant_format=QuantFormat.QOperator,
            per_channel=per_channel,
            reduce_range=per_channel,
        )
        feeds.rewind()
        check_op_type_count(
            self,
            qop_model,
            QLinearConv=1,
            QuantizeLinear=1,
            DequantizeLinear=1,
        )
        check_model_correctness(self, fp32_model, qop_model, feeds.get_next())
示例#10
0
    def dynamic_quant_conv_test(self, weight_type, extra_options=None):
        """Build the conv-bias model, quantize it dynamically and verify it.

        The activation type is fixed to uint8 in this helper.

        Args:
            weight_type: ``QuantType`` forwarded to ``quantize_dynamic``; also
                selects the ``u8``/``s8`` weight tag of the output file name.
            extra_options: Optional dict forwarded to ``quantize_dynamic``.
                ``None`` (the default) is replaced by a new empty dict.
        """
        # A ``{}`` default would be a single dict shared by every call; build
        # a fresh one per call instead.
        if extra_options is None:
            extra_options = {}

        np.random.seed(1)
        model_fp32_path = "conv_bias.fp32.onnx"
        self.construct_model(model_fp32_path)

        activation_proto_qtype = TensorProto.UINT8
        activation_type_str = "u8"
        weight_type_str = "u8" if weight_type == QuantType.QUInt8 else "s8"
        model_int8_path = "conv_bias.quant.{}{}.onnx".format(
            activation_type_str, weight_type_str)

        quantize_dynamic(
            model_fp32_path,
            model_int8_path,
            weight_type=weight_type,
            extra_options=extra_options,
        )

        # Expected op-type counts in the quantized graph.
        quant_nodes = {"ConvInteger": 2}
        check_op_type_count(self, model_int8_path, **quant_nodes)

        # Expected tensor type per node I/O slot; the triple format is defined
        # by check_qtype_by_node_type (presumably kind, index, type -- confirm).
        qnode_io_qtypes = {"ConvInteger": [["i", 2, activation_proto_qtype]]}
        check_qtype_by_node_type(self, model_int8_path, qnode_io_qtypes)

        check_model_correctness(
            self,
            model_fp32_path,
            model_int8_path,
            {"input": np.random.rand(4, 2, 8, 8).astype(np.float32)},
        )
示例#11
0
    def dynamic_quant_conv_test(self,
                                activation_type,
                                weight_type,
                                extra_options=None):
        """Build the conv-bias model, quantize it dynamically and verify it.

        Args:
            activation_type: ``QuantType`` forwarded to ``quantize_dynamic``;
                also selects the expected activation tensor type and the
                ``u8``/``s8`` tag of the output file name.
            weight_type: ``QuantType`` forwarded to ``quantize_dynamic``.
            extra_options: Optional dict forwarded to ``quantize_dynamic``.
                ``None`` (the default) is replaced by a new empty dict.
        """
        # A ``{}`` default would be a single dict shared by every call; build
        # a fresh one per call instead.
        if extra_options is None:
            extra_options = {}

        np.random.seed(1)
        model_fp32_path = 'conv_bias.fp32.onnx'
        self.construct_model(model_fp32_path)

        activation_is_u8 = activation_type == QuantType.QUInt8
        activation_proto_qtype = TensorProto.UINT8 if activation_is_u8 else TensorProto.INT8
        activation_type_str = 'u8' if activation_is_u8 else 's8'
        weight_type_str = 'u8' if weight_type == QuantType.QUInt8 else 's8'
        model_int8_path = 'conv_bias.quant.{}{}.onnx'.format(
            activation_type_str, weight_type_str)

        quantize_dynamic(model_fp32_path,
                         model_int8_path,
                         activation_type=activation_type,
                         weight_type=weight_type,
                         extra_options=extra_options)

        # Expected op-type counts in the quantized graph.
        quant_nodes = {'ConvInteger': 2}
        check_op_type_count(self, model_int8_path, **quant_nodes)

        # Expected tensor type per node I/O slot; the triple format is defined
        # by check_qtype_by_node_type (presumably kind, index, type -- confirm).
        qnode_io_qtypes = {'ConvInteger': [['i', 2, activation_proto_qtype]]}
        check_qtype_by_node_type(self, model_int8_path, qnode_io_qtypes)

        check_model_correctness(
            self, model_fp32_path, model_int8_path,
            {'input': np.random.rand(4, 2, 8, 8).astype(np.float32)})
    def run_quantize_squeezes_of_opset(self, opset=13):
        """Statically quantize the conv/squeezes model for ``opset`` and verify it.

        Args:
            opset: Opset forwarded to ``construct_model_conv_squeezes`` and
                embedded in the generated model file names (default 13).
        """
        np.random.seed(1)

        fp32_model = 'squeezes_opset{}_fp32.onnx'.format(opset)
        qop_model = 'squeezes_opset{}_uint8.onnx'.format(opset)
        qdq_model = 'squeezes_opset{}_uint8_qdq.onnx'.format(opset)

        self.construct_model_conv_squeezes(
            fp32_model,
            [1, 2, 26, 42],
            [3, 2, 3, 3],
            [1, 3, 24, 40],
            opset=opset,
        )

        # --- QOperator format ---
        feeds = self.input_feeds(1, {'input': [1, 2, 26, 42]})
        quantize_static(fp32_model, qop_model, feeds)

        # Only one QuantizeLinear and one DequantizeLinear node are expected
        # in the QOperator graph.
        check_op_type_count(self, qop_model, QuantizeLinear=1, DequantizeLinear=1)
        feeds.rewind()
        check_model_correctness(
            self, fp32_model, qop_model, feeds.get_next(), rtol=0.01, atol=0.5)

        # --- QDQ format ---
        feeds.rewind()
        quantize_static(fp32_model, qdq_model, feeds, quant_format=QuantFormat.QDQ)
        check_op_type_count(
            self, qdq_model, Conv=3, QuantizeLinear=8, DequantizeLinear=11)
        feeds.rewind()
        check_model_correctness(
            self, fp32_model, qdq_model, feeds.get_next(), rtol=0.01, atol=0.5)
示例#13
0
    def test_quantize_batch_size_2(self):
        """Dynamically quantize the model built with batch size 2 and verify it."""
        batch, hidden_size, sequence_length = 2, 4, 4

        fp32_model = 'test_embed_layer_norm_unit_test_batch2.onnx'
        uint8_model = 'test_embed_layer_norm_unit_test_batch2_uint8.onnx'

        self.construct_model(batch, hidden_size, sequence_length, fp32_model)

        # Both integer inputs share the same [batch, sequence_length] shape.
        input_shapes = {
            name: [batch, sequence_length]
            for name in ('input_ids', 'segment_ids')
        }
        feeds = self.input_feeds_int32(1, input_shapes)

        quantize_dynamic(fp32_model, uint8_model)

        # Quantization should not have any DequantizeLinear nodes:
        check_op_type_count(
            self,
            uint8_model,
            DequantizeLinear=0,
            QEmbedLayerNormalization=1,
        )

        feeds.rewind()
        check_model_correctness(self, fp32_model, uint8_model, feeds.get_next())
示例#14
0
    def test_quantize_concat(self):
        """Statically quantize the concat model in QOperator and QDQ formats."""
        np.random.seed(1)

        fp32_model = 'concat_fp32.onnx'
        qop_model = 'concat_uint8.onnx'
        qdq_model = 'concat_uint8_qdq.onnx'

        self.construct_model(fp32_model)

        # --- QOperator format ---
        feeds = InputFeedsNegOneZeroOne(1, {'input': [1, 3, 15, 15]})
        quantize_static(fp32_model, qop_model, feeds)
        check_op_type_count(
            self,
            qop_model,
            QLinearConv=3,
            QuantizeLinear=1,
            DequantizeLinear=1,
            QLinearConcat=1,
        )
        feeds.rewind()
        check_model_correctness(self, fp32_model, qop_model, feeds.get_next())

        # --- QDQ format ---
        feeds.rewind()
        quantize_static(fp32_model, qdq_model, feeds, quant_format=QuantFormat.QDQ)
        check_op_type_count(
            self,
            qdq_model,
            Conv=3,
            QuantizeLinear=5,
            DequantizeLinear=8,
            Concat=1,
        )
        feeds.rewind()
        check_model_correctness(self, fp32_model, qdq_model, feeds.get_next())
示例#15
0
    def test_quantize_transpose(self):
        """Statically quantize the matmul/transpose model in QOperator and QDQ formats."""
        np.random.seed(1)

        fp32_model = 'transpose_fp32.onnx'
        qop_model = 'transpose_uint8.onnx'
        qdq_model = 'transpose_uint8_qdq.onnx'

        self.construct_model_matmul_transpose(fp32_model, [3, 7], [7, 5], [5, 3])

        def transpose_not_fed_by_matmul_output(node):
            # True for every node except a "transpose_node" whose first input
            # is still the float 'matmul_output' tensor.
            return not (node.name == "transpose_node" and node.input[0] == 'matmul_output')

        # --- QOperator format ---
        feeds = self.input_feeds(1, {'input': [3, 7]})
        quantize_static(fp32_model, qop_model, feeds)

        # The transpose node must consume a quantized tensor; its input name
        # reveals that.
        check_op_nodes(self, qop_model, transpose_not_fed_by_matmul_output)
        check_op_type_count(
            self,
            qop_model,
            QLinearMatMul=1,
            QuantizeLinear=1,
            DequantizeLinear=1,
            Transpose=1,
        )
        feeds.rewind()
        check_model_correctness(self, fp32_model, qop_model, feeds.get_next())

        # --- QDQ format ---
        feeds.rewind()
        quantize_static(fp32_model, qdq_model, feeds, quant_format=QuantFormat.QDQ)
        check_op_type_count(
            self,
            qdq_model,
            MatMul=1,
            QuantizeLinear=2,
            DequantizeLinear=3,
            Transpose=1,
        )
        feeds.rewind()
        check_model_correctness(self, fp32_model, qdq_model, feeds.get_next())
示例#16
0
    def static_quant_test_qdq(self,
                              model_fp32_path,
                              data_reader,
                              activation_type,
                              weight_type,
                              extra_options=None):
        """Statically quantize a Gemm model in QDQ format and check the result.

        Args:
            model_fp32_path: Path of the float32 model to quantize.
            data_reader: Reader passed to ``quantize_static``; it is rewound
                before quantization and again before the correctness check.
            activation_type: ``QuantType`` forwarded to ``quantize_static``;
                also selects the expected QuantizeLinear tensor type and the
                ``u8``/``s8`` tag of the output file name.
            weight_type: ``QuantType`` forwarded to ``quantize_static``.
            extra_options: Optional dict forwarded to ``quantize_static``.
                ``None`` (the default) is replaced by a new empty dict.
        """
        # A ``{}`` default would be a single dict shared by every call; build
        # a fresh one per call instead.
        if extra_options is None:
            extra_options = {}

        activation_is_u8 = activation_type == QuantType.QUInt8
        activation_proto_qtype = TensorProto.UINT8 if activation_is_u8 else TensorProto.INT8
        activation_type_str = 'u8' if activation_is_u8 else 's8'
        weight_type_str = 'u8' if weight_type == QuantType.QUInt8 else 's8'
        model_int8_path = 'gemm_fp32.quant_dqd_{}{}.onnx'.format(
            activation_type_str, weight_type_str)

        data_reader.rewind()
        quantize_static(model_fp32_path,
                        model_int8_path,
                        data_reader,
                        quant_format=QuantFormat.QDQ,
                        activation_type=activation_type,
                        weight_type=weight_type,
                        extra_options=extra_options)

        # Expected op-type counts in the QDQ graph.
        quant_nodes = {'Gemm': 2, 'QuantizeLinear': 3, 'DequantizeLinear': 7}
        check_op_type_count(self, model_int8_path, **quant_nodes)

        # Expected tensor type per node I/O slot; the triple format is defined
        # by check_qtype_by_node_type (presumably kind, index, type -- confirm).
        qnode_io_qtypes = {
            'QuantizeLinear': [['i', 2, activation_proto_qtype],
                               ['o', 0, activation_proto_qtype]]
        }
        check_qtype_by_node_type(self, model_int8_path, qnode_io_qtypes)

        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_int8_path,
                                data_reader.get_next())
示例#17
0
    def verify(self, per_channel, is_quant_type_int8):
        """Quantize the conv/relu model in QDQ and QOperator formats and verify both.

        Args:
            per_channel: Passed to ``quantize_static`` as both ``per_channel``
                and ``reduce_range``; also embedded in the model file names.
            is_quant_type_int8: When truthy, activations and weights use
                ``QuantType.QInt8``; otherwise ``QuantType.QUInt8``.
        """
        np.random.seed(1)

        fp32_model = "conv_relu_fp32.{}.onnx".format(per_channel)
        qdq_model = "conv_relu_quant_qdq.{}.onnx".format(per_channel)
        qop_model = "conv_relu_quant_qop.{}.onnx".format(per_channel)

        feeds = self.input_feeds(1, {"input": [1, 8, 33, 33]})
        self.construct_model_conv_relu(
            fp32_model, [1, 8, 33, 33], [16, 8, 3, 3], [1, 16, 31, 31])

        # Activations and weights always share the same quantization type.
        if is_quant_type_int8:
            quant_type = QuantType.QInt8
        else:
            quant_type = QuantType.QUInt8

        # --- QDQ format ---
        quantize_static(
            fp32_model,
            qdq_model,
            feeds,
            quant_format=QuantFormat.QDQ,
            per_channel=per_channel,
            reduce_range=per_channel,
            activation_type=quant_type,
            weight_type=quant_type,
        )
        feeds.rewind()

        # Topological-order check of the op types in the QDQ graph.
        expected_qdq_order = [
            "DequantizeLinear",
            "QuantizeLinear",
            "DequantizeLinear",
            "Conv",
            "QuantizeLinear",
            "DequantizeLinear",
        ]
        check_op_type_order(self, qdq_model, expected_qdq_order)
        check_model_correctness(self, fp32_model, qdq_model, feeds.get_next())

        # --- QOperator format ---
        feeds.rewind()
        quantize_static(
            fp32_model,
            qop_model,
            feeds,
            quant_format=QuantFormat.QOperator,
            per_channel=per_channel,
            reduce_range=per_channel,
            activation_type=quant_type,
            weight_type=quant_type,
        )
        feeds.rewind()
        check_op_type_count(
            self,
            qop_model,
            QLinearConv=1,
            QuantizeLinear=1,
            DequantizeLinear=1,
        )
        check_model_correctness(self, fp32_model, qop_model, feeds.get_next())
示例#18
0
 def dynamic_attention_quant_test(self, model_fp32_path, model_int8_path,
                                  per_channel, reduce_range):
     """Dynamically quantize the model and verify node counts and outputs.

     Args:
         model_fp32_path: Path of the float32 model to quantize.
         model_int8_path: Path the quantized model is written to.
         per_channel: Forwarded to ``quantize_dynamic``.
         reduce_range: Forwarded to ``quantize_dynamic``.
     """
     quantize_dynamic(
         model_fp32_path,
         model_int8_path,
         per_channel=per_channel,
         reduce_range=reduce_range,
     )

     # Expected op-type counts in the quantized graph.
     check_op_type_count(self, model_int8_path, QAttention=1, MatMulInteger=1)

     # Compare both models on one random float32 input of shape (1, 5, 10).
     random_feed = {'input': np.random.rand(1, 5, 10).astype(np.float32)}
     check_model_correctness(self, model_fp32_path, model_int8_path, random_feed)
示例#19
0
 def static_quant_test(self, model_fp32_path, model_int8_path):
     """Statically quantize the model (default format) and verify the result.

     Args:
         model_fp32_path: Path of the float32 model to quantize.
         model_int8_path: Path the quantized model is written to.
     """
     feeds = self.input_feeds(1, {'input': [5, 10]})
     quantize_static(model_fp32_path, model_int8_path, feeds)
     feeds.rewind()

     # Expected op-type counts in the quantized graph.
     check_op_type_count(
         self,
         model_int8_path,
         QLinearMatMul=2,
         QLinearAdd=2,
         QuantizeLinear=1,
         DequantizeLinear=1,
     )
     check_model_correctness(self, model_fp32_path, model_int8_path, feeds.get_next())
示例#20
0
    def quantize_gavgpool_test(self,
                               activation_type,
                               weight_type,
                               extra_options=None):
        """Build the global-average-pool model, quantize it statically and verify it.

        Args:
            activation_type: ``QuantType`` forwarded to ``quantize_static``;
                also selects the expected tensor types and the ``u8``/``s8``
                tag of the output file name.
            weight_type: ``QuantType`` forwarded to ``quantize_static``.
            extra_options: Optional dict forwarded to ``quantize_static``.
                ``None`` (the default) is replaced by a new empty dict.
        """
        # A ``{}`` default would be a single dict shared by every call; build
        # a fresh one per call instead.
        if extra_options is None:
            extra_options = {}

        np.random.seed(1)
        model_fp32_path = "gavg_pool_fp32.onnx"
        data_reader = self.input_feeds(1, {"input": [1, 8, 33, 33]})
        self.construct_model_gavgpool(model_fp32_path, [1, 8, 33, 33],
                                      [16, 8, 3, 3], [1, 16, 1, 1])

        activation_is_u8 = activation_type == QuantType.QUInt8
        activation_proto_qtype = TensorProto.UINT8 if activation_is_u8 else TensorProto.INT8
        activation_type_str = "u8" if activation_is_u8 else "s8"
        weight_type_str = "u8" if weight_type == QuantType.QUInt8 else "s8"
        model_q8_path = "gavg_pool_{}{}.onnx".format(activation_type_str,
                                                     weight_type_str)

        data_reader.rewind()
        quantize_static(
            model_fp32_path,
            model_q8_path,
            data_reader,
            quant_format=QuantFormat.QOperator,
            activation_type=activation_type,
            weight_type=weight_type,
            extra_options=extra_options,
        )

        # Expected op-type counts: one GlobalAveragePool is expected to stay
        # unquantized next to one QLinearGlobalAveragePool.
        quant_nodes = {
            "QLinearConv": 1,
            "GlobalAveragePool": 1,
            "QLinearGlobalAveragePool": 1,
            "QuantizeLinear": 1,
            "DequantizeLinear": 1,
        }
        check_op_type_count(self, model_q8_path, **quant_nodes)

        # Expected tensor type per node I/O slot; the triple format is defined
        # by check_qtype_by_node_type (presumably kind, index, type -- confirm).
        qnode_io_qtypes = {
            "QuantizeLinear": [
                ["i", 2, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ],
            "QLinearGlobalAveragePool": [
                ["i", 2, activation_proto_qtype],
                ["i", 4, activation_proto_qtype],
            ],
        }
        check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)

        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_q8_path,
                                data_reader.get_next())
示例#21
0
 def static_quant_test_qdq(self, model_fp32_path, model_int8_path):
     """Statically quantize the model in QDQ format and verify the result.

     Args:
         model_fp32_path: Path of the float32 model to quantize.
         model_int8_path: Path the quantized model is written to.
     """
     feeds = self.input_feeds(1, {'input': [5, 10]})
     quantize_static(
         model_fp32_path,
         model_int8_path,
         feeds,
         quant_format=QuantFormat.QDQ,
     )
     feeds.rewind()

     # Expected op-type counts in the QDQ graph.
     check_op_type_count(
         self,
         model_int8_path,
         MatMul=2,
         Add=2,
         QuantizeLinear=4,
         DequantizeLinear=8,
     )
     check_model_correctness(self, model_fp32_path, model_int8_path, feeds.get_next())
示例#22
0
    def test_quantize_maxpool(self):
        """Statically quantize the conv/max-pool model in QOperator and QDQ formats."""
        np.random.seed(1)

        fp32_model = 'maxpool_fp32.onnx'
        qop_model = 'maxpool_uint8.onnx'
        qdq_model = 'maxpool_uint8_qdq.onnx'

        pool_attrs = {'kernel_shape': [3, 3]}
        self.construct_model_conv_maxpool(
            fp32_model,
            [1, 2, 26, 42],
            [3, 2, 3, 3],
            [1, 3, 24, 40],
            pool_attrs,
            [1, 3, 22, 38],
        )

        def maxpool_not_fed_by_conv_output(node):
            # True for every node except a "maxpool_node" whose first input is
            # still the float 'conv_output' tensor.
            return not (node.name == "maxpool_node" and node.input[0] == 'conv_output')

        # --- QOperator format ---
        feeds = self.input_feeds(1, {'input': [1, 2, 26, 42]})
        quantize_static(fp32_model, qop_model, feeds)

        # The maxpool node must consume a quantized tensor; its input name
        # reveals that.
        check_op_nodes(self, qop_model, maxpool_not_fed_by_conv_output)
        check_op_type_count(
            self,
            qop_model,
            QLinearConv=1,
            QuantizeLinear=1,
            DequantizeLinear=2,
            MaxPool=1,
        )
        feeds.rewind()
        check_model_correctness(self, fp32_model, qop_model, feeds.get_next())

        # --- QDQ format ---
        feeds.rewind()
        quantize_static(fp32_model, qdq_model, feeds, quant_format=QuantFormat.QDQ)
        check_op_type_count(
            self,
            qdq_model,
            Conv=1,
            QuantizeLinear=2,
            DequantizeLinear=3,
            MaxPool=1,
        )
        feeds.rewind()
        check_model_correctness(self, fp32_model, qdq_model, feeds.get_next())
示例#23
0
    def static_quant_test(
        self,
        model_fp32_path,
        data_reader,
        activation_type,
        weight_type,
        extra_options=None,
    ):
        """Statically quantize a model in QOperator format and check the result.

        Args:
            model_fp32_path: Path of the float32 model to quantize.
            data_reader: Reader passed to ``quantize_static``; it is rewound
                before quantization and again before the correctness check.
            activation_type: ``QuantType`` forwarded to ``quantize_static``;
                also selects the expected tensor types, the expected node
                counts and the ``u8``/``s8`` tag of the output file name.
            weight_type: ``QuantType`` forwarded to ``quantize_static``.
            extra_options: Optional dict forwarded to ``quantize_static``.
                ``None`` (the default) is replaced by a new empty dict.
        """
        # A ``{}`` default would be a single dict shared by every call; build
        # a fresh one per call instead.
        if extra_options is None:
            extra_options = {}

        activation_is_u8 = activation_type == QuantType.QUInt8
        activation_proto_qtype = TensorProto.UINT8 if activation_is_u8 else TensorProto.INT8
        activation_type_str = "u8" if activation_is_u8 else "s8"
        weight_type_str = "u8" if weight_type == QuantType.QUInt8 else "s8"
        model_int8_path = "relu_fp32.quant_{}{}.onnx".format(
            activation_type_str, weight_type_str)

        data_reader.rewind()
        quantize_static(
            model_fp32_path,
            model_int8_path,
            data_reader,
            quant_format=QuantFormat.QOperator,
            activation_type=activation_type,
            weight_type=weight_type,
            extra_options=extra_options,
        )

        # With uint8 activations no Relu node and a single Quantize/Dequantize
        # pair are expected; otherwise one Relu and two of each.
        qdq_count = 1 if activation_is_u8 else 2
        relu_count = 0 if activation_is_u8 else 1
        quant_nodes = {
            "QGemm": 2,
            "QuantizeLinear": qdq_count,
            "DequantizeLinear": qdq_count,
            "Relu": relu_count,
        }
        check_op_type_count(self, model_int8_path, **quant_nodes)

        # Expected tensor type per node I/O slot; the triple format is defined
        # by check_qtype_by_node_type (presumably kind, index, type -- confirm).
        qnode_io_qtypes = {
            "QuantizeLinear": [
                ["i", 2, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ],
            "DequantizeLinear": [["i", 2, activation_proto_qtype]],
        }
        check_qtype_by_node_type(self, model_int8_path, qnode_io_qtypes)

        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_int8_path,
                                data_reader.get_next())
示例#24
0
    def static_quant_test_qdq(
        self,
        model_fp32_path,
        data_reader,
        activation_type,
        weight_type,
        extra_options=None,
    ):
        """Statically quantize a Gemm model in QDQ format and check the result.

        Args:
            model_fp32_path: Path of the float32 model to quantize.
            data_reader: Reader passed to ``quantize_static``; it is rewound
                before quantization and again before the correctness check.
            activation_type: ``QuantType`` forwarded to ``quantize_static``;
                also selects the expected tensor types, the expected node
                counts and the ``u8``/``s8`` tag of the output file name.
            weight_type: ``QuantType`` forwarded to ``quantize_static``.
            extra_options: Optional dict forwarded to ``quantize_static``.
                ``None`` (the default) is replaced by a new empty dict.
        """
        # A ``{}`` default would be a single dict shared by every call; build
        # a fresh one per call instead.
        if extra_options is None:
            extra_options = {}

        activation_is_u8 = activation_type == QuantType.QUInt8
        activation_proto_qtype = TensorProto.UINT8 if activation_is_u8 else TensorProto.INT8
        activation_type_str = "u8" if activation_is_u8 else "s8"
        weight_type_str = "u8" if weight_type == QuantType.QUInt8 else "s8"
        model_int8_path = "gemm_fp32.quant_dqd_{}{}.onnx".format(
            activation_type_str, weight_type_str)

        data_reader.rewind()
        quantize_static(
            model_fp32_path,
            model_int8_path,
            data_reader,
            quant_format=QuantFormat.QDQ,
            activation_type=activation_type,
            weight_type=weight_type,
            extra_options=extra_options,
        )

        # With uint8 activations no Clip node is expected; otherwise one Clip
        # plus one extra QuantizeLinear and one extra DequantizeLinear node.
        clip_count = 0 if activation_is_u8 else 1
        q_count = 3 if activation_is_u8 else 4
        dq_count = 7 if activation_is_u8 else 8
        quant_nodes = {
            "Gemm": 2,
            "QuantizeLinear": q_count,
            "DequantizeLinear": dq_count,
            "Clip": clip_count,
        }
        check_op_type_count(self, model_int8_path, **quant_nodes)

        # Expected tensor type per node I/O slot; the triple format is defined
        # by check_qtype_by_node_type (presumably kind, index, type -- confirm).
        qnode_io_qtypes = {
            "QuantizeLinear": [
                ["i", 2, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ]
        }
        check_qtype_by_node_type(self, model_int8_path, qnode_io_qtypes)

        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_int8_path,
                                data_reader.get_next())
示例#25
0
    def quantize_gavgpool_test(self,
                               activation_type,
                               weight_type,
                               extra_options=None):
        """Build, quantize (QOperator) and validate the global-average-pool model.

        The float model is written to ``gavg_pool_fp32.onnx`` and the
        quantized one to ``gavg_pool_<act><weight>.onnx`` in the current
        directory.

        Args:
            activation_type: ``QuantType`` used for activations.
            weight_type: ``QuantType`` used for weights.
            extra_options: Optional dict forwarded to ``quantize_static``.
                ``None`` (the default) is treated as an empty dict.
        """
        # Use a fresh dict per call instead of a shared mutable default.
        if extra_options is None:
            extra_options = {}

        np.random.seed(1)
        model_fp32_path = 'gavg_pool_fp32.onnx'
        data_reader = self.input_feeds(1, {'input': [1, 8, 33, 33]})
        self.construct_model_gavgpool(model_fp32_path, [1, 8, 33, 33],
                                      [16, 8, 3, 3], [1, 16, 1, 1])

        uint8_activation = activation_type == QuantType.QUInt8
        activation_proto_qtype = TensorProto.UINT8 if uint8_activation else TensorProto.INT8
        activation_type_str = 'u8' if uint8_activation else 's8'
        weight_type_str = 'u8' if weight_type == QuantType.QUInt8 else 's8'
        model_q8_path = 'gavg_pool_{}{}.onnx'.format(activation_type_str,
                                                     weight_type_str)

        data_reader.rewind()
        quantize_static(model_fp32_path,
                        model_q8_path,
                        data_reader,
                        quant_format=QuantFormat.QOperator,
                        activation_type=activation_type,
                        weight_type=weight_type,
                        extra_options=extra_options)

        # One float GlobalAveragePool is expected to remain next to the
        # quantized QLinearGlobalAveragePool.
        check_op_type_count(self,
                            model_q8_path,
                            QLinearConv=1,
                            GlobalAveragePool=1,
                            QLinearGlobalAveragePool=1,
                            QuantizeLinear=1,
                            DequantizeLinear=1)

        # NOTE(review): entries are presumably [input/output selector, index,
        # expected type] -- confirm against check_qtype_by_node_type.
        qnode_io_qtypes = {
            'QuantizeLinear': [['i', 2, activation_proto_qtype],
                               ['o', 0, activation_proto_qtype]],
            'QLinearGlobalAveragePool': [['i', 2, activation_proto_qtype],
                                         ['i', 4, activation_proto_qtype]],
        }
        check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)

        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_q8_path,
                                data_reader.get_next())
示例#26
0
    def test_activation_only(self):
        """Quantizing the Clip/Relu model must not add any Q/DQ nodes.

        The float model and the quantized output are both written into the
        test's temporary model directory.
        """
        tmp_dir = Path(self._tmp_model_dir.name)
        io_shape = [1, 3, 1, 3]

        float_model_path = str(tmp_dir / "float_relu_convs_model.onnx")
        self.construct_model_clip_relu(float_model_path, list(io_shape),
                                       list(io_shape))
        data_reader = self.input_feeds(2, {"input": list(io_shape)})

        qdq_model_path = str(tmp_dir / "qdq_relu_convs_model.onnx")
        quantize_static(float_model_path, qdq_model_path, data_reader)

        # Both activation nodes are expected to survive, with no
        # QuantizeLinear/DequantizeLinear nodes inserted.
        check_op_type_count(
            self,
            qdq_model_path,
            Clip=1,
            Relu=1,
            QuantizeLinear=0,
            DequantizeLinear=0,
        )
示例#27
0
 def verify_should_not_trigger(self, quantize_mode='static'):
     """Check that quantizing the Pad model inserts no quantization nodes.

     Args:
         quantize_mode: Used in the model file names; the calibration data
             reader is only handed to ``quantize_model`` when this equals
             ``'static'``.
     """
     np.random.seed(108)
     fp32_model = 'qop_pad_notrigger_fp32_{}.onnx'.format(quantize_mode)
     i8_model = 'qop_pad_notrigger_i8_{}.onnx'.format(quantize_mode)

     feeds = self.input_feeds(1, {'input': [1, 16, 31, 31]})
     self.construct_model_pad(fp32_model, 'constant', [1, 16, 31, 31],
                              [0, 0, 1, 2, 0, 0, 3, 4])

     calibration_reader = feeds if quantize_mode == 'static' else None
     self.quantize_model(fp32_model, i8_model, calibration_reader)
     feeds.rewind()

     # The Pad node's input is not quantized, so the Pad node itself must not
     # be quantized either: no Q/DQ nodes of any kind are expected.
     expected_counts = {
         'DynamicQuantizeLinear': 0,
         'QuantizeLinear': 0,
         'DequantizeLinear': 0,
     }
     check_op_type_count(self, i8_model, **expected_counts)
     check_model_correctness(self, fp32_model, i8_model, feeds.get_next())
示例#28
0
 def test_quantize_reshape(self):
     """Statically quantize the global-average-pool model and validate it.

     Uses the default ``quantize_static`` settings, then checks the operator
     counts and compares the quantized model against the float model.
     """
     np.random.seed(1)
     fp32_model = 'gavg_pool_fp32.onnx'
     int8_model = 'gavg_pool_fp32.quant.onnx'

     feeds = self.input_feeds(1, {'input': [1, 8, 33, 33]})
     self.construct_model_gavgpool(fp32_model, [1, 8, 33, 33],
                                   [16, 8, 3, 3], [1, 16, 1, 1])

     quantize_static(fp32_model, int8_model, feeds)
     feeds.rewind()

     # One float GlobalAveragePool is expected to remain next to the
     # quantized QLinearGlobalAveragePool.
     check_op_type_count(self,
                         int8_model,
                         QLinearConv=1,
                         GlobalAveragePool=1,
                         QLinearGlobalAveragePool=1,
                         QuantizeLinear=1,
                         DequantizeLinear=1)
     check_model_correctness(self, fp32_model, int8_model, feeds.get_next())
示例#29
0
    def quantize_resize_test(self, activation_type, weight_type, extra_options=None):
        """Build a Conv+Resize model and validate its QOperator and QDQ quantization.

        The float model is written to ``resize_fp32.onnx``; the quantized
        models go to ``resize_<act><weight>.onnx`` (default format) and
        ``resize_<act><weight>_qdq.onnx`` (QDQ format).

        Args:
            activation_type: ``QuantType`` used for activations.
            weight_type: ``QuantType`` used for weights.
            extra_options: Optional dict forwarded to ``quantize_static``.
                ``None`` (the default) is treated as an empty dict.
        """
        # Use a fresh dict per call instead of a shared mutable default.
        if extra_options is None:
            extra_options = {}

        np.random.seed(1)
        model_fp32_path = 'resize_fp32.onnx'

        kwargs = {'coordinate_transformation_mode': 'asymmetric', 'mode': 'nearest', 'nearest_mode': 'floor'}
        self.construct_model_conv_resize(model_fp32_path,
                                         [1, 2, 26, 42], [3, 2, 3, 3],
                                         [1, 3, 24, 40], [1, 3, 48, 80],
                                         kwargs,
                                         [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 2.0, 2.0], None)

        uint8_activation = activation_type == QuantType.QUInt8
        activation_proto_qtype = TensorProto.UINT8 if uint8_activation else TensorProto.INT8
        activation_type_str = 'u8' if uint8_activation else 's8'
        weight_type_str = 'u8' if weight_type == QuantType.QUInt8 else 's8'
        model_uint8_path = 'resize_{}{}.onnx'.format(activation_type_str, weight_type_str)
        model_uint8_qdq_path = 'resize_{}{}_qdq.onnx'.format(activation_type_str, weight_type_str)

        # Verify QOperator mode
        data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})
        quantize_static(model_fp32_path, model_uint8_path, data_reader,
                        activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
        # Make sure Resize became an xint8 operator: its first input must no
        # longer be the float 'conv_output' tensor.
        check_op_nodes(self, model_uint8_path, lambda node: (node.name != "resize_node" or node.input[0] != 'conv_output'))
        qnode_counts = {'QLinearConv': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 2, 'Resize': 1}
        check_op_type_count(self, model_uint8_path, **qnode_counts)
        qnode_io_qtypes = {
            'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]],
            'DequantizeLinear': [['i', 2, activation_proto_qtype]],
        }
        check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())

        # Verify QDQ mode
        data_reader.rewind()
        quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ,
                        activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
        qdqnode_counts = {'Conv': 1, 'QuantizeLinear': 3, 'DequantizeLinear': 4, 'Resize': 1}
        check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
        qnode_io_qtypes = {'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]}
        check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
示例#30
0
    def quantize_maxpool_test(self, activation_type, weight_type, extra_options=None):
        """Build a Conv+MaxPool model and validate its QOperator and QDQ quantization.

        The float model is written to ``maxpool_fp32.onnx``; the quantized
        models go to ``maxpool_<act><weight>.onnx`` (QOperator format) and
        ``maxpool_dqd_<act><weight>.onnx`` (QDQ format).

        Args:
            activation_type: ``QuantType`` used for activations.
            weight_type: ``QuantType`` used for weights.
            extra_options: Optional dict forwarded to ``quantize_static``.
                ``None`` (the default) is treated as an empty dict.
        """
        # Use a fresh dict per call instead of a shared mutable default.
        if extra_options is None:
            extra_options = {}

        np.random.seed(1)
        model_fp32_path = 'maxpool_fp32.onnx'
        self.construct_model_conv_maxpool(model_fp32_path,
                                          [1, 2, 26, 42], [3, 2, 3, 3],
                                          [1, 3, 24, 40], {'kernel_shape': [3, 3]},
                                          [1, 3, 22, 38])
        data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})

        uint8_activation = activation_type == QuantType.QUInt8
        activation_proto_qtype = TensorProto.UINT8 if uint8_activation else TensorProto.INT8
        activation_type_str = 'u8' if uint8_activation else 's8'
        weight_type_str = 'u8' if weight_type == QuantType.QUInt8 else 's8'
        model_q8_path = 'maxpool_{}{}.onnx'.format(activation_type_str, weight_type_str)
        model_q8_qdq_path = 'maxpool_dqd_{}{}.onnx'.format(activation_type_str, weight_type_str)

        # Verify QOperator mode
        data_reader.rewind()
        quantize_static(model_fp32_path, model_q8_path, data_reader, quant_format=QuantFormat.QOperator,
                        activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
        # Make sure MaxPool became an xint8 operator: its first input must no
        # longer be the float 'conv_output' tensor.
        check_op_nodes(self, model_q8_path, lambda node: (node.name != "maxpool_node" or node.input[0] != 'conv_output'))
        qnode_counts = {'QLinearConv': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 2, 'MaxPool': 1}
        check_op_type_count(self, model_q8_path, **qnode_counts)
        qnode_io_qtypes = {
            'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]],
            'DequantizeLinear': [['i', 2, activation_proto_qtype]],
        }
        check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_q8_path, data_reader.get_next())

        # Verify QDQ mode
        data_reader.rewind()
        quantize_static(model_fp32_path, model_q8_qdq_path, data_reader, quant_format=QuantFormat.QDQ,
                        activation_type=activation_type, weight_type=weight_type, extra_options=extra_options)
        qdqnode_counts = {'Conv': 1, 'QuantizeLinear': 3, 'DequantizeLinear': 4, 'MaxPool': 1}
        check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts)
        qnode_io_qtypes = {
            'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]],
            'DequantizeLinear': [['i', 2, activation_proto_qtype]],
        }
        check_qtype_by_node_type(self, model_q8_qdq_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_q8_qdq_path, data_reader.get_next())