def test_dequantize(self):
    """Exercise ``CompressQuantizeWeights.dequantize_data`` on a weights -> cast -> FQ graph.

    Exactly one FakeQuantize node and one node named ``cast`` are expected
    before the call. After the call and a graph clean-up no FakeQuantize may
    remain, and the graph has to match the reference chain
    ``int_weights -> cast -> sub -> mul -> output``.
    """
    src_dtype = np.float32
    nodes = nodes_dict(src_dtype, np.int8)

    initial_edges = [
        *connect('weights:0', '0:cast'),
        *connect('cast:0', '0:FQ'),
        *connect('il:0', '1:FQ'),
        *connect('ih:0', '2:FQ'),
        *connect('ol:0', '3:FQ'),
        *connect('oh:0', '4:FQ'),
        *connect('FQ:0', 'output'),
    ]
    graph = build_graph(nodes, initial_edges, nodes_with_edges_only=True)

    count_msg = 'Unexpected number of {} nodes {} CompressQuantizeWeights.dequantize_data call `{}`'
    fake_quantizes = graph.get_op_nodes(type='FakeQuantize')
    converts = graph.get_op_nodes(name='cast')

    # Sanity-check the starting graph before touching it.
    fq_count = len(fake_quantizes)
    self.assertEqual(fq_count, 1,
                     count_msg.format('FakeQuantize', 'before', fq_count))
    convert_count = len(converts)
    self.assertEqual(convert_count, 1,
                     count_msg.format('Convert', 'before', convert_count))
    converts[0]['need_shape_inference'] = True

    CompressQuantizeWeights.dequantize_data(fake_quantizes[0], src_dtype,
                                            np.int8)
    graph.clean_up()

    # The FakeQuantize node must be gone once the graph has been cleaned up.
    remaining_fq = graph.get_op_nodes(type='FakeQuantize')
    remaining_count = len(remaining_fq)
    self.assertEqual(remaining_count, 0,
                     count_msg.format('FakeQuantize', 'after', remaining_count))

    reference_edges = [
        *connect('int_weights:0', '0:cast'),
        *connect('cast:0', '0:sub'),
        *connect('zp:0', '1:sub'),
        *connect('sub:0', '0:mul'),
        *connect('scale:0', '1:mul'),
        *connect('mul:0', 'output'),
    ]
    cast_overrides = {'cast': {'dst_type': src_dtype}}
    graph_ref = build_graph(nodes, reference_edges, cast_overrides,
                            nodes_with_edges_only=True)

    is_equal, report = compare_graphs(graph, graph_ref, 'output',
                                      check_op_attrs=True)
    self.assertTrue(is_equal, report)
Exemplo n.º 2
0
def compress_weights(model: Graph):
    """Run the transformation sequence that stores model weights as INT8.

    ``add_removed_converts`` is applied first; afterwards
    ``CompressQuantizeWeights`` and ``ForceStrictPrecision`` are run in that
    order, each one followed by ``model.clean_up()``.

    :param model: graph that is transformed in place
    """
    add_removed_converts(model)
    for transformation_cls in (CompressQuantizeWeights, ForceStrictPrecision):
        transformation_cls().find_and_replace_pattern(model)
        model.clean_up()
    def test_data_type(self, model_dtype, original, transformed=None):
        """Run ``CompressQuantizeWeights`` on a weights -> FQ graph for a given model data type.

        :param model_dtype: value passed as ``data_type`` in the graph's ``cli`` namespace
        :param original: first argument forwarded to ``nodes_dict``
        :param transformed: second argument forwarded to ``nodes_dict``;
            falls back to ``original`` when it is ``None``
        """
        transformed = original if transformed is None else transformed
        nodes = nodes_dict(original, transformed)

        fq_edges = [
            *connect('weights:0', '0:FQ'),
            *connect('il:0', '1:FQ'),
            *connect('ih:0', '2:FQ'),
            *connect('ol:0', '3:FQ'),
            *connect('oh:0', '4:FQ'),
            *connect('FQ:0', 'output'),
        ]
        cli_args = Namespace(data_type=model_dtype, static_shape=True)
        graph = build_graph(nodes, fq_edges,
                            nodes_with_edges_only=True,
                            cli=cli_args)

        CompressQuantizeWeights().find_and_replace_pattern(graph)
        graph.clean_up()

        # Expected result: int_weights -> cast -> sub -> mul -> output.
        reference_edges = [
            *connect('int_weights:0', '0:cast'),
            *connect('cast:0', '0:sub'),
            *connect('zp:0', '1:sub'),
            *connect('sub:0', '0:mul'),
            *connect('scale:0', '1:mul'),
            *connect('mul:0', 'output'),
        ]
        graph_ref = build_graph(nodes, reference_edges,
                                nodes_with_edges_only=True)

        is_equal, report = compare_graphs(graph, graph_ref, 'output',
                                          check_op_attrs=True)
        self.assertTrue(is_equal, report)
Exemplo n.º 4
0
    def test_data_type_new_fp16(self):
        """Run ``CompressQuantizeWeights`` on an FP16 graph where ``weights_cast`` feeds FQ.

        The transformed graph is expected to match the chain
        ``int_weights -> weights_cast -> sub -> mul -> output``.
        """
        nodes = nodes_dict(np.float16)

        fq_edges = [
            *connect('weights:0', '0:weights_cast'),
            *connect('weights_cast:0', '0:FQ'),
            *connect('il:0', '1:FQ'),
            *connect('ih:0', '2:FQ'),
            *connect('ol:0', '3:FQ'),
            *connect('oh:0', '4:FQ'),
            *connect('FQ:0', 'output'),
        ]
        cli_args = Namespace(data_type='FP16', static_shape=True)
        graph = build_graph(nodes, fq_edges,
                            nodes_with_edges_only=True,
                            cli=cli_args)

        CompressQuantizeWeights().find_and_replace_pattern(graph)
        graph.clean_up()

        reference_edges = [
            *connect('int_weights:0', '0:weights_cast'),
            *connect('weights_cast:0', '0:sub'),
            *connect('zp:0', '1:sub'),
            *connect('sub:0', '0:mul'),
            *connect('scale:0', '1:mul'),
            *connect('mul:0', 'output'),
        ]
        graph_ref = build_graph(nodes, reference_edges,
                                nodes_with_edges_only=True)

        is_equal, report = compare_graphs(graph, graph_ref, 'output',
                                          check_op_attrs=True)
        self.assertTrue(is_equal, report)
    def test_quantize(self):
        """Exercise ``CompressQuantizeWeights.quantize_data`` on a weights -> FQ graph.

        The FakeQuantize node must survive the call, its first input must come
        from a ``Const`` node whose ``data_type`` is ``np.int8``, and the graph
        has to match the ``int_weights -> FQ -> output`` reference.
        """
        src_dtype = np.float32
        nodes = nodes_dict(src_dtype)

        initial_edges = [
            *connect('weights:0', '0:FQ'),
            *connect('il:0', '1:FQ'),
            *connect('ih:0', '2:FQ'),
            *connect('ol:0', '3:FQ'),
            *connect('oh:0', '4:FQ'),
            *connect('FQ:0', 'output'),
        ]
        graph = build_graph(nodes, initial_edges, nodes_with_edges_only=True)

        count_msg = 'Unexpected number of FakeQuantize nodes {} CompressQuantizeWeights.quantize_data call `{}`'
        fake_quantizes = graph.get_op_nodes(type='FakeQuantize')
        fq_count = len(fake_quantizes)
        self.assertEqual(fq_count, 1, count_msg.format('before', fq_count))

        CompressQuantizeWeights.quantize_data(fake_quantizes[0], src_dtype,
                                              np.int8, "signed")
        graph.clean_up()

        fake_quantizes = graph.get_op_nodes(type='FakeQuantize')
        fq_count = len(fake_quantizes)
        self.assertEqual(fq_count, 1, count_msg.format('after', fq_count))

        # The producer of the FakeQuantize data input must be an int8 constant.
        data_port = fake_quantizes[0].in_port(0)
        self.assertEqual(data_port.get_source().node.soft_get('type'), 'Const')
        self.assertEqual(data_port.get_source().node.data_type, np.int8)

        reference_edges = [
            *connect('int_weights:0', '0:FQ'),
            *connect('il:0', '1:FQ'),
            *connect('ih:0', '2:FQ'),
            *connect('ol:0', '3:FQ'),
            *connect('oh:0', '4:FQ'),
            *connect('FQ:0', 'output'),
        ]
        graph_ref = build_graph(nodes, reference_edges,
                                nodes_with_edges_only=True)

        is_equal, report = compare_graphs(graph, graph_ref, 'output',
                                          check_op_attrs=True)
        self.assertTrue(is_equal, report)
    def test_accuracy(self, data, in_low, in_high, out_low, out_high, levels):
        """Compare the compressed graph with the untouched one after both are cleaned up.

        A weights -> FQ graph is built from the given values and copied. The
        copy stays as is while ``CompressQuantizeWeights`` runs on the
        original. Both graphs are then cleaned up and each must match a single
        ``weights -> output`` constant graph, and finally each other.

        :param data: weights data forwarded to ``nodes_dict``; its array shape
            is used for the expected constant
        :param in_low: FakeQuantize input-low value forwarded to ``nodes_dict``
        :param in_high: FakeQuantize input-high value forwarded to ``nodes_dict``
        :param out_low: FakeQuantize output-low value forwarded to ``nodes_dict``
        :param out_high: FakeQuantize output-high value forwarded to ``nodes_dict``
        :param levels: levels value forwarded to ``nodes_dict``
        """
        nodes = nodes_dict(np.float32, None, levels, data, in_low, in_high,
                           out_low, out_high)

        fq_edges = [
            *connect('weights:0', '0:FQ'),
            *connect('il:0', '1:FQ'),
            *connect('ih:0', '2:FQ'),
            *connect('ol:0', '3:FQ'),
            *connect('oh:0', '4:FQ'),
            *connect('FQ:0', 'output'),
        ]
        graph = build_graph(nodes, fq_edges, nodes_with_edges_only=True)
        graph_ref = graph.copy()

        CompressQuantizeWeights().find_and_replace_pattern(graph)

        # Allow value propagation on every op node of both graphs and request
        # shape inference wherever a node has not set the flag itself.
        all_op_nodes = graph.get_op_nodes() + graph_ref.get_op_nodes()
        for op_node in all_op_nodes:
            op_node['stop_value_propagation'] = False
            op_node['need_shape_inference'] = op_node.soft_get(
                'need_shape_inference', True)

        graph.clean_up()
        graph_ref.clean_up()

        folded_nodes = {
            **shaped_const_with_data('weights', np.array(data).shape),
            **result(),
        }
        folded_edges = [*connect('weights', 'output')]
        const_result_graph = build_graph(folded_nodes, folded_edges,
                                         nodes_with_edges_only=True)

        is_equal, report = compare_graphs(graph, const_result_graph, 'output',
                                          check_op_attrs=True)
        self.assertTrue(is_equal, report)

        is_equal, report = compare_graphs(graph_ref, const_result_graph,
                                          'output', check_op_attrs=True)
        self.assertTrue(is_equal, report)

        # Both graphs compute the same data through different constant folding
        # routines, so the resulting constants may differ in data type:
        # FakeQuantize always has an f32 output dtype, whereas eltwise ops are
        # folded with numpy, which has no such restriction.
        const_nodes = graph.get_op_nodes(type='Const')
        self.assertEqual(len(const_nodes), 1)
        folded_const = const_nodes[0]
        if folded_const.data_type == np.float64:
            folded_const.data_type = np.float32

        is_equal, report = compare_graphs(graph, graph_ref, 'output',
                                          check_op_attrs=True)
        self.assertTrue(is_equal, report)
    def test_negative_fq_unacceptable_levels(self, levels):
        """Check that ``CompressQuantizeWeights`` leaves the graph unchanged for ``levels``.

        The graph is copied before the transformation runs and is compared
        against that copy afterwards.

        :param levels: levels value forwarded to ``nodes_dict``
        """
        nodes = nodes_dict(np.float32, None, levels)

        fq_edges = [
            *connect('weights:0', '0:FQ'),
            *connect('il:0', '1:FQ'),
            *connect('ih:0', '2:FQ'),
            *connect('ol:0', '3:FQ'),
            *connect('oh:0', '4:FQ'),
            *connect('FQ:0', 'output'),
        ]
        graph = build_graph(nodes, fq_edges, nodes_with_edges_only=True)

        # Snapshot taken before the transformation serves as the reference.
        graph_ref = graph.copy()
        CompressQuantizeWeights().find_and_replace_pattern(graph)

        is_equal, report = compare_graphs(graph, graph_ref, 'output',
                                          check_op_attrs=True)
        self.assertTrue(is_equal, report)