def test_dequantize(self):
    original_type = np.float32
    nodes = nodes_dict(original_type, np.int8)

    graph = build_graph(nodes, [
        *connect('weights:0', '0:cast'),
        *connect('cast:0', '0:FQ'),
        *connect('il:0', '1:FQ'),
        *connect('ih:0', '2:FQ'),
        *connect('ol:0', '3:FQ'),
        *connect('oh:0', '4:FQ'),
        *connect('FQ:0', 'output'),
    ], nodes_with_edges_only=True)

    error_message = 'Unexpected number of {} nodes {} CompressQuantizeWeights.dequantize_data call `{}`'
    fq_nodes = graph.get_op_nodes(type='FakeQuantize')
    cast_nodes = graph.get_op_nodes(name='cast')
    self.assertEqual(len(fq_nodes), 1, error_message.format('FakeQuantize', 'before', len(fq_nodes)))
    self.assertEqual(len(cast_nodes), 1, error_message.format('Convert', 'before', len(cast_nodes)))
    cast_nodes[0]['need_shape_inference'] = True

    CompressQuantizeWeights.dequantize_data(fq_nodes[0], original_type, np.int8)
    graph.clean_up()

    fq_nodes = graph.get_op_nodes(type='FakeQuantize')
    self.assertEqual(len(fq_nodes), 0, error_message.format('FakeQuantize', 'after', len(fq_nodes)))

    graph_ref = build_graph(nodes, [
        *connect('int_weights:0', '0:cast'),
        *connect('cast:0', '0:sub'),
        *connect('zp:0', '1:sub'),
        *connect('sub:0', '0:mul'),
        *connect('scale:0', '1:mul'),
        *connect('mul:0', 'output'),
    ], {'cast': {'dst_type': original_type}}, nodes_with_edges_only=True)

    (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
    self.assertTrue(flag, resp)
def compress_weights(model: Graph):
    """Apply transformations to save model weights to INT8."""
    add_removed_converts(model)
    CompressQuantizeWeights().find_and_replace_pattern(model)
    model.clean_up()

    ForceStrictPrecision().find_and_replace_pattern(model)
    model.clean_up()
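# A minimal usage sketch for compress_weights (illustrative assumption, not part of the
# test suite): the function is meant to run on an already-built middle-end Graph, so a
# graph assembled with the same nodes_dict/connect helpers used by the tests below can
# stand in for a real model:
#
#   graph = build_graph(nodes_dict(np.float32), [
#       *connect('weights:0', '0:FQ'),
#       *connect('il:0', '1:FQ'),
#       *connect('ih:0', '2:FQ'),
#       *connect('ol:0', '3:FQ'),
#       *connect('oh:0', '4:FQ'),
#       *connect('FQ:0', 'output'),
#   ], nodes_with_edges_only=True)
#   compress_weights(graph)
#   # the weights FQ is folded into an int8 Const -> Convert -> Sub(zero point) -> Mul(scale) chain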
def test_data_type(self, model_dtype, original, transformed=None):
    if transformed is None:
        transformed = original
    nodes = nodes_dict(original, transformed)

    graph = build_graph(nodes, [
        *connect('weights:0', '0:FQ'),
        *connect('il:0', '1:FQ'),
        *connect('ih:0', '2:FQ'),
        *connect('ol:0', '3:FQ'),
        *connect('oh:0', '4:FQ'),
        *connect('FQ:0', 'output'),
    ], nodes_with_edges_only=True, cli=Namespace(data_type=model_dtype, static_shape=True))

    CompressQuantizeWeights().find_and_replace_pattern(graph)
    graph.clean_up()

    graph_ref = build_graph(nodes, [
        *connect('int_weights:0', '0:cast'),
        *connect('cast:0', '0:sub'),
        *connect('zp:0', '1:sub'),
        *connect('sub:0', '0:mul'),
        *connect('scale:0', '1:mul'),
        *connect('mul:0', 'output'),
    ], nodes_with_edges_only=True)

    (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
    self.assertTrue(flag, resp)
def test_data_type_new_fp16(self):
    nodes = nodes_dict(np.float16)

    graph = build_graph(nodes, [
        *connect('weights:0', '0:weights_cast'),
        *connect('weights_cast:0', '0:FQ'),
        *connect('il:0', '1:FQ'),
        *connect('ih:0', '2:FQ'),
        *connect('ol:0', '3:FQ'),
        *connect('oh:0', '4:FQ'),
        *connect('FQ:0', 'output'),
    ], nodes_with_edges_only=True, cli=Namespace(data_type='FP16', static_shape=True))

    CompressQuantizeWeights().find_and_replace_pattern(graph)
    graph.clean_up()

    graph_ref = build_graph(nodes, [
        *connect('int_weights:0', '0:weights_cast'),
        *connect('weights_cast:0', '0:sub'),
        *connect('zp:0', '1:sub'),
        *connect('sub:0', '0:mul'),
        *connect('scale:0', '1:mul'),
        *connect('mul:0', 'output'),
    ], nodes_with_edges_only=True)

    (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
    self.assertTrue(flag, resp)
def test_quantize(self):
    original_type = np.float32
    nodes = nodes_dict(original_type)

    graph = build_graph(nodes, [
        *connect('weights:0', '0:FQ'),
        *connect('il:0', '1:FQ'),
        *connect('ih:0', '2:FQ'),
        *connect('ol:0', '3:FQ'),
        *connect('oh:0', '4:FQ'),
        *connect('FQ:0', 'output'),
    ], nodes_with_edges_only=True)

    error_message = 'Unexpected number of FakeQuantize nodes {} CompressQuantizeWeights.quantize_data call `{}`'
    fq_nodes = graph.get_op_nodes(type='FakeQuantize')
    self.assertEqual(len(fq_nodes), 1, error_message.format('before', len(fq_nodes)))
    fake_quantize = fq_nodes[0]

    CompressQuantizeWeights.quantize_data(fake_quantize, original_type, np.int8, "signed")
    graph.clean_up()

    fq_nodes = graph.get_op_nodes(type='FakeQuantize')
    self.assertEqual(len(fq_nodes), 1, error_message.format('after', len(fq_nodes)))
    self.assertEqual(fq_nodes[0].in_port(0).get_source().node.soft_get('type'), 'Const')
    self.assertEqual(fq_nodes[0].in_port(0).get_source().node.data_type, np.int8)

    graph_ref = build_graph(nodes, [
        *connect('int_weights:0', '0:FQ'),
        *connect('il:0', '1:FQ'),
        *connect('ih:0', '2:FQ'),
        *connect('ol:0', '3:FQ'),
        *connect('oh:0', '4:FQ'),
        *connect('FQ:0', 'output'),
    ], nodes_with_edges_only=True)

    (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
    self.assertTrue(flag, resp)
def test_accuracy(self, data, in_low, in_high, out_low, out_high, levels):
    nodes = nodes_dict(np.float32, None, levels, data, in_low, in_high, out_low, out_high)

    graph = build_graph(nodes, [
        *connect('weights:0', '0:FQ'),
        *connect('il:0', '1:FQ'),
        *connect('ih:0', '2:FQ'),
        *connect('ol:0', '3:FQ'),
        *connect('oh:0', '4:FQ'),
        *connect('FQ:0', 'output'),
    ], nodes_with_edges_only=True)
    graph_ref = graph.copy()

    CompressQuantizeWeights().find_and_replace_pattern(graph)

    for node in graph.get_op_nodes() + graph_ref.get_op_nodes():
        node['stop_value_propagation'] = False
        node['need_shape_inference'] = node.soft_get('need_shape_inference', True)

    graph.clean_up()
    graph_ref.clean_up()

    const_result_graph = build_graph(
        {**shaped_const_with_data('weights', np.array(data).shape), **result()},
        [*connect('weights', 'output')],
        nodes_with_edges_only=True)

    (flag, resp) = compare_graphs(graph, const_result_graph, 'output', check_op_attrs=True)
    self.assertTrue(flag, resp)

    (flag, resp) = compare_graphs(graph_ref, const_result_graph, 'output', check_op_attrs=True)
    self.assertTrue(flag, resp)

    # Since these two graphs calculate the same data through different constant folding paths,
    # they may end up with constants of different data types: FakeQuantize always has an f32
    # output dtype, while eltwises are folded through numpy, which has no such restriction.
    const_nodes = graph.get_op_nodes(type='Const')
    self.assertEqual(len(const_nodes), 1)
    if const_nodes[0].data_type == np.float64:
        const_nodes[0].data_type = np.float32

    (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
    self.assertTrue(flag, resp)
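# Worked illustration of the dtype mismatch handled at the end of test_accuracy (assumption:
# plain numpy type promotion rules; this snippet is explanatory and not executed by the tests):
#
#   (np.float32(1.0) - np.float64(0.5)).dtype   # -> dtype('float64')
#
# A single float64 operand in the folded Sub/Mul chain promotes the whole result to f64,
# while FakeQuantize constant folding always emits float32 data.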
def test_negative_fq_unacceptable_levels(self, levels):
    nodes = nodes_dict(np.float32, None, levels)

    graph = build_graph(nodes, [
        *connect('weights:0', '0:FQ'),
        *connect('il:0', '1:FQ'),
        *connect('ih:0', '2:FQ'),
        *connect('ol:0', '3:FQ'),
        *connect('oh:0', '4:FQ'),
        *connect('FQ:0', 'output'),
    ], nodes_with_edges_only=True)
    graph_ref = graph.copy()
    CompressQuantizeWeights().find_and_replace_pattern(graph)

    # With an unsupported number of levels the transformation must leave the graph untouched.
    (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
    self.assertTrue(flag, resp)