Exemplo n.º 1
0
    def do_aquant(self, args: argparse.Namespace):
        """
Attempt to calculate quantization for graph using one or more sample input files."""
        self._check_graph()
        stats_collector = ActivationRangesCollector()
        # if replaying state file then load the activation stats if they are present
        if args.scheme == 'SQ8':
            bits = 8
        else:
            bits = args.force_width
        if self.replaying_history and self.history_stats:
            astats = self.history_stats
        else:
            input_args = self._get_input_args(args)
            processed_input = False
            for file_per_input in glob_input_files(args.input_files,
                                                   self.G.num_inputs):
                LOG.info("input file %s", file_per_input)
                processed_input = True
                data = [
                    import_data(input_file, **input_args)
                    for input_file in file_per_input
                ]
                stats_collector.collect_stats(self.G, data)
            if not processed_input:
                self.perror("No input files found")
                return
            astats = stats_collector.stats
            self._record_stats(astats)

        quantizer = UnifiedQuantizer(args.scheme,
                                     astats,
                                     quantized_dimension=args.quant_dimension,
                                     narrow_weights=not args.no_narrow_weights,
                                     bits=bits)

        qrecs = quantizer.quantize(self.G)
        self.G.quantization = qrecs
        # These should now be unnecessary
        # if args.scheme == 'SQ8':
        #     concats_matcher = EqualizeSymmetricMultiplicativeQuantivedConcats()
        #     concats_matcher.match(self.G, set_identity=False)
        #     rnns_matcher = PropagateUpRNNInputQ()
        #     rnns_matcher.match(self.G, set_identity=False)
        #     softmax_qrec_matcher = PropagateSoftmaxSymQrec()
        #     softmax_qrec_matcher.match(self.G, set_identity=False)
        #     sig_swish_qrec_matcher = PropagateUpSigSwishInputQ()
        #     sig_swish_qrec_matcher.match(self.G, set_identity=False)
        LOG.info("Quantization set. Use qshow command to see it.")
Exemplo n.º 2
0
def tune_scaled(G, nodes):
    """Requantize part of an already quantized graph with the 'SQ8' scheme.

    The scheme is forced on every node returned by
    get_nodes_and_fusion_nodes(nodes); quantization is restarted from `nodes`.
    Afterwards unnecessary quantize operators are removed and the graph
    dimensions are recalculated.

    Args:
        G: the quantized graph to tune (modified in place).
        nodes: the nodes to start requantizing from.
    """
    targets = get_nodes_and_fusion_nodes(nodes)
    scheme_by_node = dict.fromkeys(targets, 'SQ8')
    UnifiedQuantizer.from_quantized_graph(
        G, extra_schemes=['SQ8']).quantize(G,
                                           start_nodes=nodes,
                                           force_scheme=scheme_by_node)
    RemoveUnnecessaryQuantizeOperators().match(G)
    G.add_dimensions()
Exemplo n.º 3
0
def tune_options(G, nodes, options):
    """Requantize part of an already quantized graph with forced options.

    The same `options` object is attached to every node returned by
    get_nodes_and_fusion_nodes(nodes); quantization is restarted from `nodes`.
    Afterwards unnecessary quantize operators are removed and the graph
    dimensions are recalculated.

    Args:
        G: the quantized graph to tune (modified in place).
        nodes: the nodes to start requantizing from.
        options: quantizer options forced on each affected node.
    """
    targets = get_nodes_and_fusion_nodes(nodes)
    # every node shares the one options object, exactly as passed in
    options_by_node = dict.fromkeys(targets, options)
    UnifiedQuantizer.from_quantized_graph(G).quantize(
        G, start_nodes=nodes, force_options=options_by_node)
    RemoveUnnecessaryQuantizeOperators().match(G)
    G.add_dimensions()
Exemplo n.º 4
0
    def create_graph(self, filename, opts):
        """Import a TFLite model file and return it as an NNGraph.

        Args:
            filename: path of the TFLite flatbuffer file to import.
            opts: importer options; normalized with self.get_opts. The keys
                read here are 'name', 'load_quantization' and
                'remove_quantize_ops'.

        Returns:
            The imported NNGraph. When 'load_quantization' is set the graph
            carries a quantization set that has been cleaned up and re-run
            through the UnifiedQuantizer.
        """
        opts = self.get_opts(opts)
        self._name_cache = {}
        add_sys_path(os.path.dirname(__file__))
        # read through a context manager so the file handle is always closed
        with open(filename, "rb") as model_file:
            buf = model_file.read()
        model = Model.GetRootAsModel(buf, 0)
        LOG.info("Importing TFLITE model version %s", model.Version())
        check(model.Version() == 3, "Only support version 3 graphs at present")
        if model.SubgraphsLength() > 1:
            LOG.warning(
                "nntool only supports one subgraph. There may be errors loading this graph."
            )
        G = NNGraph(model=model,
                    filename=filename,
                    name=opts.get('name'),
                    constant_store=ConstantStore())
        if opts.get('load_quantization'):
            # start with an empty quantization set flagged as containing SQ8
            G.quantization = QuantizationSet()
            G.has_quantized_parameters = True
            G.quantization.schemes_present.add('SQ8')

        self._import_tflite_graph(G, model, opts)
        clean_dangling_nodes(G)
        fix_split_in_edges(G)
        MatchDuplicateConstants().match(G)
        # DrawGraphReporter().report(G)
        G.add_dimensions()
        remove_concats(G)
        if opts['remove_quantize_ops']:
            RemoveQuantizeOperators().match(G)
            G.add_dimensions()

        if opts.get('load_quantization'):
            # get rid of qrecs on nodes that were not used. Collect the ids
            # first since the set cannot be changed while it is iterated.
            unused_ids = [
                nid for nid in G.quantization if nid.node_name not in G
            ]
            for nid in unused_ids:
                del G.quantization[nid]
            nodes_with_bad_quantization = self.find_nodes_with_bad_quantization(
                G)
            quantizer = UnifiedQuantizer.from_quantized_graph(G)
            # check for quantization problems
            # 1) need to force softmax/Sigmoid input to POW2 quantization
            # 2) need to check that all concats and splits have same input and
            #    output quantization
            # 3) Need to check that all nodes have qrecs and that they are consistent
            nodes_with_bad_quantization |= set(
                G.nodes(node_classes=(ConcatParameters, SoftMaxParameters,
                                      SplitParameters,
                                      SigmoidActivationParameters)))
            G.quantization = quantizer.quantize(
                G, start_nodes=nodes_with_bad_quantization)
            G.add_dimensions()

        return G
Exemplo n.º 5
0
    def do_fquant(self, args: argparse.Namespace):
        """
Attempt to calculate a fake quantization for graph using random tensors and parameters.
This is intended to allow code generation for performance testing even if no real
weights and input data are avalaible."""
        self._check_graph()
        self.G.constant_store.fake = True
        stats_collector = ActivationRangesCollector()
        for _ in range(args.num_inference):
            if args.uniform:
                input_tensors = [
                    np.random.uniform(-args.uniform, args.uniform,
                                      inp.dims.shape)
                    for inp in self.G.input_nodes()
                ]
            else:
                input_tensors = [
                    np.random.normal(0, 0.2, inp.dims.shape)
                    for inp in self.G.input_nodes()
                ]
            stats_collector.collect_stats(self.G, input_tensors)
        if args.scheme == 'SQ8':
            bits = 8
        else:
            bits = args.force_width
        astats = stats_collector.stats

        quantizer = UnifiedQuantizer(args.scheme,
                                     astats,
                                     quantized_dimension=args.quant_dimension,
                                     narrow_weights=not args.no_narrow_weights,
                                     bits=bits)
        self._record_stats(astats)
        qrecs = quantizer.quantize(self.G)
        self.G.quantization = qrecs
        if args.scheme == 'SQ8':
            concats_matcher = EqualizeSymmetricMultiplicativeQuantivedConcats()
            concats_matcher.match(self.G, set_identity=False)
            softmax_qrec_matcher = PropagateSoftmaxSymQrec()
            softmax_qrec_matcher.match(self.G, set_identity=False)
        self.G.constant_store.fake = False
Exemplo n.º 6
0
def tune_float(G, nodes, float_type):
    """Requantize part of an already quantized graph with the 'float' scheme.

    The scheme and a {'float_type': float_type} option dictionary are forced
    on every node returned by get_nodes_and_fusion_nodes(nodes); quantization
    is restarted from `nodes`. Afterwards unnecessary quantize operators are
    removed and the graph dimensions are recalculated.

    Args:
        G: the quantized graph to tune (modified in place).
        nodes: the nodes to start requantizing from.
        float_type: value stored under the 'float_type' option of each node.
    """
    targets = get_nodes_and_fusion_nodes(nodes)
    scheme_by_node = dict.fromkeys(targets, 'float')
    # each node gets its own option dictionary
    options_by_node = {
        target: {'float_type': float_type}
        for target in targets
    }
    UnifiedQuantizer.from_quantized_graph(
        G, extra_schemes=['float']).quantize(G,
                                             start_nodes=nodes,
                                             force_scheme=scheme_by_node,
                                             force_options=options_by_node)
    RemoveUnnecessaryQuantizeOperators().match(G)
    G.add_dimensions()
Exemplo n.º 7
0
    def do_aquant(self, args: argparse.Namespace):
        """
Attempt to calculate quantization for graph using one or more sample input files."""
        self._check_graph()
        stats_collector = ActivationRangesCollector()
        # if replaying state file then load the activation stats if they are present
        opts = get_options_from_args(args)
        if self.replaying_history and self.history_stats:
            astats = self.history_stats
        else:
            input_args = self._get_input_args(args)
            processed_input = False
            for file_per_input in glob_input_files(args.input_files,
                                                   self.G.num_inputs):
                LOG.info("input file %s", file_per_input)
                processed_input = True
                data = [
                    import_data(input_file, **input_args)
                    for input_file in file_per_input
                ]
                stats_collector.collect_stats(self.G, data)
            if not processed_input:
                self.perror("No input files found")
                return
            astats = stats_collector.stats
            self._record_stats(astats)

        if args.force_width:
            opts['bits'] = args.force_width

        quantizer = UnifiedQuantizer(args.scheme, astats, **opts)
        # clear the existing quantization
        self.G.quantization = None
        qrecs = quantizer.quantize(self.G)
        self.G.quantization = qrecs
        RemoveUnnecessaryQuantizeOperators().match(self.G)
        self.G.add_dimensions()
        LOG.info("Quantization set. Use qshow command to see it.")
Exemplo n.º 8
0
    def do_fquant(self, args: argparse.Namespace):
        """
Attempt to calculate a fake quantization for graph using random tensors and parameters.
This is intended to allow code generation for performance testing even if no real
weights and input data are avalaible."""
        self._check_graph()
        opts = get_options_from_args(args)
        if self.replaying_history and self.history_stats:
            astats = self.history_stats
        else:
            self.G.constant_store.fake = True
            stats_collector = ActivationRangesCollector()
            for _ in range(args.num_inference):
                if args.uniform:
                    input_tensors = [np.random.uniform(-args.uniform, args.uniform, inp.dims.shape)
                                     for inp in self.G.input_nodes()]
                else:
                    input_tensors = [np.random.normal(0, 0.2, inp.dims.shape)
                                     for inp in self.G.input_nodes()]
                stats_collector.collect_stats(self.G, input_tensors)
            astats = stats_collector.stats
            self._record_stats(astats)
            self.G.constant_store.fake = False

        if args.force_width:
            opts['bits'] = args.force_width

        quantizer = UnifiedQuantizer(args.scheme, astats,
                                     **opts)

        # clear the existing quantization
        self.G.quantization = None
        qrecs = quantizer.quantize(self.G)
        self.G.quantization = qrecs
        RemoveUnnecessaryQuantizeOperators().match(self.G)
        self.G.add_dimensions()
        LOG.info("Quantization set. Use qshow command to see it.")
Exemplo n.º 9
0
def tune_pow2(G, nodes, pow2_type):
    """Requantize part of an already quantized graph with the 'POW2' scheme.

    The scheme and a 'bits' option are forced on every node returned by
    get_nodes_and_fusion_nodes(nodes); quantization is restarted from `nodes`.
    Afterwards unnecessary quantize operators are removed and the graph
    dimensions are recalculated.

    Args:
        G: the quantized graph to tune (modified in place).
        nodes: the nodes to start requantizing from.
        pow2_type: 'int16' selects 16 bits; any other value selects 8 bits.
    """
    targets = get_nodes_and_fusion_nodes(nodes)
    scheme_by_node = dict.fromkeys(targets, 'POW2')
    if pow2_type == 'int16':
        bits = 16
    else:
        bits = 8
    # each node gets its own option dictionary
    options_by_node = {target: {'bits': bits} for target in targets}
    UnifiedQuantizer.from_quantized_graph(
        G, extra_schemes=['POW2']).quantize(G,
                                            start_nodes=nodes,
                                            force_scheme=scheme_by_node,
                                            force_options=options_by_node)
    RemoveUnnecessaryQuantizeOperators().match(G)
    G.add_dimensions()
Exemplo n.º 10
0
    def _match(self, G: GraphView, set_identity: bool = True, **kwargs):
        """Fold constant add/multiply nodes that follow a matmul into the matmul.

        For every MatMulOpParameters node the single consumer is inspected.
        If it is a MatrixAddParameters or MatrixMulParameters whose other
        input is a ConstantInputParameters of a compatible size, then:

        * an add is folded into the matmul bias (merged into an existing bias
          input or attached as a new constant on input 2, reversing the
          matmul first when the constant matches the second output axis);
        * a multiply is folded into one constant input of the matmul and into
          its bias if it has one.

        The folded node is removed, its consumers are reconnected to the
        matmul and, when the graph is quantized, the matmul is requantized.
        The process repeats on the same matmul until nothing more can be
        folded.

        Args:
            G: graph to modify in place.
            set_identity: when True, self.set_identity(G) is called at the end.
            **kwargs: unused.

        Returns:
            True if at least one node was folded, False otherwise.

        Raises:
            ValueError: if a candidate matmul's output is not 2 dimensional.
        """
        has_modified_graph = False
        has_transposed = False
        for params in G.nodes(node_classes=MatMulOpParameters):
            # keep folding for as long as the matmul has a foldable consumer
            while True:
                out_edges = G.out_edges(params.name)
                # can't fuse if there is a branch, and there is nothing to
                # fuse if the matmul has no consumer at all
                if len(out_edges) != 1:
                    break
                out_edge = out_edges[0]
                op_node = out_edge.to_node
                # must be a valid matrix op
                if not isinstance(op_node,
                                  (MatrixAddParameters, MatrixMulParameters)):
                    break
                # other edge to the op must be a constant
                other_idx = 1 if out_edge.to_idx == 0 else 0
                other_in_edge = G.indexed_in_edges(op_node.name)[other_idx]
                if not isinstance(other_in_edge.from_node,
                                  ConstantInputParameters):
                    break
                const_node = other_in_edge.from_node
                # only remove the constant if the op being folded is its sole
                # consumer. A constant shared with other nodes must be kept.
                remove_constant = len(G.out_edges(const_node.name)) == 1

                flat_value = const_node.dqvalue.flatten()
                out_shape = params.out_dims[0].shape
                if len(out_shape) != 2:
                    raise ValueError(
                        f'strange outputs shape of {out_shape} for matmul {params.name}'
                    )
                # the constant must have one value per row or per column
                if len(flat_value) != out_shape[0] and len(
                        flat_value) != out_shape[1]:
                    LOG.info(
                        "can't fuse %s into %s - value shape is not correct for bias",
                        const_node.name, params.name)
                    break
                has_bias = len(params.in_dims) == 3
                if isinstance(op_node, MatrixAddParameters):
                    if has_bias:
                        # NOTE(review): this compares the rank of the
                        # flattened value with len() of the bias dims rather
                        # than the element counts - confirm that is intended
                        if len(flat_value.shape) != len(params.in_dims[2]):
                            LOG.info(
                                "can't fuse %s into %s - bias shape is not the same",
                                const_node.name, params.name)
                            break
                        bias_node = G.indexed_in_edges(
                            params.name)[2].from_node
                        LOG.info(
                            "folding additive bias from %s into existing bias on %s",
                            op_node.name, params.name)
                        # dqvalue is the attribute used for every other
                        # constant in this method
                        bias_node.value = bias_node.dqvalue + flat_value
                    else:
                        if len(flat_value) == out_shape[1]:
                            # matmul needs to be transposed to fuse this
                            reverse_matmul(G, params)
                            has_transposed = True
                        bias_node = ConstantInputParameters(
                            G.unique_name(f'{params.name}_bias'),
                            value=flat_value,
                            dims=Dim.unnamed(flat_value.shape))
                        G.add_edge(
                            NNEdge(from_node=bias_node,
                                   to_node=params,
                                   to_idx=2))
                        # extend the inward transpose
                        if params.transpose_in:
                            params.transpose_in = params.transpose_in + [None]
                        LOG.info(
                            "folding additive bias from %s into new bias on %s",
                            op_node.name, params.name)
                else:
                    # multiplication: scale one constant input of the matmul
                    params_in = G.indexed_in_edges(params.name)
                    consts = [
                        isinstance(edge.from_node, ConstantInputParameters)
                        for edge in params_in
                    ]
                    if not any(consts):
                        break
                    # prefer the constant on input 1, else use input 0
                    mult_const_node = params_in[1].from_node if consts[
                        1] else params_in[0].from_node
                    mult_const_node.value = mult_const_node.dqvalue * const_node.dqvalue
                    if has_bias:
                        # the bias must be scaled by the same factor
                        bias_node = params_in[2].from_node
                        bias_node.value = bias_node.dqvalue * const_node.dqvalue

                    LOG.info(
                        "folding multaplicative bias from %s into new bias on %s",
                        op_node.name, params.name)

                # splice the folded op out and reconnect its consumers
                out_edges = G.out_edges(op_node.name)
                G.remove(op_node)
                if remove_constant:
                    G.remove(const_node)
                for edge in out_edges:
                    G.add_edge(
                        NNEdge(from_node=params,
                               to_node=edge.to_node,
                               to_idx=edge.to_idx))
                G.add_dimensions()
                if G.quantization:
                    quantizer = UnifiedQuantizer.from_quantized_graph(G)
                    quantizer.quantize(G, start_nodes=[params])
                    RemoveUnnecessaryQuantizeOperators().match(G)
                has_modified_graph = True

        if has_transposed:
            G.adjust_order()

        if set_identity:
            self.set_identity(G)

        return has_modified_graph
Exemplo n.º 11
0
    def _match(self, G: GraphView, set_identity: bool = True, **kwargs):
        """Fuse each node set found by self.find_sets into one expression node.

        Every set is copied into a GraphView fragment and replaced in G by an
        ExpressionFusionParameters node. When the graph is quantized, a QRec
        is created for the expression from the quantization of the replaced
        nodes, QuantizeParameters nodes directly on the expression outputs
        are removed, and the new expressions are requantized at the end.

        Args:
            G: graph to modify in place.
            set_identity: when True, self.set_identity(G) is called at the end.
            **kwargs: unused.

        Returns:
            True if at least one node set was fused, False otherwise.
        """
        modified = False
        pending_quantize = []
        for node_set in self.find_sets(G):
            # install a new SymbolStats as the default Symbol control before
            # the expression for this set is built
            Symbol.set_default_control(SymbolStats())
            modified = True
            in_edges, out_edges, internal_edges = group_edges(G, node_set)

            # the fragment holds the fused nodes and the edges between them
            frag = GraphView()
            for member in node_set:
                frag.add_node(member)
            for inner_edge in internal_edges:
                frag.add_edge(inner_edge)

            # in_edges / out_edges are keyed by (source node, source index)
            in_mapping = [[(edge.to_node, edge.to_idx) for edge in group]
                          for group in in_edges.values()]
            out_mapping = list(out_edges.keys())
            in_dims = [src.out_dims[src_idx] for src, src_idx in in_edges]
            out_dims = [src.out_dims[src_idx] for src, src_idx in out_edges]
            constant_inputs = [
                src for src, _ in in_edges
                if isinstance(src, ConstantInputParameters)
            ]
            LOG.debug(
                "inputs coming from: %s",
                ",".join(f"{src.__repr__()}:{src_idx}"
                         for src, src_idx in in_edges))
            LOG.info("fusing nodes: %s into expr_%s",
                     ",".join(member.__repr__() for member in node_set),
                     self._expr_num)
            expr = ExpressionFusionParameters(
                G.unique_name(f"expr_{self._expr_num}"),
                subgraph=frag,
                qrecs=G.quantization,
                input_mapping=in_mapping,
                output_mapping=out_mapping,
                in_dims=in_dims,
                out_dims=out_dims,
                constant_inputs=constant_inputs)
            G.replace_fragment(
                frag,
                expr,
                frag_in_edges=list(set.union(*in_edges.values())),
                frag_out_edges=list(set.union(*out_edges.values())),
                edge_in_mapping=list(in_edges.keys()),
                edge_out_mapping=[[(edge.to_node, edge.to_idx)
                                   for edge in group]
                                  for group in out_edges.values()],
                edge_class=NNEdge)

            if G.quantization:
                qrecs = G.quantization
                # an expression input takes the qtype of the first node input
                # that it feeds; an output takes that of the producing node
                in_qs = [
                    qrecs[NodeId(targets[0][0])].in_qs[targets[0][1]]
                    for targets in in_mapping
                ]
                out_qs = [
                    qrecs[NodeId(src)].out_qs[src_idx]
                    for src, src_idx in out_mapping
                ]
                stats = Symbol.CURRENT_CONTROL.stats
                func_col = expr.func_col

                def record_range(var_name, qtype):
                    # store the qtype range under the symbol's name
                    value_range = {
                        'min': qtype.min_val,
                        'max': qtype.max_val
                    }
                    stats[func_col.variables[var_name].name] = value_range

                for pos, qtype in enumerate(in_qs):
                    record_range(func_col.input_names[pos], qtype)
                for pos, qtype in enumerate(out_qs):
                    record_range(func_col.output_names[pos], qtype)

                G.quantization[NodeId(expr)] = QRec(in_qs=in_qs,
                                                    out_qs=out_qs,
                                                    expression=stats,
                                                    ktype='scaled')
                # delete any quantize parameters on outputs to allow the quantizer
                # to fuse them into the expression
                for edge in G.out_edges(expr.name):
                    if not isinstance(edge.to_node, QuantizeParameters):
                        continue
                    G.remove_and_reconnect(edge.to_node)
                    if NodeId(edge.to_node) in G.quantization:
                        del G.quantization[NodeId(edge.to_node)]
                pending_quantize.append(expr)

            self._expr_num += 1

        if pending_quantize:
            quantizer = UnifiedQuantizer.from_quantized_graph(G)
            G.quantization = quantizer.quantize(G,
                                                start_nodes=pending_quantize)

        if set_identity:
            self.set_identity(G)

        return modified
Exemplo n.º 12
0
    def _match(self,
               G: GraphView,
               set_identity: bool = True,
               **kwargs) -> bool:
        """Replace strided slices that share an input with a split node.

        Strided slices are grouped by the (node, output index) they read
        from. A group with a single slice is handed to self.slice_to_split.
        A larger group is replaced by one SplitParameters node when the
        slices differ on exactly one axis, all have stride 1 on that axis,
        start at index 0 and are contiguous (each slice starts where the
        previous one ends).

        Args:
            G: graph to modify in place.
            set_identity: when True, self.set_identity(G) is called at the end.
            **kwargs: unused.

        Returns:
            True if the graph was modified, False otherwise.
        """
        has_modified_graph = False
        # group the strided slices by the output that they read from
        slices_by_origin = {}
        for slice_node in [
                node for node in G.nodes()
                if isinstance(node, StridedSliceParameters)
        ]:
            in_edge = G.in_edges(slice_node.name)[0]
            group = slices_by_origin.setdefault(
                (in_edge.from_node, in_edge.from_idx), [])
            group.append(slice_node)
        for slice_nodes in slices_by_origin.values():
            # transpose so that slices[axis] holds, for that axis, the
            # (start, end, stride) of every node in the group
            slices = list(zip(*[node.act_slice for node in slice_nodes]))
            if len(slice_nodes) == 1:
                # a truthy result reports a conversion. An implementation
                # that returns nothing leaves the flag unchanged.
                if self.slice_to_split(G, slice_nodes, slices):
                    has_modified_graph = True
                continue

            # axes on which the slices are not all identical
            diff_slices = [(idx, elems) for idx, elems in enumerate(slices)
                           if not all(elems[0] == elem for elem in elems[1:])]
            if len(diff_slices) != 1:
                continue
            axis, axis_slices = diff_slices[0]
            # strides must be one
            if any(sl[2] != 1 for sl in axis_slices):
                continue
            # check if slices are consecutive and non overlapping. sl[1] is
            # the end index (sizes below are end - start) so each slice must
            # end exactly where the next one starts.
            slices = sorted(axis_slices, key=lambda x: x[0])
            # the split sizes are handed over without a start offset, so the
            # first piece has to begin at index 0
            if slices[0][0] != 0:
                continue
            if not all(sl[1] == following[0]
                       for sl, following in zip(slices, slices[1:])):
                continue
            # NOTE(review): nothing checks that the last slice reaches the
            # end of the axis - confirm get_splits copes with a short total
            szes = [sl[1] - sl[0] for sl in slices]
            slice_nodes = sorted(slice_nodes,
                                 key=lambda x: x.act_slice[axis][0])
            act_slices, out_shapes, axis = SplitParameters.get_splits(
                slice_nodes[0].in_dims[0].shape, axis, splits=szes)
            params = SplitParameters(slice_nodes[0].name + '_split',
                                     act_slices=act_slices,
                                     out_shapes=out_shapes,
                                     axis=axis)
            in_edge = G.in_edges(slice_nodes[0].name)[0]
            G.add_edge(
                NNEdge(from_node=in_edge.from_node,
                       to_node=params,
                       from_idx=in_edge.from_idx))
            sub_names = []
            # output idx of the split takes over the consumers of slice idx
            for idx, node in enumerate(slice_nodes):
                sub_names.append(node.name)
                out_edges = G.out_edges(node.name)
                G.remove(node)
                for out_edge in out_edges:
                    G.add_edge(
                        NNEdge(from_node=params,
                               to_node=out_edge.to_node,
                               from_idx=idx,
                               to_idx=out_edge.to_idx))
            if G.quantization:
                G.add_dimensions()
                quantizer = UnifiedQuantizer.from_quantized_graph(G)
                quantizer.quantize(G, start_nodes=[params])
                RemoveUnnecessaryQuantizeOperators().match(G)

            # report the name of the split node that was actually created
            LOG.info('replaced slice nodes %s with split node %s',
                     ",".join(sub_names), params.name)

            has_modified_graph = True

        if set_identity:
            self.set_identity(G)

        return has_modified_graph
Exemplo n.º 13
0
 def slice_to_split(G, slice_nodes, slices):
     """Replace a lone strided slice by a split when it cuts a single axis.

     The slice is converted only if every axis has stride 1 and exactly one
     axis is narrower than the input. The parts of that axis in front of and
     behind the slice become extra split outputs that are connected to new
     'unused' OutputParameters nodes.

     Args:
         G: graph to modify in place.
         slice_nodes: list whose first element is the strided slice node.
         slices: per axis, a 1-tuple holding the (start, end, stride) of the
             slice node on that axis.

     Returns:
         True if the slice was replaced by a split, False if the graph was
         left untouched.
     """
     slice_node = slice_nodes[0]
     in_dims = slice_node.in_dims[0].shape
     # only one node here, so unwrap the per axis 1-tuples
     slices = [sl[0] for sl in slices]
     if any(sl[2] != 1 for sl in slices):
         return False
     szes = tuple(sl[1] - sl[0] for sl in slices)
     # find sliced axes that differ
     diff_axis = tuple(idx
                       for idx, (d1, d2) in enumerate(zip(szes, in_dims))
                       if d1 != d2)
     if len(diff_axis) != 1:
         return False
     # good to convert to a split
     axis = diff_axis[0]
     axis_slice = slices[axis]
     axis_dim = in_dims[axis]
     outs = []
     splits = []
     if axis_slice[0] > 0:
         # leading part of the axis that the slice skips
         splits.append(axis_slice[0])
         oparams = OutputParameters(G.unique_name('unused'))
         oparams.at_options.allocate = 1
         # Register the node straight away. Presumably unique_name only
         # looks at nodes already in the graph, in which case the trailing
         # output created below would otherwise receive the same name.
         G.add_node(oparams)
         outs.append(((oparams, 0), ))
     splits.append(axis_slice[1] - axis_slice[0])
     outs.append([(edge.to_node, edge.to_idx)
                  for edge in G.out_edges(slice_node.name)])
     if axis_slice[1] < axis_dim:
         # trailing part of the axis that the slice skips
         splits.append(axis_dim - axis_slice[1])
         oparams = OutputParameters(G.unique_name('unused'))
         oparams.at_options.allocate = 1
         G.add_node(oparams)
         outs.append(((oparams, 0), ))
     # the in edge has to be read before the node is removed
     in_edge = G.in_edges(slice_node.name)[0]
     G.remove(slice_node)
     act_slices, out_shapes, axis = SplitParameters.get_splits(
         in_dims, axis, splits=splits)
     LOG.info(
         'replacing strided slice %s with split with %s redundant outputs',
         slice_node.name,
         len(outs) - 1)
     if axis != 0:
         LOG.warning('adjust needs to be rerun')
     # the split reuses the name of the slice that it replaces
     split_params = SplitParameters(slice_node.name,
                                    act_slices=act_slices,
                                    out_shapes=out_shapes,
                                    axis=axis)
     G.add_edge(
         NNEdge(from_node=in_edge.from_node,
                from_idx=in_edge.from_idx,
                to_node=split_params))
     for out_idx, out_cons in enumerate(outs):
         for out_con in out_cons:
             G.add_edge(
                 NNEdge(from_node=split_params,
                        from_idx=out_idx,
                        to_node=out_con[0],
                        to_idx=out_con[1]))
     if G.quantization:
         G.add_dimensions()
         quantizer = UnifiedQuantizer.from_quantized_graph(G)
         quantizer.quantize(G, start_nodes=[split_params])
         RemoveUnnecessaryQuantizeOperators().match(G)
     return True