def do_aquant(self, args: argparse.Namespace):
    """Quantize the loaded graph from activation statistics.

    Statistics are taken from the replayed history when it is being replayed
    and has stats; otherwise they are collected by running every group of
    input files matched by ``args.input_files`` through an
    ``ActivationRangesCollector`` and then recorded via ``_record_stats``.
    The resulting quantization records are stored on ``self.G.quantization``.

    Args:
        args: Parsed command arguments. Reads ``scheme``, ``force_width``,
            ``input_files``, ``quant_dimension`` and ``no_narrow_weights``.

    Returns:
        None. If no input file is found an error is reported with
        ``self.perror`` and the graph quantization is left untouched.
    """
    self._check_graph()
    collector = ActivationRangesCollector()

    # SQ8 is always 8 bit; any other scheme takes the width from the command line
    bits = 8 if args.scheme == 'SQ8' else args.force_width

    replay_stats_available = self.replaying_history and self.history_stats
    if replay_stats_available:
        # replaying a state file that carries activation stats: reuse them
        astats = self.history_stats
    else:
        import_kwargs = self._get_input_args(args)
        groups_seen = 0
        for file_group in glob_input_files(args.input_files, self.G.num_inputs):
            LOG.info("input file %s", file_group)
            groups_seen += 1
            tensors = [
                import_data(file_name, **import_kwargs)
                for file_name in file_group
            ]
            collector.collect_stats(self.G, tensors)
        if groups_seen == 0:
            self.perror("No input files found")
            return
        astats = collector.stats
        self._record_stats(astats)

    quantizer = UnifiedQuantizer(
        args.scheme,
        astats,
        quantized_dimension=args.quant_dimension,
        narrow_weights=not args.no_narrow_weights,
        bits=bits,
    )
    self.G.quantization = quantizer.quantize(self.G)
    # NOTE: the SQ8-only matcher passes that used to run here (concat
    # equalisation and RNN / softmax / sigmoid-swish qrec propagation) are
    # disabled; the original author marked them as "should now be unnecessary".
    LOG.info("Quantization set. Use qshow command to see it.")
def tune_scaled(G, nodes):
    """Re-run quantization from ``nodes`` with the 'SQ8' scheme forced.

    The forced scheme is applied to every node returned by
    ``get_nodes_and_fusion_nodes(nodes)``. Afterwards redundant quantize
    operators are removed and the graph dimensions are recalculated.

    Args:
        G: Graph that already carries a quantization.
        nodes: Nodes handed to the quantizer as ``start_nodes``.
    """
    forced_scheme = dict.fromkeys(get_nodes_and_fusion_nodes(nodes), 'SQ8')
    requantizer = UnifiedQuantizer.from_quantized_graph(
        G, extra_schemes=['SQ8'])
    requantizer.quantize(G, start_nodes=nodes, force_scheme=forced_scheme)
    RemoveUnnecessaryQuantizeOperators().match(G)
    G.add_dimensions()
def tune_options(G, nodes, options):
    """Re-run quantization from ``nodes`` with ``options`` forced on them.

    The same ``options`` object is associated with every node returned by
    ``get_nodes_and_fusion_nodes(nodes)``. Afterwards redundant quantize
    operators are removed and the graph dimensions are recalculated.

    Args:
        G: Graph that already carries a quantization.
        nodes: Nodes handed to the quantizer as ``start_nodes``.
        options: Quantizer options passed through as ``force_options``.
    """
    forced_options = dict.fromkeys(get_nodes_and_fusion_nodes(nodes), options)
    requantizer = UnifiedQuantizer.from_quantized_graph(G)
    requantizer.quantize(G, start_nodes=nodes, force_options=forced_options)
    RemoveUnnecessaryQuantizeOperators().match(G)
    G.add_dimensions()
def create_graph(self, filename, opts):
    """Import a TFLite flatbuffer file and return it as an ``NNGraph``.

    After the raw import the graph is cleaned up (dangling nodes, split
    edges, duplicate constants, concats) and, when requested through the
    options, quantize operators are removed and the quantization stored in
    the model is loaded and repaired.

    Args:
        filename: Path of the TFLite model file.
        opts: Importer options; normalised with ``self.get_opts``. The keys
            read here are ``name``, ``load_quantization`` and
            ``remove_quantize_ops``.

    Returns:
        The imported graph.

    Raises:
        OSError: If ``filename`` cannot be opened or read.
        Whatever ``check`` raises when the model version is not 3
        (NOTE(review): exception type not visible from here).
    """
    opts = self.get_opts(opts)
    self._name_cache = {}
    add_sys_path(os.path.dirname(__file__))
    # read the whole model and close the handle straight away instead of
    # leaving it to the garbage collector
    with open(filename, "rb") as model_fp:
        buf = model_fp.read()
    model = Model.GetRootAsModel(buf, 0)
    LOG.info("Importing TFLITE model version %s", model.Version())
    check(model.Version() == 3, "Only support version 3 graphs at present")
    if model.SubgraphsLength() > 1:
        LOG.warning(
            "nntool only supports one subgraph. There may be errors loading this graph."
        )
    G = NNGraph(model=model,
                filename=filename,
                name=opts.get('name'),
                constant_store=ConstantStore())
    if opts.get('load_quantization'):
        # the quantization set must exist before the import so that the
        # import can fill it in
        G.quantization = QuantizationSet()
        G.has_quantized_parameters = True
        G.quantization.schemes_present.add('SQ8')

    self._import_tflite_graph(G, model, opts)
    clean_dangling_nodes(G)
    fix_split_in_edges(G)
    MatchDuplicateConstants().match(G)
    # DrawGraphReporter().report(G)
    G.add_dimensions()
    remove_concats(G)
    if opts['remove_quantize_ops']:
        RemoveQuantizeOperators().match(G)
        G.add_dimensions()

    if opts.get('load_quantization'):
        # get rid of qrecs on nodes that were not used; collect first so the
        # quantization set is not mutated while it is being iterated
        to_remove = [nid for nid in G.quantization if nid.node_name not in G]
        for nid in to_remove:
            del G.quantization[nid]
        nodes_with_bad_quantization = self.find_nodes_with_bad_quantization(G)
        quantizer = UnifiedQuantizer.from_quantized_graph(G)
        # check for quantization problems
        # 1) need to force softmax/Sigmoid input to POW2 quantization
        # 2) need to check that all concats and splits have same input and
        #    output quantization
        # 3) Need to check that all nodes have qrecs and that they are consistent
        nodes_with_bad_quantization |= set(
            G.nodes(node_classes=(ConcatParameters,
                                  SoftMaxParameters,
                                  SplitParameters,
                                  SigmoidActivationParameters)))
        G.quantization = quantizer.quantize(
            G, start_nodes=nodes_with_bad_quantization)
        G.add_dimensions()

    return G
def do_fquant(self, args: argparse.Namespace):
    """Attempt to calculate a fake quantization for the graph.

    Random input tensors are pushed through the graph (with the constant
    store switched to fake mode) to collect activation ranges, which are then
    used to quantize the graph. This is intended to allow code generation for
    performance testing even if no real weights and input data are available.

    Args:
        args: Parsed command arguments. Reads ``num_inference``, ``uniform``,
            ``scheme``, ``force_width``, ``quant_dimension`` and
            ``no_narrow_weights``.
    """
    self._check_graph()
    self.G.constant_store.fake = True
    try:
        stats_collector = ActivationRangesCollector()
        for _ in range(args.num_inference):
            if args.uniform:
                # uniform noise in [-uniform, uniform)
                input_tensors = [
                    np.random.uniform(-args.uniform, args.uniform,
                                      inp.dims.shape)
                    for inp in self.G.input_nodes()
                ]
            else:
                # default: gaussian noise, mean 0, standard deviation 0.2
                input_tensors = [
                    np.random.normal(0, 0.2, inp.dims.shape)
                    for inp in self.G.input_nodes()
                ]
            stats_collector.collect_stats(self.G, input_tensors)

        # SQ8 is always 8 bit; other schemes take the width from the command line
        bits = 8 if args.scheme == 'SQ8' else args.force_width
        astats = stats_collector.stats
        quantizer = UnifiedQuantizer(
            args.scheme,
            astats,
            quantized_dimension=args.quant_dimension,
            narrow_weights=not args.no_narrow_weights,
            bits=bits)
        self._record_stats(astats)
        qrecs = quantizer.quantize(self.G)
        self.G.quantization = qrecs
        if args.scheme == 'SQ8':
            concats_matcher = EqualizeSymmetricMultiplicativeQuantivedConcats()
            concats_matcher.match(self.G, set_identity=False)
            softmax_qrec_matcher = PropagateSoftmaxSymQrec()
            softmax_qrec_matcher.match(self.G, set_identity=False)
    finally:
        # always leave fake mode, even when collection or quantization fails,
        # so that later commands do not silently run on fake constants
        self.G.constant_store.fake = False
def tune_float(G, nodes, float_type):
    """Re-run quantization from ``nodes`` with the 'float' scheme forced.

    Every node returned by ``get_nodes_and_fusion_nodes(nodes)`` is forced to
    the 'float' scheme and gets its own ``{'float_type': float_type}`` option
    dictionary. Afterwards redundant quantize operators are removed and the
    graph dimensions are recalculated.

    Args:
        G: Graph that already carries a quantization.
        nodes: Nodes handed to the quantizer as ``start_nodes``.
        float_type: Value stored under the ``'float_type'`` option.
    """
    all_nodes = get_nodes_and_fusion_nodes(nodes)
    forced_scheme = dict.fromkeys(all_nodes, 'float')
    # one separate option dict per node, never a shared instance
    forced_options = {}
    for node in all_nodes:
        forced_options[node] = {'float_type': float_type}
    requantizer = UnifiedQuantizer.from_quantized_graph(
        G, extra_schemes=['float'])
    requantizer.quantize(
        G,
        start_nodes=nodes,
        force_scheme=forced_scheme,
        force_options=forced_options,
    )
    RemoveUnnecessaryQuantizeOperators().match(G)
    G.add_dimensions()
def do_aquant(self, args: argparse.Namespace):
    """Quantize the loaded graph from activation statistics.

    Statistics are taken from the replayed history when it is being replayed
    and has stats; otherwise they are collected by running every group of
    input files matched by ``args.input_files`` through an
    ``ActivationRangesCollector`` and then recorded via ``_record_stats``.
    Any existing quantization is cleared before the new one is computed;
    afterwards redundant quantize operators are removed and the graph
    dimensions are recalculated.

    Args:
        args: Parsed command arguments. Quantizer options are derived with
            ``get_options_from_args``; ``scheme``, ``force_width`` and
            ``input_files`` are read directly.

    Returns:
        None. If no input file is found an error is reported with
        ``self.perror`` and the graph quantization is left untouched.
    """
    self._check_graph()
    collector = ActivationRangesCollector()
    quant_opts = get_options_from_args(args)

    replay_stats_available = self.replaying_history and self.history_stats
    if replay_stats_available:
        # replaying a state file that carries activation stats: reuse them
        astats = self.history_stats
    else:
        import_kwargs = self._get_input_args(args)
        groups_seen = 0
        for file_group in glob_input_files(args.input_files, self.G.num_inputs):
            LOG.info("input file %s", file_group)
            groups_seen += 1
            tensors = [
                import_data(file_name, **import_kwargs)
                for file_name in file_group
            ]
            collector.collect_stats(self.G, tensors)
        if groups_seen == 0:
            self.perror("No input files found")
            return
        astats = collector.stats
        self._record_stats(astats)

    # an explicit width on the command line overrides the derived options
    if args.force_width:
        quant_opts['bits'] = args.force_width

    quantizer = UnifiedQuantizer(args.scheme, astats, **quant_opts)
    # drop the existing quantization before computing the new one
    self.G.quantization = None
    self.G.quantization = quantizer.quantize(self.G)
    RemoveUnnecessaryQuantizeOperators().match(self.G)
    self.G.add_dimensions()
    LOG.info("Quantization set. Use qshow command to see it.")
def do_fquant(self, args: argparse.Namespace):
    """Attempt to calculate a fake quantization for the graph.

    Unless activation stats are available from a replayed history, random
    input tensors are pushed through the graph (with the constant store
    switched to fake mode) to collect activation ranges. The ranges are then
    used to quantize the graph. This is intended to allow code generation for
    performance testing even if no real weights and input data are available.

    Args:
        args: Parsed command arguments. Quantizer options are derived with
            ``get_options_from_args``; ``num_inference``, ``uniform``,
            ``scheme`` and ``force_width`` are read directly.
    """
    self._check_graph()
    opts = get_options_from_args(args)
    if self.replaying_history and self.history_stats:
        astats = self.history_stats
    else:
        self.G.constant_store.fake = True
        try:
            stats_collector = ActivationRangesCollector()
            for _ in range(args.num_inference):
                if args.uniform:
                    # uniform noise in [-uniform, uniform)
                    input_tensors = [
                        np.random.uniform(-args.uniform, args.uniform,
                                          inp.dims.shape)
                        for inp in self.G.input_nodes()
                    ]
                else:
                    # default: gaussian noise, mean 0, standard deviation 0.2
                    input_tensors = [
                        np.random.normal(0, 0.2, inp.dims.shape)
                        for inp in self.G.input_nodes()
                    ]
                stats_collector.collect_stats(self.G, input_tensors)
            astats = stats_collector.stats
            self._record_stats(astats)
        finally:
            # always leave fake mode, even when stats collection fails, so
            # that later commands do not silently run on fake constants
            self.G.constant_store.fake = False

    # an explicit width on the command line overrides the derived options
    if args.force_width:
        opts['bits'] = args.force_width

    quantizer = UnifiedQuantizer(args.scheme, astats, **opts)
    # clear the existing quantization
    self.G.quantization = None
    qrecs = quantizer.quantize(self.G)
    self.G.quantization = qrecs
    RemoveUnnecessaryQuantizeOperators().match(self.G)
    self.G.add_dimensions()
    LOG.info("Quantization set. Use qshow command to see it.")
def tune_pow2(G, nodes, pow2_type):
    """Re-run quantization from ``nodes`` with the 'POW2' scheme forced.

    Every node returned by ``get_nodes_and_fusion_nodes(nodes)`` is forced to
    the 'POW2' scheme with its own ``{'bits': ...}`` option dictionary: 16
    bits when ``pow2_type`` is ``'int16'``, 8 bits for any other value.
    Afterwards redundant quantize operators are removed and the graph
    dimensions are recalculated.

    Args:
        G: Graph that already carries a quantization.
        nodes: Nodes handed to the quantizer as ``start_nodes``.
        pow2_type: ``'int16'`` selects 16 bits, anything else 8 bits.
    """
    all_nodes = get_nodes_and_fusion_nodes(nodes)
    bits = 16 if pow2_type == 'int16' else 8
    forced_scheme = dict.fromkeys(all_nodes, 'POW2')
    # one separate option dict per node, never a shared instance
    forced_options = {}
    for node in all_nodes:
        forced_options[node] = {'bits': bits}
    requantizer = UnifiedQuantizer.from_quantized_graph(
        G, extra_schemes=['POW2'])
    requantizer.quantize(
        G,
        start_nodes=nodes,
        force_scheme=forced_scheme,
        force_options=forced_options,
    )
    RemoveUnnecessaryQuantizeOperators().match(G)
    G.add_dimensions()
def _match(self, G: GraphView, set_identity: bool = True, **kwargs):
    """Fold constant add / multiply nodes that follow a matmul into the matmul.

    For every ``MatMulOpParameters`` node the single downstream node is
    inspected. When it is a ``MatrixAddParameters`` or ``MatrixMulParameters``
    whose other input is a ``ConstantInputParameters`` the constant is folded:

    * add: into the existing bias constant, or into a new bias constant that
      is connected to input 2 of the matmul;
    * multiply: into the constant operand of the matmul (and into its bias if
      it has one).

    The folded node is removed and its consumers are reconnected to the
    matmul. The process repeats on the same matmul until nothing more can be
    folded. If the graph is quantized the matmul is requantized after each
    fold.

    Args:
        G: Graph to modify in place.
        set_identity: When true ``self.set_identity(G)`` is called at the end.
        **kwargs: Ignored.

    Returns:
        True if at least one node was folded, otherwise False.

    Raises:
        ValueError: If a matmul output does not have exactly two dimensions.
    """
    has_modified_graph = False
    has_transposed = False
    for params in G.nodes(node_classes=MatMulOpParameters):
        # keep folding while the matmul is followed by another foldable node
        while True:
            out_edges = G.out_edges(params.name)
            # can't fuse if there is a branch (or nothing downstream at all)
            if len(out_edges) != 1:
                break
            out_edge = out_edges[0]
            op_node = out_edge.to_node
            # must be a valid matrix op
            if not isinstance(op_node,
                              (MatrixAddParameters, MatrixMulParameters)):
                break
            # other edge to the op must be a constant
            other_idx = 1 if out_edge.to_idx == 0 else 0
            other_in_edge = G.indexed_in_edges(op_node.name)[other_idx]
            if not isinstance(other_in_edge.from_node,
                              ConstantInputParameters):
                break
            const_node = other_in_edge.from_node
            # only drop the constant when the folded op is its sole consumer;
            # a shared constant must stay in the graph for its other users
            remove_constant = len(G.out_edges(const_node.name)) == 1

            flat_value = const_node.dqvalue.flatten()
            out_shape = params.out_dims[0].shape
            if len(out_shape) != 2:
                raise ValueError(
                    f'strange outputs shape of {out_shape} for matmul {params.name}'
                )
            if len(flat_value) != out_shape[0] and len(
                    flat_value) != out_shape[1]:
                LOG.info(
                    "can't fuse %s into %s - value shape is not correct for bias",
                    const_node.name, params.name)
                break
            # a third input on the matmul is its bias
            has_bias = len(params.in_dims) == 3
            if isinstance(op_node, MatrixAddParameters):
                if has_bias:
                    # NOTE(review): this compares the rank of the flattened
                    # value with the length of the bias dimension object -
                    # kept as in the original, confirm it is what is wanted
                    if len(flat_value.shape) != len(params.in_dims[2]):
                        LOG.info(
                            "can't fuse %s into %s - bias shape is not the same",
                            const_node.name, params.name)
                        break
                    bias_node = G.indexed_in_edges(params.name)[2].from_node
                    LOG.info(
                        "folding additive bias from %s into existing bias on %s",
                        op_node.name, params.name)
                    bias_node.value = bias_node.dqvalue + flat_value
                else:
                    if len(flat_value) == out_shape[1]:
                        # matmul needs to be transposed to fuse this
                        reverse_matmul(G, params)
                        has_transposed = True
                    bias_node = ConstantInputParameters(
                        G.unique_name(f'{params.name}_bias'),
                        value=flat_value,
                        dims=Dim.unnamed(flat_value.shape))
                    G.add_edge(
                        NNEdge(from_node=bias_node, to_node=params, to_idx=2))
                    # extend the inward transpose to cover the new bias input
                    if params.transpose_in:
                        params.transpose_in = params.transpose_in + [None]
                    LOG.info(
                        "folding additive bias from %s into new bias on %s",
                        op_node.name, params.name)
            else:
                params_in = G.indexed_in_edges(params.name)
                consts = [
                    isinstance(edge.from_node, ConstantInputParameters)
                    for edge in params_in
                ]
                # a scale can only be folded into a constant operand
                if not any(consts):
                    break
                # prefer the second operand when it is a constant
                mult_const_node = (params_in[1].from_node
                                   if consts[1] else params_in[0].from_node)
                mult_const_node.value = (mult_const_node.dqvalue *
                                         const_node.dqvalue)
                if has_bias:
                    bias_node = params_in[2].from_node
                    bias_node.value = bias_node.dqvalue * const_node.dqvalue
                LOG.info(
                    "folding multaplicative bias from %s into new bias on %s",
                    op_node.name, params.name)

            # splice the folded op out and hand its consumers to the matmul
            out_edges = G.out_edges(op_node.name)
            G.remove(op_node)
            if remove_constant:
                G.remove(const_node)
            for edge in out_edges:
                G.add_edge(
                    NNEdge(from_node=params,
                           to_node=edge.to_node,
                           to_idx=edge.to_idx))
            has_modified_graph = True
            # dimensions must be current for the next pass of the loop
            G.add_dimensions()
            if G.quantization:
                quantizer = UnifiedQuantizer.from_quantized_graph(G)
                quantizer.quantize(G, start_nodes=[params])
                RemoveUnnecessaryQuantizeOperators().match(G)

    if has_transposed:
        G.adjust_order()

    if set_identity:
        self.set_identity(G)

    return has_modified_graph
def _match(self, G: GraphView, set_identity: bool = True, **kwargs):
    """Replace each node set found by ``find_sets`` with one expression node.

    For every node set a sub graph is built from the set and its internal
    edges, wrapped in an ``ExpressionFusionParameters`` node and swapped into
    ``G`` with ``replace_fragment``. If the graph is quantized a quantization
    record is created for the new node from the records of the replaced
    nodes, quantize nodes directly on its outputs are removed, and all new
    expression nodes are requantized once every set has been processed.

    Args:
        G: Graph to modify in place.
        set_identity: When true ``self.set_identity(G)`` is called at the end.
        **kwargs: Ignored.

    Returns:
        True if at least one node set was fused, otherwise False.
    """
    has_modified_graph = False
    pending_quantization = []
    for node_set in self.find_sets(G):
        # fresh symbol statistics for every expression
        Symbol.set_default_control(SymbolStats())
        has_modified_graph = True
        in_edges, out_edges, internal_edges = group_edges(G, node_set)

        # the fragment holds the fused nodes and the edges between them
        fragment = GraphView()
        for member in node_set:
            fragment.add_node(member)
        for inner_edge in internal_edges:
            fragment.add_edge(inner_edge)

        in_mapping = [
            [(edge.to_node, edge.to_idx) for edge in edge_group]
            for edge_group in in_edges.values()
        ]
        in_dims = [src.out_dims[src_idx] for src, src_idx in in_edges]
        out_dims = [src.out_dims[src_idx] for src, src_idx in out_edges]
        out_mapping = list(out_edges.keys())
        constant_inputs = [
            src for src, _ in in_edges
            if isinstance(src, ConstantInputParameters)
        ]
        LOG.debug(
            "inputs coming from: %s",
            ",".join(f"{src!r}:{src_idx}" for src, src_idx in in_edges))
        LOG.info("fusing nodes: %s into expr_%s",
                 ",".join(map(repr, node_set)), self._expr_num)

        expr = ExpressionFusionParameters(
            G.unique_name(f"expr_{self._expr_num}"),
            subgraph=fragment,
            qrecs=G.quantization,
            input_mapping=in_mapping,
            output_mapping=out_mapping,
            in_dims=in_dims,
            out_dims=out_dims,
            constant_inputs=constant_inputs)

        in_edge_mapping = list(in_edges.keys())
        out_edge_mapping = [
            [(edge.to_node, edge.to_idx) for edge in edge_set]
            for edge_set in out_edges.values()
        ]
        G.replace_fragment(
            fragment,
            expr,
            frag_in_edges=list(set.union(*in_edges.values())),
            frag_out_edges=list(set.union(*out_edges.values())),
            edge_in_mapping=in_edge_mapping,
            edge_out_mapping=out_edge_mapping,
            edge_class=NNEdge)

        if G.quantization:
            qrecs = G.quantization
            # input types come from the first consumer of each input group,
            # output types from the node that produced each output
            in_qs = [
                qrecs[NodeId(consumers[0][0])].in_qs[consumers[0][1]]
                for consumers in in_mapping
            ]
            out_qs = [
                qrecs[NodeId(producer)].out_qs[out_idx]
                for producer, out_idx in out_mapping
            ]
            stats = Symbol.CURRENT_CONTROL.stats
            func_col = expr.func_col
            # seed the symbol statistics with the known ranges
            for names_attr, qtypes in (('input_names', in_qs),
                                       ('output_names', out_qs)):
                for pos, qtype in enumerate(qtypes):
                    symbol = func_col.variables[
                        getattr(func_col, names_attr)[pos]]
                    stats[symbol.name] = {
                        'min': qtype.min_val,
                        'max': qtype.max_val
                    }
            G.quantization[NodeId(expr)] = QRec(in_qs=in_qs,
                                                out_qs=out_qs,
                                                expression=stats,
                                                ktype='scaled')
            # delete any quantize parameters on outputs to allow the quantizer
            # to fuse them into the expression
            for edge in G.out_edges(expr.name):
                consumer = edge.to_node
                if not isinstance(consumer, QuantizeParameters):
                    continue
                G.remove_and_reconnect(consumer)
                if NodeId(consumer) in G.quantization:
                    del G.quantization[NodeId(consumer)]
            pending_quantization.append(expr)

        self._expr_num += 1

    if pending_quantization:
        requantizer = UnifiedQuantizer.from_quantized_graph(G)
        G.quantization = requantizer.quantize(
            G, start_nodes=pending_quantization)

    if set_identity:
        self.set_identity(G)

    return has_modified_graph
def _match(self, G: GraphView, set_identity: bool = True, **kwargs) -> bool:
    """Replace strided slices that share an input with a single split node.

    Strided slice nodes are grouped by the output (node, index) they read
    from. A group with a single slice is handed to ``slice_to_split``. A
    larger group is replaced by one ``SplitParameters`` node when the slices
    differ on exactly one axis, all have stride 1 on that axis and, sorted by
    start, each one begins where the previous one stops.

    Args:
        G: Graph to modify in place.
        set_identity: When true ``self.set_identity(G)`` is called at the end.
        **kwargs: Ignored.

    Returns:
        True if a group of several slices was replaced by a split. Single
        slice conversions done by ``slice_to_split`` are not reflected in the
        result because that helper does not report whether it changed the
        graph.
    """
    has_modified_graph = False
    slices_by_origin = {}
    for slice_node in [
            node for node in G.nodes()
            if isinstance(node, StridedSliceParameters)
    ]:
        in_edge = G.in_edges(slice_node.name)[0]
        group = slices_by_origin.setdefault(
            (in_edge.from_node, in_edge.from_idx), [])
        group.append(slice_node)

    for slice_nodes in slices_by_origin.values():
        # transpose: one tuple per axis holding that axis' slice of each node
        slices = list(zip(*[node.act_slice for node in slice_nodes]))
        if len(slice_nodes) == 1:
            self.slice_to_split(G, slice_nodes, slices)
            continue
        # axes on which the nodes do not all take the same slice
        diff_slices = [(idx, elems) for idx, elems in enumerate(slices)
                       if not all(elems[0] == elem for elem in elems[1::])]
        if len(diff_slices) != 1:
            continue
        # strides must be one
        if any(sl[2] != 1 for sl in diff_slices[0][1]):
            continue
        # check if slices are consecutive and non overlapping: entries are
        # (start, stop, step) so each stop must equal the next start
        slices = sorted(diff_slices[0][1], key=lambda x: x[0])
        if not all(prev[1] == nxt[0]
                   for prev, nxt in zip(slices, slices[1:])):
            continue
        szes = [sl[1] - sl[0] for sl in slices]
        axis = diff_slices[0][0]
        # same order as the sorted slices so output idx matches split idx
        slice_nodes = sorted(slice_nodes,
                             key=lambda x: x.act_slice[axis][0])
        # NOTE(review): nothing here checks that the slices cover the whole
        # axis - confirm get_splits copes with partial coverage
        act_slices, out_shapes, axis = SplitParameters.get_splits(
            slice_nodes[0].in_dims[0].shape, axis, splits=szes)
        params = SplitParameters(slice_nodes[0].name + '_split',
                                 act_slices=act_slices,
                                 out_shapes=out_shapes,
                                 axis=axis)
        in_edge = G.in_edges(slice_nodes[0].name)[0]
        G.add_edge(
            NNEdge(from_node=in_edge.from_node,
                   to_node=params,
                   from_idx=in_edge.from_idx))
        sub_names = []
        for idx, node in enumerate(slice_nodes):
            sub_names.append(node.name)
            # grab the consumers before the slice node is removed
            out_edges = G.out_edges(node.name)
            G.remove(node)
            for out_edge in out_edges:
                G.add_edge(
                    NNEdge(from_node=params,
                           to_node=out_edge.to_node,
                           from_idx=idx,
                           to_idx=out_edge.to_idx))
        if G.quantization:
            G.add_dimensions()
            quantizer = UnifiedQuantizer.from_quantized_graph(G)
            quantizer.quantize(G, start_nodes=[params])
            RemoveUnnecessaryQuantizeOperators().match(G)

        LOG.info('replaced slice nodes %s with split node %s',
                 ",".join(sub_names), params.name)

        has_modified_graph = True

    if set_identity:
        self.set_identity(G)

    return has_modified_graph
def slice_to_split(G, slice_nodes, slices):
    """Replace a lone strided slice with a split node when that is possible.

    The conversion is only done when every axis has stride 1 and exactly one
    axis is actually reduced by the slice. The part of that axis before
    and/or after the slice becomes an extra split output connected to a new
    ``OutputParameters`` node named from ``'unused'``.

    Args:
        G: Graph to modify in place.
        slice_nodes: Sequence whose first element is the slice node.
        slices: Per-axis sequences whose first element is the
            ``(start, stop, step)`` slice of that axis.

    Returns:
        None, whether or not the graph was changed.
    """
    node = slice_nodes[0]
    in_shape = node.in_dims[0].shape
    per_axis = [entry[0] for entry in slices]

    # only unit strides can be expressed as a split
    for axis_sl in per_axis:
        if axis_sl[2] != 1:
            return

    extents = tuple(axis_sl[1] - axis_sl[0] for axis_sl in per_axis)
    # find sliced axes that differ
    changed_axes = tuple(
        idx for idx, (extent, dim) in enumerate(zip(extents, in_shape))
        if extent != dim)
    if len(changed_axes) != 1:
        return

    # good to convert to a split
    axis = changed_axes[0]
    start, stop = per_axis[axis][0], per_axis[axis][1]
    axis_dim = in_shape[axis]

    def unused_output():
        # sink for a part of the axis that the slice threw away
        sink = OutputParameters(G.unique_name('unused'))
        sink.at_options.allocate = 1
        return ((sink, 0), )

    splits = []
    outs = []
    if start > 0:
        splits.append(start)
        outs.append(unused_output())
    splits.append(stop - start)
    # consumers of the slice, captured before the node is removed
    outs.append([(edge.to_node, edge.to_idx)
                 for edge in G.out_edges(node.name)])
    if stop < axis_dim:
        splits.append(axis_dim - stop)
        outs.append(unused_output())

    in_edge = G.in_edges(node.name)[0]
    G.remove(node)
    act_slices, out_shapes, axis = SplitParameters.get_splits(
        in_shape, axis, splits=splits)
    LOG.info(
        'replacing strided slice %s with split with %s redundant outputs',
        node.name,
        len(outs) - 1)
    if axis != 0:
        LOG.warning('adjust needs to be rerun')

    # the split takes over the name of the slice it replaces
    split_params = SplitParameters(node.name,
                                   act_slices=act_slices,
                                   out_shapes=out_shapes,
                                   axis=axis)
    G.add_edge(
        NNEdge(from_node=in_edge.from_node,
               from_idx=in_edge.from_idx,
               to_node=split_params))
    for out_idx, connections in enumerate(outs):
        for target, target_idx in connections:
            G.add_edge(
                NNEdge(from_node=split_params,
                       from_idx=out_idx,
                       to_node=target,
                       to_idx=target_idx))

    if not G.quantization:
        return
    G.add_dimensions()
    requantizer = UnifiedQuantizer.from_quantized_graph(G)
    requantizer.quantize(G, start_nodes=[split_params])
    RemoveUnnecessaryQuantizeOperators().match(G)