def evaluate(self, cur_G, node, direction, qrecs, fusion=None):
    """Work out the quantization record for ``node`` and store it in ``qrecs``.

    Args:
        cur_G: graph passed through to the qtype lookups and the resolver.
        node: node being evaluated.
        direction: ``"up"`` selects the upward fusion elimination pass; any
            other value selects the downward pass.
        qrecs: mapping updated in place with the resulting record.
        fusion: enclosing fusion when ``node`` is a fusion-internal node,
            otherwise ``None``.

    Returns:
        The record that was stored in ``qrecs`` for this node.
    """
    in_qs = self.get_inqtypes_down(cur_G, node)
    outqtypes_getter = (self.get_outqtypes_up_fusion
                        if fusion else self.get_outqtypes_up)
    out_qs = outqtypes_getter(cur_G, node)

    # nid identifies the node itself, pnid the node that owns the options
    if fusion is None:
        nid, pnid = NodeId(node), NodeId(node)
    else:
        nid, pnid = NodeId(fusion, fnode=node), NodeId(fusion)

    stat = self._stats.get(nid, None)
    opts = self.get_options(pnid)
    scheme_priority = self.get_scheme_priority(pnid)

    if not (isinstance(node, FusionBase) and node.quantize_internals):
        qrec = self._resolve_qrec(cur_G, direction, node, stat, opts,
                                  scheme_priority, in_qs, out_qs,
                                  fusion=fusion)
    else:
        # fusions that quantize their internals get the qrec of the fusion
        if direction == "up":
            eliminate = self.elimination_fusion_pass_up
        else:
            eliminate = self.elimination_fusion_pass_down
        try:
            qrec = eliminate(node, qrecs, in_qs, out_qs)
        except CantContinueError:
            # no change - keep the record already held in the current set
            qrec = self._qset[nid]

    qrecs[nid] = qrec
    return qrec
def match(self, G: GraphView, set_identity: bool = True):
    """Insert a copy node on each split output that ``find_split_concat`` reports.

    For every ``SplitParameters`` node, ``find_split_concat`` is asked for the
    bundles of edge sets per split output index. For each non-empty bundle a
    ``CopyParameters`` node is created, the first edge of every edge set is
    re-routed to start from the copy, and the copy is fed from the split
    output. When the graph has quantization, the copy receives a record of
    the same class as the split's record.

    Args:
        G: graph to modify in place.
        set_identity: accepted for interface compatibility; not used here.

    Returns:
        True if at least one copy node was inserted, otherwise False.
    """
    has_modified_graph = False
    split_nodes = [
        node for node in G.nodes() if isinstance(node, SplitParameters)
    ]
    for node in split_nodes:
        # traverse reshapes or transposes that do nothing - check gen
        # find edges connected to concats
        res = self.find_split_concat(G, node)
        if res is None:
            continue
        # TODO(martin) - group edges that have adjacent inputs and outputs
        qrec = G.quantization[NodeId(node)] if G.quantization else None
        for idx, bundle in enumerate(res):
            if not bundle:
                continue
            has_modified_graph = True
            copy_node = CopyParameters("%s_copy_%s" % (node.name, idx))
            for edge_set in bundle:
                first_edge = edge_set[0]
                G.remove_edge(first_edge)
                G.add_edge(
                    NNEdge(copy_node,
                           first_edge.to_node,
                           to_idx=first_edge.to_idx))
            G.add_edge(NNEdge(node, copy_node, from_idx=idx))
            if G.quantization:
                # in_qs/out_qs are lists of qtypes everywhere else in this
                # code, so wrap the single qtype instead of passing it bare
                G.quantization[NodeId(copy_node)] = qrec.__class__(
                    in_qs=[deepcopy(qrec.out_qs[idx])],
                    out_qs=[deepcopy(qrec.out_qs[idx])])
    return has_modified_graph
def _resolve_qrec(self, cur_G, direction, node, stat, opts, scheme_priority, in_qs, out_qs=None, fusion=None, set_out_qs=False):
    """Find a quantizer that yields a record for ``node``, relaxing constraints.

    Each attempt picks a handler with ``_choose_quantizer`` and runs it. If
    no record results, the output qtypes constraint is dropped first, then
    the input qtypes constraint; once both are gone a failure is fatal.

    Returns:
        The first non-None record produced by ``_call_quantizer``.

    Raises:
        ValueError: if no attempt produces a record even with both the input
            and output qtype constraints removed.
    """
    forced_in_qs = in_qs
    while True:
        qrec = None
        handler = self._choose_quantizer(cur_G, node, forced_in_qs, out_qs,
                                         stat, opts, scheme_priority, fusion,
                                         set_out_qs, direction)
        if handler is not None:
            pnid = NodeId(node) if fusion is None else NodeId(fusion)
            handler_opts = self.get_options(pnid, handler)
            if forced_in_qs is None:
                # no imposed inputs: ask the handler to derive them from stats
                cur_in_qs = self._call_get_in_qs_from_stats(
                    cur_G, direction, handler, node, in_qs, out_qs, stat,
                    handler_opts, fusion, set_out_qs=set_out_qs)
            else:
                cur_in_qs = forced_in_qs
            qrec = self._call_quantizer(cur_G, handler, node, cur_in_qs,
                                        out_qs, stat, handler_opts, fusion,
                                        direction=direction,
                                        set_out_qs=set_out_qs)
        if qrec is not None:
            return qrec

        # relax one constraint per failed attempt: outputs first, then inputs
        if out_qs is not None:
            out_qs = None
        elif forced_in_qs is None:
            raise ValueError(
                f'No quantizer found for node {node.name} with options {opts}')
        else:
            forced_in_qs = None
def do_nodeoption(self, args):
    """ Allows setting of autotiler generator control parameters and other code generation
    options such as the location of inputs and outputs. For a complete set of the parameters that
    can be set refer to the autotiler documentation."""
    self._check_graph()
    # No node selector and no parameter: list every stored node option.
    if args.step is None and args.parameter is None:
        for nodeid, elem in self.G.node_options.items():
            print("{}: {}".format(nodeid, elem))
        return
    # First element of the helper's result is the list of selected nodes.
    nodes = self.get_node_step_or_name(args.step, allow_comma=True)[0]
    # Nodes selected but no parameter: show the options stored for those nodes.
    if args.parameter is None:
        nothing = True
        for node in nodes:
            node_options = self.G.node_options.get(NodeId(node))
            if node_options:
                nothing = False
                self.poutput(f'Node: {node.name}:')
                print(node_options)
        if nothing:
            self.poutput("no nodeoptions set")
        return
    if not nodes:
        self.perror("No nodes selected")
        return
    # Set (or clear, when no value is given) the parameter on every selected node.
    for node in nodes:
        node_options = node.at_options
        if args.value is None:
            val = None
        else:
            try:
                # valid_options maps a parameter name to the callable used to
                # convert the command line string into a value
                option_type = node_options.valid_options[args.parameter]
            except KeyError:
                self.pwarning(
                    f"{args.parameter} is not a valid parameter for node {node.name}"
                )
                continue
            val = option_type(args.value)
        try:
            # These two parameters are applied through node attributes when the
            # value is truthy; a falsy value goes through setattr like the rest.
            if args.parameter in [
                    "RNN_STATES_AS_INPUTS", "LSTM_OUTPUT_C_STATE"
            ] and val:
                if args.parameter == "RNN_STATES_AS_INPUTS":
                    node.rnn_states_as_inputs = (val, self.G)
                if args.parameter == "LSTM_OUTPUT_C_STATE":
                    node.lstm_output_c_state = val
                    node.set_c_state_as_output(self.G)
            else:
                setattr(node_options, args.parameter, val)
            self.pfeedback(
                f'set option {args.parameter} on node {node.name} to {val}'
            )
            self.G.node_options[NodeId(node)] = node_options
        except KeyError:
            # NOTE(review): presumably raised by the options object for an
            # unknown parameter name - confirm against its implementation
            self.pwarning(
                f"{args.parameter} is not a valid parameter for node {node.name}"
            )
def common_quantize(cls, in_qtype, out_qtype, node, **kwargs):
    """Import a quantize operation as a constant or a ``QuantizeParameters`` node.

    When the first input is constant, the node is reduced to a
    ``ConstantInputParameters`` holding the (optionally converted) value.
    Otherwise a ``QuantizeParameters`` node is created and connected to the
    producer of the first input. If ``opts['load_quantization']`` is truthy,
    a ``MultQuantizationRecord`` is stored for the new node.

    Args:
        in_qtype: qtype of the incoming tensor.
        out_qtype: qtype of the produced tensor; may be falsy for constants.
        node: importer node providing ``name``, ``input`` and ``output``.
        **kwargs: must contain ``all_nodes``, ``opts`` and ``G``.

    Returns:
        The created parameters node, also registered in ``all_nodes`` under
        the node's first output.
    """
    all_nodes = kwargs['all_nodes']
    opts = kwargs['opts']
    G = kwargs['G']
    inputs = [all_nodes[t] for t in node.input]
    x = inputs[0]

    if cls.is_constant(x):
        LOG.info("reducing %s to a constant", node.name)
        val = x[0].value_as(out_qtype) if out_qtype else cls.get_constant(x)
        params = ConstantInputParameters(node.name,
                                         value=val,
                                         dims=Dim.unnamed(val.shape),
                                         qtype=out_qtype,
                                         constant_store=G.constant_store)
        # a constant is already in its output type on both sides
        rec_in_qtype = out_qtype
    else:
        params = QuantizeParameters(node.name,
                                    from_qtype=in_qtype,
                                    to_qtype=out_qtype)
        G.add_edge(NNEdge(from_node=x[0], to_node=params,
                          from_idx=x[1], to_idx=0))
        rec_in_qtype = in_qtype

    if opts.get('load_quantization'):
        G.quantization[NodeId(params)] = MultQuantizationRecord(
            in_qs=[rec_in_qtype], out_qs=[out_qtype])

    all_nodes[node.output[0]] = (params, 0, deepcopy(x[2]))
    return params
def test_conv_pool_relu_kernel_gen(mnist_unfused_8bit_state):
    """Check the generated kernel call for each conv/pool/relu combination."""
    G = load_state(mnist_unfused_8bit_state)
    steps = G.graph_state.steps
    conv_params = steps[1]['node']
    relu_params = steps[2]['node']
    pool_params = steps[3]['node']
    conv_q = G.quantization[NodeId(conv_params)]
    pool_q = G.quantization[NodeId(pool_params)]
    relu_q = G.quantization[NodeId(relu_params)]

    # (conv, conv_q, pool, pool_q, relu, relu_q) -> expected generated code
    cases = [
        ((conv_params, conv_q, None, None, None, None),
         'CNN_ConvolutionPoolReLU("Test", 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 32, 28, 28,\n KOP_CONV, 5, 5, 1, 1, 1, 1, 0,\n KOP_NONE, 0, 0, 0, 0, 0, 0, 0, KOP_NONE);'),
        ((conv_params, conv_q, pool_params, pool_q, relu_params, relu_q),
         'CNN_ConvolutionPoolReLU("Test", 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 32, 28, 28,\n KOP_CONV, 5, 5, 1, 1, 1, 1, 0,\n KOP_MAXPOOL, 2, 2, 1, 1, 2, 2, 0, KOP_RELU);'),
        ((conv_params, conv_q, None, None, relu_params, relu_q),
         'CNN_ConvolutionPoolReLU("Test", 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 32, 28, 28,\n KOP_CONV, 5, 5, 1, 1, 1, 1, 0,\n KOP_NONE, 0, 0, 0, 0, 0, 0, 0, KOP_RELU);'),
        ((conv_params, conv_q, pool_params, pool_q, None, None),
         'CNN_ConvolutionPoolReLU("Test", 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 32, 28, 28,\n KOP_CONV, 5, 5, 1, 1, 1, 1, 0,\n KOP_MAXPOOL, 2, 2, 1, 1, 2, 2, 0, KOP_NONE);'),
        ((None, None, pool_params, pool_q, relu_params, relu_q),
         'CNN_PoolReLU("Test", 0, 1, 1, 1, 1, 32, 32, 24, 24,\n KOP_MAXPOOL, 2, 2, 1, 1, 2, 2, 0, KOP_RELU);'),
        ((None, None, None, None, relu_params, relu_q),
         'CNN_PoolReLU("Test", 0, 1, 1, 1, 1, 32, 32, 24, 24,\n KOP_NONE, 0, 0, 0, 0, 0, 0, 0, KOP_RELU);'),
    ]
    for gen_args, expected in cases:
        code_block = gen_conv_pool_relu("Test", *gen_args)
        assert str(code_block) == expected
def _import_nodes(self, G, graph, handlers, all_nodes, outputs, opts):
    """Run the matching handler for every node of ``graph`` and wire graph outputs.

    Args:
        G: graph being built; edges and quantization records are added to it.
        graph: imported model graph providing ``nodes``.
        handlers: mapping from operator name to handler; for custom operators
            the entry is itself a mapping keyed by custom operator name.
        all_nodes: passed through to the handlers.
        outputs: mapping from tensor to a ``(node, index)`` pair for tensors
            that are graph outputs.
        opts: import options; ``load_quantization`` is read here.

    Raises:
        ValueError: when no handler is registered for a node.
    """
    for node in graph.nodes:
        handler = handlers.get(node.op_name, None)
        if not handler:
            raise ValueError("no handler found for %s" % node.op_type)
        if node.is_custom:
            # custom operators have a second level of lookup by custom name
            handler = handler.get(node.custom_op_name, None)
            if not handler:
                raise ValueError("no handler found for custom operation %s" %
                                 node.custom_op_name)

        params = handler.handle(node, all_nodes=all_nodes, G=G, opts=opts,
                                importer=self)
        if params is None:
            continue

        for idx, out_tensor in enumerate(node.output):
            output = outputs.get(out_tensor)
            if output:
                G.add_edge(NNEdge(from_node=params, to_node=output[0],
                                  from_idx=idx, to_idx=output[1]))
                if opts.get('load_quantization'):
                    # the output node inherits the qtype of the tensor it receives
                    qtype = deepcopy(G.quantization[NodeId(params)].out_qs[idx])
                    G.quantization[NodeId(output[0])] = MultQuantizationRecord(
                        in_qs=[qtype], out_qs=[qtype])
def find_direct_connects(self, G, node, has_modified_graph, find_output=True):
    """Insert copy nodes on the outputs of ``node`` reported by ``find_split_concat``.

    For each non-empty bundle returned for an output index, a
    ``CopyParameters`` node is created, the first edge of every edge set in
    the bundle is re-routed to start at the copy, and the copy is fed from
    ``node``'s output. With quantization present the copy gets a record built
    with ``QRec.copy_ktype`` from the node's record.

    Args:
        G: graph to modify in place.
        node: node whose outputs are examined.
        has_modified_graph: modification flag accumulated by the caller.
        find_output: forwarded to ``find_split_concat``.

    Returns:
        ``has_modified_graph`` if nothing was inserted, True if at least one
        copy node was inserted.
    """
    # traverse reshapes or transposes that do nothing - check gen
    # find edges connected to concats
    res = self.find_split_concat(G, node, find_output=find_output)
    if res is None:
        return has_modified_graph
    qrec = G.quantization[NodeId(node)] if G.quantization else None
    for idx, bundle in enumerate(res):
        if not bundle:
            continue
        has_modified_graph = True
        copy_node = CopyParameters("%s_copy_%s" % (node.name, idx))
        for edge_set in bundle:
            first_edge = edge_set[0]
            G.remove_edge(first_edge)
            LOG.info('inserting copy between %s/%s and %s/%s',
                     node.name, idx, first_edge.to_node.name,
                     first_edge.to_idx)
            G.add_edge(
                NNEdge(copy_node,
                       first_edge.to_node,
                       to_idx=first_edge.to_idx))
        G.add_edge(NNEdge(node, copy_node, from_idx=idx))
        if G.quantization:
            G.quantization[NodeId(copy_node)] = QRec.copy_ktype(
                qrec,
                in_qs=[deepcopy(qrec.out_qs[idx])],
                out_qs=[deepcopy(qrec.out_qs[idx])])
    # Report the accumulated flag rather than an unconditional True so that a
    # result with only empty bundles is not reported as a graph modification.
    return has_modified_graph
def format_dump_file(G, outputs, quantized):
    """Build one tuple per step: first output tensor followed by weights and biases.

    Args:
        G: graph providing ``graph_state.steps`` and ``quantization``.
        outputs: per-step outputs; only element 0 of each entry is kept.
        quantized: when truthy, weights and biases are quantized with the
            node's record; otherwise they are copied with ``np.copy``.

    Returns:
        A list of tuples. Fusion steps contribute a weights/biases pair per
        contained filter, filter steps one pair, and other steps ``None, None``.
    """
    def filter_tensors(filt, *nid_args):
        # weights first, then biases - same order as the dump format expects
        if quantized:
            qrec = G.quantization[NodeId(*nid_args)]
            return [qrec.weights_q.quantize(filt.weights),
                    qrec.biases_q.quantize(filt.biases)]
        return [np.copy(filt.weights), np.copy(filt.biases)]

    # simplify the output since we only have one for now and add weights
    foutputs = []
    for idx, out in enumerate(outputs):
        tensors = [out[0]]
        node = G.graph_state.steps[idx]['node']
        if isinstance(node, FusionParameters):
            for filt in node.contained_filters():
                tensors.extend(filter_tensors(filt, node, filt))
        elif isinstance(node, FilterParameters):
            tensors.extend(filter_tensors(node, node, None))
        else:
            tensors.extend([None, None])
        foutputs.append(tuple(tensors))
    return foutputs
def move_node(G, node, edges):
    """Remove ``node`` from its position and re-insert it on each edge in ``edges``.

    The node's producer is first connected directly to all of the node's
    consumers. The node itself is inserted on the first edge; deep copies
    named ``<name>_<n>`` are inserted on the remaining edges. When the node
    has a quantization record, each inserted node gets a copy of it whose
    first input qtype is taken from the edge's producer, and the output
    qtype is propagated through ``G.quantization.propagate``.

    Args:
        G: graph to modify in place.
        node: node to move; only its first input edge is considered.
        edges: edges on which the node (or a copy) is inserted.
    """
    nid = NodeId(node)
    if G.quantization and nid in G.quantization:
        qrec = G.quantization[nid]
    else:
        qrec = None

    node_in_edge = G.in_edges(node.name)[0]
    node_out_edges = G.out_edges(node.name)
    G.remove(node)
    # bridge the gap left by the removed node
    for out_edge in node_out_edges:
        G.add_edge(NNEdge(from_node=node_in_edge.from_node,
                          to_node=out_edge.to_node,
                          from_idx=node_in_edge.from_idx,
                          to_idx=out_edge.to_idx))

    for cnt, edge in enumerate(edges):
        LOG.info("Moving node %s between %s and %s", node.name,
                 edge.from_node.name, edge.to_node.name)
        if cnt == 0:
            new_node = node
        else:
            # the original can only be in one place; later edges get copies
            new_node = deepcopy(node)
            new_node.name = f'{node.name}_{cnt}'
        new_node.in_dims = [edge.from_node.out_dims[edge.from_idx].clone()]
        new_node.out_dims = [edge.to_node.in_dims[edge.to_idx].clone()]
        G.insert_node(new_node, edge.from_node, edge.to_node,
                      from_idx=edge.from_idx, to_idx=edge.to_idx,
                      edge_class=NNEdge)
        if qrec:
            from_qrec = G.quantization[NodeId(edge.from_node)]
            new_qrec = deepcopy(qrec)
            new_qrec.in_qs[0] = deepcopy(from_qrec.out_qs[edge.from_idx])
            G.quantization[NodeId(new_node)] = new_qrec
            G.quantization.propagate(G, new_node, node_in_edge.from_node,
                                     qtype=new_qrec.out_qs[0])
def match(self, G: GraphView, set_identity: bool = True):
    """Create quantization records for nodes whose record is missing or incomplete.

    Walks the entries of ``G.quantization`` whose record is ``None`` or lacks
    input or output qtypes and builds a ``MultQuantizationRecord`` for each
    from the qtypes of its neighbours.

    Args:
        G: graph whose ``quantization`` set is completed in place.
        set_identity: when truthy, ``self.set_identity(G)`` is called at the end.

    Returns:
        ``None`` when the graph has no quantization, otherwise ``False``.

    Raises:
        NotImplementedError: when qtypes are only available on one side of a
            node that has neighbours on the other side.
    """
    if not G.quantization:
        return
    # The candidate list is built up front so assignments to G.quantization in
    # the loop do not interfere with the iteration.
    for nid in [nid for nid, qrec in G.quantization.sorted_iterator(G)
                if qrec is None or not (qrec.in_qs and qrec.out_qs)]:
        if nid.fnode_name:
            LOG.warning("can't add quantization to fused node %s", nid.fnode_name)
            continue
        if nid.node_name not in G:
            # previous fusions may have removed nodes from the graph
            continue
        node = nid.get_node(G)
        predecessors = [NodeId(pred) for pred in G.predecessors(node.name)]
        successors = [NodeId(succ) for succs in G.successors(node.name) for succ in succs]
        # go_back: take input qtypes from predecessors. Chosen when the node has
        # no successors or when all of its predecessors have an entry.
        go_back = not successors or (predecessors and all(pred in G.quantization for pred in predecessors))
        # go_forward: take output qtypes from successors, by the mirrored rule.
        go_forward = not predecessors or (successors and all(succ in G.quantization for succ in successors))
        if not (go_back or go_forward):
            LOG.warning("node %s is not connected to anything and has no quantization", node.name)
            continue
        if go_forward:
            # the generator's nid is scoped to the expression and does not
            # rebind the loop variable
            out_qrecs = set(G.quantization[nid] for nid in successors)
            if not all(isinstance(out_qrec, MultQuantizationRecord) for out_qrec in out_qrecs):
                continue
            out_qtypes = reduce_qtypes([(edge.from_idx, G.quantization[NodeId(edge.to_node)].in_qs[edge.to_idx])
                                        for edge in G.out_edges(node.name)])
        else:
            out_qtypes = None
        if go_back:
            in_qrecs = set(G.quantization[nid] for nid in predecessors)
            if not all(isinstance(in_qrec, MultQuantizationRecord) for in_qrec in in_qrecs):
                continue
            in_qtypes = reduce_qtypes([(edge.to_idx, G.quantization[NodeId(edge.from_node)].out_qs[edge.from_idx])
                                       for edge in G.in_edges(node.name)])
        else:
            in_qtypes = None
        if not in_qtypes:
            if not predecessors:
                # graph input: inputs and outputs share the successors' qtypes
                LOG.info("setting quantization on input node %s", node.name)
                qrec = MultQuantizationRecord(in_qs=deepcopy(out_qtypes), out_qs=deepcopy(out_qtypes))
            else:
                raise NotImplementedError("propagating qrecs not implemented")
        elif not out_qtypes:
            if not successors:
                # graph output: inputs and outputs share the predecessors' qtypes
                LOG.info("setting quantization on output node %s", node.name)
                qrec = MultQuantizationRecord(in_qs=deepcopy(in_qtypes), out_qs=deepcopy(in_qtypes))
            else:
                raise NotImplementedError("propagating qrecs not implemented")
        else:
            LOG.info("setting quantization on node %s", node.name)
            qrec = MultQuantizationRecord(in_qs=deepcopy(in_qtypes), out_qs=deepcopy(out_qtypes))
        G.quantization[nid] = qrec
    if set_identity:
        self.set_identity(G)
    return False
def _match(self, G: GraphView, set_identity: bool = True, **kwargs):
    """Remove ``QuantizeParameters`` nodes whose conversion can be pushed to a neighbour.

    A node converting from a floating type has its target qtype propagated
    upwards; any other node has its source qtype propagated downwards. When
    the propagation call returns a truthy value the node is removed, its
    neighbours are reconnected and its quantization record is dropped.
    Float-to-float nodes are left in place with a warning.

    Args:
        G: graph to modify in place.
        set_identity: when truthy, ``self.set_identity(G)`` is called at the end.
        **kwargs: accepted for interface compatibility; not used here.

    Returns:
        True if at least one node was removed, otherwise False.
    """
    modified_graph = False
    float_types = (np.floating, bfloat16)
    # Snapshot the nodes first: the loop body removes nodes from the graph.
    for node in list(G.nodes(node_classes=QuantizeParameters)):
        if issubclass(node.from_qtype.dtype, float_types):
            if issubclass(node.to_qtype.dtype, float_types):
                LOG.warning(
                    'node %s quantizes from floating type to floating type and cannot directly be removed',
                    node.name)
                continue
            propagated = self.propagate_up(G, node, node.to_qtype)
        else:
            propagated = self.propagate_down(G, node, node.from_qtype)

        if not propagated:
            LOG.warning('unable to remove quantize node %s', node.name)
            continue

        modified_graph = True
        G.remove_and_reconnect(node, edge_class=NNEdge)
        nid = NodeId(node)
        # only drop the record if one exists, so a missing entry is not an error
        if G.quantization and nid in G.quantization:
            del G.quantization[nid]

    if set_identity:
        self.set_identity(G)
    return modified_graph
def replace_function(self, G: GraphView, subgraph: GraphView):
    """Build the hard-sigmoid activation that replaces the matched subgraph.

    The subgraph is scanned for its relu, constant and multiply nodes. With
    quantization present, the constant's record is deleted and the new
    activation receives a record of the same kind as the relu's, taking its
    inputs from the relu record and its outputs from the multiply record.

    Returns:
        ``(activation, None, None)``.

    Raises:
        NotImplementedError: if the relu's record is not one of the supported
            record classes.
    """
    relu_node = constant_node = mul_node = None
    for candidate in subgraph.nodes():
        if isinstance(candidate, ReluActivationParameters):
            relu_node = candidate
        elif isinstance(candidate, ConstantInputParameters):
            constant_node = candidate
        elif isinstance(candidate, MatrixMulParameters):
            mul_node = candidate

    activation = HSigmoidActivationParameters(
        mul_node.name + "_fused_close_hsigmoid", offset=0)

    if G.quantization:
        reluqrec = G.quantization[NodeId(relu_node)]
        mulqrec = G.quantization[NodeId(mul_node)]
        del G.quantization[NodeId(constant_node)]
        # checked in this order; the first matching class builds the record
        supported = (SymmetricQuantizationRecord,
                     MultQuantizationRecord,
                     Float32QuantizationRecord)
        for qrec_class in supported:
            if isinstance(reluqrec, qrec_class):
                pqrec = qrec_class(in_qs=reluqrec.in_qs,
                                   out_qs=mulqrec.out_qs)
                break
        else:
            raise NotImplementedError()
        G.quantization[NodeId(activation)] = pqrec
    return activation, None, None
def set_stats(G, current_stats=None, current_options=None):
    """Return statistics and options covering every node of ``G``.

    Shallow copies of the supplied mappings are completed: missing or ``None``
    statistics are built with ``build_stat``; internal nodes of fusions that
    quantize their internals get statistics from their existing record;
    expression fusions get their cached quantized expression; constants with
    a record get a ``qtype_ind`` option set to their first output qtype.

    Args:
        G: graph to walk.
        current_stats: existing statistics keyed by node id, or ``None``.
        current_options: existing options keyed by node id, or ``None``.

    Returns:
        ``(stats, current_options)``.

    Raises:
        ValueError: if an expression fusion has no quantized expression in
            either its statistics or its record cache.
    """
    stats = current_stats.copy() if current_stats is not None else {}
    current_options = (current_options.copy()
                       if current_options is not None else {})

    for node in G.nodes():
        nid = NodeId(node)
        if stats.get(nid) is None if nid in stats else True:
            stats[nid] = build_stat(G, nid, node=node)

        if isinstance(node, FusionBase) and node.quantize_internals:
            for fnode in node.subgraph.nodes():
                fnid = NodeId(node, fnode)
                if fnid in stats:
                    continue
                # if fusion input or output recs are not present build them
                if isinstance(fnode, (FusionInputParameters,
                                      FusionOutputParameters)):
                    continue
                qrec = G.quantization.get(fnid) if G.quantization else None
                stats[fnid] = build_stat_from_qrec(qrec)
        elif isinstance(node, ExpressionFusionParameters):
            if stats[nid] is None or 'expression' not in stats[nid]:
                has_cached_expression = not (
                    G.quantization is None
                    or nid not in G.quantization
                    or G.quantization[nid].cache is None
                    or 'expression' not in G.quantization[nid].cache)
                if not has_cached_expression:
                    raise ValueError(
                        f"quantized expression for {node.name} not found in current stats")
                stats[nid]['expression'] = G.quantization[nid].cache['expression']
        elif isinstance(node, ConstantInputParameters):
            if G.quantization and nid in G.quantization:
                node_opts = current_options.setdefault(nid, {})
                node_opts['qtype_ind'] = G.quantization[nid].out_qs[0]
    return stats, current_options
def replace_function(self, G: GraphView, subgraph: GraphView):
    """Build the hard-sigmoid activation that replaces the matched subgraph.

    With quantization present, the constant's record is deleted and the new
    activation gets a record created by ``QRec.copy_ktype`` from the relu's
    record, with the relu's inputs and the multiply's outputs.

    Returns:
        ``(activation, None, None)``.
    """
    found = {'relu': None, 'constant': None, 'mul': None}
    for member in subgraph.nodes():
        if isinstance(member, ReluActivationParameters):
            found['relu'] = member
        elif isinstance(member, ConstantInputParameters):
            found['constant'] = member
        elif isinstance(member, MatrixMulParameters):
            found['mul'] = member
    relu_node, constant_node, mul_node = (
        found['relu'], found['constant'], found['mul'])

    activation = HSigmoidActivationParameters(
        mul_node.name + "_fused_close_hsigmoid", offset=0)

    if not G.quantization:
        return activation, None, None

    reluqrec = G.quantization[NodeId(relu_node)]
    mulqrec = G.quantization[NodeId(mul_node)]
    # the constant disappears with the fusion, so its record goes too
    del G.quantization[NodeId(constant_node)]
    G.quantization[NodeId(activation)] = QRec.copy_ktype(
        reluqrec, in_qs=reluqrec.in_qs, out_qs=mulqrec.out_qs)
    return activation, None, None
def insert_resizer(G, out_edge, resize_op, from_shape):
    """Insert a resizer between the source of ``out_edge`` and its destination.

    The source node's ``dims`` are changed to ``from_shape`` (height, width)
    and a resizer that outputs the source node's original height and width is
    placed on the edge. If the source node has a quantization record, the
    resizer gets a deep copy of it with its input qtypes set to its output
    qtypes.

    Args:
        G: graph to modify in place.
        out_edge: edge leaving the node whose size changes.
        resize_op: ``'bilinear'`` or ``'nearest'``.
        from_shape: ``(height, width)`` of the new source size.

    Raises:
        ValueError: if ``resize_op`` is not a supported operation.
    """
    # Validate before touching the graph; an unknown operation previously
    # surfaced later as an unbound local variable.
    if resize_op not in ('bilinear', 'nearest'):
        raise ValueError(
            f"unsupported resize operation {resize_op!r}: "
            "expected 'bilinear' or 'nearest'")

    input_node = out_edge.from_node
    net_in_dim = input_node.in_dims[0]
    from_dim = deepcopy(net_in_dim)
    from_dim.h = from_shape[0]
    from_dim.w = from_shape[1]
    if resize_op == 'bilinear':
        resize_node = BilinearResizerParameters(
            input_node.name + "_resizer", (net_in_dim.h, net_in_dim.w))
    else:
        resize_node = NearestNeighborResizerParameters(
            input_node.name + "_resizer", (net_in_dim.h, net_in_dim.w))
    to_node = out_edge.to_node
    to_idx = out_edge.to_idx
    resize_node.in_dims = [from_dim]
    input_node.dims.h = from_shape[0]
    input_node.dims.w = from_shape[1]

    # qrec updated to reflect resizer
    input_qrec = G.quantization and G.quantization.get(NodeId(input_node))
    if input_qrec:
        resizer_qrec = deepcopy(input_qrec)
        resizer_qrec.in_qs = resizer_qrec.out_qs
        G.quantization[NodeId(resize_node)] = resizer_qrec

    G.remove_edge(out_edge)
    G.add_node(resize_node)
    G.add_edge(NNEdge(input_node, resize_node))
    G.add_edge(NNEdge(resize_node, to_node, to_idx=to_idx))
def quantize_forward(self, G: NNGraph, edge_recs, result=None):
    """Quantize the graph step by step, recording output qtypes per edge.

    Conv fusions go through ``quantize_fusion`` (whose internal records are
    added to the result), concats through ``quantize_backward`` and all other
    nodes through ``calculate_q``. Processing stops at the first node that
    yields a falsy record.

    Args:
        G: graph providing ``graph_state.steps``.
        edge_recs: mapping updated in place from edge params to the qtype
            produced on that edge.
        result: quantization set to fill; a new ``QuantizationSet`` is
            created when ``None``.

    Returns:
        The filled quantization set.
    """
    if result is None:
        result = QuantizationSet()
    step_nodes = [step['node'] for step in G.graph_state.steps]
    for node in step_nodes:
        LOG.debug("quantize forward %s", node.name)
        in_qs = self.get_in_qs(G, edge_recs, node)
        if isinstance(node, ConvFusionParameters):
            qrec, fusion_qrecs = self.quantize_fusion(G, node, in_qs)
            for node_id, fqrec in fusion_qrecs.items():
                result[node_id] = fqrec
        elif isinstance(node, ConcatParameters):
            qrec = self.quantize_backward(G, result, edge_recs, node)
        else:
            node_key = NodeId(node, None)
            qrec = self.calculate_q(node,
                                    self._activation_stats.get(node_key),
                                    self._filter_stats.get(NodeId(node, None)),
                                    in_qs,
                                    self._min_qsnr,
                                    self._force_width)
        result[NodeId(node, None)] = qrec
        if not qrec:
            # nothing to hand to downstream nodes - give up here
            break
        for out_edges in G.indexed_out_edges(node.name):
            for edge in out_edges:
                edge_recs[edge.params] = qrec.out_qs[edge.from_idx]
    return result
def execute_uncached_step(G, in_tensors, step_idx, qrecs, qmode):
    """Execute a single graph step on the supplied input tensors.

    Args:
        G: graph providing ``graph_state.steps``.
        in_tensors: input tensors for the step.
        step_idx: index of the step to execute.
        qrecs: quantization records keyed by node id.
        qmode: quantization mode; ``QuantizationMode.none()`` when ``None``.

    Returns:
        The step's outputs. In step mode on a quantized step, each output is
        dequantized with the matching output qtype of the node's record.
    """
    if qmode is None:
        qmode = QuantizationMode.none()
    ExecutionProgress.start()
    # Each step is a mapping holding the node under 'node' (as used by the
    # other step consumers); the node itself is what must be executed.
    node = G.graph_state.steps[step_idx]['node']
    assert not isinstance(
        node, InputParameters), "executing input step is not supported"
    ExecutionProgress.progress(step_idx, node.name)
    output = in_tensors
    nid = NodeId(node, None)
    if qmode.get_quantized(node, step_idx):
        qrec = qrecs[nid]
        if qmode.is_step:
            # NOTE(review): return value is unused, so this presumably
            # quantizes the tensors in place - confirm
            __quantize_input(qrec, output)
    else:
        qrec = None

    if isinstance(node, FusionParameters):
        # chain the contained nodes, each with its own record when quantized
        for fusion_node in node.contained_nodes():
            fnid = NodeId(node, fusion_node)
            fqrec = None if not qrec else qrecs[fnid]
            output, _ = Executer.execute(fusion_node, output, qrec=fqrec)
    elif isinstance(node, InputParameters):
        # only reachable when assertions are disabled
        output, _ = Executer.execute(node, in_tensors, qrec=qrec)
    else:
        output, _ = Executer.execute(node, output, qrec=qrec)

    if qmode.is_step and qmode.get_quantized(node, step_idx):
        qrec = qrecs[NodeId(node, None)]
        output = [
            qrec.out_qs[i].dequantize(out) for i, out in enumerate(output)
        ]
    return output
def quantize_fusion(self, G, node, in_qs, dtype):
    """Quantize the nodes contained in a fusion and return the fusion's record.

    For ``conv_active_pool`` and ``conv_active`` fusions, the first node is
    quantized using the statistics of the second node and an ``np.int8``
    output type. The remaining nodes are then quantized in sequence, each fed
    with the previous node's output qtypes. Every internal record is stored
    in ``self.qrecs``.

    Returns:
        A ``MultQuantizationRecord`` spanning the fusion inputs and the last
        contained node's outputs.
    """
    contained = node.contained_nodes()
    chain_qs = in_qs
    if node.fusion_type in ['conv_active_pool', 'conv_active']:
        conv_node, act_node = contained[0], contained[1]
        act_astats = self._activation_stats.get(NodeId(node, act_node))
        conv_qrec = self.calculate_q(G, conv_node, act_astats, chain_qs,
                                     dtype, out_dtype=np.int8)
        self.qrecs[NodeId(node, conv_node)] = conv_qrec
        chain_qs = conv_qrec.out_qs
        # the conv is done; the activation onwards follows the generic path
        contained = contained[1:]

    for fnode in contained:
        fstats = self._activation_stats.get(NodeId(node, fnode))
        fqrec = self.calculate_q(G, fnode, fstats, chain_qs, dtype)
        self.qrecs[NodeId(node, fnode)] = fqrec
        chain_qs = fqrec.out_qs
    return MultQuantizationRecord(in_qs=in_qs, out_qs=chain_qs)
def insert_copy_on_common_concat_in(self, G, concat_nodes):
    """Insert a copy wherever a concat input repeats an already seen source.

    The real source edge of every concat input is collected across all
    ``concat_nodes``. The first occurrence of a source is remembered; every
    later occurrence gets a ``CopyParameters`` node inserted on its edge.
    With quantization present the copy's record is built from the concat's
    input qtype at that position.

    Returns:
        True if at least one copy was inserted, otherwise False.
    """
    # in every concat nodes collect all the in edges (from_node, from_idx)
    # if there are repetition of tuples, insert a copy in every repetition
    # different concats cannot have the same in edge (from_node, from_idx)
    seen_sources = []
    has_modified_graph = False
    for concat_node in concat_nodes:
        in_edges = G.indexed_in_edges(concat_node.name)
        for idx, in_edge in enumerate(in_edges):
            real_in_edge = find_real_in_edge(G, in_edge)
            if real_in_edge not in seen_sources:
                seen_sources.append(real_in_edge)
                continue

            has_modified_graph = True
            copy_node = CopyParameters("%s_copy_%s" %
                                       (concat_node.name, idx))
            G.remove_edge(in_edge)
            LOG.info(
                'common_concat: inserting copy between %s/%s and %s/%s',
                in_edge.from_node.name, idx, concat_node.name,
                in_edge.to_idx)
            G.add_edge(NNEdge(in_edge.from_node, copy_node,
                              from_idx=in_edge.from_idx))
            G.add_edge(NNEdge(copy_node, concat_node,
                              to_idx=in_edge.to_idx))
            if G.quantization:
                qrec = G.quantization[NodeId(concat_node)]
                G.quantization[NodeId(copy_node)] = QRec.copy_ktype(
                    qrec,
                    in_qs=[deepcopy(qrec.in_qs[idx])],
                    out_qs=[deepcopy(qrec.in_qs[idx])])
    return has_modified_graph
def _collect(self, G, input_tensors, step_idx) -> Mapping[NodeId, Mapping]:
    """Run the graph twice and collect per-node comparison statistics.

    The first run uses the graph's records only when
    ``G.has_quantized_parameters`` is set; the second always uses them, with
    the ``all_dequantize`` mode. Results are paired step by step and passed
    to ``_collect_one``, including the internal outputs of fusions.

    Args:
        G: graph to execute.
        input_tensors: inputs for both executions.
        step_idx: not used by this method.

    Returns:
        An ``OrderedDict`` of statistics keyed by node id.
    """
    LOG.debug("gather quantization statistics")
    quantization = G.quantization if G.has_quantized_parameters else None

    float_executer = GraphExecuter(G, qrecs=quantization)
    foutputs = self._collect_execution(float_executer, input_tensors,
                                       quantization)
    quant_executer = GraphExecuter(G, qrecs=G.quantization)
    qoutputs = self._collect_execution(
        quant_executer, input_tensors, G.quantization,
        qmode=QuantizationMode.all_dequantize())

    stats = OrderedDict()
    for idx, fstat in enumerate(foutputs):
        qstat = qoutputs[idx]
        fusion_fstats = fstat['fusion_outputs']
        if fusion_fstats:
            # fusion internals are reported before the fusion itself
            for jdx, ffstat in enumerate(fusion_fstats):
                fnid = NodeId(fstat['node'], ffstat['node'])
                stats[fnid] = self._collect_one(
                    ffstat, qstat['fusion_outputs'][jdx],
                    G.quantization[fnid],
                    quant_compare=self._quant_compare)
        nid = NodeId(fstat['node'], None)
        stats[nid] = self._collect_one(fstat, qstat, G.quantization[nid],
                                       quant_compare=self._quant_compare)
    return stats
def bindings_generator(cls, gen, node, qrec, in_eparams, out_eparams, cname) -> bool:
    """Append the comment and node bindings for a fusion to ``gen.bindings``.

    Args:
        gen: generator providing ``G.quantization`` and the ``bindings`` list.
        node: fusion node whose contained nodes are bound.
        qrec: not used by this method.
        in_eparams: input edge parameters, indexed by input position.
        out_eparams: output edge parameters; only element 0 is used.
        cname: C name used in both bindings.

    Returns:
        Always True.
    """
    # NOTE(review): step_idx is never read below
    step_idx = node.step_idx
    cnodes = node.contained_nodes()
    # one record per contained node, in contained order
    quants = [gen.G.quantization[NodeId(node, fnode)] for fnode in cnodes]
    # the comprehension variable is scoped to the expression, so the `node`
    # parameter is not rebound
    add_node = [node for node in cnodes if isinstance(
        node, MatrixAddParameters)]
    if add_node:
        # NOTE(review): rebuilds exactly the list computed above - looks redundant
        quants = [gen.G.quantization[NodeId(
            node, fnode)] for fnode in cnodes]
    # NOTE(review): quants[1] is presumably the add node's record; the call is
    # expected to populate cache['scaled_idx'], which is read next - confirm
    set_add_in_scale(quants[1])
    scaled_idx = quants[1].cache['scaled_idx']
    not_scaled_idx = 0 if scaled_idx else 1
    gen.bindings.append(
        CommentBindingList("Node {} in1q {} in2q {} outq {}", cname,
                           quants[1].in_qs[scaled_idx], quants[1].in_qs[not_scaled_idx], quants[-1].out_qs[0])
    )
    # the scaled input is always bound first
    gen.bindings.append(
        NodeBindingList(cname, GNodeArgEdge(in_eparams[scaled_idx]),
                        GNodeArgEdge(in_eparams[not_scaled_idx]),
                        GNodeArgEdge(out_eparams[0], "GNA_OUT"),
                        GNodeArgNode(node, 'infos'),
                        GNodeArgNode(node.contained_nodes()[0], 'infos')
                        ))
    return True
def _match(self, G: GraphView, set_identity: bool = True, **kwargs) -> bool:
    """Remove or simplify strided slices that select their whole input.

    A slice whose ``slice_shape`` equals its input shape is removed when its
    output shape is also unchanged; otherwise it is replaced by a reshape to
    the output shape. Quantization records follow the change.

    Args:
        G: graph to modify in place.
        set_identity: when truthy, ``self.set_identity(G)`` is called at the end.
        **kwargs: accepted for interface compatibility; not used here.

    Returns:
        True if at least one slice was removed or replaced.
    """
    has_modified_graph = False
    for node in list(G.nodes(node_classes=StridedSliceParameters)):
        selects_everything = (
            node.slice_shape == tuple(node.in_dims[0].shape))
        if not selects_everything:
            continue
        has_modified_graph = True
        nid = NodeId(node)
        if node.slice_shape != node.out_shape:
            # the slice only changes the shape: a reshape does the same job
            reshape = ReshapeParameters(
                G.unique_name(f'{node.name}_reshape'),
                old_shape=node.slice_shape,
                shape=node.out_shape)
            LOG.info(
                f'replacing strided slice {node.name} with reshape {reshape.name}'
            )
            G.replace_node(node, reshape)
            if G.quantization and nid in G.quantization:
                G.quantization[NodeId(reshape)] = G.quantization[nid]
                del G.quantization[nid]
        else:
            LOG.info(
                f'removing strided slice {node.name} that does nothing')
            G.remove_and_reconnect(node, edge_class=NNEdge)
            if G.quantization and nid in G.quantization:
                del G.quantization[nid]

    if set_identity:
        self.set_identity(G)
    return has_modified_graph
def match(self, G: GraphView, set_identity: bool = True):
    """Force softmax inputs to a power-of-two scale and align what follows.

    Nothing is done unless the graph is quantized and every softmax has a
    ``MultQuantizationRecord``. For each softmax, its first input qtype is
    rescaled with ``scale_to_pow2`` and propagated up every input edge; the
    record of each consumer has its first input and output qtypes set to the
    softmax's first output qtype.

    Args:
        G: graph whose quantization is updated in place.
        set_identity: when truthy, ``self.set_identity(G)`` is called at the end.

    Returns:
        ``None`` on the early exits, otherwise ``False``.
    """
    if not G.quantization:
        return
    softmaxes = [
        node for node in G.nodes() if isinstance(node, SoftMaxParameters)
    ]
    qrecs = [G.quantization[NodeId(node)] for node in softmaxes]
    if any(not isinstance(qrec, MultQuantizationRecord) for qrec in qrecs):
        return

    for softmax, qrec in zip(softmaxes, qrecs):
        in_q = qrec.in_qs[0]
        in_q.scale_to_pow2()
        for in_edge in G.in_edges(softmax.name):
            propagate_qtype_up(G, in_q, in_edge)
        for out_edge in G.out_edges(softmax.name):
            consumer = out_edge.to_node
            assert isinstance(
                consumer, (OutputParameters, QuantizeParameters)
            ), "Softmax is supported only at the end of the graph"
            out_qrec = G.quantization[NodeId(consumer)]
            out_qrec.in_qs[0] = qrec.out_qs[0]
            out_qrec.out_qs[0] = qrec.out_qs[0]

    if set_identity:
        self.set_identity(G)
    return False
def check_quantization(self, G, node, reshape, direction='in'):
    """Give ``reshape`` a quantization record derived from ``node``'s record.

    Does nothing when the graph has no quantization. Otherwise the first
    qtype of ``node``'s ``in_qs`` or ``out_qs`` (chosen by ``direction``) is
    deep-copied into both sides of a new record that keeps the node's ktype.

    Args:
        G: graph whose quantization is updated in place.
        node: node supplying the qtype and ktype.
        reshape: node that receives the new record.
        direction: ``'in'`` or ``'out'``; selects ``<direction>_qs``.
    """
    if not G.quantization:
        return
    qclass = self.get_output_qrec_class(G)
    node_qrec = G.quantization[NodeId(node)]
    side_qtypes = getattr(node_qrec, f'{direction}_qs')
    qtype = side_qtypes[0]
    G.quantization[NodeId(reshape)] = qclass(
        in_qs=[deepcopy(qtype)],
        out_qs=[deepcopy(qtype)],
        ktype=node_qrec.ktype)
def replace_function(self, G: GraphView, subgraph: GraphView):
    """Absorb a constant into the biases of the filter in the matched subgraph.

    The flattened constant is added to the filter's existing biases, or
    becomes the biases when the filter has none. With quantization present
    and records for both nodes, the filter's ``biases_q`` is set to the
    constant's first output qtype.

    Returns:
        ``(filter_node, None, None)``.

    Raises:
        DontReplaceError: if the flattened constant's length differs from the
            filter's number of output channels.
    """
    filter_node = constant_node = None
    for member in subgraph.nodes():
        if isinstance(member, FilterParameters):
            filter_node = member
        elif isinstance(member, ConstantInputParameters):
            constant_node = member

    LOG.info("fusing bias in %s into %s", constant_node.name, filter_node.name)
    flattened_constant = constant_node.value.flatten()
    # shape needs to match
    if flattened_constant.shape[0] != filter_node.filter.out_c:
        raise DontReplaceError()

    if filter_node.has_bias:
        assert filter_node.biases is not None, "can't absorb bias into filter. maybe weights are not loaded"
        filter_node.biases += flattened_constant
    else:
        filter_node.biases = flattened_constant

    if G.quantization:
        fnid = NodeId(filter_node)
        cnid = NodeId(constant_node)
        if fnid in G.quantization and cnid in G.quantization:
            constant_qrec = G.quantization[cnid]
            G.quantization[fnid].biases_q = constant_qrec.out_qs[0]
    return filter_node, None, None
def _match(self, G: GraphView, set_identity: bool = True, **kwargs):
    """Eliminate the unpack nodes that follow multi-output-cell RNN nodes.

    RNN nodes with more than one output cell are paired with their unpack
    via ``find_unpack`` and filtered by ``validate_slices`` and
    ``validate_multi_branch``. For each remaining unpack node, the RNN paths
    are processed with ``process_path``, the unpack is removed, and its
    producer is connected to its consumers - through a reshape when the
    unpack is a strided slice that changes shape.

    Args:
        G: graph to modify in place.
        set_identity: when truthy, ``self.set_identity(G)`` is called at the end.
        **kwargs: accepted for interface compatibility; not used here.

    Returns:
        False when nothing qualifies, otherwise True.
    """
    rnn_nodes = [
        self.find_unpack(G, node) for node in G.nodes()
        if isinstance(node, RNNBaseParameters) and node.n_output_cells > 1
    ]
    rnn_nodes_by_slice = self.validate_slices(G, rnn_nodes)
    rnn_nodes_by_slice = self.validate_multi_branch(G, rnn_nodes_by_slice)
    if not rnn_nodes_by_slice:
        return False

    for unpack_node, rnn_unpacks in rnn_nodes_by_slice.items():
        modified_nodes = set()
        for rnn_unpack in rnn_unpacks:
            self.process_path(G, rnn_unpack, modified_nodes)
        # since process path will have removed all unnecessary nodes the edges will be correct here
        out_edges = G.out_edges(unpack_node.name)
        in_edges = G.in_edges(unpack_node.name)
        assert len(in_edges) == 1, "expecting unpack node to have only one in edge"
        in_edge = in_edges[0]
        changes_shape = unpack_node.changes_shape if isinstance(
            unpack_node, StridedSliceParameters) else False

        LOG.info("Eliminating last cell unpack: %s", unpack_node.name)
        G.remove(unpack_node)

        # Here the strided slice can change the output shape of the RNN
        # so insert a reshape to do the shape change
        if changes_shape:
            reshape = ReshapeParameters(
                unpack_node.name + '_reshape',
                old_shape=Dim.unnamed(unpack_node.post_slice_shape),
                shape=Dim.unnamed(unpack_node.out_shape))
            G.add_edge(NNEdge(from_node=in_edge.from_node,
                              to_node=reshape,
                              from_idx=in_edge.from_idx))
            for out_edge in out_edges:
                G.add_edge(NNEdge(from_node=reshape,
                                  to_node=out_edge.to_node,
                                  to_idx=out_edge.to_idx))
            if G.quantization:
                # the reshape takes over the record of the unpack it replaces
                G.quantization[NodeId(reshape)] = G.quantization[NodeId(
                    unpack_node)]
        else:
            for out_edge in out_edges:
                G.add_edge(NNEdge(from_node=in_edge.from_node,
                                  to_node=out_edge.to_node,
                                  from_idx=in_edge.from_idx,
                                  to_idx=out_edge.to_idx))
        if G.quantization:
            del G.quantization[NodeId(unpack_node)]

    if set_identity:
        self.set_identity(G)
    return True
def execute_triangle_iterator(G, in_tensors: Sequence, qrecs, value_cache, include_nodes: Optional[Set[Node]] = None):
    """Generator that executes the graph step by step on several value sets.

    Each node's outputs are stored on ``node.value`` so later steps can read
    them through their input edges. After the outputs of a step are yielded,
    the quantized cached output for that step is appended as one more value
    set for downstream nodes.

    Args:
        G: graph providing ``graph_state.steps`` and ``in_edges``.
        in_tensors: input tensors; element 0 is used to look up the cache.
        qrecs: quantization records keyed by node id.
        value_cache: cache queried with ``get_outputs(G, in_tensors[0])``.
        include_nodes: when non-empty, only these nodes are executed.

    Yields:
        ``(step_idx, node, report_outputs)``, where ``report_outputs`` ends
        with element 0 of the cached entry for the step.
    """
    LOG.info("execute triangle")
    cache_entry = value_cache.get_outputs(G, in_tensors[0])
    for step_idx, step in enumerate(G.graph_state.steps):
        node = step['node']
        if include_nodes and node not in include_nodes:
            continue
        # collect outputs from previous nodes
        # InputNode is already set above
        if isinstance(node, InputParameters):
            inputs = [in_tensors]
        else:
            # gather, per input index, the list of values stored on the producer
            inputs = [None] * len(node.in_dims)
            for edge in G.in_edges(node.name):
                inputs[edge.to_idx] = edge.from_node.value
            # NOTE(review): assumes input 0 is connected and every producer
            # holds the same number of value sets - confirm
            in_values = len(inputs[0])
            fixed_inputs = []
            for in_values_idx in range(in_values):
                fixed_inputs.append([None] * len(node.in_dims))
            # regroup from "per input, list of values" to "per value set, list of inputs"
            for in_values_idx in range(in_values):
                fixed_input = fixed_inputs[in_values_idx]
                for input_idx in range(len(node.in_dims)):
                    if inputs[input_idx]:
                        fixed_input[input_idx] = inputs[input_idx][
                            in_values_idx]
            inputs = fixed_inputs
            # # regroup inputs [[a1, a2], [b1, b2], ...] to [[a1, b1], [a2, b2], ...]
            # assert all(len(inputs[idx]) == len(inputs[0]) for idx in range(1, len(inputs)))
            # # pylint: disable=unsubscriptable-object
            # inputs = [[inp[idx] for inp in inputs] for idx in range(len(inputs[0]))]
        outputs = []
        qrec = qrecs[NodeId(node, None)]
        for inp in inputs:
            ExecutionProgress.progress(step_idx, node.name)
            if isinstance(node, FusionParameters):
                # NOTE(review): every contained node receives the same `inp` and
                # the full execute() result is appended (no [0]), unlike the
                # non-fusion branch - confirm this is intended
                for fusion_node in node.contained_nodes():
                    fqrec = qrecs[NodeId(node, fusion_node)]
                    outputs.append(
                        Executer.execute(fusion_node, inp, qrec=fqrec))
            else:
                outputs.append(Executer.execute(node, inp, qrec=qrec)[0])
        # NOTE(review): out_qs[i] is called directly on each output and is
        # indexed by value-set position, not by output index - confirm
        report_outputs = [qrec.out_qs[i](out)
                          for i, out in enumerate(outputs)]
        report_outputs.append(cache_entry[step_idx][0])
        yield step_idx, node, report_outputs
        # add the quantized cached output as an extra value set, then publish
        # all value sets for the consumers of this node
        outputs.append(qrec.out_qs[0].quantize(cache_entry[step_idx][0]))
        node.value = outputs
def move_to_fusion(self, node: Parameters, new_pnode: Parameters):
    """Re-key ``node``'s record and statistics under the fusion ``new_pnode``.

    The record in ``self.qset`` is moved from the standalone node id to the
    fusion-internal id. The statistics in ``self.stats`` are copied to the
    fusion-internal id; the original statistics entry is kept.

    Args:
        node: node that has become part of a fusion.
        new_pnode: fusion that now contains ``node``.
    """
    standalone_id = NodeId(node)
    fused_id = NodeId(new_pnode, node)
    qset = self.qset
    if standalone_id in qset:
        qset[fused_id] = qset[standalone_id]
        del qset[standalone_id]
    stats = self.stats
    if stats and standalone_id in stats:
        stats[fused_id] = stats[standalone_id]
def quantize_fusion(self, G: NNGraph, node: ConvFusionParameters, in_qs, force_out=None) -> SymmetricQuantizationRecord:
    """Quantize a conv fusion, handling ``conv_active`` fusions specially.

    For ``conv_active`` the convolution is quantized with its output left in
    accumulator format and the activation's output qtype is then chosen:
    from ``force_out`` when bits and q are both forced, otherwise from the
    activation statistics, never exceeding the convolution output q (the
    kernel cannot shift left) or the activation's maximum q when one is
    reported by ``compute_activation_out_maxq``.

    Args:
        G: graph, forwarded to ``default_quantize_fusion``.
        node: fusion to quantize.
        in_qs: input qtypes of the fusion; element 0 feeds the convolution.
        force_out: optional forced output qtype (``bits`` and/or ``q``).

    Returns:
        For ``conv_active``: a ``(fusion_record, internal_records)`` tuple,
        where ``internal_records`` is an ``OrderedDict`` keyed by node id.
        Otherwise: whatever ``default_quantize_fusion`` returns.

    Raises:
        NotImplementedError: if a forced q is larger than the activation's
            maximum q or the convolution's output q.
    """
    if node.fusion_type != 'conv_active':
        return self.default_quantize_fusion(G, node, in_qs,
                                            force_out=force_out)

    result = OrderedDict()
    nodes = node.contained_nodes()
    conv_node = nodes[0]
    conv_astats = self._activation_stats.get(NodeId(node, conv_node))
    conv_qrec = self.calculate_filter_q(conv_node,
                                        conv_astats,
                                        in_q=in_qs[0],
                                        force_width=self._force_width,
                                        out_as_acc=True)
    result[NodeId(node, conv_node)] = conv_qrec
    conv_out_q = conv_qrec.out_qs[0].q
    act_node = nodes[1]
    act_astats = self._activation_stats.get(NodeId(node, act_node))

    def qtype_from_stats(bits):
        # qtype derived from the activation's observed output range, capped
        # at the convolution output q
        stats_q = QType.from_min_max(
            max_val=act_astats['range_out'][0]['max'],
            min_val=act_astats['range_out'][0]['min'],
            bits=bits)
        stats_q.q = min(stats_q.q, conv_out_q)
        return stats_q

    if force_out and force_out.bits:
        act_max_q = self.compute_activation_out_maxq(act_node,
                                                     force_out.bits)
        if force_out.q is not None:
            if (act_max_q is not None and force_out.q > act_max_q
                    ) or force_out.q > conv_out_q:
                # We cannot shift left in the kernel
                # TODO - This should try to increase the input q and perhaps the width
                # Unlikely to happen
                raise NotImplementedError()
            act_o_q = QType(bits=force_out.bits,
                            q=force_out.q,
                            signed=True)
        else:
            # Only the width is forced. This path used to read act_o_q before
            # assigning it; derive it from the statistics at the forced width.
            act_o_q = qtype_from_stats(force_out.bits)
            if act_max_q is not None:
                act_o_q.q = min(act_max_q, act_o_q.q)
    else:
        act_o_q = qtype_from_stats(self._force_width)
        if force_out and force_out.q:
            # act_max_q was never computed on this path; evaluate it at the
            # width actually used for the activation output
            act_max_q = self.compute_activation_out_maxq(
                act_node, self._force_width)
            if (act_max_q is not None and force_out.q > act_max_q
                    ) or force_out.q > conv_out_q:
                # We cannot shift left in the kernel
                # TODO - This should try to increase the input q and perhaps the width
                # Unlikely to happen
                raise NotImplementedError()
            act_o_q.q = force_out.q

    act_qrec = SymmetricQuantizationRecord(in_qs=conv_qrec.out_qs,
                                           out_qs=[act_o_q])
    result[NodeId(node, act_node)] = act_qrec
    return SymmetricQuantizationRecord(in_qs=in_qs,
                                       out_qs=act_qrec.out_qs), result