def transform(self, model: onnx.ModelProto) -> onnx.ModelProto:
    """Fuse LayerNormalization subgraphs via onnxruntime's BertOnnxModel,
    then restore a topological node order.

    onnxruntime_tools appends fused nodes at the end of the graph, so the
    surviving nodes are re-emitted in the original model's order, with each
    fused LayerNormalization inserted right after the node that produces its
    input.
    """
    # Keep a pristine copy: the optimizer mutates `model` in place.
    orig_model = ModelProto()
    orig_model.CopyFrom(model)
    optimizer = BertOnnxModel(model)
    optimizer.fuse_layer_normalization()
    model = optimizer.model
    # Map each fused LayerNormalization node by its first (data) input name.
    layer_norm_by_input_name = {
        node.input[0]: node
        for node in model.graph.node if node.op_type == 'LayerNormalization'
    }
    # nodes are not topologically sorted as a result of onnxruntime_tools optimization
    sorted_nodes = []
    visited = 0
    for node in orig_model.graph.node:
        # `in` uses protobuf equality: keep only nodes that survived fusion.
        if node in model.graph.node:
            sorted_nodes.append(node)
        # Splice the fused node in immediately after its producer.
        if node.output[0] in layer_norm_by_input_name.keys():
            sorted_nodes.append(layer_norm_by_input_name[node.output[0]])
            visited += 1
    # No fusion happened: fall back to the optimizer's node list untouched.
    if not visited:
        sorted_nodes = model.graph.node
    model = utils.rebuild_model(model, sorted_nodes)
    check_model(model)
    return model
def remove_softmax_new_pattern(self, model):
    """Remove a trailing Transpose -> Softmax -> Transpose chain that ends in
    a graph output, dropping the corresponding graph output as well.

    If removing that output would leave the graph without any outputs, the
    output of the node feeding the first Transpose is promoted to a graph
    output instead (its value_info must already exist in the graph).
    """
    # A node may have several outputs; index by every output name.
    nodes_by_output_name = {
        node_output: node
        for node in model.graph.node for node_output in node.output
    }
    vi_by_output_name = {vi.name: vi for vi in model.graph.value_info}
    outputs_by_output_name = {
        output.name: output
        for output in model.graph.output
    }
    optimized_nodes = []
    removed_nodes = []
    for node in model.graph.node:
        # Skip nodes already scheduled for removal by an earlier match.
        if node in removed_nodes:
            continue
        if node.op_type != 'Transpose':
            optimized_nodes.append(node)
            continue
        # Pattern anchor: the Transpose must produce a graph output.
        if node.output[0] not in outputs_by_output_name.keys():
            optimized_nodes.append(node)
            continue
        prev_node = nodes_by_output_name[node.input[0]]
        if prev_node.op_type != 'Softmax':
            optimized_nodes.append(node)
            continue
        pprev_node = nodes_by_output_name[prev_node.input[0]]
        if pprev_node.op_type != 'Transpose':
            optimized_nodes.append(node)
            continue
        # Full Transpose -> Softmax -> Transpose chain matched: remove it.
        output_node = outputs_by_output_name[node.output[0]]
        removed_nodes.extend([node, prev_node, pprev_node])
        model.graph.output.remove(output_node)
        # Graph must have at least one graph output
        if not len(model.graph.output):
            ppprev_node = nodes_by_output_name[pprev_node.input[0]]
            model.graph.output.append(
                vi_by_output_name[ppprev_node.output[0]])
    # remove duplicate node(s) in optimized nodes
    seen = []
    for op_node in optimized_nodes:
        if op_node in seen:
            continue
        seen.append(op_node)
    optimized_nodes = seen
    new_nodes = list(
        filter(lambda node: node not in removed_nodes, optimized_nodes))
    model = utils.rebuild_model(model, new_nodes)
    check_model(model)
    return model
def check_model(self):
    """Validate the quantized model and cache the key sets used by the
    fine-grained `_check_quant_*` validators.

    NOTE: the bare `check_model(...)` call below resolves to the
    module-level function of the same name, not to this method.
    """
    check_runnable = True
    if self.mode == self.quant_mode.dfg:
        # pass runnable check, as dfg mode does not assume to run on onnxruntime
        check_runnable = False
    check_model(self.model, check_runnable)
    # Names of every tensor with a value_info (incl. graph inputs/outputs).
    self._quant_value_info_key = [
        vi.name for vi in list(self.model.graph.value_info) +
        list(self.model.graph.input) + list(self.model.graph.output)
    ]
    self._quant_initializer_key = [
        init.name for init in self.model.graph.initializer
    ]
    self._quant_annotation_key = [
        annot.tensor_name
        for annot in self.model.graph.quantization_annotation
    ]
    self._check_quant_initializer()
    self._check_quant_value_info()
    # NOTE(review): the first branch compares against `self.quant_mode.dfg`,
    # this one against `QuantizationMode.dfg` — presumably the same enum;
    # confirm and unify.
    if self.mode == QuantizationMode.dfg:
        self._check_quant_annotation()
        self._check_quant_param()
def augment_model(self):
    """Insert min/max observer nodes after every float graph input and after
    every float node output, then rebuild and validate the model.

    Non-float tensors are left unobserved.
    """
    augmented = []
    # Observe float graph inputs first.
    for tensor_name in self.input_tensors:
        if get_vi_dtype(self.value_info[tensor_name]) == onnx.TensorProto.FLOAT:
            augmented.extend(self._attach_minmax_observer(tensor_name))
    # Then every node, each followed by observers on its float outputs.
    for node in self.model.graph.node:
        augmented.append(node)
        for out_name in node.output:
            if get_vi_dtype(self.value_info[out_name]) != onnx.TensorProto.FLOAT:
                continue
            augmented.extend(self._attach_minmax_observer(out_name))
    self.model = utils.rebuild_model(self.model, augmented)
    check_model(self.model)
    return self.model
def inference_shape(self) -> onnx.ModelProto:
    """Simplify the model with onnx-simplifier (shape inference included),
    validate it, and return it with named nodes.

    `eliminate_duplicate_initializer` is skipped to keep initializers intact.
    """
    simplified, ok = onnxsim.simplify(
        self.model,
        skipped_optimizers=['eliminate_duplicate_initializer'],
    )
    self.model = simplified
    assert ok
    check_model(self.model)
    return utils.name_nodes(self.model)
def transform(self, model: onnx.ModelProto) -> onnx.ModelProto:
    """Rewrite every two-input Sum node as an equivalent Add node.

    Sum nodes with any other arity are left untouched.
    """
    rewritten = []
    for node in model.graph.node:
        if node.op_type == 'Sum' and len(node.input) == 2:
            # Two-operand Sum is exactly Add.
            node = make_node('Add',
                             inputs=[node.input[0], node.input[1]],
                             outputs=[node.output[0]])
        rewritten.append(node)
    # Drop duplicates while preserving first-occurrence order.
    deduped = []
    for candidate in rewritten:
        if candidate not in deduped:
            deduped.append(candidate)
    model = utils.rebuild_model(model, deduped)
    check_model(model)
    return model
def analyze_constant_of_shape(self):
    """Replace ConstantOfShape subgraphs by concrete values.

    For each ConstantOfShape output: mark its upstream shaping subgraph
    (DFS back to a 'Shape' node) for removal, temporarily expose the tensor
    as a graph output, run the model once to capture its value, bake that
    value into an initializer, then rebuild the graph without the shaping
    subgraph and re-infer shapes.
    """
    value_info = {vi.name: vi for vi in self.model.graph.value_info}
    traversal = list()
    tensor_to_be_value_analyzed = list()
    for node in self.model.graph.node:
        if node.op_type == 'ConstantOfShape':
            vi = value_info[node.output[0]]
            dtype = vi.type.tensor_type.elem_type
            rank = len([dim for dim in vi.type.tensor_type.shape.dim])
            start_vertex = node.output[0]
            # Collect the shaping subgraph feeding this node for removal.
            traversal.extend(
                self.depth_first_search(start_vertex, end_op_type='Shape'))
            tensor_to_be_value_analyzed.append(start_vertex)
            # Expose the tensor as a graph output with unknown ('') dims so
            # its runtime value can be fetched below.
            vi = make_tensor_value_info(node.output[0], dtype, ('',) * rank)
            self.model.graph.output.append(vi)
    new_nodes = list(
        filter(lambda node: node not in traversal, self.model.graph.node))
    if tensor_to_be_value_analyzed:
        self.assign_value_analyzed_shapes_to_initializer(
            value_dict=self.run_onnx_model(self.model.SerializeToString(),
                                           tensor_to_be_value_analyzed))
    # rebuild model graph without nodes in shaping subgraph
    self.model = utils.rebuild_model(self.model, new_nodes)
    check_model(self.model)
    self.model = shape_inference.infer_shapes(self.model)
def to_static_shape_graph(self):
    """Turn dynamic-shaping subgraphs into static initializers.

    Two passes: (1) shape tensors feeding Reshape/Pad/Resize/Expand,
    (2) scalar tensors feeding broadcasting Mul/Div produced by Gather/Add.
    Both are value-analyzed by running the model once on onnxruntime; the
    captured values become initializers and the shaping nodes are removed.
    """
    tensor_to_be_value_analyzed = list()
    dynamic_shape_nodes = self.get_dynamic_shape_nodes()
    # INT64 rank-1 shape inputs of dynamic-shaping operators.
    tensor_to_be_value_analyzed.extend(self.get_value_analysis_nodes(
        removed_nodes=dynamic_shape_nodes,
        target_op_types=['Reshape', 'Pad', 'Resize', 'Expand'],
        value_analysis_op_types=['Concat', 'Cast', 'Shape'],
        dtype=TensorProto.INT64,
        rank=1
    ))
    # find input of broad-casting mul/div operators with Gather/Add node as its input
    scalar_nodes = self.get_scalar_nodes()
    # FLOAT rank-0 (scalar) inputs of broadcasting Mul/Div operators.
    tensor_to_be_value_analyzed.extend(self.get_value_analysis_nodes(
        removed_nodes=scalar_nodes,
        target_op_types=['Mul', 'Div'],
        value_analysis_op_types=['Gather', 'Add'],
        dtype=TensorProto.FLOAT,
        rank=0
    ))
    if tensor_to_be_value_analyzed:
        print('Run model on ONNXRuntime for value analysis. It will take some time..')
        # assign value-analyzed shape to dynamic-shaping operator as its initializer
        self.assign_value_analyzed_shapes_to_initializer(
            value_dict=self.run_onnx_model(self.model.SerializeToString(),
                                           tensor_to_be_value_analyzed))
    new_nodes = list(
        filter(lambda node: node not in dynamic_shape_nodes + scalar_nodes,
               self.model.graph.node))
    # rebuild model graph without nodes in shaping subgraph
    self.model = utils.rebuild_model(self.model, new_nodes, renaming=False)
    check_model(self.model)
def transform(self, model: onnx.ModelProto) -> onnx.ModelProto:
    """Fold Reshape nodes whose data input is an initializer.

    For each Reshape whose data input (and shape input) are initializers,
    the reshaped array is registered as a new initializer under the
    Reshape's output name — so the consumer's input reference stays valid —
    and the Reshape node plus its original initializers are removed.

    BUG FIX: the consumer node was previously appended to `optimized_nodes`
    inside the per-input loop, so it was appended once per *non-matching*
    input (duplicates, later masked by the dedup pass) and never appended
    at all when every one of its inputs matched a foldable Reshape —
    silently dropping the node from the graph. Each node is now kept
    exactly once.
    """
    nodes_by_output_name = {
        node.output[0]: node for node in model.graph.node
    }
    initializer = {init.name: init for init in model.graph.initializer}
    initializer_key = initializer.keys()

    optimized_nodes = []
    removed_nodes = []
    for node in model.graph.node:
        # Every node is kept; only the upstream Reshape producers go away.
        optimized_nodes.append(node)
        for node_input in node.input:
            prev_node = nodes_by_output_name.get(node_input)
            if prev_node is None or prev_node.op_type != 'Reshape':
                continue
            # Data input of the Reshape must be a constant initializer.
            if prev_node.input[0] not in initializer_key:
                continue
            init = initializer[prev_node.input[0]]
            # Shape input is assumed constant as well (KeyError otherwise,
            # matching the original behavior).
            shape_init = initializer[prev_node.input[1]]
            init_arr = numpy_helper.to_array(init)
            shape_init_arr = numpy_helper.to_array(shape_init)
            reshaped_init_arr = init_arr.reshape(shape_init_arr)
            # Re-register the pre-reshaped data under the Reshape's output
            # name so the consumer keeps referring to the same tensor name.
            model.graph.initializer.append(
                make_tensor(name=node_input,
                            data_type=init.data_type,
                            dims=shape_init_arr,
                            vals=reshaped_init_arr.flatten()))
            model.graph.initializer.remove(init)
            model.graph.initializer.remove(shape_init)
            removed_nodes.append(prev_node)

    # Defensive dedup (nodes are appended once above, but keep parity with
    # the other transforms in this file).
    seen = []
    for op_node in optimized_nodes:
        if op_node in seen:
            continue
        seen.append(op_node)
    optimized_nodes = seen
    new_nodes = list(
        filter(lambda node: node not in removed_nodes, optimized_nodes))
    model = utils.rebuild_model(model, new_nodes)
    check_model(model)
    return model
def _remove_quant_dequantlinear_operator_with_initializer(self):
    """Remove QuantizeLinear/DequantizeLinear pairs applied to initializers
    and replace the initializers with fake-quantized (quantize->dequantize)
    float data.

    Consumers referring to '<name>_dequantized' are rewired to a new
    '<name>_fake_quantized' float initializer.
    """
    rm_nodes = []
    new_nodes = []
    for node in self.model.graph.node:
        if node.op_type != 'QuantizeLinear':
            new_nodes.append(node)
            continue
        # Only QuantizeLinear over an initializer is folded.
        if node.input[0] not in self.initializer.keys():
            new_nodes.append(node)
            continue
        rm_nodes.append(node)
        # Also drop every DequantizeLinear fed by this QuantizeLinear.
        rm_nodes.extend([
            dequant_node for dequant_node in self.model.graph.node
            if dequant_node.op_type == 'DequantizeLinear'
            if dequant_node.input[0] == node.output[0]
        ])
    for node in self.model.graph.node:
        if node.op_type == 'QuantizeLinear' or node.op_type == 'DequantizeLinear':
            continue
        for idx, node_input in enumerate(node.input):
            # Naming convention: '<init>_dequantized' marks a folded pair.
            if '_dequantized' not in node_input:
                continue
            init_name = node_input.split('_dequantized')[0]
            if init_name not in self.initializer.keys():
                continue
            # Rewire the consumer to the fake-quantized float initializer.
            node.input[idx] = init_name + '_fake_quantized'
            init = self.initializer[init_name]
            s = self.initializer[init_name + '_scale']
            zp = self.initializer[init_name + '_zero_point']
            fake_quantized_data = self._fake_quantize_data(init, s, zp)
            self.initializer.update({
                init_name + '_fake_quantized':
                make_tensor(name=init_name + '_fake_quantized',
                            data_type=onnx.TensorProto.FLOAT,
                            dims=init.dims,
                            vals=fake_quantized_data.flatten())
            })
    self.model = utils.rebuild_model(
        model=self.model,
        new_nodes=[node for node in new_nodes if node not in rm_nodes],
        eliminate=True)
    self._update_graph_field(field='initializer',
                             proto=self.initializer.values())
    check_model(self.model, check_runnable=True)
def remove_quantizelinear_operator_with_initializer(self):
    """Fold QuantizeLinear nodes over initializers into pre-quantized
    initializers.

    The float initializer (and its graph input entry) is replaced by an
    integer initializer named after the QuantizeLinear output; the output's
    value_info is moved from graph.value_info to graph.input.
    """
    new_nodes = []
    rm_nodes = []
    for node in self.model.graph.node:
        if node.op_type != 'QuantizeLinear':
            new_nodes.append(node)
            continue
        if node.input[0] not in self.initializer.keys():
            new_nodes.append(node)
            continue
        # node.input[0] to be removed from model.graph.input
        # (assumes the initializer is also listed as a graph input)
        self.graph_input.pop(node.input[0])
        # node.input[0] to be removed from model.graph.initializer
        init = self.initializer.pop(node.input[0])
        # quantize initializer
        s = self.initializer[node.input[1]]
        zp = self.initializer[node.input[2]]
        quantized_data = self._quantize_data(init, s, zp)
        # node.output[0] to be updated to model.graph.initializer instead
        self.initializer.update({
            node.output[0]:
            make_tensor(node.output[0], zp.data_type, init.dims,
                        quantized_data.flatten())
        })
        # node.output[0] to be removed from model.graph.value_info
        vi = self.value_info.pop(node.output[0])
        # node.output[0] to be updated to model.graph.input instead
        self.graph_input.update({node.output[0]: vi})
        rm_nodes.append(node)
    self.model = utils.rebuild_model(
        model=self.model,
        new_nodes=[node for node in new_nodes if node not in rm_nodes],
        eliminate=False,
        renaming=False)
    # Push the mutated bookkeeping dicts back into the graph proto.
    self._update_graph_field(field='initializer',
                             proto=self.initializer.values())
    self._update_graph_field(field='value_info',
                             proto=self.value_info.values())
    self._update_graph_field(field='input',
                             proto=self.graph_input.values())
    check_model(self.model, check_runnable=False)
def transform(self, model: onnx.ModelProto) -> onnx.ModelProto:
    """Strip SSD post-processing nodes from the graph and promote
    `self.ssd_outputs` to graph outputs (their value_info must exist)."""
    value_infos = {vi.name: vi for vi in model.graph.value_info}
    postprocess = self.get_postprocess_nodes(model, self.ssd_outputs)
    kept = [node for node in model.graph.node if node not in postprocess]
    model = utils.rebuild_model(model, kept)
    for name in self.ssd_outputs:
        model.graph.output.append(value_infos[name])
    check_model(model)
    return model
def build_optimized_model(self, model):
    """Reassemble the model from the optimizer's node map.

    Map entries may be a single NodeProto or a list of NodeProto; anything
    else is a programming error and raises.
    """
    model = self.update_graph_fields(model)
    collected = []
    for entry in self.get_map_values('node'):
        if isinstance(entry, list):
            collected.extend(entry)
        elif isinstance(entry, onnx.NodeProto):
            collected.append(entry)
        else:
            raise Exception(entry)
    model = utils.rebuild_model(model, collected)
    check_model(model)
    return model
def transform(self, model: onnx.ModelProto) -> onnx.ModelProto:
    """Convert attribute-style Clip nodes (single input, min/max attributes)
    into input-style Clip nodes with scalar initializer bounds.

    A missing min/max attribute becomes an empty-string input, i.e. the
    optional input is left unset.
    """
    rewritten = []
    for node in model.graph.node:
        # Only single-input Clip nodes carry their bounds as attributes.
        if node.op_type != 'Clip' or len(node.input) >= 2:
            rewritten.append(node)
            continue
        bound_inputs = {'min': '', 'max': ''}
        bound_inits = []
        for attr in node.attribute:
            if attr.name not in ('min', 'max'):
                continue
            tensor_name = f'{node.input[0]}_clip_{attr.name}'
            bound_inits.append(
                make_tensor(tensor_name, TensorProto.FLOAT, (), [attr.f]))
            bound_inputs[attr.name] = tensor_name
        model.graph.initializer.extend(bound_inits)
        rewritten.append(
            make_node('Clip',
                      inputs=[node.input[0], bound_inputs['min'],
                              bound_inputs['max']],
                      outputs=[node.output[0]]))
    # Drop duplicates while preserving first-occurrence order.
    deduped = []
    for candidate in rewritten:
        if candidate not in deduped:
            deduped.append(candidate)
    model = utils.rebuild_model(model, deduped)
    check_model(model)
    return model
def transform(self, model: onnx.ModelProto) -> onnx.ModelProto:
    """Polish the model, fuse MatMul + Add pairs into Conv, and polish again.

    The lookup tables built here are consumed by `transform_matmul_add`.
    """
    model = PolishModel().transform(model)

    self.nodes_by_output_name = {
        node.output[0]: node for node in model.graph.node
    }
    self.initializers = {
        init.name: init for init in model.graph.initializer
    }
    self.outputs_by_name = {out.name: out for out in model.graph.output}

    # transform matmul + add --> conv
    model = self.transform_matmul_add(model)
    check_model(model)
    return PolishModel().transform(model)
def transform(self, model: onnx.ModelProto) -> onnx.ModelProto:
    """Convert the model to the target ONNX opset (__OPSET_VERSION__).

    Raises:
        Exception: if the version converter or the subsequent model check
            fails; the original exception is chained as the cause.
    """
    # https://github.com/onnx/onnx/issues/2873#issuecomment-652541006
    #
    # > There is an underlying issue in version converter. It relies on the
    # > C++ IR which I believe has not been updated after IR v3. Because of
    # > this I think it expects initializers also be added as graph inputs.
    # > If you try to change the version of your model to IRv3 or create a
    # > model with initializers also as inputs then I think this will work.
    model = include_initializer_to_graph_input(model)

    version = int(model.opset_import[0].version)
    if version != __OPSET_VERSION__:
        try:
            model = version_converter.convert_version(
                model, __OPSET_VERSION__)
            check_model(model)
        except Exception as e:
            # Chain the original failure so the root cause is not lost
            # (previously the bare `raise Exception(...)` discarded it).
            raise Exception(
                f"Can't convert the model (ONNX opset {version}) to ONNX opset {__OPSET_VERSION__}"
            ) from e
    return model
def transform_to_integer_arithmetic_operator(self):
    """Fuse each Conv/MatMul with its surrounding quantize/dequantize nodes
    into a single integer-arithmetic operator.

    For every Conv/MatMul, the producers of its inputs (node_i0/i1 and the
    optional bias producer node_i2) and the consumer of its output
    (node_o0) are removed along with their value_info entries, and replaced
    by the operator built in `_make_integer_arithmetic_operator`.
    """
    new_nodes = []
    rm_nodes = []
    for node in self.model.graph.node:
        if node.op_type not in ['Conv', 'MatMul']:
            new_nodes.append(node)
            continue
        # Producers of data/weight inputs and the single consumer of the
        # output (assumed to exist and be unique — DQ/Q nodes).
        node_i0 = self.node_by_output[node.input[0]]
        node_i1 = self.node_by_output[node.input[1]]
        node_o0 = self.node_by_input[node.output[0]]
        rm_nodes.extend([node_i0, node_i1, node_o0])
        self.value_info.pop(node_i0.output[0])
        self.value_info.pop(node_i1.output[0])
        self.value_info.pop(node_o0.input[0])
        node_i2 = None
        # Optional third input (bias) gets the same treatment.
        if len(node.input) == 3:
            node_i2 = self.node_by_output[node.input[2]]
            rm_nodes.append(node_i2)
            self.value_info.pop(node_i2.output[0])
        rm_nodes.extend([node])
        new_nodes.append(
            self._make_integer_arithmetic_operator(node, node_i0, node_i1,
                                                   node_o0, node_i2))
    self.model = utils.rebuild_model(
        model=self.model,
        new_nodes=[node for node in new_nodes if node not in rm_nodes],
        eliminate=False)
    self._update_graph_field(field='value_info',
                             proto=self.value_info.values())
    check_model(self.model, check_runnable=False)
def transform(self, model: onnx.ModelProto) -> onnx.ModelProto:
    """Fuse Gelu subgraphs via onnxruntime's BertOnnxModel, then restore a
    topological node order.

    onnxruntime_tools appends fused nodes at the end of the graph, so the
    surviving nodes are re-emitted in the original model's order, with each
    fused Gelu inserted right after the node that produces its input.

    (Removed a `value_info` mapping that was built but never read.)
    """
    # Keep a pristine copy: the optimizer mutates `model` in place.
    orig_model = ModelProto()
    orig_model.CopyFrom(model)
    optimizer = BertOnnxModel(model)
    optimizer.fuse_gelu()
    model = optimizer.model
    # Map each fused Gelu node by its first (data) input name.
    gelu_by_input_name = {
        node.input[0]: node
        for node in model.graph.node if node.op_type == 'Gelu'
    }
    # nodes are not topologically sorted as a result of onnxruntime_tools optimization
    sorted_nodes = []
    visited = 0
    for node in orig_model.graph.node:
        # `in` uses protobuf equality: keep only nodes that survived fusion.
        if node in model.graph.node:
            sorted_nodes.append(node)
        # Splice the fused Gelu in immediately after its producer.
        if node.output[0] in gelu_by_input_name:
            sorted_nodes.append(gelu_by_input_name[node.output[0]])
            visited += 1
    # No fusion happened: fall back to the optimizer's node list untouched.
    if not visited:
        sorted_nodes = model.graph.node
    model = utils.rebuild_model(model, sorted_nodes)
    check_model(model)
    return model
def transform(self, model: onnx.ModelProto) -> onnx.ModelProto:
    """Eliminate Identity nodes.

    Pass 1 bypasses Identities in the middle of the graph by rewiring each
    consumer's first input to the Identity's input. Pass 2 removes
    Identities whose output is a graph output, renaming the graph output to
    the producer's output. Bypassed mid-graph Identities are dropped
    implicitly: they are neither kept in `optimized_nodes` nor needed.
    """
    nodes_by_output_name = {
        node.output[0]: node for node in model.graph.node
    }
    outputs_by_output_name = {
        output.name: output for output in model.graph.output
    }
    optimized_nodes = []
    removed_nodes = []
    # handle case where Identity occurs in the middle of graph
    for node in model.graph.node:
        if node.op_type == 'Constant':
            continue
        # TODO need to ease assumption that node has only one input if necessary
        try:
            prev_node = nodes_by_output_name[node.input[0]]
        except KeyError:
            # First input is a graph input or initializer — no producer.
            continue
        if prev_node.op_type != 'Identity':
            continue
        # Bypass: consume the Identity's input directly.
        node.input[0] = prev_node.input[0]
        removed_nodes.append(prev_node)
    # handle case where Identity occurs at the end of graph
    for node in model.graph.node:
        if node.op_type != 'Identity':
            optimized_nodes.append(node)
            continue
        # Identity must be a graph output
        try:
            output_node = outputs_by_output_name[node.output[0]]
            model.graph.output.remove(output_node)
        except KeyError:
            # Non-output Identity: already bypassed above, drop it here.
            continue
        removed_nodes.append(node)
        # Graph must have at least one graph output
        # NOTE(review): assumes the Identity's input has a producer node;
        # an Identity fed directly by a graph input would KeyError here.
        prev_node = nodes_by_output_name[node.input[0]]
        new_output_node = outputs_by_output_name[node.output[0]]
        # Reuse the removed output's value_info, renamed to the producer.
        new_output_node.name = prev_node.output[0]
        model.graph.output.append(new_output_node)
    # remove duplicate node(s) in optimized nodes
    seen = []
    for op_node in optimized_nodes:
        if op_node in seen:
            continue
        seen.append(op_node)
    optimized_nodes = seen
    new_nodes = list(
        filter(lambda node: node not in removed_nodes, optimized_nodes))
    model = utils.rebuild_model(model, new_nodes)
    check_model(model)
    return model
def transform(self, model: onnx.ModelProto) -> onnx.ModelProto:
    """Fuse the manual softmax pattern Exp -> (ReduceSum) -> Div into a
    single Softmax node.

    When the reduced axis is not the last one, the Softmax is wrapped in a
    pair of Transpose nodes so it operates on the last axis, and value_info
    entries are added for the new intermediate tensors.

    BUG FIX: the guard below used `and`, so a Div with only one matching
    input (e.g. an Exp producer but no ReduceSum producer) slipped through
    and crashed with IndexError on `idx_exp[0][0]` / `idx_rsum[0][0]`.
    It now uses `or`. `_is_input_op_type` also tolerates inputs with no
    producer node (graph inputs) instead of raising KeyError.
    """
    nodes_by_output_name = {
        node.output[0]: node for node in model.graph.node
    }
    initializer = {init.name: init for init in model.graph.initializer}
    value_info = {
        vi.name: vi
        for vi in list(model.graph.value_info) + list(model.graph.input) +
        list(model.graph.output)
    }
    post_fix = '_transposed'
    optimized_nodes = []
    removed_nodes = []
    for node in model.graph.node:
        if node.op_type != 'Div':
            optimized_nodes.append(node)
            continue

        # Div has no specific order of input according to spec.
        # Therefore, we need to find the input index of Exp and ReduceSum.
        def _is_input_op_type(node_input, op_type):
            if node_input in initializer.keys():
                return False
            producer = nodes_by_output_name.get(node_input)
            return producer is not None and producer.op_type == op_type

        idx_exp = list(
            filter(lambda enum: _is_input_op_type(enum[1], 'Exp'),
                   enumerate(node.input)))
        idx_rsum = list(
            filter(lambda enum: _is_input_op_type(enum[1], 'ReduceSum'),
                   enumerate(node.input)))
        # Expect exactly one Exp input and exactly one ReduceSum input
        # (was `and`, which let partial matches through — see docstring).
        if len(idx_exp) != 1 or len(idx_rsum) != 1:
            optimized_nodes.append(node)
            continue
        idx_exp = idx_exp[0][0]
        idx_rsum = idx_rsum[0][0]

        exp_node = nodes_by_output_name[node.input[idx_exp]]
        rsum_node = nodes_by_output_name[node.input[idx_rsum]]
        removed_nodes.extend([node, exp_node, rsum_node])

        # assert dim(input_shape) == 4
        exp_shape = [
            dim.dim_value
            for dim in value_info[exp_node.output[0]].type.tensor_type.shape.dim
        ]
        length = len(exp_shape)
        axis = rsum_node.attribute[0].ints
        # assert ReduceSum takes only 1 axis
        assert len(axis) == 1
        axis = axis[0]
        if axis == -1:
            axis = length - 1

        # make permutation according to axis given: swap reduced axis with last
        perm = list(range(0, length))
        perm[axis], perm[-1] = perm[-1], perm[axis]

        new_vi = []
        if axis != length - 1:
            # Softmax only reduces the last axis here, so transpose around it.
            trans_node_1 = make_node('Transpose',
                                     inputs=[exp_node.input[0]],
                                     outputs=[exp_node.output[0] + post_fix],
                                     perm=perm)
            softmax_node = make_node('Softmax',
                                     inputs=[exp_node.output[0] + post_fix],
                                     outputs=[exp_node.output[0] + '_softmax'],
                                     axis=length - 1)
            trans_node_2 = make_node('Transpose',
                                     inputs=[exp_node.output[0] + '_softmax'],
                                     outputs=[node.output[0]],
                                     perm=perm)
            optimized_nodes.extend([trans_node_1, softmax_node, trans_node_2])
            perm1_shape = np.array(exp_shape)[perm].tolist()
            new_vi.append(
                make_tensor_value_info(name=softmax_node.output[0],
                                       elem_type=onnx.TensorProto.FLOAT,
                                       shape=perm1_shape))
            new_vi.append(
                make_tensor_value_info(name=trans_node_1.output[0],
                                       elem_type=onnx.TensorProto.FLOAT,
                                       shape=perm1_shape))
        else:
            softmax_node = make_node('Softmax',
                                     inputs=[exp_node.input[0]],
                                     outputs=[node.output[0]],
                                     axis=length - 1)
            optimized_nodes.extend([softmax_node])
        model.graph.value_info.extend(new_vi)

    # remove duplicate node(s) in optimized nodes
    seen = []
    for op_node in optimized_nodes:
        if op_node in seen:
            continue
        seen.append(op_node)
    optimized_nodes = seen
    new_nodes = list(
        filter(lambda node: node not in removed_nodes, optimized_nodes))
    model = utils.rebuild_model(model, new_nodes)
    check_model(model)
    return model
def transform(self, model: onnx.ModelProto) -> onnx.ModelProto:
    """Fold a scalar Mul into the preceding Conv's initializers.

    For Conv --> Mul where the Mul's other input is a scalar initializer
    `a`, uses a * (x * w + b) = x * (a*w) + (a*b): every initializer input
    of the Conv is multiplied by `a`, the Mul is dropped, and its consumers
    (or the graph output) are rewired to the Conv's output.
    """
    nodes_by_output_name = {
        node_output: node
        for node in model.graph.node for node_output in node.output
    }
    nodes_by_input_name = {
        node_input: node
        for node in model.graph.node for node_input in node.input
    }
    value_info = {
        vi.name: vi
        for vi in list(model.graph.value_info) + list(model.graph.input) +
        list(model.graph.output)
    }
    initializer = {init.name: init for init in model.graph.initializer}
    # assume Conv is followed by Mul(Conv --> Mul) & Mul takes one data input and one init input
    # a * (x * w + b) = (x * aw + ab)
    post_fix = '_scalar_mul_fused'
    optimized_nodes = []
    removed_nodes = []
    for node in model.graph.node:
        if node.op_type != 'Mul':
            optimized_nodes.append(node)
            continue

        def _is_input_op_type(node_input, op_type):
            try:
                return nodes_by_output_name[node_input].op_type == op_type
            except KeyError:
                # Input has no producer node (graph input / initializer).
                return False

        def _is_input_init(node_input, initializer_keys):
            return node_input in initializer_keys

        idx_conv = list(
            filter(lambda enum: _is_input_op_type(enum[1], 'Conv'),
                   enumerate(node.input)))
        idx_init = list(
            filter(
                lambda enum: _is_input_init(enum[1], initializer.keys()),
                enumerate(node.input)))
        # Expect one of the inputs to be a Conv output and the other an
        # initializer (fixed copy-pasted comment that said Exp/ReduceSum).
        if not idx_conv or not idx_init:
            optimized_nodes.append(node)
            continue
        idx_conv = idx_conv[0][0]
        idx_init = idx_init[0][0]

        prev_node = nodes_by_output_name[node.input[idx_conv]]
        mul_factor = numpy_helper.to_array(
            initializer[node.input[idx_init]])
        # Only scalar (rank-0) factors can be folded; skip otherwise.
        try:
            assert not mul_factor.shape
        except AssertionError:
            optimized_nodes.append(node)
            continue
        # Scale every initializer input of the Conv (weight and bias).
        for idx, node_input in enumerate(prev_node.input):
            if node_input in initializer.keys():
                w_init = initializer[node_input]
                w_arr = numpy_helper.to_array(w_init)
                fused_w_arr = mul_factor * w_arr
                fused_w_init = numpy_helper.from_array(fused_w_arr,
                                                       name=w_init.name + post_fix)
                prev_node.input[idx] += post_fix
                model.graph.initializer.remove(w_init)
                model.graph.initializer.append(fused_w_init)
                model.graph.input.append(
                    make_tensor_value_info(
                        name=fused_w_init.name,
                        elem_type=fused_w_init.data_type,
                        shape=fused_w_arr.shape))
                # NOTE(review): assumes the initializer is also listed as a
                # graph input (value_info built from inputs above) —
                # KeyError otherwise; confirm against model producers.
                model.graph.input.remove(value_info[w_init.name])
        # change next node's input name instead of prev nodes' output
        for nnode in model.graph.node:
            for idx, input in enumerate(nnode.input):
                if input == node.output[0]:
                    nnode.input[idx] = prev_node.output[0]
        # If the Mul produced a graph output, expose the Conv output instead.
        if node.output[0] in [vi.name for vi in model.graph.output]:
            model.graph.output.remove(value_info[node.output[0]])
            model.graph.output.append(value_info[prev_node.output[0]])
    # remove duplicate node(s) in optimized nodes
    seen = []
    for op_node in optimized_nodes:
        if op_node in seen:
            continue
        seen.append(op_node)
    optimized_nodes = seen
    new_nodes = list(
        filter(lambda node: node not in removed_nodes, optimized_nodes))
    model = utils.rebuild_model(model, new_nodes)
    check_model(model)
    return model