model = onnx.load(example1)  # model is a ModelProto protobuf message
print(model)

#################################
# Draw a model with ONNX
# ++++++++++++++++++++++
#
# We use `net_drawer.py <https://github.com/onnx/onnx/blob/master/onnx/tools/net_drawer.py>`_
# included in the *onnx* package.
# We use *onnx* to load the model
# in a different way than before.

from onnx import ModelProto

model = ModelProto()
with open(example1, 'rb') as fid:
    content = fid.read()
    model.ParseFromString(content)

###################################
# We convert it into a graph.
from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer

pydot_graph = GetPydotGraph(model.graph, name=model.graph.name, rankdir="LR",
                            node_producer=GetOpNodeProducer("docstring"))
pydot_graph.write_dot("graph.dot")

#######################################
# Then into an image.
import os
os.system('dot -O -Tpng graph.dot')
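#######################################
# Alternatively (a minimal sketch, not in the original tutorial), *pydot* can
# invoke Graphviz directly, which avoids shelling out with ``os.system``.
# This still assumes the ``dot`` executable is installed and on the PATH.
pydot_graph.write_png("graph.png")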
def test_convert_end2end(self):
    predict_net_f = tempfile.NamedTemporaryFile()
    init_net_f = tempfile.NamedTemporaryFile()
    onnx_model_f = tempfile.NamedTemporaryFile()

    x = 'X'
    w = 'W'
    b = 'b'
    y = 'Y'

    predict_net = caffe2_pb2.NetDef()
    predict_net.name = 'test-convert-end2end'
    predict_net.external_input[:] = [x, w, b]
    predict_net.external_output[:] = [y]
    predict_net.op.extend([
        core.CreateOperator(
            'FC',
            inputs=[x, w, b],
            outputs=[y],
            axis=2,
        ),
    ])
    predict_net_f.write(predict_net.SerializeToString())
    predict_net_f.flush()

    init_net = caffe2_pb2.NetDef()
    init_net.name = 'test-convert-end2end-init'
    init_net.external_output[:] = [w, b]
    x_val = np.random.randn(1, 3, 2).astype(np.float32)
    w_val = np.random.randn(4, 2).astype(np.float32)
    b_val = np.random.randn(4).astype(np.float32)
    init_net.op.extend([
        core.CreateOperator(
            'GivenTensorFill',
            [],
            [w],
            values=w_val,
            shape=w_val.shape,
        ),
        core.CreateOperator(
            'GivenTensorFill',
            [],
            [b],
            values=b_val,
            shape=b_val.shape,
        ),
    ])
    init_net_f.write(init_net.SerializeToString())
    init_net_f.flush()

    y_val = np.matmul(x_val, w_val.transpose()) + b_val
    for _ in range(5):
        self._run_command(
            caffe2_to_onnx, [
                predict_net_f.name,
                '--caffe2-init-net', init_net_f.name,
                '--output', onnx_model_f.name,
                '--value-info',
                json.dumps({
                    x: (TensorProto.FLOAT, (1, 3, 2)),
                }),
            ],
            catch_exceptions=False,
        )

        onnx_model_f.seek(0)
        onnx_model = ModelProto()
        onnx_model.ParseFromString(onnx_model_f.read())
        np.testing.assert_almost_equal(
            c2.run_model(
                onnx_model, {onnx_model.graph.input[0].name: x_val}),
            [y_val])

        self._run_command(
            onnx_to_caffe2, [
                onnx_model_f.name,
                '--output', predict_net_f.name,
                '--init-net-output', init_net_f.name,
            ])
        predict_net_f.seek(0)
        predict_net = caffe2_pb2.NetDef()
        predict_net.ParseFromString(predict_net_f.read())
        init_net_f.seek(0)
        init_net = caffe2_pb2.NetDef()
        init_net.ParseFromString(init_net_f.read())
        x = predict_net.external_input[0]
        np.testing.assert_almost_equal(
            c2_native_run_net(init_net=init_net,
                              predict_net=predict_net,
                              inputs={x: x_val})[1],
            [y_val])
def model_proto_from_file(model_path):
    model_proto = ModelProto()
    with open(model_path, "rb") as f:
        model_proto.ParseFromString(f.read())
    return model_proto
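# Usage sketch (not from the original source): load a model with
# model_proto_from_file defined above and inspect its IR and opset versions.
# "model.onnx" is a hypothetical path.
proto = model_proto_from_file("model.onnx")
print(proto.ir_version, [op.version for op in proto.opset_import])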
def _parse_onnx_model(self, path):
    """Get nodes from _graph_parser and format them for the ImporterNode field.

    Returns a list of the high-level nodes (i.e. function blocks) that make up
    the ONNX model.
    """
    _logger.info("loading the model")
    try:
        if isinstance(path, onnx.ModelProto):
            onnx_model = path
        else:
            onnx_model = ModelProto()
            with open(path, 'rb') as f:
                content = f.read()
                onnx_model.ParseFromString(content)
        # Check that the IR is well formed
        # onnx.checker.check_model(onnx_model)
        # onnx IR version
        _logger.info("ONNX IR version: {}".format(onnx_model.ir_version))
    except Exception:
        _logger.error("Error occurred when loading the onnx model file")
        raise

    _logger.info("\nFinished loading.")
    _logger.info("Graph producer: {} version {}".format(onnx_model.producer_name,
                                                        onnx_model.producer_version))
    _logger.info("Graph total len: {}".format(len(onnx_model.graph.input)))

    graph_ = Graph.from_onnx(onnx_model.graph)
    _logger.info("Graph external input/output nodes: {} -> {}".format(graph_.inputs, graph_.outputs))

    self._shape_dict = graph_.shape_dict
    self._auxiliary_nodes = graph_.tensor_nodes
    self._tensor_dict = graph_.input_tensors  # weights and bias

    # Collect all weight tensors per node in one dict.
    node_to_aux = {}
    for v in graph_.nodes:
        if v.input_tensors:
            node_to_aux[v.name] = list(v.input_tensors)

    # TODO: for debugging
    print("Tensor dict \n", node_to_aux)
    print("Shape dict \n", self._shape_dict)

    to_concat = []
    nodes_ = []
    graph_tree = graph_.nodes
    for i, node in enumerate(graph_tree):
        # Skip input nodes already parsed.
        # Handle Skip nodes.
        if node.op_type == "Skip" or node.op_type == "Reshape":
            _logger.info("Skipping node {}".format(node.name))
            if len(node.inputs) == 0:
                continue
            next_node = None
            for v in graph_tree[i + 1:]:
                if node.name in v.inputs:
                    next_node = v
            if next_node is not None and next_node.name == 'lastNode':
                next_node = None
            if next_node is not None:
                # Find and replace the skipped node in its consumer's inputs.
                for index, input_ in enumerate(next_node.inputs):
                    if input_ == node.name:
                        next_node.inputs[index] = node.inputs[0]
                print("Skipped node {} {} : previous node {} : next node {}".format(
                    node.name, node.op_type, node.inputs[0], next_node.name))
                continue
            else:
                to_concat.append(node.name)
                print(node.name, "Node not skipped: next node not found after all "
                      "nodes have been exhausted. Must be a sink node.")

        # Handle external/auxiliary nodes.
        if node.op_type == "Splice" or node.op_type == "Plus":
            # First, add any missing input nodes.
            aux_node = None
            for in_node in node.inputs:
                if in_node not in self._all_nodes:
                    aux_node = self.add_auxiliary_node(in_node, nodes_)

        op_node = self.parse_node(node)
        nodes_.append(op_node)
        # Update self._all_nodes.
        self._all_nodes[op_node.name] = op_node
        print(op_node.op_type, op_node.name, op_node.inputs, op_node.input_shape,
              "->", op_node.output_shape, op_node.attribute, "\n")

    return nodes_

# _operation_map = {
#     'Input': Input,
#     # Basic neural network functions
#     'Convolution': Conv,
#     'Plus': Add,
#     'Splice': Concat,
#     'MaxPooling': MaxPooling,
#     'BatchNormalization': BatchNorm,
#     'FullyConnected': FullyConnected,
# }
# _functions_map = {
#     'Convolution': get_conv_con2d_out_dims,
#     'Maxpooling': get_maxpool_out_dims,
#     'FullyConnected': get_fc_out_dims,
#     'Reshape': get_reshape_out_dims,
#     'Splice': get_concat_out_dims,
# }
def prepare(cls, model, device='CPU', **kwargs):
    '''
    For the ONNX Caffe2Backend, we require that the init_graph does not
    initialize the actual input of the predict_graph. For example, if "img"
    is the input blob for the predict_net, we require that "img" is not
    initialized in the init_graph or in the initializer of the predict_graph.
    We don't check for this, since there is no way to know which blob is
    the input of the predict_graph.
    '''
    super(Caffe2Backend, cls).prepare(model, device, **kwargs)

    opset_version = None
    for imp in model.opset_import:
        if not imp.HasField("domain") or imp.domain == "":
            opset_version = imp.version
            if imp.version > cls._known_opset_version:
                warnings.warn(
                    "This version of onnx-caffe2 targets ONNX operator set version {}, "
                    "but the model we are trying to import uses version {}. We will try "
                    "to import it anyway, but if the model uses operators which had "
                    "BC-breaking changes in the intervening versions, import will fail."
                    .format(cls._known_opset_version, imp.version))
        else:
            warnings.warn("Unrecognized operator set {}".format(imp.domain))
    if opset_version is None:
        if model.ir_version >= 0x00000003:
            raise RuntimeError(
                "Model with IR version >= 3 did not specify ONNX operator set "
                "version (onnx-caffe2 requires it)")
        else:
            opset_version = 1

    # Check whether we have RNN related ops
    pred_model = ModelProto()
    pred_model.ParseFromString(
        cls.optimize_onnx(model.SerializeToString(), predict=True))
    rnn_nodes = []
    for node in pred_model.graph.node:
        if node.op_type in {'LSTM', 'GRU', 'RNN'}:
            rnn_nodes.append(node)

    # Build the C++ backend
    # TODO: build a predictor that supports GPU
    # And for RNN nets, we need to avoid adding init_net
    use_cpp_backend = device == 'CPU' and not rnn_nodes
    # use python backend for now
    use_cpp_backend = False
    if use_cpp_backend:
        c2_rnn_ops = []
        if rnn_nodes:
            init_model = ModelProto()
            init_model.ParseFromString(
                cls.optimize_onnx(model.SerializeToString(), init=True))
            for node in rnn_nodes:
                c2ops = cls._onnx_node_to_caffe2_op(
                    init_model, pred_model, node, opset_version)
                init_ops = [x.SerializeToString() for x in c2ops.init_ops]
                ops = [x.SerializeToString() for x in c2ops.ops]
                external_inputs = c2ops.interface_blobs
                c2_rnn_ops.append(C.Caffe2Ops(init_ops, ops, external_inputs))
            del init_model

        cbackend = C.Caffe2Backend()
        rep = cbackend.prepare(model.SerializeToString(), device, c2_rnn_ops)
        # For testing
        # Dump the net descriptions to file for comparison with the Python ones
        if "ONNX_CAFFE2_DEBUG" in os.environ:
            pred_net_str = rep.pred_net()
            pn = caffe2_pb2.NetDef()
            pn.ParseFromString(pred_net_str)
            init_net_str = rep.init_net()
            inn = caffe2_pb2.NetDef()
            inn.ParseFromString(init_net_str)
            with open("cpp.txt", "w") as f:
                f.write("pred_net: \n{}".format(pn))
        rep_wrapper = Caffe2CppRep(rep)
        return rep_wrapper
    else:
        ws = Workspace()
        device_option = get_device_option(Device(device))

        # Directly load initializer data into blobs in the workspace
        cls._direct_initialize_parameters(
            model.graph.initializer,
            ws,
            device_option,
        )

        initialized = {init.name for init in model.graph.initializer}

        cls._direct_initialize_inputs(
            model.graph.input,
            initialized,
            ws,
            device_option,
        )

        uninitialized = [
            value_info.name
            for value_info in model.graph.input
            if value_info.name not in initialized
        ]

        init_net, predict_net = cls._onnx_model_to_caffe2_net(
            model, device, opset_version, False)
        if "ONNX_CAFFE2_DEBUG" in os.environ:
            with open("python.txt", "w") as f:
                f.write("pred_net: \n{}".format(predict_net))
        retval = Caffe2Rep(init_net, predict_net, ws, uninitialized)
        return retval
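# Usage sketch (an assumption based on the public onnx-caffe2 backend API,
# not taken from this file): prepare() returns a backend rep whose run()
# executes the model. "model.onnx" and the input shape are hypothetical.
import numpy as np
import onnx
import caffe2.python.onnx.backend as backend

model = onnx.load("model.onnx")
rep = backend.prepare(model, device="CPU")
x = np.random.randn(1, 3, 224, 224).astype(np.float32)
outputs = rep.run({model.graph.input[0].name: x})
print(outputs[0].shape)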
def _simple_model(self) -> ModelProto:
    # Create a ModelProto.
    model = ModelProto()
    model.ir_version = IR_VERSION
    return model
def make_model(self, doc, input_names, output_names, optimize=True):
    """Create the final ModelProto for onnx from the internal graph.

    Args:
        doc: text for the doc string of the model
        input_names: list of model inputs
        output_names: list of model outputs
        optimize: optimize the graph via onnx
    """
    # create output_tensor_values
    output_tensor_values = []
    for name in output_names:
        op = self.get_node_by_name(name)
        if op:
            dtype = op.dtype
            if not dtype:
                continue
            v = helper.make_tensor_value_info(name, dtype, self.get_shape(name))
            output_tensor_values.append(v)

    # update attributes
    ops = []
    for op in self.get_nodes():
        onnx_op = op.op
        del onnx_op.attribute[:]
        attr = []
        for a in op.attr.values():
            if a.name in utils.ONNX_VALID_ATTRIBUTES:
                attr.append(a)
        if attr:
            onnx_op.attribute.extend(attr)
        ops.append(onnx_op)

    # create input_tensor_values, initializers
    initializers = list(self._initializers.values())
    input_with_initializers = []
    for initializer in initializers:
        shape = self.get_shape(initializer.name)
        if shape and list(shape) != initializer.dims:
            raise ValueError("initializer shape is inconsistent")
        val = helper.make_tensor_value_info(initializer.name,
                                            initializer.data_type,
                                            initializer.dims)
        input_with_initializers.append(val)
    input_with_initializers.extend(self.model_inputs)

    # create model proto
    graph = helper.make_graph(ops, "tf2onnx",
                              input_with_initializers,
                              output_tensor_values,
                              initializer=initializers,
                              doc_string=doc)
    kwargs = {"producer_name": "tf2onnx",
              "producer_version": __version__}
    if self._opset > 0:
        imp = OperatorSetIdProto()
        imp.version = self._opset
        kwargs["opset"] = imp

    model_proto = helper.make_model(graph, **kwargs)

    # optimize the model proto
    if optimize:
        optimized_model = optimizer.optimize(model_proto.SerializeToString(),
                                             ["fuse_consecutive_transposes",
                                              "fuse_transpose_into_gemm",
                                              "eliminate_nop_transpose"])
        model_proto = ModelProto()
        model_proto.ParseFromString(optimized_model)

    return model_proto
def merge_models(m1: ModelProto, m2: ModelProto,
                 io_map: List[Tuple[Text, Text]],
                 inputs: Optional[List[Text]] = None,
                 outputs: Optional[List[Text]] = None,
                 prefix1: Optional[Text] = None,
                 prefix2: Optional[Text] = None,
                 name: Optional[Text] = None,
                 doc_string: Optional[Text] = None,
                 producer_name: Optional[Text] = 'onnx.compose.merge_models',
                 producer_version: Optional[Text] = "1.0",
                 domain: Optional[Text] = "",
                 model_version: Optional[int] = 1) -> ModelProto:
    """Combines two ONNX models into a single one.

    The combined model is defined by connecting the specified set of outputs/inputs.
    Those inputs/outputs not specified in the io_map argument will remain as
    inputs/outputs of the combined model.

    Both models should have the same IR version, and the same operator sets imported.

    Arguments:
        m1 (ModelProto): First model
        m2 (ModelProto): Second model
        io_map (list of pairs of string): The pairs of names [(out0, in0), (out1, in1), ...]
                                          representing outputs of the first graph and inputs of the second
                                          to be connected
        inputs (list of string): Optional list of inputs to be included in the combined graph
                                 By default, all inputs not present in the ``io_map`` argument will be
                                 included in the combined model
        outputs (list of string): Optional list of outputs to be included in the combined graph
                                  By default, all outputs not present in the ``io_map`` argument will be
                                  included in the combined model
        prefix1 (string): Optional prefix to be added to all names in m1
        prefix2 (string): Optional prefix to be added to all names in m2
        name (string): Optional name for the combined graph
                       By default, the name is g1.name and g2.name concatenated with an underscore delimiter
        doc_string (string): Optional docstring for the combined graph
                             If not provided, a default docstring with the concatenation
                             of g1 and g2 docstrings is used
        producer_name (string): Optional producer name for the combined model. Default: 'onnx.compose'
        producer_version (string): Optional producer version for the combined model. Default: "1.0"
        domain (string): Optional domain of the combined model. Default: ""
        model_version (int): Optional version of the graph encoded. Default: 1
    """
    if type(m1) is not ModelProto:
        raise ValueError("m1 argument is not an ONNX model")
    if type(m2) is not ModelProto:
        raise ValueError("m2 argument is not an ONNX model")

    if m1.ir_version != m2.ir_version:
        raise ValueError(
            f"IR version mismatch {m1.ir_version} != {m2.ir_version}."
            " Both models should have the same IR version")
    ir_version = m1.ir_version

    opset_import_map: MutableMapping[Text, int] = {}
    opset_imports = \
        [entry for entry in m1.opset_import] + \
        [entry for entry in m2.opset_import]

    for entry in opset_imports:
        if entry.domain in opset_import_map:
            found_version = opset_import_map[entry.domain]
            if entry.version != found_version:
                raise ValueError(
                    "Can't merge two models with different operator set ids for a given domain. "
                    f"Got: {m1.opset_import} and {m2.opset_import}")
        else:
            opset_import_map[entry.domain] = entry.version

    # Prefixing names in the graphs if requested, adjusting io_map accordingly
    if prefix1 or prefix2:
        if prefix1:
            m1_copy = ModelProto()
            m1_copy.CopyFrom(m1)
            m1 = m1_copy
            m1 = add_prefix(m1, prefix=prefix1)
        if prefix2:
            m2_copy = ModelProto()
            m2_copy.CopyFrom(m2)
            m2 = m2_copy
            m2 = add_prefix(m2, prefix=prefix2)
        io_map = [(prefix1 + io[0] if prefix1 else io[0],
                   prefix2 + io[1] if prefix2 else io[1])
                  for io in io_map]

    graph = merge_graphs(m1.graph, m2.graph, io_map,
                         inputs=inputs,
                         outputs=outputs,
                         name=name,
                         doc_string=doc_string)
    model = helper.make_model(graph,
                              producer_name=producer_name,
                              producer_version=producer_version,
                              domain=domain,
                              model_version=model_version,
                              opset_imports=opset_imports,
                              ir_version=ir_version)

    # Merging model metadata props
    model_props = {}
    for meta_entry in m1.metadata_props:
        model_props[meta_entry.key] = meta_entry.value
    for meta_entry in m2.metadata_props:
        if meta_entry.key in model_props:
            value = model_props[meta_entry.key]
            if value != meta_entry.value:
                raise ValueError(
                    "Can't merge models with different values for the same model metadata property."
                    f" Found: property = {meta_entry.key}, with values {value} and {meta_entry.value}.")
        else:
            model_props[meta_entry.key] = meta_entry.value
    helper.set_model_props(model, model_props)

    # Merging functions
    function_overlap = list(
        set([f.name for f in m1.functions]) & set([f.name for f in m2.functions]))
    if function_overlap:
        raise ValueError(
            "Can't merge models with overlapping local function names."
            " Found in both graphs: " + ', '.join(function_overlap))
    model.functions.MergeFrom(m1.functions)
    model.functions.MergeFrom(m2.functions)

    checker.check_model(model)
    return model
def add_prefix(
        model: ModelProto,
        prefix: Text,
        rename_nodes: Optional[bool] = True,
        rename_edges: Optional[bool] = True,
        rename_inputs: Optional[bool] = True,
        rename_outputs: Optional[bool] = True,
        rename_initializers: Optional[bool] = True,
        rename_value_infos: Optional[bool] = True,
        rename_functions: Optional[bool] = True,
        inplace: Optional[bool] = False,
) -> ModelProto:
    """Adds a prefix to names of elements in a graph: nodes, edges, inputs, outputs,
    initializers, sparse initializers, value infos, and local functions.

    It can be used as a utility before merging graphs that have overlapping names.
    Empty names are not prefixed.

    Arguments:
        model (ModelProto): Model
        prefix (Text): Prefix to be added to each name in the graph
        rename_nodes (bool): Whether to prefix node names
        rename_edges (bool): Whether to prefix node edge names
        rename_inputs (bool): Whether to prefix input names
        rename_outputs (bool): Whether to prefix output names
        rename_initializers (bool): Whether to prefix initializer and sparse initializer names
        rename_value_infos (bool): Whether to prefix value info names
        rename_functions (bool): Whether to prefix local function names
        inplace (bool): If True, mutates the model directly. Otherwise, a copy will be created.
    """
    if type(model) is not ModelProto:
        raise ValueError("model argument is not an ONNX model")

    if not inplace:
        m = ModelProto()
        m.CopyFrom(model)
        model = m

    add_prefix_graph(
        model.graph,
        prefix,
        rename_nodes=rename_nodes,
        rename_edges=rename_edges,
        rename_inputs=rename_inputs,
        rename_outputs=rename_outputs,
        rename_initializers=rename_initializers,
        rename_value_infos=rename_value_infos,
        inplace=True  # No need to create a copy, since it's a new model
    )

    if rename_functions:
        f_name_map = {}
        for f in model.functions:
            new_f_name = prefix + f.name
            f_name_map[f.name] = new_f_name
            f.name = new_f_name
        # Adjust references to local functions in other local function definitions
        for f in model.functions:
            for n in f.node:
                if n.op_type in f_name_map:
                    n.op_type = f_name_map[n.op_type]
        # Adjust references to local functions in the graph
        for n in model.graph.node:
            if n.op_type in f_name_map:
                n.op_type = f_name_map[n.op_type]

    return model
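# Usage sketch (not from the original source): rename everything in a model
# before composing it with another model that reuses the same tensor names.
# "model.onnx" is a hypothetical path.
import onnx
from onnx.compose import add_prefix

model = onnx.load("model.onnx")
prefixed = add_prefix(model, prefix="sub0/")
print([vi.name for vi in prefixed.graph.input])  # every name now starts with "sub0/"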
def _test_add_prefix(self,
                     rename_nodes: bool = False,
                     rename_edges: bool = False,
                     rename_inputs: bool = False,
                     rename_outputs: bool = False,
                     rename_initializers: bool = False,
                     rename_value_infos: bool = False,
                     inplace: bool = False) -> None:
    m1 = _load_model(m1_def)

    prefix = 'pre/'

    if inplace:
        m2 = ModelProto()
        m2.CopyFrom(m1)
        compose.add_prefix(m2, prefix,
                           rename_nodes=rename_nodes,
                           rename_edges=rename_edges,
                           rename_inputs=rename_inputs,
                           rename_outputs=rename_outputs,
                           rename_initializers=rename_initializers,
                           rename_value_infos=rename_value_infos,
                           inplace=True)
    else:
        m2 = compose.add_prefix(m1, prefix,
                                rename_nodes=rename_nodes,
                                rename_edges=rename_edges,
                                rename_inputs=rename_inputs,
                                rename_outputs=rename_outputs,
                                rename_initializers=rename_initializers,
                                rename_value_infos=rename_value_infos)
    g_in = m1.graph
    g_out = m2.graph

    if rename_edges or rename_inputs or rename_outputs or rename_initializers or rename_value_infos:
        name_mapping = {}
        # Rename inputs/outputs/edges. Propagate name changes from and to edges
        if rename_edges:
            for n in g_in.node:
                for e in n.input:
                    name_mapping[e] = _prefixed(prefix, e)
                for e in n.output:
                    name_mapping[e] = _prefixed(prefix, e)
        else:
            if rename_inputs:
                for elem in g_in.input:
                    name_mapping[elem.name] = _prefixed(prefix, elem.name)
            if rename_outputs:
                for elem in g_in.output:
                    name_mapping[elem.name] = _prefixed(prefix, elem.name)

        if rename_initializers:
            for init in g_in.initializer:
                name_mapping[init.name] = _prefixed(prefix, init.name)
            for sparse_init in g_in.sparse_initializer:
                name_mapping[sparse_init.values.name] = \
                    _prefixed(prefix, sparse_init.values.name)
                name_mapping[sparse_init.indices.name] = \
                    _prefixed(prefix, sparse_init.indices.name)

        if rename_value_infos:
            for value_info in g_in.output:
                name_mapping[value_info.name] = _prefixed(prefix, value_info.name)

        for n1, n0 in zip(g_out.node, g_in.node):
            for e1, e0 in zip(n1.input, n0.input):
                self.assertEqual(name_mapping.get(e0, e0), e1)
            for e1, e0 in zip(n1.output, n0.output):
                self.assertEqual(name_mapping.get(e0, e0), e1)

        for i1, i0 in zip(g_out.input, g_in.input):
            self.assertEqual(name_mapping.get(i0.name, i0.name), i1.name)

        for o1, o0 in zip(g_out.output, g_in.output):
            self.assertEqual(name_mapping.get(o0.name, o0.name), o1.name)

        for init1, init0 in zip(g_out.initializer, g_in.initializer):
            self.assertEqual(name_mapping.get(init0.name, init0.name), init1.name)

        for sparse_init1, sparse_init0 in zip(g_out.sparse_initializer,
                                              g_in.sparse_initializer):
            self.assertEqual(
                name_mapping.get(sparse_init0.values.name, sparse_init0.values.name),
                sparse_init1.values.name)
            self.assertEqual(
                name_mapping.get(sparse_init0.indices.name, sparse_init0.indices.name),
                sparse_init1.indices.name)

        for vi1, vi0 in zip(g_out.value_info, g_in.value_info):
            self.assertEqual(name_mapping.get(vi0.name, vi0.name), vi1.name)

    if rename_nodes:
        for n1, n0 in zip(g_out.node, g_in.node):
            self.assertEqual(_prefixed(prefix, n0.name), n1.name)
def get_bert_inputs(onnx_file, input_ids_name=None, segment_ids_name=None, input_mask_name=None):
    """Get graph inputs for a bert model.

    First, we try to deduce them from the EmbedLayerNormalization node.
    If that is not found, we guess based on naming.
    """
    model = ModelProto()
    with open(onnx_file, "rb") as f:
        model.ParseFromString(f.read())

    onnx_model = OnnxModel(model)
    graph_inputs = onnx_model.get_graph_inputs_excluding_initializers()

    if input_ids_name is not None:
        input_ids = onnx_model.find_graph_input(input_ids_name)
        if input_ids is None:
            raise ValueError(f"Graph does not have input named {input_ids_name}")

        segment_ids = None
        if segment_ids_name:
            segment_ids = onnx_model.find_graph_input(segment_ids_name)
            if segment_ids is None:
                raise ValueError(f"Graph does not have input named {segment_ids_name}")

        input_mask = None
        if input_mask_name:
            input_mask = onnx_model.find_graph_input(input_mask_name)
            if input_mask is None:
                raise ValueError(f"Graph does not have input named {input_mask_name}")

        expected_inputs = 1 + (1 if segment_ids else 0) + (1 if input_mask else 0)
        if len(graph_inputs) != expected_inputs:
            raise ValueError(f"Expect the graph to have {expected_inputs} inputs. Got {len(graph_inputs)}")

        return input_ids, segment_ids, input_mask

    if len(graph_inputs) != 3:
        raise ValueError(f"Expect the graph to have 3 inputs. Got {len(graph_inputs)}")

    embed_nodes = onnx_model.get_nodes_by_op_type('EmbedLayerNormalization')
    if len(embed_nodes) == 1:
        embed_node = embed_nodes[0]
        input_ids = get_graph_input_from_embed_node(onnx_model, embed_node, 0)
        segment_ids = get_graph_input_from_embed_node(onnx_model, embed_node, 1)
        input_mask = get_graph_input_from_embed_node(onnx_model, embed_node, 7)
        return input_ids, segment_ids, input_mask

    # Try to guess the inputs based on naming.
    input_ids = None
    segment_ids = None
    input_mask = None
    for input in graph_inputs:
        input_name_lower = input.name.lower()
        if "mask" in input_name_lower:
            # matches an input with a name like "attention_mask" or "input_mask"
            input_mask = input
        elif "token" in input_name_lower or "segment" in input_name_lower:
            # matches an input with a name like "segment_ids" or "token_type_ids"
            segment_ids = input
        else:
            input_ids = input

    if input_ids and segment_ids and input_mask:
        return input_ids, segment_ids, input_mask

    raise ValueError("Failed to assign 3 inputs. You might try renaming the graph inputs.")
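# Usage sketch (not from the original source): let get_bert_inputs (defined
# above) deduce the three standard BERT inputs. "bert.onnx" is a hypothetical path.
input_ids, segment_ids, input_mask = get_bert_inputs("bert.onnx")
print(input_ids.name, segment_ids.name, input_mask.name)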
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True, type=str)
    parser.add_argument('--output', required=True, type=str)

    # model parameters
    parser.add_argument('--num_heads', required=False, type=int, default=12,
                        help="number of attention heads")
    parser.add_argument('--hidden_size', required=False, type=int, default=768)
    parser.add_argument('--sequence_length', required=False, type=int, default=128)

    # Use int32 (instead of int64) tensors as input to avoid unnecessary data type casts.
    parser.add_argument('--input_int32', required=False, action='store_true')
    parser.set_defaults(input_int32=False)

    # For NVidia GPUs with Tensor Cores, like V100 and T4, half-precision floats bring better performance.
    parser.add_argument('--float16', required=False, action='store_true')
    parser.set_defaults(float16=False)

    parser.add_argument('--verbose', required=False, action='store_true')
    parser.set_defaults(verbose=False)

    args = parser.parse_args()

    model = ModelProto()
    with open(args.input, "rb") as f:
        model.ParseFromString(f.read())

    bert_model = BertOnnxModel(model, args.num_heads, args.hidden_size, args.sequence_length)

    bert_model.fuse_layer_norm()
    bert_model.fuse_gelu()
    bert_model.fuse_reshape()
    bert_model.fuse_attention(args.verbose)
    bert_model.fuse_embed_layer(args.verbose)
    if bert_model.embed_node is None:
        print("Failed to fuse embedding layer.")
        return

    if args.input_int32:
        bert_model.change_input_to_int32()
    else:
        bert_model.cast_input_to_int32()

    if args.float16:
        bert_model.convert_model_float32_to_float16()

    with open(args.output, "wb") as out:
        out.write(bert_model.model.SerializeToString())
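# Usage sketch (the script and file names are hypothetical):
#   python bert_model_optimization.py --input bert.onnx --output bert_opt.onnx \
#       --num_heads 12 --hidden_size 768 --sequence_length 128 --float16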
def is_cpp_protobuf():
    # The C++ protobuf implementation exposes message methods such as
    # ParseFromString as built-in (C-level) functions, whereas the pure-Python
    # implementation defines them as ordinary Python methods.
    return isinstance(ModelProto().ParseFromString, types.BuiltinFunctionType)
def onnx_to_hls(yamlConfig):

    ######################
    ##  Do translation
    ######################

    # This is a list of dictionaries to hold all the layer info we need to generate HLS
    layer_list = []

    # Extract model architecture
    model = ModelProto()
    with open(yamlConfig['OnnxModel'], 'rb') as fid:
        model.ParseFromString(fid.read())

    # Define supported layers
    core_operations = ['Gemm', 'BatchNormalization', 'Conv']
    transform_operations = [
        'Squeeze', 'Unsqueeze', 'Transpose', 'Flatten', 'Identity', 'Reshape'
    ]
    pool_operations = ['AveragePool', 'MaxPool']
    merge_operations = [
        'Add', 'Sub', 'Mul', 'Average', 'Max', 'Min', 'Concat', 'Sum'
    ]
    activation_operations = [
        'Relu', 'Tanh', 'Sigmoid', 'LeakyRelu', 'ThresholdedRelu',
        'HardSigmoid', 'Elu', 'Selu', 'PRelu', 'Softmax', 'Softsign', 'Softplus'
    ]
    supported_operations = core_operations + transform_operations + \
        pool_operations + merge_operations + activation_operations

    operation_map = {
        'Gemm': 'Dense',
        'Relu': 'Activation',
        'Tanh': 'Activation',
        'Sigmoid': 'Activation',
        'LeakyRelu': 'LeakyReLU',
        'ThresholdedRelu': 'ThresholdedReLU',
        'HardSigmoid': 'Activation',
        'Elu': 'ELU',
        'Selu': 'Activation',
        'PRelu': 'PReLU',
        'Softmax': 'Activation',
        'Softsign': 'Activation',
        'Softplus': 'Activation',
        'Sum': 'Add',
        'Sub': 'Subtract',
        'Max': 'Maximum',
        'Min': 'Minimum',
        'Mul': 'Multiply',
        'Concat': 'Concatenate',
    }

    # Define layers to skip for conversion to HLS
    skip_layers = [
        'Squeeze', 'Unsqueeze', 'Dropout', 'Identity', 'Flatten', 'Transpose', 'Reshape'
    ]
    # Map inputs of skipped layers
    inputs_map = {}

    passes = [
        'fuse_transpose_into_gemm', 'fuse_matmul_add_bias_into_gemm',
        'eliminate_nop_transpose', 'fuse_consecutive_transposes'
    ]
    model = shape_inference.infer_shapes(model)  # have to infer shapes before optimizing the model
    model = optimizer.optimize(model, passes)
    model = shape_inference.infer_shapes(model)  # have to re-infer shapes after optimizing the model

    reader = ONNXDataReader(model)

    # Loop through layers
    layer_counter = 0

    all_inputs = [x.name for x in model.graph.input]
    all_initializers = [x.name for x in model.graph.initializer]
    input_layers = [x for x in all_inputs if x not in all_initializers]
    output_layers = [x.name for x in model.graph.output]

    for i, inp in enumerate(input_layers):
        input_layer = {}
        input_layer['name'] = inp
        input_layer['class_name'] = 'InputLayer'
        inp_shape = next((x.type.tensor_type.shape.dim
                          for x in model.graph.input if x.name == inp), None)
        input_layer['input_shape'] = [x.dim_value for x in inp_shape]
        if len(input_layer['input_shape']) > 1:
            input_layer['input_shape'][0] = None
        input_layer['outputs'] = [inp]
        sanitize_layer_name(input_layer)
        input_layers[i] = input_layer['name']
        layer_list.append(input_layer)

    # Check for unsupported layer types
    for operation in model.graph.node:
        if operation.op_type not in supported_operations:
            raise Exception('ERROR: Unsupported operation type: {}'.format(operation.op_type))

    # Get input shape
    current_shape = [d.dim_value for d in model.graph.input[0].type.tensor_type.shape.dim]
    print('Input shape:', current_shape)

    print('Topology:')
    for operation in model.graph.node:
        if operation.op_type == 'Flatten':
            current_shape = [current_shape[0], np.prod(current_shape[1:])]
        if operation.op_type in skip_layers:
            # Currently supported skipped layers have only one input and output.
            # Skipped layers can follow each other (e.g., Dropout -> Flatten).
            input_name = inputs_map.get(operation.input[0], operation.input[0])
            output_name = operation.output[0]
            inputs_map[output_name] = input_name
            continue

        if operation.op_type in supported_operations:
            layer_counter = layer_counter + 1

        # Dictionary to fill in and append to layer_list
        layer = {}

        # Extract name for finding weights and biases
        if operation.name:
            layer['name'] = operation.name
        else:
            layer['name'] = operation.op_type + str(layer_counter)
        layer['class_name'] = operation_map.get(operation.op_type, operation.op_type)
        layer['inputs'] = [inputs_map.get(operation.input[0], operation.input[0])]
        layer['outputs'] = [x for x in operation.output]

        # Extract type of activation
        if operation.op_type in activation_operations:
            layer['activation'] = operation.op_type.lower()
            if layer_list[-1]['class_name'] != 'BatchNormalization':
                layer_list[-1]['activation'] = operation.op_type.lower()

        # Get number of inputs and outputs
        # (We take it from the weights to avoid dealing with InputLayer and Flatten details)
        if layer['class_name'] == 'Dense':
            current_shape = get_input_shape(model, operation)
            layer['n_in'] = next((x.type.tensor_type.shape.dim[-1].dim_value
                                  for x in model.graph.input
                                  if x.name == operation.input[0]), None)
            layer['n_out'] = next((x.type.tensor_type.shape.dim[-1].dim_value
                                   for x in model.graph.value_info
                                   if x.name == operation.output[0]), None)
            tran_weight = get_onnx_attribute(operation, 'transB', 0)
            reader.add_input(layer['name'], operation.input, tran_weight)
            current_shape = [current_shape[0], layer['n_out']]
        elif layer['class_name'] == 'Conv':
            current_shape = get_input_shape(model, operation)
            strides = get_onnx_attribute(operation, 'strides')
            kernel_shape = get_onnx_attribute(operation, 'kernel_shape')

            if len(current_shape) == 3:  # Conv1D
                layer['class_name'] = 'Conv1D'
                reader.add_input(layer['name'], operation.input)

                layer['y_in'] = current_shape[2]
                layer['y_filt'] = kernel_shape[0]
                layer['n_chan'] = current_shape[1]
                layer['n_filt'] = next((x.type.tensor_type.shape.dim[1].dim_value
                                        for x in model.graph.value_info
                                        if x.name == operation.output[0]), None)
                layer['stride'] = strides[0]
                pads = compute_pads_1d(operation, layer)
                layer['pad_left'] = pads[0]
                layer['pad_right'] = pads[1]

                if all(x == 0 for x in pads):  # No padding, i.e., 'VALID' padding
                    layer['y_out'] = int(math.ceil(
                        float(layer['y_in'] - layer['y_filt'] + 1) / float(layer['stride'])))
                else:
                    layer['y_out'] = int(math.ceil(
                        float(layer['y_in']) / float(layer['stride'])))

                current_shape = [current_shape[0], layer['n_filt'], layer['y_out']]
            elif len(current_shape) == 4:  # Conv2D
                layer['class_name'] = 'Conv2D'
                reader.add_input(layer['name'], operation.input,
                                 transpose=True, perm=[2, 3, 1, 0])

                layer['in_height'] = current_shape[2]
                layer['in_width'] = current_shape[3]
                layer['filt_height'] = kernel_shape[0]
                layer['filt_width'] = kernel_shape[1]
                layer['n_chan'] = current_shape[1]
                layer['n_filt'] = next((x.type.tensor_type.shape.dim[1].dim_value
                                        for x in model.graph.value_info
                                        if x.name == operation.output[0]), None)
                layer['stride_height'] = strides[0]
                layer['stride_width'] = strides[1]
                pads = compute_pads_2d(operation, layer)
                layer['pad_top'] = pads[0]
                layer['pad_bottom'] = pads[2]
                layer['pad_left'] = pads[1]
                layer['pad_right'] = pads[3]

                if all(x == 0 for x in pads):  # No padding, i.e., 'VALID' padding in Keras/Tensorflow
                    layer['out_width'] = int(math.ceil(
                        float(layer['in_width'] - layer['filt_width'] + 1) / float(layer['stride_width'])))
                    layer['out_height'] = int(math.ceil(
                        float(layer['in_height'] - layer['filt_height'] + 1) / float(layer['stride_height'])))
                else:
                    layer['out_height'] = int(math.ceil(
                        float(layer['in_height']) / float(layer['stride_height'])))
                    layer['out_width'] = int(math.ceil(
                        float(layer['in_width']) / float(layer['stride_width'])))

                current_shape = [current_shape[0], layer['n_filt'],
                                 layer['out_height'], layer['out_width']]
        elif layer['class_name'] == 'BatchNormalization':
            layer['epsilon'] = get_onnx_attribute(operation, 'epsilon')
            layer['momentum'] = get_onnx_attribute(operation, 'momentum')

            reader.add_input(layer['name'], operation.input)

            in_size = 1
            for dim in current_shape[1:]:
                in_size *= dim
            layer['n_in'] = in_size
            layer['n_out'] = layer['n_in']
            if len(current_shape) == 2:
                layer['n_filt'] = -1
            else:
                layer['n_filt'] = current_shape[1]
        elif layer['class_name'] in pool_operations:
            current_shape = get_input_shape(model, operation)
            info = layer['class_name'].replace('Pool', '')
            strides = get_onnx_attribute(operation, 'strides')
            kernel_shape = get_onnx_attribute(operation, 'kernel_shape')

            if len(current_shape) == 3:  # 1D
                layer['class_name'] = info + 'Pooling1D'

                layer['n_filt'] = current_shape[1]  # channels and input length from the current shape
                layer['y_in'] = current_shape[2]
                layer['stride'] = strides[0]
                layer['pool_size'] = layer['y_filt'] = kernel_shape[0]
                pads = compute_pads_1d(operation, layer)
                layer['pad_left'] = pads[0]
                layer['pad_right'] = pads[1]

                if all(x == 0 for x in pads):  # No padding, i.e., 'VALID' padding
                    layer['n_out'] = int(math.ceil(
                        float(layer['y_in'] - layer['y_filt'] + 1) / float(layer['stride'])))
                else:
                    layer['n_out'] = int(math.ceil(
                        float(layer['y_in']) / float(layer['stride'])))

                current_shape = [current_shape[0], layer['n_filt'], layer['n_out']]
            elif len(current_shape) == 4:  # 2D
                layer['class_name'] = info + 'Pooling2D'

                layer['n_filt'] = current_shape[1]
                layer['in_height'] = current_shape[2]
                layer['in_width'] = current_shape[3]
                layer['stride_height'] = strides[0]
                layer['stride_width'] = strides[1]
                layer['pool_height'] = layer['filt_height'] = kernel_shape[0]
                layer['pool_width'] = layer['filt_width'] = kernel_shape[1]
                pads = compute_pads_2d(operation, layer)
                layer['pad_top'] = pads[0]
                layer['pad_bottom'] = pads[2]
                layer['pad_left'] = pads[1]
                layer['pad_right'] = pads[3]

                if all(x == 0 for x in pads):  # No padding, i.e., 'VALID' padding in Keras/Tensorflow
                    layer['out_width'] = int(math.ceil(
                        float(layer['in_width'] - layer['filt_width'] + 1) / float(layer['stride_width'])))
                    layer['out_height'] = int(math.ceil(
                        float(layer['in_height'] - layer['filt_height'] + 1) / float(layer['stride_height'])))
                else:
                    layer['out_height'] = int(math.ceil(
                        float(layer['in_height']) / float(layer['stride_height'])))
                    layer['out_width'] = int(math.ceil(
                        float(layer['in_width']) / float(layer['stride_width'])))

                layer['n_out'] = layer['out_height'] * layer['out_width'] * layer['n_filt']
                current_shape = [current_shape[0], layer['n_filt'],
                                 layer['out_height'], layer['out_width']]
        elif layer['class_name'] in ['ELU', 'LeakyReLU', 'ThresholdedReLU']:
            layer['activation'] = layer['class_name']
            layer['activ_param'] = get_onnx_attribute(operation, 'alpha', 0.01)
        elif layer['class_name'] == 'PReLU':
            layer['activation'] = layer['class_name']
        elif layer['class_name'] in [operation_map.get(op, op) for op in merge_operations]:
            layer['op'] = layer['class_name'].lower()
            if layer['class_name'] == 'Concatenate':
                rank = len(current_shape[1:])
                if rank > 3:
                    raise Exception('ERROR: Concatenation of tensors with rank > 3 is not yet supported.')
                layer['op'] = layer['class_name'].lower() + '{}d'.format(rank)
                layer['axis'] = get_onnx_attribute(operation, 'axis')
            else:
                layer['class_name'] = 'Merge'
            layer['inputs'] = [inputs_map.get(x, x) for x in operation.input]
            if len(layer['inputs']) > 2:
                raise Exception('ERROR: Merging more than two tensors is not yet supported.')

        sanitize_layer_name(layer)
        print('Layer name: {}, layer type: {}, current shape: {}'.format(
            layer['name'], layer['class_name'], current_shape))
        layer_list.append(layer)

    #################
    ## Generate HLS
    #################

    print('Creating HLS model')
    hls_model = HLSModel(yamlConfig, reader, layer_list, input_layers, output_layers)
    optimizers = [
        'eliminate_linear_activation', 'merge_batch_norm_quantized_tanh',
        'quantize_dense_output'
    ]
    optimize_model(hls_model, optimizers)
    return hls_model
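# Usage sketch (not from the original source): only the 'OnnxModel' key is
# shown; a real hls4ml YAML config carries additional project settings.
yaml_config = {'OnnxModel': 'model.onnx'}  # hypothetical path
hls_model = onnx_to_hls(yaml_config)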
def convert_model(self, request):
    self._dst_type = request.args.get('destination_type')
    if self._dst_type == 'caffe2':
        dst_predict_net = request.args.get('predict_net')
        dst_init_net = request.args.get('init_net')
        logger.warn(dst_init_net)
        logger.warn(dst_predict_net)
    else:
        destination_path = request.args.get('destination_path')
        logger.warn(destination_path)

    if self._dst_type == 'onnx':
        if self._src_type == 'caffe2':
            data_type = onnx.TensorProto.FLOAT
            # data_shape = (1, 3, 299, 299) if the model is inceptionv3/4
            tensor_size_list = self.input_tensor_size.split(',')
            data_shape = tuple(map(int, tensor_size_list))
            print(data_shape)
            value_info = {'data': (data_type, data_shape)}

            predict_net = caffe2_pb2.NetDef()
            with open(self.src_predict_net, 'rb') as f:
                predict_net.ParseFromString(f.read())

            init_net = caffe2_pb2.NetDef()
            with open(self.src_init_net, 'rb') as f:
                init_net.ParseFromString(f.read())

            # if self._src_tb_graph._predict_net.name == '':
            #     self._src_tb_graph._predict_net.name = 'modelName'

            onnx_model = c2_onnx.caffe2_net_to_onnx_model(predict_net, init_net, value_info)
            with open(destination_path, 'wb') as f:
                f.write(onnx_model.SerializeToString())
            self._dst_tb_graph = onnx_util.OnnxGraph(destination_path, "onnx")
        elif self._src_type == 'torch':
            # TODO: choose input_net
            tensor_size_list = self.input_tensor_size.split(',')
            logger.warn(destination_path)
            x = torch.randn(tuple(map(int, tensor_size_list)))
            if self.model_file in ['inception_v3', 'googlenet']:
                model = globals().get(self.model_file)(pretrained=True,
                                                       aux_logits=False,
                                                       transform_input=False)
            else:
                model = globals().get(self.model_file)(pretrained=True)
            torch.onnx.export(model, x, destination_path, verbose=True)
            self._dst_tb_graph = onnx_util.OnnxGraph(destination_path, "onnx")
    elif self._dst_type == 'caffe2':
        if self._src_type == 'onnx':
            onnx_model_proto = ModelProto()
            with open(self.model_file, "rb") as onnx_model_path:
                onnx_model_proto.ParseFromString(onnx_model_path.read())

            init_net_model, predict_net_model = c2.onnx_graph_to_caffe2_net(onnx_model_proto)
            with open(dst_predict_net, 'wb') as f_pre:
                f_pre.write(predict_net_model.SerializeToString())
            with open(dst_init_net, 'wb') as f_init:
                f_init.write(init_net_model.SerializeToString())
            self._dst_tb_graph = c2graph_util.C2Graph(dst_predict_net, dst_init_net, "pb")

    logger.warn('Converting completed.')
    self._dst_tb_graph.ConvertNet()
    graph = self._dst_tb_graph.GetTBGraph()

    # count the number of nodes in the output model
    self.d_node_count = len(graph.node)

    return http_util.Respond(request, str(graph), 'text/x-protobuf')
# Fix batch size
fix_inp_shape(new_inputs[0], batch_size)

# 2. Remove their initializers
new_initializers = [
    init for init in model.graph.initializer
    if init.name not in nodes_to_remove and init.name not in inputs_to_remove
]

# 3. Remove nodes
new_nodes = [n for n in model.graph.node if n.name not in nodes_to_remove]

# Get output tensor types to create ValueInfo for the output info
# by running the model on a dummy input
temp_model = ModelProto()
temp_model.CopyFrom(model)
for i in new_output_names:
    op = ValueInfoProto()
    op.name = i
    temp_model.graph.output.append(op)
onnx.save(temp_model, '__temp.onnx')

sess = onnxruntime.InferenceSession('__temp.onnx')
sess_inps = sess.get_inputs()
input_dict = {}
for i in sess_inps:
    shape = fix_shape(i.shape, batch_size)
    typ = get_np_type_from_onnxruntime(i.type)
    input_dict[i.name] = np.random.rand(*shape).astype(typ)

output_tensors = sess.run(new_output_names, input_dict)
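# Possible cleanup sketch (not in the original snippet): remove the temporary
# model file once the dummy run has produced the output tensors.
import os
if os.path.exists('__temp.onnx'):
    os.remove('__temp.onnx')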