Example #1
def optimize_model(input, model_type, gpu_only, num_heads, hidden_size,
                   sequence_length, input_int32, float16):
    (optimizer_class, producer, run_onnxruntime) = MODEL_CLASSES[model_type]

    input_model_path = input
    if run_onnxruntime:
        input_model_path = optimize_by_onnxruntime(input_model_path, gpu_only)
        logger.info(
            "Use OnnxRuntime to optimize and save the optimized model to {}".
            format(input_model_path))

    model = ModelProto()
    with open(input_model_path, "rb") as f:
        model.ParseFromString(f.read())

    if model.producer_name and producer != model.producer_name:
        logger.warning(
            f"Model producer not matched: Expect {producer},  Got {model.producer_name} {model.producer_version}. Please specify correct --model_type parameter."
        )

    bert_model = optimizer_class(model, num_heads, hidden_size,
                                 sequence_length, input_int32, float16,
                                 gpu_only)
    bert_model.optimize()

    return bert_model
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True, type=str)
    parser.add_argument('--output', required=True, type=str)
    parser.add_argument('--float16', required=False, action='store_true')
    parser.set_defaults(float16=False)
    args = parser.parse_args()

    model = ModelProto()
    with open(args.input, "rb") as f:
        model.ParseFromString(f.read())

    bert_model = TinyBertOnnxModel(model, False)

    if args.float16:
        bert_model.convert_model_float32_to_float16()

    bert_model.update_graph()
    bert_model.remove_unused_constant()

    print("opset verion", bert_model.model.opset_import[0].version)

    with open(args.output, "wb") as out:
        out.write(bert_model.model.SerializeToString())

    p = Path(args.output)
    data_path = p.parent

    batch_size = 1
    sequence_length = SEQ_LEN

    generate_test_data(args.output, data_path, batch_size, sequence_length, use_cpu=not args.float16)
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True, type=str)
    parser.add_argument('--output', required=True, type=str)
    parser.add_argument('--output_optimized_model',
                        required=False,
                        action='store_true')
    parser.set_defaults(output_optimized_model=False)
    args = parser.parse_args()

    model = ModelProto()
    with open(args.input, "rb") as f:
        model.ParseFromString(f.read())

    bert_model = TinyGpt2Model(model)

    bert_model.update_graph()
    bert_model.remove_unused_constant()

    print("opset verion", bert_model.model.opset_import[0].version)

    with open(args.output, "wb") as out:
        out.write(bert_model.model.SerializeToString())

    p = Path(args.output)
    data_path = p.parent

    generate_test_data(args.output,
                       data_path,
                       batch_size=1,
                       use_cpu=True,
                       output_optimized_model=args.output_optimized_model)
Example #4
File: compose.py  Project: kaydoh/onnx
def expand_out_dim(
    model: ModelProto,
    dim_idx: int,
    inplace: Optional[bool] = False,
) -> ModelProto:
    """Inserts an extra dimension with extent 1 to each output in the graph.

    Inserts an Unsqueeze node for each output. It can be used as a utility before merging graphs,
    for example when the second one expects a batch dimension.

    Arguments:
        model (ModelProto): Model
        dim_idx (int): Index of the dimension to be inserted.
                       A negative value means counting dimensions from the back.
        inplace (bool): If True, mutates the model directly.
                        Otherwise, a copy will be created
    """
    if type(model) is not ModelProto:
        raise ValueError("model argument is not an ONNX model")

    if not inplace:
        m = ModelProto()
        m.CopyFrom(model)
        model = m

    expand_out_dim_graph(
        model.graph,
        dim_idx,
        inplace=True  # No need to create a copy, since it's a new model
    )
    return model
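A minimal usage sketch of the helper above (the one-node Identity graph is made up for illustration and assumes an onnx release where onnx.compose is available):

from onnx import TensorProto, compose, helper

# Hypothetical one-node model whose single output has shape (3, 4).
graph = helper.make_graph(
    [helper.make_node("Identity", ["X"], ["Y"])],
    "tiny",
    [helper.make_tensor_value_info("X", TensorProto.FLOAT, [3, 4])],
    [helper.make_tensor_value_info("Y", TensorProto.FLOAT, [3, 4])])
model = helper.make_model(graph)

# Insert a leading dimension of extent 1; the output shape becomes (1, 3, 4).
expanded = compose.expand_out_dim(model, dim_idx=0)
print(expanded.graph.output[0].type.tensor_type.shape)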
Example #5
    def test_caffe2_to_onnx_value_info(self):
        caffe2_net = tempfile.NamedTemporaryFile()
        output = tempfile.NamedTemporaryFile()

        model = ModelHelper(name='caffe2-to-onnx-test')
        brew.relu(model, ["X"], "Y")
        caffe2_net.write(model.net.Proto().SerializeToString())
        caffe2_net.flush()

        args = [caffe2_net.name, '--output', output.name]
        self.assertRaisesRegexp(Exception, 'value info', self._run_command,
                                caffe2_to_onnx, args)

        args.extend(
            ['--value-info',
             json.dumps({
                 'X': (TensorProto.FLOAT, (2, 2)),
             })])
        result = self._run_command(caffe2_to_onnx, args)

        onnx_model = ModelProto()
        onnx_model.ParseFromString(output.read())
        self.assertEqual(len(onnx_model.graph.node), 1)
        self.assertEqual(onnx_model.graph.node[0].op_type, 'Relu')
        self.assertEqual(len(onnx_model.graph.initializer), 0)
Example #6
    def _load_onnx(self, path):
        if not isinstance(path, onnx.ModelProto):
            _logger.info("loading the ONNX model from: " + path)
        try:
            start = time.time()
            if isinstance(path, onnx.ModelProto):
                # Already a parsed model; use it directly.
                onnx_model = path
            else:
                # Otherwise treat path as a file and parse it from disk.
                onnx_model = ModelProto()
                with open(path, 'rb') as f:
                    content = f.read()
                    onnx_model.ParseFromString(content)

            end = time.time()
            seconds = end - start
            _logger.info(
                "Loaded ONNX model in {:.3f} seconds.".format(seconds))
            # Check that the IR is well formed
            # onnx.checker.check_model(onnx_model)

            # onnx IR version
            _logger.info("ONNX IR_version {}".format(onnx_model.ir_version))

        except Exception as ex:
            _logger.error("Error occurred when loading onnx model file: " +
                          str(ex))
            raise ex

        _logger.info("ONNX Graph producer: {} version {}".format(
            onnx_model.producer_name, onnx_model.producer_version))
        _logger.info("ONNX Graph total len: {}".format(
            len(onnx_model.graph.input)))
        return onnx_model.graph
Example #7
def parse_onnx(fname):
    with open(fname, "rb") as f:
        data = f.read()
        model = ModelProto()
        model.ParseFromString(data)

    outputs = {}
    g = Graph()

    for node in model.graph.initializer:
        n = Node()
        n.op_type = "Const"
        n.name = node.name
        g.add(n)
        outputs[n.name] = n
    for node in model.graph.node:
        n = Node(node)
        for name in node.output:
            outputs[name] = n
        for name in node.input:
            o = outputs.get(name)
            if o:
                n.add_input(o)
        g.add(n)
    for node in model.graph.output:
        o = outputs.get(node.name)
        if o:
            g.add_output(o)
    return g
Example #8
    def test_caffe2_to_onnx(self):
        caffe2_net = tempfile.NamedTemporaryFile()
        caffe2_init_net = tempfile.NamedTemporaryFile()
        output = tempfile.NamedTemporaryFile()

        model = ModelHelper(name='caffe2-to-onnx-test')
        brew.relu(model, ["X"], "Y")
        caffe2_net.write(model.net.Proto().SerializeToString())
        caffe2_net.flush()

        init_model = ModelHelper(name='caffe2-to-onnx-init-test')
        init_model.net.GivenTensorFill([], 'X', shape=[2, 2],
                                       values=np.zeros((2, 2)).flatten().astype(float))
        caffe2_init_net.write(init_model.net.Proto().SerializeToString())
        caffe2_init_net.flush()

        self._run_command(
            caffe2_to_onnx, [
                caffe2_net.name,
                '--caffe2-init-net', caffe2_init_net.name,
                '--output', output.name,
            ],
            catch_exceptions=False,
        )

        onnx_model = ModelProto()
        onnx_model.ParseFromString(output.read())
        self.assertEqual(len(onnx_model.graph.node), 1)
        self.assertEqual(onnx_model.graph.node[0].op_type, 'Relu')
        self.assertEqual(len(onnx_model.graph.initializer), 1)
        self.assertEqual(onnx_model.graph.initializer[0].name, onnx_model.graph.input[0].name)
Example #9
def optimize_model(input,
                   model_type,
                   num_heads,
                   hidden_size,
                   opt_level=99,
                   optimization_options=None):
    (optimizer_class, producer, run_onnxruntime) = MODEL_CLASSES[model_type]

    input_model_path = input
    if run_onnxruntime and opt_level > 0:
        input_model_path = optimize_by_onnxruntime(input_model_path,
                                                   use_gpu=False,
                                                   opt_level=opt_level)
        logger.info(
            "Use OnnxRuntime to optimize and save the optimized model to {}".
            format(input_model_path))

    model = ModelProto()
    with open(input_model_path, "rb") as f:
        model.ParseFromString(f.read())

    if model.producer_name and producer != model.producer_name:
        logger.warning(
            f"Model producer not matched: Expect {producer},  Got {model.producer_name} {model.producer_version}. Please specify correct --model_type parameter."
        )

    if optimization_options is None:
        optimization_options = BertOptimizationOptions(model_type)

    bert_model = optimizer_class(model, num_heads, hidden_size)
    bert_model.optimize(optimization_options)

    return bert_model
Example #10
    def test_pytorch_model_0_gpu_onnxruntime(self):
        if 'CUDAExecutionProvider' not in onnxruntime.get_available_providers(
        ):
            print(
                "skip test_pytorch_model_0_gpu_onnxruntime since no gpu found")
            return

        input = _get_test_model_path('bert_pytorch_0')
        output = 'temp.onnx'
        optimize_by_onnxruntime(input,
                                use_gpu=True,
                                optimized_model_path=output)
        model = ModelProto()
        with open(output, "rb") as f:
            model.ParseFromString(f.read())
        os.remove(output)
        bert_model = OnnxModel(model)
        expected_node_count = {
            'EmbedLayerNormalization': 1,
            'Attention': 12,
            'SkipLayerNormalization': 24,
            'Gelu': 0,
            'FastGelu': 12,
            'BiasGelu': 0
        }
        self.verify_node_count(bert_model, expected_node_count,
                               'test_pytorch_model_0_gpu_onnxruntime')
Example #11
def onnx_to_caffe2(onnx_model, output, init_net_output):
    onnx_model_proto = ModelProto()
    onnx_model_proto.ParseFromString(onnx_model.read())

    init_net, predict_net = c2.onnx_graph_to_caffe2_net(onnx_model_proto)
    init_net_output.write(init_net.SerializeToString())
    output.write(predict_net.SerializeToString())
Example #12
File: compose_test.py  Project: onnx/onnx
    def test_expand_out_dim(self) -> None:
        '''
        Tests expanding output dimensions. The resulting graph should have the same output names,
        but with one more dimension at the specified index.
        '''
        m1 = _load_model(m1_def)

        def _check_model(m1: ModelProto, m2: ModelProto, dim_idx: int) -> None:
            for out_g2, out_g1 in zip(m2.graph.output, m1.graph.output):
                self.assertEqual(out_g2.name, out_g1.name)
                self.assertEqual(out_g2.type.tensor_type.elem_type,
                                 out_g1.type.tensor_type.elem_type)
                expected_out_shape = _get_shape(out_g1)
                expected_out_shape.insert(dim_idx, 1)
                self.assertEqual(_get_shape(out_g2), expected_out_shape)

        for dim_idx in [0, 2, -1, -3]:
            m2 = compose.expand_out_dim(m1, dim_idx)
            _check_model(m1, m2, dim_idx)

        # Test inplace
        m2 = ModelProto()
        m2.CopyFrom(m1)
        dim_idx = 0
        compose.expand_out_dim(m2, dim_idx, inplace=True)
        _check_model(m1, m2, dim_idx)
Example #13
def main():
    args = parse_arguments()
    setup_logging(args.verbose)

    output_names = None if args.output_names is None else args.output_names.split(
        ";")

    model = ModelProto()
    with open(args.input, "rb") as input_file:
        model.ParseFromString(input_file.read())
    onnx_model = OnnxModel(model)

    optimizer = BertOnnxModelShapeOptimizer(onnx_model)

    optimizer.optimize(
        args.output,
        args.input_ids,
        args.segment_ids,
        args.input_mask,
        args.enable_shape_opt,
        args.enable_reshape_opt,
        output_names,
        args.batch_size,
        args.sequence_length,
        args.verbose,
    )
Example #14
def main():
    args = get_args()

    with open(args.input, "rb") as f:
        data = f.read()
        model = ModelProto()
        model.ParseFromString(data)

    if args.check:
        onnx.checker.check_model(model)

    if args.stats:
        ops = collections.Counter()
        for node in model.graph.node:
            ops[node.op_type] += 1
        print(ops, "\n\n")

    if args.meta:
        fields = [
            "ir_version", "producer_name", "producer_version", "name",
            "opset_import"
        ]
        for name in fields:
            value = getattr(model, name, None)
            if value:
                print("{} = {}".format(name, value))
        for i in model.metadata_props:
            print("meta.{} = {}", i.key, i.value)

    print(helper.printable_graph(model.graph))

    if args.pbtxt:
        with open(args.pbtxt, "w") as f:
            f.write(str(model.graph))
Example #15
    def run_node(cls, node, inputs, device='CPU', outputs_info=None):
        inputs_info = [(x.dtype, x.shape) for x in inputs]
        input_value_infos = [
            helper.make_tensor_value_info(x, NP_TYPE_TO_TENSOR_TYPE[t], shape)
            for x, (t, shape) in zip(node.input, inputs_info)
        ]
        if outputs_info:
            # Output metadata is optional; only build the expected value
            # infos (and run shape inference) when it is provided.
            output_value_infos = [
                helper.make_tensor_value_info(x, NP_TYPE_TO_TENSOR_TYPE[t],
                                              shape)
                for x, (t, shape) in zip(node.output, outputs_info)
            ]
            graph = helper.make_graph([node], "test", input_value_infos, [])
            orig_model = helper.make_model(graph, producer_name='onnx-test')
            orig_model_str = orig_model.SerializeToString()
            inferred_model_str = onnx.shape_inference.infer_shapes(
                orig_model_str)
            inferred_model = ModelProto()
            inferred_model.ParseFromString(inferred_model_str)

            # Allow shape inference to not return anything, but if it
            # does then check that it's correct
            if inferred_model.graph.value_info:
                assert (list(
                    inferred_model.graph.value_info) == output_value_infos)
        raise BackendIsNotSupposedToImplementIt(
            "This is the dummy backend test that doesn't verify the results but does run the shape inference"
        )
Example #16
def make_model(graph: GraphProto, **kwargs: Any) -> ModelProto:
    """Construct a ModelProto

    Arguments:
        graph (GraphProto): *make_graph* returns
        **kwargs: any attribute to add to the returned instance
    Returns:
        ModelProto
    """
    model = ModelProto()
    # Touch model.ir_version so it is stored as the version from which it is
    # generated.
    model.ir_version = IR_VERSION
    model.graph.CopyFrom(graph)

    opset_imports: Optional[Sequence[OperatorSetIdProto]] = None
    opset_imports = kwargs.pop('opset_imports', None)  # type: ignore
    if opset_imports is not None:
        model.opset_import.extend(opset_imports)
    else:
        # Default import
        imp = model.opset_import.add()
        imp.version = defs.onnx_opset_version()

    functions: Optional[Sequence[FunctionProto]] = None
    functions = kwargs.pop('functions', None)  # type: ignore
    if functions is not None:
        model.functions.extend(functions)

    for k, v in kwargs.items():
        # TODO: Does this work with repeated fields?
        setattr(model, k, v)
    return model
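For reference, a hedged sketch of calling this constructor; the Relu graph and the producer_name value are illustrative only:

from onnx import TensorProto, defs, helper

graph = helper.make_graph(
    [helper.make_node("Relu", ["X"], ["Y"])],
    "relu-graph",
    [helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 4])],
    [helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4])])

# opset_imports (and functions) are consumed explicitly above; any other
# keyword argument, such as producer_name, is set directly on the proto.
model = helper.make_model(
    graph,
    producer_name="example",
    opset_imports=[helper.make_opsetid("", defs.onnx_opset_version())])
print(model.ir_version, model.opset_import[0].version)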
Example #17
def get_bert_inputs(
    onnx_file: str,
    input_ids_name: Optional[str] = None,
    segment_ids_name: Optional[str] = None,
    input_mask_name: Optional[str] = None,
) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], Optional[np.ndarray]]:
    """Find graph inputs for BERT model.
    First, we will deduce inputs from EmbedLayerNormalization node.
    If not found, we will guess the meaning of graph inputs based on naming.

    Args:
        onnx_file (str): onnx model path
        input_ids_name (str, optional): Name of graph input for input IDs. Defaults to None.
        segment_ids_name (str, optional): Name of graph input for segment IDs. Defaults to None.
        input_mask_name (str, optional): Name of graph input for attention mask. Defaults to None.

    Returns:
        Tuple[Optional[np.ndarray], Optional[np.ndarray], Optional[np.ndarray]]: input tensors of input_ids,
                                                                                 segment_ids and input_mask
    """
    model = ModelProto()
    with open(onnx_file, "rb") as file:
        model.ParseFromString(file.read())

    onnx_model = OnnxModel(model)
    return find_bert_inputs(onnx_model, input_ids_name, segment_ids_name,
                            input_mask_name)
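A short usage sketch, assuming a BERT model file on disk (the path below is hypothetical):

# The three results follow the tuple described in the docstring above;
# entries that cannot be identified may be None.
input_ids, segment_ids, input_mask = get_bert_inputs("bert_base_cased.onnx")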
Example #18
def main():  # type: () -> None
    parser = argparse.ArgumentParser(description="ONNX net drawer")
    parser.add_argument(
        "--input",
        type=Text, required=True,
        help="The input protobuf file.",
    )
    parser.add_argument(
        "--output",
        type=Text, required=True,
        help="The output protobuf file.",
    )
    parser.add_argument(
        "--rankdir", type=Text, default='LR',
        help="The rank direction of the pydot graph.",
    )
    parser.add_argument(
        "--embed_docstring", action="store_true",
        help="Embed docstring as javascript alert. Useful for SVG format.",
    )
    args = parser.parse_args()
    model = ModelProto()
    with open(args.input, 'rb') as fid:
        content = fid.read()
        model.ParseFromString(content)
    pydot_graph = GetPydotGraph(
        model.graph,
        name=model.graph.name,
        rankdir=args.rankdir,
        node_producer=GetOpNodeProducer(
            embed_docstring=args.embed_docstring,
            **OP_STYLE
        ),
    )
    pydot_graph.write_dot(args.output)
Example #19
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True, type=str)
    parser.add_argument('--output', required=True, type=str)
    parser.add_argument(
        '--framework',
        required=True,
        type=str,
        help="Original framework. Only support TensorFlow and PyTorch")

    # model parameters
    parser.add_argument('--num_heads',
                        required=False,
                        type=int,
                        default=12,
                        help="number of attention heads")
    parser.add_argument('--hidden_size', required=False, type=int, default=768)
    parser.add_argument('--sequence_length',
                        required=False,
                        type=int,
                        default=128)

    # Use int32 (instead of int64) tensor as input to avoid unnecessary data
    # type cast.
    parser.add_argument('--input_int32', required=False, action='store_true')
    parser.set_defaults(input_int32=False)

    # For NVidia GPU with Tensor Core like V100 and T4, half-precision float
    # brings better performance.
    parser.add_argument('--float16', required=False, action='store_true')
    parser.set_defaults(float16=False)

    parser.add_argument('--gpu_only', required=False, action='store_true')
    parser.set_defaults(gpu_only=False)

    parser.add_argument('--verbose', required=False, action='store_true')
    parser.set_defaults(verbose=False)

    args = parser.parse_args()

    model = ModelProto()
    with open(args.input, "rb") as f:
        model.ParseFromString(f.read())

    if args.framework.lower() == 'tensorflow':
        bert_model = BertOnnxModelTF(model, args.num_heads, args.hidden_size,
                                     args.sequence_length, args.input_int32,
                                     args.float16, args.gpu_only, args.verbose)
    elif args.framework.lower() == 'pytorch':
        bert_model = BertOnnxModel(model, args.num_heads, args.hidden_size,
                                   args.sequence_length, args.input_int32,
                                   args.float16, args.gpu_only, args.verbose)
    else:
        print("Unsupported framework: " + args.framework)
        return

    bert_model.optimize()

    with open(args.output, "wb") as out:
        out.write(bert_model.model.SerializeToString())
Example #20
 def _optimized(self, graph, opts):
     orig_model = helper.make_model(graph, producer_name='onnx-test')
     orig_model_str = orig_model.SerializeToString()
     optimized_model_str = onnx.optimizer.optimize(orig_model_str, opts)
     optimized_model = ModelProto()
     optimized_model.ParseFromString(optimized_model_str)
     checker.check_model(optimized_model)
     return optimized_model
Example #21
 def _optimized(self, graph):
     orig_model = helper.make_model(graph,
                                    producer_name='onnx-to-caffe2-test')
     orig_model_str = orig_model.SerializeToString()
     optimized_model_str = c2.Caffe2Backend.optimize_onnx(orig_model_str)
     optimized_model = ModelProto()
     optimized_model.ParseFromString(optimized_model_str)
     return optimized_model
Example #22
 def create_caffe2_predictor(onnx_file_path):
     with open(onnx_file_path, 'rb') as onnx_model:
         onnx_model_proto = ModelProto()
         onnx_model_proto.ParseFromString(onnx_model.read())
         init_net, predict_net = c2.onnx_graph_to_caffe2_net(
             onnx_model_proto)
         predictor = workspace.Predictor(init_net, predict_net)
     return predictor
Example #23
def main():
    parser = argparse.ArgumentParser(description="ONNX net drawer")
    parser.add_argument(
        "--input",
        type=str,
        required=True,
        help="The input protobuf file.",
    )
    parser.add_argument(
        "--output",
        type=str,
        required=True,
        help="The output protobuf file.",
    )
    parser.add_argument(
        "--rankdir",
        type=str,
        default='LR',
        help="The rank direction of the pydot graph.",
    )
    parser.add_argument(
        "--embed_docstring",
        action="store_true",
        help="Embed docstring as javascript alert. Useful for SVG format.",
    )
    parser.add_argument(
        "--marked",
        type=int,
        default=0,
        help="0: original, 1: marked",
    )
    parser.add_argument(
        "--marked_list",
        type=str,
        default="",
        help="if 2_3_4, means node 2,3,4 will be marked",
    )
    args = parser.parse_args()
    if args.marked:
        marked_list = [int(e) for e in args.marked_list.split('_')]
    else:
        marked_list = []
    model = ModelProto()
    with open(args.input, 'rb') as fid:
        content = fid.read()
        model.ParseFromString(content)
    pydot_graph = GetPydotGraph(
        model.graph,
        name=model.graph.name,
        rankdir=args.rankdir,
        node_producer=GetOpNodeProducer(embed_docstring=args.embed_docstring,
                                        #**OP_STYLE
                                        ),
        marked_list=marked_list,
    )
    pydot_graph.write_dot(args.output)
Example #24
def main():
    args = get_args()

    with open(args.input, "rb") as f:
        data = f.read()
        model = ModelProto()
        model.ParseFromString(data)

    if args.stats:
        ops = collections.Counter()
        for node in model.graph.node:
            ops[node.op_type] += 1
        print(ops, "\n\n")

    if args.meta:
        fields = [
            "ir_version", "producer_name", "producer_version", "name",
            "opset_import"
        ]
        for name in fields:
            value = getattr(model, name, None)
            if value:
                print("{} = {}".format(name, value))
        for i in model.metadata_props:
            print("meta.{} = {}", i.key, i.value)

    print(helper.printable_graph(model.graph))

    if args.check:
        onnx.checker.check_model(model)
        inferred_model = shape_inference.infer_shapes(model)
        onnx.checker.check_model(inferred_model)

    if args.pbtxt:
        with open(args.pbtxt, "w") as f:
            f.write(str(model.graph))

    if args.dot:
        with open(args.dot, "w") as f:
            f.write("digraph graphname {\n")
            for node in model.graph.node:
                output_name = node.name
                name = node.name
                color = ""
                if node.op_type.startswith("_"):
                    color = ' color="yellow"'
                if node.op_type == "CELL":
                    color = ' color="red"'
                f.write('"{}" [label="{},{}"{}];\n'.format(
                    output_name, node.op_type, name, color))
                for input_name in node.input:
                    parts = input_name.split(":")
                    input_name = re.sub(r"^\^", "", parts[0])
                    f.write('  "{}" -> "{}";\n'.format(input_name,
                                                       output_name))
            f.write("}\n")
Example #25
File: helper.py  Project: zmoon111/onnx
def make_model(graph, **kwargs):
    model = ModelProto()
    # Touch model.ir_version so it is stored as the version from which it is
    # generated.
    model.ir_version = IR_VERSION
    model.graph.CopyFrom(graph)

    for k, v in kwargs.items():
        setattr(model, k, v)
    return model
Example #26
def model_proto_from_zip(zip_path, external_tensor_storage):
    model_proto = ModelProto()
    with zipfile.ZipFile(zip_path, 'r') as z:
        for n in z.namelist():
            f = z.open(n)
            if n.endswith(".onnx"):
                model_proto.ParseFromString(f.read())
            else:
                external_tensor_storage.name_to_tensor_data[n] = f.read()
    return model_proto
Example #27
 def test_version_exists(self):  # type: () -> None
     model = ModelProto()
     # When we create it, graph should not have a version string.
     self.assertFalse(model.HasField('ir_version'))
     # We should touch the version so it is annotated with the current
     # ir version of the running ONNX
     model.ir_version = IR_VERSION
     model_string = model.SerializeToString()
     model.ParseFromString(model_string)
     self.assertTrue(model.HasField('ir_version'))
     # Check if the version is correct.
     self.assertEqual(model.ir_version, IR_VERSION)
Example #28
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True, type=str)
    parser.add_argument('--output', required=True, type=str)

    # model parameters
    parser.add_argument('--num_heads', required=False, type=int, default=12, help="number of attention heads")
    parser.add_argument('--hidden_size', required=False, type=int, default=768)
    parser.add_argument('--sequence_length', required=False, type=int, default=128)

    # Use int32 (instead of int64) tensor as input to avoid unnecessary data type cast.
    parser.add_argument('--input_int32', required=False, action='store_true')
    parser.set_defaults(input_int32=False)

    # For NVidia GPU with Tensor Core like V100 and T4, half-precision float brings better performance.
    parser.add_argument('--float16', required=False, action='store_true')
    parser.set_defaults(float16=False)

    parser.add_argument('--verbose', required=False, action='store_true')
    parser.set_defaults(verbose=False)

    args = parser.parse_args()

    model = ModelProto()
    with open(args.input, "rb") as f:
        model.ParseFromString(f.read())

    bert_model = BertOnnxModel(model, args.num_heads, args.hidden_size, args.sequence_length)

    bert_model.fuse_layer_norm()

    bert_model.fuse_gelu()

    bert_model.fuse_reshape()

    bert_model.fuse_attention(args.verbose)

    bert_model.fuse_embed_layer(args.verbose)
    
    if bert_model.embed_node is None:
        print("Failed to fuse embedding layer.")
        return

    if args.input_int32:
        bert_model.change_input_to_int32()
    else:
        bert_model.cast_input_to_int32()

    if args.float16:
        bert_model.convert_model_float32_to_float16()

    with open(args.output, "wb") as out:
        out.write(bert_model.model.SerializeToString())
Example #29
def build_engine_from_onnx(onnx_path, engine_name, batch_size, TRT_LOGGER):
    model = ModelProto()
    with open(onnx_path, "rb") as f:
        model.ParseFromString(f.read())

    d0 = model.graph.input[0].type.tensor_type.shape.dim[1].dim_value
    d1 = model.graph.input[0].type.tensor_type.shape.dim[2].dim_value
    d2 = model.graph.input[0].type.tensor_type.shape.dim[3].dim_value
    shape = [batch_size, d0, d1, d2]
    engine = eng.build_engine(TRT_LOGGER, onnx_path, shape=shape)
    eng.save_engine(engine, engine_name)
    return engine
Example #30
    def run(self, onnx_model):
        model = ModelProto()
        content = onnx_model
        model.ParseFromString(content)

        pydot_graph = self.GetPydotGraph(
            model.graph,
            name=model.graph.name,
            rankdir='TD',
            node_producer=self.GetOpNodeProducer(
                **OP_STYLE
            ),
        )
        return pydot_graph.create(format='png')
Example #31
def main():
    args = parse_arguments()
    setup_logging(args.verbose)

    exclude_names = set() if args.exclude is None else set(args.exclude.split(';'))

    model = ModelProto()
    with open(args.input, "rb") as input_file:
        model.ParseFromString(input_file.read())

    convert_initializers(model, exclude_names, args.sparsity_threshold, args.tolerance)

    with open(args.output, "wb") as output_file:
        s = model.SerializeToString()
        output_file.write(s)
Example #32
File: helper.py  Project: harshit98/onnx
def make_model(graph, **kwargs):  # type: (GraphProto, **Any) -> ModelProto
    model = ModelProto()
    # Touch model.ir_version so it is stored as the version from which it is
    # generated.
    model.ir_version = IR_VERSION
    model.graph.CopyFrom(graph)

    opset_imports = None  # type: Optional[Sequence[OperatorSetIdProto]]
    opset_imports = kwargs.pop('opset_imports', None)  # type: ignore
    if opset_imports is not None:
        model.opset_import.extend(opset_imports)
    else:
        # Default import
        imp = model.opset_import.add()
        imp.version = defs.onnx_opset_version()

    for k, v in kwargs.items():
        # TODO: Does this work with repeated fields?
        setattr(model, k, v)
    return model
Example #33
 def _simple_model(self):  # type: () -> ModelProto
     # Create a ModelProto.
     model = ModelProto()
     model.ir_version = IR_VERSION
     return model