Example #1
    def test_save_and_load_model(self):  # type: () -> None
        proto = self._simple_model()
        cls = ModelProto
        proto_string = onnx._serialize(proto)

        # Test if input is string
        loaded_proto = onnx.load_model_from_string(proto_string)
        self.assertTrue(proto == loaded_proto)

        # Test if input has a read function
        f = io.BytesIO()
        onnx.save_model(proto_string, f)
        f = io.BytesIO(f.getvalue())
        loaded_proto = onnx.load_model(f, cls)
        self.assertTrue(proto == loaded_proto)

        # Test if input is a file name
        try:
            fi = tempfile.NamedTemporaryFile(delete=False)
            onnx.save_model(proto, fi)
            fi.close()

            loaded_proto = onnx.load_model(fi.name, cls)
            self.assertTrue(proto == loaded_proto)
        finally:
            os.remove(fi.name)
Example #2
def run(args):
    onnx_model = onnx.load_model(os.path.join(args.test_dir, 'model.onnx'))
    symbol, params = nnvm.frontend.from_onnx(onnx_model)
    input_names = symbol.list_input_names()
    output_names = symbol.list_output_names()

    test_data_dir = os.path.join(args.test_dir, 'test_data_set_0')
    inputs, outputs = load_test_data(test_data_dir, input_names, output_names)
    inputs = dict(inputs)

    # assert len(input_names) == len(inputs) + len(params)
    # assert len(output_names) == len(outputs)

    graph, lib, params = compile(
        symbol, args.target, input_names, inputs, params,
        args.opt_level, args.autotvm_log)

    if args.dump_nnvm:
        print(graph.ir())
        print(graph.json())

    ctx = tvm.gpu()

    # Prepare inputs.
    tvm_inputs = {}
    for name, value in inputs.items():
        tvm_inputs[name] = tvm.nd.array(value, ctx=ctx)
    for name, value in params.items():
        tvm_inputs[name] = tvm.nd.array(value, ctx=ctx)

    graph_module = None
    if args.debug:
        try:
            graph_module = debug_runtime.create(graph, lib, ctx)
        except Exception:
            print('debug_runtime is disabled. '
                  'Set USE_GRAPH_RUNTIME_DEBUG=ON and rebuild TVM')
    if graph_module is None:
        graph_module = graph_runtime.create(graph, lib, ctx)

    graph_module.set_input(**tvm_inputs)

    graph_module.run()

    for i, (name, expected) in enumerate(outputs):
        tvm_output = tvm.nd.empty(expected.shape, expected.dtype, ctx=ctx)
        actual = graph_module.get_output(i, tvm_output).asnumpy()
        np.testing.assert_allclose(expected, actual,
                                   rtol=1e-3, atol=1e-4, err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    if args.iterations > 1:
        num_iterations = args.iterations - 1
        start = time.time()
        for t in range(num_iterations):
            graph_module.run()
            cupy.cuda.device.Device().synchronize()
        elapsed = time.time() - start
        print('Elapsed: %.3f msec' % (elapsed * 1000 / num_iterations))
Example #3
def import_to_gluon(model_file, ctx):
    """
    Imports the ONNX model files, passed as a parameter, into Gluon SymbolBlock object.

    Parameters
    ----------
    model_file : str
        ONNX model file name
    ctx : Context or list of Context
        Loads the model into one or many context(s).

    Returns
    -------
    sym_block : :class:`~mxnet.gluon.SymbolBlock`
        A SymbolBlock object representing the given model file.

    Notes
    -----
    This method is available when you ``import mxnet.contrib.onnx``

    """
    graph = GraphProto()
    try:
        import onnx
    except ImportError:
        raise ImportError("Onnx and protobuf need to be installed. Instructions to"
                          + " install - https://github.com/onnx/onnx#installation")
    model_proto = onnx.load_model(model_file)
    net = graph.graph_to_gluon(model_proto.graph, ctx)
    return net
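
A minimal usage sketch for the helper above (assuming MXNet with the ONNX contrib module is installed; the file name and input shape are hypothetical and must match the exported graph):

import mxnet as mx
from mxnet.contrib import onnx as onnx_mxnet

# "model.onnx" is a hypothetical file; ctx can also be a list of contexts.
net = onnx_mxnet.import_to_gluon("model.onnx", ctx=mx.cpu())
# The input shape below is an assumption and must match the model's input.
out = net(mx.nd.ones((1, 3, 224, 224)))
print(out.shape)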
Example #4
def get_model_metadata(model_file):
    """
    Returns the name and shape information of input and output tensors of the given ONNX model file.

    Notes
    -----
    This method is available when you ``import mxnet.contrib.onnx``

    Parameters
    ----------
    model_file : str
        ONNX model file name

    Returns
    -------
    model_metadata : dict
        A dictionary object mapping various metadata to its corresponding value.
        The dictionary will have the following template::

          'input_tensor_data' : list of tuples representing the shape of the input paramters
          'output_tensor_data' : list of tuples representing the shape of the output of the model
    """
    graph = GraphProto()

    try:
        import onnx
    except ImportError:
        raise ImportError("Onnx and protobuf need to be installed. "
                          + "Instructions to install - https://github.com/onnx/onnx")
    model_proto = onnx.load_model(model_file)
    metadata = graph.get_graph_metadata(model_proto.graph)
    return metadata
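
A short usage sketch for the metadata helper (the file name is hypothetical; the keys follow the template documented above):

from mxnet.contrib import onnx as onnx_mxnet

metadata = onnx_mxnet.get_model_metadata("model.onnx")  # hypothetical path
for name, shape in metadata["input_tensor_data"]:
    print("input :", name, shape)
for name, shape in metadata["output_tensor_data"]:
    print("output:", name, shape)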
Example #5
    def test_import_export(self):
        for test in test_cases:
            test_name, mxnet_op, onnx_name, inputs, attrs, mxnet_specific, fix_attrs, check_value, check_shape = test
            with self.subTest(test_name):
                names, input_tensors, inputsym = get_input_tensors(inputs)
                if inputs:
                    test_op = mxnet_op(*inputsym, **attrs)
                    mxnet_output = forward_pass(test_op, None, None, names, inputs)
                    outputshape = np.shape(mxnet_output)
                else:
                    test_op = mxnet_op(**attrs)
                    shape = attrs.get('shape', (1,))
                    x = mx.nd.zeros(shape, dtype='float32')
                    xgrad = mx.nd.zeros(shape, dtype='float32')
                    exe = test_op.bind(ctx=mx.cpu(), args={'x': x}, args_grad={'x': xgrad})
                    mxnet_output = exe.forward(is_train=False)[0].asnumpy()
                    outputshape = np.shape(mxnet_output)

                if mxnet_specific:
                    onnxmodelfile = onnx_mxnet.export_model(test_op, {}, [np.shape(ip) for ip in inputs],
                                                            np.float32,
                                                            onnx_name + ".onnx")
                    onnxmodel = load_model(onnxmodelfile)
                else:
                    onnx_attrs = _fix_attributes(attrs, fix_attrs)
                    onnxmodel = get_onnx_graph(test_name, names, input_tensors, onnx_name, outputshape, onnx_attrs)

                bkd_rep = backend.prepare(onnxmodel, operation='export')
                output = bkd_rep.run(inputs)

                if check_value:
                    npt.assert_almost_equal(output[0], mxnet_output)

                if check_shape:
                    npt.assert_equal(output[0].shape, outputshape)

        input1 = get_rnd((1, 10, 2, 3))
        ipsym = mx.sym.Variable("input1")
        for test in test_scalar_ops:
            if test == 'Add':
                outsym = 2 + ipsym
            if test == "Sub":
                outsym = ipsym - 2
            if test == "rSub":
                outsym = ipsym.__rsub__(2)
            if test == "Mul":
                outsym = 2 * ipsym
            if test == "Div":
                outsym = ipsym / 2
            if test == "Pow":
                outsym = ipsym ** 2
            forward_op = forward_pass(outsym, None, None, ['input1'], input1)
            converted_model = onnx_mxnet.export_model(outsym, {}, [np.shape(input1)], np.float32,
                                                      onnx_file_path=outsym.name + ".onnx")

            sym, arg_params, aux_params = onnx_mxnet.import_model(converted_model)
        result = forward_pass(sym, arg_params, aux_params, ['input1'], input1)

        npt.assert_almost_equal(result, forward_op)
Example #6
def verify_onnx_forward_impl(graph_file, data_shape, out_shape):
    dtype = 'float32'
    x = np.random.uniform(size=data_shape)
    model = onnx.load_model(graph_file)
    c2_out = get_caffe2_output(model, x, dtype)
    for target, ctx in ctx_list():
        tvm_out = get_tvm_output(model, x, target, ctx, out_shape, dtype)
        tvm.testing.assert_allclose(c2_out, tvm_out, rtol=1e-5, atol=1e-5)
Example #7
    def test_exports(self):
        input_shape = (2, 1, 3, 1)
        for test in export_test_cases:
            test_name, onnx_name, mx_op, attrs = test
            input_sym = mx.sym.var('data')
            outsym = mx_op(input_sym, **attrs)
            converted_model = onnx_mxnet.export_model(outsym, {}, [input_shape], np.float32,
                                                      onnx_file_path=outsym.name + ".onnx")
            model = load_model(converted_model)
            checker.check_model(model)
Example #8
def compare_graph(onnx_file, nnvm_sym, ishape):
    onnx_model = onnx.load_model(onnx_file)
    onnx_sym, params = nnvm.frontend.from_onnx(onnx_model)
    g1 = nnvm.graph.create(onnx_sym)
    g2 = nnvm.graph.create(nnvm_sym)
    input_name = onnx_model.graph.input[0].name
    ishapes = {input_name: ishape}
    graph_attr.set_shape_inputs(g1, ishapes)
    graph_attr.set_shape_inputs(g2, ishapes)
    g1 = g1.apply("InferShape").apply("SimplifyInference")
    g2 = g2.apply("InferShape").apply("SimplifyInference")
    graph_util.check_graph_equal(g1, g2)
Example #9
def get_network(name, batch_size):
    """Get the symbol definition and random weight of a network"""
    input_shape = (batch_size, 3, 224, 224)
    output_shape = (batch_size, 1000)

    if "resnet" in name:
        n_layer = int(name.split('-')[1])
        net, params = nnvm.testing.resnet.get_workload(num_layers=n_layer, batch_size=batch_size)
    elif "vgg" in name:
        n_layer = int(name.split('-')[1])
        net, params = nnvm.testing.vgg.get_workload(num_layers=n_layer, batch_size=batch_size)
    elif name == 'mobilenet':
        net, params = nnvm.testing.mobilenet.get_workload(batch_size=batch_size)
    elif name == 'squeezenet_v1.1':
        net, params = nnvm.testing.squeezenet.get_workload(batch_size=batch_size, version='1.1')
    elif name == 'inception_v3':
        input_shape = (1, 3, 299, 299)
        net, params = nnvm.testing.inception_v3.get_workload(batch_size=batch_size)
    elif name == 'custom':
        # an example for custom network
        from nnvm.testing import utils
        net = nnvm.sym.Variable('data')
        net = nnvm.sym.conv2d(net, channels=4, kernel_size=(3,3), padding=(1,1))
        net = nnvm.sym.flatten(net)
        net = nnvm.sym.dense(net, units=1000)
        net, params = utils.create_workload(net, batch_size, (3, 224, 224))
    elif name == 'mxnet':
        # an example for mxnet model
        from mxnet.gluon.model_zoo.vision import get_model
        block = get_model('resnet18_v1', pretrained=True)
        net, params = nnvm.frontend.from_mxnet(block)
        net = nnvm.sym.softmax(net)
    else:
        onnx_model = onnx.load_model(
            'out/models/resnet50_conv_bs1_0/model.onnx')
        net, params = nnvm.frontend.from_onnx(onnx_model)
        output_shape = (batch_size, 6, 112, 112)

    return net, params, input_shape, output_shape
Example #10
def import_model(model_file):
    """Imports the ONNX model file, passed as a parameter, into MXNet symbol and parameters.
    Operator support and coverage -
    https://cwiki.apache.org/confluence/display/MXNET/MXNet-ONNX+Integration

    Parameters
    ----------
    model_file : str
        ONNX model file name

    Returns
    -------
    sym : :class:`~mxnet.symbol.Symbol`
        MXNet symbol object

    arg_params : dict of ``str`` to :class:`~mxnet.ndarray.NDArray`
        Dict of converted parameters stored in ``mxnet.ndarray.NDArray`` format

    aux_params : dict of ``str`` to :class:`~mxnet.ndarray.NDArray`
        Dict of converted parameters stored in ``mxnet.ndarray.NDArray`` format

    Notes
    -----
    This method is available when you ``import mxnet.contrib.onnx``

    """
    graph = GraphProto()

    try:
        import onnx
    except ImportError:
        raise ImportError("Onnx and protobuf need to be installed. "
                          + "Instructions to install - https://github.com/onnx/onnx")
    # loads model file and returns ONNX protobuf object
    model_proto = onnx.load_model(model_file)
    sym, arg_params, aux_params = graph.from_onnx(model_proto.graph)
    return sym, arg_params, aux_params
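
For reference, a hedged sketch of binding the imported symbol into an MXNet Module for inference (the data name and input shape are assumptions that depend on the ONNX graph; use get_model_metadata() from Example #4 to look them up):

import mxnet as mx
from mxnet.contrib import onnx as onnx_mxnet

sym, arg_params, aux_params = onnx_mxnet.import_model("model.onnx")  # hypothetical path

# "input_0" and the shape are assumptions about the graph's input.
mod = mx.mod.Module(symbol=sym, data_names=["input_0"], label_names=None, context=mx.cpu())
mod.bind(for_training=False, data_shapes=[("input_0", (1, 3, 224, 224))])
mod.set_params(arg_params=arg_params, aux_params=aux_params, allow_missing=True)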
Example #11
def optimize_model(input,
                   model_type='bert',
                   num_heads=12,
                   hidden_size=768,
                   optimization_options=None,
                   opt_level=0,
                   use_gpu=False,
                   only_onnxruntime=False):
    """ Optimize Model by OnnxRuntime and/or offline fusion logic.

    The following optimizes the model with OnnxRuntime only, and no offline fusion logic:
        optimize_model(input, opt_level=1, use_gpu=False, only_onnxruntime=True)
    If you want to optimize the model with offline fusion logic:
        optimize_model(input, model_type, num_heads=12, hidden_size=768, optimization_options=your_options)

    Args:
        input (str): input model path.
        model_type (str): model type - like bert, bert_tf, bert_keras or gpt2.
        num_heads (int): number of attention heads.
        hidden_size (int): hidden size.
        optimization_options (OptimizationOptions or None): optimization options that can be used to turn some fusions on or off.
        opt_level (int): onnxruntime graph optimization level (0, 1, 2 or 99). When the level > 0, onnxruntime will be used to optimize model first.
        use_gpu (bool): use gpu or not for onnxruntime.
        only_onnxruntime (bool): only use onnxruntime to optimize model, and no offline fusion logic is used.

     Returns:
        object of an optimizer class.
    """
    (optimizer_class, producer, run_onnxruntime) = MODEL_CLASSES[model_type]

    temp_model_path = None
    if opt_level > 1:  # Optimization specified for an execution provider.
        temp_model_path = optimize_by_onnxruntime(input,
                                                  use_gpu=use_gpu,
                                                  opt_level=opt_level)
    elif run_onnxruntime:
        # Use Onnxruntime to do optimizations (like constant folding and cast elimination) that are not specific to an execution provider.
        # CPU provider is used here so that there is no extra node for GPU memory copy.
        temp_model_path = optimize_by_onnxruntime(input,
                                                  use_gpu=False,
                                                  opt_level=1)

    model = load_model(temp_model_path or input,
                       format=None,
                       load_external_data=True)

    if model.producer_name and producer != model.producer_name:
        logger.warning(
            f"Model producer not matched: Expect {producer},  Got {model.producer_name} {model.producer_version}. Please specify correct --model_type parameter."
        )

    if optimization_options is None:
        optimization_options = BertOptimizationOptions(model_type)

    optimizer = optimizer_class(model, num_heads, hidden_size)

    if not only_onnxruntime:
        optimizer.optimize(optimization_options)

    # Remove the temporary model.
    if temp_model_path:
        os.remove(temp_model_path)
        logger.debug("Remove tempoary model: {}".format(temp_model_path))

    optimizer.model.producer_name = "onnxruntime_tools"
    optimizer.model.producer_version = "1.4"

    return optimizer
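
A typical call pattern for the function above might look like the following sketch (file names are hypothetical; num_heads/hidden_size are the BERT-base values, and save_model_to_file is the method used in Example #25 below to persist an optimized graph):

# Hypothetical paths; adjust model_type and dimensions to your model.
optimizer = optimize_model("bert_base.onnx",
                           model_type="bert",
                           num_heads=12,
                           hidden_size=768,
                           opt_level=0)
optimizer.save_model_to_file("bert_base_opt.onnx")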
Example #12
from mxnet import autograd, np, npx, gluon, init
from onnx import checker
import onnx
from mxnet.contrib import onnx as onnx_mxnet
npx.set_np()


sym = './test-symbol.json'
params = './test-0010.params'
input_shape = (1, 1, 28, 28)
onnx_file = './test.onnx'
converted_model_path = onnx_mxnet.export_model(sym, params, [input_shape], np.float32, onnx_file, 1)

# Load onnx model
model_proto = onnx.load_model(converted_model_path)

# Check if converted ONNX protobuf is valid
checker.check_graph(model_proto.graph)
Example #13
    def test_check_model_by_model(self):  # type: () -> None
        model = onnx.load_model(self.model_filename, load_external_data=False)
        with pytest.raises(ValueError):
            load_external_data_for_model(
                model, self.temp_dir)  # Exceeds maximum protobuf
            checker.check_model(model)  # checker catches 2GB models as well
Example #14
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import onnx
import onnx.utils
import sys

model_file = sys.argv[1]

print("-- Opening ONNX file=%s" % model_file)
model = onnx.load_model(model_file)  # type: onnx.ModelProto

print("-- ONNX OpSet=%s" % model.opset_import)

print("-- ONNX model - Number of nodes=%d" % len(model.graph.node))

print()
print("-- Begin ONNX model --")
# Print a human readable representation of the model graph
print(onnx.helper.printable_graph(model.graph))
print("-- End ONNX model --")
print()

onnx.checker.check_model(model)
print('-- ONNX model validated OK')
print()
Example #15
def check_lstm_with_type(lstm_type,
                         target=tvm.target.Target("llvm -mcpu=core-avx2"),
                         dev=tvm.cpu(0)):
    has_proj = "p" in lstm_type

    device = torch.device("cpu")
    hidden_layers_num = 1
    model = None
    for batch_first in (True, False):
        for use_bias in (True, False):
            for rnd_weights in (True, False):
                if lstm_type == "uni":
                    model = LSTM_Model(
                        device,
                        batch_first=batch_first,
                        rnd_weights_init=rnd_weights,
                        use_bias=use_bias,
                    )
                elif lstm_type == "b":
                    model = LSTM_Model(
                        device,
                        batch_first=batch_first,
                        bidirectional=True,
                        rnd_weights_init=rnd_weights,
                        use_bias=use_bias,
                    )
                    hidden_layers_num = 2
                elif lstm_type == "p":
                    model = LSTM_Model(
                        device,
                        batch_first=batch_first,
                        proj_size=projection_size,
                        rnd_weights_init=rnd_weights,
                        use_bias=use_bias,
                    )
                elif lstm_type == "s":
                    model = LSTM_Model(
                        device,
                        batch_first=batch_first,
                        layer_num=model_num_layers,
                        rnd_weights_init=rnd_weights,
                        use_bias=use_bias,
                    )
                    hidden_layers_num = model_num_layers
                elif lstm_type == "sb":
                    model = LSTM_Model(
                        device,
                        batch_first=batch_first,
                        bidirectional=True,
                        layer_num=model_num_layers,
                        rnd_weights_init=rnd_weights,
                        use_bias=use_bias,
                    )
                    hidden_layers_num = 2 * model_num_layers
                elif lstm_type == "sp":
                    model = LSTM_Model(
                        device,
                        batch_first=batch_first,
                        layer_num=model_num_layers,
                        proj_size=projection_size,
                        rnd_weights_init=rnd_weights,
                        use_bias=use_bias,
                    )
                    hidden_layers_num = model_num_layers
                elif lstm_type == "bp":
                    model = LSTM_Model(
                        device,
                        batch_first=batch_first,
                        bidirectional=True,
                        proj_size=projection_size,
                        rnd_weights_init=rnd_weights,
                        use_bias=use_bias,
                    )
                    hidden_layers_num = 2
                elif lstm_type == "sbp":
                    model = LSTM_Model(
                        device,
                        batch_first=batch_first,
                        bidirectional=True,
                        layer_num=model_num_layers,
                        proj_size=projection_size,
                        rnd_weights_init=rnd_weights,
                        use_bias=use_bias,
                    )
                    hidden_layers_num = 2 * model_num_layers
                else:
                    print(
                        "WARNING: LSTM type {} is not supported here!".format(
                            lstm_type))
                    return

                model.eval()

                # Get golden output from original model
                input_hidden_shape = (hidden_layers_num, batch_size,
                                      model_hidden_size)
                input_hidden_shape_with_proj = (hidden_layers_num, batch_size,
                                                projection_size)
                dummy_input, input_shape = model.get_dummy_input()
                golden_output_batch = model.forward(
                    dummy_input.to(device)).detach().cpu().numpy()

                dtype = "float32"
                h_zeros = np.zeros(input_hidden_shape, dtype=dtype)
                if has_proj:
                    h_zeros = np.zeros(input_hidden_shape_with_proj,
                                       dtype=dtype)
                c_zeros = np.zeros(input_hidden_shape, dtype=dtype)

                tvm_output = None
                for format in ("ts", "onnx"):
                    if format == "ts":
                        # Use torch.jit.trace to generate a torch.jit.ScriptModule via tracing.
                        traced_script_module = torch.jit.trace(
                            model, dummy_input).eval()

                        # Import model to Relay
                        shape_list = [("input", input_shape)]
                        mod, params = relay.frontend.from_pytorch(
                            traced_script_module, shape_list)

                        # Model compilation by tvm
                        with tvm.transform.PassContext(opt_level=3):
                            lib = relay.build(mod,
                                              target=target,
                                              params=params)
                    elif format == "onnx":
                        if has_proj:
                            print(
                                "WARNING: torch.onnx.export does not support conversion of LSTM with projection "
                                "from pytorch! TODO: waiting for the support and correct the test after that."
                            )
                            continue
                        onnx_io = io.BytesIO()
                        with torch.no_grad():
                            h0 = torch.rand(input_hidden_shape)
                            if has_proj:
                                h0 = torch.rand(input_hidden_shape_with_proj)
                            c0 = torch.rand(input_hidden_shape)
                            input_names = ["input", "h0", "c0"]

                            # default export (without dynamic input)
                            torch.onnx.export(model, (dummy_input, (h0, c0)),
                                              onnx_io,
                                              input_names=input_names)
                        onnx_io.seek(0, 0)
                        onnx_model = onnx.load_model(onnx_io)

                        # Import model to Relay
                        shape_dict = {
                            "input": input_shape,
                            "h0": input_hidden_shape,
                            "c0": input_hidden_shape,
                        }
                        if has_proj:
                            shape_dict = {
                                "input": input_shape,
                                "h0": input_hidden_shape_with_proj,
                                "c0": input_hidden_shape,
                            }
                        mod, params = relay.frontend.from_onnx(
                            onnx_model, shape_dict)

                        # Model compilation by tvm
                        with tvm.transform.PassContext(opt_level=1):
                            lib = relay.build(mod,
                                              target=target,
                                              params=params)

                    # Inference of the model with given input data
                    m = graph_executor.GraphModule(lib["default"](dev))

                    # Set inputs
                    m.set_input(
                        input=tvm.nd.array(dummy_input.numpy().astype(dtype)),
                        h0=tvm.nd.array(h_zeros),
                        c0=tvm.nd.array(c_zeros),
                    )
                    # Execute
                    m.run()
                    # Get outputs (converted to numpy array)
                    tvm_output = m.get_output(0).numpy()

                    compare(tvm_output, golden_output_batch)
Example #16
import onnx
import sys
size = 256
if __name__ == "__main__":
    if len(sys.argv) == 1:
        print("缺少模型文件名")
    else:
        model_name = sys.argv[1]
        print("model name: " + model_name)
        model = onnx.load_model(model_name)
        d = model.graph.input[0].type.tensor_type.shape.dim
        print(d)
        d[2].dim_value = size
        d[3].dim_value = size
        for output in model.graph.output:
            d = output.type.tensor_type.shape.dim
            d[2].dim_value = size
            d[3].dim_value = size
            print(d)
        onnx.save_model(model,"convert.onnx" )
import onnx
import math

input_size = (480, 640)
# input_size = (720, 1280)
model = onnx.load_model(
    "/home/nano/workspace/CenterFace/models/onnx/centerface.onnx")
d = model.graph.input[0].type.tensor_type.shape.dim
print(d)
rate = (
    int(math.ceil(input_size[0] / d[2].dim_value)),
    int(math.ceil(input_size[1] / d[3].dim_value)),
)
print("rare", rate)
d[0].dim_value = 1
d[2].dim_value *= rate[0]
d[3].dim_value *= rate[1]
for output in model.graph.output:
    d = output.type.tensor_type.shape.dim
    print(d)
    d[0].dim_value = 1
    d[2].dim_value *= rate[0]
    d[3].dim_value *= rate[1]

onnx.save_model(
    model,
    "/home/nano/workspace/CenterFace/models/onnx/centerface_480_640.onnx")
# onnx.save_model(model, "/home/nano/workspace/CenterFace/models/onnx/centerface_720_1280.onnx")

print("Conversion done!")
Example #18
# It is very important to check the numpy arrays' shapes
print('layer 0 : ', len(layer0), layer0.shape, type(layer0))
print('layer 1 : ', len(layer1), layer1.shape, type(layer1))
print('layer 2 : ', len(layer2), layer2.shape, type(layer2))
print('layer 3 : ', len(layer3), layer3.shape, type(layer3))
print('layer 4 : ', len(layer4), layer4.shape, type(layer4))
print('layer 5 : ', len(layer5), layer5.shape, type(layer5))
print('layer 6 : ', len(layer6), layer6.shape, type(layer6))
print('layer 7 : ', len(layer7), layer7.shape, type(layer7))
print('layer 8 : ', len(layer8), layer8.shape, type(layer8))
print('layer 9 : ', len(layer9), layer9.shape, type(layer9))
print('layer 10 : ', len(layer10), layer10.shape, type(layer10))

# onnx model load
onnx_model = onnx.load_model(ONNX_MODEL_PATH)

# onnx_graph information extraction
onnx_weights = onnx_model.graph.initializer

# Also, checking the ONNX model's weight shapes is very important,
# because some of them differ from the corresponding SNN layer shapes.
# -------------------------------------
# ------ Layer shape information ------
#   SNN                       ONNX
# 34832,
# 3, 3, 1, 32               784, 10
# 32,                       10
# 3, 3, 32, 64              3200, 784
# 64,                       784
# 3, 3, 64, 128             128, 64, 3, 3
Example #19
    def load(self):
        if not self.exist():
            self._download()
        return onnx.load_model(self.model_path())
Example #20
def from_onnx(fname: str, config: Config) -> nx.DiGraph:

    # load onnx graph into memory
    model = onnx.load_model(fname)

    # check optimize and infer shapes
    #polished_model = onnx.utils.polish_model(model)
    polished_model = model

    initializers = {}
    value_info = {}
    io_map = {}

    # this will capture all of the graph
    for init in polished_model.graph.initializer:
        initializers[init.name] = init
        logging.log(logging.DEBUG, f"Registered initializer: {init.name}")

    # this captures all internal values, but not the graph output for some
    # reason (onnx spec is strange)
    #for vi in polished_model.graph.value_info:
    #    value_info[vi.name] = vi
    #    logging.log(logging.DEBUG, f"Registered value info: {vi.name}")

    ## this captures the graph output
    #for vi in polished_model.graph.output:
    #    value_info[vi.name] = vi
    #    logging.log(logging.DEBUG, f"Registered value info: {vi.name} (out)")

    # this captures all model inputs
    for inp in polished_model.graph.input:
        new_io = InOut(inp.name, None, None, None)

        # directly convert onnx initializers to static IOs in the graph
        if inp.name in initializers:
            new_io.kind = "static"
            new_io.data = numpy_helper.to_array(initializers[inp.name]).astype(
                np.float32
            )
            new_io.shape = np.shape(new_io.data)

        # pointers will be allocated later by the allocate pass
        else:
            new_io.kind = "pointer"
            new_io.data = None
            new_io.shape = onnx_type_to_shape(inp.type, config.user_width)

        io_map[inp.name] = new_io
        logging.log(logging.DEBUG, f"Built IO: {new_io}")

    # Create IOs for all node outputs
    for node in polished_model.graph.node:
        for out in node.output:
            new_io = InOut(out, None, None, None)
            new_io.kind = "pointer"
            new_io.data = None
            #new_io.shape = onnx_type_to_shape(value_info[out].type, config.user_width)
            new_io.shape = None
            io_map[out] = new_io
            logging.log(logging.DEBUG, f"Built IO: {new_io}")

    # at this point all inputs and outputs are available
    graph = nx.DiGraph()

    # usage map holds the uses of all of the _pointer_ IOs in the graph
    # eg IO : {use = [node2, node3], def = [node1]}
    # pointer IOs represent graph edges

    usage_map = {}
    for io_name, io_v in io_map.items():
        if io_v.kind == "pointer":
            usage_map[io_name] = {"use": [], "def": []}

    # start numbering nodes at zero
    node_id = 0

    # attach a load node for each of the dynamic inputs
    for dyninp_vi in polished_model.graph.input:
        if dyninp_vi.name not in initializers:
            built_node = build_load_node(dyninp_vi.name, io_map, usage_map, node_id)
            graph.add_node(node_id)
            graph.nodes[node_id]["node"] = built_node

            logging.log(logging.DEBUG, f"Built node: {built_node}")

            node_id += 1

    # build normal nodes here
    for onnx_node in polished_model.graph.node:
        built_node = build_node(onnx_node, io_map, usage_map, node_id)
        graph.add_node(node_id)

        graph.nodes[node_id]["node"] = built_node
        logging.log(logging.DEBUG, f"Built node: {built_node}")

        node_id += 1

    # attach a store node for each of the model outputs
    for out_vi in polished_model.graph.output:
        built_node = build_store_node(out_vi.name, io_map, usage_map, node_id)
        graph.add_node(node_id)
        graph.nodes[node_id]["node"] = built_node

        logging.log(logging.DEBUG, f"Built node: {built_node}")

        node_id += 1

    # we don't know the iteration order so we build edges here
    # by checking the usage map
    for name, info in usage_map.items():

        defs = info["def"]
        if len(defs) > 1:
            logging.log(logging.ERROR, f"Multiple defn of {name} at {defs}")
        if len(defs) == 0:
            logging.log(logging.ERROR, f"{name} never defined")

        source = info["def"][0]
        for use in info["use"]:
            graph.add_edge(source, use, buffer=name)
            logging.log(logging.DEBUG, f"Added edge {source} -> {use}" f" via {name}")

    # we sanity check that there are no nodes that have not been connected to
    # the graph
    num_wcc = nx.number_weakly_connected_components(graph)
    if num_wcc > 1:
        wcc = nx.weakly_connected_components(graph)
        logging.log(logging.WARN, "Multiple components in output graph")
        for i, wc in enumerate(wcc):
            logging.log(logging.WARN, f"\t<{i}> {wc}")

    return graph
Example #21
    def from_torch(model: TorchBertModel,
                   device: Optional[torch.device] = None,
                   backend: Optional[str] = None,
                   use_memory_opt=False):
        """
        Args:
            model : a PyTorch Bert Model
            device : cpu or GPU
            backend : a string indicating the kernel provider.
                Four options: [onnxrt-cpu, onnxrt-gpu, turbo-cpu, turbo-gpu]
            use_memory_opt (bool): whether or not to use memory optimization for variable-length inputs.
        """
        use_gpu = False
        if device is None:
            device = model.device
        # we may need to move to GPU explicitly
        if 'cuda' in device.type and torch.cuda.is_available():
            model.to(device)
            if backend is None:
                backend = "turbo"  # On GPU turbo is faster
            use_gpu = True
        else:
            if backend is None:
                backend = "onnxrt"  # On CPU onnxrt is faster

        if backend == "turbo":
            embeddings = BertEmbeddings.from_torch(model.embeddings)
            encoder = BertEncoder.from_torch(model.encoder)
            bertmodel_nopooler = BertModelNoPooler(embeddings, encoder)
            pooler = BertPooler.from_torch(model.pooler)
            return BertModel(bertmodel_nopooler, pooler, "turbo", model.config)
        elif backend == "onnxrt":
            import onnx
            import onnxruntime
            import onnxruntime.backend
            inputs = {
                'input_ids':
                torch.randint(32, [2, 32], dtype=torch.long).to(
                    device),  # list of numerical ids for the tokenised text
                'attention_mask':
                torch.ones([2, 32],
                           dtype=torch.long).to(device),  # dummy list of ones
                'token_type_ids':
                torch.ones([2, 32],
                           dtype=torch.long).to(device),  # dummy list of ones
            }
            onnx_model_path = "/tmp/temp_turbo_onnx.model"
            with open(onnx_model_path, 'wb') as outf:
                torch.onnx.export(
                    model=model,
                    args=(inputs['input_ids'], inputs['attention_mask'],
                          inputs['token_type_ids']
                          ),  # model input (or a tuple for multiple inputs)
                    f=outf,
                    input_names=[
                        'input_ids', 'attention_mask', 'token_type_ids'
                    ],
                    opset_version=11,  # the ONNX version to export the model to
                    do_constant_folding=
                    True,  # whether to execute constant folding for optimization
                    output_names=['output'],
                    dynamic_axes={
                        'input_ids': [0, 1],
                        'attention_mask': [0, 1],
                        'token_type_ids': [0, 1]
                    })
            # num_threads = "8"
            # os.environ['OMP_NUM_THREADS'] = str(num_threads)
            # os.environ['MKL_NUM_THREADS'] = str(num_threads)
            onnx_model = onnx.load_model(f=onnx_model_path)
            onnx_model = onnxruntime.backend.prepare(
                model=onnx_model,
                device='GPU' if use_gpu else "CPU",
                graph_optimization_level=onnxruntime.GraphOptimizationLevel.
                ORT_ENABLE_ALL)
            return BertModel(onnx_model, None, "onnxrt")
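
A hedged usage sketch for the factory method above, assuming it is exposed as BertModel.from_torch in turbo-transformers and that a Hugging Face checkpoint is available locally:

import torch
import transformers
# assumption: from turbo_transformers import BertModel

torch_model = transformers.BertModel.from_pretrained("bert-base-uncased")
torch_model.eval()

# backend defaults depend on the device: "turbo" on GPU, "onnxrt" on CPU (see above).
turbo_model = BertModel.from_torch(torch_model,
                                   device=torch.device("cpu"),
                                   backend="onnxrt")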
Example #22
    def _load_model(self):
        path = self._find_with_extension(EXTENSION)
        self.nodes = onnx.load_model(path).graph.node
        self.sess = onnxr.InferenceSession(path)
Example #23
or please refer to the official site.
https://github.com/onnx/onnx
"""

import sys
import timeit

import nnvm
import nnvm.compiler
import tvm
import onnx
import numpy as np

from tvm.contrib import graph_runtime

onnx_model = onnx.load_model(sys.argv[1])
# we can load the graph as NNVM compatible model
sym, params = nnvm.frontend.from_onnx(onnx_model)

x = np.ones([1, 3, 224, 224], dtype=np.float32)

######################################################################
# Compile the model on NNVM
# ---------------------------------------------
# We should be familiar with the process right now.

#target = 'cuda'
target = 'llvm'
# assume first input name is data
input_name = sym.list_input_names()[0]
shape_dict = {input_name: x.shape}
Example #24
    def _impl_(model_name: str,
               seq_len: int,
               batch_size: int,
               n: int,
               enable_random: bool,
               min_seq_len: int,
               max_seq_len: int,
               num_threads: int = 1,
               use_gpu: bool = False,
               enable_mem_opt: bool = False):
        import multiprocessing
        import os
        temp_fn = f"/tmp/temp_{model_name}_onnx.model"
        if enable_random and os.path.exists(temp_fn):
            import transformers
            cfg = transformers.BertConfig()
            vocab_size = cfg.vocab_size
        else:
            p = multiprocessing.Pool(1)
            vocab_size, cfg = p.apply(generate_onnx_model,
                                      args=(model_name, use_gpu, temp_fn,
                                            seq_len, batch_size, backend,
                                            enable_random))
            p.close()
        import contexttimer
        import onnxruntime.backend
        import onnx
        import numpy
        import json
        import random

        if not onnxruntime.backend.supports_device(backend):
            raise RuntimeError(
                f"onnxruntime does not support {backend}, recompile it!")

        os.environ['OMP_NUM_THREADS'] = str(num_threads)
        os.environ['MKL_NUM_THREADS'] = str(num_threads)

        model = onnx.load_model(f=temp_fn)
        model = onnxruntime.backend.prepare(
            model=model,
            device=backend,
            graph_optimization_level=onnxruntime.GraphOptimizationLevel.
            ORT_ENABLE_ALL)
        # Prepare a torch bert model to check correctness if benchmarking bert
        if model_name == "bert" and checkonnxrest:
            import transformers
            import torch
            torch.set_grad_enabled(False)
            torch_model = transformers.BertModel.from_pretrained(
                "bert-base-uncased")

            if enable_random:
                input_ids = numpy.random.randint(low=0,
                                                 high=cfg.vocab_size - 1,
                                                 size=(2, 17),
                                                 dtype=numpy.int64)
            else:
                input_ids = numpy.random.randint(low=0,
                                                 high=cfg.vocab_size - 1,
                                                 size=(batch_size, seq_len),
                                                 dtype=numpy.int64)
            torch_model.eval()
            torch_res = torch_model(torch.tensor(input_ids))
            onnx_res = model.run(inputs=[input_ids])
            assert (numpy.max(
                numpy.abs(torch_res[0].cpu().numpy() - onnx_res[0])) < 0.01)

        if enable_random:
            request_list = []
            random.seed(0)
            for i in range(n):
                generated_seq_len = random.randint(min_seq_len, max_seq_len)
                input_ids = numpy.random.randint(low=0,
                                                 high=cfg.vocab_size - 1,
                                                 size=(1, generated_seq_len),
                                                 dtype=numpy.int64)
                request_list.append(input_ids)

            if enable_latency_plot:
                import torch
                print(
                    f"dump results to onnxrt_{num_threads}_{model_name}_latency.txt"
                )
                result_list = []
                with open(f"onnxrt_{num_threads}_{model_name}_latency.txt",
                          "w") as of:
                    for request in request_list:
                        if use_gpu:
                            start = torch.cuda.Event(enable_timing=True)
                            end = torch.cuda.Event(enable_timing=True)
                            start.record()

                        with contexttimer.Timer() as t:
                            model.run(inputs=[request])

                        if not use_gpu:
                            qps = n / t.elapsed
                            time_consume = t.elapsed
                        else:
                            end.record()
                            torch.cuda.synchronize()
                            torch_elapsed = start.elapsed_time(end) / 1e3
                            qps = n / torch_elapsed
                            time_consume = torch_elapsed
                        result_list.append(
                            [len(request.flatten()), time_consume])
                    elapse = 0.
                    result_list = sorted(result_list, key=lambda s: s[0])
                    for item in result_list:
                        of.write(f"{item[0]}, {item[1]}\n")
                        elapse += item[1]
                    print(f"elapsed {elapse} QPS {n/elapse}")
            else:
                if use_gpu:
                    start = torch.cuda.Event(enable_timing=True)
                    end = torch.cuda.Event(enable_timing=True)
                    start.record()

                with contexttimer.Timer() as t:
                    for request in request_list:
                        model.run(inputs=[request])

                if not use_gpu:
                    qps = n / t.elapsed
                    time_consume = t.elapsed
                else:
                    end.record()
                    torch.cuda.synchronize()
                    torch_elapsed = start.elapsed_time(end) / 1e3
                    qps = n / torch_elapsed
                    time_consume = torch_elapsed
        else:
            input_ids = numpy.random.randint(low=0,
                                             high=vocab_size - 1,
                                             size=(batch_size, seq_len),
                                             dtype=numpy.int64)
            with contexttimer.Timer() as t:
                for _ in range(n):
                    model.run(inputs=[input_ids])

        if enable_random:
            print(
                json.dumps({
                    "QPS": qps,
                    "elapsed": time_consume,
                    "n": n,
                    "max_seq_len": max_seq_len,
                    "min_seq_len": min_seq_len,
                    "framework": f"onnx_rt_{backend}",
                    "thread_num": num_threads,
                    "model_name": model_name
                }))
        else:
            print(
                json.dumps({
                    "QPS": n / t.elapsed,
                    "elapsed": t.elapsed,
                    "n": n,
                    "batch_size": batch_size,
                    "seq_len": seq_len,
                    "framework": f"onnx_rt_{backend}",
                    "n_threads": num_threads,
                    "model_name": model_name
                }))
Example #25
            1: 'sequence_length'
        }
    },
    custom_opsets={"com.microsoft": 1})
print(f"ONNX model exported to {onnx_model_path}")

if args.sequence_length % model.config.attention_window[0] == 0:
    print(
        f"*Attention*: You need input padding for inference: input sequece length shall be multiple of {model.config.attention_window[0]}. It is because the example input for export ONNX model does not need padding so padding logic is not in onnx model."
    )

# Restore the Huggingface implementation like the following:
# LongformerSelfAttention.forward = original_forward

if args.precision != 'fp32' or args.optimize_onnx:
    from onnx import load_model
    from onnxruntime.transformers.onnx_model_bert import BertOnnxModel, BertOptimizationOptions
    model = load_model(onnx_model_path, format=None, load_external_data=True)
    optimization_options = BertOptimizationOptions('bert')
    optimizer = BertOnnxModel(model, num_heads=16, hidden_size=768)
    optimizer.optimize(optimization_options)
    optimized_model_path = model_name + "_fp32.onnx"
    optimizer.save_model_to_file(optimized_model_path)
    print(f"optimized fp32 model saved to {optimized_model_path}")

    if args.precision == 'fp16':
        optimizer.convert_model_float32_to_float16(cast_input_output=True)
        optimized_model_path = model_name + "_fp16.onnx"
        optimizer.save_model_to_file(optimized_model_path)
        print(f"optimized fp16 model saved to {optimized_model_path}")
Example #26
    def expect(self,
               model,
               args,
               name=None,
               skip_opset_version=None,
               skip_outvalue_version=None,
               custom_model_test_func=None,
               expected_num_initializers=None,
               **kwargs):
        """Compare model output and test runtime output.

        Make an ONNX model from the target model with args, and put it in the
        output directory. Then the test runtime loads the model and compares
        the outputs.

        Arguments:
            model (~chainer.Chain): The target model.
            args (list or dict): Arguments of the target model.
            name (str): name of the test. Defaults to the class name.
            skip_opset_version (list): opset versions for which to skip the test.
            skip_outvalue_version (list): opset versions for which to skip the
                output value check.
            custom_model_test_func (func): A function to check the generated
                model. The function is called before checking output values.
                The ONNX model is passed as its argument.
            expected_num_initializers (int): The expected number of
                initializers in the output ONNX model.
            **kwargs (dict): keyword arguments for ``onnx_chainer.export``.
        """

        test_name = name
        if test_name is None:
            test_name = self.default_name

        for opset_version in self.target_opsets:
            if skip_opset_version is not None and\
                    opset_version in skip_opset_version:
                continue

            dir_name = 'test_' + test_name
            test_path = gen_test_data_set(model, args, dir_name, opset_version,
                                          **kwargs)

            onnx_model_path = os.path.join(test_path, 'model.onnx')
            assert os.path.isfile(onnx_model_path)
            with open(onnx_model_path, 'rb') as f:
                onnx_model = onnx.load_model(f)
            check_all_connected_from_inputs(onnx_model)

            if expected_num_initializers is not None:
                actual_num_initializers = len(onnx_model.graph.initializer)
                assert expected_num_initializers == actual_num_initializers

            graph_input_names = _get_graph_input_names(onnx_model)
            if kwargs.get('input_names', {}):
                input_names = kwargs['input_names']
                if isinstance(input_names, dict):
                    expected_names = list(sorted(input_names.values()))
                else:
                    expected_names = list(sorted(input_names))
                assert list(sorted(graph_input_names)) == expected_names
            if kwargs.get('output_names', {}):
                output_names = kwargs['output_names']
                if isinstance(output_names, dict):
                    expected_names = list(sorted(output_names.values()))
                else:
                    expected_names = list(sorted(output_names))
                graph_output_names = [v.name for v in onnx_model.graph.output]
                assert list(sorted(graph_output_names)) == expected_names

            # Input data is generated from the `network_inputs` dict, which can
            # introduce unexpected conversions. Check the values of the input PB
            # against the test args.
            if isinstance(args, (tuple, list)):
                flat_args = args
            elif isinstance(args, dict):
                flat_args = args.values()
            else:
                flat_args = [args]
            input_data = load_input_data(test_path)
            assert len(input_data) == len(flat_args)
            for i, arg in enumerate(flat_args):
                array = arg.array if isinstance(arg, chainer.Variable) else arg
                array = chainer.cuda.to_cpu(array)
                np.testing.assert_allclose(array,
                                           input_data[i],
                                           rtol=1e-5,
                                           atol=1e-5)

            if custom_model_test_func is not None:
                custom_model_test_func(onnx_model)

            if skip_outvalue_version is not None and\
                    opset_version in skip_outvalue_version:
                continue

            # The export function can add unexpected inputs. Collect inputs
            # from the ONNX model, and compare them with the input list obtained
            # from the test runtime.
            if self.check_out_values is not None:
                self.check_out_values(test_path, input_names=graph_input_names)
Example #27
def optimize_model(input,
                   model_type='bert',
                   num_heads=0,
                   hidden_size=0,
                   optimization_options=None,
                   opt_level=None,
                   use_gpu=False,
                   only_onnxruntime=False):
    """ Optimize Model by OnnxRuntime and/or python fusion logic.

    ONNX Runtime has graph optimizations (https://onnxruntime.ai/docs/resources/graph-optimizations.html). 
    However, the coverage is limited. We also have graph fusions implemented in Python to improve the coverage.
    They can be combined: ONNX Runtime will run first when opt_level > 0, then the graph fusions in Python will be applied.

    To use ONNX Runtime only and no Python fusion logic, use only_onnxruntime flag and a positive opt_level like
        optimize_model(input, opt_level=1, use_gpu=False, only_onnxruntime=True)

    When opt_level is None, we will choose default optimization level according to model type.

    When opt_level is 0 and only_onnxruntime is False, only python fusion logic is used and onnxruntime is disabled.

    When opt_level > 1, use_gpu shall be set properly since the optimized graph might contain operators for GPU or CPU only.
    If your model is intended for GPU inference only (especially float16 or mixed precision model), it is recommended to 
    set use_gpu to be True, otherwise the model is not optimized for GPU inference.

    For BERT models, num_heads and hidden_size are optional. For other model types, you need to specify these parameters.

    Args:
        input (str): input model path.
        model_type (str, optional): model type - like bert, bert_tf, bert_keras or gpt2. Defaults to 'bert'.
        num_heads (int, optional): number of attention heads. Defaults to 0.
                                   0 allows the parameter to be detected from the graph automatically (for model_type "bert" only).
        hidden_size (int, optional): hidden size. Defaults to 0.
                                     0 allows the parameter to be detected from the graph automatically (for model_type "bert" only).
        optimization_options (FusionOptions, optional): optimization options that turn on/off some fusions. Defaults to None.
        opt_level (int, optional): onnxruntime graph optimization level (0, 1, 2 or 99) or None. Defaults to None.
                                   When the value is None, default value (1 for bert and gpt2, 0 for other model types) will be used.
                                   When the level > 0, onnxruntime will be used to optimize model first.
        use_gpu (bool, optional): use gpu or not for onnxruntime. Defaults to False.
        only_onnxruntime (bool, optional): only use onnxruntime to optimize model, and no python fusion. Defaults to False.

     Returns:
        object of an optimizer class.
    """
    assert opt_level is None or opt_level in [0, 1, 2, 99]

    if model_type != "bert" and (num_heads == 0 or hidden_size == 0):
        logger.warning("Please specify parameters of num_heads and hidden_size when model_type is not 'bert'")

    (optimizer_class, producer, default_opt_level) = MODEL_TYPES[model_type]

    if opt_level is None:
        opt_level = default_opt_level

    temp_model_path = None
    if opt_level > 1:
        temp_model_path = optimize_by_onnxruntime(input, use_gpu=use_gpu, opt_level=opt_level)
    elif opt_level == 1:
        # basic optimizations (like constant folding and cast elimination) are not specific to an execution provider.
        # CPU provider is used here so that there is no extra node for GPU memory copy.
        temp_model_path = optimize_by_onnxruntime(input, use_gpu=False, opt_level=1)

    if only_onnxruntime and not temp_model_path:
        logger.warning("Please specify a positive value for opt_level when only_onnxruntime is True")

    model = load_model(temp_model_path or input, format=None, load_external_data=True)

    if model.producer_name and producer != model.producer_name:
        logger.warning(
            f"Model producer not matched: Expect {producer}, Got {model.producer_name} {model.producer_version}. Please specify correct --model_type parameter."
        )

    if optimization_options is None:
        optimization_options = FusionOptions(model_type)

    optimizer = optimizer_class(model, num_heads, hidden_size)

    if not only_onnxruntime:
        optimizer.optimize(optimization_options)

    # Remove the temporary model.
    if temp_model_path:
        os.remove(temp_model_path)
        logger.debug("Remove tempoary model: {}".format(temp_model_path))

    optimizer.model.producer_name = "onnxruntime.transformers"
    from onnxruntime import __version__ as onnxruntime_version
    optimizer.model.producer_version = onnxruntime_version

    return optimizer
Example #28
    def __init__(self, model_path):
        self.model_path = model_path
        self.model_framework = None
        self.testX = None  # test data
        self.testY = None  # test data
        self.onnx_load_model = onnx.load_model(model_path)
        self.model_type = None
Example #29
    def load(self):
        return onnx.load_model(self.model_path())
Example #30
    def export_onnx(
        decoder: Union[T5Decoder, T5DecoderInit],
        device: torch.device,
        onnx_model_path: str,
        verbose: bool = True,
        use_external_data_format: bool = False,
        use_int32_inputs: bool = False,
    ):
        """Export decoder to ONNX

        Args:
            decoder (Union[T5Decoder, T5DecoderInit]): decoder object
            device (torch.device): device of decoder object
            onnx_model_path (str): onnx path
            verbose (bool, optional): print verbose information. Defaults to True.
            use_external_data_format (bool, optional): use external data format or not. Defaults to False.
            use_int32_inputs (bool, optional): use int32 inputs
        """
        assert isinstance(decoder, (T5Decoder, T5DecoderInit))

        inputs = T5DecoderInputs.create_dummy(
            decoder.config,
            batch_size=2,
            encode_sequence_length=3,
            past_decode_sequence_length=5 if isinstance(decoder, T5Decoder) else 0,
            device=device,
            use_int32_inputs=use_int32_inputs,
        )
        input_list = inputs.to_list()

        past_names = PastKeyValuesHelper.get_past_names(decoder.config.num_layers, present=False)
        present_names = PastKeyValuesHelper.get_past_names(decoder.config.num_layers, present=True)
        present_self_names = present_names[: 2 * decoder.config.num_layers]

        input_past_names = past_names if isinstance(decoder, T5Decoder) else []
        output_present_names = present_self_names if isinstance(decoder, T5Decoder) else present_names
        output_names = ["logits"] + output_present_names

        # Shape of input tensors (sequence_length==1):
        #    input_ids: (batch_size, sequence_length)
        #    encoder_attention_mask: (batch_size, encode_sequence_length)
        #    encoder_hidden_states: (batch_size, encode_sequence_length, hidden_size)
        #    past_self_*: (batch_size, num_heads, past_decode_sequence_length, head_size)
        #    past_cross_*: (batch_size, num_heads, encode_sequence_length, head_size)

        # Shape of output tensors:
        #    logits: (batch_size, sequence_length, vocab_size)
        #    past_self_*: (batch_size, num_heads, past_decode_sequence_length + sequence_length, head_size)
        #    past_cross_*: (batch_size, num_heads, encode_sequence_length, head_size)

        input_names = ["input_ids"]
        input_names.append("encoder_attention_mask")
        input_names.append("encoder_hidden_states")
        input_names.extend(input_past_names)

        dynamic_axes = {
            "input_ids": {
                0: "batch_size",
                # 1: 'sequence_length'
            },
            "encoder_attention_mask": {0: "batch_size", 1: "encode_sequence_length"},
            "encoder_hidden_states": {0: "batch_size", 1: "encode_sequence_length"},
            "logits": {
                0: "batch_size",
                # 1: 'sequence_length'
            },
        }

        for name in input_past_names:
            dynamic_axes[name] = {
                0: "batch_size",
                2: "past_decode_sequence_length" if "self" in name else "encode_sequence_length",
            }

        for name in output_present_names:
            if "cross" in name:
                dynamic_axes[name] = {0: "batch_size", 2: "encode_sequence_length"}
            else:  # self attention past state
                if isinstance(decoder, T5Decoder):
                    dynamic_axes[name] = {
                        0: "batch_size",
                        2: "past_decode_sequence_length + 1",
                    }
                else:
                    dynamic_axes[name] = {
                        0: "batch_size",
                        # 2: 'sequence_length'
                    }

        Path(onnx_model_path).parent.mkdir(parents=True, exist_ok=True)

        with tempfile.TemporaryDirectory() as tmp_dir_name:
            temp_onnx_model_path = os.path.join(tmp_dir_name, "decoder.onnx")
            Path(temp_onnx_model_path).parent.mkdir(parents=True, exist_ok=True)
            torch_onnx_export(
                decoder,
                args=tuple(input_list),
                f=temp_onnx_model_path if use_external_data_format else onnx_model_path,
                export_params=True,
                input_names=input_names,
                output_names=output_names,
                dynamic_axes=dynamic_axes,
                opset_version=12,
                do_constant_folding=True,
                use_external_data_format=use_external_data_format,
                verbose=verbose,
            )

            if use_external_data_format:
                model = onnx.load_model(temp_onnx_model_path, load_external_data=True)
                OnnxModel.save(
                    model,
                    onnx_model_path,
                    save_as_external_data=True,
                    all_tensors_to_one_file=True,
                )
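
A hedged call sketch for export_onnx: it assumes `decoder` is an already constructed T5Decoder (or T5DecoderInit) that has been moved to `device`; the enclosing helper class is not shown in this snippet, so the method is written unqualified and the output path is a placeholder.

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
export_onnx(
    decoder,                         # assumption: a T5Decoder already on `device`
    device=device,
    onnx_model_path="./onnx_models/t5_decoder.onnx",
    verbose=False,
    use_external_data_format=False,  # set True for models larger than 2 GB
    use_int32_inputs=True,
)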
Exemplo n.º 31
0
######################################################################
# Load pretrained ONNX model
# ---------------------------------------------
# The example super-resolution model used here is exactly the same model as in the ONNX tutorial
# http://pytorch.org/tutorials/advanced/super_resolution_with_caffe2.html
# We skip the PyTorch model construction part and download the saved ONNX model.
model_url = ''.join([
    'https://gist.github.com/zhreshold/',
    'bcda4716699ac97ea44f791c24310193/raw/',
    '93672b029103648953c4e5ad3ac3aadf346a4cdc/', 'super_resolution_0.2.onnx'
])
model_path = download_testdata(model_url,
                               'super_resolution.onnx',
                               module='onnx')
# now you have super_resolution.onnx on disk
onnx_model = onnx.load_model(model_path)
# we can load the graph as an NNVM-compatible model
sym, params = nnvm.frontend.from_onnx(onnx_model)

######################################################################
# Load a test image
# ---------------------------------------------
# A single cat dominates the examples!
from PIL import Image

img_url = 'https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true'
img_path = download_testdata(img_url, 'cat.png', module='data')
img = Image.open(img_path).resize((224, 224))
img_ycbcr = img.convert("YCbCr")  # convert to YCbCr
img_y, img_cb, img_cr = img_ycbcr.split()
x = np.array(img_y)[np.newaxis, np.newaxis, :, :]
Exemplo n.º 32
0
    def test_qdq_extra_options_2(self):
        #         (input) 
        #           |    
        #          Add 
        #       /   |   \
        #  MatMul MatMul MatMul 
        #     |     |      |
        # (output)(output)(output)

        initializers = []

        input_tensor = helper.make_tensor_value_info('L', TensorProto.FLOAT, [5, 5])
        output_tensor1 = helper.make_tensor_value_info('M', TensorProto.FLOAT, [5, 5])
        output_tensor2 = helper.make_tensor_value_info('N', TensorProto.FLOAT, [5, 5])
        output_tensor3 = helper.make_tensor_value_info('O', TensorProto.FLOAT, [5, 5])

        add_weight_data = np.random.normal(0, 0.1, [5, 5]).astype(np.float32)
        initializers.append(onnx.numpy_helper.from_array(add_weight_data, name="P"))
        matmul_weight_data_1 = np.random.normal(0, 0.1, [5, 5]).astype(np.float32)
        initializers.append(onnx.numpy_helper.from_array(matmul_weight_data_1, name="Q"))
        matmul_weight_data_2 = np.random.normal(0, 0.1, [5, 5]).astype(np.float32)
        initializers.append(onnx.numpy_helper.from_array(matmul_weight_data_2, name="R"))
        matmul_weight_data_3 = np.random.normal(0, 0.1, [5, 5]).astype(np.float32)
        initializers.append(onnx.numpy_helper.from_array(matmul_weight_data_2, name="S"))

        add_node = onnx.helper.make_node('Add', ['L', 'P'], ['T'], name='Add')
        matmul_node_1 = onnx.helper.make_node('MatMul', ['T', 'Q'], ['M'], name='MatMul1')
        matmul_node_2 = onnx.helper.make_node('MatMul', ['T', 'R'], ['N'], name='MatMul2')
        matmul_node_3 = onnx.helper.make_node('MatMul', ['T', 'S'], ['O'], name='MatMul3')

        graph = helper.make_graph([add_node, matmul_node_1, matmul_node_2, matmul_node_3], 'QDQ_Test_Finetune_2', [input_tensor], [output_tensor1, output_tensor2, output_tensor3], initializer=initializers)
        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
        test_model_path = './test_qdq_finetune_2.onnx'
        onnx.save(model, test_model_path)

        compute_range = {
            'L': [0.1, 0.1],
            'M': [0.1, 0.1],
            'N': [0.1, 0.1],
            'O': [0.1, 0.1],
            'P': [0.1, 0.1],
            'Q': [0.1, 0.1],
            'R': [0.1, 0.1],
            'S': [0.1, 0.1],
            'T': [0.1, 0.1],
        }

        op_types_to_quantize = ['Add', 'MatMul']

        mode = QuantizationMode.QLinearOps
        model = onnx.load_model(test_model_path, False)
        quantizer = QDQQuantizer(
            model,
            True, #per_channel
            False, #reduce_range
            mode,
            True,  #static
            QuantType.QInt8, #weight_type
            QuantType.QInt8, #activation_type
            compute_range,
            [], #nodes_to_quantize
            ['Add'], #nodes_to_exclude
            op_types_to_quantize,
            {'ActivationSymmetric' : True, 'AddQDQPairToWeight' : True, 'OpTypesToExcludeOutputQuantizatioin': op_types_to_quantize, 'DedicatedQDQPair': True}) #extra_options
        quantizer.quantize_model()
        qdq_model_path = './test_qdq_finetune_qdq_2.onnx'
        quantizer.model.save_model_to_file(qdq_model_path, False)

        # Three dedicated QDQ pairs should be generated, one feeding each MatMul node.
        # Also, no QDQ pair should be added to the Add node.
        # QDQ pairs should not be added to the nodes' outputs.
        for node in quantizer.model.nodes():
            if node.name == 'MatMul1':
                self.assertTrue("T_DequantizeLinear_1" in node.input)
            if node.name == 'MatMul2':
                self.assertTrue("T_DequantizeLinear_2" in node.input)
            if node.name == 'MatMul3':
                self.assertTrue("T_DequantizeLinear_3" in node.input)
            if node.name == 'Add':
                for input in node.input:
                    self.assertTrue("DequantizeLinear" not in input)

            # QDQ pairs should not be added to MatMul's output
            if node.op_type == 'QuantizeLinear':
                self.assertTrue(node.input[0] not in ['M_QuantizeLinearInput', 'N_QuantizeLinearInput', 'O_QuantizeLinearInput']) 
Exemplo n.º 33
0
parser = argparse.ArgumentParser()
parser.add_argument("--onnx_path",  help="Path of onnx model", type=str, required=True)
parser.add_argument("--batch_size", help="Batch size",    type=int, default=1)
parser.add_argument("--to_rasp",    help="Compile to Raspberry", action='store_true')
parser.add_argument("--to_local",   help="Compile to local", action='store_true')
args = parser.parse_args()

import cvtransforms as dataset
import tvm
import onnx
import nnvm
import numpy as np

model_name = args.onnx_path.split('/')[-1].split('.')[0]
print("model_name = ", model_name)
onnx_model = onnx.load_model( args.onnx_path )
sym, params = nnvm.frontend.from_onnx(onnx_model)

import nnvm.compiler
# assume first input name is data
input_name = sym.list_input_names()[0]
shape_dict = {input_name: (1,3,32,32)}
for x in params:
    if params[x].shape == ():
        params[x] = tvm.nd.array(np.float32(0))

# Set cross-compilation target
postfixs = []
targets  = []

if args.to_local:
Exemplo n.º 34
0
mlp_model.init_params(mx.init.Xavier())

# Save the parameters and symbol to files
mlp_model.save_params(MXNET_PARAMS_PATH_DEFAULT)
mlp.save(MXNET_SYMBOL_PATH_DEFAULT)

# Export the ONNX specification of the model, using the parameters and symbol files
onnx_mxnet.export_model(sym=MXNET_SYMBOL_PATH_DEFAULT,
                        params=MXNET_PARAMS_PATH_DEFAULT,
                        input_shape=[(64, input_length)],
                        onnx_file_path=ONNX_FILE_PATH_DEFAULT)
############################################################################

############################################################################
# Load ONNX file and remove files
model = onnx.load_model(ONNX_FILE_PATH_DEFAULT)
if os.path.exists(MXNET_PARAMS_PATH_DEFAULT):
    os.remove(MXNET_PARAMS_PATH_DEFAULT)
if os.path.exists(MXNET_SYMBOL_PATH_DEFAULT):
    os.remove(MXNET_SYMBOL_PATH_DEFAULT)
if os.path.exists(ONNX_FILE_PATH_DEFAULT):
    os.remove(ONNX_FILE_PATH_DEFAULT)
############################################################################
# Run the model on the task (requires an API key).
run = openml.runs.run_model_on_task(model, task, avoid_duplicate_runs=False)
# Publish the experiment on OpenML (optional, requires an API key).
run.publish()

print('URL for run: %s/run/%d' % (openml.config.server, run.run_id))

############################################################################
Exemplo n.º 35
0
    def from_torch(model: TorchGPT2Model,
                   device: Optional[torch.device] = None,
                   backend: Optional[str] = "onnxrt"):
        """
        Args:
            model : a PyTorch GPT2 Model
            device : cpu or GPU
            backend : a string indicating the kernel provider.
            Two options: [onnxrt, turbo]
        """
        use_gpu = False
        if device is None:
            device = model.device
        # may need to move to GPU explicitly
        if 'cuda' in device.type and torch.cuda.is_available():
            model.to(device)
            if backend is None:
                backend = "onnxrt"  # On GPU turbo is faster
            use_gpu = True
        else:
            if backend is None:
                backend = "onnxrt"  # On CPU onnxrt is faster

        if backend == "turbo":
            raise ("Not Implemented GPT2 on Turbo Backend")

        if backend == "onnxrt":
            import onnx
            import onnxruntime
            import onnxruntime.backend
            # TODO(jiaruifang) Figure out the meaning of GPT2
            enable_past_input = False

            num_layer = model.config.n_layer
            present_names = [f'present_{i}' for i in range(num_layer)]
            output_names = ["last_state"] + present_names

            input_names = ['input_ids']
            dynamic_axes = {
                'input_ids': {
                    0: 'batch_size',
                    1: 'seq_len'
                },
                #'token_type_ids' : {0: 'batch_size', 1: 'seq_len'},
                #'attention_mask' : {0: 'batch_size', 1: 'seq_len'},
                'last_state': {
                    0: 'batch_size',
                    1: 'seq_len'
                }
            }
            for name in present_names:
                dynamic_axes[name] = {1: 'batch_size', 3: 'seq_len'}

            inputs = {
                'input_ids':
                torch.randint(32, [2, 32], dtype=torch.long).to(device)
            }
            if enable_past_input:
                past_names = [f'past_{i}' for i in range(num_layer)]
                input_names = [
                    'input_ids'
                ] + past_names  #+ ['token_type_ids', 'attention_mask']
                dummy_past = [
                    torch.zeros(list(outputs[1][0].shape))
                    for _ in range(num_layer)
                ]
                for name in past_names:
                    dynamic_axes[name] = {1: 'batch_size', 3: 'seq_len'}
                export_inputs = (
                    inputs['input_ids'], tuple(dummy_past)
                )  #, inputs['token_type_ids'], inputs['attention_mask'])
            else:
                export_inputs = (inputs['input_ids'])
            output_dir = './gpt2_onnx'
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)

            onnx_model_path = os.path.join(
                output_dir, 'gpt2_past{}.onnx'.format(int(enable_past_input)))

            torch.onnx.export(model,
                              args=export_inputs,
                              f=onnx_model_path,
                              input_names=input_names,
                              output_names=output_names,
                              dynamic_axes=dynamic_axes,
                              opset_version=11,
                              do_constant_folding=True,
                              verbose=False)
            onnx_model = onnx.load_model(f=onnx_model_path)
            onnx_model = onnxruntime.backend.prepare(
                model=onnx_model,
                device='GPU' if use_gpu else 'CPU',
                graph_optimization_level=onnxruntime.GraphOptimizationLevel.
                ORT_ENABLE_ALL)
            return GPT2Model(onnx_model, "onnxrt")
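
A hedged usage sketch, assuming from_torch is exposed on the GPT2Model wrapper class that the method returns and that a Hugging Face GPT-2 checkpoint can be downloaded.

import torch
from transformers import GPT2Model as TorchGPT2Model

torch_model = TorchGPT2Model.from_pretrained("gpt2")
torch_model.eval()
# Exports the model to ./gpt2_onnx and wraps it in an onnxruntime backend session on CPU.
ort_gpt2 = GPT2Model.from_torch(torch_model, device=torch.device("cpu"), backend="onnxrt")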
Exemplo n.º 36
0
def parse():
    parser = argparse.ArgumentParser()
    parser.add_argument('onnx_model_path',
                        help='The path of onnx model to be converted.')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse()
    model_path = args.onnx_model_path

    if not os.path.exists(args.onnx_model_path):
        print("Model file <{}> does not exists.".format(model_path))
    else:
        model_onnx = onnx.load_model(model_path)

        # preprocess
        # each pixel range: [-1, 1]
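        # Core ML conventionally applies y = image_scale * x + bias per channel, so a pixel
        # value of 0 maps to 0 * 2/255 - 1 = -1 and 255 maps to 255 * 2/255 - 1 = 1.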
        preprocessing_args = {
            'is_bgr': False,
            'red_bias': -1.0,
            'green_bias': -1.0,
            'blue_bias': -1.0,
            'image_scale': 2.0 / 255.0
        }

        # conversion
        mlmodel = convert(model_onnx,
                          mode='regression',
                          preprocessing_args=preprocessing_args,
Exemplo n.º 37
0

if __name__ == '__main__':
    args = docopt(__doc__)
    input_file_path = args['INPUT_FILE']
    if not args['OUTPUT_FILE']:
        output_file_path = _get_output_file_path(
            *os.path.splitext(input_file_path))
    else:
        output_file_path = args['OUTPUT_FILE']

    print('Converting {} to {}.'.format(input_file_path, output_file_path))

    if not os.path.exists(input_file_path):
        sys.exit('ERROR: Provided input model path does not exist: {}'.format(
            input_file_path))

    # convert from binary format to text format
    if _is_bin_file(input_file_path) and _is_txt_file(output_file_path):
        str_msg = _bin2txt(onnx.load_model(input_file_path))
        with open(output_file_path, 'w') as f:
            f.write(str_msg)
    # convert from text format to binary format
    elif _is_txt_file(input_file_path) and _is_bin_file(output_file_path):
        with open(input_file_path, 'r') as f:
            converted_model = _txt2bin(f.read())
        onnx.save(converted_model, output_file_path)
    else:
        sys.exit(
            'ERROR: Provided input or output file has unsupported format.')
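
The _bin2txt and _txt2bin helpers referenced above are not shown in this snippet; a plausible sketch (an assumption about their actual implementation) based on protobuf's text_format module could look like this.

from google.protobuf import text_format
import onnx

def _bin2txt(model_proto):
    # Serialize a ModelProto into the protobuf text representation.
    return text_format.MessageToString(model_proto)

def _txt2bin(text):
    # Parse the protobuf text representation back into a ModelProto.
    return text_format.Parse(text, onnx.ModelProto())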
Exemplo n.º 38
0
parser.add_argument('--pretrained',
                    help='pretrained dladcnv2 model',
                    default='./dladcnv2.onnx',
                    type=str)
parser.add_argument('--input_shape',
                    nargs='+',
                    default=[3, 3, 640, 640],
                    type=int,
                    help='input shape.')
parser.add_argument('--onnx',
                    help='onnx model',
                    default='./dladcnv2-d3.onnx',
                    type=str)
args = parser.parse_args()

model = onnx.load_model(args.pretrained)

input_shape = args.input_shape

d = model.graph.input[0].type.tensor_type.shape.dim
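# Compute the per-axis scaling factor relative to the dims recorded in the model,
# then rewrite the static dims so the graph accepts the requested --input_shape.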
rate = (input_shape[2] / d[2].dim_value, input_shape[3] / d[3].dim_value)
print("rate: ", rate)
d[0].dim_value = input_shape[0]
#d[0].dim_param = '?'
d[2].dim_value = int(d[2].dim_value * rate[0])
d[3].dim_value = int(d[3].dim_value * rate[1])
for output in model.graph.output:
    d = output.type.tensor_type.shape.dim
    d[0].dim_value = input_shape[0]
    #d[0].dim_param = '?'
    d[2].dim_value = int(d[2].dim_value * rate[0])
onnx_model = onnx.load(export_onnx_file)  # load onnx model
model_simp, check = simplify(onnx_model)
assert check, "Simplified ONNX model could not be validated"
onnx.save(model_simp, export_onnx_file)
print('finished exporting onnx')

# check the exported onnx file
test = onnx.load(export_onnx_file)
onnx.checker.check_model(test)

# print the onnx computation graph
# print(onnx.helper.printable_graph(test.graph))
print("\nonnx output ==> Passed!\n")

# compute the output error introduced by the conversion
onnx_model = onnx.load_model(export_onnx_file)  # load the onnx model parameters and build the model
sess = ort.InferenceSession(onnx_model.SerializeToString())  # initialize the inference session
sess.set_providers(['CPUExecutionProvider'])  # run the model on CPU
input_name = sess.get_inputs()[0].name  # get the network input name
output_name = sess.get_outputs()[0].name  # get the network output name
onnx_output = sess.run([output_name],
                       {input_name: x.cpu().numpy()})  # run onnx inference on the input data

# compute the conversion error
evalue = np.absolute(np.mean(torch_output_value - onnx_output))
print("\ntorch to onnx error: ", evalue)

# show the network output and graph structure
session = ort.InferenceSession(export_onnx_file)  # create a runtime session, similar to tensorflow
out_r = session.run(
    None, {"input": np.random.rand(1, 3, 112, 112).astype('float32')
Exemplo n.º 40
0
def _assert_onnx_validity(model_path):
    model_proto = onnx.load_model(model_path)
    checker.check_graph(model_proto.graph)
Exemplo n.º 41
0
        import urllib
        urllib.urlretrieve(url, path)

######################################################################
# Load pretrained ONNX model
# ---------------------------------------------
# The example super-resolution model used here is exactly the same model as in the ONNX tutorial
# http://pytorch.org/tutorials/advanced/super_resolution_with_caffe2.html
# We skip the PyTorch model construction part and download the saved ONNX model.
model_url = ''.join(['https://gist.github.com/zhreshold/',
                     'bcda4716699ac97ea44f791c24310193/raw/',
                     '93672b029103648953c4e5ad3ac3aadf346a4cdc/',
                     'super_resolution_0.2.onnx'])
download(model_url, 'super_resolution.onnx', True)
# now you have super_resolution.onnx on disk
onnx_model = onnx.load_model('super_resolution.onnx')
# we can load the graph as an NNVM-compatible model
sym, params = nnvm.frontend.from_onnx(onnx_model)

######################################################################
# Load a test image
# ---------------------------------------------
# A single cat dominates the examples!
from PIL import Image
img_url = 'https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true'
download(img_url, 'cat.png')
img = Image.open('cat.png').resize((224, 224))
img_ycbcr = img.convert("YCbCr")  # convert to YCbCr
img_y, img_cb, img_cr = img_ycbcr.split()
x = np.array(img_y)[np.newaxis, np.newaxis, :, :]
Exemplo n.º 42
0
or please refer to the official site.
https://github.com/onnx/onnx
"""

import sys
import timeit

import nnvm
import nnvm.compiler
import tvm
import onnx
import numpy as np

from tvm.contrib import graph_runtime

onnx_model = onnx.load_model(sys.argv[1])
# we can load the graph as an NNVM-compatible model
sym, params = nnvm.frontend.from_onnx(onnx_model)

x = np.ones([1, 3, 224, 224], dtype=np.float32)

######################################################################
# Compile the model on NNVM
# ---------------------------------------------
# We should be familiar with the process by now.

#target = 'cuda'
target = 'llvm'
# assume first input name is data
input_name = sym.list_input_names()[0]
shape_dict = {input_name: x.shape}
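
A hedged continuation of the script above, following the compile-and-run flow of the old NNVM tutorials; the (1, 1000) output shape is an assumption about the loaded classification model.

# Build the NNVM graph for the chosen target and create a runtime module.
graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, params=params)
module = graph_runtime.create(graph, lib, tvm.cpu())
module.set_input(**params)
module.set_input(input_name, tvm.nd.array(x))
module.run()
# Fetch the first output; (1, 1000) assumes a 1000-class classifier.
out = module.get_output(0, tvm.nd.empty((1, 1000), 'float32')).asnumpy()
print(out.shape)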