def run(args):
    onnx_filename = os.path.join(args.test_dir, args.model_file)
    input_names, output_names = run_onnx_util.onnx_input_output_names(
        onnx_filename)
    test_data_dir = os.path.join(args.test_dir, 'test_data_set_0')
    inputs, outputs = run_onnx_util.load_test_data(
        test_data_dir, input_names, output_names)

    model = onnx.load(onnx_filename)
    ng_func = import_onnx_model(model)

    runtime = ng.runtime(backend_name=args.backend)
    computation = runtime.computation(ng_func)

    inputs = [v for n, v in inputs]
    outputs = [v for n, v in outputs]

    actual_outputs = computation(*inputs)

    for name, expected, actual in zip(output_names, outputs, actual_outputs):
        np.testing.assert_allclose(expected, actual,
                                   rtol=1e-3, atol=1e-4, err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    def compute():
        computation(*inputs)

    return run_onnx_util.run_benchmark(compute, args.iterations)
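
All of these snippets lean on a run_onnx_util helper module that is not shown. The following is only a rough reconstruction of the three helpers used above, assuming the standard onnx and timing APIs; the real module may differ in names and details.

# Hypothetical reconstruction of run_onnx_util, for illustration only.
import os
import time

import onnx
import onnx.numpy_helper


def onnx_input_output_names(onnx_filename):
    # Graph inputs that are also initializers are weights, not runtime inputs.
    graph = onnx.load(onnx_filename).graph
    initializers = {init.name for init in graph.initializer}
    input_names = [i.name for i in graph.input if i.name not in initializers]
    output_names = [o.name for o in graph.output]
    return input_names, output_names


def _load_pb(filename):
    # Test data is stored as serialized onnx.TensorProto files.
    tensor = onnx.TensorProto()
    with open(filename, 'rb') as f:
        tensor.ParseFromString(f.read())
    return onnx.numpy_helper.to_array(tensor)


def load_test_data(test_data_dir, input_names, output_names):
    # test_data_set_0 contains input_N.pb / output_N.pb in graph order.
    inputs = [(name, _load_pb(os.path.join(test_data_dir, 'input_%d.pb' % i)))
              for i, name in enumerate(input_names)]
    outputs = [(name, _load_pb(os.path.join(test_data_dir, 'output_%d.pb' % i)))
               for i, name in enumerate(output_names)]
    return inputs, outputs


def run_benchmark(compute, iterations):
    # Time `iterations` calls and report the mean latency.
    compute()  # warm-up
    start = time.time()
    for _ in range(iterations):
        compute()
    elapsed = (time.time() - start) / iterations
    print('Average elapsed: %.3f msec' % (elapsed * 1000))
    return elapsed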
Example #2
def run(args):
    onnx_filename = run_onnx_util.onnx_model_file(args.test_dir,
                                                  args.model_file)
    input_names, output_names = run_onnx_util.onnx_input_output_names(
        onnx_filename)
    test_data_dir = os.path.join(args.test_dir, 'test_data_set_0')
    inputs, outputs = run_onnx_util.load_test_data(test_data_dir, input_names,
                                                   output_names)

    sess = rt.InferenceSession(onnx_filename)

    inputs = dict(inputs)
    outputs = [v for n, v in outputs]

    actual_outputs = sess.run(output_names, inputs)

    for name, expected, actual in zip(output_names, outputs, actual_outputs):
        np.testing.assert_allclose(expected, actual, rtol=1e-3,
                                   atol=1e-4, err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    def compute():
        sess.run(output_names, inputs)

    return run_onnx_util.run_benchmark(compute, args.iterations)
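
Every snippet exposes the same run(args) entry point. A hypothetical minimal driver could look like the sketch below; only test_dir, model_file and iterations are taken from the code above, the defaults are guesses.

# Hypothetical CLI wrapper around run(); argument names follow the snippets.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run an ONNX model test case')
    parser.add_argument('test_dir',
                        help='Directory with the model and test_data_set_0')
    parser.add_argument('--model_file', default='model.onnx')
    parser.add_argument('--iterations', type=int, default=10)
    args = parser.parse_args()
    run(args)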
Example #3
def run(args):
    test_dir = os.path.abspath(args.test_dir)
    test_dir_name = test_dir.split(os.path.sep)[-1]

    onnx_filename = run_onnx_util.onnx_model_file(test_dir, args.model_file)
    input_names, output_names = run_onnx_util.onnx_input_output_names(
        onnx_filename)
    test_data_dir = os.path.join(test_dir, 'test_data_set_0')
    inputs, outputs = run_onnx_util.load_test_data(test_data_dir, input_names,
                                                   output_names)

    mo_output_dir = os.path.join(
        'out', 'dldt_{}.{}'.format(test_dir_name, args.data_type.lower()))
    mo_model_xml = os.path.join(mo_output_dir, 'model.xml')
    mo_model_bin = os.path.join(mo_output_dir, 'model.bin')

    # Run Model Optimizer unless a converted model already exists
    # (or --force_mo is given).
    os.makedirs(mo_output_dir, exist_ok=True)
    found_mo = os.path.exists(mo_model_xml) and os.path.exists(mo_model_bin)
    if args.force_mo or not found_mo:
        args.input_model = onnx_filename
        args.output_dir = mo_output_dir
        from mo.main import driver
        driver(args)
    else:
        log.basicConfig(format="[ %(levelname)s ] %(message)s",
                        level=args.log_level,
                        stream=sys.stdout)

    # Run inference with the inference engine.
    return inference(args, mo_model_xml, mo_model_bin, inputs, outputs)
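
The inference() helper called here is defined elsewhere in the script. A rough sketch of what it might do, assuming the classic openvino.inference_engine (IECore) Python API and a hypothetical args.device option:

# Hypothetical inference() helper; assumes the IECore Python API and that the
# converted IR keeps the ONNX output names.
import numpy as np
from openvino.inference_engine import IECore


def inference(args, model_xml, model_bin, inputs, outputs):
    ie = IECore()
    net = ie.read_network(model=model_xml, weights=model_bin)
    exec_net = ie.load_network(network=net, device_name=args.device)

    input_dict = dict(inputs)
    results = exec_net.infer(inputs=input_dict)

    for name, expected in outputs:
        np.testing.assert_allclose(expected, results[name],
                                   rtol=1e-3, atol=1e-4, err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    def compute():
        exec_net.infer(inputs=input_dict)

    return run_onnx_util.run_benchmark(compute, args.iterations)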
Example #4
def run(args):
    onnx_filename = os.path.join(args.test_dir, args.model_file)
    input_names, output_names = run_onnx_util.onnx_input_output_names(
        onnx_filename)
    test_data_dir = os.path.join(args.test_dir, 'test_data_set_0')
    inputs, outputs = run_onnx_util.load_test_data(test_data_dir, input_names,
                                                   output_names)

    model = onnx.load(onnx_filename)
    tf_model = onnx_tf.backend.prepare(model)

    inputs = dict(inputs)
    outputs = dict(outputs)
    actual_outputs = tf_model.run(inputs)

    for name in output_names:
        expected = outputs[name]
        actual = actual_outputs[name]
        np.testing.assert_allclose(expected, actual, rtol=1e-3,
                                   atol=1e-4, err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    def compute():
        tf_model.run(inputs)

    return run_onnx_util.run_benchmark(compute, args.iterations)
Example #5
def run(args):
    onnx_filename = run_onnx_util.onnx_model_file(args.test_dir,
                                                  args.model_file)
    onnx_model = onnx.load_model(onnx_filename)
    ctx = tvm.gpu()

    input_names, output_names = run_onnx_util.onnx_input_output_names(
        onnx_filename)

    test_data_dir = os.path.join(args.test_dir, 'test_data_set_0')
    inputs, outputs = run_onnx_util.load_test_data(
        test_data_dir, input_names, output_names)

    inputs = dict(inputs)
    graph_module = None
    if args.frontend == 'nnvm':
        graph_module = build_graph_nnvm(args, ctx, onnx_model, inputs, input_names)
    elif args.frontend == 'relay':
        graph_module = build_graph_relay(args, ctx, onnx_model, inputs, input_names)
    else:
        raise RuntimeError('Invalid frontend: {}'.format(args.frontend))

    graph_module.run()

    for i, (name, expected) in enumerate(outputs):
        tvm_output = tvm.nd.empty(expected.shape, expected.dtype, ctx=ctx)
        actual = graph_module.get_output(i, tvm_output).asnumpy()
        np.testing.assert_allclose(expected, actual,
                                   rtol=1e-3, atol=1e-4, err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    def compute():
        graph_module.run()
        cupy.cuda.device.Device().synchronize()

    return run_onnx_util.run_benchmark(compute, args.iterations)
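
build_graph_nnvm and build_graph_relay are helpers defined elsewhere in the script. A sketch of the Relay path, assuming a TVM release from the NNVM/Relay transition era and the args.target / args.opt_level options used in the next example (from_onnx return types and build_config usage vary across TVM versions):

# Hypothetical build_graph_relay; API details depend on the TVM version.
import tvm
from tvm import relay
from tvm.contrib import graph_runtime


def build_graph_relay(args, ctx, onnx_model, inputs, input_names):
    # Shapes for the runtime inputs; weights are carried in params.
    shape_dict = {name: inputs[name].shape for name in input_names
                  if name in inputs}
    mod, params = relay.frontend.from_onnx(onnx_model, shape=shape_dict)
    with relay.build_config(opt_level=args.opt_level):
        graph, lib, params = relay.build(mod, target=args.target,
                                         params=params)

    graph_module = graph_runtime.create(graph, lib, ctx)
    graph_module.set_input(**params)
    for name in input_names:
        if name in inputs:
            graph_module.set_input(name, tvm.nd.array(inputs[name], ctx=ctx))
    return graph_module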
Example #6
def run(args):
    onnx_model = onnx.load_model(os.path.join(args.test_dir, args.model_file))
    symbol, params = nnvm.frontend.from_onnx(onnx_model)
    input_names = symbol.list_input_names()
    output_names = symbol.list_output_names()

    test_data_dir = os.path.join(args.test_dir, 'test_data_set_0')
    inputs, outputs = run_onnx_util.load_test_data(test_data_dir, input_names,
                                                   output_names)
    inputs = dict(inputs)

    # assert len(input_names) == len(inputs) + len(params)
    # assert len(output_names) == len(outputs)

    graph, lib, params = compile(symbol, args.target, input_names, inputs,
                                 params, args.opt_level, args.autotvm_log)

    if args.dump_nnvm:
        print(graph.ir())
        print(graph.json())

    ctx = tvm.gpu()

    # Prepare inputs.
    tvm_inputs = {}
    for name, value in inputs.items():
        tvm_inputs[name] = tvm.nd.array(value, ctx=ctx)
    for name, value in params.items():
        tvm_inputs[name] = tvm.nd.array(value, ctx=ctx)

    graph_module = None
    if args.debug:
        try:
            graph_module = debug_runtime.create(graph, lib, ctx)
        except Exception:
            print('debug_runtime is disabled. '
                  'Set USE_GRAPH_RUNTIME_DEBUG=ON and rebuild TVM.')
    if graph_module is None:
        graph_module = graph_runtime.create(graph, lib, ctx)

    graph_module.set_input(**tvm_inputs)

    graph_module.run()

    for i, (name, expected) in enumerate(outputs):
        tvm_output = tvm.nd.empty(expected.shape, expected.dtype, ctx=ctx)
        actual = graph_module.get_output(i, tvm_output).asnumpy()
        np.testing.assert_allclose(expected, actual, rtol=1e-3,
                                   atol=1e-4, err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    def compute():
        graph_module.run()
        cupy.cuda.device.Device().synchronize()

    return run_onnx_util.run_benchmark(compute, args.iterations)
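
The compile() helper above is also defined elsewhere in the script. A plausible sketch with the classic nnvm.compiler API (the signature is taken from the call site; the autotvm handling is a guess):

# Hypothetical compile() helper; nnvm.compiler has since been removed from TVM.
from tvm import autotvm
import nnvm.compiler


def compile(symbol, target, input_names, inputs, params, opt_level,
            autotvm_log):
    # input_names is unused here; shapes come straight from the loaded inputs.
    shape_dict = {name: value.shape for name, value in inputs.items()}
    dtype_dict = {name: str(value.dtype) for name, value in inputs.items()}

    def _build():
        with nnvm.compiler.build_config(opt_level=opt_level):
            return nnvm.compiler.build(symbol, target, shape=shape_dict,
                                       dtype=dtype_dict, params=params)

    if autotvm_log:
        # Reuse pre-tuned schedules when an autotvm log was given.
        with autotvm.apply_history_best(autotvm_log):
            return _build()
    return _build()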
def run(args):
    onnx_filename = os.path.join(args.test_dir, args.model_file)
    input_names, output_names = run_onnx_util.onnx_input_output_names(
        onnx_filename)
    test_data_dir = os.path.join(args.test_dir, 'test_data_set_0')
    inputs, outputs = run_onnx_util.load_test_data(
        test_data_dir, input_names, output_names)

    with open(onnx_filename, 'rb') as f:
        onnx_proto = f.read()

    if args.debug:
        logger = tensorrt.Logger(tensorrt.Logger.Severity.INFO)
    else:
        logger = tensorrt.Logger()
    builder = tensorrt.Builder(logger)
    if args.fp16_mode:
        builder.fp16_mode = True
    # TODO(hamaji): Infer batch_size from inputs.
    builder.max_batch_size = args.batch_size
    network = builder.create_network()
    parser = tensorrt.OnnxParser(network, logger)
    if not parser.parse(onnx_proto):
        for i in range(parser.num_errors):
            sys.stderr.write('ONNX import failure: %s\n' % parser.get_error(i))
        raise RuntimeError('ONNX import failed')
    engine = builder.build_cuda_engine(network)
    context = engine.create_execution_context()

    assert len(inputs) + len(outputs) == engine.num_bindings
    for i, (_, input) in enumerate(inputs):
        assert args.batch_size == input.shape[0]
        assert input.shape[1:] == engine.get_binding_shape(i)
    for i, (_, output) in enumerate(outputs):
        assert args.batch_size == output.shape[0]
        i += len(inputs)
        assert output.shape[1:] == engine.get_binding_shape(i)

    inputs = [v for n, v in inputs]
    outputs = [v for n, v in outputs]
    gpu_inputs = to_gpu(inputs)
    gpu_outputs = []
    for output in outputs:
        gpu_outputs.append(cupy.zeros_like(cupy.array(output)))
    bindings = [a.data.ptr for a in gpu_inputs]
    bindings += [a.data.ptr for a in gpu_outputs]

    context.execute(args.batch_size, bindings)

    actual_outputs = to_cpu(gpu_outputs)

    for name, expected, actual in zip(output_names, outputs, actual_outputs):
        np.testing.assert_allclose(expected, actual,
                                   rtol=args.rtol, atol=args.atol,
                                   err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    def compute():
        context.execute(args.batch_size, bindings)
        cupy.cuda.device.Device().synchronize()

    return run_onnx_util.run_benchmark(compute, args.iterations)
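
to_gpu and to_cpu are small helpers defined elsewhere in the script; a plausible cupy-based sketch:

# Hypothetical to_gpu/to_cpu helpers built on cupy.
import cupy


def to_gpu(arrays):
    # Copy each host (numpy) array to device memory.
    return [cupy.asarray(a) for a in arrays]


def to_cpu(arrays):
    # Copy each device (cupy) array back to host memory.
    return [cupy.asnumpy(a) for a in arrays]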