def testRunModelMultipleThreads(self):
        available_providers = onnxrt.get_available_providers()

        # Skip this test for a "pure" DML onnxruntime python wheel. We keep this test enabled for instances where both DML and CUDA
        # EPs are available (Windows GPU CI pipeline has this config) - this test will pass because CUDA has higher precedence than DML
        # and the nodes are assigned to only the CUDA EP (which supports this test)
        if ('DmlExecutionProvider' in available_providers
                and not 'CUDAExecutionProvider' in available_providers):
            print(
                "Skipping testRunModelMultipleThreads as the DML EP does not support calling Run() on different threads using the same session object "
            )
        else:
            so = onnxrt.SessionOptions()
            so.log_verbosity_level = 1
            so.logid = "MultiThreadsTest"
            sess = onnxrt.InferenceSession(get_name("mul_1.onnx"),
                                           sess_options=so)
            ro1 = onnxrt.RunOptions()
            ro1.logid = "thread1"
            t1 = threading.Thread(target=self.run_model, args=(sess, ro1))
            ro2 = onnxrt.RunOptions()
            ro2.logid = "thread2"
            t2 = threading.Thread(target=self.run_model, args=(sess, ro2))
            t1.start()
            t2.start()
            t1.join()
            t2.join()
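
The test relies on a run_model helper that is not shown in this snippet (the next example uses the same helper). A minimal sketch of what such a helper might look like, assuming mul_1.onnx takes a single float32 input and without asserting exact output values:

import numpy as np

def run_model(self, session_object, run_options):
    # Hypothetical helper assumed by the threading tests; the expected values
    # depend on mul_1.onnx, so only the basic structure of the result is checked.
    x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
    input_name = session_object.get_inputs()[0].name
    res = session_object.run([], {input_name: x}, run_options=run_options)
    self.assertTrue(len(res) >= 1)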
Example #2
 def testRunModelMultipleThreads(self):
     so = onnxrt.SessionOptions()
     so.log_verbosity_level = 1
     so.logid = "MultiThreadsTest"
     sess = onnxrt.InferenceSession(get_name("mul_1.onnx"), sess_options=so)
     ro1 = onnxrt.RunOptions()
     ro1.logid = "thread1"
     t1 = threading.Thread(target=self.run_model, args=(sess, ro1))
     ro2 = onnxrt.RunOptions()
     ro2.logid = "thread2"
     t2 = threading.Thread(target=self.run_model, args=(sess, ro2))
     t1.start()
     t2.start()
     t1.join()
     t2.join()
Example #3
    def Run(self, model_str: bytes, inputs_str: List[bytes]):
        model = onnx.ModelProto()
        model.ParseFromString(model_str)

        def deserialize_tp(tp_str):
            tp = onnx.TensorProto()
            tp.ParseFromString(tp_str)
            return tp

        input_tps = map(deserialize_tp, inputs_str)
        input_arrs = map(onnx.numpy_helper.to_array, input_tps)
        input_names = [x.name for x in model.graph.input]
        inputs = dict(zip(input_names, input_arrs))
        sess_options = rt.SessionOptions()
        sess_options.graph_optimization_level = rt.GraphOptimizationLevel(0)
        sess_options.log_severity_level = 3
        sess = rt.InferenceSession(
            model.SerializeToString(),
            sess_options=sess_options,
            providers=["CPUExecutionProvider"],
        )
        output_names = [x.name for x in sess.get_outputs()]
        run_options = rt.RunOptions()
        run_options.log_severity_level = 3
        output_arrs = sess.run(output_names, inputs, run_options=run_options)
        return [
            onnx.numpy_helper.from_array(x).SerializeToString()
            for x in output_arrs
        ]
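
A hedged usage sketch for the Run method above, showing the caller-side serialization it expects; `runner` and `model_bytes` are placeholders for an instance of the class and a serialized ModelProto:

import numpy as np
import onnx.numpy_helper

# Hypothetical call: `runner` is an instance of the class defining Run above and
# `model_bytes` holds a serialized single-input onnx.ModelProto.
x = np.random.rand(1, 3).astype(np.float32)
input_bytes = onnx.numpy_helper.from_array(x).SerializeToString()
output_bytes_list = runner.Run(model_bytes, [input_bytes])

def deserialize_output(tp_bytes):
    tp = onnx.TensorProto()
    tp.ParseFromString(tp_bytes)
    return onnx.numpy_helper.to_array(tp)

outputs = [deserialize_output(b) for b in output_bytes_list]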
Example #4
    def testAllocationPlanWorksWithOnlyExecutePathToFetchesOption(self):
        """
               (inp0)  (inp1)
                  |  \/  |
                  |  /\  |
                 Add    Sub
                  |      |
              (tsor0)  (tsor1)
                  |      |
                 Neg    Neg
                  |      |
              (outp0)  (outp1)

        In this model, tsor0 and tsor1 have the same size, and the allocation plan sets tsor1 to reuse tsor0's memory.
        With run_options.only_execute_path_to_fetches == True and only outp1 fetched, the Add op is not executed.
        As a result tsor0 is never allocated during computation, so allocating tsor1 by reusing tsor0 would fail.
        This case is handled specifically in ExecutionFrame::AllocateAsPerAllocationPlan().
        This test is to ensure that the case is covered.
        """
        name = get_name("alloc_tensor_reuse.onnx")
        sess = onnxrt.InferenceSession(name)

        run_options = onnxrt.RunOptions()
        run_options.only_execute_path_to_fetches = True
        inp0, inp1 = np.ones((10,), dtype=np.float32), np.ones((10,), dtype=np.float32)

        session_run_results = sess.run(['outp0'], {'inp0': inp0, 'inp1': inp1}, run_options)
        assert_allclose(session_run_results[0], -(inp0 + inp1))

        session_run_results = sess.run(['outp1'], {'inp0': inp0, 'inp1': inp1}, run_options)
        assert_allclose(session_run_results[0], -(inp0 - inp1))
    def testConfigureRunVerbosityLevel(self):
        ro = onnxrt.RunOptions()
        ro.run_log_verbosity_level = 1
        ro.run_tag = "testtag123"

        # use onnxruntime_ostream_redirect to redirect c++ stdout/stderr to python sys.stdout and sys.stderr
        with onnxruntime_ostream_redirect(stdout=True, stderr=True):
            sess = onnxrt.InferenceSession(self.get_name("mul_1.pb"))
            x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
            sess.run([], {'X': x}, run_options=ro)
            output = sys.stderr.getvalue()
            self.assertTrue('[I:onnxruntime:testtag123,' in output)
Example #6
def run_onnx_inference(input_tensor: np.ndarray) -> np.ndarray:
    """
    Perform inference on ONNX model
    """
    input_tensor = onnx_preprocessing(input_tensor)

    run_options = onnxruntime.RunOptions()
    run_options.log_verbosity_level = 0

    ort_session = onnxruntime.InferenceSession(ONNX_MODEL_FILE)
    ort_inputs = {ort_session.get_inputs()[0].name: input_tensor}
    ort_outputs = ort_session.run(None, ort_inputs, run_options=run_options)
    return ort_outputs
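
A hedged usage sketch for the helper above; the input shape is an assumption, since the real shape depends on the model behind ONNX_MODEL_FILE:

import numpy as np

# Hypothetical call with a random image-like tensor; 1x3x224x224 is only a guess
# at what onnx_preprocessing and the model expect.
dummy = np.random.rand(1, 3, 224, 224).astype(np.float32)
outputs = run_onnx_inference(dummy)
print([o.shape for o in outputs])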
Example #7
    def eval_step(self, *args, **kwargs):
        r"""Evaluation step method

        Args:
            *args: Arbitrary arguments that are used as model input (data only)
            **kwargs: Arbitrary keyword arguments that are used as model input (data only)

        Returns:
            ordered :py:obj:`list` with model outputs as described by :py:attr:`.ORTTrainer.model_desc`
        """
        # Get data. CombineTorchModelLossFn takes label as last input and outputs loss first
        sample_input = self._prepare_model_input(self.model_desc.inputs, None,
                                                 None, *args, **kwargs)

        # Export model to ONNX
        if self._onnx_model is None:
            if self._torch_model is not None:
                self._init_onnx_model(sample_input)
            else:
                raise RuntimeError(
                    "Model is uninitialized. Only ONNX and PyTorch models are supported"
                )

        # Prepare input/output description
        inputs_desc = self.model_desc.inputs
        outputs_desc = self.model_desc.outputs
        if self._train_step_info.fetches:
            outputs_desc = [
                o_desc for o_desc in outputs_desc
                if o_desc.name in self._train_step_info.fetches
            ]
            if len(outputs_desc) != len(self._train_step_info.fetches):
                raise RuntimeError(
                    "The specified fetches list contains invalid output names")

        # Normalize input
        if not isinstance(sample_input, (list, tuple)):
            sample_input = (sample_input, )

        # RunOptions
        run_options = ort.RunOptions()
        run_options.only_execute_path_to_fetches = True
        run_options.training_mode = False

        # Run an eval step and return
        session_run_results = self._training_session_run_helper(
            False, sample_input, inputs_desc, outputs_desc, run_options)

        # Output must be returned in the same order as defined in the model description
        results = [session_run_results[o_desc.name] for o_desc in outputs_desc]
        return results[0] if len(results) == 1 else results
Example #8
def forward(model, inputs=None, input_shapes: Optional[TensorShapes] = None) -> Dict[str, np.ndarray]:
    if input_shapes is None:
        input_shapes = {}
    sess_options = rt.SessionOptions()
    sess_options.graph_optimization_level = rt.GraphOptimizationLevel(0)
    sess_options.log_severity_level = 3
    sess = rt.InferenceSession(model.SerializeToString(), sess_options=sess_options)
    if inputs is None:
        inputs = generate_rand_input(model, input_shapes=input_shapes)
    outputs = [x.name for x in sess.get_outputs()]
    run_options = rt.RunOptions()
    run_options.log_severity_level = 3
    res = OrderedDict(zip(outputs, sess.run(outputs, inputs, run_options=run_options)))
    return res
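
generate_rand_input is not part of this snippet; a minimal sketch of one plausible implementation, assuming static input shapes (or shapes passed via input_shapes) and onnx.helper.tensor_dtype_to_np_dtype from a recent onnx release:

import numpy as np
import onnx

def generate_rand_input(model, input_shapes=None):
    # Hypothetical sketch: build random feeds for every graph input that is not
    # backed by an initializer, honoring any user-supplied shapes.
    input_shapes = input_shapes or {}
    initializer_names = {init.name for init in model.graph.initializer}
    feeds = {}
    for vi in model.graph.input:
        if vi.name in initializer_names:
            continue
        shape = input_shapes.get(
            vi.name,
            [d.dim_value for d in vi.type.tensor_type.shape.dim])
        dtype = onnx.helper.tensor_dtype_to_np_dtype(vi.type.tensor_type.elem_type)
        feeds[vi.name] = np.random.rand(*shape).astype(dtype)
    return feeds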
Example #9
    def eval_step(self, *args, **kwargs):
        """
        inputs: model inputs and/or labels.
        outputs: if 'fetches' is not provided, outputs are loss and
            (if in mixed mode and is finishing gradient accumulation) all_finite.
            if fetches is provided, outputs contains these requested with fetches.
        fetches: names of requested outputs
        """

        # with model_loss_cls, the last input is label, first output is loss
        input, fetches = self._prepare_input_and_fetches(
            self.model_desc_.inputs_, None, None, *args, **kwargs)

        if self.onnx_model_ is None:
            if self.torch_model_ is not None:
                self._init_onnx_model(input)
            else:
                raise RuntimeError(
                    "Model is unintialized. Please ensure a valid ONNX model or PyTorch model is provided to this Trainer."
                )

        input_desc = self.model_desc_.inputs_[0:len(input)]
        if fetches is None:
            output_desc = self.model_desc_.outputs_
        else:
            output_desc = [
                output for fetch in fetches
                for output in self.model_desc_.outputs_
                if output.name_ == fetch
            ]

        if not isinstance(input, (list, tuple)):
            input = (input, )

        run_options = ort.RunOptions()
        run_options.only_execute_path_to_fetches = True
        run_options.training_mode = False

        session_run_results = ort_training_session_run_helper(
            self.session, self.eval_io_binding, input, input_desc, output_desc,
            self.device_, run_options)

        if len(session_run_results) == 1:
            return session_run_results[list(session_run_results.keys())[0]]
        else:
            return [
                session_run_results[output_desc.name_]
                for output_desc in output_desc
            ]
Example #10
def _forward(
    model: onnx.ModelProto,
    extra_output_nodes: Optional[List[onnx.NodeProto]] = None
) -> OrderedDict[str, np.ndarray]:
    # add outputs of the argument nodes as model outputs.
    if extra_output_nodes is not None:
        model = deepcopy(model)
        for node in extra_output_nodes:
            for output in node.output:
                value_info = onnx.ValueInfoProto(name=output)
                model.graph.output.append(value_info)

    # create ONNX runtime session
    sess_options = onnxrt.SessionOptions()
    sess_options.graph_optimization_level = onnxrt.GraphOptimizationLevel(0)
    sess_options.log_severity_level = 3
    sess = onnxrt.InferenceSession(
        model.SerializeToString(),
        sess_options=sess_options,
        providers=["CPUExecutionProvider"],
    )

    # get names of input nodes that are not initializers
    input_names = set([v.name for v in model.graph.input])
    init_names = set([v.name for v in model.graph.initializer])
    input_names = input_names - init_names

    # generate random inputs
    inputs = {}
    for v in model.graph.input:
        name = v.name
        shape = (d.dim_value for d in v.type.tensor_type.shape.dim)
        dtype = _numpy_dtype(v.type.tensor_type.elem_type)
        if name in input_names:
            inputs[name] = np.random.rand(*shape).astype(dtype)

    output_names = [x.name for x in sess.get_outputs()]
    run_options = onnxrt.RunOptions()
    run_options.log_severity_level = 3
    outputs = sess.run(output_names, inputs, run_options=run_options)

    return OrderedDict(zip(output_names, outputs))
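
_numpy_dtype is assumed to exist elsewhere in the module this snippet comes from; a minimal sketch covering the most common TensorProto element types:

import numpy as np
import onnx

def _numpy_dtype(elem_type):
    # Hypothetical sketch: map a TensorProto.DataType enum value
    # (e.g. onnx.TensorProto.FLOAT) to the corresponding NumPy dtype.
    table = {
        onnx.TensorProto.FLOAT: np.float32,
        onnx.TensorProto.DOUBLE: np.float64,
        onnx.TensorProto.UINT8: np.uint8,
        onnx.TensorProto.INT32: np.int32,
        onnx.TensorProto.INT64: np.int64,
        onnx.TensorProto.BOOL: np.bool_,
    }
    return np.dtype(table[elem_type])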
Example #11
def forward(
        model: onnx.ModelProto,
        inputs: Dict[str, np.ndarray] = None,
        input_shapes: Optional[TensorShapes] = None) -> Dict[str, np.ndarray]:
    """Run forward on a model.

    Args:
        model (onnx.ModelProto): Input ONNX model.
        inputs (Dict[str, np.ndarray], optional): Inputs of the model.
        input_shapes (TensorShapes, optional): Input shapes of the model.

    Returns:
        Dict[str, np.ndarray]: Outputs of the model.
    """
    if input_shapes is None:
        input_shapes = {}
    sess_options = rt.SessionOptions()
    # load custom lib for onnxruntime in mmcv
    ort_custom_op_path = ''
    try:
        from mmcv.ops import get_onnxruntime_op_path
        ort_custom_op_path = get_onnxruntime_op_path()
    except ImportError:
        pass
    if os.path.exists(ort_custom_op_path):
        sess_options.register_custom_ops_library(ort_custom_op_path)
    sess_options.graph_optimization_level = rt.GraphOptimizationLevel(0)
    sess_options.log_severity_level = 3
    sess = rt.InferenceSession(model.SerializeToString(),
                               sess_options=sess_options,
                               providers=['CPUExecutionProvider'])
    if inputs is None:
        inputs = generate_rand_input(model, input_shapes=input_shapes)
    outputs = [x.name for x in sess.get_outputs()]
    run_options = rt.RunOptions()
    run_options.log_severity_level = 3
    res = OrderedDict(
        zip(outputs, sess.run(outputs, inputs, run_options=run_options)))
    return res
Example #12
def forward(model,
            input_data: Optional[Tensors] = None,
            input_shapes: Optional[TensorShapes] = None,
            custom_lib: Optional[str] = None) -> Tensors:
    if input_shapes is None:
        input_shapes = {}
    sess_options = rt.SessionOptions()
    if custom_lib is not None:
        if os.path.exists(custom_lib):
            sess_options.register_custom_ops_library(custom_lib)
        else:
            print("No such file '{}'".format(custom_lib), file=sys.stderr)
            exit(1)
    sess_options.graph_optimization_level = rt.GraphOptimizationLevel(0)
    sess_options.log_severity_level = 3
    sess = rt.InferenceSession(model.SerializeToString(),
                               sess_options=sess_options,
                               providers=['CPUExecutionProvider'])

    input_names = get_input_names(model)
    inputs = {}
    for name in input_names:
        if input_data is not None and input_data.get(name, None) is not None:
            inputs[name] = input_data[name]
        else:
            if input_shapes is not None and input_shapes.get(name,
                                                             None) is not None:
                shape = input_shapes[name]
            else:
                shape = get_shape(model, name)
            inputs.update(generate_specific_rand_input(model, {name: shape}))

    outputs = [x.name for x in sess.get_outputs()]
    run_options = rt.RunOptions()
    run_options.log_severity_level = 3
    res = OrderedDict(
        zip(outputs, sess.run(outputs, inputs, run_options=run_options)))
    return res
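
get_input_names, get_shape and generate_specific_rand_input are defined elsewhere in the module this snippet comes from; a minimal sketch of what the first two might do, assuming static shapes:

import onnx

def get_input_names(model):
    # Hypothetical sketch: graph inputs fed by the user, i.e. not backed by an initializer.
    initializer_names = {init.name for init in model.graph.initializer}
    return [vi.name for vi in model.graph.input if vi.name not in initializer_names]

def get_shape(model, name):
    # Hypothetical sketch: the static shape recorded for a given graph input.
    for vi in model.graph.input:
        if vi.name == name:
            return [d.dim_value for d in vi.type.tensor_type.shape.dim]
    raise ValueError("No graph input named '{}'".format(name))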
Example #13
 def forward(
         model: onnx.ModelProto, inputs: Tensors, custom_lib: Optional[str]=None
 ) -> Dict[str, np.ndarray]:
     sess_options = rt.SessionOptions()
     if custom_lib is not None:
         if os.path.exists(custom_lib):
             sess_options.register_custom_ops_library(custom_lib)
         else:
             raise ValueError("No such file '{}'".format(custom_lib))
     sess_options.graph_optimization_level = rt.GraphOptimizationLevel(0)
     sess_options.log_severity_level = 3
     sess = rt.InferenceSession(
         model.SerializeToString(),
         sess_options=sess_options,
         providers=["CPUExecutionProvider"],
     )
     outputs = [x.name for x in sess.get_outputs()]
     run_options = rt.RunOptions()
     run_options.log_severity_level = 3
     res = OrderedDict(
         zip(outputs, sess.run(outputs, inputs, run_options=run_options))
     )
     return res
Example #14
def detect():
    global INPUT_LAYER_NAME
    OPENME = {}

    setup_time_begin = time.time()

    # Load preprocessed image filenames:
    with open(IMAGE_LIST_FILE, 'r') as f:
        image_list = [s.strip() for s in f]

    images_total_count = len(image_list)
    first_index = SKIP_IMAGES
    last_index = BATCH_COUNT * BATCH_SIZE + first_index

    if first_index > images_total_count or last_index > images_total_count:
        print('********************************************')
        print('')
        print('DATASET SIZE EXCEEDED !!!')
        print('Dataset size  : {}'.format(images_total_count))
        print('CK_SKIP_IMAGES: {}'.format(SKIP_IMAGES))
        print('CK_BATCH_COUNT: {}'.format(BATCH_COUNT))
        print('CK_BATCH_SIZE : {}'.format(BATCH_SIZE))
        print('')
        print('********************************************')

    image_list = image_list[SKIP_IMAGES:BATCH_COUNT * BATCH_SIZE + SKIP_IMAGES]

    # Local list of processed files
    with open(IMAGE_LIST_FILE_NAME, 'w') as f:
        for line in image_list:
            f.write('{}\n'.format(line))

    # Load the ONNX model from file
    sess_options = rt.SessionOptions()
    # sess_options.session_log_verbosity_level = 0
    if CPU_THREADS > 0:
        sess_options.enable_sequential_execution = False
        sess_options.session_thread_pool_size = CPU_THREADS
    graph_load_time_begin = time.time()
    sess = rt.InferenceSession(MODEL_PATH, sess_options)
    graph_load_time = time.time() - graph_load_time_begin

    input_layer_names = [
        x.name for x in sess.get_inputs()
    ]  # FIXME: check that INPUT_LAYER_NAME belongs to this list
    INPUT_LAYER_NAME = INPUT_LAYER_NAME or input_layer_names[0]

    output_layer_names = [
        x.name for x in sess.get_outputs()
    ]  # FIXME: check that OUTPUT_LAYER_NAME belongs to this list

    model_input_shape = sess.get_inputs()[0].shape
    model_input_type = sess.get_inputs()[0].type
    model_input_type = np.uint8 if model_input_type == 'tensor(uint8)' else np.float32  # FIXME: there must be a more humane way!

    # a more portable way to detect the number of classes
    for output in sess.get_outputs():
        if output.name == OUTPUT_LAYER_LABELS:
            model_classes = output.shape[1]

    labels = load_labels(LABELS_PATH)
    #bg_class_offset = model_classes-len(labels)  # 1 means the labels represent classes 1..1000 and the background class 0 has to be skipped
    bg_class_offset = 1

    if MODEL_DATA_LAYOUT == 'NHWC':
        (samples, height, width, channels) = model_input_shape
    else:
        (samples, channels, height, width) = model_input_shape

    print("Data layout: {}".format(MODEL_DATA_LAYOUT))
    print("Input layers: {}".format(input_layer_names))
    print("Output layers: {}".format(output_layer_names))
    print("Input layer name: " + INPUT_LAYER_NAME)
    print("Expected input shape: {}".format(model_input_shape))
    print("Expected input type: {}".format(model_input_type))
    print("Output layer names: " + ", ".join(
        [OUTPUT_LAYER_BBOXES, OUTPUT_LAYER_LABELS, OUTPUT_LAYER_SCORES]))
    print("Data normalization: {}".format(MODEL_NORMALIZE_DATA))
    print("Background/unlabelled classes to skip: {}".format(bg_class_offset))
    print("")

    setup_time = time.time() - setup_time_begin

    # Run batched mode
    test_time_begin = time.time()
    total_load_time = 0
    total_detection_time = 0
    first_detection_time = 0
    images_loaded = 0

    ## Due to error in ONNX Resnet34 model
    class_map = None
    if (SKIPPED_CLASSES):
        class_map = []
        for i in range(len(labels) + bg_class_offset):
            if i not in SKIPPED_CLASSES:
                class_map.append(i)

    for image_index in range(BATCH_COUNT):

        if FULL_REPORT or (image_index % 10 == 0):
            print("\nBatch {} of {}".format(image_index + 1, BATCH_COUNT))

        begin_time = time.time()
        file_name, width, height = image_list[image_index].split(";")
        width = float(width)
        height = float(height)
        img_file = os.path.join(IMAGE_DIR, file_name)
        batch_data = load_preprocessed_file(img_file).astype(model_input_type)

        load_time = time.time() - begin_time
        total_load_time += load_time
        images_loaded += 1
        if FULL_REPORT:
            print("Batch loaded in %fs" % load_time)

        # Detect batch
        begin_time = time.time()
        run_options = rt.RunOptions()
        # run_options.run_log_verbosity_level = 0
        batch_results = sess.run(
            [OUTPUT_LAYER_BBOXES, OUTPUT_LAYER_LABELS, OUTPUT_LAYER_SCORES],
            {INPUT_LAYER_NAME: batch_data}, run_options)
        detection_time = time.time() - begin_time
        if FULL_REPORT:
            print("Batch classified in %fs" % detection_time)

        total_detection_time += detection_time
        # Remember first batch prediction time
        if image_index == 0:
            first_detection_time = detection_time

        # Process results
        # res_name = file.with.some.name.ext -> file.with.some.name.txt
        res_name = ".".join(file_name.split(".")[:-1]) + ".txt"
        res_file = os.path.join(DETECTIONS_OUT_DIR, res_name)
        with open(res_file, 'w') as f:
            f.write('{:d} {:d}\n'.format(int(width), int(height)))
            for i in range(len(batch_results[2][0])):
                score = batch_results[2][0][i]
                if score > SCORE_THRESHOLD:
                    if class_map:
                        class_num = class_map[batch_results[1][0][i]]
                    else:
                        class_num = batch_results[1][0][i] + bg_class_offset
                    class_name = labels[class_num - bg_class_offset]
                    box = batch_results[0][0][i]
                    x1 = box[0] * width
                    y1 = box[1] * height
                    x2 = box[2] * width
                    y2 = box[3] * height
                    f.write(
                        '{:.2f} {:.2f} {:.2f} {:.2f} {:.3f} {} {}\n'.format(
                            x1, y1, x2, y2, score, class_num, class_name))

    test_time = time.time() - test_time_begin

    if BATCH_COUNT > 1:
        avg_detection_time = (total_detection_time - first_detection_time) / (
            images_loaded - BATCH_SIZE)
    else:
        avg_detection_time = total_detection_time / images_loaded

    avg_load_time = total_load_time / images_loaded

    # Save processed images ids list to be able to run
    # evaluation without repeating detections (CK_SKIP_DETECTION=YES)
    # with open(IMAGE_LIST_FILE, 'w') as f:
    #    f.write(json.dumps(processed_image_ids))

    OPENME['setup_time_s'] = setup_time
    OPENME['test_time_s'] = test_time
    OPENME['load_images_time_total_s'] = total_load_time
    OPENME['load_images_time_avg_s'] = avg_load_time
    OPENME['prediction_time_total_s'] = total_detection_time
    OPENME['prediction_time_avg_s'] = avg_detection_time
    OPENME['avg_time_ms'] = avg_detection_time * 1000
    OPENME[
        'avg_fps'] = 1.0 / avg_detection_time if avg_detection_time > 0 else 0

    run_time_state = {"run_time_state": OPENME}

    with open(TIMER_JSON, 'w') as o:
        json.dump(run_time_state, o, indent=2, sort_keys=True)

    return
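
The "there must be a more humane way" comment above refers to turning the type string reported by sess.get_inputs()[0].type into a NumPy dtype; one hedged alternative is a small lookup table:

import numpy as np

def ort_type_to_numpy(type_str, default=np.float32):
    # Hypothetical helper for the FIXME above: map ONNX Runtime type strings
    # such as 'tensor(float)' or 'tensor(uint8)' to NumPy dtypes.
    table = {
        'tensor(float)': np.float32,
        'tensor(double)': np.float64,
        'tensor(uint8)': np.uint8,
        'tensor(int32)': np.int32,
        'tensor(int64)': np.int64,
    }
    return table.get(type_str, default)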
Example #15
    def train_step(self, *args, **kwargs):
        r"""Train step method

        After forward pass, an ordered list with all outputs described at :py:attr:`ORTTrainer.model_desc` is returned.
        Additional information relevant to the train step is maintained by :py:attr:`ORTTrainer._train_step_info`.
        See :py:class:`.TrainStepInfo` for details.

        Args:
            *args: Arbitrary arguments that are used as model input (data only)
            **kwargs: Arbitrary keyword arguments that are used as model input (data only)

        Returns:
            ordered :py:obj:`list` with model outputs as described by :py:attr:`ORTTrainer.model_desc`
        """
        # Export model to ONNX
        if self._onnx_model is None:
            sample_input = self._prepare_model_input(self.model_desc.inputs, None, None, *args, **kwargs)
            self._init_onnx_model(sample_input)

        # Prepare inputs+lr and output descriptions
        inputs_desc = self._model_desc_inputs_with_lr
        outputs_desc = self.model_desc.outputs

        # Train step must be incremented *before* gradient accumulation code
        # Gradients are accumulated when
        # self._train_step_info.step % self.options.batch.gradient_accumulation_steps != 0,
        # and they are updated otherwise
        self._train_step_info.step += 1

        # RunOptions
        run_options = None
        mixed_precision_without_fetches = False
        if self._train_step_info.fetches:
            outputs_desc = [o_desc for o_desc in outputs_desc if o_desc.name in self._train_step_info.fetches]
            if len(outputs_desc) != len(self._train_step_info.fetches):
                raise RuntimeError("The specified fetches list contains invalid output names")
        elif self._train_step_info.step % self.options.batch.gradient_accumulation_steps != 0:
            run_options = ort.RunOptions()
            run_options.only_execute_path_to_fetches = True
            outputs_desc = self._model_desc_outputs_with_gradient_accumulation
        elif self.options.mixed_precision.enabled:
            mixed_precision_without_fetches = True
            outputs_desc = self._model_desc_outputs_with_all_finite

        # Update Learning Rate if Necessary
        lr = self.optim_config.lr
        if self.options.lr_scheduler:
            lr = self.options.lr_scheduler._step(self._train_step_info)[0]

        # Loss Scale for mixed precision
        loss_scale = None
        if self.options.mixed_precision.enabled:
            loss_scaler = self.options.mixed_precision.loss_scaler
            assert loss_scaler, "Loss scaler is required when mixed precision is enabled"
            loss_scale = torch.tensor([loss_scaler.loss_scale])
            inputs_desc = self._model_desc_inputs_with_lr_and_loss_scale

        # Get data. CombineTorchModelLossFn takes label as last input and outputs loss first
        input = self._prepare_model_input(inputs_desc, lr, loss_scale, *args, **kwargs)

        # Normalize input
        if not isinstance(args, (list, tuple)):
            args = (args,)

        # Run a train step and return
        session_run_results = self._training_session_run_helper(True, input, inputs_desc,
                                                                outputs_desc, run_options)
        if mixed_precision_without_fetches:
            # After session run with all_fp32_gradients_finite, we need to clear the training I/O binding's output
            # Otherwise next run with only_execute_path_to_fetches will lead to gradient all reduce
            # because all_fp32_gradients_finite is still in the feed.
            self._train_io_binding.clear_binding_outputs()

            is_all_finite = session_run_results[self.model_desc.all_finite.name]
            self._train_step_info.all_finite = is_all_finite
            if loss_scaler:
                loss_scaler.update(self._train_step_info)
            if is_all_finite:
                # Optimization step must be incremented *after* optimization is successful
                self._train_step_info.optimization_step += 1
        elif self._train_step_info.step % self.options.batch.gradient_accumulation_steps == 0:
            # Optimization step must be incremented *after* optimization is successful
            self._train_step_info.optimization_step += 1

        # Output must be returned in the same order as defined in the model description
        # or in the order specified by TrainStepInfo.fetches, if applicable
        if self._train_step_info.fetches:
            results = [session_run_results[o_desc] for o_desc in self._train_step_info.fetches]
        else:
            results = [session_run_results[o_desc.name] for o_desc in self.model_desc.outputs]
        return results[0] if len(results) == 1 else results
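
A hedged usage sketch for the train_step above: with gradient accumulation enabled, most calls only accumulate gradients and the optimizer runs on every Nth call. The trainer construction and the data loader are assumed to exist elsewhere.

# Hypothetical loop; `trainer` is an ORTTrainer whose options set
# batch.gradient_accumulation_steps = 4, and `loader` yields (inputs, labels).
for step, (inputs, labels) in enumerate(loader):
    loss = trainer.train_step(inputs, labels)
    # On 3 out of every 4 calls gradients are only accumulated (via
    # only_execute_path_to_fetches); the 4th call updates the weights and
    # advances trainer._train_step_info.optimization_step.
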
def compare_runtime(test,
                    decimal=5,
                    options=None,
                    verbose=False,
                    context=None,
                    comparable_outputs=None):
    """
    The function compares the expected output (computed with
    the model before being converted to ONNX) and the ONNX output
    produced with module *onnxruntime*.
    
    :param test: dictionary with the following keys:
        - *onnx*: onnx model (filename or object)
        - *expected*: expected output (filename pkl or object)
        - *data*: input data (filename pkl or object)
    :param decimal: precision of the comparison
    :param options: comparison options
    :param context: specifies custom operators
    :param verbose: in case of error, the function may print
        more information on the standard output
    :param comparable_outputs: compare only these outputs
    :return: tuple (output, lambda function to run the predictions)

    The function raises an error if the comparison fails.
    """
    lambda_onnx = None
    if context is None:
        context = {}
    load = load_data_and_model(test, **context)
    if verbose:
        print("[compare_runtime] test '{}' loaded".format(test['onnx']))

    onx = test['onnx']
    if options is None:
        if isinstance(onx, str):
            options = extract_options(onx)
        else:
            options = {}
    elif not isinstance(options, dict):
        raise TypeError("options must be a dictionary.")

    try:
        import onnxruntime
    except ImportError as e:
        warnings.warn("Unable to import onnxruntime.")
        return None

    if verbose:
        print("[compare_runtime] InferenceSession('{}')".format(onx))

    try:
        sess = onnxruntime.InferenceSession(onx)
    except ExpectedAssertionError as expe:
        raise expe
    except Exception as e:
        if "CannotLoad" in options:
            raise ExpectedAssertionError(
                "Unable to load onnx '{0}' due to\n{1}".format(onx, e))
        else:
            if verbose:
                import onnx
                model = onnx.load(onx)
                smodel = "\nJSON ONNX\n" + str(model)
            else:
                smodel = ""
            raise OnnxRuntimeAssertionError(
                "Unable to load onnx '{0}'\nONNX\n{1}".format(onx, smodel))

    input = load["data"]
    DF = options.pop('DF', False)
    if DF:
        inputs = {c: input[c].values for c in input.columns}
        for k in inputs:
            if inputs[k].dtype == numpy.float64:
                inputs[k] = inputs[k].astype(numpy.float32)
            inputs[k] = inputs[k].reshape((inputs[k].shape[0], 1))
    else:
        if isinstance(input, dict):
            inputs = input
        elif isinstance(input, (list, numpy.ndarray, pandas.DataFrame)):
            inp = sess.get_inputs()
            if len(inp) == len(input):
                inputs = {i.name: v for i, v in zip(inp, input)}
            elif len(inp) == 1:
                inputs = {inp[0].name: input}
            elif isinstance(input, numpy.ndarray):
                shape = sum(i.shape[1] if len(i.shape) == 2 else i.shape[0]
                            for i in inp)
                if shape == input.shape[1]:
                    inputs = {n.name: input[:, i] for i, n in enumerate(inp)}
                else:
                    raise OnnxRuntimeAssertionError(
                        "Wrong number of inputs onnx {0} != original shape {1}, onnx='{2}'"
                        .format(len(inp), input.shape, onx))
            elif isinstance(input, list):
                try:
                    array_input = numpy.array(input)
                except Exception as e:
                    raise OnnxRuntimeAssertionError(
                        "Wrong number of inputs onnx {0} != original {1}, onnx='{2}'"
                        .format(len(inp), len(input), onx))
                shape = sum(i.shape[1] for i in inp)
                if shape == array_input.shape[1]:
                    inputs = {}
                    c = 0
                    for i, n in enumerate(inp):
                        d = c + n.shape[1]
                        inputs[n.name] = _create_column(
                            [row[c:d] for row in input], n.type)
                        c = d
                else:
                    raise OnnxRuntimeAssertionError(
                        "Wrong number of inputs onnx {0} != original shape {1}, onnx='{2}'*"
                        .format(len(inp), array_input.shape, onx))
            elif isinstance(input, pandas.DataFrame):
                try:
                    array_input = numpy.array(input)
                except Exception as e:
                    raise OnnxRuntimeAssertionError(
                        "Wrong number of inputs onnx {0} != original {1}, onnx='{2}'"
                        .format(len(inp), len(input), onx))
                shape = sum(i.shape[1] for i in inp)
                if shape == array_input.shape[1]:
                    inputs = {}
                    c = 0
                    for i, n in enumerate(inp):
                        d = c + n.shape[1]
                        inputs[n.name] = _create_column(
                            input.iloc[:, c:d], n.type)
                        c = d
                else:
                    raise OnnxRuntimeAssertionError(
                        "Wrong number of inputs onnx {0}={1} columns != original shape {2}, onnx='{3}'*"
                        .format(len(inp), shape, array_input.shape, onx))
            else:
                raise OnnxRuntimeAssertionError(
                    "Wrong type of inputs onnx {0}, onnx='{2}'".format(
                        type(input), onx))
        else:
            raise OnnxRuntimeAssertionError(
                "Dict or list is expected, not {0}".format(type(input)))

        for k in inputs:
            if isinstance(inputs[k], list):
                inputs[k] = numpy.array(inputs[k])

    OneOff = options.pop('OneOff', False)
    OneOffArray = options.pop('OneOffArray', False)
    options.pop('SklCol', False)  # unused here but in dump_data_and_model
    if OneOff or OneOffArray:
        if verbose:
            print(
                "[compare_runtime] OneOff: type(inputs)={} len={} OneOffArray={}"
                .format(type(input), len(inputs), OneOffArray))
        if len(inputs) == 1 and not OneOffArray:
            name, values = list(inputs.items())[0]
            res = []
            for input in values:
                try:
                    one = sess.run(None, {name: input})
                    if lambda_onnx is None:
                        lambda_onnx = lambda: sess.run(None, {name: input})
                    if verbose:
                        import pprint
                        pprint.pprint(one)
                except ExpectedAssertionError as expe:
                    raise expe
                except Exception as e:
                    raise OnnxRuntimeAssertionError(
                        "Unable to run onnx '{0}' due to {1}".format(onx, e))
                res.append(one)
            if verbose:
                print("[compare_runtime] OneOff: _post_process_output1")
            output = _post_process_output(res)
        else:

            def to_array(vv):
                if isinstance(
                        vv, (numpy.ndarray, numpy.int64, numpy.float32, str)):
                    return numpy.array([vv])
                else:
                    return numpy.array([vv], dtype=numpy.float32)

            t = list(inputs.items())[0]
            res = []
            for i in range(0, len(t[1])):
                iii = {k: to_array(v[i]) for k, v in inputs.items()}
                try:
                    one = sess.run(None, iii)
                    if lambda_onnx is None:
                        lambda_onnx = lambda: sess.run(None, iii)
                    if verbose:
                        import pprint
                        pprint.pprint(one)
                except ExpectedAssertionError as expe:
                    raise expe
                except Exception as e:
                    if verbose:
                        import onnx
                        model = onnx.load(onx)
                        smodel = "\nJSON ONNX\n" + str(model)
                    else:
                        smodel = ""
                    raise OnnxRuntimeAssertionError(
                        "Unable to run onnx '{0}' due to {1}{2}".format(
                            onx, e, smodel))
                res.append(one)
            if verbose:
                print("[compare_runtime] OneOff: _post_process_output2")
            output = _post_process_output(res)

            if OneOffArray:
                if isinstance(output, list):
                    pass
                elif not isinstance(output, numpy.ndarray):
                    raise TypeError("output must be an array, not {}".format(
                        type(output)))
                else:
                    output = [output]
    else:
        if verbose:
            print("[compare_runtime] type(inputs)={} len={} names={}".format(
                type(input), len(inputs), list(sorted(inputs))))
        if verbose:
            run_options = onnxruntime.RunOptions()
            run_options.run_log_verbosity_level = 5
        else:
            run_options = None
        try:
            output = sess.run(None, inputs, run_options)
            lambda_onnx = lambda: sess.run(None, inputs)
            if verbose:
                import pprint
                pprint.pprint(output)
        except ExpectedAssertionError as expe:
            raise expe
        except RuntimeError as e:
            if "-Fail" in onx:
                raise ExpectedAssertionError(
                    "onnxruntime cannot compute the prediction for '{0}'".
                    format(onx))
            else:
                if verbose:
                    import onnx
                    model = onnx.load(onx)
                    smodel = "\nJSON ONNX\n" + str(model)
                else:
                    smodel = ""
                raise OnnxRuntimeAssertionError(
                    "onnxruntime cannot compute the prediction for '{0}' due to {1}{2}"
                    .format(onx, e, smodel))
        except Exception as e:
            raise OnnxRuntimeAssertionError(
                "Unable to run onnx '{0}' due to {1}".format(onx, e))
        if verbose:
            print("[compare_runtime] done type={}".format(type(output)))

    output0 = output.copy()

    if comparable_outputs:
        cmp_exp = [load["expected"][o] for o in comparable_outputs]
        cmp_out = [output[o] for o in comparable_outputs]
    else:
        cmp_exp = load["expected"]
        cmp_out = output

    try:
        _compare_expected(cmp_exp,
                          cmp_out,
                          sess,
                          onx,
                          decimal=decimal,
                          verbose=verbose,
                          **options)
    except ExpectedAssertionError as expe:
        raise expe
    except Exception as e:
        if verbose:
            import onnx
            model = onnx.load(onx)
            smodel = "\nJSON ONNX\n" + str(model)
        else:
            smodel = ""
        raise OnnxRuntimeAssertionError(
            "Model '{0}' has discrepencies.\n{1}: {2}{3}".format(
                onx, type(e), e, smodel))

    return output0, lambda_onnx
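
A hedged usage sketch for compare_runtime, built from the keys its docstring lists; the file names are placeholders rather than real artifacts:

# Hypothetical call: 'onnx' points at the converted model, 'expected' and 'data'
# at pickled expected outputs and input data, as described in the docstring above.
test = {
    "onnx": "model.onnx",
    "expected": "expected.pkl",
    "data": "data.pkl",
}
output, rerun_onnx = compare_runtime(test, decimal=4, verbose=True)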
Example #17
def main():
    global INPUT_LAYER_NAME
    OPENME = {}

    setup_time_begin = time.time()

    # Load the ONNX model from file
    sess_options = rt.SessionOptions()
    # sess_options.session_log_verbosity_level = 0
    if CPU_THREADS > 0:
        sess_options.enable_sequential_execution = False
        sess_options.session_thread_pool_size = CPU_THREADS
    graph_load_time_begin = time.time()
    sess = rt.InferenceSession(MODEL_PATH, sess_options)
    graph_load_time = time.time() - graph_load_time_begin

    input_layer_names = [
        x.name for x in sess.get_inputs()
    ]  # FIXME: check that INPUT_LAYER_NAME belongs to this list
    INPUT_LAYER_NAME = INPUT_LAYER_NAME or input_layer_names[0]

    output_layer_names = [
        x.name for x in sess.get_outputs()
    ]  # FIXME: check that OUTPUT_LAYER_NAME belongs to this list

    model_input_shape = sess.get_inputs()[0].shape
    model_input_type = sess.get_inputs()[0].type
    model_input_type = np.uint8 if model_input_type == 'tensor(uint8)' else np.float32  # FIXME: there must be a more humane way!

    # a more portable way to detect the number of classes
    for output in sess.get_outputs():
        if output.name == OUTPUT_LAYER_LABELS:
            model_classes = output.shape[1]

    print("Data layout: {}".format(MODEL_DATA_LAYOUT))
    print("Input layers: {}".format(input_layer_names))
    print("Output layers: {}".format(output_layer_names))
    print("Input layer name: " + INPUT_LAYER_NAME)
    print("Expected input shape: {}".format(model_input_shape))
    print("Expected input type: {}".format(model_input_type))
    print("Output layer names: " + ", ".join(
        [OUTPUT_LAYER_BBOXES, OUTPUT_LAYER_LABELS, OUTPUT_LAYER_SCORES]))
    print("Data normalization: {}".format(MODEL_NORMALIZE_DATA))
    print("Background/unlabelled classes to skip: {}".format(bg_class_offset))
    print("")

    try:
        expected_batch_size = int(model_input_shape[0])
        if BATCH_SIZE != expected_batch_size:
            raise Exception(
                "expected_batch_size={}, desired CK_BATCH_SIZE={}, they do not match - exiting."
                .format(expected_batch_size, BATCH_SIZE))
    except ValueError:
        max_batch_size = None

    setup_time = time.time() - setup_time_begin

    # Run batched mode
    test_time_begin = time.time()
    total_load_time = 0
    next_batch_offset = 0
    total_inference_time = 0
    first_inference_time = 0
    images_loaded = 0

    for batch_index in range(BATCH_COUNT):
        batch_number = batch_index + 1

        begin_time = time.time()
        current_batch_offset = next_batch_offset
        batch_data, next_batch_offset = load_preprocessed_batch(
            image_filenames, current_batch_offset)

        load_time = time.time() - begin_time
        total_load_time += load_time
        images_loaded += BATCH_SIZE

        # Detect batch
        begin_time = time.time()
        run_options = rt.RunOptions()
        # run_options.run_log_verbosity_level = 0
        batch_results = sess.run(
            [OUTPUT_LAYER_BBOXES, OUTPUT_LAYER_LABELS, OUTPUT_LAYER_SCORES],
            {INPUT_LAYER_NAME: batch_data}, run_options)
        inference_time = time.time() - begin_time

        print("[batch {} of {}] loading={:.2f} ms, inference={:.2f} ms".format(
            batch_number, BATCH_COUNT, load_time * 1000,
            inference_time * 1000))

        total_inference_time += inference_time
        # Remember first batch prediction time
        if batch_index == 0:
            first_inference_time = inference_time

        # Process results
        for index_in_batch in range(BATCH_SIZE):
            global_image_index = current_batch_offset + index_in_batch
            width_orig, height_orig = original_w_h[global_image_index]

            filename_orig = image_filenames[global_image_index]
            detections_filename = os.path.splitext(filename_orig)[0] + '.txt'
            detections_filepath = os.path.join(DETECTIONS_OUT_DIR,
                                               detections_filename)
            with open(detections_filepath, 'w') as f:
                f.write('{:d} {:d}\n'.format(width_orig, height_orig))
                for i in range(len(batch_results[2][index_in_batch])):
                    confidence = batch_results[2][index_in_batch][i]
                    if confidence > SCORE_THRESHOLD:
                        class_number = int(batch_results[1][index_in_batch][i])
                        if class_map:
                            class_number = class_map[class_number]
                        else:
                            class_number = class_number

                        box = batch_results[0][index_in_batch][i]
                        x1 = box[0] * width_orig
                        y1 = box[1] * height_orig
                        x2 = box[2] * width_orig
                        y2 = box[3] * height_orig
                        class_label = class_labels[class_number -
                                                   bg_class_offset]
                        f.write('{:.2f} {:.2f} {:.2f} {:.2f} {:.3f} {} {}\n'.
                                format(x1, y1, x2, y2, confidence,
                                       class_number, class_label))

    test_time = time.time() - test_time_begin

    if BATCH_COUNT > 1:
        avg_inference_time = (total_inference_time - first_inference_time) / (
            images_loaded - BATCH_SIZE)
    else:
        avg_inference_time = total_inference_time / images_loaded

    avg_load_time = total_load_time / images_loaded

    # Save processed images ids list to be able to run
    # evaluation without repeating detections (CK_SKIP_DETECTION=YES)
    # with open(IMAGE_LIST_FILE, 'w') as f:
    #    f.write(json.dumps(processed_image_ids))

    OPENME['setup_time_s'] = setup_time
    OPENME['test_time_s'] = test_time
    OPENME['load_images_time_total_s'] = total_load_time
    OPENME['load_images_time_avg_s'] = avg_load_time
    OPENME['prediction_time_total_s'] = total_inference_time
    OPENME['prediction_time_avg_s'] = avg_inference_time
    OPENME['avg_time_ms'] = avg_inference_time * 1000
    OPENME[
        'avg_fps'] = 1.0 / avg_inference_time if avg_inference_time > 0 else 0

    run_time_state = {"run_time_state": OPENME}

    with open(TIMER_JSON, 'w') as o:
        json.dump(run_time_state, o, indent=2, sort_keys=True)
Example #18
    def train_step(self, *args, **kwargs):
        """
        inputs: model inputs, labels, learning rate, and, if in mixed_precision mode, loss_scale.
        outputs: if fetches is not provided, outputs are loss and
            (if in mixed mode and is finishing gradient accumulation) all_finite.
            if fetches is provided, outputs contains these requested with fetches.
        fetches: names of requested outputs
        """

        # inputs to the ONNX model includes inputs to the original PyTorch model
        # plus learning rate and loss_scale if self.use_mixed_precision is True.
        # 1. when there are internal learning_rate and loss_scale (in fp16 cases) generators,
        #   *args and **kwargs together contain ONLY and COMPLETE inputs to the PyTorch model.
        #   In this case, changes to the training script is minimized.
        # 2. without internal learning rate and loss scale (in fp16 cases) generators,
        #   *args and **kwargs passed in from the training script shall contains
        #   inputs to the PyTorch model plus learning_rate and loss_scale.
        #   it optionally contains the fetches.
        # localized arguments (*args) contains inputs to the ONNX model.
        # named arguments can contain both inputs, learning_rate and loss_scale, and the fetches

        learning_rate, loss_scale = None, None
        if self.get_lr_this_step_ is not None:
            # *args, **kwargs contain inputs to the pytorch model
            lr_this_step = self.get_lr_this_step_(self.global_step_)
            learning_rate = torch.tensor([lr_this_step])
        if self.loss_scaler_ is not None and self.use_mixed_precision:
            loss_scale = torch.tensor([self.loss_scaler_.loss_scale_])

        if self.onnx_model_ is None:
            sample_input, _ = self._prepare_input_and_fetches(
                self.model_desc_.inputs_, None, None, *args, **kwargs)
            self._init_onnx_model(sample_input)

        if self.use_mixed_precision:
            input, fetches = self._prepare_input_and_fetches(
                self.input_desc_with_lr_and_loss_scale, learning_rate,
                loss_scale, *args, **kwargs)
            assert len(self.input_desc_with_lr_and_loss_scale) == len(input)
            input_descs = self.input_desc_with_lr_and_loss_scale
        else:
            input, fetches = self._prepare_input_and_fetches(
                self.input_desc_with_lr, learning_rate, loss_scale, *args,
                **kwargs)
            assert len(self.input_desc_with_lr) == len(input)
            input_descs = self.input_desc_with_lr

        self.current_step += 1

        # handle gradient accumulation in fully optimized mode
        run_options = None
        has_if_all_finite = False
        if fetches:
            output_desc = [
                output for fetch in fetches
                for output in self.model_desc_.outputs_
                if output.name_ == fetch
            ]
        elif self.current_step % self.gradient_accumulation_steps != 0:
            run_options = ort.RunOptions()
            run_options.only_execute_path_to_fetches = True
            run_options.training_mode = True
            output_desc = self.output_desc_with_group_accumulated_gradients
        elif self.use_mixed_precision:
            has_if_all_finite = True
            output_desc = self.output_desc_with_all_fp_16_or_fp32_gradients_finite
        else:
            output_desc = self.model_desc_.outputs_

        if not isinstance(input, (list, tuple)):
            input = (input, )

        session_run_results = ort_training_session_run_helper(
            self.session, self.train_io_binding, input, input_descs,
            output_desc, self.device_, run_options)

        if has_if_all_finite:
            # After session run with all_fp32_gradients_finite, we need to clear the iobinding's output state.
            # Otherwise next run with only_execute_path_to_fetches will lead to gradient all reduce
            # because all_fp32_gradients_finite is still in the feed.
            self.train_io_binding.clear_binding_outputs()

            all_finite = session_run_results[
                self.output_desc_with_all_fp_16_or_fp32_gradients_finite[-1].
                name_]
            if self.loss_scaler_ is not None:
                self.loss_scaler_.update_loss_scale(all_finite)
            if all_finite:
                # optimization is done, increase self.global_step_
                self.global_step_ = self.global_step_ + 1
        elif self.current_step % self.gradient_accumulation_steps == 0:
            # optimization is done, increase self.global_step_
            self.global_step_ = self.global_step_ + 1

        if fetches is not None:
            results = [session_run_results[fetch] for fetch in fetches]
        elif has_if_all_finite and self.loss_scaler_ is None:
            # return the described outputs plus the all_finite flag so that the training script can handle loss scaling.
            results = [
                session_run_results[output_desc.name_] for output_desc in
                self.output_desc_with_all_fp_16_or_fp32_gradients_finite
            ]
        else:
            results = [
                session_run_results[output_desc.name_]
                for output_desc in self.model_desc_.outputs_
            ]

        return results[0] if len(results) == 1 else results
Example #19
import os

import numpy as np

import onnxruntime as ort

os.environ["ORT_TENSORRT_INT8_ENABLE"] = "1"
os.environ["ORT_TENSORRT_INT8_USE_NATIVE_CALIBRATION_TABLE"] = "0"
os.environ["ORT_TENSORRT_ENGINE_CACHE_ENABLE"] = "1"

sess_opt = ort.SessionOptions()
sess_opt.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
print("Create inference session...")
execution_provider = ["TensorrtExecutionProvider", "CUDAExecutionProvider"]
sess = ort.InferenceSession("model.onnx",
                            sess_options=sess_opt,
                            providers=execution_provider)
run_opt = ort.RunOptions()

sequence = 128
batch = 1
input_ids = np.ones((batch, sequence), dtype=np.int64)
attention_mask = np.ones((batch, sequence), dtype=np.int64)
token_type_ids = np.ones((batch, sequence), dtype=np.int64)

print("Warm up phase...")
sess.run(
    None,
    {
        sess.get_inputs()[0].name: input_ids,
        sess.get_inputs()[1].name: attention_mask,
        sess.get_inputs()[2].name: token_type_ids,
    },
)

#sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL
sess_options.enable_profiling = True
sess_options.log_severity_level = 0
sess_options.log_verbosity_level = 0

# For OnnxRuntime 1.2.0, you might need set intra_op_num_threads to 1 to enable OpenMP
#    sess_options.intra_op_num_threads=1
# For OnnxRuntime 1.3.0 or later, it is recommended to use the default setting so you need not set it.

# Specify providers when you use onnxruntime-gpu for CPU inference.
session = onnxruntime.InferenceSession(export_model_path,
                                       sess_options,
                                       providers=['CPUExecutionProvider'])

run_options = onnxruntime.RunOptions()
run_options.log_severity_level = 0
run_options.log_verbosity_level = 0

latency = []
for i in range(total_samples):
    data = dataset[i]
    ort_inputs = {
        'input_ids': data[0].cpu().reshape(1, max_seq_length).numpy(),
        'input_mask': data[1].cpu().reshape(1, max_seq_length).numpy(),
        'segment_ids': data[2].cpu().reshape(1, max_seq_length).numpy()
    }
    start = time.time()
    ort_outputs = session.run(None, ort_inputs, run_options=run_options)
    latency.append(time.time() - start)
print("OnnxRuntime cpu Inference time = {} ms".format(