def testSessionProviders(self):
    """Check that an explicit provider list restricts a session to the CPU.

    The assertion only runs when 'CUDAExecutionProvider' is reported as
    available; otherwise the test does nothing.
    """
    if 'CUDAExecutionProvider' not in onnxrt.get_available_providers():
        return

    # Build a brand-new session that is limited to the CPU provider.
    session = onnxrt.InferenceSession(get_name("mul_1.onnx"),
                                      providers=['CPUExecutionProvider'])
    active_providers = session.get_providers()
    self.assertEqual(['CPUExecutionProvider'], active_providers)
# NOTE(review): this is the tail of a tuple of class labels whose opening is
# above this view (presumably CLASSES, which is referenced below -- confirm).
# Several labels carry a trailing space; they are runtime strings and are
# left untouched.
"banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog",
"pizza ", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
"diningtable", "toilet ", "tvmonitor", "laptop ", "mouse ", "remote ",
"keyboard ", "cell phone", "microwave", "oven ", "toaster", "sink",
"refrigerator ", "book", "clock", "vase", "scissors ", "teddy bear ",
"hair drier", "toothbrush ")

# Network input size and three groups of anchor values passed to Detect.
SIZE = (416, 416)
anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
# NOTE(review): 80 is presumably the number of classes -- confirm against Detect.
d = Detect(SIZE, 80, anchors)

# Lazily enumerate every file found under the (hard-coded) image directory.
dirname = r"D:\Workspace\test_space_01\yolov5\yolov5-3.1_train\inference\images"
fs = (os.path.join(p, name) for p, _, names in os.walk(dirname) for name in names)

# Load the exported model and print its input/output descriptions.
session = onnxruntime.InferenceSession(
    r"D:\Workspace\test_space_01\yolov5\yolov5-4.0\yolov5-4.0\weights\yolov5m_416x416.onnx"
)
for i in session.get_inputs():
    print(i)
for i in session.get_outputs():
    print(i)

# One random RGB triple per class label.
colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(CLASSES))]

for img_path in fs:
    # Only process files with a known image extension.
    if os.path.splitext(img_path)[1].lower() not in (".jpg", ".png", ".jpeg"):
        continue
    print("-" * 70)
    # NOTE(review): these two lists do not depend on img_path and could be
    # computed once before the loop.
    input_names = list(map(lambda x: x.name, session.get_inputs()))
    output_names = list(map(lambda x: x.name, session.get_outputs()))
    img_src = cv2.imread(img_path)
with open("pipeline_xgboost.onnx", "wb") as f: f.write(model_onnx.SerializeToString()) ########################### # Compare the predictions # +++++++++++++++++++++++ # # Predictions with XGBoost. print("predict", pipe.predict(X[:5])) print("predict_proba", pipe.predict_proba(X[:1])) ########################## # Predictions with onnxruntime. sess = rt.InferenceSession("pipeline_xgboost.onnx") pred_onx = sess.run(None, {"input": X[:5].astype(numpy.float32)}) print("predict", pred_onx[0]) print("predict_proba", pred_onx[1][:1]) ################################## # Display the ONNX graph # ++++++++++++++++++++++ pydot_graph = GetPydotGraph(model_onnx.graph, name=model_onnx.graph.name, rankdir="TB", node_producer=GetOpNodeProducer("docstring", color="yellow", fillcolor="yellow", style="filled"))
# This part can normally be done in a separate process or on another # machine, but we will continue in the same process so that we can # verify that ONNX Runtime and PyTorch are computing the same value # for the network. # # In order to run the model with ONNX Runtime, we need to create an # inference session for the model with the chosen configuration # parameters (here we use the default config). # Once the session is created, we evaluate the model using the run() api. # The output of this call is a list containing the outputs of the model # computed by ONNX Runtime. # import onnxruntime ort_session = onnxruntime.InferenceSession("super_resolution.onnx") def to_numpy(tensor): return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy() # compute ONNX Runtime output prediction ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)} ort_outs = ort_session.run(None, ort_inputs) # compare ONNX Runtime and PyTorch results np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05) print("Exported model has been tested with ONNXRuntime, and the result looks good!") ######################################################################
def compare_runtime(test, decimal=5, options=None, verbose=False, context=None):
    """
    The function compares the expected output (computed with
    the model before being converted to ONNX) and the ONNX output
    produced with module *onnxruntime*.

    :param test: dictionary with the following keys:
        - *onnx*: onnx model (filename or object)
        - *expected*: expected output (filename pkl or object)
        - *data*: input data (filename pkl or object)
    :param decimal: precision of the comparison
    :param options: comparison options (a dictionary); when ``None`` and
        the model is given as a filename, the options are extracted from
        that filename. The dictionary passed by the caller is not modified.
    :param context: specifies custom operators
    :param verbose: in case of error, the function may print
        more information on the standard output
    :return: a copy of the raw onnxruntime output, or ``None`` when
        *onnxruntime* cannot be imported (a warning is emitted)
    :raises TypeError: if *options* is neither ``None`` nor a dictionary
    :raises ExpectedAssertionError: for failures the test declares as expected
    :raises OnnxRuntimeAssertionError: if the model cannot be loaded or run,
        the inputs cannot be mapped, or the outputs differ
    """

    def _model_dump():
        # Text dump of the model appended to error messages in verbose mode.
        # The import lives in this helper so that the name ``onnx`` never
        # becomes a local of compare_runtime (that used to turn every error
        # message formatted with it into an UnboundLocalError).
        if not verbose:
            return ""
        import onnx
        return "\nJSON ONNX\n" + str(onnx.load(onx))

    if context is None:
        context = {}
    load = load_data_and_model(test, **context)

    onx = test['onnx']
    if options is None:
        options = extract_options(onx) if isinstance(onx, str) else {}
    elif not isinstance(options, dict):
        raise TypeError("options must be a dictionary.")
    else:
        # 'OneOff' is popped below: work on a copy so the caller's
        # dictionary can be reused for another call.
        options = dict(options)

    try:
        import onnxruntime
    except ImportError:
        warnings.warn("Unable to import onnxruntime.")
        return

    try:
        sess = onnxruntime.InferenceSession(onx)
    except ExpectedAssertionError:
        raise
    except Exception as e:
        if "CannotLoad" in options:
            raise ExpectedAssertionError(
                "Unable to load onnx '{0}' due to\n{1}".format(onx, e))
        raise OnnxRuntimeAssertionError(
            "Unable to load onnx '{0}'\nONNX\n{1}".format(onx, _model_dump()))

    # Map the raw test data onto the named inputs of the session.
    data = load["data"]
    if isinstance(data, dict):
        inputs = data
    elif isinstance(data, (list, numpy.ndarray)):
        inp = sess.get_inputs()
        if len(inp) == len(data):
            # one value per model input
            inputs = {i.name: v for i, v in zip(inp, data)}
        elif len(inp) == 1:
            inputs = {inp[0].name: data}
        elif isinstance(data, numpy.ndarray):
            # one column of the array per model input
            shape = sum(i.shape[1] if len(i.shape) == 2 else i.shape[0]
                        for i in inp)
            if shape == data.shape[1]:
                inputs = {n.name: data[:, i] for i, n in enumerate(inp)}
            else:
                raise OnnxRuntimeAssertionError(
                    "Wrong number of inputs onnx {0} != original shape {1}, onnx='{2}'"
                    .format(len(inp), data.shape, onx))
        elif isinstance(data, list):
            try:
                array_input = numpy.array(data)
            except Exception:
                raise OnnxRuntimeAssertionError(
                    "Wrong number of inputs onnx {0} != original {1}, onnx='{2}'"
                    .format(len(inp), len(data), onx))
            shape = sum(i.shape[1] for i in inp)
            if shape == array_input.shape[1]:
                inputs = {
                    n.name: _create_column([row[i] for row in data], n.type)
                    for i, n in enumerate(inp)
                }
            else:
                raise OnnxRuntimeAssertionError(
                    "Wrong number of inputs onnx {0} != original shape {1}, onnx='{2}'*"
                    .format(len(inp), array_input.shape, onx))
        else:
            raise OnnxRuntimeAssertionError(
                "Wrong number of inputs onnx {0} != original {1}, onnx='{2}'".
                format(len(inp), len(data), onx))
    else:
        raise OnnxRuntimeAssertionError(
            "Dict or list is expected, not {0}".format(type(data)))

    for k in inputs:
        if isinstance(inputs[k], list):
            inputs[k] = numpy.array(inputs[k])

    OneOff = options.pop('OneOff', False)
    if OneOff:
        # Run the model one observation at a time.
        if len(inputs) == 1:
            name, values = list(inputs.items())[0]
            res = []
            for row in values:
                try:
                    one = sess.run(None, {name: row})
                except ExpectedAssertionError:
                    raise
                except Exception as e:
                    raise OnnxRuntimeAssertionError(
                        "Unable to run onnx '{0}' due to {1}".format(onx, e))
                res.append(one)
            output = _post_process_output(res)
        else:

            def to_array(vv):
                if isinstance(vv, (numpy.ndarray, numpy.int64, numpy.float32)):
                    return numpy.array([vv])
                else:
                    return numpy.array([vv], dtype=numpy.float32)

            # The first input gives the number of observations.
            t = list(inputs.items())[0]
            res = []
            for i in range(0, len(t[1])):
                iii = {k: to_array(v[i]) for k, v in inputs.items()}
                try:
                    one = sess.run(None, iii)
                except ExpectedAssertionError:
                    raise
                except Exception as e:
                    raise OnnxRuntimeAssertionError(
                        "Unable to run onnx '{0}' due to {1}".format(onx, e))
                res.append(one)
            output = _post_process_output(res)
    else:
        try:
            output = sess.run(None, inputs)
        except ExpectedAssertionError:
            raise
        except RuntimeError as e:
            # A model given as an object has no filename to inspect.
            if isinstance(onx, str) and "-Fail" in onx:
                raise ExpectedAssertionError(
                    "onnxruntime cannot compute the prediction for '{0}'".
                    format(onx))
            else:
                raise OnnxRuntimeAssertionError(
                    "onnxruntime cannot compute the prediction for '{0}' due to {1}"
                    .format(onx, e))
        except Exception as e:
            raise OnnxRuntimeAssertionError(
                "Unable to run onnx '{0}' due to {1}".format(onx, e))

    # Keep a copy of the output taken before the comparison; it is what the
    # function returns.
    output0 = output.copy()

    try:
        _compare_expected(load["expected"],
                          output,
                          sess,
                          onx,
                          decimal=decimal,
                          **options)
    except ExpectedAssertionError:
        raise
    except Exception as e:
        raise OnnxRuntimeAssertionError(
            "Model '{0}' has discrepencies.\n{1}: {2}{3}".format(
                onx, type(e), e, _model_dump()))

    return output0
# Convert the function `f` for two float32 inputs of shape [2, 3].
input_signature = [
    tf.TensorSpec([2, 3], tf.float32),
    tf.TensorSpec([2, 3], tf.float32)
]
onnx_model, _ = tf2onnx.convert.from_function(f, input_signature, opset=13)

a_val = np.ones([2, 3], np.float32)
b_val = np.zeros([2, 3], np.float32)

print("Tensorflow result")
print(f(a_val, b_val).numpy())

print("ORT result")
# The session is created from the serialized model bytes, not from a file.
sess = ort.InferenceSession(onnx_model.SerializeToString())
# NOTE(review): the feed names 'a' and 'b' presumably match the argument
# names of `f` -- confirm against its definition (outside this view).
res = sess.run(None, {'a': a_val, 'b': b_val})
print(res[0])

##################### Keras Model #####################

model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(4, activation="relu"))

# The Keras model is converted for a single [3, 3] float32 input named 'x'.
input_signature = [tf.TensorSpec([3, 3], tf.float32, name='x')]
onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature, opset=13)

x_val = np.ones((3, 3), np.float32)

print("Keras result")
print(model(x_val).numpy())
import argparse
import numpy as np
import sys
# Make the parent directory importable so the `ultraface` package resolves.
sys.path.append('..')
from ultraface.dependencies.box_utils import predict

# ------------------------------------------------------------------------------------------------------------------------------------------------
# Face detection using UltraFace-640 onnx model
face_detector_onnx = "../ultraface/models/version-RFB-640.onnx"

# Starting with ORT 1.10, the `providers` parameter must be set explicitly
# when instantiating InferenceSession if you want to use execution providers
# other than the default CPU provider (previously, providers were
# set/registered by default based on the build flags).
# For example, if an NVIDIA GPU is available and the ORT Python package is
# built with CUDA, call the API as follows:
# ort.InferenceSession(path/to/model, providers=['CUDAExecutionProvider'])
# NOTE(review): `ort` is not imported in the visible part of this file --
# confirm that `import onnxruntime as ort` exists above.
face_detector = ort.InferenceSession(face_detector_onnx)

# scale current rectangle to box
def scale(box):
    """Pad the shorter side of *box* so that it becomes (roughly) square.

    *box* is indexed as ``[x_min, y_min, x_max, y_max]``. The shorter side
    is extended by the same integer amount on both ends; the result can be
    one pixel short of square when the size difference is odd.

    Returns the padded box as a new list of four values.
    """
    width = box[2] - box[0]
    height = box[3] - box[1]
    maximum = max(width, height)
    # int() truncates, so the padding per side is rounded toward zero.
    dx = int((maximum - width)/2)
    dy = int((maximum - height)/2)

    bboxes = [box[0] - dx, box[1] - dy, box[2] + dx, box[3] + dy]
    return bboxes

# crop image
# NOTE(review): the body of this function continues past the visible part
# of the file.
def cropImage(image, box):
    # Rows are selected by the y range and columns by the x range of the box.
    num = image[box[1]:box[3], box[0]:box[2]]
def generate_test_data(onnx_file,
                       output_path,
                       batch_size,
                       sequence_length,
                       use_cpu=True,
                       input_tensor_only=False,
                       dictionary_size=DICT_SIZE,
                       test_cases=1,
                       output_optimized_model=False):
    """Generate random test cases for a model and compare optimized vs. unoptimized runs.

    For each test case a ``test_data_set_<n>`` directory is created under
    *output_path*, random ``input_ids`` are drawn, the model is run with all
    graph optimizations disabled, and the input plus every output are written
    as ``.pb`` tensors. The model is then run again with extended
    optimizations and its first output is compared with the baseline.

    :param onnx_file: path of the ONNX model
    :param output_path: directory that receives the test data sets
    :param batch_size: first dimension of the random input
    :param sequence_length: second dimension of the random input
    :param use_cpu: run the optimized session on CPU; otherwise CUDA is required
    :param input_tensor_only: return right after the first directory is created
    :param dictionary_size: exclusive upper bound of the random token ids
    :param test_cases: number of test cases to generate
    :param output_optimized_model: also save the optimized model next to *onnx_file*
    """
    for case_index in range(test_cases):
        token_ids = np.random.randint(dictionary_size,
                                      size=(batch_size, sequence_length),
                                      dtype=np.int64)
        input_tensor = numpy_helper.from_array(token_ids, 'input_ids')

        case_dir = os.path.join(output_path, 'test_data_set_' + str(case_index))
        try:
            os.mkdir(case_dir)
        except OSError:
            print("Creation of the directory %s failed" % case_dir)
        else:
            print("Successfully created the directory %s " % case_dir)

        # NOTE(review): this leaves the whole function, not just the current
        # test case, and nothing has been written to the directory yet.
        if input_tensor_only:
            return

        # Baseline run: every graph optimization disabled, CPU only.
        sess_options = onnxruntime.SessionOptions()
        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL
        baseline = onnxruntime.InferenceSession(onnx_file,
                                                sess_options,
                                                providers=['CPUExecutionProvider'])
        first_input_name = baseline.get_inputs()[0].name
        output_names = [node.name for node in baseline.get_outputs()]
        inputs = {first_input_name: token_ids}
        result = baseline.run(output_names, inputs)

        with open(os.path.join(case_dir, 'input_{}.pb'.format(0)), 'wb') as f:
            f.write(input_tensor.SerializeToString())

        # The first output and all remaining outputs are reshaped differently
        # before being written.
        for index, name in enumerate(output_names):
            if index == 0:
                target_shape = (batch_size, sequence_length,
                                new_parameters["hidden_size"])
            else:
                target_shape = (2, batch_size, new_parameters["num_heads"],
                                sequence_length,
                                new_parameters["size_per_head"])
            tensor_result = numpy_helper.from_array(
                np.asarray(result[index]).reshape(target_shape), name)
            with open(os.path.join(case_dir, 'output_{}.pb'.format(index)), 'wb') as f:
                f.write(tensor_result.SerializeToString())

        # Optimized run; the timer covers session creation and inference.
        start_time = timeit.default_timer()

        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
        if output_optimized_model:
            path_prefix = onnx_file[:-5]  # remove .onnx suffix
            suffix = "_optimized_cpu.onnx" if use_cpu else "_optimized_gpu.onnx"
            sess_options.optimized_model_filepath = path_prefix + suffix

        session = onnxruntime.InferenceSession(onnx_file, sess_options)
        if use_cpu:
            session.set_providers(['CPUExecutionProvider'])  # use cpu
        elif 'CUDAExecutionProvider' not in session.get_providers():
            print("Warning: GPU not found")
            continue

        outputs = session.run(None, inputs)
        elapsed = timeit.default_timer() - start_time

        if not np.allclose(outputs[0], result[0], rtol=1e-04, atol=1e-05):
            print("Error: not same result after optimization. use_cpu={}, no_opt_output={}, opt_output={}".format(
                use_cpu, result[0].tolist(), outputs[0].tolist()))
        print("** Evaluation done in total {} secs".format(elapsed))
from flask import Flask, request, jsonify
import torch
import numpy as np
from transformers import RobertaTokenizer
import onnxruntime

# The Flask app, the tokenizer and the ONNX session are created once at
# import time.
app = Flask(__name__)
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
session = onnxruntime.InferenceSession(
    "roberta-sequence-classification-9.onnx")


def to_numpy(tensor):
    """Return *tensor* as a NumPy array on the CPU, detaching it if it requires grad."""
    return (tensor.detach().cpu().numpy()
            if tensor.requires_grad else tensor.cpu().numpy())


@app.route("/")
def home():
    """Serve a short HTML banner at the root URL."""
    return "<h3>RoBERTa Sentiment Analysis Prediction Container</h3>"


@app.route("/predict", methods=["POST"])
def predict():
    """
    Input sample:

        [
            "Containers are good"
        ]

    Output sample:
import tensorflow as tf
import numpy as np
import onnxruntime as rt

# input value (batch_size, seq_length, input_size)
x_val = np.random.rand(16, 10, 512).astype(np.float32, copy=False)

# load lstm pb model
with tf.Session() as sess:
    print("load tensorflow graph")
    with tf.gfile.GFile("./models/lstm.pb", "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        sess.graph.as_default()
        tf.import_graph_def(graph_def, name='')
    # output_dict
    output_dict = [sess.graph.get_tensor_by_name("output_1:0")]
    expected = sess.run(output_dict, feed_dict={"input_1:0": x_val})
    print(expected)

# load lstm onnx model
# NOTE(review): `sess` and `expected` are rebound below, so the TensorFlow
# result is only available through the print above; the two results are not
# compared programmatically.
sess = rt.InferenceSession("./models/lstm.onnx")
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
expected = sess.run([label_name], {input_name: x_val})[0]
print(expected)
def get_input_name(self):
    """Look up and cache the name of the model's first input.

    Does nothing when ``self.input_name`` is already set (truthy).
    Otherwise a CPU-only inference session is created from
    ``self.model_path`` and the name of its first input is stored in
    ``self.input_name``. Always returns ``None``.
    """
    if not self.input_name:
        cpu_session = onnxruntime.InferenceSession(
            self.model_path, providers=['CPUExecutionProvider'])
        first_input = cpu_session.get_inputs()[0]
        self.input_name = first_input.name
def main():
    """Compile (or load) an ONNX model, run it, and optionally verify the outputs.

    All settings are read from the module-level ``args``. The steps are:

    1. parse ``args.shape_info`` into per-input shapes;
    2. load the model, optionally extending its outputs to every node output
       (``--verify onnxruntime`` together with ``args.verify_all_ops``);
    3. read the inputs from ``args.data_folder`` or generate random ones;
    4. compile the model with onnx-mlir unless ``args.load_so`` is given;
    5. run inference through ``ExecutionSession``;
    6. optionally save inputs/outputs and compare the outputs with a
       reference (onnxruntime or reference files) using ``args.atol`` /
       ``args.rtol``.

    Raises:
        AssertionError: on duplicate input indices in ``args.shape_info``,
            on inconsistent compile options, or when any output element
            differs from the reference by more than the tolerance.
    """
    # Get shape information if given.
    # args.shape_info in the form of 'input_index:d1xd2, input_index:d1xd2'
    input_shapes = {}
    if args.shape_info:
        for input_shape in args.shape_info.strip().split(","):
            input_index_shape = input_shape.split(":")
            # Convert before the duplicate check: the dictionary is keyed by
            # int, so testing the raw string could never detect a duplicate.
            input_index = int(input_index_shape[0])
            assert input_index not in input_shapes, "Duplicate input indices"
            dims = [int(d) for d in input_index_shape[1].split("x")]
            input_shapes[input_index] = dims

    # Load the onnx model.
    model = onnx.load(args.model_path)

    # Get the output names that we want to verify.
    # If using onnxruntime for verification, we can verify every operation output.
    output_names = [o.name for o in model.graph.output]
    output_names = list(OrderedDict.fromkeys(output_names))
    # The verify option is matched case-insensitively, as in the
    # verification step at the end of this function.
    if (args.verify and args.verify.lower() == "onnxruntime" and
            args.verify_all_ops):
        print("Extending the onnx model to check every node output ...\n")
        output_names = [
            n for node in model.graph.node for n in node.output if n != ''
        ]
        output_names = list(OrderedDict.fromkeys(output_names))
        model = extend_model_output(model, output_names)

    # Save the (possibly extended) onnx model if required.
    if (args.save_onnx):
        print("Saving the onnx model to ", args.save_onnx, "\n")
        onnx.save(model, args.save_onnx)

    # Compile, run, and verify.
    with tempfile.TemporaryDirectory() as temp_dir:
        print("Temporary directory has been created at {}".format(temp_dir))

        # Location of the model copy used by the compiler and by the
        # onnxruntime reference run.
        temp_model_path = os.path.join(temp_dir, "model.onnx")

        # Prepare input data.
        inputs = []
        input_names = []
        if args.data_folder:
            inputs, input_names = read_input_from_refs(model, args.data_folder)
        else:
            inputs, input_names = generate_random_input(model, input_shapes)

        # Print the input if required.
        if (args.print_input):
            for i, inp in enumerate(inputs):
                print("The {} input {}:[{}x{}] is: \n {} \n".format(
                    ordinal(i + 1), input_names[i],
                    'x'.join([str(i) for i in inp.shape]), inp.dtype, inp))

        shared_lib_path = ""

        # If a shared library is given, use it without compiling the ONNX model.
        # Otherwise, compile the ONNX model.
        if (args.load_so):
            shared_lib_path = args.load_so
        else:
            print("Compiling the model ...")
            # Save modified model & invoke onnx-mlir to compile it.
            shared_lib_path = os.path.join(temp_dir, "model.so")
            onnx.save(model, temp_model_path)

            # Prepare compiler arguments.
            command_str = ONNX_MLIR
            if args.compile_args:
                command_str += " " + args.compile_args
            if args.compile_using_input_shape:
                # Use shapes of the reference inputs to compile the model.
                assert args.data_folder, "No data folder given"
                assert "shapeInformation" not in command_str, "shape info was set"
                shape_info = "--shapeInformation="
                for i in range(len(inputs)):
                    shape_info += str(i) + ":" + 'x'.join(
                        [str(d) for d in inputs[i].shape]) + ","
                shape_info = shape_info[:-1]
                command_str += " " + shape_info
                warning("the shapes of the model's inputs will be " \
                    "changed to the shapes of the inputs in the data folder")
            command_str += " " + temp_model_path

            start = time.perf_counter()
            execute_commands(command_str)
            end = time.perf_counter()
            print(" took ", end - start, " seconds.\n")

            # Save the generated .so file of the model if required.
            if (args.save_so):
                print("Saving the shared library to", args.save_so, "\n")
                execute_commands('rsync -ar {} {}'.format(
                    shared_lib_path, args.save_so))

        # Use the generated shared library to create an execution session.
        print("Loading the compiled model ...")
        start = time.perf_counter()
        sess = ExecutionSession(shared_lib_path)
        end = time.perf_counter()
        print(" took ", end - start, " seconds.\n")

        print("Running inference ...")
        start = time.perf_counter()
        outs = sess.run(inputs)
        end = time.perf_counter()
        print(" took ", end - start, " seconds.\n")

        # Print the output if required.
        if (args.print_output):
            for i, out in enumerate(outs):
                print("The {} output {}:[{}x{}] is: \n {} \n".format(
                    ordinal(i + 1), output_names[i],
                    'x'.join([str(i) for i in out.shape]), out.dtype, out))

        # Store the input and output if required.
        if args.save_data:
            data_folder = args.save_data
            if not os.path.exists(data_folder):
                os.mkdir(data_folder)
            for i in range(len(inputs)):
                tensor = numpy_helper.from_array(inputs[i])
                tensor_path = os.path.join(data_folder,
                                           'input_{}.pb'.format(i))
                with open(tensor_path, 'wb') as f:
                    f.write(tensor.SerializeToString())
            for i in range(len(outs)):
                tensor = numpy_helper.from_array(outs[i])
                tensor_path = os.path.join(data_folder,
                                           'output_{}.pb'.format(i))
                with open(tensor_path, 'wb') as f:
                    f.write(tensor.SerializeToString())

        # Run the model with reference backend and get results.
        if (args.verify):
            ref_outs = []
            if (args.verify.lower() == "onnxruntime"):
                # Reference backend by using onnxruntime.
                import onnxruntime
                output_names = [o.name for o in model.graph.output]
                input_feed = dict(zip(input_names, inputs))
                # The model file is only written by the compile step; when a
                # prebuilt shared library was loaded it has to be saved here.
                if not os.path.exists(temp_model_path):
                    onnx.save(model, temp_model_path)
                print("Running inference using onnxruntime ...")
                start = time.perf_counter()
                ref_session = onnxruntime.InferenceSession(temp_model_path)
                ref_outs = ref_session.run(output_names, input_feed)
                end = time.perf_counter()
                print(" took ", end - start, " seconds.\n")
            elif (args.verify.lower() == "ref"):
                ref_outs = read_output_from_refs(model, args.data_folder)
            else:
                print("Invalid verify option")
                exit(1)

            # For each output tensor, compare results.
            for i, name in enumerate(output_names):
                print(
                    "Verifying value of {}:{}".format(name,
                                                      list(outs[i].shape)),
                    "using atol={}, rtol={} ...".format(args.atol, args.rtol))
                total_elements = 0
                mismatched_elements = 0
                for index, actual_val in np.ndenumerate(outs[i]):
                    total_elements += 1
                    ref_val = ref_outs[i][index]
                    # Use equation atol + rtol * abs(desired), that is used in assert_allclose.
                    diff = float(args.atol) + float(args.rtol) * abs(ref_val)
                    if (abs(actual_val - ref_val) <= diff):
                        continue
                    mismatched_elements += 1
                    print(" at {}".format(index),
                          "mismatch {} (actual)".format(actual_val),
                          "vs {} (reference)".format(ref_val))
                if mismatched_elements == 0:
                    print(" correct.\n")
                else:
                    raise AssertionError(
                        " mismatched elements {}/{}.\n".format(
                            mismatched_elements, total_elements))
def test_session_with_ortvalue_input(ortvalue):
    """Feed *ortvalue* as input "X" of mul_1.onnx and check output "Y".

    The fetched output must be equal to ``numpy_arr_output``, which, like
    ``self``, comes from the enclosing scope.
    """
    session = onnxrt.InferenceSession(get_name("mul_1.onnx"))
    fetched = session.run(["Y"], {"X": ortvalue})
    outputs_match = np.array_equal(fetched[0], numpy_arr_output)
    self.assertTrue(outputs_match)
def runBaseTest2():
    """Exercise get/set of CUDA provider options on a live session.

    Covers "cuda_mem_limit" (halve it, then restore it) and
    "arena_extend_strategy" (both valid strategies), then checks that
    invalid values make ``set_providers`` raise ``RuntimeError``.
    ``self`` comes from the enclosing test method.
    """
    sess = onnxrt.InferenceSession(get_name("mul_1.onnx"))
    self.assertIn('CUDAExecutionProvider', sess.get_providers())

    # test get/set of "cuda_mem_limit" configuration.
    options = sess.get_provider_options()
    self.assertIn('CUDAExecutionProvider', options)
    option = options['CUDAExecutionProvider']
    self.assertIn('cuda_mem_limit', option)
    ori_mem_limit = option['cuda_mem_limit']
    new_mem_limit = int(ori_mem_limit) // 2
    option['cuda_mem_limit'] = new_mem_limit
    sess.set_providers(['CUDAExecutionProvider'], [option])
    options = sess.get_provider_options()
    # The option is compared against its string form.
    self.assertEqual(
        options['CUDAExecutionProvider']['cuda_mem_limit'],
        str(new_mem_limit))

    # Restore the original limit and check it round-trips unchanged.
    option['cuda_mem_limit'] = ori_mem_limit
    sess.set_providers(['CUDAExecutionProvider'], [option])
    options = sess.get_provider_options()
    self.assertEqual(
        options['CUDAExecutionProvider']['cuda_mem_limit'],
        ori_mem_limit)

    # test get/set of "arena_extend_strategy" configuration.
    options = sess.get_provider_options()
    self.assertIn('CUDAExecutionProvider', options)
    option = options['CUDAExecutionProvider']
    self.assertIn('arena_extend_strategy', option)
    for strategy in ['kNextPowerOfTwo', 'kSameAsRequested']:
        option['arena_extend_strategy'] = strategy
        sess.set_providers(['CUDAExecutionProvider'], [option])
        options = sess.get_provider_options()
        self.assertEqual(
            options['CUDAExecutionProvider']['arena_extend_strategy'],
            strategy)

    #
    # Note: Tests that throw an exception leave an empty session due to how set_providers currently works,
    #       so run them last. Each set_providers call will attempt to re-create a session, so it's
    #       fine for a test that fails to run immediately after another one that fails.
    #       Alternatively a valid call to set_providers could be used to recreate the underlying session
    #       after a failed call.
    #
    # The invalid values are applied cumulatively to the same option dict,
    # in this order.
    invalid_settings = [
        ('arena_extend_strategy', 'wrong_value'),
        ('cuda_mem_limit', -1024),
        ('cuda_mem_limit', 1024.1024),
        ('cuda_mem_limit', 'wrong_value'),
    ]
    for key, bad_value in invalid_settings:
        option[key] = bad_value
        with self.assertRaises(RuntimeError):
            sess.set_providers(['CUDAExecutionProvider'], [option])
def detect(cfg):
    """Run ONNX Runtime inference over the test split and visualise the detections.

    The PyTorch checkpoint (``opt.weights``) is loaded only to obtain the
    ``encoder_decoder`` of its last layer, which is used to decode the raw
    ONNX predictions. For every sample the 2D boxes, 3D boxes and a
    bird's-eye view are drawn, shown in a window and, when
    ``cfg['write_video']`` is set, appended to ``res.avi`` (first 1000
    frames only). Pressing ``q`` stops the loop.

    :param cfg: configuration dictionary; the keys read here are ``'onnx'``,
        ``'dataset_path'``, ``'img_size'``, ``'brg_mean'`` and
        ``'write_video'`` (it is also passed on to ``DatasetReader``)
    """
    # Initialize
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(opt.weights, map_location=device)['model']
    model.to(device).eval()
    if half:
        model.half()  # to FP16
    else:
        # `half` is False exactly when the device is the CPU.
        model.to(torch.float32)

    session = onnxruntime.InferenceSession(cfg['onnx'])
    # 2. Get input/output name
    input_name = session.get_inputs()[0].name  # 'image'
    output_name = session.get_outputs()[0].name  # 'boxes'
    print('onnx input name: {}, output name: {}'.format(
        input_name, output_name))

    # Set Dataloader
    dataset_path = cfg['dataset_path']
    dataset = DatasetReader(dataset_path,
                            cfg,
                            augment=TestTransform(cfg['img_size'][0],
                                                  mean=cfg['brg_mean']),
                            is_training=False,
                            split='test')

    # Class names, passed to the drawing helpers.
    names = [
        'Car', 'Van', 'Truck', 'Pedestrian', 'Person_sitting', 'Cyclist',
        'Tram', 'Misc'
    ]
    # encoder_decoder = Coder(cfg['dim_ref'])
    encoder_decoder = model.model[-1].encoder_decoder

    # Run inference
    t0 = time.time()
    videowriter = None
    if cfg['write_video']:
        videowriter = cv2.VideoWriter(
            'res.avi', cv2.VideoWriter.fourcc('M', 'J', 'P', 'G'), 1,
            (1242, 750))
    max_frames = 1000  # upper bound on the number of frames written
    cnt = 0
    try:
        for img, targets, path, _ in dataset:
            src = cv2.imread(path)
            # Keep an untouched copy: the ONNX session is fed the array,
            # not the torch tensor.
            ori_img = np.copy(img)
            img = img.to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            t_st = time.time()
            # pred = model(img)[0]
            # onnxruntime result
            pred_onnx = session.run([output_name],
                                    {input_name: [ori_img]})[0]
            pred_onnx = torch.tensor(pred_onnx).to(device)

            # One K entry per distinct image id: the first one found.
            bi = targets.get_field('img_id')
            K = targets.get_field('K')
            Ks = []
            for i in np.unique(bi):
                indices = i == bi
                Ks.append(K[indices][None, 0])
            Ks = np.concatenate(Ks, axis=0)

            pred = postprocess.decode_pred_logits_onnx(
                pred_onnx, (img.shape[3], img.shape[2]),
                [(src.shape[1], src.shape[0])], Ks, encoder_decoder)
            # postprocess.apply_batch_nms3d(pred)
            t_end = time.time()
            # print('pred after nms:', len(pred), pred[0].shape)

            src3d = np.copy(src)
            birdview = np.zeros((2 * src.shape[0], src.shape[0], 3),
                                dtype=np.uint8)
            if pred[0] is not None:
                src = visual_utils.cv_draw_bboxes_2d(src, pred[0], names)
                src3d = visual_utils.cv_draw_bboxes_3d(src3d, pred[0], names)
                birdview = visual_utils.cv_draw_bbox3d_birdview(
                    birdview, pred[0], color=(255, 0, 0))
            # Targets are drawn in a second colour.
            birdview = visual_utils.cv_draw_bbox3d_birdview(birdview,
                                                            targets,
                                                            color=(0, 0, 255))

            # 2D view above 3D view, bird's-eye view to the right.
            concat_img = np.concatenate([src, src3d], axis=0)
            concat_img = np.concatenate([concat_img, birdview], axis=1)
            cv2.imshow('test transform', concat_img)

            if cfg['write_video'] and cnt < max_frames:
                concat_img = cv2.resize(concat_img, (1242, 750))
                # concat_img = concat_img[:, :, ::-1]
                videowriter.write(concat_img)
                cnt += 1

            print('the inference time of model is ', t_end - t_st)
            if cv2.waitKey(1000) == ord('q'):
                break
    finally:
        # Finalise the video file even when the loop is interrupted.
        if cfg['write_video']:
            videowriter.release()
    print('Done. (%.3fs)' % (time.time() - t0))
def main():
    """Run the Fast-SRGAN demo on frames captured from camera 0.

    Command-line options:
        --model       path of the ONNX model
        --input_size  two comma-separated integers; the first is used as
                      the height and the second as the width

    Each captured frame is passed to ``run_inference``; the resized
    original and the result are shown in two windows until ESC is pressed
    or the capture fails.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--model",
        type=str,
        default='model_128x128/model_128x128.onnx',
    )
    arg_parser.add_argument(
        "--input_size",
        type=str,
        default='128,128',
    )
    cli = arg_parser.parse_args()

    input_size = list(map(int, cli.input_size.split(',')))
    input_height = input_size[0]
    input_width = input_size[1]

    # Initialize video capture
    capture = cv.VideoCapture(0)

    # Load model
    onnx_session = onnxruntime.InferenceSession(cli.model)

    while True:
        tick = time.time()

        # Capture read
        grabbed, frame = capture.read()
        if not grabbed:
            break

        # Resized copy of the frame for the "original" window.
        preview = cv.resize(copy.deepcopy(frame),
                            dsize=(input_width, input_height))

        # Inference execution
        hr_image = run_inference(
            onnx_session,
            input_size,
            frame,
        )

        elapsed_time = time.time() - tick

        # Draw
        original_image, concat_image, _, _ = draw_debug(
            preview,
            elapsed_time,
            hr_image,
        )

        # The key is polled before the windows are refreshed.
        if cv.waitKey(1) == 27:  # ESC
            break
        cv.imshow('Fast-SRGAN Demo : Original', original_image)
        cv.imshow('Fast-SRGAN Demo : HR', concat_image)

    capture.release()
    cv.destroyAllWindows()
# NOTE(review): the loop and the `return` below are the tail of a function
# whose header is above this view; their original indentation is not
# recoverable from here.
for img_path in img_files:
    img = Image.open(img_path)
    img = img.convert('RGB')
    # Resize the smallest side to 224, then take a 224x224 central crop.
    img_data = resize_smallest_side(np.array(img, dtype=np.float32), 224)
    img_data = central_crop(img_data, (224, 224))
    # Move the last axis to the front (presumably HWC -> CHW -- confirm).
    img_data = np.array(img_data).transpose(2, 0, 1)
    inputs.append(preprocess(img_data))
return inputs

###########################################################
# Create InferenceSession
###########################################################

print("Create InferenceSession")
# The third positional argument is the provider list.
session = onnxruntime.InferenceSession(model_path, None, ["VitisAIExecutionProvider"])

# get the name of the first input of the model
input_name = session.get_inputs()[0].name

###########################################################
# Quantization using first N inputs
#
# Usually, to be able to accelerate inference of Neural
# Network models with Vitis-AI DPU accelerators, those models
# need to be quantized upfront. In the ONNXRuntime Vitis-AI
# execution provider we make use of on-the-fly quantization
# to remove this additional preprocessing step. In this flow,
# one doesn't need to quantize the model upfront but can
# make use of the typical inference execution calls
# (InferenceSession.run) to quantize the model on-the-fly
import tensorflow as tf
from keras import backend as K
from tensorflow.keras.models import load_model
import numpy as np

# NOTE(review): `keras2onnx` and `onnxruntime` are used below but are not
# imported in the visible part of this file -- confirm they are imported
# elsewhere.

# Load the trained Keras model and predict on 10 random rows of 21 features.
model = load_model('Final_model.h5')
X = np.array(np.random.rand(10, 21), dtype=np.float32)
print(model.predict(X))

# convert to onnx model
onnx_model = keras2onnx.convert_keras(model, model.name)
temp_model_file = 'NN_model.onnx'
keras2onnx.save_model(onnx_model, temp_model_file)

sess = onnxruntime.InferenceSession(temp_model_file)
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name

# This input name is needed in Classifier_cff as NNIdONNXInputName
print(sess.get_inputs()[0].name)
print(label_name)
# The name of the output is needed in Classifier_cff as NNIdONNXOutputName

# predict on random input and compare to previous keras model
# (one row at a time; the comparison is done by eye from the printed values)
for i in range(len(X)):
    pred_onx = sess.run([label_name], {input_name: X[i:i + 1]})[0]
    print(pred_onx)
onnx.checker.check_model(model) # A full list of supported optimization passes can be found using get_available_passes() all_passes = optimizer.get_available_passes() # Pick one pass as example passes = ['eliminate_unused_initializer','fuse_bn_into_conv'] # polish the model # model = onnx.utils.polish_model(model) # Apply the optimization on the original model optimized_model = optimizer.optimize(model, passes) inferred_model = shape_inference.infer_shapes(optimized_model) #save the model onnx.save(inferred_model, f'/1TBstorage/OnnxOptimized_New/{m}_Opt.onnx') #onnxruntime import onnxruntime as rt sess_options = rt.SessionOptions() # Set graph optimization level sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_BASIC # To enable model serialization after graph optimization set this sess_options.optimized_model_filepath = f'/1TBstorage/OnnxOptimized_New/{m}_Opt_Runtime.onnx' session = rt.InferenceSession( f'/1TBstorage/OnnxOptimized_New/{m}_Opt.onnx', sess_options)
def export_decoder(asr_model, args):
    """Export the attention decoder to ONNX and check it against PyTorch.

    The model's ``forward`` is rebound to ``forward_attention_decoder``, the
    model is exported with dummy inputs to ``<output_dir>/decoder.onnx``,
    every entry of ``args`` is stored as model metadata, and the scores
    produced by onnxruntime are compared with the PyTorch scores.

    Args:
        asr_model: model exposing ``forward_attention_decoder``; its
            ``forward`` attribute is overwritten by this function.
        args: dict read for ``output_dir``, ``output_size``, ``vocab_size``,
            ``reverse_weight`` and ``is_bidirectional_decoder``.

    Raises:
        AssertionError: raised by ``np.testing.assert_allclose`` when the
            ONNX and PyTorch scores differ beyond rtol=1e-3 / atol=1e-5.
    """
    print("Stage-3: export decoder")
    # NOTE(lzhin): parameters of encoder will be automatically removed
    #   since they are not used during rescoring.
    asr_model.forward = asr_model.forward_attention_decoder
    decoder = asr_model
    decoder_outpath = os.path.join(args['output_dir'], 'decoder.onnx')

    print("\tStage-3.1: prepare inputs for decoder")
    # hardcode time->200 nbest->10 len->20, they are dynamic axes.
    n_best, hyp_len, n_frames = 10, 20, 200
    encoder_out = torch.randn((1, n_frames, args['output_size']))
    vocab_size = args['vocab_size']
    hyps = torch.randint(low=0, high=vocab_size, size=[n_best, hyp_len])
    hyps[:, 0] = vocab_size - 1  # <sos>
    hyps_lens = torch.randint(low=15, high=21, size=[n_best])

    print("\tStage-3.2: torch.onnx.export")
    nbest_by_len = {0: 'NBEST', 1: 'L'}
    dynamic_axes = {
        'hyps': dict(nbest_by_len),
        'hyps_lens': {0: 'NBEST'},
        'encoder_out': {1: 'T'},
        'score': dict(nbest_by_len),
        'r_score': dict(nbest_by_len),
    }
    reverse_weight = args['reverse_weight']
    inputs = (hyps, hyps_lens, encoder_out, reverse_weight)
    export_options = dict(
        opset_version=13,
        export_params=True,
        do_constant_folding=True,
        input_names=['hyps', 'hyps_lens', 'encoder_out', 'reverse_weight'],
        output_names=['score', 'r_score'],
        dynamic_axes=dynamic_axes,
        verbose=False)
    torch.onnx.export(decoder, inputs, decoder_outpath, **export_options)

    # Re-open the exported file to attach every export argument as metadata,
    # then save it back to the same path.
    onnx_decoder = onnx.load(decoder_outpath)
    for key, value in args.items():
        entry = onnx_decoder.metadata_props.add()
        entry.key, entry.value = str(key), str(value)
    onnx.checker.check_model(onnx_decoder)
    onnx.helper.printable_graph(onnx_decoder.graph)
    onnx.save(onnx_decoder, decoder_outpath)
    print_input_output_info(onnx_decoder, "onnx_decoder")
    print('\t\tExport onnx_decoder, done! see {}'.format(decoder_outpath))

    print("\tStage-3.3: check onnx_decoder and torch_decoder")
    torch_score, torch_r_score = decoder(hyps, hyps_lens, encoder_out,
                                         args['reverse_weight'])
    ort_session = onnxruntime.InferenceSession(decoder_outpath)
    graph_inputs = [node.name for node in onnx_decoder.graph.input]
    candidates = {
        'hyps': to_numpy(hyps),
        'hyps_lens': to_numpy(hyps_lens),
        'encoder_out': to_numpy(encoder_out),
        'reverse_weight': np.array((args['reverse_weight'])),
    }
    # Only feed the inputs that are still present in the exported graph.
    ort_inputs = {
        name: value
        for name, value in candidates.items() if name in graph_inputs
    }
    onnx_output = ort_session.run(None, ort_inputs)

    tolerance = dict(rtol=1e-03, atol=1e-05)
    np.testing.assert_allclose(to_numpy(torch_score), onnx_output[0],
                               **tolerance)
    # The reverse score is only compared for a bidirectional decoder with a
    # positive reverse weight.
    if args['is_bidirectional_decoder'] and args['reverse_weight'] > 0.0:
        np.testing.assert_allclose(to_numpy(torch_r_score), onnx_output[1],
                                   **tolerance)
    print("\t\tCheck onnx_decoder, pass!")
# Load and downscale image input_img_path = directory + '/test.jpg' orig_img = Image.open(input_img_path) downscaled_img = orig_img.resize((width, height)) downscaled_img.save(directory + "/test_downscaled.jpg") # Preprocess image for the model img_ycbcr = downscaled_img.convert('YCbCr') img_y_0, img_cb, img_cr = img_ycbcr.split() img_ndarray = np.asarray(img_y_0) img_4 = np.expand_dims(np.expand_dims(img_ndarray, axis=0), axis=0) img_5 = img_4.astype(np.float32) / 255.0 # Load onnx file and run inference session = rt.InferenceSession(model) output_name = session.get_outputs()[0].name input_name = session.get_inputs()[0].name result = session.run([output_name], {input_name: img_5}) img_out_y = result[0] print(img_out_y.shape) # Postprocess img_out_y = Image.fromarray(np.uint8((img_out_y[0] * 255.0).clip(0, 255)[0]), mode='L') # get the output image following the post-processing step from the PyTorch implementation final_img = Image.merge("YCbCr", [ img_out_y, img_cb.resize(img_out_y.size, Image.BICUBIC), img_cr.resize(img_out_y.size, Image.BICUBIC), ]).convert("RGB")
def export_encoder(asr_model, args):
    """Export the streaming encoder to ONNX and check it against PyTorch.

    ``asr_model.encoder.forward`` is rebound to ``forward_chunk``, the encoder
    is exported with dummy inputs to ``<output_dir>/encoder.onnx``, every
    entry of ``args`` is stored as model metadata, and then ten chunks are
    run through both the PyTorch encoder and an onnxruntime session; the
    concatenated outputs must agree within rtol=1e-3 / atol=1e-5.

    Args:
        asr_model: model whose ``encoder`` exposes ``forward_chunk``; the
            encoder's ``forward`` attribute is overwritten.
        args: dict read for ``output_dir``, ``batch``, ``decoding_window``,
            ``feature_size``, ``left_chunks``, ``chunk_size``, ``num_blocks``,
            ``head``, ``output_size`` and ``cnn_module_kernel``.

    Raises:
        AssertionError: raised by ``np.testing.assert_allclose`` when the
            ONNX and PyTorch outputs differ.
    """
    print("Stage-1: export encoder")
    encoder = asr_model.encoder
    encoder.forward = encoder.forward_chunk
    encoder_outpath = os.path.join(args['output_dir'], 'encoder.onnx')

    print("\tStage-1.1: prepare inputs for encoder")
    chunk = torch.randn(
        (args['batch'], args['decoding_window'], args['feature_size']))
    offset = 0
    # NOTE(xcsong): The uncertainty of `next_cache_start` only appears
    #   in the first few chunks, this is caused by dynamic att_cache shape, i.e.
    #   (0, 0, 0, 0) for 1st chunk and (elayers, head, ?, d_k*2) for subsequent
    #   chunks. One way to ease the ONNX export is to keep `next_cache_start`
    #   as a fixed value. To do this, for the **first** chunk, if
    #   left_chunks > 0, we feed real cache & real mask to the model, otherwise
    #   fake cache & fake mask. In this way, we get:
    #   1. 16/-1 mode: next_cache_start == 0 for all chunks
    #   2. 16/4  mode: next_cache_start == chunk_size for all chunks
    #   3. 16/0  mode: next_cache_start == chunk_size for all chunks
    #   4. -1/-1 mode: next_cache_start == 0 for all chunks
    #   NO MORE DYNAMIC CHANGES!!
    #
    # NOTE(Mddct): We retain the current design for the convenience of supporting some
    #   inference frameworks without dynamic shapes. If you're interested in all-in-one
    #   model that supports different chunks please see:
    #   https://github.com/wenet-e2e/wenet/pull/1174
    if args['left_chunks'] > 0:  # 16/4
        required_cache_size = args['chunk_size'] * args['left_chunks']
        offset = required_cache_size
        # Real cache
        att_cache = torch.zeros(
            (args['num_blocks'], args['head'], required_cache_size,
             args['output_size'] // args['head'] * 2))
        # Real mask
        att_mask = torch.ones(
            (args['batch'], 1, required_cache_size + args['chunk_size']),
            dtype=torch.bool)
        # The cache part of the mask starts out as 0; the verification loops
        # below set trailing positions to 1 chunk by chunk.
        att_mask[:, :, :required_cache_size] = 0
    elif args['left_chunks'] <= 0:  # 16/-1, -1/-1, 16/0
        required_cache_size = -1 if args['left_chunks'] < 0 else 0
        # Fake cache
        att_cache = torch.zeros((args['num_blocks'], args['head'], 0,
                                 args['output_size'] // args['head'] * 2))
        # Fake mask
        att_mask = torch.ones((0, 0, 0), dtype=torch.bool)
    cnn_cache = torch.zeros(
        (args['num_blocks'], args['batch'], args['output_size'],
         args['cnn_module_kernel'] - 1))
    inputs = (chunk, offset, required_cache_size, att_cache, cnn_cache,
              att_mask)
    print("\t\tchunk.size(): {}\n".format(chunk.size()),
          "\t\toffset: {}\n".format(offset),
          "\t\trequired_cache: {}\n".format(required_cache_size),
          "\t\tatt_cache.size(): {}\n".format(att_cache.size()),
          "\t\tcnn_cache.size(): {}\n".format(cnn_cache.size()),
          "\t\tatt_mask.size(): {}\n".format(att_mask.size()))

    print("\tStage-1.2: torch.onnx.export")
    dynamic_axes = {
        'chunk': {
            1: 'T'
        },
        'att_cache': {
            2: 'T_CACHE'
        },
        'att_mask': {
            2: 'T_ADD_T_CACHE'
        },
        'output': {
            1: 'T'
        },
        'r_att_cache': {
            2: 'T_CACHE'
        },
    }
    # NOTE(xcsong): We keep dynamic axes even if in 16/4 mode, this is
    #   to avoid padding the last chunk (which usually contains less
    #   frames than required). For users who want static axes, just pop
    #   out specific axis.
    # if args['chunk_size'] > 0:  # 16/4, 16/-1, 16/0
    #     dynamic_axes.pop('chunk')
    #     dynamic_axes.pop('output')
    # if args['left_chunks'] >= 0:  # 16/4, 16/0
    #     # NOTE(xsong): since we feed real cache & real mask into the
    #     #   model when left_chunks > 0, the shape of cache will never
    #     #   be changed.
    #     dynamic_axes.pop('att_cache')
    #     dynamic_axes.pop('r_att_cache')
    torch.onnx.export(encoder,
                      inputs,
                      encoder_outpath,
                      opset_version=13,
                      export_params=True,
                      do_constant_folding=True,
                      input_names=[
                          'chunk', 'offset', 'required_cache_size',
                          'att_cache', 'cnn_cache', 'att_mask'
                      ],
                      output_names=['output', 'r_att_cache', 'r_cnn_cache'],
                      dynamic_axes=dynamic_axes,
                      verbose=False)
    onnx_encoder = onnx.load(encoder_outpath)
    # Store every export argument as a string key/value pair in the model.
    for (k, v) in args.items():
        meta = onnx_encoder.metadata_props.add()
        meta.key, meta.value = str(k), str(v)
    onnx.checker.check_model(onnx_encoder)
    onnx.helper.printable_graph(onnx_encoder.graph)
    # NOTE(xcsong): to add those metadatas we need to reopen
    #   the file and resave it.
    onnx.save(onnx_encoder, encoder_outpath)
    print_input_output_info(onnx_encoder, "onnx_encoder")
    print('\t\tExport onnx_encoder, done! see {}'.format(encoder_outpath))

    print("\tStage-1.3: check onnx_encoder and torch_encoder")
    # PyTorch reference: feed the same chunk ten times, carrying the caches
    # and the offset from one call to the next. Deep copies keep the export
    # inputs untouched by the in-place mask update below.
    torch_output = []
    torch_chunk = copy.deepcopy(chunk)
    torch_offset = copy.deepcopy(offset)
    torch_required_cache_size = copy.deepcopy(required_cache_size)
    torch_att_cache = copy.deepcopy(att_cache)
    torch_cnn_cache = copy.deepcopy(cnn_cache)
    torch_att_mask = copy.deepcopy(att_mask)
    for i in range(10):
        print("\t\ttorch chunk-{}: {}, offset: {}, att_cache: {},"
              " cnn_cache: {}, att_mask: {}".format(
                  i, list(torch_chunk.size()), torch_offset,
                  list(torch_att_cache.size()), list(torch_cnn_cache.size()),
                  list(torch_att_mask.size())))
        # NOTE(xsong): att_mask of the first few batches need changes if
        #   we use 16/4 mode.
        if args['left_chunks'] > 0:  # 16/4
            torch_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1
        out, torch_att_cache, torch_cnn_cache = encoder(
            torch_chunk, torch_offset, torch_required_cache_size,
            torch_att_cache, torch_cnn_cache, torch_att_mask)
        torch_output.append(out)
        torch_offset += out.size(1)
    torch_output = torch.cat(torch_output, dim=1)

    # ONNX run: same procedure with numpy inputs.
    # NOTE(review): `to_numpy` is defined elsewhere; if it returns an array
    # sharing memory with `att_mask`, the in-place update of `onnx_att_mask`
    # below also modifies `att_mask` - confirm.
    onnx_output = []
    onnx_chunk = to_numpy(chunk)
    onnx_offset = np.array((offset)).astype(np.int64)
    onnx_required_cache_size = np.array((required_cache_size)).astype(np.int64)
    onnx_att_cache = to_numpy(att_cache)
    onnx_cnn_cache = to_numpy(cnn_cache)
    onnx_att_mask = to_numpy(att_mask)
    ort_session = onnxruntime.InferenceSession(encoder_outpath)
    input_names = [node.name for node in onnx_encoder.graph.input]
    for i in range(10):
        print("\t\tonnx  chunk-{}: {}, offset: {}, att_cache: {},"
              " cnn_cache: {}, att_mask: {}".format(i, onnx_chunk.shape,
                                                    onnx_offset,
                                                    onnx_att_cache.shape,
                                                    onnx_cnn_cache.shape,
                                                    onnx_att_mask.shape))
        # NOTE(xsong): att_mask of the first few batches need changes if
        #   we use 16/4 mode.
        if args['left_chunks'] > 0:  # 16/4
            onnx_att_mask[:, :, -(args['chunk_size'] * (i + 1)):] = 1
        ort_inputs = {
            'chunk': onnx_chunk,
            'offset': onnx_offset,
            'required_cache_size': onnx_required_cache_size,
            'att_cache': onnx_att_cache,
            'cnn_cache': onnx_cnn_cache,
            'att_mask': onnx_att_mask
        }
        # NOTE(xcsong): If we use 16/-1, -1/-1 or 16/0 mode, `next_cache_start`
        #   will be hardcoded to 0 or chunk_size by ONNX, thus
        #   required_cache_size and att_mask are no more needed and they will
        #   be removed by ONNX automatically.
        # Iterate over a copy of the keys because entries are popped in the loop.
        for k in list(ort_inputs):
            if k not in input_names:
                ort_inputs.pop(k)
        ort_outs = ort_session.run(None, ort_inputs)
        onnx_att_cache, onnx_cnn_cache = ort_outs[1], ort_outs[2]
        onnx_output.append(ort_outs[0])
        onnx_offset += ort_outs[0].shape[1]
    onnx_output = np.concatenate(onnx_output, axis=1)

    np.testing.assert_allclose(to_numpy(torch_output),
                               onnx_output,
                               rtol=1e-03,
                               atol=1e-05)
    meta = ort_session.get_modelmeta()
    print("\t\tcustom_metadata_map={}".format(meta.custom_metadata_map))
    print("\t\tCheck onnx_encoder, pass!")
def predict(input_filepath, file_chunks, output_filepath, batch_size,
            num_workers, rank, threads, model_path):
    """Run ONNX inference over the input chunks and store the predictions.

    An onnxruntime session is created from ``model_path + ".onnx"``. Each
    batch from the data loader is processed in sliding windows along the
    sequence axis; the softmax output of every window is zero-padded to the
    full sequence length and accumulated. The arg-max labels and a
    phred-style score are then written to
    ``output_filepath + "pepper_prediction_<rank>.hdf"``.

    The recurrent state (``hidden`` and ``cell_state``) is zero-initialised
    for every batch and carried from one window to the next. The cell-state
    initialisation and tensor conversion had been commented out although the
    session is still fed ``cell_state``, which raised ``NameError`` on the
    first window; both are restored here.

    Args:
        input_filepath: passed to ``SequenceDataset``.
        file_chunks: passed to ``SequenceDataset``.
        output_filepath: prefix of the output file name (no separator added).
        batch_size: batch size of the data loader.
        num_workers: number of data loader workers.
        rank: caller index; used in the output file name, and only rank 0
            shows a progress bar.
        threads: thread count for onnxruntime intra-op work and for torch.
        model_path: model path without the ``.onnx`` extension.
    """
    # session options
    sess_options = onnxruntime.SessionOptions()
    sess_options.intra_op_num_threads = threads
    sess_options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
    sess_options.graph_optimization_level = \
        onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
    ort_session = onnxruntime.InferenceSession(model_path + ".onnx",
                                               sess_options=sess_options)
    torch.set_num_threads(threads)

    # The input names do not change between runs, so look them up once
    # instead of three times per window.
    model_inputs = ort_session.get_inputs()
    image_input = model_inputs[0].name
    hidden_input = model_inputs[1].name
    cell_state_input = model_inputs[2].name

    # create output file
    output_filename = output_filepath + "pepper_prediction_" + str(
        rank) + ".hdf"
    prediction_data_file = DataStore(output_filename, mode='w')

    # data loader
    input_data = SequenceDataset(input_filepath, file_chunks)
    data_loader = DataLoader(input_data,
                             batch_size=batch_size,
                             shuffle=False,
                             num_workers=num_workers)
    if rank == 0:
        progress_bar = tqdm(
            total=len(data_loader),
            ncols=100,
            leave=False,
            position=rank,
            desc="CALLER #" + str(rank),
        )

    with torch.no_grad():
        for contig, contig_start, contig_end, chunk_id, images, position, index in data_loader:
            images = images.type(torch.FloatTensor)
            state_shape = (images.size(0), 2 * TrainOptions.LSTM_LAYERS,
                           TrainOptions.HIDDEN_SIZE)
            hidden = torch.zeros(*state_shape)
            cell_state = torch.zeros(*state_shape)
            prediction_base_tensor = torch.zeros(
                (images.size(0), images.size(1),
                 ImageSizeOptions.TOTAL_LABELS))

            for i in range(0, ImageSizeOptions.SEQ_LENGTH,
                           TrainOptions.WINDOW_JUMP):
                # Stop once a full window no longer fits in the sequence.
                if i + TrainOptions.TRAIN_WINDOW > ImageSizeOptions.SEQ_LENGTH:
                    break
                chunk_start = i
                chunk_end = i + TrainOptions.TRAIN_WINDOW
                # chunk all the data
                image_chunk = images[:, chunk_start:chunk_end]

                # run inference on onnx mode, which takes numpy inputs
                ort_inputs = {
                    image_input: image_chunk.cpu().numpy(),
                    hidden_input: hidden.cpu().numpy(),
                    cell_state_input: cell_state.cpu().numpy(),
                }
                output_base, hidden, cell_state = ort_session.run(
                    None, ort_inputs)
                # Back to tensors so the next window can call .cpu().numpy()
                # on the carried state again.
                output_base = torch.from_numpy(output_base)
                hidden = torch.from_numpy(hidden)
                cell_state = torch.from_numpy(cell_state)

                # now calculate how much padding is on the top and bottom of this chunk so we can do a simple
                # add operation
                top_zeros = chunk_start
                bottom_zeros = ImageSizeOptions.SEQ_LENGTH - chunk_end

                # do softmax and get prediction
                # we run a softmax a padding to make the output tensor compatible for adding
                inference_layers = nn.Sequential(
                    nn.Softmax(dim=2),
                    nn.ZeroPad2d((0, 0, top_zeros, bottom_zeros)))
                base_prediction = inference_layers(output_base)

                # now simply add the tensor to the global counter
                prediction_base_tensor = torch.add(prediction_base_tensor,
                                                   base_prediction)

            base_values, base_labels = torch.max(prediction_base_tensor, 2)

            # this part is for the phred score calculation
            # counts is 2 in the middle of the sequence and 1 inside the
            # SEQ_OVERLAP margin at either end.
            counts = torch.ones(
                (base_values.size(0),
                 base_values.size(1) - 2 * ImageSizeOptions.SEQ_OVERLAP))
            top_ones = nn.ZeroPad2d(
                (ImageSizeOptions.SEQ_OVERLAP, ImageSizeOptions.SEQ_OVERLAP))
            counts = top_ones(counts) + 1
            phred_score = -10 * torch.log10(1.0 - (base_values / counts))
            # log10(0) gives an infinite score; cap it at 100.
            phred_score[phred_score == float('inf')] = 100

            predicted_base_labels = base_labels.cpu().numpy()
            phred_score = phred_score.cpu().numpy()

            for i in range(images.size(0)):
                prediction_data_file.write_prediction(
                    contig[i], contig_start[i], contig_end[i], chunk_id[i],
                    position[i], index[i], predicted_base_labels[i],
                    phred_score[i])
            if rank == 0:
                progress_bar.update(1)

    if rank == 0:
        progress_bar.close()
def pytorch2onnx(model,
                 input_shape,
                 opset_version=11,
                 show=False,
                 output_file='tmp.onnx',
                 verify=False):
    """Export Pytorch model to ONNX model and verify the outputs are same
    between Pytorch and ONNX.

    ``model.forward`` is temporarily replaced by a partial that fixes
    ``img_metas`` and ``return_loss=False`` for the export. The original
    ``forward`` is restored even when the export raises, so a failed export
    no longer leaves the model monkey-patched.

    Args:
        model (nn.Module): Pytorch model we want to export.
        input_shape (tuple): Use this input shape to construct
            the corresponding dummy input and execute the model.
        opset_version (int): The onnx op version. Default: 11.
        show (bool): Whether print the computation graph. Default: False.
        output_file (string): The path to where we store the output ONNX model.
            Default: `tmp.onnx`.
        verify (bool): Whether compare the outputs between Pytorch and ONNX.
            Default: False.

    Raises:
        ValueError: If ``verify`` is set and the Pytorch and ONNX outputs
            are not close according to ``np.allclose``.
    """
    model.cpu().eval()

    if isinstance(model.decode_head, nn.ModuleList):
        num_classes = model.decode_head[-1].num_classes
    else:
        num_classes = model.decode_head.num_classes

    mm_inputs = _demo_mm_inputs(input_shape, num_classes)

    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')

    # Add a leading axis to every image and wrap every meta dict in a list.
    img_list = [img[None, :] for img in imgs]
    img_meta_list = [[img_meta] for img_meta in img_metas]

    # replace original forward function
    origin_forward = model.forward
    model.forward = partial(model.forward,
                            img_metas=img_meta_list,
                            return_loss=False)
    try:
        register_extra_symbolics(opset_version)
        with torch.no_grad():
            torch.onnx.export(model, (img_list, ),
                              output_file,
                              export_params=True,
                              keep_initializers_as_inputs=True,
                              verbose=show,
                              opset_version=opset_version)
            print(f'Successfully exported ONNX model: {output_file}')
    finally:
        # Always undo the monkey-patch, including on export failure.
        model.forward = origin_forward

    if verify:
        # check by onnx
        import onnx
        onnx_model = onnx.load(output_file)
        onnx.checker.check_model(onnx_model)

        # check the numerical value
        # get pytorch output
        pytorch_result = model(img_list, img_meta_list, return_loss=False)[0]

        # get onnx output
        # Initializers are exported as graph inputs too
        # (keep_initializers_as_inputs=True), so remove them to find the
        # single input that has to be fed.
        input_all = [node.name for node in onnx_model.graph.input]
        input_initializer = [
            node.name for node in onnx_model.graph.initializer
        ]
        net_feed_input = list(set(input_all) - set(input_initializer))
        assert (len(net_feed_input) == 1)
        sess = rt.InferenceSession(output_file)
        onnx_result = sess.run(
            None, {net_feed_input[0]: img_list[0].detach().numpy()})[0]
        if not np.allclose(pytorch_result, onnx_result):
            raise ValueError(
                'The outputs are different between Pytorch and ONNX')
        print('The outputs are same between Pytorch and ONNX')
def _ort_inference(mdl, inputs):
    """Run ``inputs`` through an onnxruntime session built from ``mdl``.

    ``mdl`` is serialized with ``SerializeToString()`` and the resulting bytes
    are handed to ``InferenceSession``. All model outputs are requested.
    """
    serialized = mdl.SerializeToString()
    session = _ort.InferenceSession(serialized)
    outputs = session.run(None, inputs)
    return outputs
def run_test_dir(model_or_dir):
    """
    Run the test/s from a directory in ONNX test format.
    All subdirectories with a prefix of 'test' are considered test input for one test run.

    For every test directory the model is run on the stored inputs and, when
    expected outputs are present, each output is compared with them
    (``np.isclose`` with rtol=atol=1e-3 for floating point data, exact
    equality otherwise). 'PASS' or 'FAILED' is printed per test directory.

    :param model_or_dir: Path to onnx model in test directory,
                         or the test directory name if the directory only contains one .onnx model.
    :return: None
    :raises ValueError: if the directory holds no model or more than one model,
                        or if no 'test*' subdirectory exists next to the model.
    """
    if os.path.isdir(model_or_dir):
        model_dir = os.path.abspath(model_or_dir)
        # check there's only one onnx file
        onnx_models = glob.glob(os.path.join(model_dir, '*.onnx'))
        ort_models = glob.glob(os.path.join(model_dir, '*.ort'))
        models = onnx_models + ort_models
        if len(models) > 1:
            raise ValueError(f"Multiple .onnx and/or .ort files found in {model_dir}. "
                             "Please provide specific .onnx or .ort file as input.")
        elif len(models) == 0:
            raise ValueError(f"No .onnx or .ort files found in {model_dir}.")

        model_path = models[0]
    else:
        model_path = os.path.abspath(model_or_dir)
        model_dir = os.path.dirname(model_path)

    print(f'Running tests in {model_dir} for {model_path}')

    test_dirs = [d for d in glob.glob(os.path.join(model_dir, 'test*')) if os.path.isdir(d)]
    if not test_dirs:
        raise ValueError(f"No directories with name starting with 'test' were found in {model_dir}.")

    sess = ort.InferenceSession(model_path)

    for d in test_dirs:
        print(d)
        inputs, expected_outputs = read_test_dir(d)

        if expected_outputs:
            output_names = list(expected_outputs.keys())
            # handle case where there's a single expected output file but no name in it (empty string for name)
            # e.g. ONNX test models 20190729\opset8\tf_mobilenet_v2_1.4_224
            if len(output_names) == 1 and output_names[0] == '':
                output_names = [o.name for o in sess.get_outputs()]
                assert(len(output_names) == 1)
                # Re-key the unnamed expected output under the model's output name.
                expected_outputs[output_names[0]] = expected_outputs['']
                expected_outputs.pop('')
        else:
            # Nothing to compare against; still run the model with all outputs.
            output_names = [o.name for o in sess.get_outputs()]

        run_outputs = sess.run(output_names, inputs)
        failed = False
        if expected_outputs:
            # sess.run returns the outputs in the order of output_names.
            for name, actual in zip(output_names, run_outputs):
                expected = expected_outputs[name]

                if expected.dtype.char in np.typecodes['AllFloat']:
                    matches = np.isclose(expected, actual, rtol=1.e-3, atol=1.e-3).all()
                else:
                    matches = np.equal(expected, actual).all()

                if not matches:
                    print(f'Mismatch for {name}:\nExpected:{expected}\nGot:{actual}')
                    failed = True

        print('FAILED' if failed else 'PASS')
if not picked_box_probs: return np.array([]), np.array([]), np.array([]) picked_box_probs = np.concatenate(picked_box_probs) picked_box_probs[:, 0] *= width picked_box_probs[:, 1] *= height picked_box_probs[:, 2] *= width picked_box_probs[:, 3] *= height return picked_box_probs[:, :4].astype( np.int32), np.array(picked_labels), picked_box_probs[:, 4] # load the model, create runtime session & get input variable name onnx_model = onnx.load( '/home/imran/PROGRAMS/Machine Learning/Real Time FR/ultra_light_640.onnx') predictor = prepare(onnx_model) ort_session = ort.InferenceSession( '/home/imran/PROGRAMS/Machine Learning/Real Time FR/ultra_light_640.onnx') input_name = ort_session.get_inputs()[0].name # training TRAINING_BASE = 'faces/training/' dirs = os.listdir(TRAINING_BASE) # images and names for later use images = [] names = [] shape_predictor = dlib.shape_predictor( '/home/imran/PROGRAMS/Machine Learning/Real Time FR/shape_predictor_68_face_landmarks.dat' ) fa = face_utils.facealigner.FaceAligner(shape_predictor, desiredFaceWidth=112,
def create_test_dir(model_path, root_path, test_name,
                    name_input_map=None, symbolic_dim_values_map=None,
                    name_output_map=None):
    """
    Build a test directory usable with onnx_test_runner or onnxruntime_perf_test.

    The model is copied into ``<root_path>/<test_name>`` and the input/output
    tensors are written as ``input_<n>.pb`` / ``output_<n>.pb`` files into the
    ``test_data_set_0`` subdirectory. Missing inputs are filled in by
    ``_create_missing_input_data``. When no expected output is supplied, the
    copied model is run on the inputs and its results are saved instead.

    :param model_path: Path to the onnx model file to use.
    :param root_path: Root path to create the test directory in.
    :param test_name: Name for test. Will be added to the root_path to create the test directory name.
    :param name_input_map: Map of input names to numpy ndarray data for each input.
    :param symbolic_dim_values_map: Map of symbolic dimension names to values to use for the input data if creating
                                    using random data.
    :param name_output_map: Optional map of output names to numpy ndarray expected output data.
                            If not provided, the model will be run with the input to generate output data to save.
    :return: None
    """
    model_path = os.path.abspath(model_path)
    root_path = os.path.abspath(root_path)
    test_dir = os.path.join(root_path, test_name)
    test_data_dir = os.path.join(test_dir, "test_data_set_0")

    if not (os.path.exists(test_dir) and os.path.exists(test_data_dir)):
        os.makedirs(test_data_dir)

    # Keep the model's file name when copying it into the test directory.
    test_model_filename = os.path.join(test_dir, os.path.basename(model_path))
    shutil.copy(model_path, test_model_filename)

    model = onnx.load(model_path)
    model_inputs = model.graph.input
    model_outputs = model.graph.output

    def save_data(prefix, name_data_map, model_info):
        # The file index follows the position in the map, so skipped entries
        # leave a gap in the numbering.
        for idx, (name, data) in enumerate(name_data_map.items()):
            # ignore map<T1, T2> (dict) and vector<map<T1,T2>> (list, e.g. ZipMap
            # output) values from traditional ML ops
            if isinstance(data, dict) or isinstance(data, list):
                continue

            np_type = _get_numpy_type(model_info, name)
            tensor = numpy_helper.from_array(data.astype(np_type), name)
            filename = os.path.join(test_data_dir, f"{prefix}_{idx}.pb")
            with open(filename, 'wb') as f:
                f.write(tensor.SerializeToString())

    name_input_map = name_input_map if name_input_map else {}
    symbolic_dim_values_map = symbolic_dim_values_map if symbolic_dim_values_map else {}

    _create_missing_input_data(model_inputs, name_input_map, symbolic_dim_values_map)

    save_data("input", name_input_map, model_inputs)

    # save expected output data if provided. run model to create if not.
    if not name_output_map:
        output_names = [o.name for o in model_outputs]
        sess = ort.InferenceSession(test_model_filename)
        outputs = sess.run(output_names, name_input_map)
        name_output_map = dict(zip(output_names, outputs))

    save_data("output", name_output_map, model_outputs)
def compare_runtime(test, decimal=5, options=None,
                    verbose=False, context=None, comparable_outputs=None,
                    intermediate_steps=False, classes=None,
                    disable_optimisation=False):
    """
    The function compares the expected output (computed with
    the model before being converted to ONNX) and the ONNX output
    produced with module *onnxruntime*.

    :param test: dictionary with the following keys:
        - *onnx*: onnx model (filename or object)
        - *expected*: expected output (filename pkl or object)
        - *data*: input data (filename pkl or object)
    :param decimal: precision of the comparison
    :param options: comparison options
    :param context: specifies custom operators
    :param verbose: in case of error, the function may print
        more information on the standard output
    :param comparable_outputs: compare only these outputs
    :param intermediate_steps: displays intermediate steps
        in case of an error
    :param classes: classes names (if option 'nocl' is used)
    :param disable_optimisation: disable optimisation onnxruntime
        could do
    :return: tuple (output, lambda function to run the predictions),
        or None if *onnxruntime* cannot be imported

    The function raises an error if the model cannot be loaded or run,
    or if the comparison failed.
    """
    lambda_onnx = None
    if context is None:
        context = {}
    load = load_data_and_model(test, **context)
    if verbose:
        print("[compare_runtime] test '{}' loaded".format(test['onnx']))

    onx = test['onnx']
    if options is None:
        # Without explicit options, derive them from the model file name.
        if isinstance(onx, str):
            options = extract_options(onx)
        else:
            options = {}
    # NOTE(review): this branch is unreachable, the previous one already
    # handles ``options is None``.
    elif options is None:
        options = {}
    elif not isinstance(options, dict):
        raise TypeError("options must be a dictionary.")

    try:
        import onnxruntime
    except ImportError:
        warnings.warn("Unable to import onnxruntime.")
        return None

    if verbose:
        print("[compare_runtime] InferenceSession('{}')".format(onx))

    # Graph optimisations can only be disabled when the installed
    # onnxruntime exposes GraphOptimizationLevel.
    if (disable_optimisation and
            hasattr(onnxruntime, 'GraphOptimizationLevel')):
        opts = onnxruntime.SessionOptions()
        opts.graph_optimization_level = (
            onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL)
    else:
        opts = None

    try:
        sess = onnxruntime.InferenceSession(onx, sess_options=opts)
    except ExpectedAssertionError as expe:
        raise expe
    except Exception as e:
        # A load failure is expected when the options contain "CannotLoad".
        if "CannotLoad" in options:
            raise ExpectedAssertionError(
                "Unable to load onnx '{0}' due to\n{1}".format(onx, e))
        else:
            if intermediate_steps:
                _display_intermediate_steps(onx, None, disable_optimisation)
            if verbose:
                import onnx
                model = onnx.load(onx)
                smodel = "\nJSON ONNX\n" + str(model)
            else:
                smodel = ""
            if ("NOT_IMPLEMENTED : Could not find an implementation "
                    "for the node" in str(e)):
                # onnxruntime does not implement a specific node yet.
                raise OnnxRuntimeMissingNewOnnxOperatorException(
                    "onnxruntime does not implement a new operator "
                    "'{0}'\n{1}\nONNX\n{2}".format(
                        onx, e, smodel))
            if "is not a registered function/op" in str(e):
                content = onnx_package.load(onx)
                raise OnnxRuntimeAssertionError(
                    "Missing op? '{0}'\nONNX\n{1}\n{2}\n---\n{3}".format(
                        onx, smodel, e, content))
            raise OnnxRuntimeAssertionError(
                "Unable to load onnx '{0}'\nONNX\n{1}\n{2}".format(
                    onx, smodel, e))

    # Build the feed dictionary ``inputs`` (input name -> data) from the
    # loaded data. NOTE: the local name ``input`` shadows the builtin.
    input = load["data"]
    DF = options.pop('DF', False)
    if DF:
        # One model input per dataframe column: float64 columns are cast to
        # float32 and every column is reshaped to (n_rows, 1).
        inputs = {c: input[c].values for c in input.columns}
        for k in inputs:
            if inputs[k].dtype == numpy.float64:
                inputs[k] = inputs[k].astype(numpy.float32)
            inputs[k] = inputs[k].reshape((inputs[k].shape[0], 1))
    else:
        if isinstance(input, dict):
            inputs = input
        elif isinstance(input, (list, numpy.ndarray, pandas.DataFrame)):
            inp = sess.get_inputs()
            if len(inp) == len(input):
                # As many items as model inputs: pair them in order.
                inputs = {i.name: v for i, v in zip(inp, input)}
            elif len(inp) == 1:
                inputs = {inp[0].name: input}
            elif isinstance(input, numpy.ndarray):
                # Several model inputs: the summed input widths must match
                # the number of columns, then column i feeds input i.
                shape = sum(i.shape[1] if len(i.shape) == 2 else i.shape[0]
                            for i in inp)
                if shape == input.shape[1]:
                    inputs = {n.name: input[:, i] for i, n in enumerate(inp)}
                else:
                    raise OnnxRuntimeAssertionError(
                        "Wrong number of inputs onnx {0} != "
                        "original shape {1}, onnx='{2}'"
                        .format(len(inp), input.shape, onx))
            elif isinstance(input, list):
                try:
                    array_input = numpy.array(input)
                except Exception:
                    raise OnnxRuntimeAssertionError(
                        "Wrong number of inputs onnx {0} != "
                        "original {1}, onnx='{2}'"
                        .format(len(inp), len(input), onx))
                shape = sum(i.shape[1] for i in inp)
                if shape == array_input.shape[1]:
                    # Split every row into consecutive slices, one slice per
                    # model input, sized by that input's second dimension.
                    inputs = {}
                    c = 0
                    for i, n in enumerate(inp):
                        d = c + n.shape[1]
                        inputs[n.name] = _create_column(
                            [row[c:d] for row in input], n.type)
                        c = d
                else:
                    raise OnnxRuntimeAssertionError(
                        "Wrong number of inputs onnx {0} != "
                        "original shape {1}, onnx='{2}'*"
                        .format(len(inp), array_input.shape, onx))
            elif isinstance(input, pandas.DataFrame):
                try:
                    array_input = numpy.array(input)
                except Exception:
                    raise OnnxRuntimeAssertionError(
                        "Wrong number of inputs onnx {0} != "
                        "original {1}, onnx='{2}'"
                        .format(len(inp), len(input), onx))
                shape = sum(i.shape[1] for i in inp)
                if shape == array_input.shape[1]:
                    # Same splitting as for lists, on dataframe columns.
                    inputs = {}
                    c = 0
                    for i, n in enumerate(inp):
                        d = c + n.shape[1]
                        inputs[n.name] = _create_column(
                            input.iloc[:, c:d], n.type)
                        c = d
                else:
                    raise OnnxRuntimeAssertionError(
                        "Wrong number of inputs onnx {0}={1} columns != "
                        "original shape {2}, onnx='{3}'*"
                        .format(len(inp), shape, array_input.shape, onx))
            else:
                raise OnnxRuntimeAssertionError(
                    "Wrong type of inputs onnx {0}, onnx='{1}'".format(
                        type(input), onx))
        else:
            raise OnnxRuntimeAssertionError(
                "Dict or list is expected, not {0}".format(type(input)))

        # Lists are converted to arrays before being fed to the session.
        for k in inputs:
            if isinstance(inputs[k], list):
                inputs[k] = numpy.array(inputs[k])

    OneOff = options.pop('OneOff', False)
    OneOffArray = options.pop('OneOffArray', False)
    options.pop('SklCol', False)  # unused here but in dump_data_and_model
    if OneOff or OneOffArray:
        # The model is run on one observation at a time.
        if verbose:
            print(
                "[compare_runtime] OneOff: type(inputs)={} "
                "len={} OneOffArray={}"
                .format(type(input), len(inputs), OneOffArray))
        if len(inputs) == 1 and not OneOffArray:
            name, values = list(inputs.items())[0]
            res = []
            for input in values:
                try:
                    one = sess.run(None, {name: input})
                    if lambda_onnx is None:
                        # NOTE(review): the lambda closes over the loop
                        # variable, so a later call uses the value ``input``
                        # holds at that time, not the first observation.
                        lambda_onnx = lambda: sess.run(None, {name: input})  # noqa
                    if verbose:
                        import pprint
                        pprint.pprint(one)
                except ExpectedAssertionError as expe:
                    raise expe
                except Exception as e:
                    if intermediate_steps:
                        _display_intermediate_steps(
                            onx, {name: input}, disable_optimisation)
                    raise OnnxRuntimeAssertionError(
                        "Unable to run onnx '{0}' due to {1}".format(onx, e))
                res.append(one)
            if verbose:
                print("[compare_runtime] OneOff: _post_process_output1")
            output = _post_process_output(res)
        else:

            def to_array(vv):
                # Wrap a single value into a one-element array; values of
                # other types than the listed ones are converted to float32.
                if isinstance(
                        vv, (numpy.ndarray, numpy.int64, numpy.float32, str)):
                    return numpy.array([vv])
                else:
                    return numpy.array([vv], dtype=numpy.float32)

            # The first input gives the number of observations.
            t = list(inputs.items())[0]
            res = []
            for i in range(0, len(t[1])):
                iii = {k: to_array(v[i]) for k, v in inputs.items()}
                try:
                    one = sess.run(None, iii)
                    if lambda_onnx is None:
                        # NOTE(review): same late binding as above, ``iii``
                        # is looked up when the lambda is called.
                        lambda_onnx = lambda: sess.run(None, iii)  # noqa
                    if verbose:
                        import pprint
                        pprint.pprint(one)
                except ExpectedAssertionError as expe:
                    raise expe
                except Exception as e:
                    if intermediate_steps:
                        _display_intermediate_steps(
                            onx, iii, disable_optimisation)
                    if verbose:
                        import onnx
                        model = onnx.load(onx)
                        smodel = "\nJSON ONNX\n" + str(model)
                    else:
                        smodel = ""
                    raise OnnxRuntimeAssertionError(
                        "Unable to run onnx '{0}' due to {1}{2}".format(
                            onx, e, smodel))
                res.append(one)
            if verbose:
                print("[compare_runtime] OneOff: _post_process_output2")
            output = _post_process_output(res)

            if OneOffArray:
                # With OneOffArray the output must end up as a list.
                if isinstance(output, list):
                    pass
                elif not isinstance(output, numpy.ndarray):
                    raise TypeError("output must be an array, not {}".format(
                        type(output)))
                else:
                    output = [output]
    else:
        # Default path: a single call on the whole input.
        if verbose:
            print("[compare_runtime] type(inputs)={} len={} names={}".format(
                type(input), len(inputs), list(sorted(inputs))))
        if verbose:
            run_options = onnxruntime.RunOptions()
            # The attribute name depends on the installed onnxruntime.
            if hasattr(run_options, 'run_log_verbosity_level'):
                run_options.run_log_verbosity_level = 5
            else:
                run_options.log_verbosity_level = 5
        else:
            run_options = None
        try:
            output = sess.run(None, inputs, run_options)
            lambda_onnx = lambda: sess.run(None, inputs)  # noqa
            if verbose:
                import pprint
                pprint.pprint(output)
        except ExpectedAssertionError as expe:
            raise expe
        except RuntimeError as e:
            if intermediate_steps:
                _display_intermediate_steps(onx, inputs, disable_optimisation)
            # A runtime failure is expected when "-Fail" is part of ``onx``.
            if "-Fail" in onx:
                raise ExpectedAssertionError(
                    "onnxruntime cannot compute the prediction for '{0}'".
                    format(onx))
            else:
                if verbose:
                    import onnx
                    model = onnx.load(onx)
                    smodel = "\nJSON ONNX\n" + str(model)
                else:
                    smodel = ""
                raise OnnxRuntimeAssertionError(
                    "onnxruntime cannot compute the prediction"
                    " for '{0}' due to {1}{2}"
                    .format(onx, e, smodel))
        except Exception as e:
            raise OnnxRuntimeAssertionError(
                "Unable to run onnx '{0}' due to {1}".format(onx, e))
        if verbose:
            print("[compare_runtime] done type={}".format(type(output)))

    # Keep a copy of the raw output; it is the value returned to the caller.
    output0 = output.copy()

    if comparable_outputs:
        cmp_exp = [load["expected"][o] for o in comparable_outputs]
        cmp_out = [output[o] for o in comparable_outputs]
    else:
        cmp_exp = load["expected"]
        cmp_out = output

    # The remaining options are forwarded to the comparison.
    try:
        _compare_expected(cmp_exp, cmp_out, sess, onx,
                          decimal=decimal, verbose=verbose,
                          classes=classes, **options)
    except ExpectedAssertionError as expe:
        raise expe
    except Exception as e:
        if verbose:
            import onnx
            model = onnx.load(onx)
            smodel = "\nJSON ONNX\n" + str(model)
        else:
            smodel = ""
        raise OnnxRuntimeAssertionError(
            "Model '{0}' has discrepencies.\n{1}: {2}{3}".format(
                onx, type(e), e, smodel))

    return output0, lambda_onnx
def convert_to_onnx_with_hydra(cfg: DictConfig):
    """Export the checkpointed model described by ``cfg`` to ONNX and verify it.

    The ONNX file path is derived from ``cfg.onnx.model_ckpt_path``:
    everything from the first ``"train/"`` onwards is replaced by
    ``"onnxConversion/<cfg.onnx.onnx_model_name>.onnx"``. The module found at
    ``model.adapter.model`` is exported with a random input, the result is
    checked with the ONNX checker, and the first onnxruntime output is
    compared with the first PyTorch output (rtol=1e-3, atol=1e-5).

    Raises:
        AssertionError: raised by ``np.testing.assert_allclose`` when the
            ONNX and PyTorch outputs differ.
    """
    onnx_cfg = cfg.onnx
    ckpt_path = onnx_cfg.model_ckpt_path
    run_root = ckpt_path.split("train/", 1)[0]
    onnx_model_path = run_root + "onnxConversion/" + onnx_cfg.onnx_model_name + ".onnx"
    print(f"ONNX MODEL IS SAVED AT {onnx_model_path}")

    model = instantiate(cfg.model.model)
    model = model.cpu()
    model = model.load_from_checkpoint(cfg.onnx.model_ckpt_path)

    print(cfg.onnx.model_width_input)
    input_shape = (
        cfg.onnx.model_batch_size,
        cfg.onnx.model_channel_input,
        cfg.onnx.model_height_input,
        cfg.onnx.model_width_input,
    )
    dummy_input_real = torch.randn(*input_shape)
    print(f"INPUT SIZE {dummy_input_real.size()}")

    # ----------
    # CONVERT MODEL TO ONNX
    # -----------
    # The module to export lives in model.adapter.model.
    model_to_convert = model.adapter.model.cpu()
    torch.onnx.export(model_to_convert,
                      dummy_input_real,
                      onnx_model_path,
                      opset_version=cfg.onnx.opset_version)

    # ----------
    # ONNX CHECK
    # -----------
    onnx.checker.check_model(onnx.load(onnx_model_path))

    # ----------
    # ONNXRUNTIME CHECK WITH PL MODEL
    # -----------
    ort_session = onnxruntime.InferenceSession(onnx_model_path)
    first_input_name = ort_session.get_inputs()[0].name
    ort_outs = ort_session.run(
        None, {first_input_name: to_numpy(dummy_input_real)})

    # Reference output from the PyTorch module in eval mode.
    score = model_to_convert.eval()(dummy_input_real)
    np.testing.assert_allclose(to_numpy(score[0]),
                               ort_outs[0],
                               rtol=1e-03,
                               atol=1e-05)