def fcts_model(X, y, n_jobs):
    """
    Fits a :epkg:`LinearRegression`, converts it to ONNX and returns one
    callable per prediction backend so they can be benchmarked against
    each other.

    :param X: training features (2D contiguous numpy array
        — TODO confirm dtype expected by the converter, presumably float32)
    :param y: training target
    :param n_jobs: forwarded to :class:`LinearRegression`
    :return: ``{'predict': {'skl': ..., 'ort': ..., 'numpy': ..., 'ort-bind': ...}}``
    """
    "LinearRegression."
    model = LinearRegression(n_jobs=n_jobs)
    model.fit(X, y)

    # Convert to ONNX; black_op forbids the LinearRegressor ai.onnx.ml
    # operator so the converter emits standard MatMul/Add nodes instead.
    initial_types = [('X', FloatTensorType([None, X.shape[1]]))]
    onx = to_onnx(model, initial_types=initial_types,
                  black_op={'LinearRegressor'})
    sess = InferenceSession(onx.SerializeToString(),
                            providers=['CPUExecutionProvider'])
    outputs = [o.name for o in sess.get_outputs()]
    # Pure python runtime for the same graph.
    oinf = OnnxInference(onx, runtime="python")
    # IOBinding objects are created once and reused by the closure below.
    bind = SessionIOBinding(sess._sess)
    # ort_device = C_OrtDevice.cpu()
    ort_device = C_OrtDevice(
        C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0)

    def predict_skl_predict(X, model=model):
        # Baseline: scikit-learn prediction.
        return model.predict(X)

    def predict_onnxrt_predict(X, sess=sess):
        # onnxruntime with the regular numpy-dict API (copies inputs).
        return sess.run(outputs[:1], {'X': X})[0]

    def predict_onnx_inference(X, oinf=oinf):
        # mlprodict python runtime; 'variable' is the converter's default
        # output name for a regressor.
        return oinf.run({'X': X})["variable"]

    def predict_onnxrt_predict_bind(X, sess=sess, bind=bind,
                                    ort_device=ort_device):
        # onnxruntime through IOBinding: the input buffer is bound by raw
        # pointer (zero copy), which only works for contiguous arrays.
        if X.__array_interface__['strides'] is not None:
            raise RuntimeError("onnxruntime only supports contiguous arrays.")
        bind.bind_input('X', ort_device, X.dtype, X.shape,
                        X.__array_interface__['data'][0])
        bind.bind_output('variable', ort_device)
        sess._sess.run_with_iobinding(bind, None)
        ortvalues = bind.get_outputs()
        return ortvalues[0].numpy()

    return {'predict': {
        'skl': predict_skl_predict,
        'ort': predict_onnxrt_predict,
        'numpy': predict_onnx_inference,
        'ort-bind': predict_onnxrt_predict_bind}}
def benchmark(name, onx, fct_numpy, *args,
              dims=(1, 10, 100, 200, 500, 1000, 2000, 10000)):
    """
    Measures the same computation through four implementations
    (numpy function, ``sess.run``, IOBinding bind+run, run only)
    for several input sizes.

    :param name: label stored in every result row
    :param onx: ONNX model proto
    :param fct_numpy: numpy implementation taking the same inputs
        as the ONNX model
    :param args: one numpy array per model input, reshaped to every
        dimension in *dims* with helper ``reshape``
    :param dims: first dimensions to benchmark
    :return: list of dictionaries (one per measure), each with keys
        ``name``, ``impl``, ``dim`` plus the timings from ``measure_time``
    :raises RuntimeError: if the number of model inputs differs from
        ``len(args)``
    """
    # Pin the CPU provider explicitly: the rest of this file does the same
    # and recent onnxruntime versions raise when providers is omitted.
    sess = InferenceSession(onx.SerializeToString(),
                            providers=['CPUExecutionProvider'])
    device = C_OrtDevice(C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0)
    names = [i.name for i in sess.get_inputs()]
    out_names = [o.name for o in sess.get_outputs()]
    if len(names) != len(args):
        raise RuntimeError(f"Size mismatch {len(names)} != {len(args)}.")

    rows = []
    for dim in tqdm(dims):
        new_args = [reshape(a, dim) for a in args]
        ortvalues = [
            C_OrtValue.ortvalue_from_numpy(a, device)
            for a in new_args]

        # numpy baseline
        ms = measure_time(lambda: fct_numpy(*new_args),
                          repeat=50, number=100)
        ms.update(dict(name=name, impl='numpy', dim=dim))
        rows.append(ms)

        # onnxruntime, regular feed-dict API
        inps = {n: a for n, a in zip(names, new_args)}
        ms = measure_time(lambda: sess.run(None, inps))
        ms.update(dict(name=name, impl='sess', dim=dim))
        rows.append(ms)

        # onnxruntime, bind + run through IOBinding
        bind = SessionIOBinding(sess._sess)
        ms = measure_time(lambda: bind_and_run(
            sess._sess, bind, names, ortvalues, out_names, device))
        ms.update(dict(name=name, impl='bind_run', dim=dim))
        rows.append(ms)

        # onnxruntime, run only (inputs already bound above)
        ms = measure_time(lambda: nobind_just_run(sess._sess, bind))
        ms.update(dict(name=name, impl='run', dim=dim))
        rows.append(ms)
    return rows
print(f"provider = {provider!r}") #################################### # We load the graph. with open(filename, 'rb') as f: onx = onnx.load(f) ############################### # Create of the session. so = SessionOptions() so.enable_profiling = True so.optimized_model_filepath = os.path.split(filename)[-1] + ".optimized.onnx" sess = InferenceSession(onx.SerializeToString(), so, providers=[provider]) bind = SessionIOBinding(sess._sess) print("graph_optimization_level:", so.graph_optimization_level) ##################################### # Creates random data feed = random_feed(sess, batch) ##################################### # moving the data on CPU or GPU feed_ort_value = OrderedDict( (name, (C_OrtValue.ortvalue_from_numpy(v, ort_device), v.dtype)) for name, v in feed.items()) outputs = [o.name for o in sess.get_outputs()] #######################################
def test_bind_input_types(self):
    """
    Round-trips an Identity model through IOBinding for every supported
    numpy dtype, once by binding an OrtValue and once by binding a raw
    pointer, on CPU and (when available) CUDA.
    """
    opset = onnx_opset_version()
    devices = [(
        C_OrtDevice(C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0),
        ["CPUExecutionProvider"],
    )]
    if "CUDAExecutionProvider" in onnxrt.get_all_providers():
        devices.append((
            C_OrtDevice(C_OrtDevice.cuda(), C_OrtDevice.default_memory(), 0),
            ["CUDAExecutionProvider"],
        ))

    dtypes = (np.float32, np.float64, np.int32, np.uint32, np.int64,
              np.uint64, np.int16, np.uint16, np.int8, np.uint8,
              np.float16, np.bool_)

    for device, provider in devices:
        for dtype in dtypes:
            with self.subTest(dtype=dtype, device=str(device)):
                sample = np.arange(8).reshape((-1, 2)).astype(dtype)
                proto_dtype = NP_TYPE_TO_TENSOR_TYPE[sample.dtype]
                tensor_x = helper.make_tensor_value_info(
                    "X", proto_dtype, [None, sample.shape[1]])
                tensor_y = helper.make_tensor_value_info(
                    "Y", proto_dtype, [None, sample.shape[1]])

                # inference: a single Identity node X -> Y
                identity_node = helper.make_node("Identity", ["X"], ["Y"])

                # graph
                graph_def = helper.make_graph(
                    [identity_node], "lr", [tensor_x], [tensor_y], [])
                model_def = helper.make_model(
                    graph_def,
                    producer_name="dummy",
                    ir_version=7,
                    producer_version="0",
                    opset_imports=[helper.make_operatorsetid("", opset)],
                )
                sess = onnxrt.InferenceSession(
                    model_def.SerializeToString(), providers=provider)

                # First pass: bind an existing OrtValue as input.
                binding = SessionIOBinding(sess._sess)
                ort_value = C_OrtValue.ortvalue_from_numpy(sample, device)
                binding.bind_ortvalue_input("X", ort_value)
                binding.bind_output("Y", device)
                sess._sess.run_with_iobinding(binding, None)
                fetched = binding.get_outputs()
                self.assertIsInstance(fetched, OrtValueVector)
                result = binding.get_outputs()[0].numpy()
                assert_almost_equal(sample, result)

                # Second pass: bind through raw pointer + shape + dtype.
                binding = SessionIOBinding(sess._sess)
                binding.bind_input(
                    "X", device, dtype, sample.shape, ort_value.data_ptr())
                binding.bind_output("Y", device)
                sess._sess.run_with_iobinding(binding, None)
                result = binding.get_outputs()[0].numpy()
                assert_almost_equal(sample, result)
def forward(self, inputs, training=False, forward_outputs_cache=None):
    """
    Implements forward function.

    :param inputs: inputs
    :param training: only inference or training as well
    :param forward_outputs_cache: when not None, an existing
        :epkg:`OrtValueVector` reused to store the training outputs
    :return: output as :epkg:`OrtValueVector`
    """
    logger = self._logger
    cls = self.__class__

    def _log(msg, *args):
        # All messages share a "[Class.forward] (<n>I)" prefix.
        logger.debug("[%s.forward] (%dI) " + msg, cls.__name__,
                     len(inputs), *args)

    if logger is not None:
        _log("begin with gradient" if training else "begin")
        _log("torch function %r", type(cls))
        _log("ort class %r", cls)
        _log("create OrtValueVector (through dlpack)")

    forward_inputs = cls.input_to_ort(inputs, cls._devices, cls._debug)

    if training:
        # Training path: delegate to the training agent, keeping the
        # partial-execution state so backward can resume the graph.
        forward_outputs = forward_outputs_cache or OrtValueVector()
        state = PartialGraphExecutionState()
        self.states_.append(state)
        if logger is not None:
            _log("run_forward")
        cls._training_agent.run_forward(
            forward_inputs, forward_outputs, state, cls._cache)
        self.save_for_backward(inputs)
        if logger is not None:
            _log("end")
        return forward_outputs

    # Inference path.
    # NOTE(review): could also use bind_input with a raw data_ptr here.
    if len(forward_inputs) != len(cls._grad_input_names):
        raise RuntimeError(  # pragma: no cover
            "Size mismatch len(inputs)=%d, len(onnx inputs)=%d." % (
                len(forward_inputs), len(cls._grad_input_names)))
    iobinding = SessionIOBinding(cls._sess_eval._sess)
    if logger is not None:
        _log("bind inputs %r", cls._grad_input_names)
    for name, inp in zip(cls._grad_input_names, forward_inputs):
        iobinding.bind_ortvalue_input(name, inp)

    # bind output
    if logger is not None:
        _log("bind outputs %r", cls._output_names)
    for name, dev in zip(cls._output_names,
                         cls._fw_no_grad_output_device_info):
        # The output shape is not known in advance, so only the device
        # is bound; onnxruntime allocates the buffer itself.
        iobinding.bind_output(name, dev)

    if logger is not None:
        _log("grad_enabled=False (run_with_iobinding)")
    cls._sess_eval._sess.run_with_iobinding(iobinding, cls._run_options)
    if logger is not None:
        _log("get_outputs")
    ortvalues = iobinding.get_outputs()
    if logger is not None:
        # fixed typo: "torck" -> "torch"
        _log("to torch.tensor (%d)", len(ortvalues))
        _log("end")
    return ortvalues
ro = RunOptions() output_names = [o.name for o in sess.get_outputs()] obs = measure_time( lambda: sess._sess.run_with_ort_values({'X': Xov}, output_names, ro), context=dict(sess=sess), repeat=repeat, number=number) obs['name'] = 'ort-ov' data.append(obs) ################################### # onnxruntime: run_with_iobinding print('ort-bind') sess = InferenceSession(onx.SerializeToString(), providers=['CPUExecutionProvider']) bind = SessionIOBinding(sess._sess) ort_device = C_OrtDevice(C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0) def run_with_iobinding(sess, X, bind, ort_device): if X.__array_interface__['strides'] is not None: raise RuntimeError("onnxruntime only supports contiguous arrays.") bind.bind_input('X', ort_device, X.dtype, X.shape, X.__array_interface__['data'][0]) bind.bind_output('variable', ort_device) sess._sess.run_with_iobinding(bind, None) ortvalues = bind.get_outputs() return ortvalues[0].numpy() obs = measure_time(lambda: run_with_iobinding(sess, X, bind, ort_device),