def __init__(self, path_or_bytes, parameters, sess_options=None, providers=None, provider_options=None):
    Session.__init__(self)

    if sess_options:
        self._sess = C.TrainingSession(sess_options)
    else:
        self._sess = C.TrainingSession()

    # providers needs to be passed explicitly as of ORT 1.10
    # retain the pre-1.10 behavior by setting to the available providers.
    if providers is None:
        providers = C.get_available_providers()

    providers, provider_options = check_and_normalize_provider_args(
        providers, provider_options, C.get_available_providers())

    if isinstance(path_or_bytes, str):
        config_result = self._sess.load_model(path_or_bytes, parameters,
                                              providers, provider_options)
    elif isinstance(path_or_bytes, bytes):
        config_result = self._sess.read_bytes(path_or_bytes, parameters,
                                              providers, provider_options)
    else:
        raise TypeError("Unable to load from type '{0}'".format(type(path_or_bytes)))

    self.loss_scale_input_name = config_result.loss_scale_input_name

    self._inputs_meta = self._sess.inputs_meta
    self._outputs_meta = self._sess.outputs_meta
def set_providers(self, providers, provider_options=None):
    """
    Register the input list of execution providers. The underlying session is re-created.

    :param providers: list of execution providers
    :param provider_options: list of provider options dicts for each provider, in the same order as 'providers'

    The list of providers is ordered by priority. For example
    ['CUDAExecutionProvider', 'CPUExecutionProvider'] means execute a node using
    CUDAExecutionProvider if capable, otherwise execute using CPUExecutionProvider.
    """
    if not set(providers).issubset(C.get_available_providers()):
        raise ValueError(
            "{} does not contain a subset of available providers {}".format(
                providers, C.get_available_providers()))

    if provider_options:
        if not isinstance(providers, list) or not isinstance(provider_options, list):
            raise ValueError("Inputs must be two python lists.")

        if len(providers) != len(provider_options):
            raise ValueError("Two input lists must have same length.")

        for option in provider_options:
            if not isinstance(option, dict):
                raise ValueError("Provider options must be list of python dict.")

            for key, val in option.items():
                option[key] = str(val)

    # recreate the underlying C.InferenceSession
    self._reset_session(providers, provider_options)
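# Hedged usage sketch (not part of the original source) showing how set_providers
# with per-provider options is typically called. "model.onnx" is a placeholder
# path, and the example assumes CUDAExecutionProvider is available in this build;
# otherwise set_providers raises ValueError as shown above.
import onnxruntime

sess = onnxruntime.InferenceSession("model.onnx",
                                    providers=["CPUExecutionProvider"])
# Each options dict is positionally matched to its provider; values are
# stringified before the underlying session is re-created.
sess.set_providers(["CUDAExecutionProvider", "CPUExecutionProvider"],
                   [{"device_id": 0}, {}])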
def set_providers(self, providers):
    """
    Register the input list of execution providers. The underlying session is re-created.

    :param providers: list of execution providers

    The list of providers is ordered by priority. For example
    ['CUDAExecutionProvider', 'CPUExecutionProvider'] means execute a node using
    CUDAExecutionProvider if capable, otherwise execute using CPUExecutionProvider.
    """
    if not set(providers).issubset(C.get_available_providers()):
        raise ValueError("{} does not contain a subset of available providers {}".format(
            providers, C.get_available_providers()))
    self._reset_session()
    self._load_model(providers)
def __init__(self, path_or_bytes, parameters, sess_options=None, providers=None, provider_options=None):
    Session.__init__(self)

    if sess_options:
        self._sess = C.TrainingSession(sess_options)
    else:
        self._sess = C.TrainingSession()

    providers, provider_options = check_and_normalize_provider_args(
        providers, provider_options, C.get_available_providers())

    if isinstance(path_or_bytes, str):
        config_result = self._sess.load_model(path_or_bytes, parameters,
                                              providers, provider_options)
    elif isinstance(path_or_bytes, bytes):
        config_result = self._sess.read_bytes(path_or_bytes, parameters,
                                              providers, provider_options)
    else:
        raise TypeError("Unable to load from type '{0}'".format(type(path_or_bytes)))

    self.loss_scale_input_name = config_result.loss_scale_input_name

    self._inputs_meta = self._sess.inputs_meta
    self._outputs_meta = self._sess.outputs_meta
def _load_model(self, providers):
    if isinstance(self._path_or_bytes, str):
        self._sess = C.InferenceSession(
            self._sess_options if self._sess_options else C.get_default_session_options(),
            self._path_or_bytes, True)
    elif isinstance(self._path_or_bytes, bytes):
        self._sess = C.InferenceSession(
            self._sess_options if self._sess_options else C.get_default_session_options(),
            self._path_or_bytes, False)
    # elif isinstance(self._path_or_bytes, tuple):
    #     # to remove, hidden trick
    #     self._sess.load_model_no_init(self._path_or_bytes[0], providers)
    else:
        raise TypeError("Unable to load from type '{0}'".format(type(self._path_or_bytes)))

    self._sess.load_model(providers)

    self._sess_options = self._sess.session_options
    self._inputs_meta = self._sess.inputs_meta
    self._outputs_meta = self._sess.outputs_meta
    self._overridable_initializers = self._sess.overridable_initializers
    self._model_meta = self._sess.model_meta
    self._providers = self._sess.get_providers()

    # Tensorrt can fall back to CUDA. All others fall back to CPU.
    if 'TensorrtExecutionProvider' in C.get_available_providers():
        self._fallback_providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
    else:
        self._fallback_providers = ['CPUExecutionProvider']
def test_bcewithlogits_loss_training_graph_execution():
    # Given
    device = "cuda"
    batch_size, input_size, hidden_size, output_size = 64, 784, 500, 10
    pt_model, onnx_model = _get_models(device, batch_size, input_size,
                                       hidden_size, output_size)
    x = torch.randn(batch_size, input_size, device=device)
    target = torch.randn(batch_size, output_size, device=device)

    # Build the onnx model with loss
    simple_model = SimpleTrainingModelWithBCEWithLogitsLoss()
    with onnxblock.onnx_model(onnx_model):
        _ = simple_model(onnx_model.graph.output[0].name)

    ort_output_names = _get_training_ort_output_names(pt_model, onnx_model)
    ort_inputs = _get_training_ort_inputs(x, target, pt_model, onnx_model)

    def bcewithlogits_loss(prediction, target):
        loss = torch.nn.BCEWithLogitsLoss()
        return loss(prediction, target)

    # When
    with tempfile.NamedTemporaryFile(suffix=".onnx") as onnx_fo:
        onnx.save(onnx_model, onnx_fo.name)

        ort_session = onnxruntime.InferenceSession(
            onnx_fo.name, providers=C.get_available_providers())
        ort_outs = ort_session.run(ort_output_names, ort_inputs)

        torch_outs = bcewithlogits_loss(pt_model(x), target)
        torch_outs.backward()

        # Then
        # assert loss is close
        assert np.allclose(ort_outs[0], _to_numpy(torch_outs))
def _create_inference_session(self, providers, provider_options):
    # Tensorrt can fall back to CUDA. All others fall back to CPU.
    if 'TensorrtExecutionProvider' in C.get_available_providers():
        self._fallback_providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
    else:
        self._fallback_providers = ['CPUExecutionProvider']

    session_options = self._sess_options if self._sess_options else C.get_default_session_options()
    if self._model_path:
        sess = C.InferenceSession(session_options, self._model_path, True,
                                  self._read_config_from_model)
    else:
        sess = C.InferenceSession(session_options, self._model_bytes, False,
                                  self._read_config_from_model)

    # initialize the C++ InferenceSession
    sess.initialize_session(providers or [], provider_options or [])

    self._sess = sess
    self._sess_options = self._sess.session_options
    self._inputs_meta = self._sess.inputs_meta
    self._outputs_meta = self._sess.outputs_meta
    self._overridable_initializers = self._sess.overridable_initializers
    self._model_meta = self._sess.model_meta
    self._providers = self._sess.get_providers()
    self._provider_options = self._sess.get_provider_options()
def _create_inference_session(self, providers, provider_options, disabled_optimizers=None):
    available_providers = C.get_available_providers()

    # Tensorrt can fall back to CUDA. All others fall back to CPU.
    if 'TensorrtExecutionProvider' in available_providers:
        self._fallback_providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
    elif 'MIGraphXExecutionProvider' in available_providers:
        self._fallback_providers = ['ROCMExecutionProvider', 'CPUExecutionProvider']
    else:
        self._fallback_providers = ['CPUExecutionProvider']

    # validate providers and provider_options before other initialization
    providers, provider_options = check_and_normalize_provider_args(
        providers, provider_options, available_providers)

    if providers == [] and len(available_providers) > 1:
        self.disable_fallback()
        raise ValueError(
            "This ORT build has {} enabled. ".format(available_providers) +
            "Since ORT 1.9, you are required to explicitly set " +
            "the providers parameter when instantiating InferenceSession. For example, "
            "onnxruntime.InferenceSession(..., providers={}, ...)".format(available_providers))

    session_options = self._sess_options if self._sess_options else C.get_default_session_options()
    if self._model_path:
        sess = C.InferenceSession(session_options, self._model_path, True,
                                  self._read_config_from_model)
    else:
        sess = C.InferenceSession(session_options, self._model_bytes, False,
                                  self._read_config_from_model)

    if disabled_optimizers is None:
        disabled_optimizers = set()
    elif not isinstance(disabled_optimizers, set):
        # convert to set. assumes iterable
        disabled_optimizers = set(disabled_optimizers)

    # initialize the C++ InferenceSession
    sess.initialize_session(providers, provider_options, disabled_optimizers)

    self._sess = sess
    self._sess_options = self._sess.session_options
    self._inputs_meta = self._sess.inputs_meta
    self._outputs_meta = self._sess.outputs_meta
    self._overridable_initializers = self._sess.overridable_initializers
    self._model_meta = self._sess.model_meta
    self._providers = self._sess.get_providers()
    self._provider_options = self._sess.get_provider_options()
    self._profiling_start_time_ns = self._sess.get_profiling_start_time_ns
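# Hedged illustration (not part of the original source) of the explicit-providers
# requirement enforced above: on builds with more than one provider, the session
# raises unless providers are passed explicitly. "model.onnx" is a placeholder path.
import onnxruntime
from onnxruntime.capi import _pybind_state as C

available = C.get_available_providers()
# e.g. ['CUDAExecutionProvider', 'CPUExecutionProvider'] on a CUDA-enabled build
sess = onnxruntime.InferenceSession("model.onnx", providers=available)
print(sess.get_providers())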
def _create_inference_session(self, providers, provider_options, disabled_optimizers=None):
    available_providers = C.get_available_providers()

    # Tensorrt can fall back to CUDA. All others fall back to CPU.
    if 'TensorrtExecutionProvider' in available_providers:
        self._fallback_providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
    else:
        self._fallback_providers = ['CPUExecutionProvider']

    # validate providers and provider_options before other initialization
    providers, provider_options = check_and_normalize_provider_args(
        providers, provider_options, available_providers)

    if providers == [] and len(available_providers) > 1:
        warnings.warn(
            "Deprecation warning. This ORT build has {} enabled. ".format(available_providers) +
            "The next release (ORT 1.10) will require explicitly setting the providers parameter "
            "(as opposed to the current behavior of providers getting set/registered by default "
            "based on the build flags) when instantiating InferenceSession. "
            "For example, onnxruntime.InferenceSession(..., providers=[\"CUDAExecutionProvider\"], ...)")

    session_options = self._sess_options if self._sess_options else C.get_default_session_options()
    if self._model_path:
        sess = C.InferenceSession(session_options, self._model_path, True,
                                  self._read_config_from_model)
    else:
        sess = C.InferenceSession(session_options, self._model_bytes, False,
                                  self._read_config_from_model)

    if disabled_optimizers is None:
        disabled_optimizers = set()
    elif not isinstance(disabled_optimizers, set):
        # convert to set. assumes iterable
        disabled_optimizers = set(disabled_optimizers)

    # initialize the C++ InferenceSession
    sess.initialize_session(providers, provider_options, disabled_optimizers)

    self._sess = sess
    self._sess_options = self._sess.session_options
    self._inputs_meta = self._sess.inputs_meta
    self._outputs_meta = self._sess.outputs_meta
    self._overridable_initializers = self._sess.overridable_initializers
    self._model_meta = self._sess.model_meta
    self._providers = self._sess.get_providers()
    self._provider_options = self._sess.get_provider_options()
    self._profiling_start_time_ns = self._sess.get_profiling_start_time_ns
def test_adamw_optimizer_execution():
    # Given
    device = "cuda"
    batch_size, input_size, hidden_size, output_size = 64, 784, 500, 10
    pt_model, onnx_model = _get_models(device, batch_size, input_size,
                                       hidden_size, output_size)
    x = torch.randn(batch_size, input_size, device=device)
    target = torch.randn(batch_size, output_size, device=device)

    simple_model = SimpleTrainingModelWithMSELoss()
    with onnxblock.onnx_model(onnx_model):
        _ = simple_model(onnx_model.graph.output[0].name)

    optimizer = onnxblock.optim.AdamW()
    with onnxblock.onnx_model() as accessor:
        output_name = optimizer(simple_model.parameters())
        optimizer_model = accessor.model

    learning_rate = 0.001
    step = 1
    ort_output_names = [output_name]

    def mse_loss(prediction, target):
        loss = torch.nn.MSELoss()
        return loss(prediction, target)

    # When
    with tempfile.NamedTemporaryFile(suffix=".onnx") as onnx_fo:
        onnx.save(optimizer_model, onnx_fo.name)

        loss = mse_loss(pt_model(x), target)
        loss.backward()

        ort_inputs = {
            "learning_rate": np.full(1, learning_rate, dtype=np.float32),
            "step": np.full(1, step, dtype=np.int64),
            "params": [],
            "first_order_moments": [],
            "second_order_moments": [],
        }
        for name, param in pt_model.named_parameters():
            ort_inputs["params"].append(_to_numpy(copy.deepcopy(param)))
            ort_inputs[f"{name}_grad"] = _to_numpy(copy.deepcopy(param.grad))
            ort_inputs["first_order_moments"].append(_to_numpy(torch.zeros_like(param)))
            ort_inputs["second_order_moments"].append(_to_numpy(torch.zeros_like(param)))

        # Then no error occurs when executing the model
        ort_session = onnxruntime.InferenceSession(
            onnx_fo.name, providers=C.get_available_providers())
        _ = ort_session.run(ort_output_names, ort_inputs)
def _create_inference_session(self, providers, provider_options, disabled_optimizers=None):
    available_providers = C.get_available_providers()

    # Tensorrt can fall back to CUDA. All others fall back to CPU.
    if 'TensorrtExecutionProvider' in available_providers:
        self._fallback_providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
    else:
        self._fallback_providers = ['CPUExecutionProvider']

    # validate providers and provider_options before other initialization
    providers, provider_options = check_and_normalize_provider_args(
        providers, provider_options, available_providers)

    session_options = self._sess_options if self._sess_options else C.get_default_session_options()
    if self._model_path:
        sess = C.InferenceSession(session_options, self._model_path, True,
                                  self._read_config_from_model)
    else:
        sess = C.InferenceSession(session_options, self._model_bytes, False,
                                  self._read_config_from_model)

    if disabled_optimizers is None:
        disabled_optimizers = set()
    elif not isinstance(disabled_optimizers, set):
        # convert to set. assumes iterable
        disabled_optimizers = set(disabled_optimizers)

    # initialize the C++ InferenceSession
    sess.initialize_session(providers, provider_options, disabled_optimizers)

    self._sess = sess
    self._sess_options = self._sess.session_options
    self._inputs_meta = self._sess.inputs_meta
    self._outputs_meta = self._sess.outputs_meta
    self._overridable_initializers = self._sess.overridable_initializers
    self._model_meta = self._sess.model_meta
    self._providers = self._sess.get_providers()
    self._provider_options = self._sess.get_provider_options()
    self._profiling_start_time_ns = self._sess.get_profiling_start_time_ns
def test_crossentropy_loss_execution():
    # Given
    device = "cuda"
    batch_size, input_size, hidden_size, output_size = 64, 784, 500, 10
    pt_model, onnx_model = _get_models(device, batch_size, input_size,
                                       hidden_size, output_size)
    x = torch.randn(batch_size, input_size, device=device)
    target = torch.randint(high=output_size, size=(batch_size,),
                           dtype=torch.int64, device=device)

    # Build the onnx model with loss
    simple_model = SimpleModelWithCrossEntropyLoss()
    with onnxblock.onnx_model(onnx_model):
        _ = simple_model(onnx_model.graph.output[0].name)

    ort_output_names = [onnx_model.graph.output[0].name]
    ort_inputs = {
        onnx_model.graph.input[0].name: _to_numpy(copy.deepcopy(x)),
        onnx_model.graph.input[1].name: _to_numpy(copy.deepcopy(target).type(torch.int32)),
    }

    def crossentropy_loss(prediction, target):
        loss = torch.nn.CrossEntropyLoss()
        return loss(prediction, target)

    # When
    with tempfile.NamedTemporaryFile(suffix=".onnx") as onnx_fo:
        onnx.save(onnx_model, onnx_fo.name)

        ort_session = onnxruntime.InferenceSession(
            onnx_fo.name, providers=C.get_available_providers())
        ort_outs = ort_session.run(ort_output_names, ort_inputs)

        torch_outs = crossentropy_loss(pt_model(x), target)

        # Then
        assert np.allclose(ort_outs[0], _to_numpy(torch_outs))
######################################################################
# initial onnxruntime
import onnxruntime as ort
from time import time
from onnxruntime.capi import _pybind_state as C

so = ort.SessionOptions()
so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
so.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
so.intra_op_num_threads = 1
so.inter_op_num_threads = 1

print(args.onnx_model)
ort_sess = ort.InferenceSession(args.onnx_model, sess_options=so)
print(C.get_available_providers())
ort_sess.set_providers(["CPUExecutionProvider"])

# initial MNN
import MNN
interpreter = MNN.Interpreter(args.mnn_model)
session = interpreter.createSession({"numThread": 1})
input_tensor = interpreter.getSessionInput(session)
######################################################################

_, _, model_in_h, model_in_w = ort_sess.get_inputs()[0].shape
out_shape = ort_sess.get_outputs()[0].shape
mean = [107.304565, 115.69884, 132.35703]
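# Hedged timing sketch (not part of the original script): benchmark the ORT
# session configured above with a synthetic input. It assumes model_in_h and
# model_in_w are concrete ints and that the model takes a single float32 NCHW input.
import numpy as np

input_name = ort_sess.get_inputs()[0].name
dummy = np.random.rand(1, 3, model_in_h, model_in_w).astype(np.float32)

start = time()
for _ in range(100):
    ort_sess.run(None, {input_name: dummy})
print("onnxruntime avg latency: {:.3f} ms".format((time() - start) * 1000 / 100))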
def test_save(self):
    # We need a custom loss function to load the graph in an InferenceSession
    # in ONNX Runtime Web. You can still make the gradient graph with
    # torch.nn.CrossEntropyLoss() and this test will pass.
    loss_fn = binary_cross_entropy_loss
    input_size = 10
    model = NeuralNet(input_size=input_size, embedding_size=20,
                      hidden_size=5, num_classes=2)
    directory_path = Path(os.path.dirname(__file__)).resolve()

    gradient_graph_path = directory_path / 'gradient_graph_model.onnx'

    batch_size = 1
    example_input = torch.randn(batch_size, input_size, requires_grad=True)
    example_labels = torch.tensor([1])

    export_gradient_graph(model, loss_fn, example_input, example_labels,
                          gradient_graph_path)

    onnx_model = onnx.load(str(gradient_graph_path))
    onnx.checker.check_model(onnx_model)

    # Expected inputs: input, labels, and the model's parameters.
    self.assertEqual(
        1 + 1 + sum(1 for _ in model.parameters()),
        len(onnx_model.graph.input))

    # Expected outputs: prediction, loss, and parameters with gradients.
    self.assertEqual(
        1 + 1 + sum(1 if p.requires_grad else 0 for p in model.parameters()),
        len(onnx_model.graph.output))

    torch_out = model(example_input)

    try:
        ort_session = onnxruntime.InferenceSession(str(gradient_graph_path))
    except ValueError:
        # Sometimes it is required to pass the available providers.
        from onnxruntime.capi import _pybind_state as C
        available_providers = C.get_available_providers()
        ort_session = onnxruntime.InferenceSession(str(gradient_graph_path),
                                                   providers=available_providers)

    ort_inputs = {
        onnx_model.graph.input[0].name: to_numpy(example_input),
        onnx_model.graph.input[1].name: to_numpy(example_labels),
    }

    for name, param in model.named_parameters():
        ort_inputs[name] = to_numpy(param.data)

    ort_outs = ort_session.run(None, ort_inputs)
    onnx_output_names = [node.name for node in onnx_model.graph.output]
    onnx_name_to_output = dict(zip(onnx_output_names, ort_outs))

    ort_output = onnx_name_to_output['output']
    np.testing.assert_allclose(to_numpy(torch_out), ort_output,
                               rtol=1e-03, atol=1e-05)

    torch_loss = loss_fn(torch_out, example_labels)
    ort_loss = onnx_name_to_output['loss']
    np.testing.assert_allclose(to_numpy(torch_loss), ort_loss,
                               rtol=1e-03, atol=1e-05)

    # Make sure the gradients have the right shape.
    model_param_names = tuple(
        name for name, param in model.named_parameters() if param.requires_grad)
    self.assertEqual(4, len(model_param_names))

    for name, param in model.named_parameters():
        if param.requires_grad:
            grad = onnx_name_to_output[name + '_grad']
            self.assertEqual(param.size(), grad.shape)
def test_grad_clipping_execution():
    # Given
    device = "cuda"
    batch_size, input_size, hidden_size, output_size = 64, 784, 500, 10
    pt_model, _ = _get_models(device, batch_size, input_size,
                              hidden_size, output_size)
    x = torch.randn(batch_size, input_size, device=device)
    target = torch.randn(batch_size, output_size, device=device)

    # Prepare the onnx model with only grad clipping
    onnx_model = onnx.ModelProto()
    onnx_model.graph.name = "AdamW Optimizer Model"
    onnx_model.producer_name = "grad clipping test"
    onnx_model.opset_import.extend(onnxblock.optim.optim._OPSET_IMPORTS)
    onnx_model.ir_version = onnx.IR_VERSION

    class GradClippingModel(onnxblock.Model):
        def __init__(self, max_norm):
            self._grad_clip = onnxblock.optim.ClipGradNorm(max_norm)

        def build(self, *grad_names):
            return self._grad_clip(*grad_names)

    grad_names = []
    for name, param in pt_model.named_parameters():
        grad_names.append(f"{name}_grad")
        onnx_model.graph.input.append(
            onnx.helper.make_tensor_value_info(grad_names[-1],
                                               onnx.TensorProto.FLOAT,
                                               param.shape))

    grad_clip = GradClippingModel(2.5)
    with onnxblock.onnx_model(onnx_model):
        ort_output_names = grad_clip(*grad_names)

    def mse_loss(prediction, target):
        loss = torch.nn.MSELoss()
        return loss(prediction, target)

    # When
    with tempfile.NamedTemporaryFile(suffix=".onnx") as onnx_fo:
        onnx.save(onnx_model, onnx_fo.name)

        loss = mse_loss(pt_model(x), target)
        loss.backward()

        ort_inputs = {}
        for name, param in pt_model.named_parameters():
            ort_inputs[f"{name}_grad"] = _to_numpy(copy.deepcopy(param.grad))

        torch.nn.utils.clip_grad_norm_(pt_model.parameters(), 2.5)

        # Then no error occurs when executing the model
        ort_session = onnxruntime.InferenceSession(
            onnx_fo.name, providers=C.get_available_providers())
        ort_outs = ort_session.run(ort_output_names, ort_inputs)

        # assert all the gradients are close
        for ort_grad, pt_param in zip(ort_outs, pt_model.parameters()):
            assert np.allclose(ort_grad, _to_numpy(pt_param.grad))
def get_available_providers():
    return C.get_available_providers()
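# Hedged usage sketch (not part of the original source): the wrapper above simply
# forwards to the C binding, so callers can query the build's providers and pass
# them straight to a session. "model.onnx" is a placeholder path.
import onnxruntime

providers = onnxruntime.get_available_providers()
print(providers)  # e.g. ['CUDAExecutionProvider', 'CPUExecutionProvider']
sess = onnxruntime.InferenceSession("model.onnx", providers=providers)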
def set_providers(self, providers):
    "Register the input list of execution providers. The underlying session is re-created."
    if not set(providers).issubset(C.get_available_providers()):
        raise ValueError("{} does not contain a subset of available providers {}".format(
            providers, C.get_available_providers()))
    self._reset_session()
    self._load_model(providers)