Example #1
    def testGetProviders(self):
        self.assertTrue('CPUExecutionProvider' in onnxrt.get_available_providers())
        # get_all_providers() returns the default EP order from highest to lowest.
        # CPUExecutionProvider should always be last.
        self.assertTrue('CPUExecutionProvider' == onnxrt.get_all_providers()[-1])
        sess = onnxrt.InferenceSession(get_name("mul_1.onnx"))
        self.assertTrue('CPUExecutionProvider' in sess.get_providers())
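The assertions above hinge on the difference between the two provider lists; a minimal sketch that makes the distinction visible:

import onnxruntime as onnxrt

# Everything this build of onnxruntime knows about, in default priority order;
# CPUExecutionProvider is always last.
print(onnxrt.get_all_providers())

# The subset that can actually be used in this installation.
print(onnxrt.get_available_providers())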
Example #2
def create_model_for_provider(model_path: str, provider: str) -> InferenceSession: 
    assert provider in get_all_providers(), f"provider {provider} not found, {get_all_providers()}"
    # A few properties that might have an impact on performance (provided by MS)
    options = SessionOptions()
    options.intra_op_num_threads = 1
    # Load the model as a graph and prepare the CPU backend 
    return InferenceSession(model_path, options, providers=[provider])
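A hedged usage sketch of the helper above; the model path and input feed are illustrative, not from the source:

import numpy as np

# Hypothetical model path and input name, purely for illustration.
session = create_model_for_provider("model.onnx", "CPUExecutionProvider")
outputs = session.run(None, {"input_ids": np.ones((1, 8), dtype=np.int64)})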
Example #3
    def create_onnx_session(self,
                            onnx_model_path,
                            provider='CPUExecutionProvider'):
        """
        Creates ONNX inference session from provided onnx_model_path
        """

        from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions, get_all_providers
        assert provider in get_all_providers(), \
            f"provider {provider} not found, {get_all_providers()}"

        # A few properties that might have an impact on performance (provided by MS)
        options = SessionOptions()
        options.intra_op_num_threads = 0
        options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

        # Load the model as a graph and prepare the CPU backend
        session = InferenceSession(onnx_model_path,
                                   options,
                                   providers=[provider])
        session.disable_fallback()

        #if 'OMP_NUM_THREADS' not in os.environ or 'OMP_WAIT_POLICY' not in os.environ:
        #warnings.warn('''We recommend adding the following at top of script for CPU inference:

        #from psutil import cpu_count
        ##Constants from the performance optimization available in onnxruntime
        ##It needs to be done before importing onnxruntime
        #os.environ["OMP_NUM_THREADS"] = str(cpu_count(logical=True))
        #os.environ["OMP_WAIT_POLICY"] = 'ACTIVE'
        #''')
        return session
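The commented-out recommendation above can be applied directly; a minimal standalone sketch (assuming psutil is installed) that sets the OpenMP variables before onnxruntime is imported:

import os
from psutil import cpu_count

# Must be set before onnxruntime is imported, otherwise OpenMP ignores them.
os.environ["OMP_NUM_THREADS"] = str(cpu_count(logical=True))
os.environ["OMP_WAIT_POLICY"] = 'ACTIVE'

import onnxruntime  # imported only after the environment is configured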
Example #4
File: onnx.py  Project: aarnphm/BentoML
    def __init__(
        self,
        tag: t.Union[str, Tag],
        backend: str,
        gpu_device_id: int,
        disable_copy_in_default_stream: bool,
        providers: t.Optional["_ProviderType"],
        session_options: t.Optional["ort.SessionOptions"],  # type: ignore
        name: t.Optional[str] = None,
    ):
        super().__init__(tag, name=name)
        self._backend = backend

        if backend not in SUPPORTED_ONNX_BACKEND:
            raise BentoMLException(
                f"'{backend}' runtime is currently not supported for ONNXModel"
            )
        if providers is not None:
            if not all(i in ort.get_all_providers() for i in flatten_list(providers)):
                raise BentoMLException(
                    f"'{providers}' cannot be parsed by `onnxruntime`"
                )
        else:
            providers = self._get_default_providers(
                gpu_device_id,
                disable_copy_in_default_stream,
            )
        self._providers = providers
        self._session_options = session_options
Example #5
File: onnx.py  Project: aarnphm/BentoML
def load(
    tag: t.Union[str, Tag],
    backend: t.Optional[str] = "onnxruntime",
    providers: t.Optional[t.Union["_ProviderType", "_GPUProviderType"]] = None,
    session_options: t.Optional["ort.SessionOptions"] = None,  # type: ignore
    model_store: "ModelStore" = Provide[BentoMLContainer.model_store],
) -> "ort.InferenceSession":
    """
    Load a model from BentoML local modelstore with given name.

    Args:
        tag (:code:`Union[str, Tag]`):
            Tag of a saved model in BentoML local modelstore.
        backend (:code:`str`, `optional`, default to :code:`onnxruntime`):
            Backend runtime used to run the model. Currently only :obj:`onnxruntime`
            and :obj:`onnxruntime-gpu` are accepted.
        providers (`List[Union[str, Tuple[str, Dict[str, Any]]]]`, `optional`, default to :code:`None`):
            Execution providers to run the model with. By default BentoML will use :func:`onnxruntime.get_available_providers`
            when loading a model.
        session_options (`onnxruntime.SessionOptions`, `optional`, default to :code:`None`):
            SessionOptions per use case. If not specified, then default to :code:`None`.
        model_store (:mod:`~bentoml._internal.models.store.ModelStore`, default to :mod:`BentoMLContainer.model_store`):
            BentoML modelstore, provided by DI Container.

    Returns:
        :obj:`onnxruntime.InferenceSession`: an instance of ONNX model from BentoML modelstore.

    Examples:

    .. code-block:: python

        import bentoml

        model = bentoml.onnx.load(tag)

    """  # noqa
    model = model_store.get(tag)
    if model.info.module not in (MODULE_NAME, __name__):
        raise BentoMLException(
            f"Model {tag} was saved with module {model.info.module}, failed loading with {MODULE_NAME}."
        )
    model_file = model.path_of(f"{SAVE_NAMESPACE}{ONNX_EXT}")

    if backend not in SUPPORTED_ONNX_BACKEND:
        raise BentoMLException(
            f"'{backend}' runtime is currently not supported for ONNXModel"
        )
    if providers:
        if not all(i in ort.get_all_providers() for i in flatten_list(providers)):
            raise BentoMLException(f"'{providers}' cannot be parsed by `onnxruntime`")
    else:
        providers = ort.get_available_providers()

    return ort.InferenceSession(
        model_file,
        sess_options=session_options,
        providers=providers,
    )
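A hedged usage sketch of load(); the model tag is hypothetical, and providers restricts the session to the CPU execution provider even when a GPU build of onnxruntime is installed:

import bentoml

session = bentoml.onnx.load("my_onnx_model:latest", providers=["CPUExecutionProvider"])
print(session.get_providers())  # -> ['CPUExecutionProvider']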
Example #6
def create_onnx_session(onnx_model_path):
    provider = 'CPUExecutionProvider'
    from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions, get_all_providers
    assert provider in get_all_providers(), \
        f"provider {provider} not found, {get_all_providers()}"
    options = SessionOptions()
    options.intra_op_num_threads = 0
    options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
    session = InferenceSession(onnx_model_path, options, providers=[provider])
    session.disable_fallback()
    return session
Example #7
def create_model_for_provider(
        model_path: str,
        provider: str = 'CPUExecutionProvider') -> InferenceSession:
    assert provider in get_all_providers(), \
        f"provider {provider} not found, {get_all_providers()}"
    # A few properties that might have an impact on performance (provided by MS)
    options = SessionOptions()
    options.intra_op_num_threads = int(os.environ.get('NUM_THREADS', 4))
    options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
    # Load the model as a graph and prepare the CPU backend
    session = InferenceSession(model_path, options, providers=[provider])
    session.disable_fallback()
    return session
Example #8
    def create_model_for_provider(self):

        assert self.provider in get_all_providers(), f"provider {self.provider} not found, {get_all_providers()}"

        # A few properties that might have an impact on performance (provided by MS)
        options = SessionOptions()
        options.intra_op_num_threads = 1
        options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

        # Load the model as a graph and prepare the CPU backend
        session = InferenceSession(self.model_path, options, providers=[self.provider])
        session.disable_fallback()

        return session
Example #9
    def test_bind_input_types(self):

        opset = onnx_opset_version()
        devices = [(C_OrtDevice(C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0), ['CPUExecutionProvider'])]
        if "CUDAExecutionProvider" in onnxrt.get_all_providers():
            devices.append((C_OrtDevice(C_OrtDevice.cuda(), C_OrtDevice.default_memory(), 0), ['CUDAExecutionProvider']))
            
        for device, provider in devices:
            for dtype in [np.float32, np.float64, np.int32, np.uint32,
                          np.int64, np.uint64, np.int16, np.uint16,
                          np.int8, np.uint8, np.float16, np.bool_]:
                with self.subTest(dtype=dtype, device=str(device)):

                    x = np.arange(8).reshape((-1, 2)).astype(dtype)
                    proto_dtype = NP_TYPE_TO_TENSOR_TYPE[x.dtype]

                    X = helper.make_tensor_value_info('X', proto_dtype, [None, x.shape[1]])
                    Y = helper.make_tensor_value_info('Y', proto_dtype, [None, x.shape[1]])

                    # inference
                    node_add = helper.make_node('Identity', ['X'], ['Y'])

                    # graph
                    graph_def = helper.make_graph([node_add], 'lr', [X], [Y], [])
                    model_def = helper.make_model(
                        graph_def, producer_name='dummy', ir_version=7,
                        producer_version="0",
                        opset_imports=[helper.make_operatorsetid('', opset)])

                    sess = onnxrt.InferenceSession(model_def.SerializeToString(), providers=provider)

                    bind = SessionIOBinding(sess._sess)
                    ort_value = C_OrtValue.ortvalue_from_numpy(x, device)
                    bind.bind_ortvalue_input('X', ort_value)
                    bind.bind_output('Y', device)
                    sess._sess.run_with_iobinding(bind, None)
                    ortvalue = bind.get_outputs()[0]
                    y = ortvalue.numpy()
                    assert_almost_equal(x, y)

                    bind = SessionIOBinding(sess._sess)
                    bind.bind_input('X', device, dtype, x.shape, ort_value.data_ptr())
                    bind.bind_output('Y', device)
                    sess._sess.run_with_iobinding(bind, None)
                    ortvalue = bind.get_outputs()[0]
                    y = ortvalue.numpy()
                    assert_almost_equal(x, y)
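The test above drives IO binding through the internal C bindings; the same flow is available through the public io_binding() API. A minimal sketch, assuming sess is an InferenceSession for the Identity model built in the test:

import numpy as np

x = np.arange(8).reshape((-1, 2)).astype(np.float32)

binding = sess.io_binding()
binding.bind_cpu_input('X', x)      # bind the numpy array as input
binding.bind_output('Y')            # let onnxruntime allocate the output
sess.run_with_iobinding(binding)
y = binding.copy_outputs_to_cpu()[0]
np.testing.assert_almost_equal(x, y)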
Example #10
def create_model_for_provider(model_path: str, provider: str) -> InferenceSession:
    """
    A note on ExecutionProviders: ONNX Runtime uses a provider to represent the
    device a model runs on, e.g. the CUDA provider. As of ONNX Runtime v1.0,
    seven providers are supported, including CPU, CUDA, TensorRT and MKL.
    :param model_path:
    :param provider:
    :return:
    """
    assert provider in get_all_providers(), f"provider {provider} not found, {get_all_providers()}"

    # A few properties that might have an impact on performance (provided by MS)
    options = SessionOptions()
    options.intra_op_num_threads = 1
    options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

    # Load the model as a graph and prepare the CPU backend
    session = InferenceSession(model_path, options, providers=[provider])
    session.disable_fallback()

    return session
Example #11
def create_model_for_provider(model_path: str, provider: str,
                              optimization_level: str) -> InferenceSession:

    assert provider in get_all_providers(), \
        f"provider {provider} not found, {get_all_providers()}"

    # Few properties that might have an impact on performances (provided by MS)
    options = SessionOptions()
    options.intra_op_num_threads = 1
    if optimization_level in GRAPH_OPTIMIZATIONS:
        options.graph_optimization_level = GRAPH_OPTIMIZATIONS[
            optimization_level]
    else:
        raise KeyError(
            f"Unknown optimization level {optimization_level} "
            "(available optimization levels are all/disable_all/basic/extended)"
        )

    # Load the model as a graph and prepare the CPU backend
    session = InferenceSession(model_path, options, providers=[provider])
    session.disable_fallback()

    return session
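GRAPH_OPTIMIZATIONS is not shown in this excerpt; a plausible definition, assuming it simply maps the level names from the error message onto onnxruntime's GraphOptimizationLevel enum:

from onnxruntime import GraphOptimizationLevel

# Hypothetical mapping; the keys mirror the names listed in the error message above.
GRAPH_OPTIMIZATIONS = {
    "all": GraphOptimizationLevel.ORT_ENABLE_ALL,
    "disable_all": GraphOptimizationLevel.ORT_DISABLE_ALL,
    "basic": GraphOptimizationLevel.ORT_ENABLE_BASIC,
    "extended": GraphOptimizationLevel.ORT_ENABLE_EXTENDED,
}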
Example #12
import argparse
import sys

import onnxruntime

parser = argparse.ArgumentParser()
parser.add_argument('-m', type=str, required=False, default='resnet18-v2-7.onnx', help='ONNX model file name')
parser.add_argument('-d', type=str, required=False, default='CPU_FP32', help='OpenVINO device name')
args = parser.parse_args()

print('model: ', args.m)
print('device: ', args.d)

available_devices = ['CPU_FP32', 'GPU_FP32', 'GPU_FP16', 'MYRIAD_FP16', 'VAD-M_FP16', 'VAD-F_FP16']
if args.d not in available_devices:
    print('Device must be one of the following: ', available_devices)
    sys.exit(0)

print(onnxruntime.get_all_providers())
print(onnxruntime.get_device())

label = open('synset_words.txt').readlines()

# Available device names: CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16, VAD-F_FP32 
# VAD == Vision Accelerator Design == HDDL
options = onnxruntime.SessionOptions()
options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL
onnxruntime.capi._pybind_state.set_openvino_device(args.d)

sess = onnxruntime.InferenceSession(args.m, options)

input_name = sess.get_inputs()[0].name
print("Input name  :", input_name)
input_shape = sess.get_inputs()[0].shape
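The script stops after inspecting the input; a hedged continuation, using a random tensor where a real run would feed a preprocessed 224x224 image:

import numpy as np

x = np.random.rand(1, 3, 224, 224).astype(np.float32)  # stand-in for a real image

output = sess.run(None, {input_name: x})[0]
top5 = np.argsort(output.flatten())[::-1][:5]
for i in top5:
    print(label[i].strip())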
Example #13
    def testGetProviders(self):
        self.assertTrue(
            'CPUExecutionProvider' in onnxrt.get_available_providers())
        self.assertTrue('CPUExecutionProvider' in onnxrt.get_all_providers())
        sess = onnxrt.InferenceSession(self.get_name("mul_1.onnx"))
        self.assertTrue('CPUExecutionProvider' in sess.get_providers())
Example #14
def framework_info() -> FrameworkInfo:
    """
    Detect the information for the onnx/onnxruntime framework such as package versions,
    availability for core actions such as training and inference,
    sparsification support, and inference provider support.

    :return: The framework info for onnx/onnxruntime
    :rtype: FrameworkInfo
    """
    all_providers = []
    available_providers = []
    if check_onnxruntime_install(raise_on_error=False):
        from onnxruntime import get_all_providers, get_available_providers

        available_providers = get_available_providers()
        all_providers = get_all_providers()

    cpu_provider = FrameworkInferenceProviderInfo(
        name="cpu",
        description="Base CPU provider within ONNXRuntime",
        device="cpu",
        supported_sparsification=SparsificationInfo(),  # TODO: fill in when available
        available=(
            check_onnx_install(raise_on_error=False)
            and check_onnxruntime_install(raise_on_error=False)
            and "CPUExecutionProvider" in available_providers
        ),
        properties={},
        warnings=[],
    )
    gpu_provider = FrameworkInferenceProviderInfo(
        name="cuda",
        description="Base GPU CUDA provider within ONNXRuntime",
        device="gpu",
        supported_sparsification=SparsificationInfo(),  # TODO: fill in when available
        available=(
            check_onnx_install(raise_on_error=False)
            and check_onnxruntime_install(raise_on_error=False)
            and "CUDAExecutionProvider" in available_providers
        ),
        properties={},
        warnings=[],
    )

    return FrameworkInfo(
        framework=Framework.onnx,
        package_versions={
            "onnx": get_version(package_name="onnx", raise_on_error=False),
            "onnxruntime": (
                get_version(package_name="onnxruntime", raise_on_error=False)
            ),
            "sparsezoo": get_version(
                package_name="sparsezoo",
                raise_on_error=False,
                alternate_package_names=["sparsezoo-nightly"],
            ),
            "sparseml": get_version(
                package_name="sparseml",
                raise_on_error=False,
                alternate_package_names=["sparseml-nightly"],
            ),
        },
        sparsification=sparsification_info(),
        inference_providers=[cpu_provider, gpu_provider],
        properties={
            "available_providers": available_providers,
            "all_providers": all_providers,
        },
        training_available=False,
        sparsification_available=True,
        exporting_onnx_available=True,
        inference_available=True,
    )
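A hedged usage sketch of framework_info(), assuming the returned objects expose their constructor fields as attributes:

info = framework_info()

for provider in info.inference_providers:
    print(provider.name, "available:", provider.available)
print("all providers:", info.properties["all_providers"])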
Example #15
def latency(model,
            law='normal',
            size=1,
            number=10,
            repeat=10,
            max_time=0,
            runtime="onnxruntime",
            device='cpu',
            profiling=None):
    """
    Measures the latency of a model (python API).

    :param model: ONNX graph
    :param law: random law used to generate fake inputs
    :param size: batch size, it replaces the first dimension
        of every input if it is left unknown
    :param number: number of calls to measure
    :param repeat: number of times to repeat the experiment
    :param max_time: if it is > 0, the measure is repeated as many
        times as possible within that period of time
    :param runtime: available runtime
    :param device: device, `cpu`, `cuda:0`
    :param profiling: if set, profiles the execution of every
        node; the result can be sorted by name or type,
        the value for this parameter should be in `(None, 'name', 'type')`
    :return: dictionary, or a tuple (dictionary, dataframe)
        if profiling is enabled

    .. cmdref::
        :title: Measures model latency
        :cmd: -m mlprodict latency --help
        :lid: l-cmd-latency

        The command generates random inputs and calls the model many
        times on these inputs. It returns the processing time for one
        iteration.

        Example::

            python -m mlprodict latency --model "model.onnx"
    """
    if isinstance(model, str) and not os.path.exists(model):
        raise FileNotFoundError(  # pragma: no cover
            "Unable to find model %r." % model)
    if profiling not in (None, '', 'name', 'type'):
        raise ValueError("Unexpected value for profiling: %r." % profiling)
    size = int(size)
    number = int(number)
    repeat = int(repeat)
    if max_time in (None, 0, ""):
        max_time = None
    else:
        max_time = float(max_time)
        if max_time <= 0:
            max_time = None

    if law != "normal":
        raise ValueError("Only law='normal' is supported, not %r." % law)

    if device in ('cpu', 'CPUExecutionProvider'):
        providers = ['CPUExecutionProvider']
    elif device in ('cuda:0', 'CUDAExecutionProvider'):
        if runtime != 'onnxruntime':
            raise NotImplementedError(  # pragma: no cover
                "Only runtime 'onnxruntime' supports this device or provider "
                "%r." % device)
        providers = ['CUDAExecutionProvider']
    elif ',' in device:
        if runtime != 'onnxruntime':
            raise NotImplementedError(  # pragma: no cover
                "Only runtime 'onnxruntime' supports this device or provider "
                "%r." % device)
        providers = device.split(',')
        allp = set(get_all_providers())
        for p in providers:
            if p not in allp:
                raise ValueError(
                    "One device or provider %r is not supported among %r."
                    "" % (p, allp))
    else:
        raise ValueError(  # pragma: no cover
            "Device %r not supported." % device)

    if runtime == "onnxruntime":
        if profiling in ('name', 'type'):
            so = SessionOptions()
            so.enable_profiling = True
            sess = InferenceSession(model, sess_options=so, providers=providers)
        else:
            sess = InferenceSession(model, providers=providers)
        fct = lambda feeds: sess.run(None, feeds)
        inputs = sess.get_inputs()
    else:
        if profiling in ('name', 'type'):
            runtime_options = {"enable_profiling": True}
            if runtime != 'onnxruntime1':
                raise NotImplementedError(  # pragma: no cover
                    "Profiling is not implemented for runtime=%r." % runtime)
        else:
            runtime_options = None
        oinf = OnnxInference(model,
                             runtime=runtime,
                             runtime_options=runtime_options)
        fct = lambda feeds: oinf.run(feeds)
        inputs = oinf.obj.graph.input

    feeds = random_feed(inputs, size)
    res = measure_time(lambda: fct(feeds),
                       number=number,
                       repeat=repeat,
                       context={},
                       max_time=max_time,
                       div_by_number=True)
    for k, v in feeds.items():
        res["shape(%s)" % k] = "x".join(map(str, v.shape))
    if profiling in ('name', 'type'):
        if runtime == 'onnxruntime':
            profile_name = sess.end_profiling()
            with open(profile_name, 'r', encoding='utf-8') as f:
                js = json.load(f)
            js = OnnxWholeSession.process_profiling(js)
            df = DataFrame(js)
        else:
            df = oinf.get_profiling(as_df=True)
        if profiling == 'name':
            gr = df[['dur', "args_op_name",
                     "name"]].groupby(["args_op_name",
                                       "name"]).sum().sort_values('dur')
        else:
            gr = df[['dur', "args_op_name"
                     ]].groupby("args_op_name").sum().sort_values('dur')
        return res, gr

    return res
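A hedged usage sketch of latency(); the import path and model file are assumptions, while the parameters are the ones documented above:

# Import path is assumed; adjust to wherever this function lives in mlprodict.
from mlprodict.onnxrt.validate.validate_latency import latency

# Plain latency measurement on CPU; returns a dictionary of timings.
res = latency("model.onnx", device="cpu", number=10, repeat=10)
print(res)

# With profiling enabled, a (dictionary, dataframe) pair is returned,
# aggregated here by operator type.
res, per_op = latency("model.onnx", device="cpu", profiling="type")
print(per_op)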