Пример #1
0
def main():
    """Build an int8 TensorRT engine for the identity model and sanity-check one inference."""
    # Giving the calibrator a path (or file-like object) as `cache` lets it persist
    # calibration data, so the next engine build does not need to calibrate again.
    #
    # TIP: The calibrator also works with the TensorRT APIs directly
    # (e.g. config.int8_calibrator); using it through Polygraphy loaders is optional.
    calibrator = Calibrator(data_loader=calib_data(), cache="identity-calib.cache")

    # A calibrator alone is not enough - int8 mode has to be switched on as well.
    load_network = NetworkFromOnnxPath("identity.onnx")
    int8_config = CreateConfig(int8=True, calibrator=calibrator)
    build_engine = EngineFromNetwork(load_network, config=int8_config)

    # Activating the runner triggers calibration and the engine build. Raising the
    # logging verbosity for that stretch makes the TensorRT output visible.
    with G_LOGGER.verbosity(G_LOGGER.VERBOSE):
        with TrtRunner(build_engine) as runner:
            # Dummy input used to try out the freshly built int8 engine.
            dummy_input = np.ones(shape=(1, 1, 2, 2), dtype=np.float32)

            # NOTE: The runner owns the output buffers and may reuse them between `infer()`
            # calls, so use `copy.deepcopy()` to keep results from several inferences.
            outputs = runner.infer({"x": dummy_input})

            # An identity model must hand back exactly what it was given.
            assert np.array_equal(outputs["y"], dummy_input)
Пример #2
0
    def onnx_to_trt(self, output_fpath: str, input_fpath: str,
                    network_metadata: NetworkMetadata):
        """
        Builds a TensorRT engine from an ONNX file and saves it to disk.

        The build configuration is also stored on `self.trt_inference_config`.

        Args:
            output_fpath (str): Path the built engine is saved to.
            input_fpath (str): Path of the ONNX file to convert.
            network_metadata (NetworkMetadata): Network metadata of the network being converted.

        Returns:
            TRTEngineFile: Engine-file object created for `output_fpath`.
        """
        engine_file = self.trt_engine_class(output_fpath, network_metadata)

        # The engine-file object supplies the workspace size, shape profiles and
        # strict-types setting; fp16 comes from the requested precision.
        self.trt_inference_config = CreateConfig(
            fp16=network_metadata.precision.fp16,
            max_workspace_size=engine_file.DEFAULT_TRT_WORKSPACE_MB * 1024 * 1024,
            profiles=engine_file.get_dynamic_shape_profiles(),
            strict_types=engine_file.use_strict_types(),
        )

        # Polygraphy is extra verbose only while G_LOGGER is at DEBUG level.
        if G_LOGGER.level == G_LOGGER.DEBUG:
            pg_severity = PG_LOGGER.EXTRA_VERBOSE
        else:
            pg_severity = PG_LOGGER.WARNING

        with PG_LOGGER.verbosity(pg_severity):
            parsed_network = network_from_onnx_path(input_fpath)
            network_definition = engine_file.get_network_definition(parsed_network)

            built_engine = engine_from_network(
                network_definition, config=self.trt_inference_config)
            save_engine(built_engine, output_fpath)

        return engine_file
Пример #3
0
    def __call__(self, *args, **kwargs):
        """Forward all arguments to `self.forward` under an adjusted Polygraphy verbosity."""
        # Polygraphy logging is extra verbose only while the root logger sits at DEBUG;
        # otherwise it is held at WARNING for the duration of the call.
        # NOTE(review): the severity constants are read from G_LOGGER but applied to
        # PG_LOGGER - confirm that G_LOGGER exposes Polygraphy-compatible values.
        if G_LOGGER.root.level == G_LOGGER.DEBUG:
            pg_severity = G_LOGGER.EXTRA_VERBOSE
        else:
            pg_severity = G_LOGGER.WARNING

        with PG_LOGGER.verbosity(pg_severity):
            return self.forward(*args, **kwargs)
Пример #4
0
    def fill_defaults(self, network, default_shape_value=None):
        """
        Adds an entry for every network input that this profile does not cover yet,
        using the same shape (or shape-tensor values) for min, opt and max.

        Args:
            network (trt.INetworkDefinition):
                    The TensorRT network this profile is meant for.
                    Its inputs and their shapes determine which entries get added.
            default_shape_value (int):
                    The value substituted for dynamic dimensions.
                    Falls back to ``constants.DEFAULT_SHAPE_VALUE`` when omitted.

        Returns:
            Profile: Self
        """
        default_shape_value = util.default(default_shape_value, constants.DEFAULT_SHAPE_VALUE)

        for index in range(network.num_inputs):
            tensor = network.get_input(index)

            # Entries that were set explicitly are left untouched.
            if tensor.name in self:
                continue

            # WAR for spam from TRT
            with G_LOGGER.verbosity(G_LOGGER.CRITICAL):
                is_shape_tensor = tensor.is_shape_tensor

            if not is_shape_tensor:
                shape = util.override_dynamic_shape(tensor.shape, default_shape_value)
                # Only warn when a dynamic dimension was actually overridden.
                if shape != tensor.shape:
                    no_shapes_msg = (
                        "{:} | No shapes provided; Will use shape: {:} for min/opt/max in profile.\n"
                        .format(trt_util.str_from_tensor(tensor, is_shape_tensor), shape)
                    )
                    G_LOGGER.warning(no_shapes_msg, mode=LogMode.ONCE)
                    G_LOGGER.warning(
                        "This will cause the tensor to have a static shape. If this is incorrect, please "
                        "set the range of shapes for this input tensor.",
                        mode=LogMode.ONCE,
                    )
            else:
                # The first dimension of the shape tensor gives the number of default values to use.
                rank = tensor.shape[0]
                shape = (default_shape_value, ) * rank
                no_values_msg = (
                    "{:} | No values provided; Will use input values: {:} for min/opt/max in profile.\n"
                    .format(trt_util.str_from_tensor(tensor, is_shape_tensor), shape)
                )
                G_LOGGER.warning(no_values_msg, mode=LogMode.ONCE)
                G_LOGGER.warning(
                    "This will cause the shape-tensor to have static values. If this is incorrect, please "
                    "set the range of values for this input shape-tensor.",
                    mode=LogMode.ONCE,
                )

            self.add(tensor.name, shape, shape, shape)
        return self
Пример #5
0
    def fallback_inference(self, onnx_model):
        """
        Runs the model once with ONNX-Runtime, with every tensor marked as an output.

        This can be used to retrieve values/shapes/data types for all
        tensors in the model when other shape inference approaches fail.
        Input data comes from the data loader provided by ``self.data_loader_args``.

        Args:
            onnx_model (onnx.ModelProto):
                    The ONNX model in which to infer shapes.

        Returns:
            (OrderedDict[str, np.ndarray], TensorMetadata):
                    1. Mapping of values for all tensors in the model, including inputs.
                        Values are loaded lazily when first accessed so as to save memory.
                    2. Metadata for every tensor in the model.
        """
        from polygraphy.comparator import IterationResult

        # Shift the logger severity by +10 while the model is prepared and run
        # (presumably to keep backend output quiet - confirm against the logger).
        with G_LOGGER.verbosity(G_LOGGER.severity + 10):
            # Work on a copy of the model with all tensors marked as outputs.
            mark_all_outputs = onnx_backend.ModifyOutputs(
                onnx_model, outputs=constants.MARK_ALL, copy=True)
            serialize_model = onnx_backend.BytesFromOnnx(mark_all_outputs)
            build_session = onnxrt_backend.SessionFromOnnx(serialize_model)

            with onnxrt_backend.OnnxrtRunner(build_session) as runner:
                # Only input_metadata is set here - user_input_metadata is left alone so
                # that it can be populated by the --model-inputs argument.
                loader = self.data_loader_args.get_data_loader()
                loader.input_metadata = runner.get_input_metadata()
                feed_dict = loader[0]

                # Shift the severity back by 10 just for this one message.
                with G_LOGGER.verbosity(G_LOGGER.severity - 10):
                    G_LOGGER.info(
                        "Running fallback shape inference using input metadata:\n{:}"
                        .format(TensorMetadata.from_feed_dict(feed_dict)))

                all_tensors = runner.infer(feed_dict)
                # Merge in the inputs so that every tensor in the model has a value.
                all_tensors.update(feed_dict)
                # IterationResult can handle very large tensors by saving to disk;
                # layerwise outputs might otherwise take up too much memory.
                tensor_metadata = TensorMetadata.from_feed_dict(all_tensors)
                return IterationResult(all_tensors), tensor_metadata
Пример #6
0
    def check_network(self, suffix):
        """
        Builds an engine from the current network, runs it, and compares the
        outputs against the stored golden values.

        Args:
            suffix (str):
                    Combined with ``self.precision`` into a "-<suffix>-<precision>" tag that is
                    inserted into the ``save_engine`` path, giving each checked engine its own file.

        Returns:
            OrderedDict[str, OutputCompareResult]:
                    A mapping of output names to an object describing whether they matched, and what the
                    required tolerances were.
        """
        from polygraphy.comparator import Comparator, CompareFunc, DataLoader
        from polygraphy.backend.trt import EngineFromNetwork, TrtRunner, ModifyNetwork, SaveEngine

        # Unless output was requested, only critical messages are let through during the check.
        if self.args.show_output:
            severity = G_LOGGER.severity
        else:
            severity = G_LOGGER.CRITICAL

        with G_LOGGER.verbosity(severity=severity):
            data_loader = tool_util.get_data_loader(self.args)

            # HACK: Override strict types so things actually run in the right precision.
            self.args.strict_types = True
            load_config = tool_util.get_trt_config_loader(self.args, data_loader)
            config = load_config(self.builder, self.network)

            suffix = "-{:}-{:}".format(suffix, self.precision)
            engine_path = misc.insert_suffix(self.args.save_engine, suffix)

            # Mark the requested outputs before the engine is built.
            mark_outputs = ModifyNetwork(
                (self.builder, self.network, self.parser),
                outputs=self.args.trt_outputs)
            self.builder, self.network, self.parser = mark_outputs()

            build_engine = EngineFromNetwork(
                (self.builder, self.network, self.parser), config)
            engine_loader = SaveEngine(build_engine, path=engine_path)

            runners = [TrtRunner(engine_loader)]

            results = Comparator.run(runners, data_loader=data_loader)
            if self.args.validate:
                Comparator.validate(results)
            # Add the golden results so the comparison has something to compare against.
            results.update(self.golden)

            compare_func = CompareFunc.basic_compare_func(
                atol=self.args.atol,
                rtol=self.args.rtol,
                check_shapes=not self.args.no_shape_check)
            accuracy_result = Comparator.compare_accuracy(
                results, compare_func=compare_func)

        # First iteration of first runner pair
        first_runner_pair = list(accuracy_result.values())[0]
        tolerances = first_runner_pair[0]
        for name, req_tol in tolerances.items():
            if not req_tol:
                G_LOGGER.error(
                    "FAILED | Output: {:} | Required Tolerances: {:}".format(
                        name, req_tol))
                continue
            G_LOGGER.success(
                "PASSED | Output: {:} | Required Tolerances: {:}".format(
                    name, req_tol))
        return accuracy_result
Пример #7
0
    def to_trt(self, builder, network):
        """
        Translates the entries of this Profile into a TensorRT IOptimizationProfile.

        Entries whose names do not match any network input are reported as an error.

        Args:
            builder (trt.Builder):
                    A TensorRT builder. This will be used to construct the IOptimizationProfile.
            network (trt.INetworkDefinition):
                    The TensorRT network the profile applies to.

        Returns:
            trt.IOptimizationProfile: A TensorRT optimization profile.
        """
        trt_profile = builder.create_optimization_profile()
        # Profile keys not yet matched to a network input; leftovers are reported at the end.
        unused_keys = set(self.keys())
        available_inputs = set()

        for index in range(network.num_inputs):
            tensor = network.get_input(index)
            unused_keys.discard(tensor.name)
            available_inputs.add(tensor.name)

            # WAR for spam from TRT
            with G_LOGGER.verbosity():
                is_shape_tensor = tensor.is_shape_tensor

            if not is_shape_tensor:
                shapes = self[tensor.name]
                trt_profile.set_shape(tensor.name, shapes.min, shapes.opt, shapes.max)
                G_LOGGER.verbose(
                    "{:} | Setting input tensor shapes to: {:}".format(
                        trt_util.str_from_tensor(tensor, is_shape_tensor), shapes))
                continue

            # Shape-tensor inputs without an entry only trigger a warning.
            if tensor.name not in self:
                G_LOGGER.warning(
                    "{:} | No values provided. "
                    "Assuming this is not a dynamic shape-tensor.".format(
                        trt_util.str_from_tensor(tensor, is_shape_tensor)),
                    mode=LogMode.ONCE,
                )
                continue

            shapes = self[tensor.name]
            trt_profile.set_shape_input(tensor.name, shapes.min, shapes.opt, shapes.max)
            G_LOGGER.verbose(
                "{:} | Setting input shape-tensor value range to: {:}".format(
                    trt_util.str_from_tensor(tensor, is_shape_tensor), shapes))

        if unused_keys:
            G_LOGGER.error(
                "Invalid inputs were provided to the optimization profile: {:}\n"
                "Note: Inputs available in the TensorRT network are: {:}".format(
                    unused_keys, available_inputs))

        return trt_util.check_profile(trt_profile)
Пример #8
0
    def test_non_matching_outputs(self, shape):
        """An all-zeros output must not match an all-ones output, unless the shape is empty."""
        zeros_result = IterationResult(
            outputs={"output": np.zeros(shape, dtype=np.float32)})
        ones_result = IterationResult(
            outputs={"output": np.ones(shape, dtype=np.float32)})

        compare_func = CompareFunc.basic_compare_func()

        # Compare under the most verbose logging level.
        with G_LOGGER.verbosity(G_LOGGER.ULTRA_VERBOSE):
            acc = compare_func(zeros_result, ones_result)

        # The mismatch is only required when the shape is not empty.
        if not util.is_empty_shape(shape):
            assert not acc["output"]
Пример #9
0
def str_from_network(network, mode="full"):
    """
    Builds a human-readable description of a TensorRT network: a header line,
    the network inputs and outputs, and (optionally) every layer.

    Args:
        network (trt.INetworkDefinition): The network.
        mode (str): Controls what is displayed for each layer. Choices: ["none", "basic", "attrs", "full"]

    Returns:
        str
    """
    LAYER_TYPE_CLASS_MAPPING = get_layer_class_mapping()

    network_name = network.name
    if hasattr(network, "has_implicit_batch_dimension") and network.has_implicit_batch_dimension:
        batch_kind = "Implicit"
    else:
        batch_kind = "Explicit"
    if hasattr(network, "has_explicit_precision") and network.has_explicit_precision:
        precision_note = " with Explicit Precision "
    else:
        precision_note = ""

    # The description is collected piece by piece and joined once at the end.
    pieces = [
        "Name: {:} | {:} Batch Network{:}\n".format(network_name, batch_kind, precision_note),
        "\n",
    ]

    input_metadata = get_network_input_metadata(network)
    pieces.append("---- {:} Network Input(s) ----\n{:}\n\n".format(len(input_metadata), input_metadata))
    output_metadata = get_network_output_metadata(network)
    pieces.append("---- {:} Network Output(s) ----\n{:}\n\n".format(len(output_metadata), output_metadata))
    pieces.append("---- {:} Layer(s) ----\n".format(network.num_layers))

    show_attrs = mode in ["attrs", "full"]
    if mode != "none":
        for index, layer in enumerate(network):
            # Switch the layer to its concrete class when a mapping exists for its type.
            if layer.type in LAYER_TYPE_CLASS_MAPPING:
                layer.__class__ = LAYER_TYPE_CLASS_MAPPING[layer.type]

            pieces.append(str_from_layer(layer, index))

            if show_attrs:
                # Exclude special attributes, as well as any attributes of the base layer class
                # (those can be displayed above).
                attrs = get_layer_attribute_names(layer)
                if attrs:
                    pieces.append(util.indent_block("---- Attributes ----") + "\n")
                for attr in attrs:
                    with G_LOGGER.verbosity():
                        val = getattr(layer, attr)
                    # NumPy array values are only shown in "full" mode.
                    if mode == "full" or not isinstance(val, np.ndarray):
                        prefix = "{:}.".format(layer.name) if layer.name else ""
                        pieces.append(util.indent_block("{:}{:} = {:}".format(prefix, attr, val)) + "\n")
            pieces.append("\n")

    return util.indent_block("".join(pieces), level=0)
Пример #10
0
"""
from polygraphy.backend.trt import NetworkFromOnnxPath, CreateConfig, EngineFromNetwork, Calibrator, TrtRunner
from polygraphy.logger import G_LOGGER

import numpy as np
import os


# Path to the ONNX model, built relative to this file's directory:
# two parent directories up, then `models/identity.onnx`.
MODEL = os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir, "models", "identity.onnx")
# Shape used for both the calibration batches and the inference input in this script.
INPUT_SHAPE = (1, 1, 2, 2)

# The data loader argument to Calibrator can be any iterable or generator that yields `feed_dict`s.
# A feed_dict is just a mapping of input names to corresponding inputs (as NumPy arrays).
# Calibration will continue until our data loader runs out of data (4 batches in this example).
def calib_data():
    """Yield four feed_dicts, each mapping input "x" to an all-ones float32 array of INPUT_SHAPE."""
    num_batches = 4
    yield from (
        {"x": np.ones(shape=INPUT_SHAPE, dtype=np.float32)}  # Totally real data
        for _ in range(num_batches)
    )

# Passing a path (or file-like object) as `cache` lets the calibrator persist its
# calibration data, so calibration can be skipped the next time the engine is built.
calibrator = Calibrator(
    data_loader=calib_data(),
    cache="identity-calib.cache",
)
build_engine = EngineFromNetwork(
    NetworkFromOnnxPath(MODEL),
    config=CreateConfig(int8=True, calibrator=calibrator),
)

# Activating the runner calibrates and builds the engine. The logging verbosity is
# raised for that stretch so that the output from TensorRT is visible.
with G_LOGGER.verbosity(G_LOGGER.VERBOSE), TrtRunner(build_engine) as runner:
    feed_dict = {"x": np.ones(shape=INPUT_SHAPE, dtype=np.float32)}
    outputs = runner.infer(feed_dict=feed_dict)
    # The model is an identity, so the output has to equal the input.
    assert np.all(outputs["y"] == feed_dict["x"])
Пример #11
0
    def call_impl(self):
        """
        Converts the TensorRT network into an ONNX-like model in which each
        layer becomes a node whose op is the layer type name and whose
        attributes are the layer's attribute values.

        Returns:
            onnx.ModelProto: The ONNX-like, but **not** valid ONNX, representation of the TensorRT network.
        """
        ret, owns_network = util.invoke_if_callable(self._network)
        builder, network, parser = util.unpack_args(ret, num=3)

        if builder is None or network is None:
            G_LOGGER.critical(
                "Expected to receive a (builder, network) tuple for the `network` parameter, "
                "but received: ({:}, {:})".format(builder, network))

        # Attribute values of these types are stored as-is; anything else is reported
        # and converted to a string.
        VALID_TYPES = (np.ndarray, list, int, str, bool, float)

        with contextlib.ExitStack() as stack:
            # When `owns_network` is set, the builder/network/parser are entered as
            # context managers so they are cleaned up once the conversion is done.
            if owns_network:
                stack.enter_context(builder)
                stack.enter_context(network)
                if parser is not None:
                    stack.enter_context(parser)

            # One gs.Variable per tensor name, shared between all nodes that touch it.
            tensor_map = {}

            def tensors_from_meta(meta):
                # Returns the variables for every (name -> (dtype, shape)) entry,
                # creating those that have not been seen before.
                tensors = []
                for name, (dtype, shape) in meta.items():
                    if name not in tensor_map:
                        tensor_map[name] = gs.Variable(name=name,
                                                       dtype=dtype,
                                                       shape=shape)
                    tensors.append(tensor_map[name])
                return tensors

            nodes = []
            graph_inputs = tensors_from_meta(
                trt_util.get_network_input_metadata(network))
            graph_outputs = tensors_from_meta(
                trt_util.get_network_output_metadata(network))

            LAYER_TYPE_CLASS_MAPPING = trt_util.get_layer_class_mapping()

            for layer in network:
                op_name = layer.type.name
                # Switch the layer to its concrete class when a mapping exists for its type.
                if layer.type in LAYER_TYPE_CLASS_MAPPING:
                    layer.__class__ = LAYER_TYPE_CLASS_MAPPING[layer.type]

                node_inputs = tensors_from_meta(
                    trt_util.get_layer_input_metadata(layer))
                node_outputs = tensors_from_meta(
                    trt_util.get_layer_output_metadata(layer))
                attrs = {}
                attr_names = trt_util.get_layer_attribute_names(layer)
                for name in attr_names:
                    with G_LOGGER.verbosity():
                        attr = getattr(layer, name)

                    if util.is_sequence(attr) or isinstance(
                            attr, (trt.Dims, trt.Permutation)):
                        try:
                            attr = list(attr)
                        except ValueError:  # Invalid dims
                            attr = []

                    if hasattr(attr, "__entries"):  # TensorRT Enums
                        attr = attr.name

                    if isinstance(attr, trt.ILoop):
                        attr = attr.name

                    if not isinstance(attr, VALID_TYPES):
                        G_LOGGER.internal_error(
                            "Unknown type: {:} for layer attribute: {:}.\n"
                            "Note: Layer was: {:}".format(
                                type(attr), attr, layer))
                        # Best-effort fallback. Only ordinary errors are swallowed, so that
                        # KeyboardInterrupt / SystemExit still propagate.
                        try:
                            attr = str(attr)
                        except Exception:
                            attr = "<error during conversion>"

                    attrs[name] = attr

                nodes.append(
                    gs.Node(name=layer.name,
                            op=op_name,
                            attrs=attrs,
                            inputs=node_inputs,
                            outputs=node_outputs))

            graph = gs.Graph(name=network.name,
                             inputs=graph_inputs,
                             outputs=graph_outputs,
                             nodes=nodes)

            return gs.export_onnx(graph)
Пример #12
0
def str_from_network(network, mode="full"):
    """
    Builds a human-readable description of a TensorRT network: a header line,
    the network inputs and outputs, and (optionally) every layer with its attributes.

    Args:
        network (trt.INetworkDefinition): The network.
        mode (str): Controls what is displayed for each layer. Choices: ["none", "basic", "attrs", "full"]

    Returns:
        str
    """
    LAYER_TYPE_CLASS_MAPPING = get_layer_class_mapping()

    def is_special_attribute(attr):
        # Dunder names such as `__class__`.
        return attr.startswith("__") and attr.endswith("__")

    def is_valid_attribute(attr, layer):
        # Filters out attributes that are skipped for certain layer configurations.
        layer_type = type(layer)
        if (layer_type == trt.IPoolingLayer
                or layer_type == trt.IConvolutionLayer
                or layer_type == trt.IDeconvolutionLayer):
            if len(layer.get_input(0).shape) > 4:
                # 3D pooling uses padding_nd
                return attr not in ["padding", "stride", "window_size"]
        if layer_type == trt.IResizeLayer and layer.num_inputs > 1:
            return attr not in ["scales"]
        if layer_type == trt.ISliceLayer and layer.num_inputs > 1:
            return attr not in ["shape", "start", "stride"]
        return True

    network_name = network.name
    if hasattr(network, "has_implicit_batch_dimension") and network.has_implicit_batch_dimension:
        batch_kind = "Implicit"
    else:
        batch_kind = "Explicit"
    if hasattr(network, "has_explicit_precision") and network.has_explicit_precision:
        precision_note = " with Explicit Precision "
    else:
        precision_note = ""

    # The description is collected piece by piece and joined once at the end.
    pieces = [
        "Name: {:} | {:} Batch Network{:}\n".format(network_name, batch_kind, precision_note),
        "\n",
    ]

    input_metadata = get_input_metadata(network)
    pieces.append("---- {:} Network Input(s) ----\n{:}\n\n".format(
        len(input_metadata), input_metadata))
    output_metadata = get_output_metadata(network)
    pieces.append("---- {:} Network Output(s) ----\n{:}\n\n".format(
        len(output_metadata), output_metadata))
    pieces.append("---- {:} Layer(s) ----\n".format(network.num_layers))

    show_attrs = mode in ["attrs", "full"]
    if mode != "none":
        for index, layer in enumerate(network):
            # Switch the layer to its concrete class when a mapping exists for its type.
            if layer.type in LAYER_TYPE_CLASS_MAPPING:
                layer.__class__ = LAYER_TYPE_CLASS_MAPPING[layer.type]

            pieces.append(str_from_layer(layer, index))

            if show_attrs:
                # Exclude special attributes, as well as any attributes of the base layer class
                # (those can be displayed above).
                attrs = []
                for attr in dir(layer):
                    if is_special_attribute(attr) or hasattr(trt.ILayer, attr):
                        continue
                    if is_valid_attribute(attr, layer):
                        attrs.append(attr)

                if attrs:
                    pieces.append(util.indent_block("---- Attributes ----") + "\n")
                for attr in attrs:
                    with G_LOGGER.verbosity():
                        val = getattr(layer, attr)
                    # NumPy array values are only shown in "full" mode.
                    if mode == "full" or not isinstance(val, np.ndarray):
                        prefix = "{:}.".format(layer.name) if layer.name else ""
                        pieces.append(util.indent_block("{:}{:} = {:}".format(
                            prefix, attr, val)) + "\n")
            pieces.append("\n")

    return util.indent_block("".join(pieces), level=0)
Пример #13
0
def main():
    """Build one engine with three optimization profiles and run it through three runners."""
    image_dims = (3, 28, 28)

    def shape_for_batch(batch_size):
        # Full input shape for the given batch size.
        return (batch_size, ) + image_dims

    def make_batch(batch_size):
        # Fake batch made by repeating the single fake input image along axis 0.
        return np.repeat(input_img, batch_size, axis=0)

    # A Profile maps each input tensor to a range of shapes.
    #
    # TIP: To save lines, calls to `add` can be chained:
    #     profile.add("input0", ...).add("input1", ...)
    #
    #   Of course, you may alternatively write this as:
    #     profile.add("input0", ...)
    #     profile.add("input1", ...)
    #
    profiles = [
        # The low-latency case. For best performance, min == opt == max.
        Profile().add("X", min=shape_for_batch(1), opt=shape_for_batch(1), max=shape_for_batch(1)),
        # The dynamic batching case. `4` is the opt batch size since that's the most common case.
        Profile().add("X", min=shape_for_batch(1), opt=shape_for_batch(4), max=shape_for_batch(32)),
        # The offline case. For best performance, min == opt == max.
        Profile().add("X", min=shape_for_batch(128), opt=shape_for_batch(128), max=shape_for_batch(128)),
    ]

    # See examples/api/06_immediate_eval_api for details on immediately evaluated
    # functional loaders like `engine_from_network`.
    load_network = NetworkFromOnnxPath("dynamic_identity.onnx")
    engine = engine_from_network(load_network, config=CreateConfig(profiles=profiles))

    # The engine is saved so that it can be inspected with `inspect model`,
    # which makes it easy to see how the engine bindings are laid out.
    save_engine(engine, "dynamic_identity.engine")

    # Three separate runners are created, but not activated, each with its own context.
    #
    # TIP: Providing a context directly, as opposed to via a lazy loader,
    # ensures that the runner will *not* take ownership of it.
    #
    low_latency = TrtRunner(engine.create_execution_context())

    # NOTE: Creating the next two contexts makes TensorRT display errors since profile 0
    # is already in use by the first execution context. G_LOGGER.verbosity() suppresses them.
    #
    with G_LOGGER.verbosity(G_LOGGER.CRITICAL):
        dynamic_batching = TrtRunner(engine.create_execution_context())
        offline = TrtRunner(engine.create_execution_context())
        # NOTE: The profile index could be updated here (e.g. `context.active_optimization_profile = 2`),
        # but TrtRunner's `set_profile()` API is used instead once the runner is activated.

    # From here on, runners are activated as they are needed.
    #
    # NOTE: Since the context and engine are already created, the runner will only need to
    # allocate input and output buffers during activation.

    input_img = np.ones(shape_for_batch(1), dtype=np.float32)  # An input "image"

    with low_latency:
        outputs = low_latency.infer({"X": input_img})
        # It's an identity model!
        assert np.array_equal(outputs["Y"], input_img)

        print("Low latency runner succeeded!")

        # While serving requests online, dynamic batching may be needed for a moment.
        #
        # NOTE: Activating runners is assumed to be cheap here, so the dynamic batching
        # runner is brought up just-in-time.
        #
        # TIP: If activating the runner is not cheap (e.g. input/output buffers are large),
        # it might be better to keep the runner active the whole time.
        #
        with dynamic_batching:
            # NOTE: The very first time this runner is activated, the profile index has to
            # be set (it's 0 by default). This needs to be done *only once*.
            # Alternatively, the profile index could have been set in the context directly (see above).
            #
            # Use the second profile, which is intended for dynamic batching.
            dynamic_batching.set_profile(1)

            small_input_batch = make_batch(4)  # Shape: (4, 3, 28, 28)
            outputs = dynamic_batching.infer({"X": small_input_batch})
            assert np.array_equal(outputs["Y"], small_input_batch)

    # If dynamic batching is needed again later, the runner can be activated once more.
    #
    # NOTE: This time, the profile does *not* need to be set.
    #
    with dynamic_batching:
        # NOTE: Any shape that's in the range of the profile can be used without
        # additional setup - Polygraphy handles the details behind the scenes!
        #
        large_input_batch = make_batch(16)  # Shape: (16, 3, 28, 28)
        outputs = dynamic_batching.infer({"X": large_input_batch})
        assert np.array_equal(outputs["Y"], large_input_batch)

        print("Dynamic batching runner succeeded!")

    with offline:
        # NOTE: The profile must be set to something other than 0 or 1 since both of those
        # are now in use by the `low_latency` and `dynamic_batching` runners respectively.
        #
        # Use the third profile, which is intended for the offline case.
        offline.set_profile(2)

        large_offline_batch = make_batch(128)  # Shape: (128, 3, 28, 28)
        outputs = offline.infer({"X": large_offline_batch})
        assert np.array_equal(outputs["Y"], large_offline_batch)

        print("Offline runner succeeded!")