Example #1
    def __init__(self,
                 graph,
                 max_workspace_size=None,
                 fp16=None,
                 int8=None,
                 max_batch_size=None,
                 is_dynamic_op=False,
                 minimum_segment_size=None):
        """
        Functor that optimizes a TensorFlow model using TF-TRT.

        Args:
            graph (Callable() -> Tuple[tf.Graph, Sequence[str]]):
                    A callable that can supply a tuple containing a TensorFlow graph and output names.
            max_workspace_size (int): The maximum workspace size. Defaults to 1 << 24 bytes.
            fp16 (bool): Whether to run in FP16 mode. Defaults to False.
            int8 (bool): Whether to run in INT8 mode. Defaults to False.
            max_batch_size (int): The maximum batch size. Defaults to 1.
            is_dynamic_op (bool): Whether to build TensorRT engines at runtime rather than during conversion. Defaults to False.
            minimum_segment_size (int): The minimum number of nodes required for a subgraph to be converted into a TensorRT node. Defaults to 3.
        """
        self._graph = graph
        self.max_workspace_size = misc.default_value(max_workspace_size,
                                                     1 << 24)
        self.fp16 = misc.default_value(fp16, False)
        self.int8 = misc.default_value(int8, False)
        self.max_batch_size = misc.default_value(max_batch_size, 1)
        self.is_dynamic_op = is_dynamic_op
        self.minimum_segment_size = misc.default_value(minimum_segment_size, 3)
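Every snippet on this page resolves optional arguments through misc.default_value. A minimal sketch of the presumed behavior (the real helper lives in Polygraphy's misc utilities):

def default_value(value, default):
    # Return value unless it is None, in which case fall back to the default.
    return value if value is not None else default

assert default_value(None, 1 << 24) == 1 << 24  # unspecified workspace size -> 16 MiB default
assert default_value(1 << 30, 1 << 24) == 1 << 30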
Example #2
 def __init__(self, explicit_precision, explicit_batch=None):
     """
     Args:
         explicit_precision (bool): Whether to create the network with explicit precision enabled. Defaults to False.
         explicit_batch (bool): Whether to create the network with explicit batch mode enabled. Defaults to True.
     """
     self.explicit_precision = misc.default_value(explicit_precision, False)
     self.explicit_batch = misc.default_value(explicit_batch, True)
Example #3
    def __init__(self,
                 seed=None,
                 iterations=None,
                 input_metadata=None,
                 int_range=None,
                 float_range=None):
        """
        Args:
            seed (int):
                    The seed to use when generating random inputs.
                    Defaults to ``util.constants.DEFAULT_SEED``.
            iterations (int):
                    The number of iterations for which to supply data.
                    Defaults to 1.
            input_metadata (TensorMetadata):
                    A mapping of input names to their corresponding shapes and data types.
                    This will be used to determine what shapes to supply for inputs with dynamic shape, as
                    well as to set the data type of the generated inputs.
                    If either dtype or shape are None, then the value will be automatically determined.
                    For input shape tensors, i.e. inputs whose *value* describes a shape in the model, the
                    provided shape will be used to populate the values of the inputs, rather than to determine
                    their shape.
            int_range (Tuple[int]):
                    A tuple containing exactly 2 integers, indicating the minimum and maximum integer values (inclusive)
                    the data loader should generate. If either value in the tuple is None, the default will be used
                    for that value.
                    If None is provided instead of a tuple, then the default values will be used for both the
                    minimum and maximum.
            float_range (Tuple[float]):
                    A tuple containing exactly 2 floats, indicating the minimum and maximum float values (inclusive)
                    the data loader should generate. If either value in the tuple is None, the default will be used
                    for that value.
                    If None is provided instead of a tuple, then the default values will be used for both the
                    minimum and maximum.
        """
        def default_tuple(tup, default):
            if tup is None:
                return default
            new_tup = []
            for elem, default_elem in zip(tup, default):
                new_tup.append(misc.default_value(elem, default_elem))
            return tuple(new_tup)

        self.seed = misc.default_value(seed, DEFAULT_SEED)
        self.iterations = misc.default_value(iterations, 1)
        self.user_input_metadata = misc.default_value(input_metadata, {})
        self.int_range = default_tuple(int_range, (1, 25))
        self.float_range = default_tuple(float_range, (-1.0, 1.0))
        self.input_metadata = None

        if self.user_input_metadata:
            G_LOGGER.info(
                "Will generate inference input data according to provided TensorMetadata: {}"
                .format(self.user_input_metadata))
Example #4
    def __init__(self, explicit_precision=None, explicit_batch=None):
        """
        Functor that creates an empty TensorRT network.

        Args:
            explicit_precision (bool):
                    Whether to create the network with explicit precision enabled. Defaults to False
            explicit_batch (bool):
                    Whether to create the network with explicit batch mode. Defaults to True.
        """
        self.explicit_precision = misc.default_value(explicit_precision, False)
        self.explicit_batch = misc.default_value(explicit_batch, True)
Example #5
    def __init__(self, shape=None, dtype=None):
        """
        Represents a buffer on the GPU.

        Args:
            shape (Tuple[int]): The initial shape of the buffer.
            dtype (numpy.dtype): The data type of the buffer.
        """
        self.shape = misc.default_value(shape, tuple())
        self.dtype = misc.default_value(dtype, np.float32)
        self.allocated_nbytes = 0
        self._ptr = ctypes.c_void_p(None)
        self.resize(self.shape)
Example #6
    def __init__(self,
                 network_loader=None,
                 max_workspace_size=None,
                 max_batch_size=None,
                 fp16=None,
                 tf32=None,
                 load_engine=None,
                 save_engine=None,
                 layerwise=False,
                 plugins=[],
                 name=None):
        """
        Creates a runner that manages a single TensorRT engine.


            network_loader (BaseModelLoader):
                    A loader that returns a TRT builder, network, parser and input shapes.
            max_workspace_size (int): The maximum workspace size.
            max_batch_size (int): The maximum batch size.
            fp16 (bool): Whether to run in FP16 mode. Defaults to False.
            layerwise (bool): Whether to retrieve the outputs of every layer in the network.
            name (str):
                    The human-readable name prefix to use for this runner.
                    A runner count and timestamp will be appended to this prefix.
        """
        G_LOGGER.warning(
            "TrtLegacyRunner is deprecated, and will be removed in a future release"
        )
        # Load any user-supplied plugin libraries. This must happen before everything else, including engine deserialization.
        if plugins:
            import ctypes
            for plugin in plugins:
                path = os.path.abspath(plugin)
                G_LOGGER.info("Loading plugin library: {:}".format(path))
                ctypes.CDLL(path)

        # Choose a unique name for this runner.
        super().__init__(name=name, prefix="trt-legacy-runner")

        # Save parameters for activate and deactivate.
        self.network_loader = network_loader
        self.max_workspace_size = misc.default_value(max_workspace_size,
                                                     1 << 24)
        self.fp16 = misc.default_value(fp16, False)
        self.tf32 = misc.default_value(tf32, False)
        self.load_engine = load_engine

        self.engine_path = save_engine

        self.layerwise = layerwise
        self.max_batch_size = max_batch_size
Example #7
 def default_tuple(tup, default):
     if tup is None:
         return default
     new_tup = []
     for elem, default_elem in zip(tup, default):
         new_tup.append(misc.default_value(elem, default_elem))
     return tuple(new_tup)
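A usage sketch of default_tuple, assuming misc.default_value behaves as in the sketch under Example #1: a missing tuple falls back entirely, and a partially specified tuple keeps the endpoints it provides.

def default_value(value, default):  # local stand-in for misc.default_value
    return value if value is not None else default

def default_tuple(tup, default):
    if tup is None:
        return default
    return tuple(default_value(elem, default_elem) for elem, default_elem in zip(tup, default))

print(default_tuple(None, (1, 25)))         # (1, 25)
print(default_tuple((None, 100), (1, 25)))  # (1, 100)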
Example #8
    def parse(self, args):
        self.verbosity_count = tools_util.get(args, "verbose")
        self.silent = tools_util.get(args, "silent")
        self.log_format = misc.default_value(tools_util.get(args, "log_format"), [])

        # Enable logger settings immediately on parsing.
        self.get_logger()
Example #9
def build_default_profile(builder, network, default_shape_value=None):
    default_shape_value = misc.default_value(default_shape_value, DEFAULT_SHAPE_VALUE)

    def override_shape(shape):
        return tuple([default_shape_value if misc.is_dimension_dynamic(dim) else dim for dim in shape])

    trt_profile = builder.create_optimization_profile()
    for idx in range(network.num_inputs):
        inp = network.get_input(idx)

        with G_LOGGER.verbosity(G_LOGGER.CRITICAL): # WAR for spam from TRT
            is_shape_tensor = inp.is_shape_tensor

        if is_shape_tensor:
            rank = inp.shape[0]
            shape = (default_shape_value, ) * rank
            G_LOGGER.warning("Input shape-tensor: {:24} | Will use input values: {:} in profile.\n"
                             "If this is incorrect, please provide a profile "
                             "that sets the values for this input shape-tensor.".format(inp.name, shape, rank), mode=LogMode.ONCE)
            trt_profile.set_shape_input(inp.name, shape, shape, shape)
        else:
            shape = override_shape(inp.shape)
            if shape != inp.shape:
                G_LOGGER.warning("Input tensor: {:24} | Will use shape: {:} in profile (tensor shape is: {:}).\n"
                                 "If this is incorrect, please provide a profile "
                                 "that sets the shape for this input tensor.".format(inp.name, shape, inp.shape), mode=LogMode.ONCE)
            trt_profile.set_shape(inp.name, shape, shape, shape)
    return check_profile(trt_profile)
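A sketch of how the default profile fills in dynamic dimensions, assuming DEFAULT_SHAPE_VALUE is a small constant (the real value is defined in util/constants.py) and that dynamic dimensions are the negative or None ones:

DEFAULT_SHAPE_VALUE = 1  # assumed value, for illustration only

def is_dimension_dynamic(dim):  # stand-in for misc.is_dimension_dynamic
    return dim is None or dim < 0

def override_shape(shape):
    return tuple(DEFAULT_SHAPE_VALUE if is_dimension_dynamic(dim) else dim for dim in shape)

print(override_shape((-1, 3, 224, 224)))  # (1, 3, 224, 224): used as min == opt == max in the profile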
Example #10
    def add_loader(self, loader_str, loader_id, suffix=None):
        """
        Adds a loader to the script.
        If the loader is a duplicate, returns the existing loader instead.

        Args:
            loader_str (str): A string constructing the loader.
            loader_id (str): A short human-friendly identifier for the loader.
            suffix (str): An optional suffix to append to the loader name to help keep it unique.

        Returns:
            str: The name of the loader added.
        """
        suffix = misc.default_value(suffix, "")

        if loader_str in self.loaders:
            return self.loaders[loader_str]

        unique_name = loader_id + suffix
        if self.loader_count[unique_name]:
            unique_name = "{:}_{:}".format(unique_name, self.loader_count[loader_id])
        unique_name = Inline(unique_name)

        self.loader_count[loader_id] += 1
        self.loaders[loader_str] = unique_name
        return unique_name
Example #11
def add_logger_settings(script, args):
    # Always required since it is used to print the exit message.
    script.append_preimport("from polygraphy.logger import G_LOGGER")

    logger_settings = []
    verbosity_count = args_util.get(args, "verbose")
    if verbosity_count >= 4:
        logger_settings.append("G_LOGGER.severity = G_LOGGER.ULTRA_VERBOSE")
    elif verbosity_count == 3:
        logger_settings.append("G_LOGGER.severity = G_LOGGER.SUPER_VERBOSE")
    elif verbosity_count == 2:
        logger_settings.append("G_LOGGER.severity = G_LOGGER.EXTRA_VERBOSE")
    elif verbosity_count == 1:
        logger_settings.append("G_LOGGER.severity = G_LOGGER.VERBOSE")

    if args_util.get(args, "silent"):
        logger_settings.append("G_LOGGER.severity = G_LOGGER.CRITICAL")

    log_format = misc.default_value(args_util.get(args, "log_format"), [])
    for fmt in log_format:
        if fmt == "no-colors":
            logger_settings.append("G_LOGGER.colors = False")
        elif fmt == "timestamp":
            logger_settings.append("G_LOGGER.timestamp = True")
        elif fmt == "line-info":
            logger_settings.append("G_LOGGER.line_info = True")

    for setting in logger_settings:
        script.append_preimport(setting)
Example #12
    def __init__(self, deploy, model, outputs, batch_size=None, dtype=None):
        self.deploy = deploy
        self.model = model
        if not self.model:
            G_LOGGER.warning(
                "No model file provided for Caffe model, random weights will be used. To avoid this, "
                "please set the model paramater, or --model")

        if not outputs:
            G_LOGGER.critical(
                "Please set Caffe model outputs using the outputs parameter, or --trt-outputs. "
                "Note: To determine possible outputs, try running: tail -n50 {:}"
                .format(deploy))

        self.outputs = outputs
        self.dtype = misc.default_value(dtype, trt.float32)
        self.batch_size = misc.default_value(batch_size, 1)
Example #13
    def __init__(self,
                 gpu_memory_fraction=None,
                 allow_growth=None,
                 use_xla=None):
        """
        Functor that creates a TensorFlow config.

        Args:
            gpu_memory_fraction (float):
                The fraction of GPU memory that will be made available to TensorFlow.
                This should be a value between 0.0 and 1.0.
            allow_growth (bool): Whether to allow GPU memory allocated by TensorFlow to grow.
            use_xla (bool): Whether to attempt to enable XLA.
        """
        self.gpu_memory_fraction = misc.default_value(gpu_memory_fraction, 0.9)
        self.allow_growth = misc.default_value(allow_growth, False)
        self.use_xla = misc.default_value(use_xla, False)
Example #14
def add_trt_legacy_runner(script, args):
    script.add_import(imports=["TrtLegacyRunner"],
                      frm="polygraphy.backend.trt_legacy")
    G_LOGGER.warning(
        "Legacy TensorRT runner only supports implicit batch TensorFlow/UFF, ONNX, and Caffe models"
    )

    if args.model_type == "onnx":
        script.add_import(imports=["ParseNetworkFromOnnxLegacy"],
                          frm="polygraphy.backend.trt_legacy")
        onnx_loader = tool_util.add_onnx_loader(script,
                                                args,
                                                disable_outputs=True)
        loader_name = script.add_loader(
            Script.format_str("ParseNetworkFromOnnxLegacy({:})", onnx_loader),
            "parse_network_from_onnx_legacy")
    elif args.model_type == "caffe":
        script.add_import(imports=["LoadNetworkFromCaffe"],
                          frm="polygraphy.backend.trt_legacy")
        loader_name = script.add_loader(
            Script.format_str("LoadNetworkFromCaffe({:}, {:}, {:}, {:})",
                              args.model_file, args.caffe_model,
                              args.trt_outputs, args.batch_size),
            "parse_network_from_caffe")
    else:
        script.add_import(imports=["LoadNetworkFromUff"],
                          frm="polygraphy.backend.trt_legacy")
        if args.model_type == "uff":
            script.add_import(imports=["LoadUffFile"],
                              frm="polygraphy.backend.trt_legacy")
            shapes = {name: shape for name, (_, shape) in args.inputs.items()}
            loader_name = script.add_loader(
                Script.format_str("LoadUffFile({:}, {:}, {:})",
                                  args.model_file,
                                  misc.default_value(shapes, {}),
                                  args.trt_outputs), "load_uff_file")
        else:
            script.add_import(imports=["ConvertToUff"],
                              frm="polygraphy.backend.trt_legacy")
            loader_name = script.add_loader(
                Script.format_str(
                    "ConvertToUff({:}, save_uff={:}, preprocessor={:})",
                    tool_util.add_tf_loader(script, args), args.save_uff,
                    args.preprocessor), "convert_to_uff")
        loader_name = script.add_loader(
            Script.format_str("LoadNetworkFromUff({:}, uff_order={:})",
                              loader_name, args.uff_order),
            "uff_network_loader")

    runner_str = Script.format_str(
        "TrtLegacyRunner({:}, {:}, {:}, fp16={:}, tf32={:}, load_engine={:}, save_engine={:}, layerwise={:}, plugins={:})",
        loader_name, args.workspace, args.batch_size, args.fp16, args.tf32,
        args.model_file if args.model_type == "engine" else None,
        args.save_engine,
        args_util.get(args, "trt_outputs") == constants.MARK_ALL, args.plugins)
    script.add_runner(runner_str)
Example #15
        def topk(run_result):
            nonlocal outputs
            outputs = set(misc.default_value(outputs, run_result.keys()))

            for name, output in run_result.items():
                if name in outputs and name not in exclude:
                    indices = np.argsort(-output, axis=axis)
                    axis_len = indices.shape[axis]
                    run_result[name] = np.take(indices, np.arange(0, min(k, axis_len)), axis=axis)
            return run_result
Example #16
        def __init__(self):
            # Must explicitly initialize parent for any trampoline class! Will mysteriously segfault without this.
            BaseClass.__init__(self)

            self.data_loader = data_loader
            self._cache = cache
            self.device_buffers = OrderedDict()
            self.reset()
            G_LOGGER.verbose("Created calibrator [cache={:}]".format(self._cache))

            self.batch_size = misc.default_value(batch_size, 1)
Example #17
    def __init__(self, graph, opset=None, optimize=None, fold_constant=None):
        """
        Functor that loads a TensorFlow graph and converts it to ONNX using the tf2onnx converter.

        Args:
            graph (Callable() -> Tuple[tf.Graph, Sequence[str]]):
                    A callable that can supply a tuple containing a TensorFlow
                    graph and output names.
            opset (int): The ONNX opset to use during conversion.
            optimize (bool): Whether to use tf2onnx's graph optimization pass.
            fold_constant (bool): Whether to fold constants in the TensorFlow Graph. Requires that ``optimize`` is also enabled. Defaults to True.
        """
        self._graph = graph
        self.opset = misc.default_value(opset, 11)
        self.fold_constant = misc.default_value(fold_constant, True)
        self.optimize = misc.default_value(optimize, True)

        if self.fold_constant and not self.optimize:
            G_LOGGER.warning("`fold_constant` is enabled, but `optimize` is disabled. Constant folding will not be performed")
Example #18
    def __init__(self, max_workspace_size=None, tf32=None, fp16=None, int8=None, profiles=None, calibrator=None, strict_types=None):
        """
        Functor that creates a TensorRT IBuilderConfig.

        Args:
            max_workspace_size (int): The maximum workspace size, in bytes, when building the engine.
            tf32 (bool): Whether to build the engine with TF32 precision enabled. Defaults to False.
            fp16 (bool): Whether to build the engine with FP16 precision enabled. Defaults to False.
            int8 (bool): Whether to build the engine with INT8 precision enabled. Defaults to False.
            profiles (List[Profile]):
                    A list of optimization profiles to add to the configuration. Only needed for
                    networks with dynamic input shapes. If this is omitted for a network with
                    dynamic shapes, a default profile is created, where dynamic dimensions are
                    replaced with Polygraphy's DEFAULT_SHAPE_VALUE (defined in util/constants.py).
                    See `Profile` for details.
            calibrator (trt.IInt8Calibrator):
                    An int8 calibrator. Only required in int8 mode when
                    the network does not have explicit precision. For networks with
                    dynamic shapes, the last profile provided (or default profile if
                    no profiles are provided) is used during calibration.
            strict_types (bool):
                    Whether to enable strict type constraints in the builder configuration. Defaults to False.
        """
        self.max_workspace_size = misc.default_value(max_workspace_size, 1 << 24)
        self.tf32 = misc.default_value(tf32, False)
        self.fp16 = misc.default_value(fp16, False)
        self.int8 = misc.default_value(int8, False)
        self.profiles = misc.default_value(profiles, [])
        self.calibrator = calibrator
        self.strict_types = misc.default_value(strict_types, False)

        if self.calibrator is not None and not self.int8:
            G_LOGGER.warning("A calibrator was provided to `CreateConfig`, but int8 mode was not enabled. "
                             "Did you mean to set `int8=True` to enable building with int8 precision?")
Example #19
    def __init__(self, graph, config=None):
        """
        Functor that creates a TensorFlow session that can be used for inference.

        Args:
            graph (Callable() -> Tuple[tf.Graph, Sequence[str]]):
                    A callable that can supply a tuple containing a
                    TensorFlow graph and output names.
            config (Callable() -> tf.ConfigProto):
                    A callable that returns a TensorFlow config to use for the session.
                    Defaults to ``CreateConfig()``.
        """
        self.graph = graph
        self.config = misc.default_value(config, CreateConfig())
Example #20
    def parse(self, args):
        self.plugins = tools_util.get(args, "plugins")
        self.outputs = tools_util.get_outputs(args, "trt_outputs")
        self.network_api = tools_util.get(args, "network_api")
        self.ext = tools_util.get(args, "ext")
        self.explicit_precision = tools_util.get(args, "explicit_precision")
        self.exclude_outputs = tools_util.get(args, "trt_exclude_outputs")

        self.trt_min_shapes = misc.default_value(
            tools_util.get(args, "trt_min_shapes"), [])
        self.trt_max_shapes = misc.default_value(
            tools_util.get(args, "trt_max_shapes"), [])
        self.trt_opt_shapes = misc.default_value(
            tools_util.get(args, "trt_opt_shapes"), [])

        workspace = tools_util.get(args, "workspace")
        self.workspace = int(workspace) if workspace is not None else workspace

        self.tf32 = tools_util.get(args, "tf32")
        self.fp16 = tools_util.get(args, "fp16")
        self.int8 = tools_util.get(args, "int8")

        self.calibration_cache = tools_util.get(args, "calibration_cache")
        self.strict_types = tools_util.get(args, "strict_types")
Example #21
    def __init__(self, outputs=None, runtime=None, runner_name=None):
        """
        An ordered dictionary containing the result of running a single iteration of a runner.

        This maps output names to NumPy arrays, and preserves the output ordering from the runner.

        Also includes additional fields indicating the name of the runner which produced the
        outputs, and the time required to do so.

        Args:
            outputs (Dict[str, np.array]): The outputs of this iteration, mapped to their names.
            runtime (float): The time required for this iteration, in seconds.
            runner_name (str): The name of the runner that produced this output.
        """
        # IMPORTANT: This class must be pickleable.
        initial = misc.default_value(outputs, {})
        # Before 3.6, OrderedDict.update() did not preserve ordering
        for key, val in initial.items():
            self[key] = val
        self.runtime = runtime
        self.runner_name = misc.default_value(runner_name, "")
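A hedged construction sketch based on the signature above; the import path is an assumption, and the outputs dictionary is made up:

from collections import OrderedDict

import numpy as np
from polygraphy.comparator import IterationResult  # assumed import path

# Hypothetical outputs of one inference iteration.
outputs = OrderedDict([("logits", np.zeros((1, 10), dtype=np.float32))])
result = IterationResult(outputs=outputs, runtime=0.0042, runner_name="example-runner")
print(result.runner_name, result.runtime, list(result.keys()))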
Example #22
        def determine_model_type():
            if tools_util.get(args, "model_type") is not None:
                return args.model_type.lower()

            if tools_util.get(args, "model_file") is None:
                return None

            def use_ext(ext_mapping):
                file_ext = os.path.splitext(args.model_file)[-1]
                if file_ext in ext_mapping:
                    return ext_mapping[file_ext]

            runners = misc.default_value(tools_util.get(args, "runners"), [])
            if tools_util.get(args, "ckpt") or os.path.isdir(args.model_file):
                return "ckpt"
            elif "tf" in runners or "trt_legacy" in runners:
                if args.caffe_model:
                    return "caffe"
                ext_mapping = {
                    ".hdf5": "keras",
                    ".uff": "uff",
                    ".prototxt": "caffe",
                    ".onnx": "onnx",
                    ".engine": "engine",
                    ".plan": "engine"
                }
                return use_ext(ext_mapping) or "frozen"
            else:
                # When no framework is provided, some extensions can be ambiguous
                ext_mapping = {
                    ".hdf5": "keras",
                    ".graphdef": "frozen",
                    ".onnx": "onnx",
                    ".uff": "uff",
                    ".engine": "engine",
                    ".plan": "engine"
                }
                model_type = use_ext(ext_mapping)
                if model_type:
                    return model_type

            G_LOGGER.critical(
                "Could not automatically determine model type for: {:}\n"
                "Please explicitly specify the type with the --model-type option"
                .format(args.model_file))
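The fallback logic above keys off the file extension. A self-contained sketch of that lookup (the mapping is copied from the snippet; the file names are illustrative):

import os

ext_mapping = {".hdf5": "keras", ".graphdef": "frozen", ".onnx": "onnx",
               ".uff": "uff", ".engine": "engine", ".plan": "engine"}

def use_ext(path):
    # Mirrors use_ext(): look up the file extension, returning None when it is unknown.
    return ext_mapping.get(os.path.splitext(path)[-1])

print(use_ext("model.plan"))  # engine
print(use_ext("model.pb"))    # None -> the caller reports it cannot determine the model type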
Example #23
    def __init__(self, model, do_shape_inference=None, outputs=None, exclude_outputs=None):
        """
        Functor that modifies an ONNX model.

        Args:
            model (Callable() -> onnx.ModelProto): A loader that can supply an ONNX model.
            do_shape_inference (bool):
                    Whether to run ONNX shape inference on the model. Defaults to False.
            outputs (Sequence[str]):
                    Names of tensors to mark as outputs. If provided, this will override the
                    existing model outputs.
                    If a value of `constants.MARK_ALL` is used instead of a list, all tensors in the network are marked.
            exclude_outputs (Sequence[str]):
                    Names of tensors to exclude as outputs. This can be useful in conjunction with
                    ``outputs=constants.MARK_ALL`` to omit outputs.
        """
        self._model = model
        self.do_shape_inference = misc.default_value(do_shape_inference, False)
        self.outputs = outputs
        self.exclude_outputs = exclude_outputs
Example #24
    def __init__(self, name=None, prefix=None):
        """
        Args:
            name (str):
                    The name to use for this runner.
            prefix (str):
                    The human-readable name prefix to use for this runner.
                    A runner count and timestamp will be appended to this prefix.
                    Only used if name is not provided.
        """
        prefix = misc.default_value(prefix, "Runner")
        if name is None:
            count = BaseRunner.RUNNER_COUNTS[prefix]
            BaseRunner.RUNNER_COUNTS[prefix] += 1
            name = "{:}-N{:}-{:}-{:}".format(prefix, count,
                                             time.strftime("%x"),
                                             time.strftime("%X"))
        self.name = name
        self.inference_time = None

        self.is_active = False
        """bool: Whether this runner has been activated, either via context manager, or by calling ``activate()``."""
Example #25
    def topk_func(k=10, axis=-1, outputs=None, exclude=None):
        """
        Creates a function that applies a Top-K operation to an IterationResult.
        Top-K will return the indices of the k largest values in the array.

        Args:
            k (int):
                    The number of indices to keep.
                    If this exceeds the axis length, it will be clamped.
                    Defaults to 10.
            axis (int):
                    The axis along which to apply the topk.
                    Defaults to -1.
            outputs (Sequence[str]):
                    Names of outputs to apply top-k to.
                    Defaults to all outputs.
            exclude (Sequence[str]):
                    Names of outputs to exclude. Top-K will not be applied to these outputs.

        Returns:
            Callable(IterationResult) -> IterationResult: The top-k function.
        """
        exclude = set(misc.default_value(exclude, []))

        # Top-K implementation.
        def topk(run_result):
            nonlocal outputs
            outputs = set(misc.default_value(outputs, run_result.keys()))

            for name, output in run_result.items():
                if name in outputs and name not in exclude:
                    indices = np.argsort(-output, axis=axis, kind="stable")
                    axis_len = indices.shape[axis]
                    run_result[name] = np.take(indices,
                                               np.arange(0, min(k, axis_len)),
                                               axis=axis)
            return run_result

        return topk
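A usage sketch, assuming topk_func is reachable (in the source it appears to sit on a post-processing helper class); a plain dict stands in for the IterationResult here, since topk only needs keys(), items(), and item assignment:

import numpy as np

top3 = topk_func(k=3)  # apply Top-K to every output
fake_result = {"scores": np.array([0.1, 0.9, 0.3, 0.7], dtype=np.float32)}
print(top3(fake_result)["scores"])  # indices of the 3 largest values: [1 3 2]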
Example #26
    def __init__(self, network, config=None):
        """
        Functor that uses a TensorRT ``INetworkDefinition`` to build an engine.

        Args:
            network (Callable() -> trt.Builder, trt.INetworkDefinition):
                    A callable capable of returning a TensorRT Builder and INetworkDefinition. The returned builder
                    and network are owned by EngineFromNetwork and should not be freed manually. The callable may
                    have at most 3 return values if another object needs to be kept alive for the duration of the network,
                    e.g., in the case of a parser. EngineFromNetwork will take ownership of the third return value, and,
                    like the network, it should not be freed by the callable. The first and second return values must
                    always be the builder and network respectively.
                    If instead of a loader, the network, builder, and optional parser arguments are provided directly,
                    then EngineFromNetwork will *not* deallocate them.

            config (Callable(trt.Builder, trt.INetworkDefinition) -> trt.IBuilderConfig):
                    A callable that returns a TensorRT builder configuration. If not supplied,
                    a `CreateConfig` instance with default parameters is used.
        """
        self._network = network
        self._config = misc.default_value(config, CreateConfig())
Example #27
def wrapper():
    global G_CUDA
    G_CUDA = misc.default_value(G_CUDA, Cuda())
    return G_CUDA
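wrapper() is a lazy-initialization idiom: the global handle is created on the first call and reused afterwards. A generic, self-contained sketch of the same pattern (the names here are illustrative, not from Polygraphy):

_G_INSTANCE = None

def get_instance():
    # Create the shared object only once; subsequent calls return the same instance.
    global _G_INSTANCE
    if _G_INSTANCE is None:
        _G_INSTANCE = object()
    return _G_INSTANCE

assert get_instance() is get_instance()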
Example #28
    def __getitem__(self, index):
        """
        Randomly generates input data.

        Args:
            index (int):
                    Since this class behaves like an iterable, it takes an index parameter.
                    Generated data is guaranteed to be the same for the same index.

        Returns:
            OrderedDict[str, numpy.ndarray]: A mapping of input names to input numpy buffers.
        """
        if index >= self.iterations:
            raise IndexError()

        G_LOGGER.verbose(
            "Generating data using numpy seed: {:}".format(self.seed + index))
        rng = np.random.RandomState(self.seed + index)

        def get_static_shape(name, shape):
            static_shape = shape
            if misc.is_shape_dynamic(shape):
                static_shape = misc.override_dynamic_shape(shape)
                if static_shape != shape and name not in self.user_input_metadata:
                    if not misc.is_valid_shape_override(static_shape, shape):
                        G_LOGGER.critical(
                            "Input tensor: {:24} | Cannot override original shape: {:} to {:}"
                            .format(name, shape, static_shape))
                    G_LOGGER.warning(
                        "Input tensor: {:24} | Will generate data of shape: {:} (tensor shape is: {:}).\n"
                        "If this is incorrect, please set input_metadata "
                        "or provide a custom data loader.".format(
                            name, static_shape, shape),
                        mode=LogMode.ONCE)
            return static_shape

        # Whether the user provided the values for a shape tensor input,
        # rather than the shape of the input.
        # If the shape is 1D, and has a value equal to the rank of the provided default shape, it is
        # likely to be a shape tensor, and so its value, not shape, should be overridden.
        def is_shape_tensor(name, dtype):
            if name not in self.input_metadata or name not in self.user_input_metadata:
                return False

            _, shape = self.input_metadata[name]
            is_shape = np.issubdtype(dtype, np.integer) and (
                not misc.is_shape_dynamic(shape)) and (len(shape) == 1)

            user_shape = self.user_input_metadata[name][1]
            is_shape &= len(user_shape) == shape[0]
            # Can't have negative values in shapes
            is_shape &= all([elem >= 0 for elem in user_shape])
            return is_shape

        def generate_buffer(name, dtype, shape):
            if is_shape_tensor(name, dtype):
                buffer = np.array(shape, dtype=dtype)
                G_LOGGER.info(
                    "Assuming {:} is a shape tensor. Setting input values to: {:}. If this is not correct, "
                    "please set it correctly in 'input_metadata' or by providing --input-shapes"
                    .format(name, buffer),
                    mode=LogMode.ONCE)
            elif np.issubdtype(dtype, np.integer):
                # high is 1 greater than the max int drawn
                buffer = rng.randint(low=self.int_range[0],
                                     high=self.int_range[1] + 1,
                                     size=shape,
                                     dtype=dtype)
            elif np.issubdtype(dtype, np.bool_):
                buffer = rng.randint(low=0, high=2, size=shape).astype(dtype)
            else:
                buffer = (rng.random_sample(size=shape) *
                          (self.float_range[1] - self.float_range[0]) +
                          self.float_range[0]).astype(dtype)

            buffer = np.array(
                buffer
            )  # To handle scalars, since the above functions return a float if shape is ().
            return buffer

        if self.input_metadata is None and self.user_input_metadata is not None:
            self.input_metadata = self.user_input_metadata

        buffers = OrderedDict()
        for name, (dtype, shape) in self.input_metadata.items():
            if name in self.user_input_metadata:
                user_dtype, user_shape = self.user_input_metadata[name]

                dtype = misc.default_value(user_dtype, dtype)

                is_valid_shape_override = user_shape is not None and misc.is_valid_shape_override(
                    user_shape, shape)
                if not is_valid_shape_override and not is_shape_tensor(
                        name, dtype):
                    G_LOGGER.warning(
                        "Input tensor: {:24} | Cannot use provided custom shape: {:}, since this input has "
                        "a static shape: {:}".format(name, user_shape, shape),
                        mode=LogMode.ONCE)
                else:
                    shape = misc.default_value(user_shape, shape)

            static_shape = get_static_shape(name, shape)
            buffers[name] = generate_buffer(name, dtype, shape=static_shape)

        # Warn about unused metadata
        for name in self.user_input_metadata.keys():
            if name not in self.input_metadata:
                msg = "Input tensor: {:24} | Metadata was provided, but the input does not exist in one or more runners.".format(
                    name)
                close_match = misc.find_in_dict(name, self.input_metadata)
                if close_match:
                    msg += "\nMaybe you meant to set: {:}".format(close_match)
                G_LOGGER.warning(msg)

        return buffers
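A small self-contained illustration of the determinism promised by the docstring: seeding a fresh RandomState with seed + index means the same index always produces the same data.

import numpy as np

seed, index = 1, 3  # illustrative values
first = np.random.RandomState(seed + index).random_sample(size=(2, 3))
second = np.random.RandomState(seed + index).random_sample(size=(2, 3))
assert np.array_equal(first, second)  # identical buffers for the same index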
Example #29
    def run(runners,
            data_loader=None,
            warm_up=None,
            use_subprocess=None,
            subprocess_timeout=None,
            subprocess_polling_interval=None,
            save_inputs_path=None):
        """
        Runs the supplied runners sequentially.

        Args:
            runners (List[BaseRunner]):
                    A list of runners to run.
            data_loader (Generator -> OrderedDict[str, numpy.ndarray]):
                    A generator or iterable that yields a dictionary that maps input names to input numpy buffers.
                    In the simplest case, this can be a `List[Dict[str, numpy.ndarray]]` .

                    In case you don't know details about the inputs ahead of time, you can access the
                    `input_metadata` property in your data loader, which will be set to a `TensorMetadata`
                    instance by this function.
                    Note that this does not work for generators or lists.

                    The number of iterations run by this function is controlled by the number of items supplied
                    by the data loader.

                    Defaults to an instance of `DataLoader`.
            warm_up (int):
                    The number of warm up runs to perform for each runner before timing.
                    Defaults to 0.
            use_subprocess (bool):
                    Whether each runner should be run in a subprocess. This allows each runner to have exclusive
                    access to the GPU. When using a subprocess, runners and loaders will never be modified.
            subprocess_timeout (int):
                    The timeout before a subprocess is killed automatically. This is useful for handling processes
                    that never terminate. A value of None disables the timeout. Defaults to None.
            subprocess_polling_interval (int):
                    The polling interval, in seconds, for checking whether a subprocess has completed or crashed.
                    In rare cases, omitting this parameter when subprocesses are enabled may cause this function
                    to hang indefinitely if the subprocess crashes.
                    A value of 0 disables polling. Defaults to 30 seconds.
            save_inputs_path (str):
                    [EXPERIMENTAL] Path at which to save inputs used during inference. This will include all inputs generated by
                    the provided data_loader, and will be saved as a pickled List[Dict[str, numpy.ndarray]].

        Returns:
            RunResults:
                    A mapping of runner names to the results of their inference.
                    The ordering of `runners` is preserved in this mapping.
        """
        warm_up = misc.default_value(warm_up, 0)
        data_loader = misc.default_value(data_loader, DataLoader())
        use_subprocess = misc.default_value(use_subprocess, False)
        subprocess_polling_interval = misc.default_value(
            subprocess_polling_interval, 30)
        loader_cache = DataLoaderCache(data_loader,
                                       save_inputs_path=save_inputs_path)

        def execute_runner(runner, loader_cache):
            with runner as active_runner:
                input_metadata = active_runner.get_input_metadata()
                G_LOGGER.info("Runner: {:40} | Input Metadata: {:}".format(
                    active_runner.name, input_metadata),
                              mode=LogMode.ONCE)
                # DataLoaderCache will ensure that the feed_dict does not contain any extra entries
                # based on the provided input_metadata.
                loader_cache.set_input_metadata(input_metadata)

                if warm_up:
                    G_LOGGER.start(
                        "Runner: {:40} | Running {:} warm-up runs".format(
                            active_runner.name, warm_up))
                    try:
                        feed_dict = loader_cache[0]
                    except IndexError:
                        G_LOGGER.warning(
                            "{:} warm-up runs were requested, but data loader did not supply any data. "
                            "Skipping warm-up runs".format(warm_up))
                    else:
                        G_LOGGER.ultra_verbose(
                            "Warm-up Input Buffers:\n{:}".format(
                                misc.indent_block(feed_dict)))
                        # First do a few warm-up runs, and don't time them.
                        for i in range(warm_up):
                            active_runner.infer(feed_dict=feed_dict)

                # Then, actual iterations.
                index = 0
                iteration_results = []
                output_metadata = TensorMetadata()

                for index, feed_dict in enumerate(loader_cache):
                    G_LOGGER.extra_verbose(
                        lambda: "Runner: {:40} | Feeding inputs:\n{:}".format(
                            active_runner.name, misc.indent_block(feed_dict)))
                    outputs = active_runner.infer(feed_dict=feed_dict)

                    runtime = active_runner.last_inference_time()
                    # Without a deep copy here, outputs will always reference the output of the last run
                    iteration_results.append(
                        IterationResult(outputs=copy.deepcopy(outputs),
                                        runtime=runtime,
                                        runner_name=active_runner.name))

                    if index == 0:
                        for name, out in outputs.items():
                            output_metadata.add(name, out.dtype, out.shape)

                    G_LOGGER.info(
                        "Runner: {:40} | Output Metadata: {:}".format(
                            active_runner.name, output_metadata),
                        mode=LogMode.ONCE)
                    G_LOGGER.extra_verbose(
                        lambda:
                        "Runner: {:40} | Inference Time: {:.3f} ms | Received outputs:\n{:}"
                        .format(active_runner.name, runtime * 1000.0,
                                misc.indent_block(outputs)))

                G_LOGGER.finish(
                    "Runner: {:40} | Completed {:} iterations.".format(
                        active_runner.name, index + 1))
                return iteration_results

        # Wraps execute_runner to use a queue.
        def execute_runner_with_queue(runner_queue, runner, loader_cache):
            iteration_results = None
            try:
                iteration_results = execute_runner(runner, loader_cache)
            except:
                # Cannot send the exception back, as it is not necessarily pickleable
                import traceback
                G_LOGGER.error(traceback.format_exc())
            misc.try_send_on_queue(runner_queue, iteration_results)
            # After finishing, send the updated loader_cache back.
            misc.try_send_on_queue(runner_queue, loader_cache)

        # Do all inferences in one loop, then comparisons at a later stage.
        # We run each runner in a separate process so that we can provide exclusive GPU access for each runner.
        run_results = RunResults()
        for runner in runners:
            G_LOGGER.start(
                "Runner: {:40} | Activating and starting inference".format(
                    runner.name))
            if use_subprocess:
                runner_queue = Queue()
                process = Process(target=execute_runner_with_queue,
                                  args=(runner_queue, runner, loader_cache))
                process.start()

                # If a subprocess hangs in a certain way, then process.join could block forever. Hence,
                # we need to keep polling the process to make sure it really is alive.
                iteration_results = None
                while process.is_alive() and iteration_results is None:
                    try:
                        iteration_results = misc.try_receive_on_queue(
                            runner_queue,
                            timeout=subprocess_polling_interval / 2)
                        # Receive updated loader cache, or fall back if it could not be sent.
                        loader_cache = misc.try_receive_on_queue(
                            runner_queue,
                            timeout=subprocess_polling_interval / 2)
                    except queue.Empty:
                        G_LOGGER.extra_verbose(
                            "Polled subprocess - still running")

                try:
                    assert iteration_results is not None
                    run_results.append((runner.name, iteration_results))
                    process.join(subprocess_timeout)
                except:
                    G_LOGGER.critical(
                        "Runner: {:40} | Terminated prematurely. Check the exception logged above. "
                        "If there is no exception logged above, make sure not to use the --use-subprocess "
                        "flag or set use_subprocess=False in Comparator.run()."
                        .format(runner.name))
                finally:
                    process.terminate()

                if loader_cache is None:
                    G_LOGGER.critical(
                        "Could not send data loader cache to runner subprocess. Please try disabling subprocesses "
                        "by removing the --use-subprocess flag, or setting use_subprocess=False in Comparator.run()"
                    )
            else:
                run_results.append(
                    (runner.name, execute_runner(runner, loader_cache)))

        G_LOGGER.verbose("Successfully ran: {:}".format(
            [r.name for r in runners]))
        return run_results
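The deep copy where iteration results are stored is load-bearing: a runner that reuses its output buffers would otherwise make every stored result alias the final run. A self-contained illustration without any Polygraphy objects:

import copy

import numpy as np

reused_buffer = np.zeros(3)
without_copy, with_copy = [], []
for i in range(3):
    reused_buffer[:] = i  # the "runner" overwrites the same buffer every iteration
    without_copy.append(reused_buffer)
    with_copy.append(copy.deepcopy(reused_buffer))

print([b.tolist() for b in without_copy])  # [[2.0, 2.0, 2.0], [2.0, 2.0, 2.0], [2.0, 2.0, 2.0]]
print([b.tolist() for b in with_copy])     # [[0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]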
Example #30
    def validate(run_results,
                 check_finite=None,
                 check_nan=None,
                 fail_fast=None):
        """
        Checks output validity.

        Args:
            run_results (Dict[str, List[IterationResult]]): The result of Comparator.run().
            check_finite (bool): Whether to fail on non-finite values. Defaults to False.
            check_nan (bool): Whether to fail on NaNs. Defaults to True.
            fail_fast (bool): Whether to fail after the first invalid value. Defaults to False.

        Returns:
            bool: True if all outputs were valid, False otherwise.
        """
        check_finite = misc.default_value(check_finite, False)
        check_nan = misc.default_value(check_nan, True)
        fail_fast = misc.default_value(fail_fast, False)

        def is_finite(output):
            non_finite = np.logical_not(np.isfinite(output))
            if np.any(non_finite):
                G_LOGGER.error("Encountered one or more non-finite values")
                G_LOGGER.error(
                    "Note: Use -vv or set logging verbosity to EXTRA_VERBOSE to display non-finite values",
                    mode=LogMode.ONCE)
                G_LOGGER.extra_verbose(
                    "Note: non-finite values at:\n{:}".format(non_finite))
                G_LOGGER.extra_verbose("Note: non-finite values:\n{:}".format(
                    output[non_finite]))
                return False
            return True

        def is_not_nan(output):
            nans = np.isnan(output)
            if np.any(nans):
                G_LOGGER.error("Encountered one or more NaNs")
                G_LOGGER.error(
                    "Note: Use -vv or set logging verbosity to EXTRA_VERBOSE to display locations of NaNs",
                    mode=LogMode.ONCE)
                G_LOGGER.extra_verbose("Note: NaNs at:\n{:}".format(nans))
                return False
            return True

        all_valid = True
        for runner_name, results in run_results:
            for result in results:
                for output_name, output in result.items():
                    G_LOGGER.info(
                        "Runner: {:40} | Validating output: {:} (check_finite={:}, check_nan={:})"
                        .format(runner_name, output_name, check_finite,
                                check_nan))

                    output_valid = True
                    with G_LOGGER.indent():
                        if check_nan:
                            output_valid &= is_not_nan(output)
                        if check_finite:
                            output_valid &= is_finite(output)

                        all_valid &= output_valid

                        if output_valid:
                            G_LOGGER.finish(
                                "Runner: {:40} | Output: {:} is valid".format(
                                    runner_name, output_name))
                        else:
                            G_LOGGER.error(
                                "Runner: {:40} | Errors detected in output: {:}"
                                .format(runner_name, output_name))
                            if fail_fast:
                                return False

        if all_valid:
            G_LOGGER.finish("Validation passed")
        else:
            G_LOGGER.error("Validation failed")
        return all_valid
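The per-output checks reduce to two NumPy predicates; a self-contained sketch with a made-up output array:

import numpy as np

output = np.array([1.0, float("nan"), float("inf")])
print(np.any(np.isnan(output)))                     # True  -> fails the check_nan test
print(np.any(np.logical_not(np.isfinite(output))))  # True  -> fails the check_finite test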