Example #1
    def call_impl(self):
        """
        Returns:
            onnx.ModelProto: The ONNX model with modified outputs.
        """
        model = self.load()

        if self.outputs == constants.MARK_ALL:
            G_LOGGER.verbose("Marking all ONNX tensors as outputs")
            model = onnx_util.mark_layerwise(model)
        elif self.outputs is not None:
            model = onnx_util.mark_outputs(model, self.outputs)

        if self.exclude_outputs is not None:
            model = onnx_util.unmark_outputs(model, self.exclude_outputs)

        return model
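
The `call_impl` above belongs to Polygraphy's ONNX output-modification loader. A minimal usage sketch (my addition, not part of the original listing), assuming the loader is exposed as `ModifyOutputs` in `polygraphy.backend.onnx` and using a hypothetical model path:

from polygraphy import constants
from polygraphy.backend.onnx import ModifyOutputs, OnnxFromPath

# Lazily load "model.onnx" (hypothetical path) and mark every tensor as a graph output.
mark_all = ModifyOutputs(OnnxFromPath("model.onnx"), outputs=constants.MARK_ALL)
model = mark_all()  # Invoking the loader runs call_impl() and returns an onnx.ModelProto.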
Example #2
        def __init__(self):
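            # NOTE: data_loader, cache, batch_size, and Calibrator below are not parameters of
            # __init__; they are captured from the enclosing Calibrator() factory function's scope.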
            # Must explicitly initialize parent for any trampoline class! Will mysteriously segfault without this.
            BaseClass.__init__(self)

            self.is_active = False

            self.data_loader = data_loader
            self._cache = cache
            self.device_buffers = OrderedDict()
            self.reset()
            G_LOGGER.verbose("Created calibrator [cache={:}]".format(
                self._cache))

            self.batch_size = util.default(batch_size, 1)

            # The function that constructed this instance
            self.make_func = Calibrator
Example #3
    def activate_impl(self):
        def make_buffers(engine):
            """
            Creates empty host and device buffers for the specified engine.
            Always uses binding names from Profile 0.
            """
            device_buffers = OrderedDict()
            host_output_buffers = OrderedDict()

            for idx in range(trt_util.get_bindings_per_profile(engine)):
                binding = engine[idx]
                dtype = trt_util.np_dtype_from_trt(engine.get_binding_dtype(binding))
                device_buffers[binding] = cuda.DeviceArray(dtype=dtype)
                if not engine.binding_is_input(binding):
                    host_output_buffers[binding] = np.empty(shape=tuple(), dtype=dtype)
            G_LOGGER.extra_verbose("Created device buffers: {:}".format(device_buffers))
            return device_buffers, host_output_buffers

        engine_or_context, owning = util.invoke_if_callable(self._engine_or_context)

        if isinstance(engine_or_context, trt.ICudaEngine):
            self.engine = engine_or_context
            self.owns_engine = owning
            self.context = self.engine.create_execution_context()
            self.owns_context = True
            if not self.context:
                G_LOGGER.critical("Invalid Context. See error log for details.")
        elif isinstance(engine_or_context, trt.IExecutionContext):
            self.engine = None
            self.owns_engine = False
            self.context = engine_or_context
            self.owns_context = owning
        else:
            G_LOGGER.critical(
                "Invalid Engine or Context. Please ensure the engine was built correctly. See error log for details."
            )

        if not owning:
            G_LOGGER.verbose(
                "Object was provided directly instead of via a Callable. This runner will not assume ownership. "
                "Please ensure it is freed."
            )

        self.device_buffers, self.host_output_buffers = make_buffers(self.context.engine)
        self.stream = cuda.Stream()
Example #4
 def add_to_script(self, script, suffix=None):
     G_LOGGER.verbose(
         "Attempting to load as a TensorFlow model, using TF2ONNX to convert to ONNX. "
         "If this is not correct, please specify --model-type",
         mode=LogMode.ONCE)
     script.add_import(imports=["OnnxFromTfGraph"],
                       frm="polygraphy.backend.onnx")
     loader_str = make_invocable("OnnxFromTfGraph",
                                 self.tf_loader_args.add_to_script(
                                     script,
                                     disable_custom_outputs=True,
                                     suffix=suffix),
                                 opset=self.opset,
                                 fold_constant=self.fold_constant)
     loader_name = script.add_loader(loader_str,
                                     "export_onnx_from_tf",
                                     suffix=suffix)
     return loader_name
Example #5
    def mark_layers(self, indices):
        def layer_to_str(layer):
            outputs = [
                layer.get_output(i).name for i in range(layer.num_outputs)
            ]
            return "{:}: {:}".format(layer.name, outputs)

        # First, reset, since changes from the previous call will persist.
        for layer in self.network:
            layer.reset_precision()

        for index in indices:
            layer = self.network.get_layer(index)
            G_LOGGER.verbose("Running layer in higher precision: {:}".format(
                layer_to_str(layer)))
            layer.precision = self.precision
        G_LOGGER.info("Will run layer(s): {:} in {:} precision".format(
            indices, self.precision))
Example #6
        def generate_buffer(name, dtype, shape):
            if is_shape_tensor(name, dtype):
                buffer = np.array(shape, dtype=dtype)
                G_LOGGER.info("Assuming {:} is a shape tensor. Setting input values to: {:}. If this is not correct, "
                              "please set it correctly in 'input_metadata' or by providing --input-shapes".format(name, buffer), mode=LogMode.ONCE)
            elif np.issubdtype(dtype, np.integer) or np.issubdtype(dtype, np.bool_):
                imin, imax = self._get_range(name, cast_type=int if np.issubdtype(dtype, np.integer) else bool)
                G_LOGGER.verbose("Input tensor: {:} | Generating input data in range: [{:}, {:}]".format(name, imin, imax),
                                 mode=LogMode.ONCE)
                # high is 1 greater than the max int drawn.
                buffer = rng.randint(low=imin, high=imax + 1, size=shape, dtype=dtype)
            else:
                fmin, fmax = self._get_range(name, cast_type=float)
                G_LOGGER.verbose("Input tensor: {:} | Generating input data in range: [{:}, {:}]".format(name, fmin, fmax),
                                 mode=LogMode.ONCE)
                buffer = (rng.random_sample(size=shape) * (fmax - fmin) + fmin).astype(dtype)

            buffer = np.array(buffer) # To handle scalars, since the above functions return a float if shape is ().
            return buffer
Example #7
            def update_meta_from_layerwise(meta, user_meta, set_shapes=True):
                for name in meta:
                    user_dtype, user_shape = None, None
                    if name in user_meta:
                        user_dtype, user_shape = user_meta[name].dtype, user_meta[name].shape

                    # Choose between what the user set, what's in the model, and what
                    # fallback shape inference said.
                    def choose_meta(user, model, fallback):
                        if self.arg_groups[OnnxShapeInferenceArgs].force_fallback:
                            return user or fallback
                        return user or model or fallback

                    if name in layerwise_meta:
                        meta[name].dtype = choose_meta(user_dtype, meta[name].dtype, layerwise_meta[name].dtype)
                        if set_shapes:
                            meta[name].shape = choose_meta(user_shape, meta[name].shape, layerwise_meta[name].shape)
                        G_LOGGER.verbose("Updated tensor: {:} metadata to: {:}".format(name, meta[name]))
                return meta
Example #8
    def call_impl(self):
        """
        Returns:
            onnx.ModelProto: The model, after saving it.
        """
        model, _ = util.invoke_if_callable(self._model)
        G_LOGGER.info("Saving ONNX model to: {:}".format(self.path))
        if self.external_data_path is not None:
            G_LOGGER.verbose(
                "Saving external data for ONNX model to: {:}".format(
                    self.external_data_path))
            try:
                external_data_helper.convert_model_to_external_data(
                    model,
                    location=self.external_data_path,
                    all_tensors_to_one_file=util.default(
                        self.all_tensors_to_one_file, True),
                    size_threshold=util.default(self.size_threshold, 1024),
                )
            except TypeError:
                if self.size_threshold is not None:
                    G_LOGGER.warning(
                        "This version of onnx does not support size_threshold in convert_model_to_external_data"
                    )
                external_data_helper.convert_model_to_external_data(
                    model,
                    location=self.external_data_path,
                    all_tensors_to_one_file=util.default(
                        self.all_tensors_to_one_file, True),
                )
        else:
            if self.size_threshold is not None:
                G_LOGGER.warning(
                    "size_threshold is set, but external data path has not been set. "
                    "No external data will be written.")
            if self.all_tensors_to_one_file is not None:
                G_LOGGER.warning(
                    "all_tensors_to_one_file is set, but external data path has not been set. "
                    "No external data will be written.")

        util.makedirs(self.path)
        onnx.save(model, self.path)
        return model
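
A short usage sketch for the save loader above (my addition, assuming it is exported as `SaveOnnx` from `polygraphy.backend.onnx`; the paths are hypothetical):

from polygraphy.backend.onnx import OnnxFromPath, SaveOnnx

# Save a (possibly large) model, writing tensor data to an external file.
save = SaveOnnx(OnnxFromPath("model.onnx"), path="out/model.onnx",
                external_data_path="model.weights")
model = save()  # Runs call_impl() above and returns the saved onnx.ModelProto.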
Example #9
def str_histogram(output, hist_range=None):
    if np.issubdtype(output.dtype, np.bool_):
        return ""

    try:
        try:
            hist, bin_edges = np.histogram(output, range=hist_range)
        except ValueError as err:
            G_LOGGER.verbose(
                "Could not generate histogram. Note: Error was: {:}".format(
                    err))
            return ""

        max_num_elems = compute_max(hist)
        if not max_num_elems:  # Empty tensor
            return ""  # Keep the return type consistent with the other paths so callers can concatenate.

        bin_edges = ["{:.3g}".format(bin) for bin in bin_edges]
        max_start_bin_width = max(len(bin) for bin in bin_edges)
        max_end_bin_width = max(len(bin) for bin in bin_edges[1:])

        MAX_WIDTH = 40
        ret = "---- Histogram ----\n"
        ret += "{:{width}}|  Num Elems | Visualization\n".format(
            "Bin Range", width=max_start_bin_width + max_end_bin_width + 5)
        for num, bin_start, bin_end in zip(hist, bin_edges, bin_edges[1:]):
            bar = "#" * int(MAX_WIDTH * float(num) / float(max_num_elems))
            ret += "({:<{max_start_bin_width}}, {:<{max_end_bin_width}}) | {:10} | {:}\n".format(
                bin_start,
                bin_end,
                num,
                bar,
                max_start_bin_width=max_start_bin_width,
                max_end_bin_width=max_end_bin_width,
            )
        return ret
    except Exception as err:
        G_LOGGER.verbose(
            "Could not generate histogram.\nNote: Error was: {:}".format(err))
        if config.INTERNAL_CORRECTNESS_CHECKS:
            raise
        return ""
Example #10
def str_output_stats(output, runner_name=None):
    ret = ""
    if runner_name:
        ret += "{:} | Stats: ".format(runner_name)

    try:
        with np.testing.suppress_warnings() as sup:
            sup.filter(RuntimeWarning)
            ret += "mean={:.5g}, std-dev={:.5g}, var={:.5g}, median={:.5g}, min={:.5g} at {:}, max={:.5g} at {:}\n".format(
                compute_mean(output), compute_stddev(output),
                compute_variance(output), compute_median(output),
                compute_min(output), compute_argmin(output),
                compute_max(output), compute_argmax(output))
    except Exception as err:
        G_LOGGER.verbose(
            "Could not generate statistics.\nNote: Error was: {:}".format(err))
        ret += "<Error while computing statistics>"
        if config.INTERNAL_CORRECTNESS_CHECKS:
            raise
    return ret
Example #11
        def find_worst(num, acc_results):
            acc_mapping = list(acc_results.values())[0][
                0]  # First iteration of first runner-pair.

            # Compute for each layer: atol / prev_atol, to determine which layers contribute the greatest error.
            # It is not enough to simply find the max(atol), because that doesn't account for error introduced
            # by previous layers.
            items = list(acc_mapping.items())
            ratios = []
            for (_, prev_tols), (outname, cur_tols) in zip(items[:-1], items[1:]):
                ratio = cur_tols.max_absdiff / prev_tols.max_absdiff
                ratios.append((ratio, outname))

            # Mark more layers on each iteration
            ratios = sorted(ratios, reverse=True)[:num]
            G_LOGGER.verbose(
                "Found worst {:} layers (Format: (error ratio, tensor name)): {:}"
                .format(num, ratios))
            return [output_mapping[outname] for (ratio, outname) in ratios]
Example #12
        def get_batch(self, names):
            if not self.is_active:
                G_LOGGER.error("Calibrator must be activated prior to use. Please use a context manager. "
                               "For example:\nwith calibrator:\n\t# Use calibrator here")
                return None

            try:
                buffers = next(self.data_loader_iter)
            except StopIteration:
                if not self.num_batches:
                    G_LOGGER.error("Calibrator data loader provided no data.\nPossible reasons for this include:\n(1) data loader "
                                   "has no data to provide\n(2) data loader was a generator, and the calibrator is being "
                                   "used multiple times (generators cannot be rewound)")
                return None
            else:
                self.num_batches += 1

            if not util.check_dict_contains(buffers, names, dict_name="calibration data", log_func=G_LOGGER.error):
                return None

            ptrs = []
            for name in names:
                buf = buffers[name]

                if isinstance(buf, cuda.DeviceView):
                    ptrs.append(buf.ptr)
                elif isinstance(buf, np.ndarray):
                    if name not in self.device_buffers:
                        self.device_buffers[name] = cuda.DeviceArray(shape=buf.shape, dtype=buf.dtype)
                        G_LOGGER.verbose("Allocated: {:}".format(self.device_buffers[name]))

                    ptrs.append(self.device_buffers[name].copy_from(buf).ptr)
                elif isinstance(buf, int):
                    ptrs.append(buf)
                else:
                    G_LOGGER.error("Calibration data loader provided an unrecognized type: {:} for input: {:}.\n"
                                   "Please provide either a NumPy array, Polygraphy DeviceView, or GPU pointer. ".format(
                                       type(buf).__name__, name))
                    return None

            return ptrs
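
For context, a hedged sketch of how this calibrator is typically wired into an INT8 engine build (my addition; loader names come from Polygraphy's TensorRT backend, while the input name, shape, and paths are hypothetical):

import numpy as np
from polygraphy.backend.trt import (Calibrator, CreateConfig, EngineFromNetwork,
                                    NetworkFromOnnxPath)

def calib_data():
    # Yield a few feed_dicts mapping input names to NumPy arrays.
    for _ in range(4):
        yield {"input": np.ones(shape=(1, 3, 224, 224), dtype=np.float32)}

calibrator = Calibrator(data_loader=calib_data(), cache="calibration.cache")
build_engine = EngineFromNetwork(NetworkFromOnnxPath("model.onnx"),
                                 config=CreateConfig(int8=True, calibrator=calibrator))
engine = build_engine()  # TensorRT invokes get_batch() above during calibration.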
Example #13
    def call_impl(self):
        """
        Returns:
            Tuple[tf.Graph, Sequence[str]]: The TensorFlow graph, and the names of its outputs.
        """
        # If `name` is not provided, this expects that the directory contains a `checkpoint` file with the contents:
        #
        # model_checkpoint_path: "model"
        # all_model_checkpoint_paths: "model"
        #
        # where "model" is the checkpoint name
        if not os.path.isdir(self.dir):
            G_LOGGER.warning(
                "Specified checkpoint directory: {:} does not look like a directory."
                .format(self.dir))

        if self.name is None:
            G_LOGGER.verbose(
                "Checkpoint name was not explicitly provided, searching for `checkpoint` file"
            )
            checkpoint = tf.train.get_checkpoint_state(self.dir)
            if checkpoint is None:
                ckpt_file_contents = '\nmodel_checkpoint_path: "model"\nall_model_checkpoint_paths: "model"\n'
                G_LOGGER.critical(
                    "Checkpoint directory: {:} does not contain a `checkpoint` file, and the checkpoint name was "
                    "not provided. Please either create a checkpoint file with the contents:\n{:} "
                    "\nWhere `model` is the name of the checkpoint, or explicitly provide the name with "
                    "--ckpt, not including file extensions".format(
                        self.dir, ckpt_file_contents))
            input_checkpoint = checkpoint.model_checkpoint_path
        else:
            input_checkpoint = os.path.join(self.dir, self.name)

        meta_file = input_checkpoint + ".meta"
        with tf.Graph().as_default() as graph, tf.compat.v1.Session(
                graph=graph).as_default() as sess:
            saver = tf.compat.v1.train.import_meta_graph(meta_file,
                                                         clear_devices=True)
            saver.restore(sess, input_checkpoint)
            return graph, tf_util.get_graph_output_names(graph)
Example #14
    def set_input_metadata(self, input_metadata):
        """
        Set the input metadata for the data loader.

        Args:
            input_metadata (TensorMetadata):
                    Input Metadata, including shape and type information. The cache may attempt to transform inputs to
                    match the specified input_metadata when data already in the cache does not exactly match.
        """
        self.input_metadata = input_metadata
        with contextlib.suppress(AttributeError):
            self.data_loader.input_metadata = input_metadata

        if not self.cache:
            G_LOGGER.verbose("Loading inputs from data loader")
            self.cache = list(self.data_loader)
            if not self.cache:
                G_LOGGER.warning("Data loader did not yield any input data.")

            # Only save inputs the first time the cache is generated
            if self.save_inputs_path is not None:
                save_json(self.cache, self.save_inputs_path, "inference input data")
Example #15
    def infer(self, feed_dict, check_inputs=True):
        """
        Runs inference using the provided feed_dict.

        Args:
            feed_dict (OrderedDict[str, numpy.ndarray]):
                    A mapping of input tensor names to corresponding input NumPy arrays.

            check_inputs (bool):
                    Whether to check that the provided ``feed_dict`` includes the expected inputs
                    with the expected data types and shapes.

        Returns:
            OrderedDict[str, numpy.ndarray]:
                    A mapping of output tensor names to their corresponding NumPy arrays.

                    IMPORTANT: Runners may reuse these output buffers. Thus, if you need to save
                    outputs from multiple inferences, you should make a copy with ``copy.deepcopy(outputs)``.
        """
        if not self.is_active:
            G_LOGGER.critical("{:35} | Must be activated prior to calling infer()".format(self.name))

        if check_inputs:
            input_metadata = self.get_input_metadata()
            G_LOGGER.verbose("Runner input metadata is: {:}".format(input_metadata))

            util.check_dict_contains(feed_dict, input_metadata.keys(), dict_name="feed_dict", log_func=G_LOGGER.critical)

            for name, inp in feed_dict.items():
                meta = input_metadata[name]
                if not np.issubdtype(inp.dtype, meta.dtype):
                    G_LOGGER.critical("Input tensor: {:} | Received unexpected dtype: {:}.\n"
                                      "Note: Expected type: {:}".format(name, inp.dtype, meta.dtype))

                if not util.is_valid_shape_override(inp.shape, meta.shape):
                    G_LOGGER.critical("Input tensor: {:} | Received incompatible shape: {:}.\n"
                                      "Note: Expected a shape compatible with: {:}".format(name, inp.shape, meta.shape))

        return self.infer_impl(feed_dict)
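
A minimal sketch of calling `infer()` through a concrete runner (my addition; it assumes ONNX Runtime support via `polygraphy.backend.onnxrt` and a Polygraphy version that provides `SessionFromOnnx`; the model path, input name, and shape are hypothetical):

import numpy as np
from polygraphy.backend.onnxrt import OnnxrtRunner, SessionFromOnnx

with OnnxrtRunner(SessionFromOnnx("model.onnx")) as runner:
    # The context manager activates the runner, satisfying the is_active check above.
    feed_dict = {"input": np.zeros(shape=(1, 3, 224, 224), dtype=np.float32)}
    outputs = runner.infer(feed_dict=feed_dict)  # OrderedDict[str, numpy.ndarray]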
Example #16
    def run(self, args):
        if args.dir is None and (args.good is None or args.bad is None):
            G_LOGGER.critical(
                "Either `--dir`, or both `--good` and `--bad` must be specified."
            )

        def load_tactics(dir):
            """
            Load all tactic replays from the specified directory into a single dictionary.

            Args:
                dir (str): Directory containing zero or more tactic replay files.

            Returns:
                dict[str, Set[polygraphy.backend.trt.algorithm_selector.Algorithm]]:
                        Maps layer names to the set of algorithms present in the tactic replays.
            """
            def try_load_replay(path):
                try:
                    return algorithm_selector.TacticReplayData.load(path)
                except:
                    return None

            tactics = defaultdict(set)
            replay_paths = []
            for path in glob.iglob(os.path.join(dir, "**"), recursive=True):
                replay = try_load_replay(path)
                if replay is None:
                    G_LOGGER.verbose(
                        "{:} does not look like a tactic replay file, skipping."
                        .format(path))
                    continue

                replay_paths.append(path)
                for name, algo in replay.items():
                    tactics[name].add(algo)
            return tactics, replay_paths

        good_dir = util.default(args.good, os.path.join(args.dir, "good"))
        good_tactics, good_paths = load_tactics(good_dir)
        G_LOGGER.info("Loaded {:} good tactic replays.".format(
            len(good_paths)))
        G_LOGGER.verbose("Good tactic replays: {:}".format(good_paths))

        bad_dir = util.default(args.bad, os.path.join(args.dir, "bad"))
        bad_tactics, bad_paths = load_tactics(bad_dir)
        G_LOGGER.info("Loaded {:} bad tactic replays.".format(len(bad_paths)))
        G_LOGGER.verbose("Bad tactic replays: {:}".format(bad_paths))

        # Walk bad tactics and remove all the known good tactics.
        potential_bad_tactics = OrderedDict()
        for name, algo_set in bad_tactics.items():
            if name in good_tactics:
                algo_set -= good_tactics[name]

            if algo_set:
                potential_bad_tactics[name] = algo_set

        if potential_bad_tactics:
            G_LOGGER.info("Found potentially bad tactics:")
            for name, algo_set in potential_bad_tactics.items():
                algo_set_str = list(map(str, algo_set))
                G_LOGGER.info("Layer: {:}\n\tAlgorithms: {:}".format(
                    name, algo_set_str))
        else:
            G_LOGGER.info(
                "Could not determine potentially bad tactics. Try generating more tactic replay files?"
            )
Example #17
    def call_impl(self, builder, network):
        """
        Args:
            builder (trt.Builder):
                    The TensorRT builder to use to create the configuration.
            network (trt.INetworkDefinition):
                    The TensorRT network for which to create the config. The network is used to
                    automatically create a default optimization profile if none are provided.

        Returns:
            trt.IBuilderConfig: The TensorRT builder configuration.
        """
        with util.FreeOnException([builder.create_builder_config()]) as (config, ):
            def try_run(func, name):
                try:
                    return func()
                except AttributeError:
                    trt_util.fail_unavailable("{:} in CreateConfig".format(name))


            def try_set_flag(flag_name):
                return try_run(lambda: config.set_flag(getattr(trt.BuilderFlag, flag_name)), flag_name.lower())


            with G_LOGGER.indent():
                G_LOGGER.verbose("Setting TensorRT Optimization Profiles")
                profiles = copy.deepcopy(self.profiles)
                for profile in profiles:
                    # Last trt_profile is used for set_calibration_profile.
                    trt_profile = profile.fill_defaults(network).to_trt(builder, network)
                    config.add_optimization_profile(trt_profile)
                G_LOGGER.info("Configuring with profiles: {:}".format(profiles))

            config.max_workspace_size = int(self.max_workspace_size)

            if self.strict_types:
                try_set_flag("STRICT_TYPES")

            if self.tf32:
                try_set_flag("TF32")
            else: # TF32 is on by default
                with contextlib.suppress(AttributeError):
                    config.clear_flag(trt.BuilderFlag.TF32)

            if self.fp16:
                try_set_flag("FP16")

            if self.int8:
                try_set_flag("INT8")
                if not network.has_explicit_precision:
                    if self.calibrator is not None:
                        input_metadata = trt_util.get_input_metadata_from_profile(trt_profile, network)
                        with contextlib.suppress(AttributeError): # Polygraphy calibrator has a reset method
                            self.calibrator.reset(input_metadata)
                        config.int8_calibrator = self.calibrator
                        try:
                            config.set_calibration_profile(trt_profile)
                        except:
                            G_LOGGER.extra_verbose("Cannot set calibration profile on TensorRT 7.0 and older.")
                    else:
                        G_LOGGER.warning("Network does not have explicit precision and no calibrator was provided. Please ensure "
                                         "that tensors in the network have dynamic ranges set, or provide a calibrator in order to use int8 mode.")

            if self.sparse_weights:
                try_set_flag("SPARSE_WEIGHTS")

            if self.tactic_sources is not None:
                tactic_sources_flag = 0
                for source in self.tactic_sources:
                    tactic_sources_flag |= (1 << int(source))
                try_run(lambda: config.set_tactic_sources(tactic_sources_flag), name="tactic_sources")

            try:
                if self.timing_cache_path:
                    timing_cache_data = util.load_file(self.timing_cache_path, description="tactic timing cache")
                    cache = config.create_timing_cache(timing_cache_data)
                else:
                    # Create an empty timing cache by default so it will be populated during engine build.
                    # This way, consumers of CreateConfig have the option to use the cache later.
                    cache = config.create_timing_cache(b"")
            except AttributeError:
                if self.timing_cache_path:
                    trt_util.fail_unavailable("load_timing_cache in CreateConfig")
            else:
                config.set_timing_cache(cache, ignore_mismatch=False)

            if self.algorithm_selector is not None:
                def set_algo_selector():
                    config.algorithm_selector = self.algorithm_selector
                try_run(set_algo_selector, "algorithm_selector")

            return config
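
A brief sketch of how `CreateConfig` is usually consumed (my addition; the keyword names mirror the attributes used above, and the workspace size is an arbitrary example, not a recommendation):

from polygraphy.backend.trt import CreateConfig, EngineFromNetwork, NetworkFromOnnxPath

create_config = CreateConfig(fp16=True, max_workspace_size=1 << 30)
build_engine = EngineFromNetwork(NetworkFromOnnxPath("model.onnx"), config=create_config)
engine = build_engine()  # call_impl(builder, network) above runs as part of the build.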
Example #18
def get_output_metadata(graph, layerwise=False):
    graphdef = graph.as_graph_def()

    node_output_map = map_node_outputs(graphdef)

    def is_output_node(node):
        # Make sure that we're not using hanging nodes as outputs - must have at least one input.
        if len(node_output_map[node.name]) != 0 or len(node.input) == 0:
            return False

        # Tensors with no shape cannot be outputs and TensorFlow doesn't like certain ops as outputs.
        EXCLUDE_OPS = [
            "Switch",
            "FusedBatchNorm",
            "Assert",
            "NextIteration",
            "Enter",
            "LoopCond",
            "Exit",
            "Print",
            "Assign",
            "NoOp",
            "ReadVariableOp",
            "VarIsInitializedOp",
            "Const",
        ]

        # Additionally, we sometimes need to exclude entire namespaces e.g. while loops.
        EXCLUDE_NAMESPACES = ["while", "Assert"]

        if any([ex_op in node.op for ex_op in EXCLUDE_OPS]) or any(
            [ns in node.name for ns in EXCLUDE_NAMESPACES]):
            G_LOGGER.extra_verbose(
                "Excluding {:}, op {:} is not a valid output op or is part of an excluded namespace "
                "(Note: excluded namespaces: {:})".format(
                    node.name, node.op, EXCLUDE_NAMESPACES))
            return False

        return True

    # For layerwise mode, every layer becomes an output.
    if layerwise:
        output_nodes = list(graphdef.node)
        G_LOGGER.verbose(
            "Running in layerwise mode. Marking {:} layers as potential outputs"
            .format(len(output_nodes)))
    else:
        output_nodes = [node for node in graphdef.node if is_output_node(node)]
    G_LOGGER.extra_verbose(
        "Found likely output nodes: {:}".format(output_nodes))

    output_tensors = []
    for node in output_nodes:

        tensor_name = node.name + ":0"
        try:
            tensor = graph.get_tensor_by_name(tensor_name)
            output_tensors.append(tensor)
        except KeyError:
            G_LOGGER.warning(
                "Could not import: {:}. Skipping.".format(tensor_name))
    if len(output_tensors) != len(output_nodes):
        G_LOGGER.warning(
            "Excluded {:} ops that don't seem like outputs. Use -vv/--super-verbose, or set "
            "logging verbosity to EXTRA_VERBOSE to view them.".format(
                len(output_nodes) - len(output_tensors)))

    G_LOGGER.extra_verbose("Found output op types in graph: {:}".format(
        {tensor.op.type
         for tensor in output_tensors}))
    G_LOGGER.verbose(
        "Retrieved TensorFlow output_tensors: {:}".format(output_tensors))
    return get_tensor_metadata(output_tensors)
Example #19
    def run(
        runners,
        data_loader=None,
        warm_up=None,
        use_subprocess=None,
        subprocess_timeout=None,
        subprocess_polling_interval=None,
        save_inputs_path=None,
    ):
        """
        Runs the supplied runners sequentially.

        Args:
            runners (List[BaseRunner]):
                    A list of runners to run.
            data_loader (Generator -> OrderedDict[str, numpy.ndarray]):
                    A generator or iterable that yields a dictionary that maps input names to input numpy buffers.
                    In the simplest case, this can be a `List[Dict[str, numpy.ndarray]]` .

                    In case you don't know details about the inputs ahead of time, you can access the
                    `input_metadata` property in your data loader, which will be set to an `TensorMetadata`
                    instance by this function.
                    Note that this does not work for generators or lists.

                    The number of iterations run by this function is controlled by the number of items supplied
                    by the data loader.

                    Defaults to an instance of `DataLoader`.
            warm_up (int):
                    The number of warm up runs to perform for each runner before timing.
                    Defaults to 0.
            use_subprocess (bool):
                    Whether each runner should be run in a subprocess. This allows each runner to have exclusive
                    access to the GPU. When using a subprocess, runners and loaders will never be modified.
            subprocess_timeout (int):
                    The timeout before a subprocess is killed automatically. This is useful for handling processes
                    that never terminate. A value of None disables the timeout. Defaults to None.
            subprocess_polling_interval (int):
                    The polling interval, in seconds, for checking whether a subprocess has completed or crashed.
                    In rare cases, omitting this parameter when subprocesses are enabled may cause this function
                    to hang indefinitely if the subprocess crashes.
                    A value of 0 disables polling. Defaults to 30 seconds.
            save_inputs_path (str):
                    [EXPERIMENTAL] Path at which to save inputs used during inference. This will include all inputs generated by
                    the provided data_loader, and will be saved as a JSON List[Dict[str, numpy.ndarray]].

        Returns:
            RunResults:
                    A mapping of runner names to the results of their inference.
                    The ordering of `runners` is preserved in this mapping.
        """
        warm_up = util.default(warm_up, 0)
        data_loader = util.default(data_loader, DataLoader())
        use_subprocess = util.default(use_subprocess, False)
        subprocess_polling_interval = util.default(subprocess_polling_interval,
                                                   30)
        loader_cache = DataLoaderCache(data_loader,
                                       save_inputs_path=save_inputs_path)

        def execute_runner(runner, loader_cache):
            with runner as active_runner:
                # DataLoaderCache will ensure that the feed_dict does not contain any extra entries
                # based on the provided input_metadata.
                loader_cache.set_input_metadata(
                    active_runner.get_input_metadata())

                if warm_up:
                    G_LOGGER.start("{:35} | Running {:} warm-up run(s)".format(
                        active_runner.name, warm_up))
                    try:
                        feed_dict = loader_cache[0]
                    except IndexError:
                        G_LOGGER.warning(
                            "{:} warm-up run(s) were requested, but data loader did not supply any data. "
                            "Skipping warm-up run(s)".format(warm_up))
                    else:
                        G_LOGGER.ultra_verbose(
                            "Warm-up Input Buffers:\n{:}".format(
                                util.indent_block(feed_dict)))
                        # First do a few warm-up runs, and don't time them.
                        for _ in range(warm_up):
                            active_runner.infer(feed_dict=feed_dict)
                    G_LOGGER.finish(
                        "{:35} | Finished {:} warm-up run(s)".format(
                            active_runner.name, warm_up))

                # Then, actual iterations.
                index = 0
                iteration_results = []

                total_runtime = 0
                for index, feed_dict in enumerate(loader_cache):
                    G_LOGGER.info(
                        "{:35}\n---- Inference Input(s) ----\n{:}".format(
                            active_runner.name,
                            TensorMetadata().from_feed_dict(feed_dict)),
                        mode=LogMode.ONCE,
                    )

                    G_LOGGER.extra_verbose(
                        lambda: "{:35} | Feeding inputs:\n{:}".format(
                            active_runner.name, util.indent_block(feed_dict)))
                    outputs = active_runner.infer(feed_dict=feed_dict)

                    runtime = active_runner.last_inference_time()
                    total_runtime += runtime
                    # Without a deep copy here, outputs will always reference the output of the last run
                    iteration_results.append(
                        IterationResult(outputs=copy.deepcopy(outputs),
                                        runtime=runtime,
                                        runner_name=active_runner.name))

                    G_LOGGER.info(
                        "{:35}\n---- Inference Output(s) ----\n{:}".format(
                            active_runner.name,
                            TensorMetadata().from_feed_dict(outputs)),
                        mode=LogMode.ONCE,
                    )
                    G_LOGGER.extra_verbose(
                        lambda:
                        "{:35} | Inference Time: {:.3f} ms | Received outputs:\n{:}"
                        .format(active_runner.name, runtime * 1000.0,
                                util.indent_block(outputs)))

                total_runtime_ms = total_runtime * 1000.0
                G_LOGGER.finish(
                    "{:35} | Completed {:} iteration(s) in {:.4g} ms | Average inference time: {:.4g} ms."
                    .format(active_runner.name, index + 1, total_runtime_ms,
                            total_runtime_ms / float(index + 1)))
                return iteration_results

        # Wraps execute_runner to use a queue.
        def execute_runner_with_queue(runner_queue, runner, loader_cache):
            iteration_results = None
            try:
                iteration_results = execute_runner(runner, loader_cache)
            except:
                # Cannot necessarily send the exception back over the queue.
                G_LOGGER.backrace()
            util.try_send_on_queue(runner_queue, iteration_results)
            # After finishing, send the updated loader_cache back.
            util.try_send_on_queue(runner_queue, loader_cache)

        # Do all inferences in one loop, then comparisons at a later stage.
        # We run each runner in a separate process so that we can provide exclusive GPU access for each runner.
        run_results = RunResults()

        if not runners:
            G_LOGGER.warning(
                "No runners were provided to Comparator.run(). Inference will not be run, and run results will be empty."
            )

        for runner in runners:
            G_LOGGER.start("{:35} | Activating and starting inference".format(
                runner.name))
            if use_subprocess:
                runner_queue = Queue()
                process = Process(target=execute_runner_with_queue,
                                  args=(runner_queue, runner, loader_cache))
                process.start()

                # If a subprocess hangs in a certain way, then process.join could block forever. Hence,
                # we need to keep polling the process to make sure it really is alive.
                iteration_results = None
                while process.is_alive() and iteration_results is None:
                    try:
                        iteration_results = util.try_receive_on_queue(
                            runner_queue,
                            timeout=subprocess_polling_interval / 2)
                        # Receive updated loader cache, or fall back if it could not be sent.
                        loader_cache = util.try_receive_on_queue(
                            runner_queue,
                            timeout=subprocess_polling_interval / 2)
                    except queue.Empty:
                        G_LOGGER.extra_verbose(
                            "Polled subprocess - still running")

                try:
                    assert iteration_results is not None
                    run_results.append((runner.name, iteration_results))
                    process.join(subprocess_timeout)
                except:
                    G_LOGGER.critical(
                        "{:35} | Terminated prematurely. Check the exception logged above. "
                        "If there is no exception logged above, make sure not to use the --use-subprocess "
                        "flag or set use_subprocess=False in Comparator.run()."
                        .format(runner.name))
                finally:
                    process.terminate()

                if loader_cache is None:
                    G_LOGGER.critical(
                        "Could not send data loader cache to runner subprocess. Please try disabling subprocesses "
                        "by removing the --use-subprocess flag, or setting use_subprocess=False in Comparator.run()"
                    )
            else:
                run_results.append(
                    (runner.name, execute_runner(runner, loader_cache)))

        G_LOGGER.verbose("Successfully ran: {:}".format(
            [r.name for r in runners]))
        return run_results
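
A condensed usage sketch for `Comparator.run` (my addition; the runner and loader names are Polygraphy public API, while the model path is hypothetical):

from polygraphy.backend.onnxrt import OnnxrtRunner, SessionFromOnnx
from polygraphy.backend.trt import EngineFromNetwork, NetworkFromOnnxPath, TrtRunner
from polygraphy.comparator import Comparator

runners = [
    OnnxrtRunner(SessionFromOnnx("model.onnx")),
    TrtRunner(EngineFromNetwork(NetworkFromOnnxPath("model.onnx"))),
]
results = Comparator.run(runners)  # Uses the default DataLoader to generate inputs.
success = bool(Comparator.compare_accuracy(results))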
Example #20
    def call_impl(self):
        """
        Returns:
            onnx.ModelProto: The new ONNX model with shapes inferred.
        """
        model, _ = util.invoke_if_callable(self._model)
        external_data_dir = self.external_data_dir

        try:
            if isinstance(model, onnx.ModelProto):
                MODEL_SIZE = model.ByteSize()
                if MODEL_SIZE > LARGE_MODEL_THRESHOLD:
                    G_LOGGER.warning(
                        "Attempting to run shape inference on a large model. "
                        "This may require a large amount of memory.\nIf memory consumption becomes too high, "
                        "the process may be killed. You may want to try disabling shape inference in that case. ",
                        mode=LogMode.ONCE,
                    )

                if MODEL_SIZE > self.save_to_disk_threshold_bytes:
                    G_LOGGER.warning(
                        "Model size ({:.3} MiB) exceeds the in-memory size threshold: {:.3} MiB.\n"
                        "The model will be saved to a temporary file before shape inference is run."
                        .format(
                            MODEL_SIZE / (1024.0**2),
                            self.save_to_disk_threshold_bytes / (1024.0**2)),
                        mode=LogMode.ONCE,
                    )
                    outdir = tempfile.TemporaryDirectory()
                    outpath = os.path.join(outdir.name, "tmp_model.onnx")
                    save_onnx(model, outpath, external_data_path="ext.data")
                    model = outpath
                    external_data_dir = outdir.name

            G_LOGGER.verbose("Starting ONNX shape inference")
            if isinstance(model, onnx.ModelProto):
                model = shape_inference.infer_shapes(model)
            else:
                tmp_path = util.NamedTemporaryFile(prefix="tmp_polygraphy_",
                                                   suffix=".onnx").name
                G_LOGGER.verbose(
                    "Writing shape-inferred model to: {:}".format(tmp_path))
                shape_inference.infer_shapes_path(model, tmp_path)
                # When external_data_dir is unset, use the model's current directory
                model = onnx_from_path(tmp_path,
                                       external_data_dir=util.default(
                                           external_data_dir,
                                           os.path.dirname(model) or None))
            G_LOGGER.verbose("ONNX Shape Inference completed successfully")
        except Exception as err:
            if not self.error_ok:
                raise
            G_LOGGER.warning(
                "ONNX shape inference exited with an error:\n{:}".format(err))
            G_LOGGER.internal_error(
                "ONNX shape inference exited with an error:\n{:}".format(err))

            if not isinstance(model, onnx.ModelProto):
                model = onnx_from_path(
                    model, external_data_dir=self.external_data_dir)
        return model
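
A short usage sketch (my addition, assuming this loader is exported as `InferShapes` from `polygraphy.backend.onnx`; the path is hypothetical):

from polygraphy.backend.onnx import InferShapes, OnnxFromPath

infer_shapes = InferShapes(OnnxFromPath("model.onnx"))
model = infer_shapes()  # Returns the shape-inferred model produced by call_impl() above.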
Example #21
def compress(obj):
    G_LOGGER.verbose("Compressing {} object".format(type(obj)))
    return Compressed(zlib.compress(obj))
Example #22
def decompress(compressed):
    G_LOGGER.verbose("Decompressing bytes")
    return zlib.decompress(compressed.bytes)
Example #23
    def run_impl(self, args):
        graph = gs.import_onnx(super().load_model())

        TENSOR_MAP = graph.tensors()

        def get_tensor(name):
            if name not in TENSOR_MAP:
                G_LOGGER.exit(
                    "Tensor: {:} does not exist in the model.".format(name))
            return TENSOR_MAP[name]

        TENSOR_NAME_SUFFIX = "_polygraphy_surgeon_insert_output"

        output_tensors = []
        for name in self.arg_groups[OnnxNodeArgs].outputs:
            if name in self.arg_groups[OnnxNodeArgs].inputs:
                # When the new node's input == output, we need to generate a new tensor
                # If the tensor was a graph output, try to preserve the name.
                inp_tensor = get_tensor(name)
                if inp_tensor in graph.outputs:
                    inp_tensor.name += TENSOR_NAME_SUFFIX
                    tensor = gs.Variable(name=name)
                else:
                    tensor = gs.Variable(name=name + TENSOR_NAME_SUFFIX)

                def replace_tensor(tensors):
                    # This is needed to preserve ordering and handle cases where the tensor shows up more than once.
                    for index, t in enumerate(tensors):
                        if t.name == inp_tensor.name:
                            tensors[index] = tensor

                for out_node in inp_tensor.outputs:
                    replace_tensor(out_node.inputs)

                replace_tensor(graph.outputs)
                G_LOGGER.verbose(
                    "Generating new tensor for output: {:}".format(tensor))
            else:
                tensor = get_tensor(name)
            tensor.inputs.clear()
            output_tensors.append(tensor)

        input_tensors = [
            get_tensor(name) for name in self.arg_groups[OnnxNodeArgs].inputs
        ]

        new_node = gs.Node(op=self.arg_groups[OnnxNodeArgs].op,
                           name=self.arg_groups[OnnxNodeArgs].name,
                           attrs=self.arg_groups[OnnxNodeArgs].attrs,
                           inputs=input_tensors,
                           outputs=output_tensors)
        G_LOGGER.verbose("Generated new node: {:}".format(new_node))

        # Assuming the graph is topologically sorted, the node needs to be inserted
        # after its last input node to maintain the sorting.
        with graph.node_ids():
            # Nodes with no inputs can be inserted at index 0
            insert_index = max(
                [node.id + 1 for inp in input_tensors
                 for node in inp.inputs] + [0])

        graph.nodes.insert(insert_index, new_node)

        super().save_model(super().export_graph(graph.cleanup()))
Example #24
    def call_impl(self):
        """
        Returns:
            bytes: The serialized engine that was created.
        """
        # If network is a callable, then we own its return value
        ret, owns_network = util.invoke_if_callable(self._network)
        builder, network, parser = util.unpack_args(ret, num=3)

        if builder is None or network is None:
            G_LOGGER.critical("Expected to recevie a (builder, network) tuple for the `network` parameter, "
                              "but received: ({:}, {:})".format(builder, network))

        with contextlib.ExitStack() as stack:
            if owns_network:
                stack.enter_context(builder)
                stack.enter_context(network)
                if parser is not None:
                    stack.enter_context(parser)
            else:
                provided = "Builder and Network" if parser is None else "Builder, Network, and Parser"
                G_LOGGER.verbose("{:} were provided directly instead of via a Callable. This loader will not assume ownership. "
                                 "Please ensure that they are freed.".format(provided))

            config, owns_config = util.invoke_if_callable(self._config, builder, network)
            if owns_config:
                stack.enter_context(config)
            else:
                G_LOGGER.verbose("Builder configuration was provided directly instead of via a Callable. This loader will not assume "
                                 "ownership. Please ensure it is freed.")

            try:
                config.int8_calibrator.__enter__ # Polygraphy calibrator frees device buffers on exit.
            except AttributeError:
                pass
            else:
                stack.enter_context(config.int8_calibrator)

            network_log_mode = "full" if G_LOGGER.severity <= G_LOGGER.ULTRA_VERBOSE else "attrs"
            G_LOGGER.super_verbose(lambda: ("Displaying TensorRT Network:\n" + trt_util.str_from_network(network, mode=network_log_mode)))

            G_LOGGER.start("Building engine with configuration:\n{:}".format(trt_util.str_from_config(config)))

            try:
                engine_bytes = builder.build_serialized_network(network, config)
            except AttributeError:
                engine = builder.build_engine(network, config)
                if not engine:
                    G_LOGGER.critical("Invalid Engine. Please ensure the engine was built correctly")
                stack.enter_context(engine)
                engine_bytes = engine.serialize()

            if not engine_bytes:
                G_LOGGER.critical("Invalid Engine. Please ensure the engine_bytes was built correctly")

            try:
                timing_cache = config.get_timing_cache()
            except AttributeError:
                if self.timing_cache_path:
                    trt_util.fail_unavailable("save_timing_cache in EngineBytesFromNetwork")
            else:
                if timing_cache and self.timing_cache_path:
                    with timing_cache.serialize() as buffer:
                        util.save_file(buffer, self.timing_cache_path, description="tactic timing cache")

            return engine_bytes
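
The loader above returns a serialized engine. In practice it is usually consumed through the higher-level `EngineFromNetwork` wrapper, roughly as sketched here (my addition; the path is hypothetical):

from polygraphy.backend.trt import CreateConfig, EngineFromNetwork, NetworkFromOnnxPath

build_engine = EngineFromNetwork(NetworkFromOnnxPath("model.onnx"), config=CreateConfig())
engine = build_engine()  # Builds the serialized engine as above, then deserializes it to an ICudaEngine.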
Example #25
def add_comparator(script, args, data_loader_name, cmd_run):
    script.add_import(imports=["Comparator"], frm="polygraphy.comparator")
    script.add_import(imports=["sys"])
    comparator_run = Script.invoke("Comparator.run",
                                   script.get_runners(),
                                   warm_up=args.warm_up,
                                   data_loader=data_loader_name,
                                   use_subprocess=args.use_subprocess)
    script.append_suffix(
        Script.format_str("\n# Runner Execution\nresults = {:}",
                          Inline(comparator_run)))

    if args.load_results:
        G_LOGGER.verbose("Will load runner results from: {:}".format(
            args.load_results))
        script.add_import(imports=["misc"], frm="polygraphy.util")
        script.append_suffix(
            Script.format_str(
                "\n# Load results\nfor load_output in {:}:\n{:}results.update(misc.pickle_load(load_output))",
                args.load_results, Inline(constants.TAB)))

    if args.save_results:
        G_LOGGER.verbose("Will save runner results to: {:}".format(
            args.save_results))
        script.add_import(imports=["misc"], frm="polygraphy.util")
        script.append_suffix(
            Script.format_str(
                "\n# Save results\nmisc.pickle_save({:}, results)",
                args.save_results))

    top_k = args_util.get(args, "top_k")
    if top_k is not None:
        script.add_import(imports=["PostprocessFunc"],
                          frm="polygraphy.comparator")
        script.append_suffix(
            Script.format_str(
                "\n# Postprocessing - Apply Top-{:}\nresults = Comparator.postprocess(results, PostprocessFunc.topk_func(k={:}))",
                top_k, top_k))

    script.append_suffix("\nsuccess = True")

    # Only do comparisons if there's actually something to compare.
    if len(args.runners) > 1 or args.load_results:
        script.append_suffix("# Accuracy Comparison")

        compare_func_str = Script.invoke_if_nondefault(
            "CompareFunc.basic_compare_func",
            rtol=args.rtol,
            atol=args.atol,
            check_shapes=False if args.no_shape_check else None,
            fail_fast=args.fail_fast)
        compare_func = None
        if compare_func_str:
            script.add_import(imports=["CompareFunc"],
                              frm="polygraphy.comparator")
            compare_func = "compare_func"
            script.append_suffix(
                Script.format_str("{:} = {:}", Inline(compare_func),
                                  Inline(compare_func_str)))

        compare_accuracy = Script.invoke("Comparator.compare_accuracy",
                                         Inline("results"),
                                         compare_func=Inline(compare_func)
                                         if compare_func is not None else None,
                                         fail_fast=args.fail_fast)
        script.append_suffix(
            Script.format_str("success &= bool({:})\n",
                              Inline(compare_accuracy)))
    if args.validate:
        script.append_suffix(
            "# Validation\nsuccess &= Comparator.validate(results)\n")

    if cmd_run is None:
        cmd_run = Inline("' '.join(sys.argv)")
    script.append_suffix(
        Script.format_str(
            '# Report Results\ncmd_run={cmd}\nif success:\n    G_LOGGER.success("PASSED | Command: {{}}".format(cmd_run))\nelse:\n    G_LOGGER.error("FAILED | Command: {{}}".format(cmd_run))',
            cmd=cmd_run))
    script.append_suffix("sys.exit(0 if success else 1)")
Example #26
    def __getitem__(self, index):
        """
        Generates random input data.

        May update the DataLoader's `input_metadata` attribute.

        Args:
            index (int):
                    Since this class behaves like an iterable, it takes an index parameter.
                    Generated data is guaranteed to be the same for the same index.

        Returns:
            OrderedDict[str, numpy.ndarray]: A mapping of input names to input numpy buffers.
        """
        if index >= self.iterations:
            raise IndexError()

        G_LOGGER.verbose(
            "Generating data using numpy seed: {:}".format(self.seed + index))
        rng = np.random.RandomState(self.seed + index)

        def get_static_shape(name, shape):
            static_shape = shape
            if util.is_shape_dynamic(shape):
                static_shape = util.override_dynamic_shape(shape)
                if static_shape != shape:
                    if not util.is_valid_shape_override(static_shape, shape):
                        G_LOGGER.critical(
                            "Input tensor: {:} | Cannot override original shape: {:} to {:}"
                            .format(name, shape, static_shape))
                    G_LOGGER.warning(
                        "Input tensor: {:} | Will generate data of shape: {:}.\n"
                        "If this is incorrect, please set input_metadata "
                        "or provide a custom data loader.".format(
                            name, static_shape),
                        mode=LogMode.ONCE,
                    )
            return static_shape

        # Whether the user provided the values for a shape tensor input,
        # rather than the shape of the input.
        # If the shape is 1D, and has a value equal to the rank of the provided default shape, it is
        # likely to be a shape tensor, and so its value, not its shape, should be overridden.
        def is_shape_tensor(name, dtype):
            if name not in self.input_metadata or name not in self.user_input_metadata:
                return False

            _, shape = self.input_metadata[name]
            is_shape = np.issubdtype(dtype, np.integer) and (
                not util.is_shape_dynamic(shape)) and (len(shape) == 1)

            user_shape = self.user_input_metadata[name].shape
            is_shape &= len(user_shape) == shape[0]
            is_shape &= not util.is_shape_dynamic(
                user_shape)  # Shape of shape cannot be dynamic.
            return is_shape

        def generate_buffer(name, dtype, shape):
            if is_shape_tensor(name, dtype):
                buffer = np.array(shape, dtype=dtype)
                G_LOGGER.info(
                    "Assuming {:} is a shape tensor. Setting input values to: {:}. If this is not correct, "
                    "please set it correctly in 'input_metadata' or by providing --input-shapes"
                    .format(name, buffer),
                    mode=LogMode.ONCE,
                )
            elif np.issubdtype(dtype, np.integer) or np.issubdtype(
                    dtype, np.bool_):
                imin, imax = self._get_range(name,
                                             cast_type=int if np.issubdtype(
                                                 dtype, np.integer) else bool)
                G_LOGGER.verbose(
                    "Input tensor: {:} | Generating input data in range: [{:}, {:}]"
                    .format(name, imin, imax),
                    mode=LogMode.ONCE,
                )
                # high is 1 greater than the max int drawn.
                buffer = rng.randint(low=imin,
                                     high=imax + 1,
                                     size=shape,
                                     dtype=dtype)
            else:
                fmin, fmax = self._get_range(name, cast_type=float)
                G_LOGGER.verbose(
                    "Input tensor: {:} | Generating input data in range: [{:}, {:}]"
                    .format(name, fmin, fmax),
                    mode=LogMode.ONCE,
                )
                buffer = (rng.random_sample(size=shape) * (fmax - fmin) +
                          fmin).astype(dtype)

            buffer = np.array(
                buffer
            )  # To handle scalars, since the above functions return a float if shape is ().
            return buffer

        if self.input_metadata is None and self.user_input_metadata is not None:
            self.input_metadata = self.user_input_metadata

        buffers = OrderedDict()
        for name, (dtype, shape) in self.input_metadata.items():
            if name in self.user_input_metadata:
                user_dtype, user_shape = self.user_input_metadata[name]

                dtype = util.default(user_dtype, dtype)
                is_valid_shape_override = user_shape is not None and util.is_valid_shape_override(
                    user_shape, shape)

                if util.is_shape_dynamic(user_shape):
                    G_LOGGER.warning(
                        "Input tensor: {:} | Provided input shape: {:} is dynamic.\n"
                        "Dynamic shapes cannot be used to generate inference data. "
                        "Will use default shape instead.\n"
                        "To avoid this, please provide a fixed shape to the data loader. "
                        .format(name, user_shape))
                elif not is_valid_shape_override and not is_shape_tensor(
                        name, dtype):
                    G_LOGGER.warning(
                        "Input tensor: {:} | Cannot use provided custom shape: {:} "
                        "to override: {:}. Will use default shape instead.".
                        format(name, user_shape, shape),
                        mode=LogMode.ONCE,
                    )
                else:
                    shape = util.default(user_shape, shape)

            static_shape = get_static_shape(name, shape)
            buffers[name] = generate_buffer(name, dtype, shape=static_shape)

        # Warn about unused metadata
        for name in self.user_input_metadata.keys():
            if name not in self.input_metadata:
                msg = "Input tensor: {:} | Metadata was provided, but the input does not exist in one or more runners.".format(
                    name)
                close_match = util.find_in_dict(name, self.input_metadata)
                if close_match:
                    msg += "\nMaybe you meant to set: {:}".format(close_match)
                G_LOGGER.warning(msg)

        # Warn about unused val_range
        if not isinstance(self.val_range, tuple):
            util.check_dict_contains(self.val_range,
                                     list(self.input_metadata.keys()) + [""],
                                     check_missing=False,
                                     dict_name="val_range")

        return buffers
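A minimal usage sketch for this data loader, assuming Polygraphy's DataLoader and TensorMetadata classes are importable as shown; the input name, dtype, and shape below are made up for illustration:

import numpy as np

from polygraphy.common import TensorMetadata
from polygraphy.comparator import DataLoader

# Hypothetical input metadata; normally a runner or the Comparator fills this in.
meta = TensorMetadata().add("input0", dtype=np.float32, shape=(1, 3, 224, 224))

loader = DataLoader(seed=1, iterations=2, val_range=(0.0, 1.0))
loader.input_metadata = meta

# __getitem__ generates deterministic data for a given index.
feed_dict = loader[0]  # OrderedDict mapping input names to numpy buffers
print({name: buf.shape for name, buf in feed_dict.items()})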
Example No. 27
def func():
    # `path` is captured from the enclosing scope; removal failures are logged rather than raised.
    try:
        os.remove(path)
    except OSError:
        G_LOGGER.verbose("Could not remove: {:}".format(path))
Example No. 28
    def add_to_script(self, script, disable_custom_outputs=None, suffix=None):
        if disable_custom_outputs:
            outputs = None
        else:
            outputs = args_util.get_outputs_for_script(script, self.outputs)

        model_file = self.model_args.model_file
        model_type = self.model_args.model_type

        if model_type == "ckpt":
            G_LOGGER.verbose(
                "Loading a TensorFlow checkpoint: {:}. Please ensure you are not using the --use-subprocess flag"
                .format(model_file),
                mode=LogMode.ONCE,
            )
            script.add_import(imports=["GraphFromCkpt"],
                              frm="polygraphy.backend.tf")
            loader_id = "load_ckpt"
            loader_str = make_invocable("GraphFromCkpt", model_file, self.ckpt)
        elif model_type == "keras":
            script.add_import(imports=["GraphFromKeras"],
                              frm="polygraphy.backend.tf")
            loader_id = "load_keras"
            loader_str = make_invocable("GraphFromKeras", model_file)
        elif model_type == "frozen":
            script.add_import(imports=["GraphFromFrozen"],
                              frm="polygraphy.backend.tf")
            G_LOGGER.verbose(
                "Attempting to load as a frozen graph. If this is not correct, please specify --model-type",
                mode=LogMode.ONCE,
            )
            loader_id = "load_frozen"
            loader_str = make_invocable("GraphFromFrozen", model_file)
        else:
            G_LOGGER.critical(
                "Model type: {:} cannot be imported with TensorFlow.".format(
                    model_type))

        loader_name = script.add_loader(loader_str, loader_id, suffix=suffix)

        if self.freeze_graph:
            script.add_import(imports=["OptimizeGraph"],
                              frm="polygraphy.backend.tf")
            loader_name = script.add_loader(make_invocable(
                "OptimizeGraph", loader_name),
                                            "optimize_graph",
                                            suffix=suffix)
        if self.tftrt:
            script.add_import(imports=["UseTfTrt"],
                              frm="polygraphy.backend.tf")
            loader_str = make_invocable(
                "UseTfTrt",
                loader_name,
                max_workspace_size=self.trt_config_args.workspace,
                fp16=self.trt_config_args.fp16,
                int8=self.trt_config_args.int8,
                max_batch_size=self.trt_legacy_args.batch_size,
                is_dynamic_op=self.dynamic_op,
                minimum_segment_size=self.minimum_segment_size,
            )
            loader_name = script.add_loader(loader_str,
                                            "use_tftrt",
                                            suffix=suffix)

        MODIFY_TF = "ModifyGraphOutputs"
        modify_tf_str = make_invocable(MODIFY_TF, loader_name, outputs=outputs)
        if modify_tf_str != make_invocable(MODIFY_TF, loader_name):
            script.add_import(imports=[MODIFY_TF], frm="polygraphy.backend.tf")
            loader_name = script.add_loader(modify_tf_str, "modify_tf")

        engine_dir = None
        if self.tftrt:
            engine_dir = self.trt_engine_save_args.path

        WRITE_TF = "SaveGraph"
        write_tf_str = make_invocable(WRITE_TF,
                                      loader_name,
                                      path=self.save_pb,
                                      tensorboard_dir=self.save_tensorboard,
                                      engine_dir=engine_dir)
        if write_tf_str != make_invocable(WRITE_TF, loader_name):
            script.add_import(imports=[WRITE_TF], frm="polygraphy.backend.tf")
            loader_name = script.add_loader(write_tf_str, "save_tf")

        return loader_name
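For a frozen graph, the loader chain this method emits into the generated script would look roughly like the sketch below; the file name and output name are illustrative, and ModifyGraphOutputs/SaveGraph only appear when custom outputs or save paths were actually requested:

from polygraphy.backend.tf import GraphFromFrozen, ModifyGraphOutputs, SaveGraph

# Sketch of the generated loader chain (not the literal emitted code).
load_frozen = GraphFromFrozen("model.pb")
modify_tf = ModifyGraphOutputs(load_frozen, outputs=["output:0"])
save_tf = SaveGraph(modify_tf, path="frozen_out.pb")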