Example #1
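        # Excerpted from a larger Polygraphy module; assumes module-level imports
        # along the lines of: copy, util (polygraphy), G_LOGGER/LogMode
        # (polygraphy.logger), and IterationResult/TensorMetadata from
        # Polygraphy's comparator/common modules.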
        def execute_runner(runner, loader_cache):
            with runner as active_runner:
                input_metadata = active_runner.get_input_metadata()
                G_LOGGER.info("{:35}\n---- Model Input(s) ----\n{:}".format(active_runner.name, input_metadata),
                              mode=LogMode.ONCE)

                # DataLoaderCache will ensure that the feed_dict does not contain any extra entries
                # based on the provided input_metadata.
                loader_cache.set_input_metadata(input_metadata)

                if warm_up:
                    G_LOGGER.start("{:35} | Running {:} warm-up run(s)".format(active_runner.name, warm_up))
                    try:
                        feed_dict = loader_cache[0]
                    except IndexError:
                        G_LOGGER.warning("{:} warm-up run(s) were requested, but data loader did not supply any data. "
                                         "Skipping warm-up run(s)".format(warm_up))
                    else:
                        G_LOGGER.ultra_verbose("Warm-up Input Buffers:\n{:}".format(util.indent_block(feed_dict)))
                        # First do a few warm-up runs, and don't time them.
                        for _ in range(warm_up):
                            active_runner.infer(feed_dict=feed_dict)
                    G_LOGGER.finish("{:35} | Finished {:} warm-up run(s)".format(active_runner.name, warm_up))

                # Then, actual iterations.
                index = 0
                iteration_results = []

                total_runtime = 0
                for index, feed_dict in enumerate(loader_cache):
                    G_LOGGER.extra_verbose(lambda: "{:35} | Feeding inputs:\n{:}".format(active_runner.name, util.indent_block(feed_dict)))
                    outputs = active_runner.infer(feed_dict=feed_dict)

                    runtime = active_runner.last_inference_time()
                    total_runtime += runtime
                    # Without a deep copy here, outputs will always reference the output of the last run
                    iteration_results.append(IterationResult(outputs=copy.deepcopy(outputs), runtime=runtime, runner_name=active_runner.name))

                    G_LOGGER.info(lambda: "{:35}\n---- Model Output(s) ----\n{:}".format(
                                            active_runner.name, TensorMetadata().from_feed_dict(outputs)),
                                  mode=LogMode.ONCE)
                    G_LOGGER.extra_verbose(lambda: "{:35} | Inference Time: {:.3f} ms | Received outputs:\n{:}".format(
                                                        active_runner.name, runtime * 1000.0, util.indent_block(outputs)))

                total_runtime_ms = total_runtime * 1000.0
                G_LOGGER.finish("{:35} | Completed {:} iteration(s) in {:.4g} ms | Average inference time: {:.4g} ms.".format(active_runner.name, index + 1, total_runtime_ms, total_runtime_ms / float(index + 1)))
                return iteration_results
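
The deep copy guarded by the comment above is worth a second look: if a runner reuses its output buffers between calls, every stored IterationResult would alias the same memory. A minimal, self-contained sketch of that pitfall using plain numpy (no Polygraphy required):

    import copy
    import numpy as np

    buffer = np.zeros(3)        # stands in for a runner's reused output buffer
    aliased, copied = [], []
    for i in range(2):
        buffer[:] = i                         # each "inference" writes in place
        aliased.append(buffer)                # every entry is the same object
        copied.append(copy.deepcopy(buffer))  # independent snapshot per run

    print(aliased[0][0], aliased[1][0])  # 1.0 1.0 -- both reflect the last run
    print(copied[0][0], copied[1][0])    # 0.0 1.0 -- per-run results preserved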
Example #2
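        # Excerpted from Polygraphy's output-validation code; assumes G_LOGGER,
        # comp_util (polygraphy.comparator.util), and the is_not_nan/is_finite
        # helpers (sketched below) are available in the enclosing scope.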
        def validate_output(runner_name, output_name, output):
            G_LOGGER.start("{:35} | Validating output: {:} (check_inf={:}, check_nan={:})".format(
                runner_name, output_name, check_inf, check_nan))
            with G_LOGGER.indent():
                comp_util.log_output_stats(output)

                output_valid = True
                if check_nan:
                    output_valid &= is_not_nan(output)
                if check_inf:
                    output_valid &= is_finite(output)

                if output_valid:
                    G_LOGGER.finish("PASSED | Output: {:} is valid".format(output_name))
                else:
                    G_LOGGER.error("FAILED | Errors detected in output: {:}".format(output_name))
                return output_valid
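
The is_not_nan and is_finite helpers are not shown above. A minimal sketch of what such checks can look like with numpy; Polygraphy's actual implementations may additionally log diagnostics about where the bad values occur:

    import numpy as np

    def is_not_nan(output):
        # True when the array contains no NaN values.
        return not np.any(np.isnan(output))

    def is_finite(output):
        # True when the array contains no +/-inf values. np.isfinite is also
        # False for NaN, so NaNs are masked out to keep the two checks independent.
        return bool(np.all(np.isfinite(output) | np.isnan(output)))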
Example #3
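    # Excerpted from a Polygraphy debug subtool; assumes `import tensorrt as trt`,
    # G_LOGGER, the tool's argument groups (TrtConfigArgs, ModelArgs), and the
    # LinearMarker/BisectMarker classes are defined elsewhere in the module.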
    def setup(self, args, network):
        self.precision = {
            "fp32": trt.float32,
            "fp16": trt.float16
        }[args.precision]

        if self.precision == trt.float16 and not self.arg_groups[TrtConfigArgs].fp16:
            G_LOGGER.exit(
                "Cannot mark layers to run in fp16 if it is not enabled in the builder configuration.\n"
                "Please also specify `--fp16` as a command-line option.")

        if self.precision == trt.float16 and not self.arg_groups[TrtConfigArgs].int8:
            G_LOGGER.warning(
                "Using fp16 as the higher precision, but fp16 is also the lowest precision available. "
                "Did you mean to set --int8 as well?")

        if not any([
                self.arg_groups[TrtConfigArgs].tf32,
                self.arg_groups[TrtConfigArgs].fp16,
                self.arg_groups[TrtConfigArgs].int8
        ]):
            G_LOGGER.exit(
                "Please enable at least one precision besides fp32 (e.g. --int8, --fp16, --tf32)"
            )

        if self.arg_groups[ModelArgs].model_type == "engine":
            G_LOGGER.exit(
                "The precision tool cannot work with engines, as they cannot be modified. "
                "Please provide a different format, such as an ONNX or TensorFlow model."
            )

        G_LOGGER.start("Using {:} as higher precision".format(self.precision))

        if args.mode == "linear":
            self.layer_marker = LinearMarker(len(network), args.direction)
        elif args.mode == "bisect":
            self.layer_marker = BisectMarker(len(network), args.direction)
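
LinearMarker and BisectMarker themselves are not shown. As a rough illustration of the bisect mode's idea (a hypothetical sketch, not Polygraphy's actual implementation), one can binary-search for the smallest prefix of layers that must run in the higher precision for the model to pass:

    def bisect_mark(num_layers, passes_with_prefix):
        """Hypothetical helper: find the smallest n such that marking layers
        [0, n) at the higher precision makes the model pass.

        `passes_with_prefix(n) -> bool` is assumed to rebuild the engine with
        the first n layers constrained to the higher precision and check the
        outputs; pass/fail is assumed to be monotonic in n.
        """
        lo, hi = 0, num_layers
        while lo < hi:
            mid = (lo + hi) // 2
            if passes_with_prefix(mid):
                hi = mid        # passing: try marking fewer layers
            else:
                lo = mid + 1    # failing: need to mark more layers
        return lo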