示例#1
0
    def __str__(self):
        """
        Renders the accumulated script state as Python source text.

        The pieces are emitted in this order: shebang and generation header, optional
        summary comments, pre-import lines, sorted imports, prefix lines, loader
        assignments, the runner list, and finally the suffix lines.

        Returns:
            str: The generated script. It is also logged at super-verbose severity.
        """
        chunks = [
            "#!/usr/bin/env python3\n",
            "# Template auto-generated by polygraphy [v{:}] on {:} at {:}\n".format(
                polygraphy.__version__, time.strftime("%D"), time.strftime("%H:%M:%S")
            ),
            "# Generation Command: {:}\n".format(" ".join(sys.argv)),
        ]

        # Each line of the summary becomes its own comment line.
        if self.summary:
            chunks.append("# " + "\n# ".join(self.summary.splitlines()) + "\n")

        # Pre-import lines are followed by a blank line, but only when there are any.
        if self.preimport:
            chunks.append("\n".join(self.preimport) + "\n\n")

        # Plain imports first, then from-imports; each group is sorted independently.
        chunks.extend("import {:}\n".format(module) for module in sorted(self.imports))
        chunks.extend(
            "from {:} import {:}\n".format(module, ", ".join(sorted(names)))
            for module, names in sorted(self.from_imports.items())
        )
        chunks.append("\n")

        if self.prefix:
            chunks.append("\n".join(self.prefix) + "\n")

        # `self.loaders` maps the loader expression to the variable name it is assigned to.
        if self.loaders:
            chunks.append("# Loaders\n")
        chunks.extend(
            "{:} = {:}\n".format(variable, expression)
            for expression, variable in self.loaders.items()
        )
        chunks.append("\n")

        # The runner list is always emitted, even when there are no runners.
        chunks.append("# Runners\n")
        chunks.append("{:} = [\n".format(self.get_runners()))
        chunks.extend("{:}{:},\n".format(constants.TAB, runner) for runner in self.runners)
        chunks.append("]\n")

        chunks.append("\n".join(self.suffix) + "\n")

        script = "".join(chunks)
        G_LOGGER.super_verbose("Created script:\n{:}".format(script))
        return script
示例#2
0
    def __str__(self):
        """
        Renders the accumulated script state as Python source text.

        The pieces are emitted in this order: shebang and generation header, optional
        summary comments, pre-import lines, sorted imports, the optional data loader,
        loader assignments, the optional runner list, and finally the suffix lines.
        Afterwards every tab character is replaced with ``constants.TAB`` and triple
        newlines are reduced to double newlines in a single pass.

        Returns:
            str: The generated script. It is also logged at super-verbose severity.
        """
        chunks = [
            "#!/usr/bin/env python3\n",
            "# Template auto-generated by polygraphy [v{:}] on {:} at {:}\n".format(
                polygraphy.__version__, time.strftime("%D"), time.strftime("%H:%M:%S")
            ),
            "# Generation Command: {:}\n".format(" ".join(sys.argv)),
        ]

        # Each line of the summary becomes its own comment line.
        if self.summary:
            chunks.append("# " + "\n# ".join(self.summary.splitlines()) + "\n")

        # Pre-import lines are surrounded by blank lines, but only when there are any.
        if self.preimport:
            chunks.append("\n" + "\n".join(self.preimport) + "\n\n")

        # Plain imports and from-imports are sorted together as complete statements.
        import_lines = ["import {:}".format(module) for module in self.imports]
        import_lines += [
            "from {:} import {:}".format(module, ", ".join(sorted(names)))
            for module, names in self.from_imports.items()
        ]
        import_lines.sort()
        chunks.append("\n".join(import_lines) + "\n")

        if self.data_loader:
            chunks.append("\n# Data Loader\n")
            chunks.append("{:} = {:}\n".format(Script.DATA_LOADER_NAME, self.data_loader))
        chunks.append("\n")

        # `self.loaders` maps the loader expression to the variable name it is assigned to.
        if self.loaders:
            chunks.append("# Loaders\n")
        chunks.extend(
            "{:} = {:}\n".format(variable, expression)
            for expression, variable in self.loaders.items()
        )
        chunks.append("\n")

        if self.runners or self.always_create_runners:
            chunks.append("# Runners\n")
            chunks.append("{:} = [".format(self.get_runners()))
            chunks.extend("\n\t{:},".format(runner) for runner in self.runners)
            # Only break the line before the closing bracket if the list has entries.
            if self.runners:
                chunks.append("\n")
            chunks.append("]\n")

        chunks.append("\n".join(self.suffix) + "\n")

        script = "".join(chunks)
        script = script.replace("\t", constants.TAB).replace("\n\n\n", "\n\n")

        G_LOGGER.super_verbose("Created script:\n{:}".format(script))
        return script
示例#3
0
            def check_outputs_match(out0, out0_name, out1, out1_name, per_out_rtol, per_out_atol, per_out_err_stat):
                """
                Compares two outputs against absolute/relative tolerances and logs the results.

                An output fails only when the selected error statistic exceeds BOTH the absolute
                tolerance and the relative tolerance. For the "mean", "median" and "max"
                statistics, a NaN relative error is treated as exceeding the relative tolerance.

                ``iter_result0`` and ``iter_result1`` are read from the enclosing scope and are
                used here only for their ``runner_name`` in log messages.

                Args:
                    out0: The first output array.
                    out0_name (str): The name of the first output.
                    out1: The second output array. Relative error is computed with respect to it.
                    out1_name (str): The name of the second output.
                    per_out_rtol: The relative tolerance to use for this output.
                    per_out_atol: The absolute tolerance to use for this output.
                    per_out_err_stat (str):
                            The error statistic to check. One of "max", "mean", "median" or "elemwise".

                Returns:
                    OutputCompareResult:
                            Built from whether the outputs matched, followed by the max, mean and median
                            absolute/relative differences.
                """
                VALID_CHECK_ERROR_STATS = ["max", "mean", "median", "elemwise"]
                # NOTE(review): this relies on G_LOGGER.critical() aborting; otherwise an invalid
                # value would only be caught by the assert in the final branch below - confirm.
                if per_out_err_stat not in VALID_CHECK_ERROR_STATS:
                    G_LOGGER.critical("Invalid choice for check_error_stat: {:}.\n"
                                      "Note: Valid choices are: {:}".format(per_out_err_stat, VALID_CHECK_ERROR_STATS))

                G_LOGGER.super_verbose("{:35} | Output: {:} (dtype={:}, shape={:}):\n{:}".format(
                                            iter_result0.runner_name, out0_name, out0.dtype, out0.shape, util.indent_block(out0)))
                G_LOGGER.super_verbose("{:35} | Output: {:} (dtype={:}, shape={:}):\n{:}".format(
                                            iter_result1.runner_name, out1_name, out1.dtype, out1.shape, util.indent_block(out1)))

                # Check difference vs. tolerances
                # Two boolean outputs are compared with XOR instead of subtraction.
                if np.issubdtype(out0.dtype, np.bool_) and np.issubdtype(out1.dtype, np.bool_):
                    absdiff = np.logical_xor(out0, out1)
                else:
                    absdiff = np.abs(out0 - out1)

                absout1 = np.abs(out1)
                # Zeros in out1 produce inf/NaN here; the resulting RuntimeWarnings are silenced.
                with np.testing.suppress_warnings() as sup:
                    sup.filter(RuntimeWarning)
                    reldiff = absdiff / absout1

                max_absdiff = comp_util.compute_max(absdiff)
                mean_absdiff = comp_util.compute_mean(absdiff)
                median_absdiff = comp_util.compute_median(absdiff)
                max_reldiff = comp_util.compute_max(reldiff)
                mean_reldiff = comp_util.compute_mean(reldiff)
                median_reldiff = comp_util.compute_median(reldiff)

                # Placeholders that survive if the elemwise statistics below cannot be computed.
                max_elemwiseabs = "Unknown"
                max_elemwiserel = "Unknown"

                if per_out_err_stat == "mean":
                    failed = mean_absdiff > per_out_atol and (np.isnan(mean_reldiff) or mean_reldiff > per_out_rtol)
                elif per_out_err_stat == "median":
                    failed = median_absdiff > per_out_atol and (np.isnan(median_reldiff) or median_reldiff > per_out_rtol)
                elif per_out_err_stat == "max":
                    failed = max_absdiff > per_out_atol and (np.isnan(max_reldiff) or max_reldiff > per_out_rtol)
                else:
                    assert per_out_err_stat == "elemwise", "This branch should be unreachable unless per_out_err_stat is 'elemwise'"
                    # An element is a mismatch only if it exceeds both tolerances.
                    mismatches = (absdiff > per_out_atol) & (reldiff > per_out_rtol)

                    failed = np.any(mismatches)
                    try:
                        # Special because we need to account for tolerances too.
                        max_elemwiseabs = comp_util.compute_max(absdiff[mismatches])
                        max_elemwiserel = comp_util.compute_max(reldiff[mismatches])

                        with G_LOGGER.indent():
                            G_LOGGER.super_verbose("Mismatched indices:\n{:}".format(np.argwhere(mismatches)))
                            G_LOGGER.extra_verbose("{:35} | Mismatched values:\n{:}".format(iter_result0.runner_name, out0[mismatches]))
                            G_LOGGER.extra_verbose("{:35} | Mismatched values:\n{:}".format(iter_result1.runner_name, out1[mismatches]))
                    except Exception as err:
                        # Best-effort logging only: the pass/fail verdict was already computed above.
                        G_LOGGER.warning("Failing to log mismatches.\nNote: Error was: {:}".format(err))

                # Log information about the outputs
                # Both histograms share one range so that they can be compared directly.
                hist_bin_range = (min(comp_util.compute_min(out0), comp_util.compute_min(out1)),
                                  max(comp_util.compute_max(out0), comp_util.compute_max(out1)))
                comp_util.log_output_stats(out0, failed, iter_result0.runner_name + ": " + out0_name, hist_range=hist_bin_range)
                comp_util.log_output_stats(out1, failed, iter_result1.runner_name + ": " + out1_name, hist_range=hist_bin_range)

                G_LOGGER.info("Error Metrics: {:}".format(out0_name))
                with G_LOGGER.indent():
                    def req_tol(mean_diff, median_diff, max_diff, elemwise_diff):
                        # Selects the value that corresponds to the requested error statistic.
                        return {
                            "mean": mean_diff,
                            "median": median_diff,
                            "max": max_diff,
                            "elemwise": elemwise_diff,
                        }[per_out_err_stat]

                    def fmt_tol(value):
                        # The elemwise values may still be the "Unknown" placeholder string, which
                        # the numeric format spec rejects, so fall back to plain string conversion.
                        try:
                            return "{:.5g}".format(value)
                        except (TypeError, ValueError):
                            return str(value)

                    G_LOGGER.info("Minimum Required Tolerance: {:} error | [abs={:}] OR [rel={:}]".format(
                                    per_out_err_stat,
                                    fmt_tol(req_tol(mean_absdiff, median_absdiff, max_absdiff, max_elemwiseabs)),
                                    fmt_tol(req_tol(mean_reldiff, median_reldiff, max_reldiff, max_elemwiserel))))
                    comp_util.log_output_stats(absdiff, failed, "Absolute Difference")
                    comp_util.log_output_stats(reldiff, failed, "Relative Difference")

                # Finally show summary.
                if failed:
                    G_LOGGER.error("FAILED | Difference exceeds tolerance (rel={:}, abs={:})".format(per_out_rtol, per_out_atol))
                else:
                    G_LOGGER.finish("PASSED | Difference is within tolerance (rel={:}, abs={:})".format(per_out_rtol, per_out_atol))

                G_LOGGER.extra_verbose("Finished comparing: '{:}' (dtype={:}, shape={:}) [{:}] and '{:}' (dtype={:}, shape={:}) [{:}]"
                                .format(out0_name, out0.dtype, out0.shape, iter_result0.runner_name, out1_name, out1.dtype, out1.shape, iter_result1.runner_name))
                return OutputCompareResult(not failed, max_absdiff, max_reldiff, mean_absdiff, mean_reldiff, median_absdiff, median_reldiff)
示例#4
0
    def call_impl(self):
        """
        Builds an engine from the configured network and builder configuration.

        Objects that were obtained by invoking a callable are entered into an ExitStack
        that is closed when this method finishes. Objects that were supplied directly
        are not entered into the stack.

        Returns:
            bytes: The serialized engine that was created.
        """
        # Anything produced by invoking a callable is owned by this loader.
        network_ret, owns_network = util.invoke_if_callable(self._network)
        builder, network, parser = util.unpack_args(network_ret, num=3)

        if builder is None or network is None:
            G_LOGGER.critical("Expected to recevie a (builder, network) tuple for the `network` parameter, "
                              "but received: ({:}, {:})".format(builder, network))

        with contextlib.ExitStack() as exit_stack:
            if not owns_network:
                provided = "Builder, Network, and Parser" if parser is not None else "Builder and Network"
                G_LOGGER.verbose("{:} were provided directly instead of via a Callable. This loader will not assume ownership. "
                                 "Please ensure that they are freed.".format(provided))
            else:
                exit_stack.enter_context(builder)
                exit_stack.enter_context(network)
                if parser is not None:
                    exit_stack.enter_context(parser)

            config, owns_config = util.invoke_if_callable(self._config, builder, network)
            if not owns_config:
                G_LOGGER.verbose("Builder configuration was provided directly instead of via a Callable. This loader will not assume "
                                 "ownership. Please ensure it is freed.")
            else:
                exit_stack.enter_context(config)

            # Probe whether the calibrator can act as a context manager.
            # Polygraphy calibrator frees device buffers on exit.
            try:
                config.int8_calibrator.__enter__
            except AttributeError:
                calibrator_is_context = False
            else:
                calibrator_is_context = True

            if calibrator_is_context:
                exit_stack.enter_context(config.int8_calibrator)

            if G_LOGGER.severity <= G_LOGGER.ULTRA_VERBOSE:
                network_log_mode = "full"
            else:
                network_log_mode = "attrs"
            # Passed as a callable rather than a pre-built string.
            G_LOGGER.super_verbose(lambda: ("Displaying TensorRT Network:\n" + trt_util.str_from_network(network, mode=network_log_mode)))

            G_LOGGER.start("Building engine with configuration:\n{:}".format(trt_util.str_from_config(config)))

            # On AttributeError, fall back to building an engine object and serializing it.
            try:
                engine_bytes = builder.build_serialized_network(network, config)
            except AttributeError:
                built_engine = builder.build_engine(network, config)
                if not built_engine:
                    G_LOGGER.critical("Invalid Engine. Please ensure the engine was built correctly")
                exit_stack.enter_context(built_engine)
                engine_bytes = built_engine.serialize()

            if not engine_bytes:
                G_LOGGER.critical("Invalid Engine. Please ensure the engine_bytes was built correctly")

            # Save the timing cache when a path was requested; if the config cannot provide
            # one, report the feature as unavailable.
            try:
                timing_cache = config.get_timing_cache()
            except AttributeError:
                if self.timing_cache_path:
                    trt_util.fail_unavailable("save_timing_cache in EngineBytesFromNetwork")
            else:
                if timing_cache and self.timing_cache_path:
                    with timing_cache.serialize() as cache_buffer:
                        util.save_file(cache_buffer, self.timing_cache_path, description="tactic timing cache")

            return engine_bytes
示例#5
0
    def activate_impl(self):
        """
        Loads or builds the engine, then creates the execution context and the buffers.

        If ``self.load_engine`` is set, the engine is deserialized from that path. Otherwise,
        it is built from the objects returned by ``self.network_loader()``. If
        ``self.engine_path`` is set, the engine is additionally serialized to that path.

        Vars:
            engine (trt.ICudaEngine):
                    The engine tracked by this runner. The TrtLegacyRunner OWNS the engine it
                    manages, and therefore is responsible for its destruction. Do not free the engine outside of the
                    runner, or it will result in a double free.
            context (trt.IExecutionContext): The context used for inference.
            input_buffers (Dict[str, TrtLegacyRunner.HostDeviceMem]):
                    A mapping of binding names to HostDeviceMem objects for input buffers.
                    No host memory is allocated for inputs here.
            output_buffers (Dict[str, TrtLegacyRunner.HostDeviceMem]):
                    A mapping of binding names to HostDeviceMem objects for output buffers.
            stream (cuda.Stream): The CUDA stream that this runner will use for inference.
        """

        # Only initialize GPU after this runner is activated.
        # Allocates all buffers required for an engine, i.e. host/device input_buffers/output_buffers.
        def allocate_buffers(engine):
            input_buffers = OrderedDict()
            output_buffers = OrderedDict()
            stream = cuda.Stream()
            G_LOGGER.verbose("Using batch size: " +
                             str(engine.max_batch_size) +
                             " during buffer allocation")
            for binding in engine:
                # The maximum batch size is prepended to the binding shape.
                shape = (engine.max_batch_size, ) + tuple(
                    engine.get_binding_shape(binding))
                np_dtype = trt.nptype(engine.get_binding_dtype(binding))

                device_mem = cuda.DeviceArray(shape=shape, dtype=np_dtype)
                G_LOGGER.extra_verbose("Tensor: "
                                       "{:35} | Allocated: {:}".format(
                                           binding, device_mem))

                if engine.binding_is_input(binding):
                    # Inputs only get a device buffer.
                    input_buffers[binding] = TrtLegacyRunner.HostDeviceMem(
                        None, device_mem)
                else:
                    host_mem = np.empty(shape=shape, dtype=np_dtype)
                    output_buffers[binding] = TrtLegacyRunner.HostDeviceMem(
                        host_mem, device_mem)
            return input_buffers, output_buffers, stream

        # Always try reading the engine first, or, failing that, build it.
        if self.load_engine:
            with open(self.load_engine,
                      "rb") as f, trt.Runtime(get_trt_logger()) as runtime:
                G_LOGGER.info("Reading engine from {:}".format(
                    self.load_engine))
                self.engine = runtime.deserialize_cuda_engine(f.read())
        else:
            trt.init_libnvinfer_plugins(get_trt_logger(), "")
            builder, network, parser, model_batch_size = self.network_loader()
            with builder, network, parser, builder.create_builder_config(
            ) as config:
                # Precedence: explicit max batch size, then the model's batch size, then 1.
                builder.max_batch_size = int(self.max_batch_size
                                             or model_batch_size or 1)

                config.max_workspace_size = int(self.max_workspace_size)

                if not self.tf32:
                    # AttributeError is suppressed in case the TF32 flag is unavailable.
                    with contextlib.suppress(AttributeError):
                        config.clear_flag(trt.BuilderFlag.TF32)
                if self.fp16:
                    # Set only the FP16 bit. Assigning to `config.flags` would overwrite the
                    # entire bitmask and discard every other flag, including TF32.
                    config.set_flag(trt.BuilderFlag.FP16)

                if not network:
                    G_LOGGER.critical("Invalid network")
                G_LOGGER.super_verbose(lambda: trt_util.str_from_network(
                    network) or "Finished logging network")

                if self.layerwise:
                    # In layerwise mode, every layer becomes an output.
                    G_LOGGER.info(
                        "Running in layerwise mode. Marking {:} layers as outputs"
                        .format(network.num_layers))
                    for layer in network:
                        for index in range(layer.num_outputs):
                            out = layer.get_output(index)
                            if not out.is_network_output:
                                network.mark_output(out)

                G_LOGGER.info(
                    "Building engine: max workspace size={:} bytes, max batch size={:}, fp16={:}, "
                    "tf32={:}".format(config.max_workspace_size,
                                      builder.max_batch_size, self.fp16,
                                      self.tf32))
                self.engine = builder.build_engine(network, config)

        if not self.engine:
            G_LOGGER.critical(
                "Invalid Engine. Please ensure the engine was built correctly")

        if self.engine_path:
            with open(self.engine_path, "wb") as f:
                G_LOGGER.info("Writing engine to {:}".format(self.engine_path))
                f.write(self.engine.serialize())

        self.context = self.engine.create_execution_context()
        self.input_buffers, self.output_buffers, self.stream = allocate_buffers(
            self.engine)