Example #1
    def __call__(self):
        self.add_args(self._parser)
        self.config.MetadataClass.add_args(self._parser)
        self._args = self._parser.parse_args()

        if self._args.verbose:
            G_LOGGER.setLevel(level=G_LOGGER.DEBUG)

        self.metadata = self.args_to_network_metadata(self._args)
        self.check_network_metadata_is_supported(self.metadata)
Example #2
    def cleanup(self) -> None:
        if self.model:
            G_LOGGER.debug("Freeing model from memory: {}".format(self.model))
            del self.model

        if self.fpath:
            G_LOGGER.debug(
                "Removing saved torch model from location: {}".format(
                    self.fpath))
            rmtree(self.fpath)
Example #3
    def torch_to_onnx(self, output_fpath: str, model: Module,
                      network_metadata: NetworkMetadata):
        """
        Exports a given huggingface T5 to decoder architecture only.
        Inspired by https://github.com/onnx/models/blob/master/text/machine_comprehension/t5/dependencies/T5-export.py

        Args:
            output_prefix (str): Path to the onnx file
            model (torch.Model): Model loaded torch class

        Returns:
            T5DecoderONNXFile: ONNX decoder object.
        """

        input_ids = torch.tensor([[42] * 10])
        # Exporting the decoder requires a basic instance of the encoder
        # Create one temporarily
        simplified_encoder = T5EncoderTorchFile.TorchModule(model.encoder)
        # Exports to ONNX
        decoder_with_lm_head = T5DecoderTorchFile.TorchModule(
            model.decoder, model.lm_head, model.config)

        # This wrapper lets the HuggingFace-compatible torch class work with the ONNX exporter
        old_forward = decoder_with_lm_head.forward

        def _export_forward(*args, **kwargs):
            result = old_forward(*args, **kwargs)
            return result[0]

        decoder_with_lm_head.forward = _export_forward

        inputs = T5ModelTRTConfig.get_input_dims(network_metadata)["decoder"]
        outputs = T5ModelTRTConfig.get_output_dims(network_metadata)["decoder"]

        torch.onnx.export(decoder_with_lm_head,
                          (input_ids, simplified_encoder(input_ids)),
                          output_fpath,
                          export_params=True,
                          opset_version=12,
                          input_names=inputs.get_names(),
                          output_names=outputs.get_names(),
                          dynamic_axes={
                              **inputs.get_torch_dynamic_axis_encoding(),
                              **outputs.get_torch_dynamic_axis_encoding(),
                          },
                          training=False,
                          use_external_data_format=True)

        if network_metadata.precision.fp16:
            G_LOGGER.debug("Clamping FP16 weights for T5")
            clamp_weights_onnx_to_fp16_bounds(output_fpath, output_fpath)

        return T5DecoderONNXFile(output_fpath, network_metadata)
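A hypothetical call pattern for the export above, assuming an existing T5DecoderTorchFile instance, a loaded HuggingFace T5 model, and a NetworkMetadata instance (all names below are illustrative, not from the source):

    # All objects are assumed to already exist; names are illustrative only.
    decoder_onnx_file = decoder_torch_file.torch_to_onnx(
        output_fpath="t5-decoder-with-lm-head.onnx",
        model=t5_model,
        network_metadata=metadata)
    print(decoder_onnx_file.fpath)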
Example #4
    def execute(self, args: argparse.Namespace):
        compare_group = []
        if args.compare is None:
            compare_group = self.PER_NETWORK_SCRIPTS
        else:
            compare_group = args.compare

        if len(compare_group) <= 1:
            G_LOGGER.error(
                "Comparison command must have atleast two groups to compare to."
            )
            exit()

        results = []
        # Get the parser for the inference script, which is a superset
        module = None
        try:
            module = self.load_script(self.TRT_SCRIPT_NAME, args)
        except ModuleNotFoundError as e:
            print("Unable to do comparison. TRT script not yet supported.")
            exit(1)

        nconfig = module.RUN_CMD.config
        nconfig.MetadataClass.add_inference_args(self.parser)
        self.parser.parse_known_args()

        results = []
        # It is possible certain scripts are not implemented;
        # allow results to be generated even if a script does not exist.
        modified_compare_group = []
        for g in compare_group:
            cwd = os.getcwd()
            try:
                print()
                print("Collecting Data for {}".format(g))
                os.chdir(args.network)
                module = self.load_script(g, args)
                module.RUN_CMD._parser = self.parser
                results.append(module.RUN_CMD())
                modified_compare_group.append(g)
            except ModuleNotFoundError as e:
                print(
                    "{} is not valid, the demo does not support this script yet. Ignoring."
                    .format(g))

            finally:
                os.chdir(cwd)

        headers, rows = process_results(modified_compare_group, results,
                                        nconfig)
        print()
        print(tabulate(rows, headers=headers))
        return 0
Example #5
def confirm_folder_delete(
        fpath: str,
        prompt: str = "Confirm you want to delete entire folder?") -> None:
    """
    Confirms whether the user wants to delete the given folder path.

    Args:
        fpath (str): Path to folder.
        prompt (str): Prompt to display

    Returns:
        None
    """
    msg = prompt + " {} [Y/n] ".format(fpath)
    confirm = input(msg)
    if confirm == "Y":
        rmtree(fpath)
    else:
        G_LOGGER.info("Skipping file removal.")
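A minimal usage sketch for confirm_folder_delete; the folder path is hypothetical and rmtree is assumed to come from shutil:

    # Prompts: "Confirm you want to delete entire folder? ./temp_models [Y/n] "
    # The folder is removed only when the user answers "Y".
    confirm_folder_delete("./temp_models")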
Example #6
def remove_if_empty(
    fpath: str,
    success_msg: str = "Folder successfully removed.",
    error_msg: str = "Folder cannot be removed, there are files.",
) -> None:
    """
    Removes a folder if it is empty and logs an info message either way.

    Args:
        fpath: Location to folder
        success_msg: Success message.
        error_msg: Error message.

    Returns:
        None
    """
    if len(os.listdir(fpath)) == 0:
        os.rmdir(fpath)
        G_LOGGER.info(success_msg + " {}".format(fpath))
    else:
        G_LOGGER.info(error_msg + " {}".format(fpath))
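A short usage sketch for remove_if_empty with a hypothetical path:

    # Removes "./workspace" and logs the success message only if the folder is
    # empty; otherwise logs the error message and leaves it in place.
    remove_if_empty("./workspace")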
Example #7
    def get_optimization_profile(self, batch_size, sequence_length):
        """Provided helper function to obtain a profile optimization."""
        # Select an optimization profile
        # inspired by demo/BERT/inference.py script
        selected_profile_idx = None
        for idx in range(self.trt_engine.num_optimization_profiles):
            profile_shape = self.trt_engine.get_profile_shape(
                profile_index=idx,
                binding=idx * self._num_bindings_per_profile)

            if profile_shape[0][0] <= batch_size and profile_shape[2][0] >= batch_size \
               and profile_shape[0][1] <= sequence_length and profile_shape[2][1] >= sequence_length:
                G_LOGGER.debug("Selected profile: {}".format(profile_shape))
                selected_profile_idx = idx
                break

        if selected_profile_idx is None:
            raise RuntimeError(
                "Could not find any profile that matches batch_size={}, sequence_length={}"
                .format(batch_size, sequence_length))

        return selected_profile_idx
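For context, trt_engine.get_profile_shape returns the [min, opt, max] shapes for a binding, so the loop above compares the requested batch size and sequence length against each profile's minimum and maximum dims. A hypothetical call on a runner instance might look like:

    # Pick a profile that covers batch_size=1, sequence_length=128 (illustrative values)
    # and activate it on the execution context before running inference.
    profile_idx = runner.get_optimization_profile(batch_size=1, sequence_length=128)
    runner.trt_context.active_optimization_profile = profile_idx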
Example #8
    def __init__(self, trt_engine_file: TRTEngineFile,
                 network_metadata: NetworkMetadata):
        self.trt_engine_file = trt_engine_file
        trt_logger = trt.Logger(trt.Logger.VERBOSE if G_LOGGER.root.level ==
                                G_LOGGER.DEBUG else trt.Logger.WARNING)

        G_LOGGER.info(
            "Reading and loading engine file {} using trt native runner.".
            format(self.trt_engine_file.fpath))
        with open(self.trt_engine_file.fpath, "rb") as f:
            self.trt_runtime = trt.Runtime(trt_logger)
            self.trt_engine = self.trt_runtime.deserialize_cuda_engine(
                f.read())
            self.trt_context = self.trt_engine.create_execution_context()

        # By default set optimization profile to 0
        self.profile_idx = 0

        # Other metadata required by the profile
        self._num_bindings_per_profile = self.trt_engine.num_bindings // self.trt_engine.num_optimization_profiles
        G_LOGGER.debug("Number of profiles detected in engine: {}".format(
            self._num_bindings_per_profile))
Example #9
    def torch_to_onnx(self, output_fpath: str, model: Module,
                      network_metadata: NetworkMetadata):
        """
        Exports a given huggingface T5 to encoder architecture only.
        Inspired by https://github.com/onnx/models/blob/master/text/machine_comprehension/t5/dependencies/T5-export.py

        Args:
            output_prefix (str): Path to the onnx file
            model (torch.Model): Model loaded torch class

        Returns:
            Tuple[str]: Names of generated models
        """
        input_ids = torch.tensor([[42] * 10])
        simplified_encoder = T5EncoderTorchFile.TorchModule(model.encoder)
        inputs = T5ModelTRTConfig.get_input_dims(network_metadata)["encoder"]
        outputs = T5ModelTRTConfig.get_output_dims(network_metadata)["encoder"]

        # Exports to ONNX
        torch.onnx._export(simplified_encoder,
                           input_ids,
                           output_fpath,
                           export_params=True,
                           opset_version=12,
                           input_names=inputs.get_names(),
                           output_names=outputs.get_names(),
                           dynamic_axes={
                               **inputs.get_torch_dynamic_axis_encoding(),
                               **outputs.get_torch_dynamic_axis_encoding(),
                           },
                           training=False,
                           use_external_data_format=True)

        if network_metadata.precision.fp16:
            G_LOGGER.debug("Clamping FP16 weights for T5")
            clamp_weights_onnx_to_fp16_bounds(output_fpath, output_fpath)

        return T5EncoderONNXFile(output_fpath, network_metadata)
Example #10
def measure_python_inference_code(stmt: Union[Callable, str],
                                  warmup: int = 3,
                                  number: int = 10,
                                  iterations: int = 10) -> float:
    """
    Measures the time it takes to run Pythonic inference code.
    The statement given should be the actual model inference call, e.g. forward() in torch.

    See timeit for more details on how stmt works.

    Args:
        stmt (Union[Callable, str]): Callable or string containing the code to measure.
        warmup (int): Number of warmup repetitions before measurement.
        number (int): Number of times to call the statement per iteration.
        iterations (int): Number of measurement cycles.

    Returns:
        float: Median time per call across iterations, in seconds.
    """
    G_LOGGER.debug(
        "Measuring inference call with warmup: {} and number: {} and iterations {}"
        .format(warmup, number, iterations))
    # Warmup
    warmup_times = timeit.repeat(stmt, number=number, repeat=warmup)
    G_LOGGER.debug("Warmup times: {}".format(warmup_times))

    return median(timeit.repeat(stmt, number=number,
                                repeat=iterations)) / number
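A hedged usage sketch with a stand-in workload instead of a real model forward() call; in the demo the statement would be the model's forward pass:

    # Illustrative only: time a trivial callable instead of model(input_ids).
    avg_time = measure_python_inference_code(lambda: sum(range(1000)),
                                             warmup=2,
                                             number=5,
                                             iterations=5)
    print("Median time per call: {:.6f} s".format(avg_time))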
Example #11
    def cleanup(self) -> None:
        G_LOGGER.debug("Removing saved engine model from location: {}".format(
            self.fpath))
        os.remove(self.fpath)
Example #12
    def cleanup(self) -> None:
        G_LOGGER.debug("Removing saved ONNX model from location: {}".format(
            self.fpath))
        # Does not clean up external data and weights.
        os.remove(self.fpath)