def test_pretrained_eval_inference(self):
    # Disabled GPU due to using TravisCI
    cuda, use_half = False, False
    train_manifest, val_manifest, test_manifest = self.download_data(
        DatasetConfig(target_dir=self.target_dir,
                      manifest_dir=self.manifest_dir))
    wget.download(lm_path)
    for pretrained_url in pretrained_urls:
        print("Running Pre-trained Smoke test for: ", pretrained_url)
        wget.download(pretrained_url)
        file_path = os.path.basename(pretrained_url)
        pretrained_path = os.path.abspath(file_path)
        lm_configs = [
            LMConfig(),  # Greedy
            LMConfig(decoder_type=DecoderType.beam),  # Test Beam Decoder
            LMConfig(decoder_type=DecoderType.beam,
                     lm_path=os.path.basename(lm_path),
                     alpha=1,
                     beta=1)  # Test Beam Decoder with LM
        ]
        for lm_config in lm_configs:
            self.eval_model(model_path=pretrained_path,
                            test_manifest=test_manifest,
                            cuda=cuda,
                            use_half=use_half,
                            lm_config=lm_config)
            self.inference(test_manifest=test_manifest,
                           model_path=pretrained_path,
                           cuda=cuda,
                           lm_config=lm_config,
                           use_half=use_half)

def build_train_evaluate_model(self,
                               limit_train_batches: int,
                               limit_val_batches: int,
                               epoch: int,
                               batch_size: int,
                               model_config: BiDirectionalConfig,
                               precision: int,
                               gpus: int,
                               folders: bool):
    cuda = gpus > 0

    train_path, val_path, test_path = self.download_data(
        DatasetConfig(
            target_dir=self.target_dir,
            manifest_dir=self.manifest_dir
        ),
        folders=folders
    )

    train_cfg = self.create_training_config(
        limit_train_batches=limit_train_batches,
        limit_val_batches=limit_val_batches,
        max_epochs=epoch,
        batch_size=batch_size,
        train_path=train_path,
        val_path=val_path,
        model_config=model_config,
        precision=precision,
        gpus=gpus
    )
    print("Running Training DeepSpeech Model Smoke Test")
    train(train_cfg)

    # Expected final model path after training
    print(os.listdir(self.model_dir))
    model_path = self.model_dir + '/last.ckpt'
    assert os.path.exists(model_path)

    lm_configs = [
        LMConfig(),  # Test Greedy
        LMConfig(
            decoder_type=DecoderType.beam
        )  # Test Beam Decoder
    ]

    print("Running Inference Smoke Tests")
    for lm_config in lm_configs:
        self.eval_model(
            model_path=model_path,
            test_path=test_path,
            cuda=cuda,
            precision=precision,
            lm_config=lm_config
        )
        self.inference(test_path=test_path,
                       model_path=model_path,
                       cuda=cuda,
                       precision=precision,
                       lm_config=lm_config)

def build_train_evaluate_model(self,
                               epoch: int,
                               batch_size: int,
                               model_config: BiDirectionalConfig,
                               use_half: bool,
                               cuda: bool):
    train_manifest, val_manifest, test_manifest = self.download_data(
        DatasetConfig(target_dir=self.target_dir,
                      manifest_dir=self.manifest_dir))
    train_cfg = self.create_training_config(epoch=epoch,
                                            batch_size=batch_size,
                                            train_manifest=train_manifest,
                                            val_manifest=val_manifest,
                                            model_config=model_config,
                                            cuda=cuda)
    print("Running Training DeepSpeech Model Smoke Test")
    train(train_cfg)

    # Expected final model path after training
    model_path = self.model_dir + '/deepspeech_final.pth'
    assert os.path.exists(model_path)

    lm_configs = [
        LMConfig(),  # Test Greedy
        LMConfig(decoder_type=DecoderType.beam)  # Test Beam Decoder
    ]
    print("Running Inference Smoke Tests")
    for lm_config in lm_configs:
        self.eval_model(model_path=model_path,
                        test_manifest=test_manifest,
                        cuda=cuda,
                        use_half=use_half,
                        lm_config=lm_config)
        self.inference(test_manifest=test_manifest,
                       model_path=model_path,
                       cuda=cuda,
                       use_half=use_half,
                       lm_config=lm_config)

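# A hypothetical driver for the helper above (illustrative, not from the original
# source): how a smoke test might invoke build_train_evaluate_model. The test name
# and the BiDirectionalConfig field values are assumed placeholders, kept small so
# the run stays cheap on CPU-only CI.
def test_train_eval_inference(self):
    model_cfg = BiDirectionalConfig(
        hidden_size=10,   # tiny RNN keeps the smoke test fast
        hidden_layers=1
    )
    self.build_train_evaluate_model(epoch=1,
                                    batch_size=10,
                                    model_config=model_cfg,
                                    use_half=False,  # FP32; no GPU on CI
                                    cuda=False)
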
def load_decoder(labels, cfg: LMConfig):
    if cfg.decoder_type == DecoderType.beam:
        from deepspeech_pytorch.decoder import BeamCTCDecoder
        if cfg.lm_path:
            cfg.lm_path = hydra.utils.to_absolute_path(cfg.lm_path)
        decoder = BeamCTCDecoder(labels=labels,
                                 lm_path=cfg.lm_path,
                                 alpha=cfg.alpha,
                                 beta=cfg.beta,
                                 cutoff_top_n=cfg.cutoff_top_n,
                                 cutoff_prob=cfg.cutoff_prob,
                                 beam_width=cfg.beam_width,
                                 num_processes=cfg.lm_workers)
    else:
        decoder = GreedyDecoder(labels=labels,
                                blank_index=labels.index('_'))
    return decoder

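# A minimal usage sketch for load_decoder (illustrative, not from the original
# source). It assumes deepspeech.pytorch's LMConfig/DecoderType imports and a toy
# label set; 'lm.binary' is a hypothetical KenLM path.
from deepspeech_pytorch.configs.inference_config import LMConfig
from deepspeech_pytorch.enums import DecoderType

labels = ["_", "'", "A", "B", "C"]  # toy labels; real models carry the full alphabet

# Greedy decoding needs no language model and no extra dependencies.
greedy_decoder = load_decoder(labels=labels, cfg=LMConfig())

# Beam search rescored by a KenLM model, weighted by alpha (LM) and beta (word bonus).
beam_decoder = load_decoder(labels=labels,
                            cfg=LMConfig(decoder_type=DecoderType.beam,
                                         lm_path="lm.binary",
                                         alpha=1.0,
                                         beta=1.0))
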
def __init__(
    self,
    model: Optional["DeepSpeech"] = None,
    pretrained_model: Optional[str] = None,
    filename: Optional[str] = None,
    url: Optional[str] = None,
    use_half: bool = False,
    optimizer: Optional["torch.optim.Optimizer"] = None,  # type: ignore
    use_amp: bool = False,
    opt_level: str = "O1",
    decoder_type: str = "greedy",
    lm_path: str = "",
    top_paths: int = 1,
    alpha: float = 0.0,
    beta: float = 0.0,
    cutoff_top_n: int = 40,
    cutoff_prob: float = 1.0,
    beam_width: int = 10,
    lm_workers: int = 4,
    clip_values: Optional["CLIP_VALUES_TYPE"] = None,
    preprocessing_defences: Union["Preprocessor", List["Preprocessor"], None] = None,
    postprocessing_defences: Union["Postprocessor", List["Postprocessor"], None] = None,
    preprocessing: "PREPROCESSING_TYPE" = None,
    device_type: str = "gpu",
    verbose: bool = True,
):
    """
    Initialization of an instance of PyTorchDeepSpeech.

    :param model: DeepSpeech model.
    :param pretrained_model: The choice of pretrained model if a pretrained model is required. Currently this
                             estimator supports 3 different pretrained models consisting of `an4`, `librispeech`
                             and `tedlium`.
    :param filename: Name of the file.
    :param url: Download URL.
    :param use_half: Whether to use FP16 for the pretrained model.
    :param optimizer: The optimizer used to train the estimator.
    :param use_amp: Whether to use the automatic mixed precision tool to enable mixed precision training or
                    gradient computation, e.g. with loss gradient computation. When set to True, this option is
                    only triggered if there are GPUs available.
    :param opt_level: Specify a pure or mixed precision optimization level. Used when use_amp is True. Accepted
                      values are `O0`, `O1`, `O2`, and `O3`.
    :param decoder_type: Decoder type. Either `greedy` or `beam`. This parameter is only used when users want
                         transcription outputs.
    :param lm_path: Path to an (optional) kenlm language model for use with beam search. This parameter is only
                    used when users want transcription outputs.
    :param top_paths: Number of beams to be returned. This parameter is only used when users want transcription
                      outputs.
    :param alpha: The weight used for the language model. This parameter is only used when users want
                  transcription outputs.
    :param beta: Language model word bonus (all words). This parameter is only used when users want
                 transcription outputs.
    :param cutoff_top_n: Cutoff_top_n characters with highest probs in vocabulary will be used in beam search.
                         This parameter is only used when users want transcription outputs.
    :param cutoff_prob: Cutoff probability in pruning. This parameter is only used when users want transcription
                        outputs.
    :param beam_width: The width of beam to be used. This parameter is only used when users want transcription
                       outputs.
    :param lm_workers: Number of language model processes to use. This parameter is only used when users want
                       transcription outputs.
    :param clip_values: Tuple of the form `(min, max)` of floats or `np.ndarray` representing the minimum and
                        maximum values allowed for features. If floats are provided, these will be used as the
                        range of all features. If arrays are provided, each value will be considered the bound
                        for a feature, thus the shape of clip values needs to match the total number of features.
    :param preprocessing_defences: Preprocessing defence(s) to be applied by the estimator.
    :param postprocessing_defences: Postprocessing defence(s) to be applied by the estimator.
    :param preprocessing: Tuple of the form `(subtrahend, divisor)` of floats or `np.ndarray` of values to be
                          used for data preprocessing. The first value will be subtracted from the input. The
                          input will then be divided by the second one.
    :param device_type: Type of device to be used for model and tensors, if `cpu` run on CPU, if `gpu` run on
                        GPU if available otherwise run on CPU.
    """
    import torch  # lgtm [py/repeated-import]

    from deepspeech_pytorch.configs.inference_config import LMConfig
    from deepspeech_pytorch.enums import DecoderType
    from deepspeech_pytorch.utils import load_decoder, load_model

    # Super initialization
    super().__init__(
        model=None,
        clip_values=clip_values,
        channels_first=None,
        preprocessing_defences=preprocessing_defences,
        postprocessing_defences=postprocessing_defences,
        preprocessing=preprocessing,
    )

    self.verbose = verbose

    # Check clip values
    if self.clip_values is not None:
        if not np.all(self.clip_values[0] == -1):
            raise ValueError("This estimator requires normalized input audios with clip_values=(-1, 1).")
        if not np.all(self.clip_values[1] == 1):
            raise ValueError("This estimator requires normalized input audios with clip_values=(-1, 1).")

    # Check postprocessing defences
    if self.postprocessing_defences is not None:
        raise ValueError("This estimator does not support `postprocessing_defences`.")

    # Set cpu/gpu device
    self._device: torch.device
    if device_type == "cpu" or not torch.cuda.is_available():
        self._device = torch.device("cpu")
    else:
        cuda_idx = torch.cuda.current_device()
        self._device = torch.device("cuda:{}".format(cuda_idx))

    self._input_shape = None

    # Load model
    if model is None:
        if pretrained_model == "an4":
            filename, url = (
                "an4_pretrained_v2.pth",
                "https://github.com/SeanNaren/deepspeech.pytorch/releases/download/v2.0/an4_pretrained_v2.pth",
            )

        elif pretrained_model == "librispeech":
            filename, url = (
                "librispeech_pretrained_v2.pth",
                "https://github.com/SeanNaren/deepspeech.pytorch/releases/download/v2.0/"
                "librispeech_pretrained_v2.pth",
            )

        elif pretrained_model == "tedlium":
            filename, url = (
                "ted_pretrained_v2.pth",
                "https://github.com/SeanNaren/deepspeech.pytorch/releases/download/v2.0/ted_pretrained_v2.pth",
            )

        elif pretrained_model is None:
            # If model is None and no pretrained model is selected, then we need to have parameters filename
            # and url to download, extract and load the automatic speech recognition model
            if filename is None or url is None:
                filename, url = (
                    "librispeech_pretrained_v2.pth",
                    "https://github.com/SeanNaren/deepspeech.pytorch/releases/download/v2.0/"
                    "librispeech_pretrained_v2.pth",
                )

        else:
            raise ValueError("The input pretrained model %s is not supported." % pretrained_model)

        # Download model
        model_path = get_file(
            filename=filename, path=config.ART_DATA_PATH, url=url, extract=False, verbose=self.verbose
        )

        # Then load model
        self._model = load_model(device=self._device, model_path=model_path, use_half=use_half)

    else:
        self._model = model

    # Push model to the corresponding device
    self._model.to(self._device)

    # Save first version of the optimizer
    self._optimizer = optimizer
    self._use_amp = use_amp

    # Now create a decoder
    # Create the language model config first
    lm_config = LMConfig()

    # Then setup the config
    if decoder_type == "greedy":
        lm_config.decoder_type = DecoderType.greedy
    elif decoder_type == "beam":
        lm_config.decoder_type = DecoderType.beam
    else:
        raise ValueError("Decoder type %s currently not supported." % decoder_type)

    lm_config.lm_path = lm_path
    lm_config.top_paths = top_paths
    lm_config.alpha = alpha
    lm_config.beta = beta
    lm_config.cutoff_top_n = cutoff_top_n
    lm_config.cutoff_prob = cutoff_prob
    lm_config.beam_width = beam_width
    lm_config.lm_workers = lm_workers

    # Create the decoder with the lm config
    self.decoder = load_decoder(labels=self._model.labels, cfg=lm_config)

    # Setup for AMP use
    if self._use_amp:
        from apex import amp

        if self._optimizer is None:
            logger.warning(
                "An optimizer is needed to use the automatic mixed precision tool, but none was provided. "
                "A default optimizer is used."
            )

            # Create the optimizers
            parameters = self._model.parameters()
            self._optimizer = torch.optim.SGD(parameters, lr=0.01)

        if self._device.type == "cpu":
            enabled = False
        else:
            enabled = True

        self._model, self._optimizer = amp.initialize(
            models=self._model,
            optimizers=self._optimizer,
            enabled=enabled,
            opt_level=opt_level,
            loss_scale=1.0,
        )

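# A minimal construction sketch for the estimator above (illustrative, not from
# the original source). It assumes ART's public import path and that predict()
# accepts a `transcription_output` keyword; the `an4` weights are downloaded on
# first use via get_file.
import numpy as np
from art.estimators.speech_recognition import PyTorchDeepSpeech

# Greedy decoding on CPU with the small `an4` pretrained model.
estimator = PyTorchDeepSpeech(pretrained_model="an4",
                              decoder_type="greedy",
                              device_type="cpu")

# Transcribe one second of dummy audio, normalized to [-1, 1] as the estimator requires.
audio = np.random.uniform(-1, 1, size=(1, 16000)).astype(np.float32)
transcription = estimator.predict(audio, transcription_output=True)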