@dataclass
class NLUFinetuningConfig(AudioPretrainingConfig):
    # Options for reporting WER metrics during validation. Only applicable to
    # Seq2Seq models during fine-tuning
    eval_wer: bool = field(
        default=False, metadata={"help": "compute WER for Seq2Seq models"}
    )
    eval_wer_parse: bool = field(
        default=False, metadata={"help": "compute parse WER for Seq2Seq models"}
    )
    eval_wer_config: GenerationConfig = field(
        default_factory=lambda: GenerationConfig(),
        metadata={"help": "beam search config for evaluating wer during training"},
    )
    eval_wer_tokenizer: Any = field(
        default=None,
        metadata={"help": "tokenizer config for evaluating wer during training"},
    )
    eval_wer_post_process: str = field(
        default="letter",
        metadata={
            "help": "remove BPE tokens before scoring (can be sentencepiece, letter, and more)"
        },
    )
    eval_bleu: bool = field(
        default=False, metadata={"help": "evaluation with BLEU scores"}
    )
    eval_bleu_detok: Optional[str] = field(
        default=None,
        metadata={
            "help": "detokenize before computing BLEU (e.g., 'moses'); "
            "required if using --eval-bleu; use 'space' to disable "
            "detokenization; see fairseq.data.encoders for other options"
        },
    )
    eval_bleu_detok_args: str = field(
        default="{}", metadata={"help": "args for building the tokenizer, if needed"}
    )
    eval_tokenized_bleu: bool = field(
        default=False, metadata={"help": "compute tokenized BLEU instead of sacrebleu"}
    )
    eval_bleu_remove_bpe: Optional[str] = field(
        default=None, metadata={"help": "remove BPE before computing BLEU"}
    )
    eval_bleu_args: str = field(
        default="{}",
        metadata={
            "help": "generation args for BLEU scoring, e.g., "
            '\'{"beam": 4, "lenpen": 0.6}\''
        },
    )
    eval_bleu_print_samples: bool = field(
        default=False, metadata={"help": "print sample generations during validation"}
    )
    autoregressive: bool = field(
        default=False,
        metadata={
            "help": "required for autoregressive decoders (like seq2seq models); "
            "adds 'prev_output_tokens' to input and appends eos to target"
        },
    )
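# Note: the eval_bleu_args / eval_bleu_detok_args fields above carry JSON
# strings rather than structured configs, so a consumer has to decode them
# into keyword arguments. A minimal sketch using only the standard library
# (the example value mirrors the help text):
import json

eval_bleu_args = '{"beam": 4, "lenpen": 0.6}'
gen_overrides = json.loads(eval_bleu_args or "{}")
assert gen_overrides == {"beam": 4, "lenpen": 0.6}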
@dataclass
class InferConfig(FairseqDataclass):
    task: Any = None
    # default_factory avoids mutable dataclass defaults, which Python 3.11+
    # rejects for unhashable instances like these sub-configs
    decoding: DecodingConfig = field(default_factory=DecodingConfig)
    common: CommonConfig = field(default_factory=CommonConfig)
    common_eval: CommonEvalConfig = field(default_factory=CommonEvalConfig)
    checkpoint: CheckpointConfig = field(default_factory=CheckpointConfig)
    generation: GenerationConfig = field(default_factory=GenerationConfig)
    distributed_training: DistributedTrainingConfig = field(
        default_factory=DistributedTrainingConfig
    )
    dataset: DatasetConfig = field(default_factory=DatasetConfig)
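# A self-contained sketch of how a composed config like InferConfig behaves
# under OmegaConf (which fairseq's hydra entry points build on). The stand-in
# dataclasses below are illustrative, not the real fairseq ones:
from dataclasses import dataclass, field
from omegaconf import OmegaConf


@dataclass
class DecodingCfg:
    beam: int = 5


@dataclass
class InferCfg:
    decoding: DecodingCfg = field(default_factory=DecodingCfg)


cfg = OmegaConf.structured(InferCfg)
cfg.merge_with(OmegaConf.from_dotlist(["decoding.beam=10"]))  # CLI-style override
assert cfg.decoding.beam == 10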
def build_generator(self, models, cfg: GenerationConfig):
    if cfg.score_reference:
        cfg.score_reference = False
        logger.warning(
            "--score-reference is not applicable to speech recognition, ignoring it."
        )

    from espresso.tools.generate_log_probs_for_decoding import GenerateLogProbsForDecoding

    apply_log_softmax = getattr(cfg, "apply_log_softmax", False)
    return GenerateLogProbsForDecoding(models, apply_log_softmax=apply_log_softmax)
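# The getattr fallback above keeps build_generator compatible with generation
# configs that predate the apply_log_softmax field; a self-contained
# illustration with a hypothetical stripped-down config:
from dataclasses import dataclass


@dataclass
class OldGenerationConfig:
    score_reference: bool = False


old_cfg = OldGenerationConfig()
assert getattr(old_cfg, "apply_log_softmax", False) is False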
@dataclass
class AudioPretrainingConfig(FairseqDataclass):
    data: str = field(default=MISSING, metadata={"help": "path to data directory"})
    labels: Optional[str] = field(
        default=None,
        metadata={"help": "extension of the label file to load, used for fine-tuning"},
    )
    sample_rate: int = field(
        default=16_000,
        metadata={
            "help": "target sample rate. audio files will be up/down sampled to this rate"
        },
    )
    normalize: bool = field(
        default=False,
        metadata={"help": "if set, normalizes input to have 0 mean and unit variance"},
    )
    enable_padding: bool = field(
        default=False, metadata={"help": "pad shorter samples instead of cropping"}
    )
    max_sample_size: Optional[int] = field(
        default=None, metadata={"help": "max sample size to crop to for batching"}
    )
    min_sample_size: Optional[int] = field(
        default=None, metadata={"help": "min sample size to skip small examples"}
    )
    # Options for reporting WER metrics during validation. Only applicable to
    # Seq2Seq models during fine-tuning
    eval_wer: bool = field(
        default=False, metadata={"help": "compute WER for Seq2Seq models"}
    )
    eval_wer_config: GenerationConfig = field(
        default_factory=lambda: GenerationConfig(),
        metadata={"help": "beam search config for evaluating wer during training"},
    )
    eval_wer_tokenizer: Any = field(
        default=None,
        metadata={"help": "tokenizer config for evaluating wer during training"},
    )
    eval_wer_post_process: str = field(
        default="letter",
        metadata={
            "help": "remove BPE tokens before scoring (can be sentencepiece, letter, and more)"
        },
    )
    autoregressive: bool = field(
        default=False,
        metadata={
            "help": "required for autoregressive decoders (like seq2seq models); "
            "adds 'prev_output_tokens' to input and appends eos to target"
        },
    )
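# eval_wer_post_process names a scheme applied to hypotheses and references
# before scoring (fairseq ships the real implementation as
# fairseq.data.data_utils.post_process). A self-contained sketch of what the
# default "letter" scheme amounts to: "|" marks word boundaries and spaces
# separate characters, so both are undone before computing WER:
def letter_post_process(sentence: str) -> str:
    return sentence.replace(" ", "").replace("|", " ").strip()


assert letter_post_process("h e l l o | w o r l d |") == "hello world"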
def add_generation_args(parser):
    group = parser.add_argument_group("Generation")
    add_common_eval_args(group)
    gen_parser_from_dataclass(group, GenerationConfig())
    return group
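# A hedged usage sketch: add_generation_args hangs the dataclass-derived
# flags off a plain argparse parser, so GenerationConfig fields such as
# "beam" become CLI flags (assumes the fairseq helpers used above are in
# scope):
import argparse

parser = argparse.ArgumentParser("generate")
add_generation_args(parser)
args = parser.parse_args(["--beam", "10"])
print(args.beam)  # -> 10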
@dataclass
class AudioPretrainingConfig(FairseqDataclass):
    data: str = field(default=MISSING, metadata={"help": "path to data directory"})
    label_dir: Optional[str] = field(
        default=None, metadata={"help": "path to label directory"}
    )
    labels: Optional[str] = field(
        default=None,
        metadata={"help": "extension of the label file to load, used for fine-tuning"},
    )
    binarized_dataset: bool = field(
        default=False,
        metadata={
            "help": "if true, loads binarized dataset (useful for very large datasets). "
            "See examples/wav2vec/scripts/binarize_manifest.sh"
        },
    )
    sample_rate: int = field(
        default=16_000,
        metadata={
            "help": "target sample rate. audio files will be up/down sampled to this rate"
        },
    )
    normalize: bool = field(
        default=False,
        metadata={"help": "if set, normalizes input to have 0 mean and unit variance"},
    )
    enable_padding: bool = field(
        default=False, metadata={"help": "pad shorter samples instead of cropping"}
    )
    max_sample_size: Optional[int] = field(
        default=None, metadata={"help": "max sample size to crop to for batching"}
    )
    min_sample_size: Optional[int] = field(
        default=None, metadata={"help": "min sample size to skip small examples"}
    )
    # Options for reporting WER metrics during validation. Only applicable to
    # Seq2Seq models during fine-tuning
    eval_wer: bool = field(
        default=False, metadata={"help": "compute WER for Seq2Seq models"}
    )
    eval_wer_config: GenerationConfig = field(
        default_factory=lambda: GenerationConfig(),
        metadata={"help": "beam search config for evaluating wer during training"},
    )
    eval_wer_tokenizer: Any = field(
        default=None,
        metadata={"help": "tokenizer config for evaluating wer during training"},
    )
    eval_wer_post_process: str = field(
        default="letter",
        metadata={
            "help": "remove BPE tokens before scoring (can be sentencepiece, letter, and more)"
        },
    )
    autoregressive: bool = field(
        default=False,
        metadata={
            "help": "required for autoregressive decoders (like seq2seq models); "
            "adds 'prev_output_tokens' to input and appends eos to target"
        },
    )
    num_batch_buckets: int = field(
        default=0,
        metadata={"help": "number of buckets"},
    )
    precompute_mask_indices: bool = field(
        default=False,
        metadata={"help": "flag to compute mask indices in data preparation."},
    )
    # The following are needed to precompute mask and mask channel indices
    # before model's forward.
    # mask_length: Optional[int] = II("model.mask_length")
    # mask_prob: Optional[float] = II("model.mask_prob")
    # mask_selection: Optional[str] = II("model.mask_selection")
    # mask_other: Optional[float] = II("model.mask_other")
    # no_mask_overlap: Optional[bool] = II("model.no_mask_overlap")
    # mask_min_space: Optional[int] = II("model.mask_min_space")
    # mask_channel_length: Optional[int] = II("model.mask_channel_length")
    # mask_channel_prob: Optional[float] = II("model.mask_channel_prob")
    # mask_channel_selection: Optional[str] = II("model.mask_channel_selection")
    # mask_channel_other: Optional[float] = II("model.mask_channel_other")
    # no_mask_channel_overlap: Optional[bool] = II("model.no_mask_channel_overlap")
    # mask_channel_min_space: Optional[int] = II("model.mask_channel_min_space")
    # conv_feature_layers: Optional[str] = II("model.conv_feature_layers")
    # encoder_embed_dim: Optional[int] = II("model.encoder_embed_dim")
    tpu: bool = II("common.tpu")
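# tpu: bool = II("common.tpu") above is an OmegaConf interpolation: the field
# resolves against the composed config tree at access time, and the
# commented-out fields would mirror model.* the same way. A self-contained
# sketch using illustrative dataclasses:
from dataclasses import dataclass, field
from omegaconf import II, OmegaConf


@dataclass
class CommonCfg:
    tpu: bool = False


@dataclass
class TaskCfg:
    tpu: bool = II("common.tpu")  # follows common.tpu in the parent config


@dataclass
class RootCfg:
    common: CommonCfg = field(default_factory=CommonCfg)
    task: TaskCfg = field(default_factory=TaskCfg)


root = OmegaConf.structured(RootCfg)
root.common.tpu = True
assert root.task.tpu is True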
@dataclass
class AudioPretrainingConfig(FairseqDataclass):
    data: str = field(default=MISSING, metadata={"help": "path to data directory"})
    labels: Optional[str] = field(
        default=None,
        metadata={"help": "extension of the label file to load, used for fine-tuning"},
    )
    binarized_dataset: bool = field(
        default=False,
        metadata={
            "help": "if true, loads binarized dataset (useful for very large datasets). "
            "See examples/wav2vec/scripts/binarize_manifest.sh"
        },
    )
    sample_rate: int = field(
        default=16_000,
        metadata={
            "help": "target sample rate. audio files will be up/down sampled to this rate"
        },
    )
    normalize: bool = field(
        default=False,
        metadata={"help": "if set, normalizes input to have 0 mean and unit variance"},
    )
    enable_padding: bool = field(
        default=False, metadata={"help": "pad shorter samples instead of cropping"}
    )
    max_sample_size: Optional[int] = field(
        default=None, metadata={"help": "max sample size to crop to for batching"}
    )
    min_sample_size: Optional[int] = field(
        default=None, metadata={"help": "min sample size to skip small examples"}
    )
    dataset_sampling_alpha: Optional[float] = field(
        default=0.5,
        metadata={"help": "smoothing alpha for sampling ratios across datasets"},
    )
    # Options for reporting WER metrics during validation. Only applicable to
    # Seq2Seq models during fine-tuning
    eval_wer: bool = field(
        default=False, metadata={"help": "compute WER for Seq2Seq models"}
    )
    eval_wer_config: GenerationConfig = field(
        default_factory=lambda: GenerationConfig(),
        metadata={"help": "beam search config for evaluating wer during training"},
    )
    eval_wer_tokenizer: Any = field(
        default=None,
        metadata={"help": "tokenizer config for evaluating wer during training"},
    )
    eval_wer_post_process: str = field(
        default="letter",
        metadata={
            "help": "remove BPE tokens before scoring (can be sentencepiece, letter, and more)"
        },
    )
    autoregressive: bool = field(
        default=False,
        metadata={
            "help": "required for autoregressive decoders (like seq2seq models); "
            "adds 'prev_output_tokens' to input and appends eos to target"
        },
    )
    num_batch_buckets: int = field(
        default=0,
        metadata={"help": "number of buckets"},
    )
    precompute_mask_indices: bool = field(
        default=False,
        metadata={"help": "flag to compute mask indices in data preparation."},
    )
    inferred_w2v_config: Optional[InferredW2vConfig] = field(
        default=None,
        metadata={
            "help": "wav2vec 2.0 masking arguments used to pre-compute masks (required for TPU)"
        },
    )
    tpu: bool = II("common.tpu")
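# dataset_sampling_alpha points at temperature-style smoothing of per-dataset
# sampling ratios, as commonly used in multi-corpus training; the exact
# formula used by this task is an assumption here, but the usual form raises
# each dataset's empirical share to the power alpha and renormalizes, so
# alpha < 1 upsamples small datasets:
def smoothed_sampling_ratios(sizes, alpha=0.5):
    shares = [s / sum(sizes) for s in sizes]
    smoothed = [p ** alpha for p in shares]
    total = sum(smoothed)
    return [p / total for p in smoothed]


print(smoothed_sampling_ratios([900, 100]))  # ≈ [0.75, 0.25]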