Exemplo n.º 1
0
    def __init__(self, task, wer_args, zero_infinity, sentence_avg,
                 remove_bpe):
        """Store special-symbol indices and optionally build a KenLM decoder.

        Args:
            task: task object; its ``target_dictionary`` supplies the
                bos/pad/eos indices and is handed to the decoder.
            wer_args: ``None`` to skip building a decoder; otherwise a string
                that evaluates to the 4-tuple
                ``(kenlm_model, lexicon, lm_weight, word_score)``.
            zero_infinity: stored unchanged on ``self.zero_infinity``.
            sentence_avg: stored unchanged on ``self.sentence_avg``.
            remove_bpe: post-processing mode; any falsy value selects
                ``"letter"``.
        """
        super().__init__(task)

        target_dict = task.target_dictionary
        # The dictionary's bos index doubles as the blank index here.
        self.blank_idx = target_dict.bos()
        self.pad_idx = target_dict.pad()
        self.eos_idx = target_dict.eos()
        self.post_process = remove_bpe or "letter"

        if wer_args is None:
            self.w2l_decoder = None
        else:
            from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder

            # NOTE(review): eval() executes arbitrary code; wer_args must only
            # ever come from a trusted configuration source.
            kenlm_path, lexicon_path, lm_weight, word_score = eval(wer_args)

            # Beam size per token and beam threshold share the same cap:
            # at most 50, and never more than the dictionary size.
            capped_vocab = min(50, len(target_dict))
            decoder_opts = Namespace(
                nbest=1,
                criterion="ctc",
                kenlm_model=kenlm_path,
                lexicon=lexicon_path,
                beam=50,
                beam_size_token=capped_vocab,
                beam_threshold=capped_vocab,
                lm_weight=lm_weight,
                word_score=word_score,
                unk_weight=-math.inf,
                sil_weight=0,
            )
            self.w2l_decoder = W2lKenLMDecoder(decoder_opts, target_dict)

        self.zero_infinity = zero_infinity
        self.sentence_avg = sentence_avg
Exemplo n.º 2
0
    def build_generator(cfg: UnsupGenerateConfig):
        """Instantiate the decoder selected by ``cfg.w2l_decoder``.

        Each decoder class is imported inside its own branch, so a backend's
        module is loaded only when that backend is selected. ``task`` is read
        from the enclosing scope.

        Args:
            cfg: generation config; ``cfg.w2l_decoder`` is compared against
                the ``DecoderType`` members. The KALDI branch additionally
                reads ``cfg.kaldi_decoder_config`` and ``cfg.beam``.

        Returns:
            The constructed decoder instance.

        Raises:
            AssertionError: if KALDI is selected but
                ``cfg.kaldi_decoder_config`` is ``None``.
            NotImplementedError: if ``cfg.w2l_decoder`` matches none of the
                handled decoder types.
        """
        w2l_decoder = cfg.w2l_decoder
        if w2l_decoder == DecoderType.VITERBI:
            from examples.speech_recognition.w2l_decoder import W2lViterbiDecoder

            return W2lViterbiDecoder(cfg, task.target_dictionary)
        elif w2l_decoder == DecoderType.KENLM:
            from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder

            return W2lKenLMDecoder(cfg, task.target_dictionary)
        elif w2l_decoder == DecoderType.FAIRSEQ:
            from examples.speech_recognition.w2l_decoder import W2lFairseqLMDecoder

            return W2lFairseqLMDecoder(cfg, task.target_dictionary)
        elif w2l_decoder == DecoderType.KALDI:
            from examples.speech_recognition.kaldi.kaldi_decoder import KaldiDecoder

            # Kept as an assert so the exception type is unchanged; the
            # message makes the failure self-explanatory.
            assert cfg.kaldi_decoder_config is not None, (
                "kaldi_decoder_config must be set when the kaldi decoder is selected"
            )

            return KaldiDecoder(
                cfg.kaldi_decoder_config,
                cfg.beam,
            )
        else:
            # The message lists every decoder this function handles,
            # including kaldi.
            raise NotImplementedError(
                "only wav2letter decoders with (viterbi, kenlm, fairseqlm, kaldi) options are supported at the moment but found "
                + str(w2l_decoder)
            )
Exemplo n.º 3
0
    def build_generator(self, args):
        """Return a wav2letter decoder if one is requested, else defer to the parent.

        Args:
            args: options object; its optional ``w2l_decoder`` attribute
                selects ``"viterbi"`` or ``"kenlm"``. Any other value (or a
                missing attribute) falls through to the parent class.

        Returns:
            A decoder built from ``args`` and ``self.target_dictionary``, or
            whatever the parent ``build_generator`` returns.
        """
        requested = getattr(args, "w2l_decoder", None)

        # Resolve the decoder class first; the import stays inside the branch
        # so the module is loaded only when a wav2letter decoder is requested.
        if requested == "viterbi":
            from examples.speech_recognition.w2l_decoder import (
                W2lViterbiDecoder as decoder_cls,
            )
        elif requested == "kenlm":
            from examples.speech_recognition.w2l_decoder import (
                W2lKenLMDecoder as decoder_cls,
            )
        else:
            return super().build_generator(args)

        return decoder_cls(args, self.target_dictionary)
Exemplo n.º 4
0
def get_decoder(decoder_args_dict, dictionary):
    """Build a wav2letter decoder from a plain dict of options.

    Args:
        decoder_args_dict: mapping of option names to values; it must contain
            ``decoder_type``. It is expanded into a ``Namespace`` that is
            passed to the decoder.
        dictionary: target dictionary handed to the decoder. For ``"kenlm"``
            its length is also used as ``beam_size_token``.

    Returns:
        A ``W2lViterbiDecoder`` for ``"viterbi"``, a ``W2lKenLMDecoder`` for
        ``"kenlm"``, and ``None`` for any other ``decoder_type``.
    """
    decoder_args = Namespace(**decoder_args_dict)
    decoder_kind = decoder_args.decoder_type

    if decoder_kind == "viterbi":
        from examples.speech_recognition.w2l_decoder import W2lViterbiDecoder

        return W2lViterbiDecoder(decoder_args, dictionary)

    if decoder_kind != "kenlm":
        # Unknown decoder types are reported to the caller as None.
        return None

    from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder

    decoder_args.beam_size_token = len(dictionary)
    if isinstance(decoder_args.unk_weight, str):
        # A string unk_weight is evaluated as a Python expression.
        # NOTE(review): eval() executes arbitrary code; the value must only
        # ever come from a trusted source.
        decoder_args.unk_weight = eval(decoder_args.unk_weight)
    return W2lKenLMDecoder(decoder_args, dictionary)
Exemplo n.º 5
0
    def build_generator(args):
        """Instantiate the wav2letter decoder named by ``args.w2l_decoder``.

        Each decoder class is imported inside its own branch, so the module
        is loaded only when that decoder is selected. ``task`` is read from
        the enclosing scope.

        Args:
            args: options object; its optional ``w2l_decoder`` attribute must
                be ``"viterbi"``, ``"kenlm"`` or ``"fairseqlm"``.

        Returns:
            The decoder built from ``args`` and ``task.target_dictionary``.

        Raises:
            NotImplementedError: if ``w2l_decoder`` is missing or is not one
                of the supported names.
        """
        w2l_decoder = getattr(args, "w2l_decoder", None)
        if w2l_decoder == "viterbi":
            from examples.speech_recognition.w2l_decoder import W2lViterbiDecoder

            return W2lViterbiDecoder(args, task.target_dictionary)
        elif w2l_decoder == "kenlm":
            from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder

            # Dumps the dictionary symbols to stdout before decoder creation.
            print(task.target_dictionary.symbols)
            return W2lKenLMDecoder(args, task.target_dictionary)
        elif w2l_decoder == "fairseqlm":
            from examples.speech_recognition.w2l_decoder import W2lFairseqLMDecoder

            # Dumps the dictionary symbols to stdout before decoder creation.
            print(task.target_dictionary.symbols)
            return W2lFairseqLMDecoder(args, task.target_dictionary)
        else:
            # Zero-argument super() cannot work here: this function's first
            # parameter is ``args``, not an instance, so there is no parent
            # implementation to delegate to. Fail with an explicit error.
            raise NotImplementedError(
                "only wav2letter decoders with (viterbi, kenlm, fairseqlm) options are supported at the moment but found "
                + str(w2l_decoder)
            )
Exemplo n.º 6
0
    def build_generator(args):
        """Instantiate the wav2letter decoder named by ``args.w2l_decoder``.

        Each decoder class is imported inside its own branch, so the module
        is loaded only when that decoder is selected. ``task`` is read from
        the enclosing scope.

        Args:
            args: options object; its optional ``w2l_decoder`` attribute must
                be ``"viterbi"``, ``"kenlm"`` or ``"fairseqlm"``.

        Returns:
            The decoder built from ``args`` and ``task.target_dictionary``.

        Raises:
            NotImplementedError: if ``w2l_decoder`` is missing or is not one
                of the supported names.
        """
        w2l_decoder = getattr(args, "w2l_decoder", None)
        if w2l_decoder == "viterbi":
            from examples.speech_recognition.w2l_decoder import W2lViterbiDecoder

            return W2lViterbiDecoder(args, task.target_dictionary)
        elif w2l_decoder == "kenlm":
            from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder

            return W2lKenLMDecoder(args, task.target_dictionary)
        elif w2l_decoder == "fairseqlm":
            from examples.speech_recognition.w2l_decoder import W2lFairseqLMDecoder

            return W2lFairseqLMDecoder(args, task.target_dictionary)
        else:
            # Raise instead of printing and implicitly returning None, so an
            # unsupported decoder fails here rather than when the missing
            # generator is first used.
            raise NotImplementedError(
                "only wav2letter decoders with (viterbi, kenlm, fairseqlm) options are supported at the moment but found "
                + str(w2l_decoder)
            )
Exemplo n.º 7
0
Arquivo: ctc.py Projeto: tma15/fairseq
    def __init__(self, cfg: CtcCriterionConfig, task: FairseqTask):
        """Store special-symbol indices and optionally build a KenLM decoder.

        Args:
            cfg: criterion config. Reads ``post_process``, ``wer_args``,
                ``wer_kenlm_model``, ``wer_lexicon``, ``wer_lm_weight``,
                ``wer_word_score``, ``zero_infinity`` and ``sentence_avg``.
                When ``wer_args`` is set, the four ``wer_*`` fields on ``cfg``
                are overwritten with its evaluated contents.
            task: task object; its ``target_dictionary`` supplies the pad/eos
                indices and is handed to the decoder. If it has a
                ``blank_symbol`` attribute, that symbol's dictionary index is
                the blank index; otherwise the blank index is 0.
        """
        super().__init__(task)

        target_dict = task.target_dictionary
        if hasattr(task, "blank_symbol"):
            self.blank_idx = target_dict.index(task.blank_symbol)
        else:
            self.blank_idx = 0
        self.pad_idx = target_dict.pad()
        self.eos_idx = target_dict.eos()
        self.post_process = cfg.post_process

        if cfg.wer_args is not None:
            # wer_args packs the four WER decoder settings into one string.
            # NOTE(review): eval() executes arbitrary code; wer_args must only
            # ever come from a trusted configuration source.
            kenlm_model, lexicon, lm_weight, word_score = eval(cfg.wer_args)
            cfg.wer_kenlm_model = kenlm_model
            cfg.wer_lexicon = lexicon
            cfg.wer_lm_weight = lm_weight
            cfg.wer_word_score = word_score

        if cfg.wer_kenlm_model is None or cfg.wer_kenlm_model == "":
            # No language model configured: no decoder is built.
            self.w2l_decoder = None
        else:
            from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder

            # Beam size per token and beam threshold share the same cap:
            # at most 50, and never more than the dictionary size.
            capped_vocab = min(50, len(target_dict))
            decoder_opts = Namespace(
                nbest=1,
                criterion="ctc",
                kenlm_model=cfg.wer_kenlm_model,
                lexicon=cfg.wer_lexicon,
                beam=50,
                beam_size_token=capped_vocab,
                beam_threshold=capped_vocab,
                lm_weight=cfg.wer_lm_weight,
                word_score=cfg.wer_word_score,
                unk_weight=-math.inf,
                sil_weight=0,
            )
            self.w2l_decoder = W2lKenLMDecoder(decoder_opts, target_dict)

        self.zero_infinity = cfg.zero_infinity
        self.sentence_avg = cfg.sentence_avg
Exemplo n.º 8
0
    def build_generator(args):
        """Instantiate the decoder named by ``args.w2l_decoder``.

        Each decoder class is imported inside its own branch, so the module
        is loaded only when that decoder is selected. ``task`` is read from
        the enclosing scope.

        Args:
            args: options object; its optional ``w2l_decoder`` attribute must
                be one of ``"viterbi"``, ``"kenlm"``, ``"fairseqlm"``,
                ``"ctc_decoder"``, ``"cif_decoder"``, ``"cif_lm_decoder"``,
                ``"cif_bert_decoder"``, ``"seq2seq_decoder"`` or
                ``"seq2seq_lm_decoder"``.

        Returns:
            The decoder built from ``args`` and ``task.target_dictionary``.

        Raises:
            NotImplementedError: if ``w2l_decoder`` is missing or is not one
                of the supported names.
        """
        w2l_decoder = getattr(args, "w2l_decoder", None)
        if w2l_decoder == "viterbi":
            from examples.speech_recognition.w2l_decoder import W2lViterbiDecoder

            return W2lViterbiDecoder(args, task.target_dictionary)
        elif w2l_decoder == "kenlm":
            from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder

            return W2lKenLMDecoder(args, task.target_dictionary)
        elif w2l_decoder == "fairseqlm":
            from examples.speech_recognition.w2l_decoder import W2lFairseqLMDecoder

            return W2lFairseqLMDecoder(args, task.target_dictionary)
        elif w2l_decoder == "ctc_decoder":
            from examples.speech_recognition.ctc_decoder import CTCDecoder

            return CTCDecoder(args, task.target_dictionary)
        elif w2l_decoder == "cif_decoder":
            from examples.speech_recognition.cif_decoder import CIFDecoder

            # The plain variant passes a single empty dict as the third argument.
            return CIFDecoder(args, task.target_dictionary, {})
        elif w2l_decoder == "cif_lm_decoder":
            from examples.speech_recognition.cif_decoder import CIFDecoder

            # The "_lm" variant passes a pair of empty dicts instead.
            return CIFDecoder(args, task.target_dictionary, ({}, {}))
        elif w2l_decoder == "cif_bert_decoder":
            from examples.speech_recognition.cif_bert_decoder import CIF_BERT_Decoder

            return CIF_BERT_Decoder(args, task.target_dictionary)
        elif w2l_decoder == "seq2seq_decoder":
            from examples.speech_recognition.seq2seq_decoder import Seq2seqDecoder

            # The plain variant passes a single empty dict as the third argument.
            return Seq2seqDecoder(args, task.target_dictionary, {})
        elif w2l_decoder == "seq2seq_lm_decoder":
            from examples.speech_recognition.seq2seq_decoder import Seq2seqDecoder

            # The "_lm" variant passes a pair of empty dicts instead.
            return Seq2seqDecoder(args, task.target_dictionary, ({}, {}))
        else:
            # Zero-argument super() cannot work here: this function's first
            # parameter is ``args``, not an instance, so there is no parent
            # implementation to delegate to. Fail with an explicit error.
            raise NotImplementedError(
                "only decoders with (viterbi, kenlm, fairseqlm, ctc_decoder, "
                "cif_decoder, cif_lm_decoder, cif_bert_decoder, seq2seq_decoder, "
                "seq2seq_lm_decoder) options are supported at the moment but found "
                + str(w2l_decoder)
            )