Пример #1
0
 def build_generator(
     self,
     models,
     args,
     seq_gen_cls=None,
     extra_gen_cls_kwargs=None,
 ):
     """Build a generator that strips language-tag tokens from its output.

     Args:
         models: Forwarded unchanged to the parent ``build_generator``.
         args: Generation arguments. ``args.prefix_size`` is validated
             here; the object is then forwarded to the parent.
         seq_gen_cls: Optional generator class, forwarded to the parent.
         extra_gen_cls_kwargs: Optional dict of extra generator keyword
             arguments. It is copied rather than modified in place, and
             its ``"symbols_to_strip_from_output"`` entry is always set
             to the language-tag token ids of ``self.tgt_dict``.

     Returns:
         Whatever the parent class's ``build_generator`` returns.

     Raises:
         ValueError: If ``self.data_cfg.prepend_tgt_lang_tag`` is set and
             ``args.prefix_size`` is not 1.
     """
     if self.data_cfg.prepend_tgt_lang_tag and args.prefix_size != 1:
         raise ValueError('Please set "--prefix-size 1" since '
                          "target language ID token is prepended as BOS.")

     # Ids of every target-dictionary symbol recognised as a language tag.
     lang_token_ids = {
         i
         for s, i in self.tgt_dict.indices.items()
         if SpeechToTextDataset.is_lang_tag(s)
     }

     # Work on a copy so the caller's dict is never mutated as a side effect.
     gen_kwargs = (
         {} if extra_gen_cls_kwargs is None else dict(extra_gen_cls_kwargs)
     )
     gen_kwargs["symbols_to_strip_from_output"] = lang_token_ids

     # Forward the caller's seq_gen_cls instead of silently discarding it.
     return super().build_generator(
         models,
         args,
         seq_gen_cls=seq_gen_cls,
         extra_gen_cls_kwargs=gen_kwargs)
Пример #2
0
    def build_generator(
        self,
        models,
        args,
        seq_gen_cls=None,
        extra_gen_cls_kwargs=None,
    ):
        """Build a generator with language-tag stripping and a custom EOS id.

        Args:
            models: Forwarded unchanged to the parent ``build_generator``.
            args: Generation arguments. ``args.prefix_size`` is validated
                and ``args.eos_token`` (if present and not ``None``) is
                used as the EOS symbol; the object is then forwarded to
                the parent.
            seq_gen_cls: Optional generator class, forwarded to the parent.
            extra_gen_cls_kwargs: Optional dict of extra generator keyword
                arguments. It is copied rather than modified in place; the
                ``"symbols_to_strip_from_output"`` and ``"eos"`` entries
                are always overwritten.

        Returns:
            Whatever the parent class's ``build_generator`` returns.

        Raises:
            ValueError: If ``self.data_cfg.prepend_tgt_lang_tag`` is set and
                ``args.prefix_size`` is not 1.
            Warning: If ``self.data_cfg.prepend_bos_and_append_tgt_lang_tag``
                is set but no EOS token could be resolved.
        """
        if self.data_cfg.prepend_tgt_lang_tag and args.prefix_size != 1:
            raise ValueError(
                'Please set "--prefix-size 1" since '
                "target language ID token is prepended as BOS."
            )

        # Ids of every target-dictionary symbol recognised as a language tag.
        lang_token_ids = {
            i
            for s, i in self.tgt_dict.indices.items()
            if SpeechToTextDataset.is_lang_tag(s)
        }

        # Work on a copy so the caller's dict is never mutated as a side effect.
        gen_kwargs = (
            {} if extra_gen_cls_kwargs is None else dict(extra_gen_cls_kwargs)
        )
        gen_kwargs["symbols_to_strip_from_output"] = lang_token_ids

        # A value supplied through args wins; otherwise fall back to the
        # "eos_token" entry of the data config (None when absent).
        eos_token = (
            args.eos_token
            if "eos_token" in args and args.eos_token is not None
            else self.data_cfg.config.get("eos_token", None)
        )

        if self.data_cfg.prepend_bos_and_append_tgt_lang_tag and not eos_token:
            # NOTE(review): this raises Warning as an exception (it aborts the
            # call rather than emitting a warning). Kept as-is so the exception
            # type callers may already handle does not change.
            raise Warning(
                "Please provide --eos_token to replace eos in sequence generator"
            )

        # Translate the EOS symbol into its dictionary index; None means no
        # token was configured.
        eos_id = self.tgt_dict.index(eos_token) if eos_token else None
        gen_kwargs["eos"] = eos_id

        # Forward the caller's seq_gen_cls instead of silently discarding it.
        return super().build_generator(
            models, args, seq_gen_cls=seq_gen_cls, extra_gen_cls_kwargs=gen_kwargs
        )