Пример #1
0
    def add_args(parser):
        """Add task-specific arguments to the parser.

        Registers the inference source/target language options, the
        ``--lang-pairs`` list (processed through ``FileContentsAction``) and
        the ``--keep-inference-langtok`` switch, then hands ``parser`` to
        ``SamplingMethod.add_arguments`` and
        ``MultilingualDatasetManager.add_args`` so they can add their own
        options.

        Args:
            parser: object exposing an argparse-style ``add_argument`` method.
        """
        # fmt: off
        # (flags, keyword settings) pairs, listed in registration order.
        option_table = [
            (('-s', '--source-lang'), {
                'default': None,
                'metavar': 'SRC',
                'help': 'inference source language',
            }),
            (('-t', '--target-lang'), {
                'default': None,
                'metavar': 'TARGET',
                'help': 'inference target language',
            }),
            (('--lang-pairs',), {
                'default': None,
                'metavar': 'PAIRS',
                'help': 'comma-separated list of language pairs (in training order): en-de,en-fr,de-fr',
                'action': FileContentsAction,
            }),
            (('--keep-inference-langtok',), {
                'action': 'store_true',
                'help': 'keep language tokens in inference output (e.g. for analysis or debugging)',
            }),
        ]
        for flags, settings in option_table:
            parser.add_argument(*flags, **settings)

        SamplingMethod.add_arguments(parser)
        MultilingualDatasetManager.add_args(parser)
Пример #2
0
    def add_args(parser):
        """Add task-specific arguments to the parser.

        Registers the inference source/target language options and the
        ``--lang-pairs`` list, then passes ``parser`` on to
        ``SamplingMethod.add_arguments`` and
        ``MultilingualDatasetManager.add_args``.

        Args:
            parser: object exposing an argparse-style ``add_argument`` method.
        """
        # fmt: off
        register = parser.add_argument

        register(
            '-s', '--source-lang',
            default=None,
            metavar='SRC',
            help='inference source language',
        )
        register(
            '-t', '--target-lang',
            default=None,
            metavar='TARGET',
            help='inference target language',
        )
        register(
            '--lang-pairs',
            default=None,
            metavar='PAIRS',
            help='comma-separated list of language pairs (in training order): en-de,en-fr,de-fr',
        )

        SamplingMethod.add_arguments(parser)
        MultilingualDatasetManager.add_args(parser)
    def __init__(self, args, langs, dicts, training):
        """Set up language pairs, dictionaries, the sampler and the data manager.

        Args:
            args: parsed options. ``args.lang_pairs`` is used when
                ``training`` is truthy; otherwise ``args.source_lang`` and
                ``args.target_lang`` are joined into a single pair.
            langs: stored as ``self.langs`` and forwarded to the data manager.
            dicts: stored as ``self.dicts``, validated through
                ``self.check_dicts`` and forwarded to the data manager.
            training: selects which of the two language-pair sources is used.
        """
        super().__init__(args)
        self.langs = langs
        self.dicts = dicts
        self.training = training

        # Training takes the configured pair list as-is; otherwise a single
        # "<source>-<target>" pair is built from the inference options.
        self.lang_pairs = (
            args.lang_pairs
            if training
            else [f"{args.source_lang}-{args.target_lang}"]
        )

        # Evaluation normally covers every pair in lang_pairs. Subclasses
        # (other multitask setups, or runs optimizing for particular
        # languages) may want a different subset, so it is kept as a
        # separate attribute they can override.
        self.eval_lang_pairs = self.lang_pairs

        # models.build_model() builds its encoder-decoder pairs from
        # model_lang_pairs, which lets multitask subclasses build models for
        # pairs other than the input lang_pairs.
        self.model_lang_pairs = self.lang_pairs

        # Each pair is "<src>-<tgt>": the first piece is the source language
        # and the second piece is the target language.
        self.source_langs = [pair.split("-")[0] for pair in self.lang_pairs]
        self.target_langs = [pair.split("-")[1] for pair in self.lang_pairs]
        self.check_dicts(self.dicts, self.source_langs, self.target_langs)

        self.sampling_method = SamplingMethod.build_sampler(args, self)
        self.data_manager = MultilingualDatasetManager.setup_data_manager(
            args,
            self.lang_pairs,
            langs,
            dicts,
            self.sampling_method,
        )
    def add_args(parser):
        """Add task-specific arguments to the parser.

        Registers the inference language options, ``--lang-pairs``,
        ``--keep-inference-langtok`` and four integer-valued hardware
        options (``--inter``, ``--intra``, ``--benchmark``,
        ``--allow_tf32``), then passes ``parser`` on to
        ``SamplingMethod.add_arguments`` and
        ``MultilingualDatasetManager.add_args``.

        Args:
            parser: object exposing an argparse-style ``add_argument`` method.
        """
        # fmt: off
        parser.add_argument('-s', '--source-lang', default=None, metavar='SRC',
                            help='inference source language')
        parser.add_argument('-t', '--target-lang', default=None, metavar='TARGET',
                            help='inference target language')
        parser.add_argument('--lang-pairs', default=None, metavar='PAIRS',
                            help='comma-separated list of language pairs (in training order): en-de,en-fr,de-fr')
        parser.add_argument('--keep-inference-langtok', action='store_true',
                            help='keep language tokens in inference output (e.g. for analysis or debugging)')

        # Hardware parameters: thread-count options default to 1 ...
        for flag, description in (
            ('--inter', "inter_op_parallelism_threads"),
            ('--intra', "intra_op_parallelism_threads"),
        ):
            parser.add_argument(flag, type=int, default=1, help=description)

        # ... while the integer on/off options default to 0.
        for flag, description in (
            ('--benchmark', "enable benchmark"),
            ('--allow_tf32', "enable allow_tf32"),
        ):
            parser.add_argument(flag, type=int, default=0, help=description)

        SamplingMethod.add_arguments(parser)
        MultilingualDatasetManager.add_args(parser)
    def add_args(parser):
        """Add task-specific arguments to the parser.

        Registers three groups of options on ``parser``:

        * inference language selection (``--source-lang``, ``--target-lang``,
          ``--lang-pairs``, ``--keep-inference-langtok``);
        * the ``--eval-bleu*`` options for reporting BLEU during validation;
        * whatever ``SamplingMethod.add_arguments`` and
          ``MultilingualDatasetManager.add_args`` add themselves.

        Args:
            parser: object exposing an argparse-style ``add_argument`` method.
        """
        # fmt: off
        parser.add_argument('-s',
                            '--source-lang',
                            default=None,
                            metavar='SRC',
                            help='inference source language')
        parser.add_argument('-t',
                            '--target-lang',
                            default=None,
                            metavar='TARGET',
                            help='inference target language')
        parser.add_argument(
            '--lang-pairs',
            default=None,
            metavar='PAIRS',
            help=
            'comma-separated list of language pairs (in training order): en-de,en-fr,de-fr'
        )
        parser.add_argument(
            '--keep-inference-langtok',
            action='store_true',
            help=
            'keep language tokens in inference output (e.g. for analysis or debugging)'
        )

        # options for reporting BLEU during validation
        parser.add_argument('--eval-bleu',
                            action='store_true',
                            help='evaluation with BLEU scores')
        parser.add_argument(
            '--eval-bleu-detok',
            type=str,
            default="space",
            help='detokenize before computing BLEU (e.g., "moses"); '
            'required if using --eval-bleu; use "space" to '
            'disable detokenization; see fairseq.data.encoders '
            'for other options')
        parser.add_argument('--eval-bleu-detok-args',
                            type=str,
                            metavar='JSON',
                            help='args for building the tokenizer, if needed')
        parser.add_argument('--eval-tokenized-bleu',
                            action='store_true',
                            default=False,
                            help='compute tokenized BLEU instead of sacrebleu')
        # Given without a value, this option takes the const '@@ '.
        parser.add_argument('--eval-bleu-remove-bpe',
                            nargs='?',
                            const='@@ ',
                            default=None,
                            help='remove BPE before computing BLEU')
        # Help text previously misspelled the metric as "BLUE".
        parser.add_argument('--eval-bleu-args',
                            type=str,
                            metavar='JSON',
                            help='generation args for BLEU scoring, '
                            'e.g., \'{"beam": 4, "lenpen": 0.6}\'')
        parser.add_argument('--eval-bleu-print-samples',
                            action='store_true',
                            help='print sample generations during validation')
        SamplingMethod.add_arguments(parser)
        MultilingualDatasetManager.add_args(parser)