Example No. 1
 def add_args(parser):
     """Add task-specific arguments to the parser."""
     # fmt: off
     TranslationTask.add_args(parser)
     parser.add_argument(
         '--langs',
         required=True,
         metavar='LANG',
         help='comma-separated list of monolingual languages, '
         'for example, "en,de,fr". These should match the '
         'langs from pretraining (and be in the same order). '
         'You should always add all pretraining language idx '
         'during finetuning.')
     parser.add_argument(
         '--extra-lang-symbol',
         default='',
         type=str,
         help='comma-separated list of monolingual languages, '
         'for example, "en,de,fr". These should match the '
         'langs from pretraining (and be in the same order). '
         'You should always add all pretraining language idx '
         'during finetuning.')
     parser.add_argument(
         '--prepend-bos',
         action='store_true',
         help='prepend bos token to each sentence, which matches '
         'mBART pretraining')
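
Note: the add_args snippets collected on this page are static methods of custom fairseq tasks. Below is a minimal sketch of how such a method is typically wired up, assuming a fairseq version that provides the register_task decorator; the class name and the extra flag are placeholders, not taken from any example here.

from fairseq.tasks import register_task
from fairseq.tasks.translation import TranslationTask


@register_task("demo_translation")
class DemoTranslationTask(TranslationTask):

    @staticmethod
    def add_args(parser):
        """Add task-specific arguments on top of the base translation args."""
        TranslationTask.add_args(parser)
        # Hypothetical extra flag, for illustration only.
        parser.add_argument("--demo-flag", action="store_true",
                            help="placeholder flag added by this subclass")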
Example No. 2
 def add_args(parser):
     """Add task-specific arguments to the parser."""
     # fmt: off
     TranslationTask.add_args(parser)
     parser.add_argument(
         '--noise',
         default='random_delete',
         choices=['random_delete', 'random_mask', 'no_noise', 'full_mask'])
     parser.add_argument('--add-mask-token',
                         action='store_true',
                         help='add a mask token for model compatibility.')
     parser.add_argument('--use-mask-token',
                         default=False,
                         action='store_true')
     parser.add_argument('--add-lang-token',
                         default=False,
                         action='store_true')
     parser.add_argument('--use-lang-token',
                         default=False,
                         action='store_true')
     parser.add_argument(
         '--langs',
         default=None,
         metavar='LANG',
         help=
         'comma-separated list of monolingual languages, for example, "en,de,fr". '
         'Be careful: these langs are what you used for pretraining (in the same order), '
         'not for finetuning. '
         'You should always add all pretraining language idx during finetuning.'
     )
Example No. 3
 def add_args(parser):
     """Add task-specific arguments to the parser."""
     # fmt: off
     TranslationTask.add_args(parser)
     parser.add_argument(
         '--remove-eos-from-source',
         action='store_true',
         help='if set, remove eos from end of source if it\'s present')
     parser.add_argument('--load-dependency',
                         action='store_true',
                         help='load the dependency heads')
     parser.add_argument(
         '--dependency-with-input',
         action='store_true',
         help='if set, target-side\'s dependencies are based on the inputs')
     parser.add_argument(
         '--use-gold-dependency',
         action='store_true',
         help='use the source\'s gold dependency for inference')
     parser.add_argument(
         '--print-dependency',
         nargs='?',
         const='hard',
         help=
         'if set, uses attention feedback to compute and print dependency')
Example No. 4
 def add_args(parser):
     """Add task-specific arguments to the parser."""
     TranslationTask.add_args(parser)
     parser.add_argument(
         '--noise',
         default='no_noise',
         choices=['random_mask', 'no_noise', 'full_mask'])
Example No. 5
 def add_args(parser):
     """Add task-specific arguments to the parser."""
     # fmt: off
     TranslationTask.add_args(parser)
     parser.add_argument('--method',
                         default='hMoEup',
                         choices=['sMoElp', 'sMoEup', 'hMoElp', 'hMoEup'])
     parser.add_argument('--num-experts',
                         default=3,
                         type=int,
                         metavar='N',
                         help='number of experts')
     parser.add_argument('--mean-pool-gating-network',
                         action='store_true',
                         help='use a simple mean-pooling gating network')
     parser.add_argument('--mean-pool-gating-network-dropout',
                         type=float,
                         help='dropout for mean-pooling gating network')
     parser.add_argument(
         '--mean-pool-gating-network-encoder-dim',
         type=float,
         help='encoder output dim for mean-pooling gating network')
     parser.add_argument('--gen-expert',
                         type=int,
                         default=0,
                         help='which expert to use for generation')
Example No. 6
 def add_args(parser):
     TranslationTask.add_args(parser)
     parser.add_argument('--morpho-dropout', type=float, default=0.5)
     parser.add_argument('--morpho-dropout-initial',
                         type=float,
                         default=None)
     parser.add_argument('--morpho-dropout-end-epoch',
                         type=int,
                         default=None)
Example No. 7
 def add_args(parser):
     """Add task-specific arguments to the parser."""
     # fmt: off
     TranslationTask.add_args(parser)
     parser.add_argument('--noise',
                         default='random_delete',
                         choices=[
                             'random_delete', 'random_delete_shuffle',
                             'random_mask', 'no_noise', 'full_mask'
                         ])
     parser.add_argument('--random-seed', default=1, type=int)
Example No. 8
 def add_args(parser):
     """Add task-specific arguments to the parser."""
     TranslationTask.add_args(parser)
     parser.add_argument(
         '--langs',
         required=True,
         metavar='LANG',
         help='comma-separated list of monolingual languages, '
         'for example, "en,de,fr". These should match the '
         'langs from pretraining (and be in the same order). '
         'You should always add all pretraining language idx '
         'during finetuning.')
Example No. 9
 def add_args(parser):
     """Add task-specific arguments to the parser."""
     # fmt: off
     TranslationTask.add_args(parser)
     parser.add_argument('--bert-model',
                         type=str,
                         metavar='DIR',
                         required=True,
                         help='path to the BERT model')
     parser.add_argument('--fine-tuning',
                         action='store_true',
                         help='if set, the BERT model will be tuned')
     parser.set_defaults(left_pad_source=False)
Example No. 10
 def add_args(parser):
     """Add task-specific arguments to the parser."""
     # fmt: off
     TranslationTask.add_args(parser)
     parser.add_argument(
         '--noise',
         default='random_delete',
         choices=['random_delete', 'random_mask', 'no_noise', 'full_mask'])
     parser.add_argument('--append_eos_to_target',
                         default=True,
                         type=bool,
                         help="the path of the mean file.")
     parser.add_argument('--append_bos_to_target',
                         default=True,
                         type=bool,
                         help="the path of the mean file.")
Example No. 11
    def initialize(self, context):
        """
        load model and extra files.
        """
        logger.info(
            f"Will initialize with system_properties: {context.system_properties}"
        )
        model_pt_path, model_file_dir, device = self._handler_initialize(
            context)
        config = json.loads(
            (Path(model_file_dir) / "model_generation.json").read_text())
        self.device = device

        translation_cfg = TranslationConfig()
        self.vocab = TranslationTask.load_dictionary("dict.txt")

        self.spm = sentencepiece.SentencePieceProcessor()
        self.spm.Load("sentencepiece.bpe.model")
        logger.info("Loaded sentencepiece.bpe.model")

        if config.get("dummy", False):
            self.sequence_generator = FakeGenerator()
            logger.warning("Will use a FakeGenerator model, only testing BPE")
        else:
            task = TranslationTask(translation_cfg, self.vocab, self.vocab)
            [model], cfg = fairseq.checkpoint_utils.load_model_ensemble(
                [model_pt_path], task=task)
            model.eval().to(self.device)
            logger.info(
                f"Loaded model from {model_pt_path} to device {self.device}")
            logger.info(
                f"Will use the following config: {json.dumps(config, indent=4)}"
            )
            self.sequence_generator = SequenceGenerator(
                [model],
                tgt_dict=self.vocab,
                beam_size=config.get("beam_size", 1),
                max_len_a=config.get("max_len_a", 1.3),
                max_len_b=config.get("max_len_b", 5),
                min_len=config.get("min_len", 5),
                max_len=model.max_decoder_positions(),
            )
            if not self.sequence_generator.model.has_incremental:
                logger.warning("Incremental generation is disabled !!!")

        self.taskIO = TaskIO()
        self.initialized = True
Example No. 12
 def add_args(parser):
     """Add task-specific arguments to the parser."""
     # fmt: off
     TranslationTask.add_args(parser)
     # Training:
     parser.add_argument('--pretrained', default=None, type=str)
     parser.add_argument('--copy-embeddings', action='store_true')
     parser.add_argument('--copy-network', action='store_true')
     parser.add_argument('--shift', default=1, type=int)
     parser.add_argument('--policy', default='eos', type=str)
     parser.add_argument('--write-threshold', default=0.5, type=float)
     parser.add_argument('--align-index', default=1, type=int)
     parser.add_argument('--pick-alignment', default='index', type=str)
     parser.add_argument('--path-oracle', default='alignment', type=str)
     parser.add_argument('--path-oracle-rank', default=50, type=int)
     parser.add_argument('--path-oracle-tol', default=0.1, type=float)
     parser.add_argument('--path-oracle-lookahead', default=0, type=int)
     parser.add_argument('--path-oracle-waitk', default=7, type=int)
     parser.add_argument('--path-oracle-width', default=3, type=int)
Example No. 13
 def add_args(parser):
     """Add task-specific arguments to the parser."""
     # fmt: off
     TranslationTask.add_args(parser)
     parser.add_argument('--mono-data',
                         default=None,
                         help='monolingual data, split by :')
     parser.add_argument(
         '--mono-one-split-each-epoch',
         action='store_true',
         default=False,
         help='use one split of monolingual data at each epoch')
     parser.add_argument('--parallel-ratio',
                         default=1.0,
                         type=float,
                         help='subsample ratio of parallel data')
     parser.add_argument('--mono-ratio',
                         default=1.0,
                         type=float,
                         help='subsample ratio of mono data')
Example No. 14
 def add_args(parser):
     """Add task-specific arguments to the parser."""
     # fmt: off
     TranslationTask.add_args(parser)
     parser.add_argument(
         '--noise',
         default='random_delete',
         choices=['random_delete', 'random_mask', 'no_noise', 'full_mask'])
     parser.add_argument('--shape_size',
                         default=224,
                         type=int,
                         help="the shape size of image")
     parser.add_argument(
         '--sample-startegy',
         default="sampling_with_tgt_len",
         type=str,
         choices=["sampling_with_tgt_len", "sampling_with_src_len"],
         help="sampling frames from the video")
     parser.add_argument(
         '--tgtlen-times',
         default=5,
         type=int,
         help=
         "The maximum number of frames of the source video is 'tgtlen_times' times"
         "the length of the target sentence, if use, the sample_startegy must be "
         "sampling_with_tgt_len")
     parser.add_argument('--mean-img-file',
                         default=None,
                         type=str,
                         help="the path of the mean file.")
     parser.add_argument('--append_eos_to_target',
                         default=True,
                         type=bool,
                         help="the path of the mean file.")
     parser.add_argument('--append_bos_to_target',
                         default=True,
                         type=bool,
                         help="the path of the mean file.")
Example No. 15
 def add_args(parser):
     """Add task-specific arguments to the parser."""
     # fmt: off
     TranslationTask.add_args(parser)
     parser.add_argument(
         '--langs',
         type=str,
         metavar='LANG',
         help='comma-separated list of monolingual languages, '
         'for example, "en,de,fr". These should match the '
         'langs from pretraining (and be in the same order). '
         'You should always add all pretraining language idx '
         'during finetuning.')
     parser.add_argument(
         '--prepend-bos',
         action='store_true',
         help='prepend bos token to each sentence, which matches '
         'mBART pretraining')
     parser.add_argument(
         '--domain-dict',
         type=str,
         required=True,
         help=
         'Path to a file that contains a list of all domains (same format as dict.txt)'
     )
     parser.add_argument(
         '--train-domains',
         type=str,
         required=True,
         help='File of same line count as training split where each '
         'line has some domain from the domain_dict.txt')
     parser.add_argument(
         '--valid-domains',
         type=str,
         required=True,
         help='File of same line count as validation split where each '
         'line has some domain from the domain_dict.txt')
Example No. 16
    def from_checkpoint(self,
                        checkpoint,
                        roberta_cache_path=None,
                        inspector=None):
        '''
        Initialize model from checkpoint
        '''

        # load fairseq task
        parser = options.get_interactive_generation_parser()
        options.add_optimization_args(parser)
        args = options.parse_args_and_arch(parser, input_args=['--data dummy'])

        # Read extra arguments
        model_folder = os.path.dirname(checkpoint.split(':')[0])
        # config with fairseq-preprocess and fairseq-train args
        config_json = f'{model_folder}/config.json'
        assert os.path.isfile(config_json), \
            "Model trained with v0.3.0 or above?"
        with open(config_json) as fid:
            extra_args = json.loads(fid.read())
        prepro_args = extra_args['fairseq_preprocess_args']
        train_args = extra_args['fairseq_train_args']
        # extra args by hand
        args.source_lang = 'en'
        args.target_lang = 'actions'
        args.path = checkpoint
        args.roberta_cache_path = roberta_cache_path
        dim = train_args['--pretrained-embed-dim'][0]
        args.model_overrides = \
            "{'pretrained_embed_dim':%s, 'task': 'translation'}" % dim
        assert bool(args.left_pad_source), "Only left pad supported"

        # dictionaries
        src_dict_path = f'{model_folder}/dict.{args.source_lang}.txt'
        tgt_dict_path = f'{model_folder}/dict.{args.target_lang}.txt'
        assert os.path.isfile(src_dict_path), \
            f"Missing {src_dict_path}.\nModel trained with v0.3.0 or above?"\
            "\ncheck scripts/stack-transformer/update_model_to_v0.3.0.sh"
        assert os.path.isfile(tgt_dict_path), \
            f"Missing {tgt_dict_path}.\nModel trained with v0.3.0 or above?"\
            "\ncheck scripts/stack-transformer/update_model_to_v0.3.0.sh"
        src_dict = Dictionary.load(src_dict_path)
        tgt_dict = Dictionary.load(tgt_dict_path)

        use_cuda = torch.cuda.is_available() and not args.cpu

        # Override task to ensure compatibility with old models and apply overrides
        # TODO: Task may not be even needed
        task = TranslationTask(args, src_dict, tgt_dict)
        model = load_models(args, task, use_cuda)

        # Load RoBERTa
        embeddings = PretrainedEmbeddings(
            name=prepro_args['--pretrained-embed'][0],
            bert_layers=[int(x) for x in prepro_args['--bert-layers']]
            if '--bert-layers' in prepro_args else None,
            model=load_roberta(name=prepro_args['--pretrained-embed'][0],
                               roberta_cache_path=args.roberta_cache_path,
                               roberta_use_gpu=use_cuda))

        print("Finished loading models")

        # State machine variables
        machine_rules = f'{model_folder}/train.rules.json'
        assert os.path.isfile(machine_rules), f"Missing {machine_rules}"
        machine_type = prepro_args['--machine-type'][0]

        return self(model,
                    machine_rules,
                    machine_type,
                    src_dict,
                    tgt_dict,
                    use_cuda,
                    embeddings=embeddings,
                    inspector=inspector)
Example No. 17
    def build_generator(self,
                        models,
                        args,
                        seq_gen_cls=None,
                        extra_gen_cls_kwargs=None):
        if getattr(args, "score_reference", False):
            raise NotImplementedError()
        else:
            from .noisy_channel_sequence_generator import NoisyChannelSequenceGenerator
            use_cuda = torch.cuda.is_available() and not self.args.cpu
            assert self.args.lm_model is not None, '--lm-model required for noisy channel generation!'
            assert self.args.lm_data is not None, '--lm-data required for noisy channel generation to map between LM and bitext vocabs'
            if self.args.channel_model is not None:
                import copy
                ch_args_task = copy.deepcopy(self.args)
                tmp = ch_args_task.source_lang
                ch_args_task.source_lang = ch_args_task.target_lang
                ch_args_task.target_lang = tmp
                ch_args_task._name = 'translation'
                channel_task = TranslationTask.setup_task(ch_args_task)

            arg_dict = {}
            arg_dict['task'] = 'language_modeling'
            arg_dict['sample_break_mode'] = 'eos'
            arg_dict['data'] = self.args.lm_data
            arg_dict['output_dictionary_size'] = -1
            lm_args = argparse.Namespace(**arg_dict)
            lm_task = LanguageModelingTask.setup_task(lm_args)
            lm_dict = lm_task.output_dictionary

            if self.args.channel_model is not None:
                channel_models, _ = checkpoint_utils.load_model_ensemble(
                    self.args.channel_model.split(':'), task=channel_task)

                for model in channel_models:
                    model.make_generation_fast_(
                        beamable_mm_beam_size=None
                        if args.no_beamable_mm else args.beam,
                        need_attn=args.print_alignment,
                    )
                    if self.args.fp16:
                        model.half()
                    if use_cuda:
                        model.cuda()
            else:
                channel_models = None

            lm_models, _ = checkpoint_utils.load_model_ensemble(
                self.args.lm_model.split(':'), task=lm_task)

            for model in lm_models:
                model.make_generation_fast_(
                    beamable_mm_beam_size=None
                    if args.no_beamable_mm else args.beam,
                    need_attn=args.print_alignment,
                )
                if self.args.fp16:
                    model.half()
                if use_cuda:
                    model.cuda()
            return NoisyChannelSequenceGenerator(
                combine_method=self.args.combine_method,
                tgt_dict=self.target_dictionary,
                src_dict=self.source_dictionary,
                beam_size=getattr(args, 'beam', 5),
                max_len_a=getattr(args, 'max_len_a', 0),
                max_len_b=getattr(args, 'max_len_b', 200),
                min_len=getattr(args, 'min_len', 1),
                len_penalty=getattr(args, 'lenpen', 1),
                unk_penalty=getattr(args, 'unkpen', 0),
                temperature=getattr(args, 'temperature', 1.),
                match_source_len=getattr(args, 'match_source_len', False),
                no_repeat_ngram_size=getattr(args, 'no_repeat_ngram_size', 0),
                normalize_scores=(not getattr(args, 'unnormalized', False)),
                channel_models=channel_models,
                k2=getattr(self.args, 'k2', 50),
                ch_weight=getattr(self.args, 'ch_wt', 1),
                channel_scoring_type=self.args.channel_scoring_type,
                top_k_vocab=self.args.top_k_vocab,
                lm_models=lm_models,
                lm_dict=lm_dict,
                lm_weight=getattr(self.args, 'lm_wt', 1),
                normalize_lm_scores_by_tgt_len=getattr(
                    self.args, 'normalize_lm_scores_by_tgt_len', False),
            )
Example No. 18
 def add_args(parser):
     """Add task-specific arguments to the parser."""
     TranslationTask.add_args(parser)
     # fmt: off
     parser.add_argument(
         '--channel-model',
         metavar='FILE',
         help=
         'path to P(S|T) model. P(S|T) and P(T|S) must share source and target dictionaries.'
     )
     parser.add_argument(
         '--combine-method',
         default='lm_only',
         choices=['lm_only', 'noisy_channel'],
         help="""method for combining direct and channel model scores.
                                 lm_only: decode with P(T|S)P(T)
                                 noisy_channel: decode with 1/t P(T|S) + 1/s(P(S|T)P(T))"""
     )
     parser.add_argument(
         '--normalize-lm-scores-by-tgt-len',
         action='store_true',
         default=False,
         help='normalize lm score by target length instead of source length'
     )
     parser.add_argument(
         '--channel-scoring-type',
         default='log_norm',
         choices=[
             'unnormalized', 'log_norm', 'k2_separate', 'src_vocab',
             'src_vocab_batched'
         ],
         help=
         "Normalize bw scores with log softmax or return bw scores without log softmax"
     )
     parser.add_argument(
         '--top-k-vocab',
         default=0,
         type=int,
         help=
         'top k vocab IDs to use with `src_vocab` in channel model scoring')
     parser.add_argument(
         '--k2',
         default=50,
         type=int,
         help=
         'the top k2 candidates to rescore with the noisy channel model for each beam'
     )
     parser.add_argument('--ch-wt',
                         default=1,
                         type=float,
                         help='weight for the channel model')
     parser.add_argument(
         '--lm-model',
         metavar='FILE',
         help=
         'path to lm model file, to model P(T). P(T) must share the same vocab as the direct model on the target side'
     )
     parser.add_argument(
         '--lm-data',
         metavar='FILE',
         help=
         'path to lm model training data for target language, used to properly load LM with correct dictionary'
     )
     parser.add_argument('--lm-wt',
                         default=1,
                         type=float,
                         help='the weight of the lm in joint decoding')
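
For orientation, here is a schematic reading of the noisy_channel combination described in the --combine-method help above. It is only an illustration of that formula in log space, not the code inside NoisyChannelSequenceGenerator; the ch_wt and lm_wt arguments mirror the --ch-wt and --lm-wt flags.

def noisy_channel_score(log_p_direct, log_p_channel, log_p_lm,
                        src_len, tgt_len, ch_wt=1.0, lm_wt=1.0):
    # 1/t * log P(T|S)  +  1/s * (ch_wt * log P(S|T) + lm_wt * log P(T))
    direct_term = log_p_direct / tgt_len
    channel_term = (ch_wt * log_p_channel + lm_wt * log_p_lm) / src_len
    return direct_term + channel_term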
Example No. 19
 def add_args(parser):
     TranslationTask.add_args(parser)
     parser.add_argument('--without-padding', action='store_true', help='if set, build batches without padding')
Example No. 20
    def add_args(parser):
        """Add task-specific arguments to the parser."""
        # fmt: off
        TranslationTask.add_args(parser)
        parser.add_argument('--forget-rate',
                            type=float,
                            default=0.9,
                            metavar='D',
                            help='rho = (t + decay)^{-forget}')
        parser.add_argument('--decay-rate',
                            type=float,
                            default=1.,
                            metavar='D',
                            help='rho = (t + decay)^{-forget}')
        parser.add_argument('--retrieve-split',
                            type=str,
                            default='train',
                            help='the retrieve pool')

        parser.add_argument('--dec-opt-freq',
                            type=int,
                            default=1,
                            help='the relative update freq of decoder')
        parser.add_argument('--enc-opt-freq',
                            type=int,
                            default=1,
                            help='the relative update freq of encoder')

        parser.add_argument('--iw-nsamples',
                            type=int,
                            default=1000,
                            help='number of importance-weighted samples')
        parser.add_argument('--eval-mode',
                            type=str,
                            default='none',
                            choices=[
                                'iw', 'entropy', 'gen_sample',
                                'gen_reconstruction', 'time', 'none',
                                'from_file', 'gen_interpolation'
                            ],
                            help='evaluation modes')
        parser.add_argument('--eval-gen-file',
                            type=str,
                            default=None,
                            help='read in prototypes and edit vectors')
        parser.add_argument('--eval-gen-edit-vec',
                            action='store_true',
                            default=False,
                            help='write edit vectors in the generation file')

        parser.add_argument(
            '--prune-num',
            type=int,
            default=-1,
            help='perform evaluation based on top prune_num templates only')
        # parser.add_argument('--prune-num-offline', type=int, default=-1,
        #                     help='perform evaluation based on top prune_num templates only (offline version)')

        parser.add_argument(
            '--free-bits',
            type=float,
            default=0,
            help='the free bits param to regularize KLt, 0 to disable')
        parser.add_argument(
            '--lambda-t-config',
            default="1.0",
            type=str,
            metavar='CONFIG',
            help='KLt coefficient '
            'use fixed weight during training if set to floating point number. '
            'use piecewise linear function over number of updates to schedule the '
            'weight with the format: w0:step0,w1:step1,...')
        parser.add_argument(
            '--gen-nz',
            type=int,
            default=10,
            help='number of edit vector samples to draw from the prior')
        parser.add_argument('--gen-np',
                            type=int,
                            default=200,
                            help='number of top prototypes')
        parser.add_argument(
            '--write-loss-path',
            type=str,
            default=None,
            help='write out loss at evaluation time for interpolation exp')
Example No. 21
 def add_args(parser):
     """Add task-specific arguments to the parser."""
     TranslationTask.add_args(parser)
     parser.add_argument('--eval-waitk', default=3, type=int)
Example No. 22
 def add_args(parser):
     TranslationTask.add_args(parser)
     pass
Example No. 23
    def add_args(parser):
        TranslationTask.add_args(parser)
        # bart setting
        parser.add_argument(
            "--mask",
            default=0.0,
            type=float,
            help="fraction of words/subwords that will be masked",
        )
        parser.add_argument(
            "--mask-random",
            default=0.0,
            type=float,
            help="instead of using [MASK], use random token this often",
        )
        parser.add_argument(
            "--insert",
            default=0.0,
            type=float,
            help="insert this percentage of additional random tokens",
        )
        parser.add_argument(
            "--poisson-lambda",
            default=3.0,
            type=float,
            help="randomly shuffle sentences for this proportion of inputs",
        )
        parser.add_argument(
            "--mask-length",
            default="span-poisson",
            type=str,
            choices=["subword", "word", "span-poisson"],
            help="mask length to choose",
        )
        parser.add_argument(
            "--replace-length",
            default=1,
            type=int,
            help=
            "when masking N tokens, replace with 0, 1, or N tokens (use -1 for N)",
        )

        # multi-lingual
        parser.add_argument(
            "--multilang-sampling-alpha",
            type=float,
            default=1.0,
            help="smoothing alpha for sample ratios across multiple datasets",
        )
        parser.add_argument(
            "--lang-pairs",
            default="",
            metavar="PAIRS",
            help=
            "comma-separated list of language pairs (in training order): phnen-en,phnfr-fr,phnit-it. Do masking",
        )
        parser.add_argument(
            "--lang-pairs-bitext",
            default="",
            metavar="PAIRS",
            help=
            "comma-separated list of language pairs (in training order): en-de,en-fr,de-fr. No masking",
        )
        parser.add_argument("--add-src-lang-token",
                            default=False,
                            action="store_true")
        parser.add_argument("--add-tgt-lang-token",
                            default=False,
                            action="store_true")
        parser.add_argument(
            "--no-whole-word-mask-langs",
            type=str,
            default="",
            metavar="N",
            help=
            "languages without spacing between words dont support whole word masking",
        )
        parser.add_argument("--use-mask-whole-words",
                            default=False,
                            action="store_true")
Example No. 24
        epoch=epoch,
        disable_iterator_cache=not cached,
        # Set this to False to speed up. However, if set to False, changing max_tokens beyond
        # first call of this method has no effect.
    )
    return batch_iterator


if __name__ == '__main__':
    args = parse_args()
    # todo: this returns a tuple rather than a config dict
    hw5_config = get_cfg(args)
    task_cfg = TranslationConfig(
        data=hw5_config.get("data_path"),
        source_lang=hw5_config.get("source_lang"),
        target_lang=hw5_config.get("target_lang"),
        train_subset="train",
        required_seq_len_multiple=8,
        dataset_impl="mmap",
        upsample_primary=1,
    )
    task = TranslationTask.setup_task(task_cfg)
    demo_epoch_obj = load_data_iterator(task,
                                        "valid",
                                        epoch=1,
                                        max_tokens=20,
                                        num_workers=1,
                                        cached=False)
    demo_iter = demo_epoch_obj.next_epoch_itr(shuffle=True)
    sample = next(demo_iter)
    sample
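
The sample drawn above is a standard fairseq translation batch. A short sketch for inspecting it; the key names follow fairseq's LanguagePairDataset collater and should be treated as an assumption if your fairseq version differs.

print(sample.keys())
# dict_keys(['id', 'nsentences', 'ntokens', 'net_input', 'target'])
print(sample["net_input"]["src_tokens"].shape)          # (batch, src_len) source token ids
print(sample["net_input"]["prev_output_tokens"].shape)  # decoder inputs (target shifted right)
print(sample["target"].shape)                           # (batch, tgt_len) gold target token ids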
Example No. 25
def main(args):
    if args.max_tokens is None:
        args.max_tokens = 6000
    print(args)

    if not torch.cuda.is_available():
        raise NotImplementedError('Training on CPU is not supported')

    torch.cuda.set_device(args.device_id)
    torch.manual_seed(args.seed)

    # Setup task, e.g., translation, language modeling, etc.
    task = tasks.setup_task(args)

    # Load dataset splits
    load_dataset_splits(task, ['train', 'valid'])

    # Build model and criterion
    model = task.build_model(args)
    # MT_teach = task.build_model(args, mode="MT")
    MT_teach = TranslationTask.load_pretained_model(args.pre_trained_mt,
                                                    args.mt_src_dict,
                                                    args.mt_tgt_dict)

    NHG_teach = task.build_model(args, mode="NHG")
    if hasattr(args, "share_decoder_input_output_embed"):
        args.share_decoder_input_output_embed = False

    # print(model)
    criterion = task.build_criterion(args)
    print('| model {}, criterion {}'.format(args.arch, criterion.__class__.__name__))
    print('| num. model params: {}'.format(sum(p.numel() for p in model.parameters())))

    # Build trainer
    if args.fp16:
        trainer = FP16Trainer(args, task, model, criterion)
    else:
        if torch.cuda.get_device_capability(0)[0] >= 7:
            print('| NOTICE: your device may support faster training with --fp16')
        trainer = Trainer(args, task, model, criterion, NHG_teach, MT_teach)

    print('| training on {} GPUs'.format(args.distributed_world_size))
    print('| max tokens per GPU = {} and max sentences per GPU = {}'.format(
        args.max_tokens,
        args.max_sentences,
    ))

    # Initialize dataloader
    max_positions = trainer.get_model().max_positions()
    epoch_itr = data.EpochBatchIterator(
        dataset=task.dataset(args.train_subset),
        max_tokens=args.max_tokens,
        max_sentences=args.max_sentences_valid,
        max_positions=max_positions,
        ignore_invalid_inputs=True,
        required_batch_size_multiple=8,
        seed=args.seed,
        num_shards=args.distributed_world_size,
        shard_id=args.distributed_rank,
    )

    # Load the latest checkpoint if one is available
    os.makedirs(args.save_dir, exist_ok=True)
    load_checkpoint(args, trainer, epoch_itr, teacher_model=True)

    for para in NHG_teach.parameters():
        para.requires_grad = False

    for para in MT_teach.parameters():
        para.requires_grad = False
    # Send a dummy batch to warm the caching allocator
    # dummy_batch = task.dataset('train').get_dummy_batch(args.max_tokens, max_positions)
    # trainer.dummy_train_step(dummy_batch)
    # Train until the learning rate gets too small
    max_epoch = args.max_epoch or math.inf
    max_update = args.max_update or math.inf
    lr = trainer.get_lr()
    train_meter = StopwatchMeter()
    train_meter.start()
    valid_losses = [None]
    valid_subsets = args.valid_subset.split(',')
    while lr > args.min_lr and epoch_itr.epoch < max_epoch and trainer.get_num_updates() < max_update:
        # train for one epoch
        train(args, trainer, task, epoch_itr)

        if epoch_itr.epoch % args.validate_interval == 0:
            valid_losses, valid_state = validate(args, trainer, task, epoch_itr, valid_subsets)

        # only use first validation loss to update the learning rate
        lr = trainer.lr_step(epoch_itr.epoch, valid_losses[0])

        # save checkpoint
        if epoch_itr.epoch % args.save_interval == 0:
            save_checkpoint(args, trainer, epoch_itr, valid_losses[0], valid_state)
    train_meter.stop()
    print('| done training in {:.1f} seconds'.format(train_meter.sum))
Example No. 26
def convert_fsmt_checkpoint_to_pytorch(fsmt_checkpoint_path,
                                       pytorch_dump_folder_path,
                                       data_path,
                                       spm_model_path=None):
    # assumes a joint dictionary

    json_indent = 2

    # prep
    assert os.path.exists(fsmt_checkpoint_path)
    os.makedirs(pytorch_dump_folder_path, exist_ok=True)
    print(f"Writing results to {pytorch_dump_folder_path}")

    chkpt = torch.load(fsmt_checkpoint_path)
    chkpt['cfg']['task'].data = data_path
    chkpt['cfg']['model'].data = data_path
    torch.save(chkpt, fsmt_checkpoint_path)

    task_args, model_args = chkpt['cfg']['task'], chkpt['cfg']['model']

    task = TranslationTask.setup_task(task_args)
    model = task.build_model(model_args)

    # model config
    fsmt_model_config_file = os.path.join(pytorch_dump_folder_path,
                                          "config.json")

    model_conf = {
        "architectures": ["FSMTForConditionalGeneration"],
        "model_type": "fsmt",
        "activation_dropout": model_args.activation_dropout,
        "activation_function": "relu",
        "attention_dropout": model_args.attention_dropout,
        "d_model": model_args.decoder_embed_dim,
        "dropout": model_args.dropout,
        "init_std": 0.02,
        "max_position_embeddings": model_args.max_source_positions,
        "num_hidden_layers": model_args.encoder_layers,
        "src_vocab_size": len(task.source_dictionary),
        "tgt_vocab_size": len(task.target_dictionary),
        "langs": [task_args.source_lang, task_args.target_lang],
        "encoder_attention_heads": model_args.encoder_attention_heads,
        "encoder_ffn_dim": model_args.encoder_ffn_embed_dim,
        "encoder_layerdrop": model_args.encoder_layerdrop,
        "encoder_layers": model_args.encoder_layers,
        "decoder_attention_heads": model_args.decoder_attention_heads,
        "decoder_ffn_dim": model_args.decoder_ffn_embed_dim,
        "decoder_layerdrop": model_args.decoder_layerdrop,
        "decoder_layers": model_args.decoder_layers,
        "bos_token_id": 0,
        "pad_token_id": 1,
        "eos_token_id": 2,
        "is_encoder_decoder": True,
        "scale_embedding": not model_args.no_scale_embedding,
        "tie_word_embeddings": model_args.share_all_embeddings,
        "share_decoder_input_output_embed": model_args.share_decoder_input_output_embed
    }

    # good hparam defaults to start with
    model_conf["num_beams"] = 5
    model_conf["early_stopping"] = False
    model_conf["length_penalty"] = 1.0

    print(f"Generating {fsmt_model_config_file}")
    with open(fsmt_model_config_file, "w", encoding="utf-8") as f:
        f.write(json.dumps(model_conf, ensure_ascii=False, indent=json_indent))

    # model
    hub_gen = TransformerModel.from_pretrained(
        dirname(fsmt_checkpoint_path),
        checkpoint_file=basename(fsmt_checkpoint_path),
        data_name_or_path=task_args.data)

    model_state_dict = hub_gen.models[0].state_dict()

    # rename keys to start with 'model.'
    model_state_dict = OrderedDict(
        ("model." + k, v) for k, v in model_state_dict.items())

    # remove unneeded keys
    ignore_keys = [
        "model.model",
        "model.encoder.version",
        "model.decoder.version",
        #"model.encoder_embed_tokens.weight",
        #"model.decoder_embed_tokens.weight",
        "model.encoder.embed_positions._float_tensor",
        "model.decoder.embed_positions._float_tensor",
    ]
    for k in ignore_keys:
        model_state_dict.pop(k, None)

    #print(model_state_dict.keys())

    config = FSMTConfig.from_pretrained(pytorch_dump_folder_path)
    model_new = FSMTForConditionalGeneration(config)

    # check that it loads ok
    model_new.load_state_dict(model_state_dict, strict=False)

    # save
    pytorch_weights_dump_path = os.path.join(pytorch_dump_folder_path,
                                             WEIGHTS_NAME)
    print(f"Generating {pytorch_weights_dump_path}")
    torch.save(model_state_dict, pytorch_weights_dump_path)

    pytorch_vocab_dump_path = os.path.join(pytorch_dump_folder_path,
                                           "vocab.txt")
    print(f"Generating {pytorch_vocab_dump_path}")
    assert hub_gen.src_dict.indices == hub_gen.tgt_dict.indices
    with open(pytorch_vocab_dump_path, 'w') as f:
        for item in hub_gen.src_dict.indices:
            f.write("%s\n" % item)

    if spm_model_path is not None:
        copyfile(spm_model_path, f"{pytorch_dump_folder_path}/spm_model.spm")

    print("Conversion is done!")