def add_cmdline_args(argparser): """Add command-line arguments specifically for this agent.""" DictionaryAgent.add_cmdline_args(argparser) agent = argparser.add_argument_group('Fairseq Arguments') agent.add_argument( '-tr', '--truncate', type=int, default=-1, help='truncate input & output lengths to speed up training (may ' 'reduce accuracy). This fixes all input and output to have a ' 'maximum length. This reduces the total amount of padding in ' 'the batches.') agent.add_argument( '--max-positions', default=1024, type=int, metavar='N', help='max number of tokens in the sequence') agent.add_argument( '--seed', default=1, type=int, metavar='N', help='pseudo random number generator seed') options.add_optimization_args(argparser) options.add_generation_args(argparser) options.add_model_args(argparser)
def cli_main():
    parser = options.get_training_parser()
    parser.add_argument('--train-subtransformer', action='store_true', default=False,
                        help='whether to train a SuperTransformer or a SubTransformer')
    parser.add_argument('--sub-configs', required=False, is_config_file=True,
                        help='when training a SubTransformer, use --configs to specify the '
                             'architecture and --sub-configs to specify other settings')

    # for profiling
    parser.add_argument('--profile-flops', action='store_true',
                        help='measure the FLOPs of a SubTransformer')

    parser.add_argument('--latgpu', action='store_true',
                        help='measure SubTransformer latency on GPU')
    parser.add_argument('--latcpu', action='store_true',
                        help='measure SubTransformer latency on CPU')
    parser.add_argument('--latiter', type=int, default=300,
                        help='how many iterations to run when measuring latency')
    parser.add_argument('--latsilent', action='store_true',
                        help='keep silent when measuring latency')

    parser.add_argument('--validate-subtransformer', action='store_true',
                        help='evaluate the SubTransformer on the validation set')

    options.add_generation_args(parser)

    args = options.parse_args_and_arch(parser)

    if args.latcpu:
        args.cpu = True
        args.fp16 = False

    if args.latgpu or args.latcpu or args.profile_flops:
        args.distributed_world_size = 1

    if args.pdb:
        pdb.set_trace()

    if args.distributed_init_method is None:
        distributed_utils.infer_init_method(args)

    if args.distributed_init_method is not None:
        # distributed training
        if torch.cuda.device_count() > 1 and not args.distributed_no_spawn:
            start_rank = args.distributed_rank
            args.distributed_rank = None  # assign automatically
            torch.multiprocessing.spawn(
                fn=distributed_main,
                args=(args, start_rank),
                nprocs=torch.cuda.device_count(),
            )
        else:
            distributed_main(args.device_id, args)
    elif args.distributed_world_size > 1:
        # fallback for single node with multiple GPUs
        assert args.distributed_world_size <= torch.cuda.device_count()
        port = random.randint(10000, 20000)
        args.distributed_init_method = 'tcp://localhost:{port}'.format(port=port)
        args.distributed_rank = None  # set based on device id
        if max(args.update_freq) > 1 and args.ddp_backend != 'no_c10d':
            print('| NOTE: you may get better performance with: --ddp-backend=no_c10d')
        torch.multiprocessing.spawn(
            fn=distributed_main,
            args=(args,),
            nprocs=args.distributed_world_size,
        )
    else:
        # single GPU training
        main(args)
def get_parser_with_args():
    parser = options.get_parser('Generation')
    options.add_dataset_args(parser, gen=True)
    options.add_generation_args(parser)
    add_args(parser)

    group = parser.add_argument_group('Generation')
    group.add_argument(
        '--source-vocab-file',
        default='',
        metavar='FILE',
        help='Path to text file representing the Dictionary to use.')
    group.add_argument(
        '--target-vocab-file',
        default='',
        metavar='FILE',
        help='Path to text file representing the Dictionary to use.')
    group.add_argument(
        '--source-text-file',
        default='',
        metavar='FILE',
        help='Path to raw text file containing examples in source dialect. '
             'This overrides what would be loaded from the data dir.',
    )
    group.add_argument(
        '--target-text-file',
        default='',
        metavar='FILE',
        help='Path to raw text file containing examples in target dialect. '
             'This overrides what would be loaded from the data dir.',
    )
    return parser
def get_parser_with_args(): parser = options.get_parser("Generation") options.add_dataset_args(parser, gen=True) options.add_generation_args(parser) add_args(parser) group = parser.add_argument_group("Generation") group.add_argument( "--source-vocab-file", default="", metavar="FILE", help="Path to text file representing the Dictionary to use.", ) group.add_argument( "--target-vocab-file", default="", metavar="FILE", help="Path to text file representing the Dictionary to use.", ) group.add_argument( "--source-text-file", default="", metavar="FILE", help="Path to raw text file containing examples in source dialect. " "This overrides what would be loaded from the data dir.", ) group.add_argument( "--target-text-file", default="", metavar="FILE", help="Path to raw text file containing examples in target dialect. " "This overrides what would be loaded from the data dir.", ) return parser
def main():
    parser = options.get_parser('Generation')
    parser.add_argument('--path', metavar='FILE', required=True, action='append',
                        help='path(s) to model file(s)')
    options.add_dataset_args(parser)
    options.add_generation_args(parser)

    args = parser.parse_args()
    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load ensemble
    print('| loading model(s) from {}'.format(', '.join(args.path)))
    models, model_args = utils.load_ensemble_for_inference(args.path, data_dir=args.data)
    src_dict, dst_dict = models[0].src_dict, models[0].dst_dict

    print('| [{}] dictionary: {} types'.format(model_args.source_lang, len(src_dict)))
    print('| [{}] dictionary: {} types'.format(model_args.target_lang, len(dst_dict)))

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam)

    # Initialize generator
    translator = SequenceGenerator(
        models, beam_size=args.beam, stop_early=(not args.no_early_stop),
        normalize_scores=(not args.unnormalized), len_penalty=args.lenpen,
        unk_penalty=args.unkpen)
    if use_cuda:
        translator.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    print('| Type the input sentence and press return:')
    for src_str in sys.stdin:
        src_str = src_str.strip()
        src_tokens = tokenizer.Tokenizer.tokenize(
            src_str, src_dict, add_if_not_exist=False).long()
        if use_cuda:
            src_tokens = src_tokens.cuda()
        translations = translator.generate(Variable(src_tokens.view(1, -1)))
        hypos = translations[0]
        print('O\t{}'.format(src_str))

        # Process top predictions
        for hypo in hypos[:min(len(hypos), args.nbest)]:
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'].int().cpu(),
                align_dict=align_dict,
                dst_dict=dst_dict,
                remove_bpe=args.remove_bpe)
            print('H\t{}\t{}'.format(hypo['score'], hypo_str))
            print('A\t{}'.format(' '.join(map(str, alignment))))
def get_training_and_generation_parser(default_task='translation'):
    parser = options.get_parser('Trainer', default_task)
    options.add_dataset_args(parser, train=True, gen=True)
    options.add_generation_args(parser)
    options.add_distributed_training_args(parser)
    options.add_model_args(parser)
    options.add_optimization_args(parser)
    options.add_checkpoint_args(parser)
    return parser
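# Usage sketch for the parser builder above -- an illustration, not from the
# source; it assumes fairseq is importable and that a dataset path and --arch
# are passed on argv.
if __name__ == '__main__':
    parser = get_training_and_generation_parser()
    args = options.parse_args_and_arch(parser)  # also resolves --arch-specific args
    # Training and generation flags now coexist on one namespace:
    print(args.beam, args.lr)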
def cli_main():
    parser = options.get_training_parser()
    parser.add_argument('--evo-configs', required=True, is_config_file=True)
    parser.add_argument('--evo-iter', type=int, default=30)
    parser.add_argument('--population-size', type=int, default=125)
    parser.add_argument('--parent-size', type=int, default=25)
    parser.add_argument('--mutation-size', type=int, default=50)
    parser.add_argument('--crossover-size', type=int, default=50)
    parser.add_argument('--mutation-prob', type=float, default=0.3)
    parser.add_argument('--feature-norm', type=float, nargs='+',
                        help='normalizing factor for each feature')
    parser.add_argument('--lat-norm', type=float,
                        help='normalizing factor for latency')
    parser.add_argument('--ckpt-path', type=str,
                        help='path to load latency predictor weights')
    parser.add_argument('--latency-constraint', type=float, default=-1,
                        help='latency constraint')
    parser.add_argument('--valid-cnt-max', type=int, default=1e9,
                        help='max number of sentences to use in validation set')
    parser.add_argument('--write-config-path', type=str,
                        help='path to write out the searched best SubTransformer')

    options.add_generation_args(parser)

    args = options.parse_args_and_arch(parser)

    if args.pdb:
        pdb.set_trace()

    # one GPU is fast enough to do the search
    args.distributed_world_size = 1

    # if search on CPU, use fp32 as default
    if args.cpu:
        args.fp16 = False

    main(args)
def get_parser_with_args(): parser = options.get_parser("Collect Top-K Probs", default_task="pytorch_translate") pytorch_translate_options.add_verbosity_args(parser) pytorch_translate_options.add_dataset_args(parser, gen=True) generation_group = options.add_generation_args(parser) generation_group.add_argument( "--source-binary-file", default="", help="Path for the binary file containing source eval examples. " "(Overrides --source-text-file. Must be used in conjunction with " "--target-binary-file).", ) generation_group.add_argument( "--target-binary-file", default="", help="Path for the binary file containing target eval examples. " "(Overrides --target-text-file. Must be used in conjunction with " "--source-binary-file).", ) generation_group.add_argument( "--k-probs-to-collect", type=int, default=8, help="Number of probabilities to collect for each output step.", ) generation_group.add_argument( "--top-k-probs-binary-file", type=str, default="", help="File into which to save top-K probabilities for each token.", ) return parser
def cli_main(
    modify_parser: Optional[Callable[[argparse.ArgumentParser], None]] = None
) -> None:
    parser = options.get_training_parser()
    # options.add_pruning_args(parser)
    options.add_generation_args(parser)
    args = options.parse_args_and_arch(parser, modify_parser=modify_parser)
    cfg = convert_namespace_to_omegaconf(args)

    if args.profile:
        with torch.cuda.profiler.profile():
            with torch.autograd.profiler.emit_nvtx():
                distributed_utils.call_main(cfg, main)
    else:
        distributed_utils.call_main(cfg, main)
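# Illustrative sketch of the modify_parser hook: the callback receives the
# fully built parser before parsing, so it can override defaults. The beam
# value here is a made-up example, not taken from the source.
def _tweak_defaults(parser: argparse.ArgumentParser) -> None:
    parser.set_defaults(beam=5)


if __name__ == '__main__':
    cli_main(modify_parser=_tweak_defaults)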
def get_parser_with_args(): parser = options.get_parser("Trainer") parser.add_argument( "--log-verbose", action="store_true", help="Whether to output more verbose logs for debugging/profiling.", ) pytorch_translate_options.add_dataset_args(parser, train=True, gen=True) options.add_distributed_training_args(parser) # Adds args related to training (validation and stopping criterions). optimization_group = options.add_optimization_args(parser) pytorch_translate_options.expand_optimization_args(optimization_group) # Adds args related to checkpointing. checkointing_group = options.add_checkpoint_args(parser) pytorch_translate_options.expand_checkpointing_args(checkointing_group) # Add model related args options.add_model_args(parser) # Adds args for generating intermediate BLEU eval while training. generation_group = options.add_generation_args(parser) pytorch_translate_options.expand_generation_args(generation_group, train=True) # Adds args related to input data files (preprocessing, numberizing, and # binarizing text files; creating vocab files) pytorch_translate_options.add_preprocessing_args(parser) return parser
def add_cmdline_args(argparser):
    """Add command-line arguments specifically for this agent."""
    DictionaryAgent.add_cmdline_args(argparser)
    agent = argparser.add_argument_group('Fairseq Arguments')
    agent.add_argument('--max-positions', default=1024, type=int, metavar='N',
                       help='max number of tokens in the sequence')
    agent.add_argument('--seed', default=1, type=int, metavar='N',
                       help='pseudo random number generator seed')
    options.add_optimization_args(argparser)
    options.add_generation_args(argparser)
    options.add_model_args(argparser)
def get_parser_with_args(): parser = options.get_parser("Generation") options.add_dataset_args(parser, gen=True) options.add_generation_args(parser) pytorch_translate_generate.add_args(parser) group = parser.add_argument_group("Generation") group.add_argument( "--source-vocab-file", default="", metavar="FILE", help="Path to text file representing the Dictionary to use.", ) group.add_argument( "--target-vocab-file", default="", metavar="FILE", help="Path to text file representing the Dictionary to use.", ) # Add args related to benchmarking. group = parser.add_argument_group("Benchmarking") group.add_argument( "--increment", default=5, type=int, help="Difference in lengths between synthesized sentences. " "Must be integer >=1.", ) group.add_argument( "--max-length", default=100, type=int, help="Maximum allowed length for synthesized sentences. " "Should be greater than --increment.", ) group.add_argument( "--samples-per-length", default=1, type=int, help="Number of sentences to be synthesized at each length. ", ) return parser
def cli_main():
    parser = options.get_training_parser()
    parser.add_argument('--latgpu', action='store_true',
                        help='measure SubTransformer cache misses on GPU')
    parser.add_argument('--latcpu', action='store_true',
                        help='measure SubTransformer cache misses on CPU')
    parser.add_argument(
        '--latiter', type=int, default=300,
        help='how many iterations to run when measuring latency'
    )  # TODO remove it later
    parser.add_argument('--latsilent', action='store_true',
                        help='keep silent when measuring cache misses')
    parser.add_argument('--lat-dataset-path', type=str,
                        default='./device_dataset/lat.tmp',
                        help='the path to write the device dataset')
    parser.add_argument('--lat-dataset-size', type=int, default=200,
                        help='number of data points for the dataset')

    options.add_generation_args(parser)

    args = options.parse_args_and_arch(parser)

    if args.latcpu:
        args.cpu = True
        args.fp16 = False
    else:
        print('GPU mode not supported yet')
        return

    if args.pdb:
        pdb.set_trace()

    main(args)
def cli_main():
    parser = options.get_training_parser()
    parser.add_argument('--latgpu', action='store_true',
                        help='measure SubTransformer latency on GPU')
    parser.add_argument('--latcpu', action='store_true',
                        help='measure SubTransformer latency on CPU')
    parser.add_argument('--latiter', type=int, default=300,
                        help='how many iterations to run when measuring latency')
    parser.add_argument('--latsilent', action='store_true',
                        help='keep silent when measuring latency')
    parser.add_argument('--lat-dataset-path', type=str,
                        default='./latency_dataset/lat.tmp',
                        help='the path to write the latency dataset')
    parser.add_argument('--lat-dataset-size', type=int, default=200,
                        help='number of data points for the dataset')

    options.add_generation_args(parser)

    args = options.parse_args_and_arch(parser)

    if args.latcpu:
        args.cpu = True
        args.fp16 = False

    if args.pdb:
        pdb.set_trace()

    main(args)
def make_parser():
    """
    Additional args:
    1. Provide the dataset dir path using --data.
    2. Loading the dataset doesn't require a config; provide --config-yaml
       to apply additional feature transforms.
    """
    parser = options.get_speech_generation_parser()
    parser.add_argument(
        "--subset",
        default=None,
        type=str,
        required=True,
        help="Subset to use for dataset generation",
    )
    parser.add_argument(
        "--dataset-save-dir",
        default=None,
        type=str,
        required=False,
        help="Dir path in which the datasets are to be saved",
    )
    parser.add_argument(
        "--ref-dataset",
        default=None,
        type=str,
        required=False,
        help="If provided, the ids in the reference dataset will be used to "
        "filter the new dataset generated.",
    )
    parser.add_argument("--dataset-save-token", default="", type=str, required=False)

    options.add_generation_args(parser)
    return parser
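# Hypothetical invocation of make_parser(); the subset name and yaml file
# below are placeholders, not values from the source:
#   python script.py ${DATA_DIR} --subset dev --config-yaml config.yaml
if __name__ == '__main__':
    parser = make_parser()
    args = options.parse_args_and_arch(parser)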
def get_parser_with_args(default_task="pytorch_translate"): parser = options.get_parser("Trainer", default_task=default_task) pytorch_translate_options.add_verbosity_args(parser, train=True) pytorch_translate_options.add_dataset_args(parser, train=True, gen=True) options.add_distributed_training_args(parser) # Adds args related to training (validation and stopping criterions). optimization_group = options.add_optimization_args(parser) pytorch_translate_options.expand_optimization_args(optimization_group) # Adds args related to checkpointing. checkpointing_group = options.add_checkpoint_args(parser) pytorch_translate_options.expand_checkpointing_args(checkpointing_group) # Add model related args options.add_model_args(parser) # Adds args for generating intermediate BLEU eval while training. generation_group = options.add_generation_args(parser) pytorch_translate_options.expand_generation_args(generation_group, train=True) # Adds args related to input data files (preprocessing, numberizing, and # binarizing text files; creating vocab files) pytorch_translate_options.add_preprocessing_args(parser) return parser
def get_parser_with_args():
    parser = options.get_parser('Trainer')
    options.add_dataset_args(parser, train=True, gen=True)
    options.add_distributed_training_args(parser)
    options.add_optimization_args(parser)
    options.add_checkpoint_args(parser)
    options.add_model_args(parser)
    options.add_generation_args(parser)

    parser.add_argument(
        '--log-verbose',
        action='store_true',
        help='Whether to output more verbose logs for debugging/profiling.',
    )

    # Adds args related to training (validation and stopping criterions).
    group = parser.add_argument_group('Optimization')
    group.add_argument(
        '--subepoch-validate-interval',
        default=0,
        type=int,
        metavar='N',
        help='Calculates loss over the validation set every N batch updates. '
        'Note that validation is done at the end of every epoch regardless. '
        'A value of <= 0 disables this.',
    )
    group.add_argument(
        '--stop-time-hr',
        default=-1,
        type=int,
        metavar='N',
        help='Stops training after N hours have elapsed. '
        'A value of < 0 disables this.',
    )
    group.add_argument(
        '--stop-no-best-validate-loss',
        default=-1,
        type=int,
        metavar='N',
        help='Stops training after N validations have been run without '
        'achieving a better loss than before. Note that this is affected by '
        '--validation-interval in how frequently we run validation in the '
        'first place. A value of < 0 disables this.',
    )
    group.add_argument(
        '--stop-no-best-bleu-eval',
        default=-1,
        type=int,
        metavar='N',
        help='Stops training after N evals have been run without '
        'achieving a better BLEU score than before. Note that this is affected '
        'by --generate-bleu-eval-interval in how frequently we run BLEU eval '
        'in the first place. A value of < 0 disables this.',
    )

    # Args related to dataset.
    group = parser.add_argument_group('Dataset and data loading')
    group.add_argument(
        '--source-vocab-file',
        default='',
        metavar='FILE',
        help='Path to text file representing the fairseq Dictionary to use. '
        'If left empty, the dict is auto-generated from source training data.',
    )
    group.add_argument(
        '--source-max-vocab-size',
        default=-1,
        type=int,
        metavar='N',
        help='If a new vocab file needs to be generated, restrict it to the '
        'top N most common words. If we re-use an existing vocab file, this '
        'flag will have no effect. A value of < 0 means no max size.',
    )
    group.add_argument(
        '--target-vocab-file',
        default='',
        metavar='FILE',
        help='Path to text file representing the fairseq Dictionary to use. '
        'If left empty, the dict is auto-generated from target training data.',
    )
    group.add_argument(
        '--target-max-vocab-size',
        default=-1,
        type=int,
        metavar='N',
        help='If a new vocab file needs to be generated, restrict it to the '
        'top N most common words. If we re-use an existing vocab file, this '
        'flag will have no effect. A value of < 0 means no max size.',
    )
    group.add_argument(
        '--train-source-text-file',
        default='',
        metavar='FILE',
        help='Path to raw text file containing source training examples. '
        'This overrides what would be loaded from the data dir.',
    )
    group.add_argument(
        '--train-target-text-file',
        default='',
        metavar='FILE',
        help='Path to raw text file containing target training examples. '
        'This overrides what would be loaded from the data dir.',
    )
    group.add_argument(
        '--eval-source-text-file',
        default='',
        metavar='FILE',
        help='Path to raw text file containing source eval examples for '
        'calculating validation loss and BLEU eval scores. '
        'This overrides what would be loaded from the data dir.',
    )
    group.add_argument(
        '--eval-target-text-file',
        default='',
        metavar='FILE',
        help='Path to raw text file containing target eval examples for '
        'calculating validation loss and BLEU eval scores. '
        'This overrides what would be loaded from the data dir.',
    )

    # Adds args related to checkpointing.
    group = parser.add_argument_group('Checkpointing')
    group.add_argument(
        '--no-end-of-epoch-checkpoints',
        action='store_true',
        help='Disables saving checkpoints at the end of the epoch. '
        'This differs from --no-save and --no-epoch-checkpoints in that it '
        'still allows for intra-epoch checkpoints if --save-interval is set.')

    # Adds args for generating intermediate BLEU eval while training.
    # generate.add_args() adds args used by both train.py and the standalone
    # generate binary, while the flags defined here are used only by train.py.
    generate.add_args(parser)
    group = parser.add_argument_group('Generation')
    group.add_argument(
        '--generate-bleu-eval-per-epoch',
        action='store_true',
        help='Whether to generate BLEU score eval after each epoch.',
    )
    group.add_argument(
        '--generate-bleu-eval-interval',
        default=0,
        type=int,
        metavar='N',
        help='Does BLEU eval every N batch updates. Note that '
        '--save-interval also affects this - we can only eval as '
        'frequently as a checkpoint is written. A value of <= 0 '
        'disables this.',
    )
    group.add_argument(
        '--generate-bleu-eval-avg-checkpoints',
        default=1,
        type=int,
        metavar='N',
        help='Maximum number of last N checkpoints to average over when '
        'doing BLEU eval. Must be >= 1.',
    )
    group.add_argument(
        '--continuous-averaging-after-epochs',
        type=int,
        default=-1,
        help=('Average parameter values after each step since previous '
              'checkpoint, beginning after the specified number of epochs. '),
    )

    return parser
def get_parser_with_args(): parser = options.get_parser("Generation") pytorch_translate_options.add_verbosity_args(parser) pytorch_translate_options.add_dataset_args(parser, gen=True) generation_group = options.add_generation_args(parser) pytorch_translate_options.expand_generation_args(generation_group) # Adds args used by the standalone generate binary. generation_group.add_argument( "--source-vocab-file", default="", metavar="FILE", help="Path to text file representing the Dictionary to use.", ) generation_group.add_argument( "--char-source-vocab-file", default="", metavar="FILE", help=( "Same as --source-vocab-file except using characters. " "(For use with char_source models only.)" ), ) generation_group.add_argument( "--target-vocab-file", default="", metavar="FILE", help="Path to text file representing the Dictionary to use.", ) generation_group.add_argument( "--source-text-file", default="", nargs="+", metavar="FILE", help="Path to raw text file containing examples in source dialect. " "This overrides what would be loaded from the data dir. " "You can specify multiple source files (eg. for use in combination " "with --source-ensembling). By default this will only translate the " "first source file", ) generation_group.add_argument( "--target-text-file", default="", metavar="FILE", help="Path to raw text file containing examples in target dialect. " "This overrides what would be loaded from the data dir.", ) generation_group.add_argument( "--source-binary-file", default="", help="Path for the binary file containing source eval examples. " "(Overrides --source-text-file. Must be used in conjunction with " "--target-binary-file).", ) generation_group.add_argument( "--target-binary-file", default="", help="Path for the binary file containing target eval examples. " "(Overrides --target-text-file. Must be used in conjunction with " "--source-binary-file).", ) generation_group.add_argument( "--translation-output-file", default="", type=str, metavar="FILE", help="Path to text file to store the output of the model. ", ) generation_group.add_argument( "--translation-probs-file", default="", type=str, metavar="FILE", help="Path to text file to store the probs of translation output. ", ) generation_group.add_argument( "--multiling-source-lang-id", type=int, default=None, help=( "Must be set for decoding with multilingual models. Set to i if " "the source language is the i-th language in the training parameter " "--multiling-encoder-lang (0-indexed)" ), ) generation_group.add_argument( "--multiling-target-lang-id", type=int, default=None, help=( "Must be set for decoding with multilingual models. Set to i if " "the target language is the i-th language in the training parameter " "--multiling-decoder-lang (0-indexed)" ), ) generation_group.add_argument( "--source-ensembling", action="store_true", help="If this flag is present, the model will ensemble the predictions " "conditioned on multiple source sentences (one per source-text-file)", ) return parser
def get_parser_with_args(): parser = options.get_parser("Trainer") options.add_dataset_args(parser, train=True, gen=True) options.add_distributed_training_args(parser) options.add_optimization_args(parser) options.add_checkpoint_args(parser) options.add_model_args(parser) options.add_generation_args(parser) parser.add_argument( "--log-verbose", action="store_true", help="Whether to output more verbose logs for debugging/profiling.", ) # Adds args related to training (validation and stopping criterions). group = parser.add_argument_group("Optimization") group.add_argument( "--subepoch-validate-interval", default=0, type=int, metavar="N", help="Calculates loss over the validation set every N batch updates. " "Note that validation is done at the end of every epoch regardless. " "A value of <= 0 disables this.", ) group.add_argument( "--stop-time-hr", default=-1, type=int, metavar="N", help="Stops training after N hours have elapsed. " "A value of < 0 disables this.", ) group.add_argument( "--stop-no-best-validate-loss", default=-1, type=int, metavar="N", help="Stops training after N validations have been run without " "achieving a better loss than before. Note that this is affected by " "--validation-interval in how frequently we run validation in the " "first place. A value of < 0 disables this.", ) group.add_argument( "--stop-no-best-bleu-eval", default=-1, type=int, metavar="N", help="Stops training after N evals have been run without " "achieving a better BLEU score than before. Note that this is affected " "by --generate-bleu-eval-interval in how frequently we run BLEU eval " "in the first place. A value of < 0 disables this.", ) # Args related to dataset. group = parser.add_argument_group("Dataset and data loading") group.add_argument( "--source-vocab-file", default="", metavar="FILE", help="Path to text file representing the fairseq Dictionary to use. " "If left empty, the dict is auto-generated from source training data.", ) group.add_argument( "--source-max-vocab-size", default=-1, type=int, metavar="N", help="If a new vocab file needs to be generated, restrict it to the " "top N most common words. If we re-use an existing vocab file, this " "flag will have no effect. A value of < 0 means no max size.", ) group.add_argument( "--target-vocab-file", default="", metavar="FILE", help="Path to text file representing the fairseq Dictionary to use. " "If left empty, the dict is auto-generated from target training data.", ) group.add_argument( "--target-max-vocab-size", default=-1, type=int, metavar="N", help="If a new vocab file needs to be generated, restrict it to the " "top N most common words. If we re-use an existing vocab file, this " "flag will have no effect. A value of < 0 means no max size.", ) group.add_argument( "--train-source-text-file", default="", metavar="FILE", help="Path to raw text file containing source training examples. " "This overrides what would be loaded from the data dir.", ) group.add_argument( "--train-target-text-file", default="", metavar="FILE", help="Path to raw text file containing target training examples. " "This overrides what would be loaded from the data dir.", ) group.add_argument( "--eval-source-text-file", default="", metavar="FILE", help="Path to raw text file containing source eval examples for " "calculating validation loss and BLEU eval scores. 
" "This overrides what would be loaded from the data dir.", ) group.add_argument( "--eval-target-text-file", default="", metavar="FILE", help="Path to raw text file containing target eval examples for " "calculating validation loss and BLEU eval scores. " "This overrides what would be loaded from the data dir.", ) group.add_argument( "--penalized-target-tokens-file", default="", metavar="FILE", help="Path to text file of tokens to receive a penalty in decoding." "If left empty, no penalty will be applied", ) # Adds args related to checkpointing. group = parser.add_argument_group("Checkpointing") group.add_argument( "--no-end-of-epoch-checkpoints", action="store_true", help="Disables saving checkpoints at the end of the epoch. " "This differs from --no-save and --no-epoch-checkpoints in that it " "still allows for intra-epoch checkpoints if --save-interval is set.", ) group.add_argument( "--max-checkpoints-kept", default=-1, type=int, metavar="N", help="Keep at most the last N checkpoints file around. " "A value < -1 keeps all. " "When --generate-bleu-eval-avg-checkpoints is used and is > N, the " "number of checkpoints kept around is automatically adjusted " "to allow BLEU to work properly.", ) # Adds args for generating intermediate BLEU eval while training. # generate.add_args() adds args used by both train.py and the standalone # generate binary, while the flags defined here are used only by train.py. generate.add_args(parser) group = parser.add_argument_group("Generation") group.add_argument( "--generate-bleu-eval-per-epoch", action="store_true", help="Whether to generate BLEU score eval after each epoch.", ) group.add_argument( "--generate-bleu-eval-interval", default=0, type=int, metavar="N", help="Does BLEU eval every N batch updates. Note that " "--save-interval also affects this - we can only eval as " "frequently as a checkpoint is written. A value of <= 0 " "disables this.", ) group.add_argument( "--generate-bleu-eval-avg-checkpoints", default=1, type=int, metavar="N", help="Maximum number of last N checkpoints to average over when " "doing BLEU eval. Must be >= 1.", ) group.add_argument( "--continuous-averaging-after-epochs", type=int, default=-1, help=("Average parameter values after each step since previous " "checkpoint, beginning after the specified number of epochs. "), ) return parser
def get_parser_with_args(): parser = options.get_parser("Generation", default_task="pytorch_translate") pytorch_translate_options.add_verbosity_args(parser) pytorch_translate_options.add_dataset_args(parser, gen=True) generation_group = options.add_generation_args(parser) pytorch_translate_options.expand_generation_args(generation_group) generation_group.add_argument( "--source-vocab-file", default="", metavar="FILE", help="Path to text file representing the Dictionary to use.", ) generation_group.add_argument( "--char-source-vocab-file", default="", metavar="FILE", help=( "Same as --source-vocab-file except using characters. " "(For use with char_source models only.)" ), ) generation_group.add_argument( "--target-vocab-file", default="", metavar="FILE", help="Path to text file representing the Dictionary to use.", ) generation_group.add_argument( "--multiling-source-lang", action="append", metavar="SRC", help=( "Must be set for decoding with multilingual models. " "Must match an entry from --multiling-encoder-lang from training." ), ) generation_group.add_argument( "--multiling-target-lang", action="append", metavar="TARGET", help=( "Must be set for decoding with multilingual models. " "Must match an entry from --multiling-decoder-lang from training." ), ) # Add args related to benchmarking. group = parser.add_argument_group("Benchmarking") group.add_argument( "--runs-per-length", default=10, type=int, help="Number of times to run generation on each length.", ) group.add_argument( "--examples-per-length", default=1, type=int, help="Sentences of each length to include in each eval (batched if >1).", ) return parser
def main():
    parser = options.get_parser('Generation')
    parser.add_argument('--path', metavar='FILE', required=True, action='append',
                        help='path(s) to model file(s)')
    dataset_args = options.add_dataset_args(parser)
    dataset_args.add_argument('--batch-size', default=32, type=int, metavar='N',
                              help='batch size')
    dataset_args.add_argument('--gen-subset', default='test', metavar='SPLIT',
                              help='data subset to generate (train, valid, test)')
    dataset_args.add_argument('--num-shards', default=1, type=int, metavar='N',
                              help='shard generation over N shards')
    dataset_args.add_argument('--shard-id', default=0, type=int, metavar='ID',
                              help='id of the shard to generate (id < num_shards)')
    options.add_generation_args(parser)

    args = parser.parse_args()
    if args.no_progress_bar and args.log_format is None:
        args.log_format = 'none'
    # print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu
    if hasattr(torch, 'set_grad_enabled'):
        torch.set_grad_enabled(False)

    # Load dataset
    if args.replace_unk is None:
        dataset = data.load_dataset(args.data, [args.gen_subset],
                                    args.source_lang, args.target_lang)
    else:
        dataset = data.load_raw_text_dataset(args.data, [args.gen_subset],
                                             args.source_lang, args.target_lang)
    if args.source_lang is None or args.target_lang is None:
        # record inferred languages in args
        args.source_lang, args.target_lang = dataset.src, dataset.dst

    # Load ensemble
    # print('| loading model(s) from {}'.format(', '.join(args.path)))
    models, _ = utils.load_ensemble_for_inference(args.path, dataset.src_dict,
                                                  dataset.dst_dict)
    # print('| [{}] dictionary: {} types'.format(dataset.src, len(dataset.src_dict)))
    # print('| [{}] dictionary: {} types'.format(dataset.dst, len(dataset.dst_dict)))
    # print('| {} {} {} examples'.format(args.data, args.gen_subset, len(dataset.splits[args.gen_subset])))

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam)

    # Initialize generator
    translator = SequenceGenerator(models, beam_size=args.beam,
                                   stop_early=(not args.no_early_stop),
                                   normalize_scores=(not args.unnormalized),
                                   len_penalty=args.lenpen,
                                   unk_penalty=args.unkpen)
    if use_cuda:
        translator.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    # Generate and compute BLEU score
    # scorer = bleu.Scorer(dataset.dst_dict.pad(), dataset.dst_dict.eos(), dataset.dst_dict.unk())
    max_positions = min(model.max_encoder_positions() for model in models)
    itr = dataset.eval_dataloader(
        args.gen_subset,
        max_sentences=args.batch_size,
        max_positions=max_positions,
        skip_invalid_size_inputs_valid_test=args.skip_invalid_size_inputs_valid_test)
    if args.num_shards > 1:
        if args.shard_id < 0 or args.shard_id >= args.num_shards:
            raise ValueError('--shard-id must be between 0 and num_shards')
        itr = data.sharded_iterator(itr, args.num_shards, args.shard_id)

    num_sentences = 0
    with utils.build_progress_bar(args, itr) as t:
        wps_meter = TimeMeter()
        gen_timer = StopwatchMeter()
        translations = translator.generate_batched_itr(
            t, maxlen_a=args.max_len_a, maxlen_b=args.max_len_b,
            cuda_device=0 if use_cuda else None, timer=gen_timer)
        correct = 0
        total = 0
        for sample_id, src_tokens, target_tokens, hypos in translations:
            # Process input and ground truth
            target_tokens = target_tokens.int().cpu()
            # Either retrieve the original sentences or regenerate them from tokens.
            if align_dict is not None:
                src_str = dataset.splits[args.gen_subset].src.get_original_text(sample_id)
                target_str = dataset.splits[args.gen_subset].dst.get_original_text(sample_id)
            else:
                src_str = dataset.src_dict.string(src_tokens, args.remove_bpe)
                target_str = dataset.dst_dict.string(target_tokens,
                                                     args.remove_bpe,
                                                     escape_unk=True)

            # if not args.quiet:
            #     print('S-{}\t{}'.format(sample_id, src_str))
            #     print('T-{}\t{}'.format(sample_id, target_str))
            total += 1

            # Process top predictions
            for i, hypo in enumerate(hypos[:min(len(hypos), args.nbest)]):
                hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                    hypo_tokens=hypo['tokens'].int().cpu(),
                    src_str=src_str,
                    alignment=hypo['alignment'].int().cpu(),
                    align_dict=align_dict,
                    dst_dict=dataset.dst_dict,
                    remove_bpe=args.remove_bpe)
                # if src_str == 'walk around right thrice after jump opposite left twice':
                #     import pdb; pdb.set_trace()
                # if not args.quiet:
                #     print('H-{}\t{}\t{}'.format(sample_id, hypo['score'], hypo_str))
                #     print('A-{}\t{}'.format(sample_id, ' '.join(map(str, alignment))))

                # Score only the top hypothesis
                if i == 0:
                    if align_dict is not None or args.remove_bpe is not None:
                        # Convert back to tokens for evaluation with unk
                        # replacement and/or without BPE
                        target_tokens = tokenizer.Tokenizer.tokenize(
                            target_str, dataset.dst_dict, add_if_not_exist=True)
                    # scorer.add(target_tokens, hypo_tokens)
                    mat = ''
                    for row in hypo['attention']:
                        for column in row:
                            mat += str(column) + '\t'
                        mat += '\n'
                    tar = '/' + target_str
                    tra = '=' + str(target_str == hypo_str)
                    to_write.write(mat)
                    to_write.write(src_str)
                    to_write.write('\n')
                    to_write.write(hypo_str)
                    to_write.write('\n')
                    to_write.write(tar)
                    to_write.write('\n')
                    to_write.write(tra)
                    to_write.write('\n')
                    to_write.write('-----------')
                    to_write.write('\n')
                    if hypo_str == target_str:
                        correct += 1

            wps_meter.update(src_tokens.size(0))
            t.log({'wps': round(wps_meter.avg)})
            num_sentences += 1

    print('| Correct : {} - Total: {}. Accuracy: {:.5f}'.format(
        correct, total, correct / total))
def main():
    parser = options.get_parser('Generation')
    parser.add_argument('--path', metavar='FILE', required=True, action='append',
                        help='path(s) to model file(s)')
    dataset_args = options.add_dataset_args(parser)
    dataset_args.add_argument('-i', '--interactive', action='store_true',
                              help='generate translations in interactive mode')
    dataset_args.add_argument('--batch-size', default=32, type=int, metavar='N',
                              help='batch size')
    dataset_args.add_argument('--gen-subset', default='test', metavar='SPLIT',
                              help='data subset to generate (train, valid, test)')
    options.add_generation_args(parser)

    args = parser.parse_args()
    print(args)

    if args.no_progress_bar:
        progress_bar.enabled = False
    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load model and dataset
    print('| loading model(s) from {}'.format(', '.join(args.path)))
    models, dataset = utils.load_ensemble_for_inference(args.path, args.data)

    print('| [{}] dictionary: {} types'.format(dataset.src, len(dataset.src_dict)))
    print('| [{}] dictionary: {} types'.format(dataset.dst, len(dataset.dst_dict)))
    if not args.interactive:
        print('| {} {} {} examples'.format(args.data, args.gen_subset,
                                           len(dataset.splits[args.gen_subset])))

    # Optimize model for generation
    for model in models:
        model.make_generation_fast_(not args.no_beamable_mm)

    # Initialize generator
    translator = SequenceGenerator(models, dataset.dst_dict, beam_size=args.beam,
                                   stop_early=(not args.no_early_stop),
                                   normalize_scores=(not args.unnormalized),
                                   len_penalty=args.lenpen)

    align_dict = {}
    if args.unk_replace_dict != '':
        assert args.interactive, \
            'Unknown word replacement requires access to the original source ' \
            'and is only supported in interactive mode'
        with open(args.unk_replace_dict, 'r') as f:
            for line in f:
                l = line.split()
                align_dict[l[0]] = l[1]

    def replace_unk(hypo_str, align_str, src, unk):
        hypo_tokens = hypo_str.split()
        src_tokens = tokenizer.tokenize_line(src)
        align_idx = [int(i) for i in align_str.split()]
        for i, ht in enumerate(hypo_tokens):
            if ht == unk:
                src_token = src_tokens[align_idx[i]]
                if src_token in align_dict:
                    hypo_tokens[i] = align_dict[src_token]
                else:
                    hypo_tokens[i] = src_token
        return ' '.join(hypo_tokens)

    if use_cuda:
        translator.cuda()

    bpe_symbol = '@@ ' if args.remove_bpe else None

    def display_hypotheses(id, src, orig, ref, hypos):
        id_str = '' if id is None else '-{}'.format(id)
        src_str = to_sentence(dataset.src_dict, src, bpe_symbol)
        print('S{}\t{}'.format(id_str, src_str))
        if orig is not None:
            print('O{}\t{}'.format(id_str, orig.strip()))
        if ref is not None:
            print('T{}\t{}'.format(id_str, to_sentence(dataset.dst_dict, ref,
                                                       bpe_symbol, ref_unk=True)))
        for hypo in hypos:
            hypo_str = to_sentence(dataset.dst_dict, hypo['tokens'], bpe_symbol)
            align_str = ' '.join(map(str, hypo['alignment']))
            if args.unk_replace_dict != '':
                hypo_str = replace_unk(hypo_str, align_str, orig,
                                       unk_symbol(dataset.dst_dict))
            print('H{}\t{}\t{}'.format(id_str, hypo['score'], hypo_str))
            print('A{}\t{}'.format(id_str, align_str))

    if args.interactive:
        for line in sys.stdin:
            tokens = tokenizer.Tokenizer.tokenize(line, dataset.src_dict,
                                                  add_if_not_exist=False).long()
            start = dataset.src_dict.pad() + 1
            positions = torch.arange(start, start + len(tokens)).type_as(tokens)
            if use_cuda:
                positions = positions.cuda()
                tokens = tokens.cuda()
            translations = translator.generate(Variable(tokens.view(1, -1)),
                                               Variable(positions.view(1, -1)))
            hypos = translations[0]
            display_hypotheses(None, tokens, line, None,
                               hypos[:min(len(hypos), args.nbest)])
    else:
        def maybe_remove_bpe(tokens):
            """Helper for removing BPE symbols from a hypothesis."""
            if not args.remove_bpe:
                return tokens
            assert (tokens == dataset.dst_dict.pad()).sum() == 0
            hypo_minus_bpe = to_sentence(dataset.dst_dict, tokens, bpe_symbol)
            return tokenizer.Tokenizer.tokenize(hypo_minus_bpe, dataset.dst_dict,
                                                add_if_not_exist=True)

        # Generate and compute BLEU score
        scorer = bleu.Scorer(dataset.dst_dict.pad(), dataset.dst_dict.eos(),
                             dataset.dst_dict.unk())
        itr = dataset.dataloader(args.gen_subset, batch_size=args.batch_size,
                                 max_positions=args.max_positions)
        num_sentences = 0
        with progress_bar(itr, smoothing=0, leave=False) as t:
            wps_meter = TimeMeter()
            gen_timer = StopwatchMeter()
            translations = translator.generate_batched_itr(
                t, maxlen_a=args.max_len_a, maxlen_b=args.max_len_b,
                cuda_device=0 if use_cuda else None, timer=gen_timer)
            for id, src, ref, hypos in translations:
                ref = ref.int().cpu()
                top_hypo = hypos[0]['tokens'].int().cpu()
                scorer.add(maybe_remove_bpe(ref), maybe_remove_bpe(top_hypo))
                display_hypotheses(id, src, None, ref,
                                   hypos[:min(len(hypos), args.nbest)])
                wps_meter.update(src.size(0))
                t.set_postfix(wps='{:5d}'.format(round(wps_meter.avg)))
                num_sentences += 1

        print('| Translated {} sentences ({} tokens) in {:.1f}s ({:.2f} tokens/s)'.format(
            num_sentences, gen_timer.n, gen_timer.sum, 1. / gen_timer.avg))
        print('| Generate {} with beam={}: {}'.format(args.gen_subset, args.beam,
                                                      scorer.result_string()))
def add_constrainted_generation_args(parser):
    group = add_generation_args(parser)
    group.add_argument('--order-constr', action='store_true',
                       help='activate order constraint')
    return group
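# A hedged sketch of wiring the helper above into a generation parser; it
# mirrors how the other entry points in this file build theirs, and assumes a
# data path is supplied on argv.
from fairseq import options

parser = options.get_parser('Generation')
options.add_dataset_args(parser, gen=True)
group = add_constrainted_generation_args(parser)  # adds --order-constr plus the standard generation flags
args = parser.parse_args()
if args.order_constr:
    print('| order constraint active')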
def add_args(parser):
    """Add criterion-specific arguments to the parser."""
    from fairseq.options import add_generation_args
    add_generation_args(parser)
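# Minimal sketch (an assumption about older fairseq APIs, not from the
# source): add_args() also works on a plain ArgumentParser, pulling in the
# generation flags; the --beam value is only an illustration.
import argparse

parser = argparse.ArgumentParser()
add_args(parser)
args = parser.parse_args(['--beam', '10'])
print(args.beam)  # 10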
group.add_argument("--one-minus", action="store_true") group.add_argument("--one-head", action="store_true") group.add_argument("--encoder-self-only", action="store_true", help="Only prune from the encoder self attention") group.add_argument("--encoder-decoder-only", action="store_true", help="Only prune from the encoder decoder attention") group.add_argument("--decoder-self-only", action="store_true", help="Only prune from the decoder self attention") if __name__ == '__main__': parser = options.get_training_parser() add_pruning_args(parser) options.add_pruning_args(parser) options.add_generation_args(parser) args = options.parse_args_and_arch(parser) if args.distributed_port > 0 or args.distributed_init_method is not None: from distributed_train import main as distributed_main distributed_main(args) elif args.distributed_world_size > 1: from multiprocessing_train import main as multiprocessing_main multiprocessing_main(args) else: main(args)
def add_cmdline_args(argparser):
    """Add command-line arguments specifically for this agent."""
    # first we need to add the general torch agent operations
    TorchAgent.add_cmdline_args(argparser)

    agent = argparser.add_argument_group('Fairseq Arguments')
    agent.add_argument(
        '--seed',
        default=1,
        type=int,
        metavar='N',
        help='pseudo random number generator seed'
    )
    agent.add_argument(
        '--skip-generation',
        default=False,
        type=bool,
        metavar='BOOL',
        help='Skips test time beam search. Much faster if you only need PPL',
    )

    # Dictionary construction stuff. Using the subclass in case we end up
    # needing any fairseq specific things
    _FairseqDictionary.add_cmdline_args(argparser)

    # Optimization and learning rate schedule specific arguments
    options.add_optimization_args(argparser)
    known_args = argparser.parse_known_args(nohelp=True)[0]
    if hasattr(known_args, "optimizer"):
        optimizer = known_args.optimizer
        opt_group = argparser.add_argument_group(
            '{} optimizer arguments'.format(optimizer)
        )
        optim.OPTIMIZER_REGISTRY[optimizer].add_args(opt_group)
    if hasattr(known_args, "lr_scheduler"):
        lr_scheduler = known_args.lr_scheduler
        lr_group = argparser.add_argument_group(
            '{} scheduler arguments'.format(lr_scheduler)
        )
        optim.lr_scheduler.LR_SCHEDULER_REGISTRY[lr_scheduler].add_args(lr_group)

    # Generation arguments
    options.add_generation_args(argparser)

    # We need to find out the fairseq model-specific options, so grab the
    # architecture stuff and look up its options
    arch_group = options.add_model_args(argparser)
    # Fairseq marks the arch flag as required, but it may be specified
    # by a saved model cache, so we do some weird stuff to undo that
    for a in arch_group._actions:
        if a.dest == "arch":
            a.required = False
            a.default = None
            break
    known_args = argparser.parse_known_args(nohelp=True)[0]
    if hasattr(known_args, "arch") and known_args.arch is not None:
        arch = known_args.arch
        arch_group = argparser.add_argument_group(
            "{} architecture arguments".format(arch)
        )
        models.ARCH_MODEL_REGISTRY[arch].add_args(arch_group)

    # Override a few defaults from within fairseq to more sensible defaults
    argparser.set_defaults(
        clip_norm=0.1,
        adam_betas="(0.9,0.98)"
    )
def add_cmdline_args(cls, argparser):
    """Add command-line arguments specifically for this agent."""
    # first we need to add the general torch agent operations
    super(FairseqAgent, cls).add_cmdline_args(argparser)

    # let's store any defaults that were overridden
    old_defaults = argparser._defaults
    if 'clip_norm' not in old_defaults:
        # fairseq has a few awful defaults
        old_defaults['clip_norm'] = 1.0
    if 'optimizer' not in old_defaults:
        old_defaults['optimizer'] = 'adam'
        old_defaults['adam_betas'] = '(0.9,0.98)'

    agent = argparser.add_argument_group('Fairseq Arguments')
    agent.add_argument('--fp16', default=False, type='bool',
                       help='Use fp16 training')
    agent.add_argument(
        '--fp16-init-scale',
        default=2 ** 7,
        type=int,
        help='default FP16 loss scale',
    )
    agent.add_argument(
        '--seed',
        default=1,
        type=int,
        metavar='N',
        help='pseudo random number generator seed',
    )
    agent.add_argument(
        '--skip-generation',
        default=False,
        type='bool',
        metavar='BOOL',
        help='Skips test time beam search. Much faster if you only need PPL',
    )

    # Check subargs for generation, optimizers, criterions, archs, etc
    options.add_generation_args(argparser)
    options.add_optimization_args(argparser)
    options.add_checkpoint_args(argparser)

    # restore any user set defaults that fairseq possibly overrode
    argparser.set_defaults(**old_defaults)

    known_args = argparser.parse_known_args(nohelp=True)[0]

    if hasattr(known_args, "optimizer"):
        optimizer = known_args.optimizer
        opt_group = argparser.add_argument_group(
            '{} optimizer arguments'.format(optimizer))
        optim.OPTIMIZER_REGISTRY[optimizer].add_args(opt_group)
    if hasattr(known_args, "lr_scheduler"):
        lr_scheduler = known_args.lr_scheduler
        lr_group = argparser.add_argument_group(
            '{} scheduler arguments'.format(lr_scheduler))
        optim.lr_scheduler.LR_SCHEDULER_REGISTRY[lr_scheduler].add_args(lr_group)

    # We need to find out the fairseq model-specific options, so grab the
    # architecture stuff and look up its options
    arch_group = options.add_model_args(argparser)
    # Fairseq marks the arch flag as required, but it may be specified
    # by a saved model cache, so we do some weird stuff to undo that
    for a in arch_group._actions:
        if a.dest == "arch":
            a.required = False
            a.default = None
            break

    # once again restore any user-set defaults
    argparser.set_defaults(**old_defaults)
    known_args = argparser.parse_known_args(nohelp=True)[0]

    if hasattr(known_args, "arch") and known_args.arch is not None:
        arch = known_args.arch
        arch_group = argparser.add_argument_group(
            "{} architecture arguments".format(arch))
        models.ARCH_MODEL_REGISTRY[arch].add_args(arch_group)

    if hasattr(known_args, "criterion"):
        crit_group = argparser.add_argument_group(
            '{} criterion arguments'.format(known_args.criterion))
        criterions.CRITERION_REGISTRY[known_args.criterion].add_args(crit_group)

    # one last time, restore any user set defaults
    argparser.set_defaults(**old_defaults)
def get_parser_with_args(): parser = options.get_parser("Trainer") options.add_dataset_args(parser, train=True, gen=True) options.add_distributed_training_args(parser) options.add_optimization_args(parser) options.add_checkpoint_args(parser) options.add_model_args(parser) options.add_generation_args(parser) parser.add_argument( "--log-verbose", action="store_true", help="Whether to output more verbose logs for debugging/profiling.", ) # Adds args related to training (validation and stopping criterions). group = parser.add_argument_group("Optimization") group.add_argument( "--subepoch-validate-interval", default=0, type=int, metavar="N", help="Calculates loss over the validation set every N batch updates. " "Note that validation is done at the end of every epoch regardless. " "A value of <= 0 disables this.", ) group.add_argument( "--stop-time-hr", default=-1, type=int, metavar="N", help="Stops training after N hours have elapsed. " "A value of < 0 disables this.", ) group.add_argument( "--stop-no-best-validate-loss", default=-1, type=int, metavar="N", help="Stops training after N validations have been run without " "achieving a better loss than before. Note that this is affected by " "--validation-interval in how frequently we run validation in the " "first place. A value of < 0 disables this.", ) group.add_argument( "--stop-no-best-bleu-eval", default=-1, type=int, metavar="N", help="Stops training after N evals have been run without " "achieving a better BLEU score than before. Note that this is affected " "by --generate-bleu-eval-interval in how frequently we run BLEU eval " "in the first place. A value of < 0 disables this.", ) # Adds args related to input data files (preprocessing, numberizing, and # binarizing text files; creating vocab files) preprocess.add_args(parser) # Adds args related to checkpointing. group = parser.add_argument_group("Checkpointing") group.add_argument( "--no-end-of-epoch-checkpoints", action="store_true", help="Disables saving checkpoints at the end of the epoch. " "This differs from --no-save and --no-epoch-checkpoints in that it " "still allows for intra-epoch checkpoints if --save-interval is set.", ) group.add_argument( "--max-checkpoints-kept", default=-1, type=int, metavar="N", help="Keep at most the last N checkpoints file around. " "A value < -1 keeps all. " "When --generate-bleu-eval-avg-checkpoints is used and is > N, the " "number of checkpoints kept around is automatically adjusted " "to allow BLEU to work properly.", ) # Adds args for generating intermediate BLEU eval while training. # generate.add_args() adds args used by both train.py and the standalone # generate binary, while the flags defined here are used only by train.py. generate.add_args(parser) group = parser.add_argument_group("Generation") group.add_argument( "--generate-bleu-eval-per-epoch", action="store_true", help="Whether to generate BLEU score eval after each epoch.", ) group.add_argument( "--generate-bleu-eval-interval", default=0, type=int, metavar="N", help="Does BLEU eval every N batch updates. Note that " "--save-interval also affects this - we can only eval as " "frequently as a checkpoint is written. A value of <= 0 " "disables this.", ) group.add_argument( "--generate-bleu-eval-avg-checkpoints", default=1, type=int, metavar="N", help="Maximum number of last N checkpoints to average over when " "doing BLEU eval. 
Must be >= 1.", ) group.add_argument( "--continuous-averaging-after-epochs", type=int, default=-1, help=("Average parameter values after each step since previous " "checkpoint, beginning after the specified number of epochs. "), ) return parser
def add_cmdline_args(cls, argparser):
    """Add command-line arguments specifically for this agent."""
    # first we need to add the general torch agent operations
    TorchAgent.add_cmdline_args(argparser)

    agent = argparser.add_argument_group('Fairseq Arguments')
    agent.add_argument('--fp16', default=False, type=bool,
                       help='Use fp16 training')
    agent.add_argument('--seed', default=1, type=int, metavar='N',
                       help='pseudo random number generator seed')
    agent.add_argument(
        '--skip-generation',
        default=False,
        type=bool,
        metavar='BOOL',
        help='Skips test time beam search. Much faster if you only need PPL',
    )

    # Dictionary construction stuff. Using the subclass in case we end up
    # needing any fairseq specific things
    cls.dictionary_class().add_cmdline_args(argparser)

    # Check subargs for generation, optimizers, criterions, archs, etc
    options.add_generation_args(argparser)
    options.add_optimization_args(argparser)

    # make sure we set defaults according to the model before parsing
    argparser.set_defaults(**cls.DEFAULT_OPTIONS)
    known_args = argparser.parse_known_args(nohelp=True)[0]

    if hasattr(known_args, "optimizer"):
        optimizer = known_args.optimizer
        opt_group = argparser.add_argument_group(
            '{} optimizer arguments'.format(optimizer))
        optim.OPTIMIZER_REGISTRY[optimizer].add_args(opt_group)
    if hasattr(known_args, "lr_scheduler"):
        lr_scheduler = known_args.lr_scheduler
        lr_group = argparser.add_argument_group(
            '{} scheduler arguments'.format(lr_scheduler))
        optim.lr_scheduler.LR_SCHEDULER_REGISTRY[lr_scheduler].add_args(lr_group)

    # We need to find out the fairseq model-specific options, so grab the
    # architecture stuff and look up its options
    arch_group = options.add_model_args(argparser)
    # Fairseq marks the arch flag as required, but it may be specified
    # by a saved model cache, so we do some weird stuff to undo that
    for a in arch_group._actions:
        if a.dest == "arch":
            a.required = False
            a.default = None
            break

    # make sure we set defaults according to parlai model before parsing
    argparser.set_defaults(**cls.DEFAULT_OPTIONS)
    known_args = argparser.parse_known_args(nohelp=True)[0]

    if hasattr(known_args, "arch") and known_args.arch is not None:
        arch = known_args.arch
        arch_group = argparser.add_argument_group(
            "{} architecture arguments".format(arch))
        models.ARCH_MODEL_REGISTRY[arch].add_args(arch_group)

    if hasattr(known_args, "criterion"):
        crit_group = argparser.add_argument_group(
            '{} criterion arguments'.format(known_args.criterion))
        criterions.CRITERION_REGISTRY[known_args.criterion].add_args(crit_group)

    # As one final check, let's make sure we set defaults correctly
    argparser.set_defaults(**cls.DEFAULT_OPTIONS)
def get_parser_with_args():
    parser = options.get_parser("Generation", default_task="pytorch_translate")
    pytorch_translate_options.add_verbosity_args(parser)
    pytorch_translate_options.add_dataset_args(parser, gen=True)
    generation_group = options.add_generation_args(parser)
    pytorch_translate_options.expand_generation_args(generation_group)

    # Adds args used by the standalone generate binary.
    generation_group.add_argument(
        "--source-vocab-file",
        default="",
        metavar="FILE",
        help="Path to text file representing the Dictionary to use.",
    )
    generation_group.add_argument(
        "--char-source-vocab-file",
        default="",
        metavar="FILE",
        help="Same as --source-vocab-file except using characters. "
        "(For use with char_source models only.)",
    )
    generation_group.add_argument(
        "--target-vocab-file",
        default="",
        metavar="FILE",
        help="Path to text file representing the Dictionary to use.",
    )
    generation_group.add_argument(
        "--source-text-file",
        default="",
        nargs="+",
        metavar="FILE",
        help="Path to raw text file containing examples in source dialect. "
        "This overrides what would be loaded from the data dir. "
        "You can specify multiple source files (e.g. for use in combination "
        "with --source-ensembling). By default this will only translate the "
        "first source file.",
    )
    generation_group.add_argument(
        "--target-text-file",
        default="",
        metavar="FILE",
        help="Path to raw text file containing examples in target dialect. "
        "This overrides what would be loaded from the data dir.",
    )
    generation_group.add_argument(
        "--source-binary-file",
        default="",
        help="Path for the binary file containing source eval examples. "
        "(Overrides --source-text-file. Must be used in conjunction with "
        "--target-binary-file.)",
    )
    generation_group.add_argument(
        "--target-binary-file",
        default="",
        help="Path for the binary file containing target eval examples. "
        "(Overrides --target-text-file. Must be used in conjunction with "
        "--source-binary-file.)",
    )
    generation_group.add_argument(
        "--translation-output-file",
        default="",
        type=str,
        metavar="FILE",
        help="Path to text file to store the output of the model.",
    )
    generation_group.add_argument(
        "--translation-probs-file",
        default="",
        type=str,
        metavar="FILE",
        help="Path to text file to store the probs of translation output.",
    )
    generation_group.add_argument(
        "--multiling-source-lang",
        action="append",
        metavar="SRC",
        help="Must be set for decoding with multilingual models. "
        "Must match an entry from --multiling-encoder-lang from training.",
    )
    generation_group.add_argument(
        "--multiling-target-lang",
        action="append",
        metavar="TARGET",
        help="Must be set for decoding with multilingual models. "
        "Must match an entry from --multiling-decoder-lang from training.",
    )
    generation_group.add_argument(
        "--source-ensembling",
        action="store_true",
        help="If this flag is present, the model will ensemble the predictions "
        "conditioned on multiple source sentences (one per source text file).",
    )
    generation_group.add_argument(
        "--competing-completed-beam-search",
        action="store_true",
        help="If this flag is present, use the alternative beam search "
        "implementation in research/beam_search. This beam search keeps "
        "completed hypos in the beam and lets them compete against hypo "
        "expansions in the next time step.",
    )

    return parser
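# Hypothetical invocation sketch: supply file-based eval inputs to the parser
# built above. All paths are placeholders, and parse_known_args is used since
# the underlying fairseq/pytorch_translate option groups may require further
# flags (such as a positional data directory) depending on the version.
def _example_generation_args():
    parser = get_parser_with_args()
    args, _ = parser.parse_known_args([
        "--source-vocab-file", "dicts/src.vocab",
        "--target-vocab-file", "dicts/tgt.vocab",
        "--source-text-file", "eval/src.txt",
        "--target-text-file", "eval/tgt.txt",
        "--translation-output-file", "out/hyps.txt",
    ])
    return args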
def main():
    parser = options.get_parser('Generation')
    parser.add_argument('--path', metavar='FILE', required=True, action='append',
                        help='path(s) to model file(s)')
    dataset_args = options.add_dataset_args(parser)
    dataset_args.add_argument('--batch-size', default=32, type=int, metavar='N',
                              help='batch size')
    dataset_args.add_argument('--gen-subset', default='test', metavar='SPLIT',
                              help='data subset to generate (train, valid, test)')
    options.add_generation_args(parser)

    args = parser.parse_args()
    if args.no_progress_bar and args.log_format is None:
        args.log_format = 'none'
    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load dataset. When unknown word replacement is requested, load the raw
    # text version so the original sentences are available.
    if args.replace_unk is None:
        dataset = data.load_dataset(args.data, [args.gen_subset],
                                    args.source_lang, args.target_lang)
    else:
        dataset = data.load_raw_text_dataset(args.data, [args.gen_subset],
                                             args.source_lang, args.target_lang)
    if args.source_lang is None or args.target_lang is None:
        # Record inferred languages in args.
        args.source_lang, args.target_lang = dataset.src, dataset.dst

    # Load ensemble.
    print('| loading model(s) from {}'.format(', '.join(args.path)))
    models, _ = utils.load_ensemble_for_inference(args.path, dataset.src_dict,
                                                  dataset.dst_dict)

    print('| [{}] dictionary: {} types'.format(dataset.src, len(dataset.src_dict)))
    print('| [{}] dictionary: {} types'.format(dataset.dst, len(dataset.dst_dict)))
    print('| {} {} {} examples'.format(args.data, args.gen_subset,
                                       len(dataset.splits[args.gen_subset])))

    # Optimize ensemble for generation.
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam)

    # Initialize generator.
    translator = SequenceGenerator(
        models, beam_size=args.beam, stop_early=(not args.no_early_stop),
        normalize_scores=(not args.unnormalized), len_penalty=args.lenpen,
        unk_penalty=args.unkpen)
    if use_cuda:
        translator.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary).
    align_dict = utils.load_align_dict(args.replace_unk)

    # Generate and compute BLEU score.
    scorer = bleu.Scorer(dataset.dst_dict.pad(), dataset.dst_dict.eos(),
                         dataset.dst_dict.unk())
    max_positions = min(model.max_encoder_positions() for model in models)
    itr = dataset.eval_dataloader(
        args.gen_subset, max_sentences=args.batch_size,
        max_positions=max_positions,
        skip_invalid_size_inputs_valid_test=args.skip_invalid_size_inputs_valid_test)
    num_sentences = 0
    with utils.build_progress_bar(args, itr) as t:
        wps_meter = TimeMeter()
        gen_timer = StopwatchMeter()
        translations = translator.generate_batched_itr(
            t, maxlen_a=args.max_len_a, maxlen_b=args.max_len_b,
            cuda_device=0 if use_cuda else None, timer=gen_timer)
        for sample_id, src_tokens, target_tokens, hypos in translations:
            # Process input and ground truth.
            target_tokens = target_tokens.int().cpu()
            # Either retrieve the original sentences or regenerate them from tokens.
            if align_dict is not None:
                src_str = dataset.splits[args.gen_subset].src.get_original_text(sample_id)
                target_str = dataset.splits[args.gen_subset].dst.get_original_text(sample_id)
            else:
                src_str = dataset.src_dict.string(src_tokens, args.remove_bpe)
                target_str = dataset.dst_dict.string(target_tokens, args.remove_bpe,
                                                     escape_unk=True)

            if not args.quiet:
                print('S-{}\t{}'.format(sample_id, src_str))
                print('T-{}\t{}'.format(sample_id, target_str))

            # Process top predictions.
            for i, hypo in enumerate(hypos[:min(len(hypos), args.nbest)]):
                hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                    hypo_tokens=hypo['tokens'].int().cpu(),
                    src_str=src_str,
                    alignment=hypo['alignment'].int().cpu(),
                    align_dict=align_dict,
                    dst_dict=dataset.dst_dict,
                    remove_bpe=args.remove_bpe)
                if not args.quiet:
                    print('H-{}\t{}\t{}'.format(sample_id, hypo['score'], hypo_str))
                    print('A-{}\t{}'.format(sample_id, ' '.join(map(str, alignment))))
                # Score only the top hypothesis.
                if i == 0:
                    if align_dict is not None or args.remove_bpe is not None:
                        # Convert back to tokens for evaluation with unk
                        # replacement and/or without BPE.
                        target_tokens = tokenizer.Tokenizer.tokenize(
                            target_str, dataset.dst_dict, add_if_not_exist=True)
                    scorer.add(target_tokens, hypo_tokens)

            wps_meter.update(src_tokens.size(0))
            t.log({'wps': round(wps_meter.avg)})
            num_sentences += 1

    print('| Translated {} sentences ({} tokens) in {:.1f}s ({:.2f} tokens/s)'.format(
        num_sentences, gen_timer.n, gen_timer.sum, 1. / gen_timer.avg))
    print('| Generate {} with beam={}: {}'.format(args.gen_subset, args.beam,
                                                  scorer.result_string()))
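# Hypothetical invocation sketch for main() above. It assumes this (old)
# fairseq CLI registers a positional data directory via add_dataset_args;
# the data directory and checkpoint path are placeholders.
if __name__ == '__main__':
    import sys
    sys.argv = [
        'generate.py',
        'data-bin/wmt14.en-fr',            # data directory (placeholder)
        '--path', 'checkpoints/model.pt',  # model checkpoint (placeholder)
        '--gen-subset', 'test',
        '--batch-size', '32',
        '--beam', '5',
    ]
    main()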