Example #1
def add_cmdline_args(argparser):
    """Add command-line arguments specifically for this agent."""
    DictionaryAgent.add_cmdline_args(argparser)
    agent = argparser.add_argument_group('Fairseq Arguments')
    agent.add_argument(
        '-tr', '--truncate',
        type=int, default=-1,
        help='truncate input & output lengths to speed up training (may '
             'reduce accuracy). This fixes all input and output to have a '
             'maximum length. This reduces the total amount of padding in '
             'the batches.')
    agent.add_argument(
        '--max-positions',
        default=1024,
        type=int,
        metavar='N',
        help='max number of tokens in the sequence')
    agent.add_argument(
        '--seed',
        default=1,
        type=int,
        metavar='N',
        help='pseudo random number generator seed')
    options.add_optimization_args(argparser)
    options.add_generation_args(argparser)
    options.add_model_args(argparser)
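The snippets in this collection all follow the same basic pattern: obtain a fairseq parser, attach argument groups such as the generation arguments, then parse. Below is a minimal standalone sketch of that pattern; it is not taken from any of the projects listed here and assumes an older fairseq release where these `options` helpers exist with the signatures shown in the examples:

def build_generation_parser():
    from fairseq import options
    # get_parser creates the base parser; add_generation_args attaches the
    # 'Generation' argument group (beam size, length penalty, etc.) and
    # returns it so callers can extend it with their own flags.
    parser = options.get_parser('Generation')
    options.add_dataset_args(parser, gen=True)
    generation_group = options.add_generation_args(parser)
    # '--my-flag' is a hypothetical flag added purely for illustration.
    generation_group.add_argument('--my-flag', action='store_true',
                                  help='example of extending the generation group')
    return parser

# Typical usage (sketch):
#     args = options.parse_args_and_arch(build_generation_parser())
#     print(args.beam, args.nbest)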
Example #2
def cli_main():
    parser = options.get_training_parser()
    parser.add_argument('--train-subtransformer', action='store_true', default=False,
                        help='whether to train a SuperTransformer or a SubTransformer')
    parser.add_argument('--sub-configs', required=False, is_config_file=True,
                        help='when training a SubTransformer, use --configs to specify the '
                             'architecture and --sub-configs to specify other settings')

    # for profiling
    parser.add_argument('--profile-flops', action='store_true', help='measure the FLOPs of a SubTransformer')

    parser.add_argument('--latgpu', action='store_true', help='measure SubTransformer latency on GPU')
    parser.add_argument('--latcpu', action='store_true', help='measure SubTransformer latency on CPU')
    parser.add_argument('--latiter', type=int, default=300, help='how many iterations to run when measuring latency')
    parser.add_argument('--latsilent', action='store_true', help='keep silent when measuring latency')

    parser.add_argument('--validate-subtransformer', action='store_true', help='evaluate the SubTransformer on the validation set')

    options.add_generation_args(parser)

    args = options.parse_args_and_arch(parser)

    if args.latcpu:
        args.cpu = True
        args.fp16 = False

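    # Latency/FLOPs profiling runs in a single process, so the distributed
    # world size is forced to 1 below.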
    if args.latgpu or args.latcpu or args.profile_flops:
        args.distributed_world_size = 1

    if args.pdb:
        pdb.set_trace()

    if args.distributed_init_method is None:
        distributed_utils.infer_init_method(args)

    if args.distributed_init_method is not None:
        # distributed training
        if torch.cuda.device_count() > 1 and not args.distributed_no_spawn:
            start_rank = args.distributed_rank
            args.distributed_rank = None  # assign automatically
            torch.multiprocessing.spawn(
                fn=distributed_main,
                args=(args, start_rank),
                nprocs=torch.cuda.device_count(),
            )
        else:
            distributed_main(args.device_id, args)
    elif args.distributed_world_size > 1:
        # fallback for single node with multiple GPUs
        assert args.distributed_world_size <= torch.cuda.device_count()
        port = random.randint(10000, 20000)
        args.distributed_init_method = 'tcp://localhost:{port}'.format(port=port)
        args.distributed_rank = None  # set based on device id
        if max(args.update_freq) > 1 and args.ddp_backend != 'no_c10d':
            print('| NOTE: you may get better performance with: --ddp-backend=no_c10d')
        torch.multiprocessing.spawn(
            fn=distributed_main,
            args=(args, ),
            nprocs=args.distributed_world_size,
        )
    else:
        # single GPU training
        main(args)
Example #3
def get_parser_with_args():
    parser = options.get_parser('Generation')
    options.add_dataset_args(parser, gen=True)
    options.add_generation_args(parser)
    add_args(parser)

    group = parser.add_argument_group('Generation')
    group.add_argument(
        '--source-vocab-file',
        default='',
        metavar='FILE',
        help='Path to text file representing the Dictionary to use.')
    group.add_argument(
        '--target-vocab-file',
        default='',
        metavar='FILE',
        help='Path to text file representing the Dictionary to use.')
    group.add_argument(
        '--source-text-file',
        default='',
        metavar='FILE',
        help='Path to raw text file containing examples in source dialect. '
        'This overrides what would be loaded from the data dir.',
    )
    group.add_argument(
        '--target-text-file',
        default='',
        metavar='FILE',
        help='Path to raw text file containing examples in target dialect. '
        'This overrides what would be loaded from the data dir.',
    )

    return parser
Example #4
def get_parser_with_args():
    parser = options.get_parser("Generation")
    options.add_dataset_args(parser, gen=True)
    options.add_generation_args(parser)
    add_args(parser)

    group = parser.add_argument_group("Generation")
    group.add_argument(
        "--source-vocab-file",
        default="",
        metavar="FILE",
        help="Path to text file representing the Dictionary to use.",
    )
    group.add_argument(
        "--target-vocab-file",
        default="",
        metavar="FILE",
        help="Path to text file representing the Dictionary to use.",
    )
    group.add_argument(
        "--source-text-file",
        default="",
        metavar="FILE",
        help="Path to raw text file containing examples in source dialect. "
        "This overrides what would be loaded from the data dir.",
    )
    group.add_argument(
        "--target-text-file",
        default="",
        metavar="FILE",
        help="Path to raw text file containing examples in target dialect. "
        "This overrides what would be loaded from the data dir.",
    )

    return parser
Example #5
def main():
    parser = options.get_parser('Generation')
    parser.add_argument('--path', metavar='FILE', required=True, action='append',
                        help='path(s) to model file(s)')
    options.add_dataset_args(parser)
    options.add_generation_args(parser)

    args = parser.parse_args()
    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load ensemble
    print('| loading model(s) from {}'.format(', '.join(args.path)))
    models, model_args = utils.load_ensemble_for_inference(args.path, data_dir=args.data)
    src_dict, dst_dict = models[0].src_dict, models[0].dst_dict

    print('| [{}] dictionary: {} types'.format(model_args.source_lang, len(src_dict)))
    print('| [{}] dictionary: {} types'.format(model_args.target_lang, len(dst_dict)))

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam)

    # Initialize generator
    translator = SequenceGenerator(
        models, beam_size=args.beam, stop_early=(not args.no_early_stop),
        normalize_scores=(not args.unnormalized), len_penalty=args.lenpen,
        unk_penalty=args.unkpen)
    if use_cuda:
        translator.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    print('| Type the input sentence and press return:')
    for src_str in sys.stdin:
        src_str = src_str.strip()
        src_tokens = tokenizer.Tokenizer.tokenize(src_str, src_dict, add_if_not_exist=False).long()
        if use_cuda:
            src_tokens = src_tokens.cuda()
        translations = translator.generate(Variable(src_tokens.view(1, -1)))
        hypos = translations[0]
        print('O\t{}'.format(src_str))

        # Process top predictions
        for hypo in hypos[:min(len(hypos), args.nbest)]:
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'].int().cpu(),
                align_dict=align_dict,
                dst_dict=dst_dict,
                remove_bpe=args.remove_bpe)
            print('H\t{}\t{}'.format(hypo['score'], hypo_str))
            print('A\t{}'.format(' '.join(map(str, alignment))))
Example #6
File: train.py Project: nilesh-c/kgqa
def get_training_and_generation_parser(default_task='translation'):
    parser = options.get_parser('Trainer', default_task)
    options.add_dataset_args(parser, train=True, gen=True)
    options.add_generation_args(parser)
    options.add_distributed_training_args(parser)
    options.add_model_args(parser)
    options.add_optimization_args(parser)
    options.add_checkpoint_args(parser)
    return parser
Example #7
def cli_main():
    parser = options.get_training_parser()
    parser.add_argument('--evo-configs', required=True, is_config_file=True)
    parser.add_argument('--evo-iter', type=int, default=30)
    parser.add_argument('--population-size', type=int, default=125)
    parser.add_argument('--parent-size', type=int, default=25)
    parser.add_argument('--mutation-size', type=int, default=50)
    parser.add_argument('--crossover-size', type=int, default=50)
    parser.add_argument('--mutation-prob', type=float, default=0.3)

    parser.add_argument('--feature-norm',
                        type=float,
                        nargs='+',
                        help='normalizing factor for each feature')
    parser.add_argument('--lat-norm',
                        type=float,
                        help='normalizing factor for latency')
    parser.add_argument('--ckpt-path',
                        type=str,
                        help='path to load latency predictor weights')

    parser.add_argument('--latency-constraint',
                        type=float,
                        default=-1,
                        help='latency constraint')
    parser.add_argument(
        '--valid-cnt-max',
        type=int,
        default=int(1e9),
        help='max number of sentences to use in validation set')

    parser.add_argument(
        '--write-config-path',
        type=str,
        help='path to write out the searched best SubTransformer')

    options.add_generation_args(parser)

    args = options.parse_args_and_arch(parser)

    if args.pdb:
        pdb.set_trace()

    # one GPU is fast enough to do the search
    args.distributed_world_size = 1

    # if search on CPU, use fp32 as default
    if args.cpu:
        args.fp16 = False

    main(args)
Example #8
def get_parser_with_args():
    parser = options.get_parser("Collect Top-K Probs",
                                default_task="pytorch_translate")
    pytorch_translate_options.add_verbosity_args(parser)
    pytorch_translate_options.add_dataset_args(parser, gen=True)
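    # add_generation_args returns the newly created "Generation" argument
    # group, so the extra flags below can be attached directly to it.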
    generation_group = options.add_generation_args(parser)

    generation_group.add_argument(
        "--source-binary-file",
        default="",
        help="Path for the binary file containing source eval examples. "
        "(Overrides --source-text-file. Must be used in conjunction with "
        "--target-binary-file).",
    )
    generation_group.add_argument(
        "--target-binary-file",
        default="",
        help="Path for the binary file containing target eval examples. "
        "(Overrides --target-text-file. Must be used in conjunction with "
        "--source-binary-file).",
    )
    generation_group.add_argument(
        "--k-probs-to-collect",
        type=int,
        default=8,
        help="Number of probabilities to collect for each output step.",
    )
    generation_group.add_argument(
        "--top-k-probs-binary-file",
        type=str,
        default="",
        help="File into which to save top-K probabilities for each token.",
    )
    return parser
Example #9
def cli_main(
    modify_parser: Optional[Callable[[argparse.ArgumentParser], None]] = None
) -> None:
    parser = options.get_training_parser()
    # options.add_pruning_args(parser)
    options.add_generation_args(parser)
    args = options.parse_args_and_arch(parser, modify_parser=modify_parser)

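    # Newer fairseq converts the argparse namespace into a Hydra/OmegaConf
    # config before dispatching through distributed_utils.call_main.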
    cfg = convert_namespace_to_omegaconf(args)

    if args.profile:
        with torch.cuda.profiler.profile():
            with torch.autograd.profiler.emit_nvtx():
                distributed_utils.call_main(cfg, main)
    else:
        distributed_utils.call_main(cfg, main)
Example #10
def get_parser_with_args():
    parser = options.get_parser("Trainer")
    parser.add_argument(
        "--log-verbose",
        action="store_true",
        help="Whether to output more verbose logs for debugging/profiling.",
    )
    pytorch_translate_options.add_dataset_args(parser, train=True, gen=True)
    options.add_distributed_training_args(parser)
    # Adds args related to training (validation and stopping criterions).
    optimization_group = options.add_optimization_args(parser)
    pytorch_translate_options.expand_optimization_args(optimization_group)
    # Adds args related to checkpointing.
    checkpointing_group = options.add_checkpoint_args(parser)
    pytorch_translate_options.expand_checkpointing_args(checkpointing_group)
    # Add model related args
    options.add_model_args(parser)
    # Adds args for generating intermediate BLEU eval while training.
    generation_group = options.add_generation_args(parser)
    pytorch_translate_options.expand_generation_args(generation_group,
                                                     train=True)
    # Adds args related to input data files (preprocessing, numberizing, and
    # binarizing text files; creating vocab files)
    pytorch_translate_options.add_preprocessing_args(parser)
    return parser
Example #11
def add_cmdline_args(argparser):
    """Add command-line arguments specifically for this agent."""
    DictionaryAgent.add_cmdline_args(argparser)
    agent = argparser.add_argument_group('Fairseq Arguments')
    agent.add_argument('--max-positions',
                       default=1024,
                       type=int,
                       metavar='N',
                       help='max number of tokens in the sequence')
    agent.add_argument('--seed',
                       default=1,
                       type=int,
                       metavar='N',
                       help='pseudo random number generator seed')
    options.add_optimization_args(argparser)
    options.add_generation_args(argparser)
    options.add_model_args(argparser)
Example #12
def get_parser_with_args():
    parser = options.get_parser("Generation")
    options.add_dataset_args(parser, gen=True)
    options.add_generation_args(parser)
    pytorch_translate_generate.add_args(parser)

    group = parser.add_argument_group("Generation")
    group.add_argument(
        "--source-vocab-file",
        default="",
        metavar="FILE",
        help="Path to text file representing the Dictionary to use.",
    )
    group.add_argument(
        "--target-vocab-file",
        default="",
        metavar="FILE",
        help="Path to text file representing the Dictionary to use.",
    )

    # Add args related to benchmarking.
    group = parser.add_argument_group("Benchmarking")
    group.add_argument(
        "--increment",
        default=5,
        type=int,
        help="Difference in lengths between synthesized sentences. "
        "Must be integer >=1.",
    )
    group.add_argument(
        "--max-length",
        default=100,
        type=int,
        help="Maximum allowed length for synthesized sentences. "
        "Should be greater than --increment.",
    )
    group.add_argument(
        "--samples-per-length",
        default=1,
        type=int,
        help="Number of sentences to be synthesized at each length. ",
    )

    return parser
Example #13
def cli_main():
    parser = options.get_training_parser()

    parser.add_argument('--latgpu',
                        action='store_true',
                        help='measure SubTransformer cache misses on GPU')
    parser.add_argument('--latcpu',
                        action='store_true',
                        help='measure SubTransformer cache misses on CPU')
    parser.add_argument(
        '--latiter',
        type=int,
        default=300,
        help='how many iterations to run when measuring latency'
    )  # TODO: remove this later
    parser.add_argument('--latsilent',
                        action='store_true',
                        help='keep silent when measuring cache misses')

    parser.add_argument('--lat-dataset-path',
                        type=str,
                        default='./device_dataset/lat.tmp',
                        help='the path to write device dataset')
    parser.add_argument('--lat-dataset-size',
                        type=int,
                        default=200,
                        help='number of data points for the dataset')

    options.add_generation_args(parser)

    args = options.parse_args_and_arch(parser)

    if args.latcpu:
        args.cpu = True
        args.fp16 = False
    else:
        print('GPU mode not supported yet')
        return

    if args.pdb:
        pdb.set_trace()

    main(args)
Example #14
def cli_main():
    parser = options.get_training_parser()

    parser.add_argument('--latgpu',
                        action='store_true',
                        help='measure SubTransformer latency on GPU')
    parser.add_argument('--latcpu',
                        action='store_true',
                        help='measure SubTransformer latency on CPU')
    parser.add_argument(
        '--latiter',
        type=int,
        default=300,
        help='how many iterations to run when measuring latency')
    parser.add_argument('--latsilent',
                        action='store_true',
                        help='keep silent when measuring latency')

    parser.add_argument('--lat-dataset-path',
                        type=str,
                        default='./latency_dataset/lat.tmp',
                        help='the path to write latency dataset')
    parser.add_argument('--lat-dataset-size',
                        type=int,
                        default=200,
                        help='number of data points for the dataset')

    options.add_generation_args(parser)

    args = options.parse_args_and_arch(parser)

    if args.latcpu:
        args.cpu = True
        args.fp16 = False

    if args.pdb:
        pdb.set_trace()

    main(args)
Example #15
def make_parser():
    """
    Additional args:
        1. Provide the dataset dir path using --data.
        2. Loading the dataset doesn't require config, provide --config-yaml to apply additional feature transforms
    """
    parser = options.get_speech_generation_parser()
    parser.add_argument(
        "--subset",
        default=None,
        type=str,
        required=True,
        help="Subset to use for dataset generation",
    )
    parser.add_argument(
        "--dataset-save-dir",
        default=None,
        type=str,
        required=False,
        help="Dir path in which the datasets are to be saved",
    )
    parser.add_argument(
        "--ref-dataset",
        default=None,
        type=str,
        required=False,
        help="If provided, the ids in the reference dataset will be used to "
        "filter the new dataset generated.",
    )
    parser.add_argument("--dataset-save-token",
                        default="",
                        type=str,
                        required=False)

    options.add_generation_args(parser)
    return parser
Example #16
File: train.py Project: dwraft/translate
def get_parser_with_args(default_task="pytorch_translate"):
    parser = options.get_parser("Trainer", default_task=default_task)
    pytorch_translate_options.add_verbosity_args(parser, train=True)
    pytorch_translate_options.add_dataset_args(parser, train=True, gen=True)
    options.add_distributed_training_args(parser)
    # Adds args related to training (validation and stopping criterions).
    optimization_group = options.add_optimization_args(parser)
    pytorch_translate_options.expand_optimization_args(optimization_group)
    # Adds args related to checkpointing.
    checkpointing_group = options.add_checkpoint_args(parser)
    pytorch_translate_options.expand_checkpointing_args(checkpointing_group)
    # Add model related args
    options.add_model_args(parser)
    # Adds args for generating intermediate BLEU eval while training.
    generation_group = options.add_generation_args(parser)
    pytorch_translate_options.expand_generation_args(generation_group, train=True)
    # Adds args related to input data files (preprocessing, numberizing, and
    # binarizing text files; creating vocab files)
    pytorch_translate_options.add_preprocessing_args(parser)
    return parser
Example #17
def main():
    parser = options.get_parser('Generation')
    parser.add_argument('--path',
                        metavar='FILE',
                        required=True,
                        action='append',
                        help='path(s) to model file(s)')
    options.add_dataset_args(parser)
    options.add_generation_args(parser)

    args = parser.parse_args()
    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load ensemble
    print('| loading model(s) from {}'.format(', '.join(args.path)))
    models, model_args = utils.load_ensemble_for_inference(args.path,
                                                           data_dir=args.data)
    src_dict, dst_dict = models[0].src_dict, models[0].dst_dict

    print('| [{}] dictionary: {} types'.format(model_args.source_lang,
                                               len(src_dict)))
    print('| [{}] dictionary: {} types'.format(model_args.target_lang,
                                               len(dst_dict)))

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam)

    # Initialize generator
    translator = SequenceGenerator(models,
                                   beam_size=args.beam,
                                   stop_early=(not args.no_early_stop),
                                   normalize_scores=(not args.unnormalized),
                                   len_penalty=args.lenpen,
                                   unk_penalty=args.unkpen)
    if use_cuda:
        translator.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    print('| Type the input sentence and press return:')
    for src_str in sys.stdin:
        src_str = src_str.strip()
        src_tokens = tokenizer.Tokenizer.tokenize(
            src_str, src_dict, add_if_not_exist=False).long()
        if use_cuda:
            src_tokens = src_tokens.cuda()
        translations = translator.generate(Variable(src_tokens.view(1, -1)))
        hypos = translations[0]
        print('O\t{}'.format(src_str))

        # Process top predictions
        for hypo in hypos[:min(len(hypos), args.nbest)]:
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'].int().cpu(),
                align_dict=align_dict,
                dst_dict=dst_dict,
                remove_bpe=args.remove_bpe)
            print('H\t{}\t{}'.format(hypo['score'], hypo_str))
            print('A\t{}'.format(' '.join(map(str, alignment))))
Example #18
def get_parser_with_args():
    parser = options.get_parser('Trainer')
    options.add_dataset_args(parser, train=True, gen=True)
    options.add_distributed_training_args(parser)
    options.add_optimization_args(parser)
    options.add_checkpoint_args(parser)
    options.add_model_args(parser)
    options.add_generation_args(parser)

    parser.add_argument(
        '--log-verbose',
        action='store_true',
        help='Whether to output more verbose logs for debugging/profiling.',
    )

    # Adds args related to training (validation and stopping criterions).
    group = parser.add_argument_group('Optimization')
    group.add_argument(
        '--subepoch-validate-interval',
        default=0,
        type=int,
        metavar='N',
        help='Calculates loss over the validation set every N batch updates. '
        'Note that validation is done at the end of every epoch regardless. '
        'A value of <= 0 disables this.',
    )
    group.add_argument(
        '--stop-time-hr',
        default=-1,
        type=int,
        metavar='N',
        help='Stops training after N hours have elapsed. '
        'A value of < 0 disables this.',
    )
    group.add_argument(
        '--stop-no-best-validate-loss',
        default=-1,
        type=int,
        metavar='N',
        help='Stops training after N validations have been run without '
        'achieving a better loss than before. Note that this is affected by '
        '--validation-interval in how frequently we run validation in the '
        'first place. A value of < 0 disables this.',
    )
    group.add_argument(
        '--stop-no-best-bleu-eval',
        default=-1,
        type=int,
        metavar='N',
        help='Stops training after N evals have been run without '
        'achieving a better BLEU score than before. Note that this is affected '
        'by --generate-bleu-eval-interval in how frequently we run BLEU eval '
        'in the first place. A value of < 0 disables this.',
    )

    # Args related to dataset.
    group = parser.add_argument_group('Dataset and data loading')
    group.add_argument(
        '--source-vocab-file',
        default='',
        metavar='FILE',
        help='Path to text file representing the fairseq Dictionary to use. '
        'If left empty, the dict is auto-generated from source training data.',
    )
    group.add_argument(
        '--source-max-vocab-size',
        default=-1,
        type=int,
        metavar='N',
        help='If a new vocab file needs to be generated, restrict it to the '
        'top N most common words. If we re-use an existing vocab file, this '
        'flag will have no effect. A value of < 0 means no max size.',
    )
    group.add_argument(
        '--target-vocab-file',
        default='',
        metavar='FILE',
        help='Path to text file representing the fairseq Dictionary to use. '
        'If left empty, the dict is auto-generated from target training data.',
    )
    group.add_argument(
        '--target-max-vocab-size',
        default=-1,
        type=int,
        metavar='N',
        help='If a new vocab file needs to be generated, restrict it to the '
        'top N most common words. If we re-use an existing vocab file, this '
        'flag will have no effect. A value of < 0 means no max size.',
    )
    group.add_argument(
        '--train-source-text-file',
        default='',
        metavar='FILE',
        help='Path to raw text file containing source training examples. '
        'This overrides what would be loaded from the data dir.',
    )
    group.add_argument(
        '--train-target-text-file',
        default='',
        metavar='FILE',
        help='Path to raw text file containing target training examples. '
        'This overrides what would be loaded from the data dir.',
    )
    group.add_argument(
        '--eval-source-text-file',
        default='',
        metavar='FILE',
        help='Path to raw text file containing source eval examples for '
        'calculating validation loss and BLEU eval scores. '
        'This overrides what would be loaded from the data dir.',
    )
    group.add_argument(
        '--eval-target-text-file',
        default='',
        metavar='FILE',
        help='Path to raw text file containing target eval examples for '
        'calculating validation loss and BLEU eval scores. '
        'This overrides what would be loaded from the data dir.',
    )

    # Adds args related to checkpointing.
    group = parser.add_argument_group('Checkpointing')
    group.add_argument(
        '--no-end-of-epoch-checkpoints',
        action='store_true',
        help='Disables saving checkpoints at the end of the epoch. '
        'This differs from --no-save and --no-epoch-checkpoints in that it '
        'still allows for intra-epoch checkpoints if --save-interval is set.')

    # Adds args for generating intermediate BLEU eval while training.
    # generate.add_args() adds args used by both train.py and the standalone
    # generate binary, while the flags defined here are used only by train.py.
    generate.add_args(parser)
    group = parser.add_argument_group('Generation')
    group.add_argument(
        '--generate-bleu-eval-per-epoch',
        action='store_true',
        help='Whether to generate BLEU score eval after each epoch.',
    )
    group.add_argument(
        '--generate-bleu-eval-interval',
        default=0,
        type=int,
        metavar='N',
        help='Does BLEU eval every N batch updates. Note that '
        '--save-interval also affects this - we can only eval as '
        'frequently as a checkpoint is written. A value of <= 0 '
        'disables this.',
    )
    group.add_argument(
        '--generate-bleu-eval-avg-checkpoints',
        default=1,
        type=int,
        metavar='N',
        help='Maximum number of last N checkpoints to average over when '
        'doing BLEU eval. Must be >= 1.',
    )
    group.add_argument(
        '--continuous-averaging-after-epochs',
        type=int,
        default=-1,
        help=('Average parameter values after each step since previous '
              'checkpoint, beginning after the specified number of epochs. '),
    )

    return parser
Example #19
def get_parser_with_args():
    parser = options.get_parser("Generation")
    pytorch_translate_options.add_verbosity_args(parser)
    pytorch_translate_options.add_dataset_args(parser, gen=True)
    generation_group = options.add_generation_args(parser)
    pytorch_translate_options.expand_generation_args(generation_group)

    # Adds args used by the standalone generate binary.
    generation_group.add_argument(
        "--source-vocab-file",
        default="",
        metavar="FILE",
        help="Path to text file representing the Dictionary to use.",
    )
    generation_group.add_argument(
        "--char-source-vocab-file",
        default="",
        metavar="FILE",
        help=(
            "Same as --source-vocab-file except using characters. "
            "(For use with char_source models only.)"
        ),
    )
    generation_group.add_argument(
        "--target-vocab-file",
        default="",
        metavar="FILE",
        help="Path to text file representing the Dictionary to use.",
    )
    generation_group.add_argument(
        "--source-text-file",
        default="",
        nargs="+",
        metavar="FILE",
        help="Path to raw text file containing examples in source dialect. "
        "This overrides what would be loaded from the data dir. "
        "You can specify multiple source files (eg. for use in combination "
        "with --source-ensembling). By default this will only translate the "
        "first source file",
    )
    generation_group.add_argument(
        "--target-text-file",
        default="",
        metavar="FILE",
        help="Path to raw text file containing examples in target dialect. "
        "This overrides what would be loaded from the data dir.",
    )
    generation_group.add_argument(
        "--source-binary-file",
        default="",
        help="Path for the binary file containing source eval examples. "
        "(Overrides --source-text-file. Must be used in conjunction with "
        "--target-binary-file).",
    )
    generation_group.add_argument(
        "--target-binary-file",
        default="",
        help="Path for the binary file containing target eval examples. "
        "(Overrides --target-text-file. Must be used in conjunction with "
        "--source-binary-file).",
    )
    generation_group.add_argument(
        "--translation-output-file",
        default="",
        type=str,
        metavar="FILE",
        help="Path to text file to store the output of the model. ",
    )
    generation_group.add_argument(
        "--translation-probs-file",
        default="",
        type=str,
        metavar="FILE",
        help="Path to text file to store the probs of translation output. ",
    )
    generation_group.add_argument(
        "--multiling-source-lang-id",
        type=int,
        default=None,
        help=(
            "Must be set for decoding with multilingual models. Set to i if "
            "the source language is the i-th language in the training parameter "
            "--multiling-encoder-lang (0-indexed)"
        ),
    )
    generation_group.add_argument(
        "--multiling-target-lang-id",
        type=int,
        default=None,
        help=(
            "Must be set for decoding with multilingual models. Set to i if "
            "the target language is the i-th language in the training parameter "
            "--multiling-decoder-lang (0-indexed)"
        ),
    )
    generation_group.add_argument(
        "--source-ensembling",
        action="store_true",
        help="If this flag is present, the model will ensemble the predictions "
        "conditioned on multiple source sentences (one per source-text-file)",
    )

    return parser
Example #20
def get_parser_with_args():
    parser = options.get_parser("Trainer")
    options.add_dataset_args(parser, train=True, gen=True)
    options.add_distributed_training_args(parser)
    options.add_optimization_args(parser)
    options.add_checkpoint_args(parser)
    options.add_model_args(parser)
    options.add_generation_args(parser)

    parser.add_argument(
        "--log-verbose",
        action="store_true",
        help="Whether to output more verbose logs for debugging/profiling.",
    )

    # Adds args related to training (validation and stopping criterions).
    group = parser.add_argument_group("Optimization")
    group.add_argument(
        "--subepoch-validate-interval",
        default=0,
        type=int,
        metavar="N",
        help="Calculates loss over the validation set every N batch updates. "
        "Note that validation is done at the end of every epoch regardless. "
        "A value of <= 0 disables this.",
    )
    group.add_argument(
        "--stop-time-hr",
        default=-1,
        type=int,
        metavar="N",
        help="Stops training after N hours have elapsed. "
        "A value of < 0 disables this.",
    )
    group.add_argument(
        "--stop-no-best-validate-loss",
        default=-1,
        type=int,
        metavar="N",
        help="Stops training after N validations have been run without "
        "achieving a better loss than before. Note that this is affected by "
        "--validation-interval in how frequently we run validation in the "
        "first place. A value of < 0 disables this.",
    )
    group.add_argument(
        "--stop-no-best-bleu-eval",
        default=-1,
        type=int,
        metavar="N",
        help="Stops training after N evals have been run without "
        "achieving a better BLEU score than before. Note that this is affected "
        "by --generate-bleu-eval-interval in how frequently we run BLEU eval "
        "in the first place. A value of < 0 disables this.",
    )

    # Args related to dataset.
    group = parser.add_argument_group("Dataset and data loading")
    group.add_argument(
        "--source-vocab-file",
        default="",
        metavar="FILE",
        help="Path to text file representing the fairseq Dictionary to use. "
        "If left empty, the dict is auto-generated from source training data.",
    )
    group.add_argument(
        "--source-max-vocab-size",
        default=-1,
        type=int,
        metavar="N",
        help="If a new vocab file needs to be generated, restrict it to the "
        "top N most common words. If we re-use an existing vocab file, this "
        "flag will have no effect. A value of < 0 means no max size.",
    )
    group.add_argument(
        "--target-vocab-file",
        default="",
        metavar="FILE",
        help="Path to text file representing the fairseq Dictionary to use. "
        "If left empty, the dict is auto-generated from target training data.",
    )
    group.add_argument(
        "--target-max-vocab-size",
        default=-1,
        type=int,
        metavar="N",
        help="If a new vocab file needs to be generated, restrict it to the "
        "top N most common words. If we re-use an existing vocab file, this "
        "flag will have no effect. A value of < 0 means no max size.",
    )
    group.add_argument(
        "--train-source-text-file",
        default="",
        metavar="FILE",
        help="Path to raw text file containing source training examples. "
        "This overrides what would be loaded from the data dir.",
    )
    group.add_argument(
        "--train-target-text-file",
        default="",
        metavar="FILE",
        help="Path to raw text file containing target training examples. "
        "This overrides what would be loaded from the data dir.",
    )
    group.add_argument(
        "--eval-source-text-file",
        default="",
        metavar="FILE",
        help="Path to raw text file containing source eval examples for "
        "calculating validation loss and BLEU eval scores. "
        "This overrides what would be loaded from the data dir.",
    )
    group.add_argument(
        "--eval-target-text-file",
        default="",
        metavar="FILE",
        help="Path to raw text file containing target eval examples for "
        "calculating validation loss and BLEU eval scores. "
        "This overrides what would be loaded from the data dir.",
    )
    group.add_argument(
        "--penalized-target-tokens-file",
        default="",
        metavar="FILE",
        help="Path to text file of tokens to receive a penalty in decoding."
        "If left empty, no penalty will be applied",
    )

    # Adds args related to checkpointing.
    group = parser.add_argument_group("Checkpointing")
    group.add_argument(
        "--no-end-of-epoch-checkpoints",
        action="store_true",
        help="Disables saving checkpoints at the end of the epoch. "
        "This differs from --no-save and --no-epoch-checkpoints in that it "
        "still allows for intra-epoch checkpoints if --save-interval is set.",
    )
    group.add_argument(
        "--max-checkpoints-kept",
        default=-1,
        type=int,
        metavar="N",
        help="Keep at most the last N checkpoints file around. "
        "A value < -1 keeps all. "
        "When --generate-bleu-eval-avg-checkpoints is used and is > N, the "
        "number of checkpoints kept around is automatically adjusted "
        "to allow BLEU to work properly.",
    )

    # Adds args for generating intermediate BLEU eval while training.
    # generate.add_args() adds args used by both train.py and the standalone
    # generate binary, while the flags defined here are used only by train.py.
    generate.add_args(parser)
    group = parser.add_argument_group("Generation")
    group.add_argument(
        "--generate-bleu-eval-per-epoch",
        action="store_true",
        help="Whether to generate BLEU score eval after each epoch.",
    )
    group.add_argument(
        "--generate-bleu-eval-interval",
        default=0,
        type=int,
        metavar="N",
        help="Does BLEU eval every N batch updates. Note that "
        "--save-interval also affects this - we can only eval as "
        "frequently as a checkpoint is written. A value of <= 0 "
        "disables this.",
    )
    group.add_argument(
        "--generate-bleu-eval-avg-checkpoints",
        default=1,
        type=int,
        metavar="N",
        help="Maximum number of last N checkpoints to average over when "
        "doing BLEU eval. Must be >= 1.",
    )
    group.add_argument(
        "--continuous-averaging-after-epochs",
        type=int,
        default=-1,
        help=("Average parameter values after each step since previous "
              "checkpoint, beginning after the specified number of epochs. "),
    )

    return parser
Example #21
def get_parser_with_args():
    parser = options.get_parser("Generation", default_task="pytorch_translate")
    pytorch_translate_options.add_verbosity_args(parser)
    pytorch_translate_options.add_dataset_args(parser, gen=True)
    generation_group = options.add_generation_args(parser)
    pytorch_translate_options.expand_generation_args(generation_group)

    generation_group.add_argument(
        "--source-vocab-file",
        default="",
        metavar="FILE",
        help="Path to text file representing the Dictionary to use.",
    )
    generation_group.add_argument(
        "--char-source-vocab-file",
        default="",
        metavar="FILE",
        help=(
            "Same as --source-vocab-file except using characters. "
            "(For use with char_source models only.)"
        ),
    )
    generation_group.add_argument(
        "--target-vocab-file",
        default="",
        metavar="FILE",
        help="Path to text file representing the Dictionary to use.",
    )
    generation_group.add_argument(
        "--multiling-source-lang",
        action="append",
        metavar="SRC",
        help=(
            "Must be set for decoding with multilingual models. "
            "Must match an entry from --multiling-encoder-lang from training."
        ),
    )
    generation_group.add_argument(
        "--multiling-target-lang",
        action="append",
        metavar="TARGET",
        help=(
            "Must be set for decoding with multilingual models. "
            "Must match an entry from --multiling-decoder-lang from training."
        ),
    )

    # Add args related to benchmarking.
    group = parser.add_argument_group("Benchmarking")
    group.add_argument(
        "--runs-per-length",
        default=10,
        type=int,
        help="Number of times to run generation on each length.",
    )
    group.add_argument(
        "--examples-per-length",
        default=1,
        type=int,
        help="Sentences of each length to include in each eval (batched if >1).",
    )

    return parser
Example #22
def main():
    parser = options.get_parser('Generation')
    parser.add_argument('--path',
                        metavar='FILE',
                        required=True,
                        action='append',
                        help='path(s) to model file(s)')
    dataset_args = options.add_dataset_args(parser)
    dataset_args.add_argument('--batch-size',
                              default=32,
                              type=int,
                              metavar='N',
                              help='batch size')
    dataset_args.add_argument(
        '--gen-subset',
        default='test',
        metavar='SPLIT',
        help='data subset to generate (train, valid, test)')
    dataset_args.add_argument('--num-shards',
                              default=1,
                              type=int,
                              metavar='N',
                              help='shard generation over N shards')
    dataset_args.add_argument(
        '--shard-id',
        default=0,
        type=int,
        metavar='ID',
        help='id of the shard to generate (id < num_shards)')
    options.add_generation_args(parser)

    args = parser.parse_args()
    if args.no_progress_bar and args.log_format is None:
        args.log_format = 'none'
#    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu
    if hasattr(torch, 'set_grad_enabled'):
        torch.set_grad_enabled(False)

    # Load dataset
    if args.replace_unk is None:
        dataset = data.load_dataset(args.data, [args.gen_subset],
                                    args.source_lang, args.target_lang)
    else:
        dataset = data.load_raw_text_dataset(args.data, [args.gen_subset],
                                             args.source_lang,
                                             args.target_lang)
    if args.source_lang is None or args.target_lang is None:
        # record inferred languages in args
        args.source_lang, args.target_lang = dataset.src, dataset.dst

    # Load ensemble
#    print('| loading model(s) from {}'.format(', '.join(args.path)))
    models, _ = utils.load_ensemble_for_inference(args.path, dataset.src_dict,
                                                  dataset.dst_dict)

    #    print('| [{}] dictionary: {} types'.format(dataset.src, len(dataset.src_dict)))
    #    print('| [{}] dictionary: {} types'.format(dataset.dst, len(dataset.dst_dict)))
    #    print('| {} {} {} examples'.format(args.data, args.gen_subset, len(dataset.splits[args.gen_subset])))

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam)

    # Initialize generator
    translator = SequenceGenerator(models,
                                   beam_size=args.beam,
                                   stop_early=(not args.no_early_stop),
                                   normalize_scores=(not args.unnormalized),
                                   len_penalty=args.lenpen,
                                   unk_penalty=args.unkpen)
    if use_cuda:
        translator.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    # Generate and compute BLEU score
    #scorer = bleu.Scorer(dataset.dst_dict.pad(), dataset.dst_dict.eos(), dataset.dst_dict.unk())
    max_positions = min(model.max_encoder_positions() for model in models)
    itr = dataset.eval_dataloader(
        args.gen_subset,
        max_sentences=args.batch_size,
        max_positions=max_positions,
        skip_invalid_size_inputs_valid_test=args.skip_invalid_size_inputs_valid_test)
    if args.num_shards > 1:
        if args.shard_id < 0 or args.shard_id >= args.num_shards:
            raise ValueError('--shard-id must be between 0 and num_shards')
        itr = data.sharded_iterator(itr, args.num_shards, args.shard_id)
    num_sentences = 0
    with utils.build_progress_bar(args, itr) as t:
        wps_meter = TimeMeter()
        gen_timer = StopwatchMeter()
        translations = translator.generate_batched_itr(
            t,
            maxlen_a=args.max_len_a,
            maxlen_b=args.max_len_b,
            cuda_device=0 if use_cuda else None,
            timer=gen_timer)

        correct = 0
        total = 0
        for sample_id, src_tokens, target_tokens, hypos in translations:
            # Process input and ground truth
            target_tokens = target_tokens.int().cpu()
            # Either retrieve the original sentences or regenerate them from tokens.
            if align_dict is not None:
                src_str = dataset.splits[
                    args.gen_subset].src.get_original_text(sample_id)
                target_str = dataset.splits[
                    args.gen_subset].dst.get_original_text(sample_id)
            else:
                src_str = dataset.src_dict.string(src_tokens, args.remove_bpe)
                target_str = dataset.dst_dict.string(target_tokens,
                                                     args.remove_bpe,
                                                     escape_unk=True)

#            if not args.quiet:
#                print('S-{}\t{}'.format(sample_id, src_str))
#                print('T-{}\t{}'.format(sample_id, target_str))
            total += 1
            # Process top predictions
            for i, hypo in enumerate(hypos[:min(len(hypos), args.nbest)]):
                hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                    hypo_tokens=hypo['tokens'].int().cpu(),
                    src_str=src_str,
                    alignment=hypo['alignment'].int().cpu(),
                    align_dict=align_dict,
                    dst_dict=dataset.dst_dict,
                    remove_bpe=args.remove_bpe)
                #if src_str == 'walk around right thrice after jump opposite left twice':
                #    import pdb; pdb.set_trace()
                #                if not args.quiet:
                #                    print('H-{}\t{}\t{}'.format(sample_id, hypo['score'], hypo_str))
                #                    print('A-{}\t{}'.format(sample_id, ' '.join(map(str, alignment))))

                # Score only the top hypothesis
                if i == 0:
                    if align_dict is not None or args.remove_bpe is not None:
                        # Convert back to tokens for evaluation with unk replacement and/or without BPE
                        target_tokens = tokenizer.Tokenizer.tokenize(
                            target_str,
                            dataset.dst_dict,
                            add_if_not_exist=True)
                    #scorer.add(target_tokens, hypo_tokens)
                mat = ''
                for row in hypo['attention']:
                    for column in row:
                        mat += str(column) + '\t'
                    mat += '\n'
                tar = '/' + target_str
                tra = '=' + str(target_str == hypo_str)
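                # NOTE: `to_write` is assumed to be a file handle opened
                # elsewhere in the original script; it is not defined in
                # this excerpt.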
                to_write.write(mat)
                to_write.write(src_str)
                to_write.write('\n')
                to_write.write(hypo_str)
                to_write.write('\n')
                to_write.write(tar)
                to_write.write('\n')
                to_write.write(tra)
                to_write.write('\n')
                to_write.write('-----------')
                to_write.write('\n')
                if hypo_str == target_str:
                    correct += 1
            wps_meter.update(src_tokens.size(0))
            t.log({'wps': round(wps_meter.avg)})
            num_sentences += 1

        print('| Correct : {} - Total: {}. Accuracy: {:.5f}'.format(
            correct, total, correct / total))
Example #23
def main():
    parser = options.get_parser('Generation')
    parser.add_argument('--path', metavar='FILE', required=True, action='append',
                        help='path(s) to model file(s)')
    dataset_args = options.add_dataset_args(parser)
    dataset_args.add_argument('-i', '--interactive', action='store_true',
                              help='generate translations in interactive mode')
    dataset_args.add_argument('--batch-size', default=32, type=int, metavar='N',
                              help='batch size')
    dataset_args.add_argument('--gen-subset', default='test', metavar='SPLIT',
                              help='data subset to generate (train, valid, test)')
    options.add_generation_args(parser)

    args = parser.parse_args()
    print(args)

    if args.no_progress_bar:
        progress_bar.enabled = False
    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load model and dataset
    print('| loading model(s) from {}'.format(', '.join(args.path)))
    models, dataset = utils.load_ensemble_for_inference(args.path, args.data)

    print('| [{}] dictionary: {} types'.format(dataset.src, len(dataset.src_dict)))
    print('| [{}] dictionary: {} types'.format(dataset.dst, len(dataset.dst_dict)))
    if not args.interactive:
        print('| {} {} {} examples'.format(args.data, args.gen_subset, len(dataset.splits[args.gen_subset])))

    # Optimize model for generation
    for model in models:
        model.make_generation_fast_(not args.no_beamable_mm)

    # Initialize generator
    translator = SequenceGenerator(models, dataset.dst_dict, beam_size=args.beam,
                                   stop_early=(not args.no_early_stop),
                                   normalize_scores=(not args.unnormalized),
                                   len_penalty=args.lenpen)
    align_dict = {}
    if args.unk_replace_dict != '':
        assert args.interactive, "Unknown word replacement requires access to the original " \
                                 "source and is only supported in interactive mode"
        with open(args.unk_replace_dict, 'r') as f:
            for line in f:
                l = line.split()
                align_dict[l[0]] = l[1]

    def replace_unk(hypo_str, align_str, src, unk):
        hypo_tokens = hypo_str.split()
        src_tokens = tokenizer.tokenize_line(src)
        align_idx = [int(i) for i in align_str.split()]
        for i, ht in enumerate(hypo_tokens):
            if ht == unk:
                src_token = src_tokens[align_idx[i]]
                if src_token in align_dict:
                    hypo_tokens[i] = align_dict[src_token]
                else:
                    hypo_tokens[i] = src_token
        return ' '.join(hypo_tokens)

    if use_cuda:
        translator.cuda()

    bpe_symbol = '@@ ' if args.remove_bpe else None
    def display_hypotheses(id, src, orig, ref, hypos):
        id_str = '' if id is None else '-{}'.format(id)
        src_str = to_sentence(dataset.src_dict, src, bpe_symbol)
        print('S{}\t{}'.format(id_str, src_str))
        if orig is not None:
            print('O{}\t{}'.format(id_str, orig.strip()))
        if ref is not None:
            print('T{}\t{}'.format(id_str, to_sentence(dataset.dst_dict, ref, bpe_symbol, ref_unk=True)))
        for hypo in hypos:
            hypo_str = to_sentence(dataset.dst_dict, hypo['tokens'], bpe_symbol)
            align_str = ' '.join(map(str, hypo['alignment']))
            if args.unk_replace_dict != '':
                hypo_str = replace_unk(hypo_str, align_str, orig, unk_symbol(dataset.dst_dict))
            print('H{}\t{}\t{}'.format(
                id_str, hypo['score'], hypo_str))
            print('A{}\t{}'.format(id_str, align_str))

    if args.interactive:
        for line in sys.stdin:
            tokens = tokenizer.Tokenizer.tokenize(line, dataset.src_dict, add_if_not_exist=False).long()
            start = dataset.src_dict.pad() + 1
            positions = torch.arange(start, start + len(tokens)).type_as(tokens)
            if use_cuda:
                positions = positions.cuda()
                tokens = tokens.cuda()
            translations = translator.generate(Variable(tokens.view(1, -1)), Variable(positions.view(1, -1)))
            hypos = translations[0]
            display_hypotheses(None, tokens, line, None, hypos[:min(len(hypos), args.nbest)])

    else:
        def maybe_remove_bpe(tokens):
            """Helper for removing BPE symbols from a hypothesis."""
            if not args.remove_bpe:
                return tokens
            assert (tokens == dataset.dst_dict.pad()).sum() == 0
            hypo_minus_bpe = to_sentence(dataset.dst_dict, tokens, bpe_symbol)
            return tokenizer.Tokenizer.tokenize(hypo_minus_bpe, dataset.dst_dict, add_if_not_exist=True)

        # Generate and compute BLEU score
        scorer = bleu.Scorer(dataset.dst_dict.pad(), dataset.dst_dict.eos(), dataset.dst_dict.unk())
        itr = dataset.dataloader(args.gen_subset, batch_size=args.batch_size, max_positions=args.max_positions)
        num_sentences = 0
        with progress_bar(itr, smoothing=0, leave=False) as t:
            wps_meter = TimeMeter()
            gen_timer = StopwatchMeter()
            translations = translator.generate_batched_itr(
                t, maxlen_a=args.max_len_a, maxlen_b=args.max_len_b,
                cuda_device=0 if use_cuda else None, timer=gen_timer)
            for id, src, ref, hypos in translations:
                ref = ref.int().cpu()
                top_hypo = hypos[0]['tokens'].int().cpu()
                scorer.add(maybe_remove_bpe(ref), maybe_remove_bpe(top_hypo))
                display_hypotheses(id, src, None, ref, hypos[:min(len(hypos), args.nbest)])

                wps_meter.update(src.size(0))
                t.set_postfix(wps='{:5d}'.format(round(wps_meter.avg)))
                num_sentences += 1

        print('| Translated {} sentences ({} tokens) in {:.1f}s ({:.2f} tokens/s)'.format(
            num_sentences, gen_timer.n, gen_timer.sum, 1. / gen_timer.avg))
        print('| Generate {} with beam={}: {}'.format(args.gen_subset, args.beam, scorer.result_string()))
Example #24
def add_constrainted_generation_args(parser):
    group = add_generation_args(parser)
    group.add_argument('--order-constr',
                       action='store_true',
                       help='activate order constraint')
    return group
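A minimal driver for this helper might look like the following sketch. The parser setup mirrors Example #32 below, the data path is a placeholder, and since `add_constrainted_generation_args` already registers the standard generation arguments nothing else is needed:

from fairseq import options

parser = options.get_parser('Generation')
options.add_dataset_args(parser, gen=True)
add_constrainted_generation_args(parser)  # also registers the standard generation args
args = parser.parse_args(['data-bin/example', '--order-constr'])
print(args.order_constr)  # True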
Example #25
def add_args(parser):
    """Add criterion-specific arguments to the parser."""
    from fairseq.options import add_generation_args
    add_generation_args(parser)
Example #26
    group.add_argument("--one-minus", action="store_true")
    group.add_argument("--one-head", action="store_true")
    group.add_argument("--encoder-self-only",
                       action="store_true",
                       help="Only prune from the encoder self attention")
    group.add_argument("--encoder-decoder-only",
                       action="store_true",
                       help="Only prune from the encoder decoder attention")
    group.add_argument("--decoder-self-only",
                       action="store_true",
                       help="Only prune from the decoder self attention")


if __name__ == '__main__':
    parser = options.get_training_parser()
    add_pruning_args(parser)  # registers the pruning flags defined above
    options.add_generation_args(parser)
    args = options.parse_args_and_arch(parser)

    if args.distributed_port > 0 or args.distributed_init_method is not None:
        from distributed_train import main as distributed_main

        distributed_main(args)
    elif args.distributed_world_size > 1:
        from multiprocessing_train import main as multiprocessing_main

        multiprocessing_main(args)
    else:
        main(args)
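The three `--*-only` flags above restrict pruning to a single attention type. A hypothetical downstream helper (not from the source) makes the intended semantics explicit:

def prunable_attention_types(args):
    # Each --*-only flag narrows pruning to one attention type;
    # with none of them set, all three types are eligible.
    if args.encoder_self_only:
        return ['encoder_self']
    if args.encoder_decoder_only:
        return ['encoder_decoder']
    if args.decoder_self_only:
        return ['decoder_self']
    return ['encoder_self', 'encoder_decoder', 'decoder_self']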
Example #27
File: fairseq.py  Project: zxch3n/ParlAI
    def add_cmdline_args(argparser):
        """Add command-line arguments specifically for this agent."""
        # first we need to add the general torch agent operations
        TorchAgent.add_cmdline_args(argparser)

        agent = argparser.add_argument_group('Fairseq Arguments')
        agent.add_argument(
            '--seed',
            default=1,
            type=int,
            metavar='N',
            help='pseudo random number generator seed'
        )
        agent.add_argument(
            '--skip-generation',
            default=False,
            type='bool',  # ParlAI registers a string-to-bool type; plain bool treats any non-empty string as True
            metavar='BOOL',
            help='Skips test time beam search. Much faster if you only need PPL',
        )

        # Dictionary construction stuff. Using the subclass in case we end up
        # needing any fairseq specific things
        _FairseqDictionary.add_cmdline_args(argparser)

        # Optimization and learning rate schedule specific arguments
        options.add_optimization_args(argparser)
        known_args = argparser.parse_known_args(nohelp=True)[0]
        if hasattr(known_args, "optimizer"):
            optimizer = known_args.optimizer
            opt_group = argparser.add_argument_group(
                '{} optimizer arguments'.format(optimizer)
            )
            optim.OPTIMIZER_REGISTRY[optimizer].add_args(opt_group)
        if hasattr(known_args, "lr_scheduler"):
            lr_scheduler = known_args.lr_scheduler
            lr_group = argparser.add_argument_group(
                '{} scheduler arguments'.format(lr_scheduler)
            )
            optim.lr_scheduler.LR_SCHEDULER_REGISTRY[lr_scheduler].add_args(lr_group)

        # Generation arguments
        options.add_generation_args(argparser)

        # We need to find out the fairseq model-specific options, so grab the
        # architecture stuff and look up its options
        arch_group = options.add_model_args(argparser)
        # Fairseq marks the arch flag as required, but it may be specified
        # by a saved model cache, so we do some weird stuff to undo that
        for a in arch_group._actions:
            if a.dest == "arch":
                a.required = False
                a.default = None
                break
        known_args = argparser.parse_known_args(nohelp=True)[0]
        if hasattr(known_args, "arch") and known_args.arch is not None:
            arch = known_args.arch
            arch_group = argparser.add_argument_group(
                "{} architecture arguments".format(arch)
            )
            models.ARCH_MODEL_REGISTRY[arch].add_args(arch_group)

        # Override a few defaults from within fairseq to more sensible defaults
        argparser.set_defaults(
            clip_norm=0.1,
            adam_betas="(0.9,0.98)"
        )
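The loop over `arch_group._actions` above works because argparse argument groups share their parent parser's action list, so flipping `Action.required` after registration relaxes the constraint. A self-contained demonstration of the trick:

import argparse

p = argparse.ArgumentParser()
g = p.add_argument_group('model')
g.add_argument('--arch', required=True)
for a in g._actions:          # groups share the parser's action list
    if a.dest == 'arch':
        a.required = False    # omitting --arch is no longer an error
        a.default = None
        break
print(p.parse_args([]))       # Namespace(arch=None)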
Example #28
    def add_cmdline_args(cls, argparser):
        """Add command-line arguments specifically for this agent."""
        # first we need to add the general torch agent operations
        super(FairseqAgent, cls).add_cmdline_args(argparser)

        # let's store any defaults that were overridden
        old_defaults = argparser._defaults
        if 'clip_norm' not in old_defaults:
            # fairseq has a few awful defaults
            old_defaults['clip_norm'] = 1.0
        if 'optimizer' not in old_defaults:
            old_defaults['optimizer'] = 'adam'
            old_defaults['adam_betas'] = '(0.9,0.98)'

        agent = argparser.add_argument_group('Fairseq Arguments')
        agent.add_argument('--fp16',
                           default=False,
                           type='bool',
                           help='Use fp16 training')
        agent.add_argument(
            '--fp16-init-scale',
            default=2**7,
            type=int,
            help='default FP16 loss scale',
        )
        agent.add_argument(
            '--seed',
            default=1,
            type=int,
            metavar='N',
            help='pseudo random number generator seed',
        )
        agent.add_argument(
            '--skip-generation',
            default=False,
            type='bool',
            metavar='BOOL',
            help='Skips test time beam search. Much faster if you only need PPL',
        )

        # Check subargs for generation, optimizers, criterions, archs, etc
        options.add_generation_args(argparser)
        options.add_optimization_args(argparser)
        options.add_checkpoint_args(argparser)

        # restore any user set defaults that fairseq possibly overrode
        argparser.set_defaults(**old_defaults)
        known_args = argparser.parse_known_args(nohelp=True)[0]

        if hasattr(known_args, "optimizer"):
            optimizer = known_args.optimizer
            opt_group = argparser.add_argument_group(
                '{} optimizer arguments'.format(optimizer))
            optim.OPTIMIZER_REGISTRY[optimizer].add_args(opt_group)
        if hasattr(known_args, "lr_scheduler"):
            lr_scheduler = known_args.lr_scheduler
            lr_group = argparser.add_argument_group(
                '{} scheduler arguments'.format(lr_scheduler))
            optim.lr_scheduler.LR_SCHEDULER_REGISTRY[lr_scheduler].add_args(
                lr_group)
        # We need to find out the fairseq model-specific options, so grab the
        # architecture stuff and look up its options
        arch_group = options.add_model_args(argparser)
        # Fairseq marks the arch flag as required, but it may be specified
        # by a saved model cache, so we do some weird stuff to undo that
        for a in arch_group._actions:
            if a.dest == "arch":
                a.required = False
                a.default = None
                break

        # once again restore any user-set defaults
        argparser.set_defaults(**old_defaults)
        known_args = argparser.parse_known_args(nohelp=True)[0]

        if hasattr(known_args, "arch") and known_args.arch is not None:
            arch = known_args.arch
            arch_group = argparser.add_argument_group(
                "{} architecture arguments".format(arch))
            models.ARCH_MODEL_REGISTRY[arch].add_args(arch_group)

        if hasattr(known_args, "criterion"):
            crit_group = argparser.add_argument_group(
                '{} criterion arguments'.format(known_args.criterion))
            criterions.CRITERION_REGISTRY[known_args.criterion].add_args(
                crit_group)

        # one last time, restore any user set defaults
        argparser.set_defaults(**old_defaults)
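The `old_defaults` dance above works because `set_defaults` both records values in the parser's `_defaults` dict and rewrites the defaults of any matching actions, so re-applying a snapshot after fairseq's registration restores the user's values. A minimal sketch (flag name is illustrative):

import argparse

p = argparse.ArgumentParser()
p.set_defaults(clip_norm=5.0)                            # user-chosen default
saved = dict(p._defaults)                                # snapshot before library setup
p.add_argument('--clip-norm', type=float, default=0.1)   # library's default
p.set_defaults(**saved)                                  # restores 5.0 on the action too
print(p.parse_args([]).clip_norm)                        # 5.0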
Example #29
def get_parser_with_args():
    parser = options.get_parser("Trainer")
    options.add_dataset_args(parser, train=True, gen=True)
    options.add_distributed_training_args(parser)
    options.add_optimization_args(parser)
    options.add_checkpoint_args(parser)
    options.add_model_args(parser)
    options.add_generation_args(parser)

    parser.add_argument(
        "--log-verbose",
        action="store_true",
        help="Whether to output more verbose logs for debugging/profiling.",
    )

    # Adds args related to training (validation and stopping criteria).
    group = parser.add_argument_group("Optimization")
    group.add_argument(
        "--subepoch-validate-interval",
        default=0,
        type=int,
        metavar="N",
        help="Calculates loss over the validation set every N batch updates. "
        "Note that validation is done at the end of every epoch regardless. "
        "A value of <= 0 disables this.",
    )
    group.add_argument(
        "--stop-time-hr",
        default=-1,
        type=int,
        metavar="N",
        help="Stops training after N hours have elapsed. "
        "A value of < 0 disables this.",
    )
    group.add_argument(
        "--stop-no-best-validate-loss",
        default=-1,
        type=int,
        metavar="N",
        help="Stops training after N validations have been run without "
        "achieving a better loss than before. Note that this is affected by "
        "--validation-interval in how frequently we run validation in the "
        "first place. A value of < 0 disables this.",
    )
    group.add_argument(
        "--stop-no-best-bleu-eval",
        default=-1,
        type=int,
        metavar="N",
        help="Stops training after N evals have been run without "
        "achieving a better BLEU score than before. Note that this is affected "
        "by --generate-bleu-eval-interval in how frequently we run BLEU eval "
        "in the first place. A value of < 0 disables this.",
    )

    # Adds args related to input data files (preprocessing, numberizing, and
    # binarizing text files; creating vocab files)
    preprocess.add_args(parser)

    # Adds args related to checkpointing.
    group = parser.add_argument_group("Checkpointing")
    group.add_argument(
        "--no-end-of-epoch-checkpoints",
        action="store_true",
        help="Disables saving checkpoints at the end of the epoch. "
        "This differs from --no-save and --no-epoch-checkpoints in that it "
        "still allows for intra-epoch checkpoints if --save-interval is set.",
    )
    group.add_argument(
        "--max-checkpoints-kept",
        default=-1,
        type=int,
        metavar="N",
        help="Keep at most the last N checkpoints file around. "
        "A value < -1 keeps all. "
        "When --generate-bleu-eval-avg-checkpoints is used and is > N, the "
        "number of checkpoints kept around is automatically adjusted "
        "to allow BLEU to work properly.",
    )

    # Adds args for generating intermediate BLEU eval while training.
    # generate.add_args() adds args used by both train.py and the standalone
    # generate binary, while the flags defined here are used only by train.py.
    generate.add_args(parser)
    group = parser.add_argument_group("Generation")
    group.add_argument(
        "--generate-bleu-eval-per-epoch",
        action="store_true",
        help="Whether to generate BLEU score eval after each epoch.",
    )
    group.add_argument(
        "--generate-bleu-eval-interval",
        default=0,
        type=int,
        metavar="N",
        help="Does BLEU eval every N batch updates. Note that "
        "--save-interval also affects this - we can only eval as "
        "frequently as a checkpoint is written. A value of <= 0 "
        "disables this.",
    )
    group.add_argument(
        "--generate-bleu-eval-avg-checkpoints",
        default=1,
        type=int,
        metavar="N",
        help="Maximum number of last N checkpoints to average over when "
        "doing BLEU eval. Must be >= 1.",
    )
    group.add_argument(
        "--continuous-averaging-after-epochs",
        type=int,
        default=-1,
        help=("Average parameter values after each step since previous "
              "checkpoint, beginning after the specified number of epochs. "),
    )

    return parser
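As an illustration of how the stopping flags compose, `--stop-time-hr` reduces to a simple wall-clock check in the training loop. The helper below is hypothetical, not from the source:

import time

def should_stop_on_time(start_time, stop_time_hr):
    # A negative value disables the time-based stop, matching the
    # "A value of < 0 disables this" convention used by these flags.
    if stop_time_hr < 0:
        return False
    return (time.time() - start_time) / 3600.0 >= stop_time_hr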
Example #30
    def add_cmdline_args(cls, argparser):
        """Add command-line arguments specifically for this agent."""
        # first we need to add the general torch agent operations
        TorchAgent.add_cmdline_args(argparser)

        agent = argparser.add_argument_group('Fairseq Arguments')
        agent.add_argument('--fp16',
                           default=False,
                           type='bool',  # ParlAI's registered string-to-bool type
                           help='Use fp16 training')
        agent.add_argument('--seed',
                           default=1,
                           type=int,
                           metavar='N',
                           help='pseudo random number generator seed')
        agent.add_argument(
            '--skip-generation',
            default=False,
            type='bool',
            metavar='BOOL',
            help='Skips test time beam search. Much faster if you only need PPL',
        )

        # Dictionary construction stuff. Using the subclass in case we end up
        # needing any fairseq specific things
        cls.dictionary_class().add_cmdline_args(argparser)

        # Check subargs for generation, optimizers, criterions, archs, etc
        options.add_generation_args(argparser)
        options.add_optimization_args(argparser)

        # make sure we set defaults according to the model before parsing
        argparser.set_defaults(**cls.DEFAULT_OPTIONS)
        known_args = argparser.parse_known_args(nohelp=True)[0]

        if hasattr(known_args, "optimizer"):
            optimizer = known_args.optimizer
            opt_group = argparser.add_argument_group(
                '{} optimizer arguments'.format(optimizer))
            optim.OPTIMIZER_REGISTRY[optimizer].add_args(opt_group)
        if hasattr(known_args, "lr_scheduler"):
            lr_scheduler = known_args.lr_scheduler
            lr_group = argparser.add_argument_group(
                '{} scheduler arguments'.format(lr_scheduler))
            optim.lr_scheduler.LR_SCHEDULER_REGISTRY[lr_scheduler].add_args(
                lr_group)
        # We need to find out the fairseq model-specific options, so grab the
        # architecture stuff and look up its options
        arch_group = options.add_model_args(argparser)
        # Fairseq marks the arch flag as required, but it may be specified
        # by a saved model cache, so we do some weird stuff to undo that
        for a in arch_group._actions:
            if a.dest == "arch":
                a.required = False
                a.default = None
                break

        # make sure we set defaults according to parlai model before parsing
        argparser.set_defaults(**cls.DEFAULT_OPTIONS)
        known_args = argparser.parse_known_args(nohelp=True)[0]

        if hasattr(known_args, "arch") and known_args.arch is not None:
            arch = known_args.arch
            arch_group = argparser.add_argument_group(
                "{} architecture arguments".format(arch))
            models.ARCH_MODEL_REGISTRY[arch].add_args(arch_group)

        if hasattr(known_args, "criterion"):
            crit_group = argparser.add_argument_group(
                '{} criterion arguments'.format(known_args.criterion))
            criterions.CRITERION_REGISTRY[known_args.criterion].add_args(
                crit_group)

        # As one final check, let's make sure we set defaults correctly
        argparser.set_defaults(**cls.DEFAULT_OPTIONS)
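This variant re-asserts `cls.DEFAULT_OPTIONS` after every registration step because each `add_argument` call can introduce actions whose own defaults would otherwise win over earlier `set_defaults` calls. A stripped-down sketch of the pattern (class and defaults are invented for illustration):

class SketchAgent:
    # hypothetical defaults, for illustration only
    DEFAULT_OPTIONS = {'beam': 5, 'skip_generation': True}

    @classmethod
    def add_cmdline_args(cls, argparser):
        argparser.add_argument('--beam', type=int, default=1)
        # Re-assert after registration so the class default (5) beats
        # the action's own default (1).
        argparser.set_defaults(**cls.DEFAULT_OPTIONS)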
Example #31
def get_parser_with_args():
    parser = options.get_parser("Generation", default_task="pytorch_translate")
    pytorch_translate_options.add_verbosity_args(parser)
    pytorch_translate_options.add_dataset_args(parser, gen=True)
    generation_group = options.add_generation_args(parser)
    pytorch_translate_options.expand_generation_args(generation_group)

    # Adds args used by the standalone generate binary.
    generation_group.add_argument(
        "--source-vocab-file",
        default="",
        metavar="FILE",
        help="Path to text file representing the Dictionary to use.",
    )
    generation_group.add_argument(
        "--char-source-vocab-file",
        default="",
        metavar="FILE",
        help=(
            "Same as --source-vocab-file except using characters. "
            "(For use with char_source models only.)"
        ),
    )
    generation_group.add_argument(
        "--target-vocab-file",
        default="",
        metavar="FILE",
        help="Path to text file representing the Dictionary to use.",
    )
    generation_group.add_argument(
        "--source-text-file",
        default="",
        nargs="+",
        metavar="FILE",
        help="Path to raw text file containing examples in source dialect. "
        "This overrides what would be loaded from the data dir. "
        "You can specify multiple source files (eg. for use in combination "
        "with --source-ensembling). By default this will only translate the "
        "first source file",
    )
    generation_group.add_argument(
        "--target-text-file",
        default="",
        metavar="FILE",
        help="Path to raw text file containing examples in target dialect. "
        "This overrides what would be loaded from the data dir.",
    )
    generation_group.add_argument(
        "--source-binary-file",
        default="",
        help="Path for the binary file containing source eval examples. "
        "(Overrides --source-text-file. Must be used in conjunction with "
        "--target-binary-file).",
    )
    generation_group.add_argument(
        "--target-binary-file",
        default="",
        help="Path for the binary file containing target eval examples. "
        "(Overrides --target-text-file. Must be used in conjunction with "
        "--source-binary-file).",
    )
    generation_group.add_argument(
        "--translation-output-file",
        default="",
        type=str,
        metavar="FILE",
        help="Path to text file to store the output of the model. ",
    )
    generation_group.add_argument(
        "--translation-probs-file",
        default="",
        type=str,
        metavar="FILE",
        help="Path to text file to store the probs of translation output. ",
    )
    generation_group.add_argument(
        "--multiling-source-lang",
        action="append",
        metavar="SRC",
        help=(
            "Must be set for decoding with multilingual models. "
            "Must match an entry from --multiling-encoder-lang from training."
        ),
    )
    generation_group.add_argument(
        "--multiling-target-lang",
        action="append",
        metavar="TARGET",
        help=(
            "Must be set for decoding with multilingual models. "
            "Must match an entry from --multiling-decoder-lang from training."
        ),
    )
    generation_group.add_argument(
        "--source-ensembling",
        action="store_true",
        help="If this flag is present, the model will ensemble the predictions "
        "conditioned on multiple source sentences (one per source-text-file)",
    )
    generation_group.add_argument(
        "--competing-completed-beam-search",
        action="store_true",
        help="If this flag is present, use the alternative beam search "
        "implementation in research/beam_search. This beam search keeps completed "
        "hypos in the beam and let them compete against hypo expansions in the "
        "next time step.",
    )

    return parser
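Several of these flags only make sense in pairs (`--source-binary-file` with `--target-binary-file`). A hypothetical post-parse check, not present in the source, would be:

def validate_binary_file_args(args):
    # The help text says these must be used in conjunction.
    if bool(args.source_binary_file) != bool(args.target_binary_file):
        raise ValueError(
            '--source-binary-file and --target-binary-file '
            'must be specified together')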
Example #32
def main():
    parser = options.get_parser('Generation')
    parser.add_argument('--path', metavar='FILE', required=True, action='append',
                        help='path(s) to model file(s)')
    dataset_args = options.add_dataset_args(parser)
    dataset_args.add_argument('--batch-size', default=32, type=int, metavar='N',
                              help='batch size')
    dataset_args.add_argument('--gen-subset', default='test', metavar='SPLIT',
                              help='data subset to generate (train, valid, test)')
    options.add_generation_args(parser)

    args = parser.parse_args()
    if args.no_progress_bar and args.log_format is None:
        args.log_format = 'none'
    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load dataset
    if args.replace_unk is None:
        dataset = data.load_dataset(args.data, [args.gen_subset], args.source_lang, args.target_lang)
    else:
        dataset = data.load_raw_text_dataset(args.data, [args.gen_subset], args.source_lang, args.target_lang)
    if args.source_lang is None or args.target_lang is None:
        # record inferred languages in args
        args.source_lang, args.target_lang = dataset.src, dataset.dst

    # Load ensemble
    print('| loading model(s) from {}'.format(', '.join(args.path)))
    models, _ = utils.load_ensemble_for_inference(args.path, dataset.src_dict, dataset.dst_dict)

    print('| [{}] dictionary: {} types'.format(dataset.src, len(dataset.src_dict)))
    print('| [{}] dictionary: {} types'.format(dataset.dst, len(dataset.dst_dict)))
    print('| {} {} {} examples'.format(args.data, args.gen_subset, len(dataset.splits[args.gen_subset])))

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam)

    # Initialize generator
    translator = SequenceGenerator(
        models, beam_size=args.beam, stop_early=(not args.no_early_stop),
        normalize_scores=(not args.unnormalized), len_penalty=args.lenpen,
        unk_penalty=args.unkpen)
    if use_cuda:
        translator.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    # Generate and compute BLEU score
    scorer = bleu.Scorer(dataset.dst_dict.pad(), dataset.dst_dict.eos(), dataset.dst_dict.unk())
    max_positions = min(model.max_encoder_positions() for model in models)
    itr = dataset.eval_dataloader(
        args.gen_subset, max_sentences=args.batch_size, max_positions=max_positions,
        skip_invalid_size_inputs_valid_test=args.skip_invalid_size_inputs_valid_test)
    num_sentences = 0
    with utils.build_progress_bar(args, itr) as t:
        wps_meter = TimeMeter()
        gen_timer = StopwatchMeter()
        translations = translator.generate_batched_itr(
            t, maxlen_a=args.max_len_a, maxlen_b=args.max_len_b,
            cuda_device=0 if use_cuda else None, timer=gen_timer)
        for sample_id, src_tokens, target_tokens, hypos in translations:
            # Process input and ground truth
            target_tokens = target_tokens.int().cpu()
            # Either retrieve the original sentences or regenerate them from tokens.
            if align_dict is not None:
                src_str = dataset.splits[args.gen_subset].src.get_original_text(sample_id)
                target_str = dataset.splits[args.gen_subset].dst.get_original_text(sample_id)
            else:
                src_str = dataset.src_dict.string(src_tokens, args.remove_bpe)
                target_str = dataset.dst_dict.string(target_tokens, args.remove_bpe, escape_unk=True)

            if not args.quiet:
                print('S-{}\t{}'.format(sample_id, src_str))
                print('T-{}\t{}'.format(sample_id, target_str))

            # Process top predictions
            for i, hypo in enumerate(hypos[:min(len(hypos), args.nbest)]):
                hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                    hypo_tokens=hypo['tokens'].int().cpu(),
                    src_str=src_str,
                    alignment=hypo['alignment'].int().cpu(),
                    align_dict=align_dict,
                    dst_dict=dataset.dst_dict,
                    remove_bpe=args.remove_bpe)

                if not args.quiet:
                    print('H-{}\t{}\t{}'.format(sample_id, hypo['score'], hypo_str))
                    print('A-{}\t{}'.format(sample_id, ' '.join(map(str, alignment))))

                # Score only the top hypothesis
                if i == 0:
                    if align_dict is not None or args.remove_bpe is not None:
                        # Convert back to tokens for evaluation with unk replacement and/or without BPE
                        target_tokens = tokenizer.Tokenizer.tokenize(target_str,
                                                                     dataset.dst_dict,
                                                                     add_if_not_exist=True)
                    scorer.add(target_tokens, hypo_tokens)

            wps_meter.update(src_tokens.size(0))
            t.log({'wps': round(wps_meter.avg)})
            num_sentences += 1

    print('| Translated {} sentences ({} tokens) in {:.1f}s ({:.2f} tokens/s)'.format(
        num_sentences, gen_timer.n, gen_timer.sum, 1. / gen_timer.avg))
    print('| Generate {} with beam={}: {}'.format(args.gen_subset, args.beam, scorer.result_string()))
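A hypothetical invocation of this entry point (the data directory and checkpoint path are placeholders, not from the source):

import sys

sys.argv = ['generate.py', 'data-bin/example',
            '--path', 'checkpoints/model.pt',
            '--beam', '5', '--batch-size', '32', '--remove-bpe']
main()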