import random
import re
import shlex
import shutil
from pathlib import Path

from fairseq_cli import train


def fairseq_train(
    preprocessed_dir,
    exp_dir,
    ngpus=1,
    batch_size=8192,  # Batch size across all gpus (taking update freq into account)
    max_sentences=64,  # Max sentences per GPU
    arch='transformer',
    save_interval_updates=100,
    max_update=50000,
    lr=0.001,
    warmup_updates=4000,
    dropout=0.1,
    lr_scheduler='inverse_sqrt',
    criterion='label_smoothed_cross_entropy',
    seed=None,
    fp16=True,
    **kwargs,
):
    exp_dir = Path(exp_dir)
    preprocessed_dir = Path(preprocessed_dir)
    # The experiment dir must exist before the stdout log inside it can be opened
    exp_dir.mkdir(exist_ok=True, parents=True)
    with log_std_streams(exp_dir / 'fairseq_train.stdout'):
        # Copy dictionaries to exp_dir for generation
        for dict_path in preprocessed_dir.glob('dict.*.txt'):
            shutil.copy(dict_path, exp_dir)
        checkpoints_dir = exp_dir / 'checkpoints'
        # Reach the target batch size by accumulating gradients over update_freq real batches
        total_real_batch_size = max_sentences * ngpus
        update_freq = int(round(batch_size / total_real_batch_size, 0))
        if seed is None:
            seed = random.randint(0, 1000)
        distributed_port = random.randint(10000, 20000)
        args = f'''
        {preprocessed_dir} --task translation --source-lang complex --target-lang simple
        --save-dir {checkpoints_dir}
        --optimizer adam --adam-betas '(0.9, 0.98)' --clip-norm 0.0
        --criterion {criterion} --label-smoothing 0.1
        --lr-scheduler {lr_scheduler} --lr {lr} --warmup-updates {warmup_updates}
        --update-freq {update_freq}
        --arch {arch} --dropout {dropout} --weight-decay 0.0 --clip-norm 0.1
        --share-all-embeddings
        --no-epoch-checkpoints --save-interval 999999 --validate-interval 999999
        --max-update {max_update} --save-interval-updates {save_interval_updates}
        --keep-interval-updates 1 --patience 10
        --batch-size {max_sentences} --seed {seed}
        --distributed-world-size {ngpus} --distributed-port {distributed_port}
        '''
        if lr_scheduler == 'inverse_sqrt':
            args += ' --warmup-init-lr 1e-07'
        if fp16:
            args += ' --fp16'
        # FIXME: if the kwargs are already present in the args string, they will appear twice,
        # but fairseq will take only the last one into account
        args += f' {args_dict_to_str(kwargs)}'
        args = remove_multiple_whitespaces(args.replace('\n', ' ')).strip(' ')
        # Recover lost quotes around adam betas
        args = re.sub(r'--adam-betas (\(0\.\d+, 0\.\d+\))', r"--adam-betas '\1'", args)
        print(f'fairseq-train {args}')
        with mock_cli_args(shlex.split(args)):
            train.cli_main()
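# log_std_streams, args_dict_to_str, remove_multiple_whitespaces and mock_cli_args are
# assumed to be project helpers defined elsewhere. Minimal sketches of what they might
# look like (for reference only, not the original implementations):
import contextlib
import re
import sys


@contextlib.contextmanager
def log_std_streams(log_path):
    # Simplified: send stdout/stderr to a log file for the duration of the block
    # (the real helper presumably also mirrors the output to the console).
    with open(log_path, 'w') as log_file:
        with contextlib.redirect_stdout(log_file), contextlib.redirect_stderr(log_file):
            yield


def args_dict_to_str(kwargs):
    # e.g. {'max_tokens': 4000} -> '--max-tokens 4000'
    return ' '.join(f'--{key.replace("_", "-")} {value}' for key, value in kwargs.items())


def remove_multiple_whitespaces(text):
    return re.sub(r' +', ' ', text)


@contextlib.contextmanager
def mock_cli_args(argv):
    # Temporarily replace sys.argv so fairseq's argparse-based cli_main() sees our arguments
    original_argv = sys.argv
    sys.argv = [original_argv[0]] + list(argv)
    try:
        yield
    finally:
        sys.argv = original_argv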
#!/usr/bin/env python3 -u
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# comet_ml must be imported before torch/fairseq so its automatic logging can hook in
from comet_ml import Experiment  # noqa: F401

from fairseq_cli.train import cli_main

if __name__ == '__main__':
    cli_main()
import pathlib
import sys

from fairseq_cli.train import cli_main


def ls_cli_main(*args, **kwargs):
    # Register the local "fs_modules" directory as a fairseq --user-dir before
    # handing control to fairseq's regular training entry point
    user_path = pathlib.Path(__file__).parent.joinpath("fs_modules")
    sys.argv.extend(["--user-dir", str(user_path)])
    cli_main(*args, **kwargs)
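# A minimal way to expose the wrapper above as a script entry point (assumed usage,
# mirroring the other launchers in this section):
if __name__ == '__main__':
    ls_cli_main()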
    '--lr', '0.0005',
    '-s', 'src', '-t', 'tgt',
    '--label-smoothing', '0.1',
    '--dropout', '0.3',
    '--max-tokens', '4000',
    '--min-lr', '1e-09',
    '--lr-scheduler', 'inverse_sqrt',
    '--weight-decay', '0.0001',
    '--criterion', 'label_smoothed_cross_entropy',
    '--max-update', '150000',
    '--warmup-updates', '4000',
    '--warmup-init-lr', '1e-07',
    '--adam-betas', '(0.9,0.98)',
    '--max-source-positions', '10240',
    '--save-dir', 'checkpoints/transformer',
    # '--dataset-impl', 'raw',
    '--share-all-embeddings',
    # '--encoder-embed-dim', '64',
    # '--encoder-ffn-embed-dim', '128',
    # '--encoder-attention-heads', '2',
    # '--encoder-layers', '2',
    # '--decoder-embed-dim', '64',
    # '--decoder-ffn-embed-dim', '128',
    # '--decoder-attention-heads', '2',
    # '--decoder-layers', '2'
]
train.cli_main()
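# The argument list above is a fragment: the opening of the list and the leading
# arguments (data directory, --arch, --optimizer, ...) are not shown. A common way to
# feed such a list to fairseq programmatically is to place it into sys.argv before
# calling cli_main(); the sketch below is illustrative only, with placeholder paths
# and a reduced set of flags.
import sys

from fairseq_cli import train

debug_args = [
    'data-bin/example',  # placeholder: a fairseq-preprocess output directory
    '--arch', 'transformer',
    '--optimizer', 'adam',
    '--lr', '0.0005',
    '--max-tokens', '4000',
    '--criterion', 'label_smoothed_cross_entropy',
]
sys.argv = [sys.argv[0]] + debug_args
train.cli_main()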
args_dict['current_host'] = os.environ['SM_CURRENT_HOST']
args_dict['distributed-world-size'] = str(len(args_dict['hosts']) * num_gpus)
os.environ['WORLD_SIZE'] = str(len(args_dict['hosts']) * num_gpus)
os.environ['RANK'] = str(args_dict['hosts'].index(args_dict['current_host']) * num_gpus)
args_dict.pop('hosts', None)
args_dict.pop('current_host', None)

# Warm-start training from the pretrained checkpoint shipped with the job
args_dict['restore-file'] = os.path.join(args_dict['pretrained_path'], 'pretrained_model.pt')
args_dict.pop('pretrained_path', None)

train_dir = args_dict['train']
args_dict.pop('train', None)
args_dict.pop('ngpus', None)

try:
    prefix = '/opt/ml/'
    param_path = os.path.join(prefix, 'input/config/hyperparameters.json')
    # Read in any hyperparameters that the user passed with the training job
    with open(param_path, 'r') as tc:
        training_params = json.load(tc)
    for k, v in training_params.items():
        args_dict[k] = v
except Exception:
    print("hyperparameters.json not found! Probably running without SageMaker!")

training_args = [train_dir] + convert_args_dict_to_list(args_dict) + unparsed
cli_main(training_args)
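# convert_args_dict_to_list is assumed to be a project helper; a minimal sketch of what
# it might do (hypothetical, not the original implementation): flatten a dict of
# hyperparameters into a fairseq-style argv list, e.g. {'lr': '0.001'} -> ['--lr', '0.001'].
def convert_args_dict_to_list(args_dict):
    args = []
    for key, value in args_dict.items():
        args.append('--' + key.replace('_', '-'))
        if value not in (None, ''):
            args.append(str(value))
    return args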
#!/usr/local/opt/python/bin/python3.7
# -*- coding: utf-8 -*-
import re
import sys
from os import path

fairseq_path = path.abspath(path.join(path.abspath(__file__), '../../fairseq'))
sys.path.insert(0, fairseq_path)

from fairseq_cli.train import cli_main

if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
    sys.exit(cli_main())