def get_parser(desc, default_task='translation'):
    # Before creating the true parser, we need to import optional user module
    # in order to eagerly import custom tasks, optimizers, architectures, etc.
    usr_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
    usr_parser.add_argument('--user-dir', default=None)
    usr_args, _ = usr_parser.parse_known_args()
    utils.import_user_module(usr_args)

    parser = argparse.ArgumentParser(allow_abbrev=False)
    # fmt: off
    parser.add_argument('--no-progress-bar', action='store_true',
                        help='disable progress bar')
    parser.add_argument('--log-interval', type=int, default=1000, metavar='N',
                        help='log progress every N batches (when progress bar is disabled)')
    parser.add_argument('--log-format', default=None, help='log format to use',
                        choices=['json', 'none', 'simple', 'tqdm'])
    parser.add_argument('--tensorboard-logdir', metavar='DIR', default='',
                        help='path to save logs for tensorboard, should match --logdir '
                             'of running tensorboard (default: no tensorboard logging)')
    parser.add_argument('--tbmf-wrapper', action='store_true',
                        help='[FB only]')
    parser.add_argument('--seed', default=1, type=int, metavar='N',
                        help='pseudo random number generator seed')
    parser.add_argument('--cpu', action='store_true',
                        help='use CPU instead of CUDA')
    parser.add_argument('--fp16', action='store_true', help='use FP16')
    parser.add_argument('--memory-efficient-fp16', action='store_true',
                        help='use a memory-efficient version of FP16 training; implies --fp16')
    parser.add_argument('--fp16-init-scale', default=2 ** 7, type=int,
                        help='default FP16 loss scale')
    parser.add_argument('--fp16-scale-window', type=int,
                        help='number of updates before increasing loss scale')
    parser.add_argument('--fp16-scale-tolerance', default=0.0, type=float,
                        help='pct of updates that can overflow before decreasing the loss scale')
    parser.add_argument('--min-loss-scale', default=1e-4, type=float, metavar='D',
                        help='minimum FP16 loss scale, after which training is stopped')
    parser.add_argument('--threshold-loss-scale', type=float,
                        help='threshold FP16 loss scale from below')
    parser.add_argument('--user-dir', default=None,
                        help='path to a python module containing custom extensions '
                             '(tasks and/or architectures)')
    parser.add_argument('--empty-cache-freq', default=0, type=int,
                        help='how often to clear the PyTorch CUDA cache (0 to disable)')

    from fairseq.registry import REGISTRIES
    for registry_name, REGISTRY in REGISTRIES.items():
        parser.add_argument(
            '--' + registry_name.replace('_', '-'),
            default=REGISTRY['default'],
            choices=REGISTRY['registry'].keys(),
        )

    # Task definitions can be found under fairseq/tasks/
    from fairseq.tasks import TASK_REGISTRY
    parser.add_argument('--task', metavar='TASK', default=default_task,
                        choices=TASK_REGISTRY.keys(),
                        help='task')
    # fmt: on
    return parser

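# Usage sketch (a hypothetical driver, not part of the original snippets):
# fairseq-style CLIs parse in two passes so the chosen --task can register its
# own flags before the final parse. TASK_REGISTRY entries expose an add_args()
# classmethod; parse_args_sketch is an assumed name for illustration only.

def parse_args_sketch(argv=None):
    parser = get_parser('Trainer', default_task='translation')
    args, _ = parser.parse_known_args(argv)    # first pass: resolve --task
    from fairseq.tasks import TASK_REGISTRY
    TASK_REGISTRY[args.task].add_args(parser)  # let the task add its own flags
    return parser.parse_args(argv)             # second pass: full parse
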
def get_parser(desc, default_task='translation'):
    parser = argparse.ArgumentParser(
        description='Facebook AI Research Sequence-to-Sequence Toolkit -- ' + desc)
    parser.add_argument('--no-progress-bar', action='store_true',
                        help='disable progress bar')
    parser.add_argument('--log-interval', type=int, default=1000, metavar='N',
                        help='log progress every N batches (when progress bar is disabled)')
    parser.add_argument('--log-format', default=None, help='log format to use',
                        choices=['json', 'none', 'simple', 'tqdm'])
    parser.add_argument('--seed', default=1, type=int, metavar='N',
                        help='pseudo random number generator seed')
    parser.add_argument('--fp16', action='store_true', help='use FP16')

    # Task definitions can be found under fairseq/tasks/
    parser.add_argument(
        '--task', metavar='TASK', default=default_task,
        choices=TASK_REGISTRY.keys(),
        help='task: {} (default: {})'.format(', '.join(TASK_REGISTRY.keys()), default_task))
    return parser

def add_ac_args(parser):
    from fairseq.tasks import TASK_REGISTRY
    from fairseq.criterions import CRITERION_REGISTRY
    parser.add_argument('--actor-restore-file', default='checkpoints/actor/model.pt', metavar='DIR',
                        help='path to restore actor')
    parser.add_argument('--actor-task', default='translation', metavar='TASK',
                        choices=TASK_REGISTRY.keys(),
                        help='task for actor')
    parser.add_argument('--actor-criterion', default='ac-loss-actor', metavar='CRITERION',
                        choices=CRITERION_REGISTRY.keys(),
                        help='criterion for actor')
    parser.add_argument('--actor-save-update', '--asu', default=0, type=int, metavar='N',
                        help='force stop training actor at specified update')
    parser.add_argument('--critic-restore-file', default='checkpoints/critic/model.pt', metavar='DIR',
                        help='path to restore critic')
    parser.add_argument('--critic-task', default='translation', metavar='TASK',
                        choices=TASK_REGISTRY.keys(),
                        help='task for critic')
    parser.add_argument('--critic-criterion', default='ac-loss-critic', metavar='CRITERION',
                        choices=CRITERION_REGISTRY.keys(),
                        help='criterion for critic')
    parser.add_argument('--critic-save-update', '--csu', default=0, type=int, metavar='N',
                        help='force stop training critic at specified update')

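# Composition sketch (a hypothetical call site, not from the original):
# add_ac_args() only mutates an existing parser, so it composes with any of
# the get_parser() variants in this file.

def get_ac_training_parser():
    parser = get_parser('Actor-critic trainer')
    add_ac_args(parser)
    return parser
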
def get_parser(desc, default_task='translation'):
    parser = argparse.ArgumentParser()
    # fmt: off
    parser.add_argument('--no-progress-bar', action='store_true',
                        help='disable progress bar')
    parser.add_argument('--log-interval', type=int, default=1000, metavar='N',
                        help='log progress every N batches (when progress bar is disabled)')
    parser.add_argument('--log-format', default=None, help='log format to use',
                        choices=['json', 'none', 'simple', 'tqdm'])
    parser.add_argument('--seed', default=1, type=int, metavar='N',
                        help='pseudo random number generator seed')
    parser.add_argument('--cpu', action='store_true',
                        help='use CPU instead of CUDA')
    parser.add_argument('--fp16', action='store_true', help='use FP16')
    parser.add_argument('--memory-efficient-fp16', action='store_true',
                        help='use a memory-efficient version of FP16 training; implies --fp16')
    parser.add_argument('--fp16-init-scale', default=2 ** 7, type=int,
                        help='default FP16 loss scale')
    parser.add_argument('--fp16-scale-window', type=int,
                        help='number of updates before increasing loss scale')
    parser.add_argument('--fp16-scale-tolerance', default=0.0, type=float,
                        help='pct of updates that can overflow before decreasing the loss scale')

    # Task definitions can be found under fairseq/tasks/
    parser.add_argument('--task', metavar='TASK', default=default_task,
                        choices=TASK_REGISTRY.keys(),
                        help='task')
    # fmt: on
    return parser

def get_parser(desc, default_task='translation'):
    parser = argparse.ArgumentParser()
    parser.add_argument('--no-progress-bar', action='store_true',
                        help='disable progress bar')
    parser.add_argument('--log-interval', type=int, default=1000, metavar='N',
                        help='log progress every N batches (when progress bar is disabled)')
    parser.add_argument('--log-format', default=None, help='log format to use',
                        choices=['json', 'none', 'simple', 'tqdm'])
    parser.add_argument('--seed', default=1, type=int, metavar='N',
                        help='pseudo random number generator seed')
    parser.add_argument('--fp16', action='store_true', help='use FP16')

    # Task definitions can be found under fairseq/tasks/
    parser.add_argument('--task', metavar='TASK', default=default_task,
                        choices=TASK_REGISTRY.keys(),
                        help='task')
    return parser

def get_parser(desc, default_task='translation'):
    parser = argparse.ArgumentParser()
    parser.add_argument('--no-progress-bar', action='store_true',
                        help='disable progress bar')
    parser.add_argument('--log-interval', type=int, default=1000, metavar='N',
                        help='log progress every N batches (when progress bar is disabled)')
    parser.add_argument('--log-format', default=None, help='log format to use',
                        choices=['json', 'none', 'simple', 'tqdm'])
    parser.add_argument('--seed', default=1, type=int, metavar='N',
                        help='pseudo random number generator seed')
    parser.add_argument('--fp16', action='store_true', help='use FP16')
    parser.add_argument('--fp16-init-scale', default=2 ** 7, type=int,
                        help='default FP16 loss scale')
    parser.add_argument('--fp16-scale-window', type=int,
                        help='number of updates before increasing loss scale')

    # Task definitions can be found under fairseq/tasks/
    parser.add_argument('--task', metavar='TASK', default=default_task,
                        choices=TASK_REGISTRY.keys(),
                        help='task')
    return parser

def get_parser(desc, default_task='translation'):
    # Before creating the true parser, we need to import optional user module
    # in order to eagerly import custom tasks, optimizers, architectures, etc.
    usr_parser = argparse.ArgumentParser(add_help=False)
    usr_parser.add_argument('--user-dir', default=None)
    usr_args, _ = usr_parser.parse_known_args()
    if usr_args.user_dir is not None:
        import_user_module(usr_args.user_dir)

    parser = argparse.ArgumentParser()
    # fmt: off
    parser.add_argument('--no-progress-bar', action='store_true',
                        help='disable progress bar')
    parser.add_argument('--log-interval', type=int, default=1000, metavar='N',
                        help='log progress every N batches (when progress bar is disabled)')
    parser.add_argument('--log-format', default=None, help='log format to use',
                        choices=['json', 'none', 'simple', 'tqdm'])
    parser.add_argument('--seed', default=1, type=int, metavar='N',
                        help='pseudo random number generator seed')
    parser.add_argument('--cpu', action='store_true',
                        help='use CPU instead of CUDA')
    parser.add_argument('--fp16', action='store_true', help='use FP16')
    parser.add_argument('--memory-efficient-fp16', action='store_true',
                        help='use a memory-efficient version of FP16 training; implies --fp16')
    parser.add_argument('--fp16-init-scale', default=2 ** 7, type=int,
                        help='default FP16 loss scale')
    parser.add_argument('--fp16-scale-window', type=int,
                        help='number of updates before increasing loss scale')
    parser.add_argument('--fp16-scale-tolerance', default=0.0, type=float,
                        help='pct of updates that can overflow before decreasing the loss scale')
    parser.add_argument('--user-dir', default=None,
                        help='path to a python module containing custom extensions '
                             '(tasks and/or architectures)')

    # Task definitions can be found under fairseq/tasks/
    parser.add_argument('--task', metavar='TASK', default=default_task,
                        choices=TASK_REGISTRY.keys(),
                        help='task')
    # fmt: on
    return parser

def get_parser(desc, default_task="translation"): # Before creating the true parser, we need to import optional user module # in order to eagerly import custom tasks, optimizers, architectures, etc. usr_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False) usr_parser.add_argument("--user-dir", default=None) usr_args, _ = usr_parser.parse_known_args() utils.import_user_module(usr_args) parser = argparse.ArgumentParser(allow_abbrev=False) # fmt: off parser.add_argument('--no-progress-bar', action='store_true', help='disable progress bar') parser.add_argument( '--log-interval', type=int, default=100, metavar='N', help='log progress every N batches (when progress bar is disabled)') parser.add_argument('--log-format', default=None, help='log format to use', choices=['json', 'none', 'simple', 'tqdm']) parser.add_argument( '--tensorboard-logdir', metavar='DIR', default='', help='path to save logs for tensorboard, should match --logdir ' 'of running tensorboard (default: no tensorboard logging)') parser.add_argument('--seed', default=None, type=int, metavar='N', help='pseudo random number generator seed') parser.add_argument('--cpu', action='store_true', help='use CPU instead of CUDA') parser.add_argument('--tpu', action='store_true', help='use TPU instead of CUDA') parser.add_argument('--ort', action='store_true', help='use ORT') parser.add_argument('--ort_cuda_mem_limit_in_gbs', default=32, type=int, help='GPU memory') parser.add_argument('--bf16', action='store_true', help='use bfloat16; implies --tpu') parser.add_argument('--fp16', action='store_true', help='use FP16') parser.add_argument( '--memory-efficient-bf16', action='store_true', help='use a memory-efficient version of BF16 training; implies --bf16') parser.add_argument( '--memory-efficient-fp16', action='store_true', help='use a memory-efficient version of FP16 training; implies --fp16') parser.add_argument('--fp16-no-flatten-grads', action='store_true', help='don\'t flatten FP16 grads tensor') parser.add_argument('--fp16-init-scale', default=2**7, type=int, help='default FP16 loss scale') parser.add_argument('--fp16-scale-window', type=int, help='number of updates before increasing loss scale') parser.add_argument( '--fp16-scale-tolerance', default=0.0, type=float, help='pct of updates that can overflow before decreasing the loss scale' ) parser.add_argument( '--min-loss-scale', default=1e-4, type=float, metavar='D', help='minimum FP16 loss scale, after which training is stopped') parser.add_argument('--threshold-loss-scale', type=float, help='threshold FP16 loss scale from below') parser.add_argument( '--user-dir', default=None, help= 'path to a python module containing custom extensions (tasks and/or architectures)' ) parser.add_argument( '--empty-cache-freq', default=0, type=int, help='how often to clear the PyTorch CUDA cache (0 to disable)') parser.add_argument( '--all-gather-list-size', default=16384, type=int, help='number of bytes reserved for gathering stats from workers') parser.add_argument('--model-parallel-size', type=int, metavar='N', default=1, help='total number of GPUs to parallelize model over') parser.add_argument('--checkpoint-suffix', default='', help='suffix to add to the checkpoint file name') parser.add_argument('--quantization-config-path', default=None, help='path to quantization config file') parser.add_argument('--profile', action='store_true', help='enable autograd profiler emit_nvtx') from fairseq.registry import REGISTRIES for registry_name, REGISTRY in REGISTRIES.items(): 
parser.add_argument( '--' + registry_name.replace('_', '-'), default=REGISTRY['default'], choices=REGISTRY['registry'].keys(), ) # Task definitions can be found under fairseq/tasks/ from fairseq.tasks import TASK_REGISTRY parser.add_argument('--task', metavar='TASK', default=default_task, choices=TASK_REGISTRY.keys(), help='task') # fmt: on return parser
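# Shape sketch (illustrative values, not the real fairseq registries): the
# registry loop above assumes each REGISTRIES entry carries a 'default' name
# and a 'registry' dict, which is what fairseq.registry.setup_registry()
# builds for optimizers, LR schedulers, criterions, etc.

EXAMPLE_REGISTRIES = {
    'optimizer': {
        'default': 'nag',
        'registry': {'adam': object, 'nag': object, 'sgd': object},
    },
}
# The generated flag for this entry would be equivalent to:
#   parser.add_argument('--optimizer', default='nag', choices=['adam', 'nag', 'sgd'])
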
                        help='data subset to generate (train, valid, test)')
    parser.add_argument('--num-shards', default=1, type=int, metavar='N',
                        help='shard generation over N shards')
    parser.add_argument('--shard-id', default=0, type=int, metavar='ID',
                        help='id of the shard to generate (id < num_shards)')
    parser.add_argument('--task', metavar='TASK', default='translation',
                        choices=TASK_REGISTRY.keys(),
                        help='task: {} (default: {})'.format(
                            ', '.join(TASK_REGISTRY.keys()), 'translation'))
    parser.add_argument('--source-lang', default=None, metavar='SRC',
                        help='source language')
    parser.add_argument('--target-lang', default=None, metavar='TARGET',
                        help='target language')
    parser.add_argument('--raw-text', action='store_true',
                        help='load raw text dataset')
    parser.add_argument('--left-pad-source', default='True',
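# Sharding sketch (hypothetical invocations, not from the original): generation
# can be split across N processes by giving each a distinct shard id, e.g.
#
#   python generate.py ... --num-shards 4 --shard-id 0
#   python generate.py ... --num-shards 4 --shard-id 3
#
# with each process handling a disjoint slice of the selected subset.
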
def get_parser(desc, default_task='translation'):
    # Before creating the true parser, we need to import optional user module
    # in order to eagerly import custom tasks, optimizers, architectures, etc.
    usr_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
    usr_parser.add_argument('--user-dir', default=None)
    usr_args, _ = usr_parser.parse_known_args()
    utils.import_user_module(usr_args)

    parser = argparse.ArgumentParser(allow_abbrev=False)
    # fmt: off
    parser.add_argument('--no-progress-bar', action='store_true',
                        help='disable progress bar')
    parser.add_argument('--log-interval', type=int, default=1000, metavar='N',
                        help='log progress every N batches (when progress bar is disabled)')
    parser.add_argument('--log-format', default=None, help='log format to use',
                        choices=['json', 'none', 'simple', 'tqdm'])
    parser.add_argument('--tensorboard-logdir', metavar='DIR', default='',
                        help='path to save logs for tensorboard, should match --logdir '
                             'of running tensorboard (default: no tensorboard logging)')
    parser.add_argument('--tbmf-wrapper', action='store_true',
                        help='[FB only]')
    parser.add_argument('--seed', default=1, type=int, metavar='N',
                        help='pseudo random number generator seed')
    parser.add_argument('--cpu', action='store_true',
                        help='use CPU instead of CUDA')
    parser.add_argument('--fp16', action='store_true', help='use FP16')
    parser.add_argument('--memory-efficient-fp16', action='store_true',
                        help='use a memory-efficient version of FP16 training; implies --fp16')
    parser.add_argument('--fp16-init-scale', default=2 ** 7, type=int,
                        help='default FP16 loss scale')
    parser.add_argument('--fp16-scale-window', type=int,
                        help='number of updates before increasing loss scale')
    parser.add_argument('--fp16-scale-tolerance', default=0.0, type=float,
                        help='pct of updates that can overflow before decreasing the loss scale')
    parser.add_argument('--min-loss-scale', default=1e-4, type=float, metavar='D',
                        help='minimum FP16 loss scale, after which training is stopped')
    parser.add_argument('--threshold-loss-scale', type=float,
                        help='threshold FP16 loss scale from below')
    parser.add_argument('--user-dir', default=None,
                        help='path to a python module containing custom extensions '
                             '(tasks and/or architectures)')
    parser.add_argument('--sde', action='store_true',
                        help='whether to use sde')
    parser.add_argument('--update-language-sampling', type=int, default=-1,
                        help='update language sampling every N steps')
    parser.add_argument('--extra-update-language-sampling', type=int, default=-1,
                        help='update language sampling every N steps')
    parser.add_argument('--scale-norm', action='store_true',
                        help='whether to use scaled norm')
    parser.add_argument('--fix-norm', type=float, default=None,
                        help='whether to use fixed norm at output embedding')
    parser.add_argument('--data-actor', type=str, default=None,
                        help='type of data actor [base|ave_emb|only_grad|interpolate_grad]')
    parser.add_argument('--data-actor-embed-dropout', type=float, default=0.,
                        help='dropout for the data actor embedding')
    parser.add_argument('--data-actor-proj-dropout', type=float, default=0.,
                        help='dropout for the data actor projection')
    parser.add_argument('--data-actor-proj-linear-bias', type=float, default=None,
                        help='the bias term for the data actor linear projection')
    parser.add_argument('--data-actor-proj-post-bias', type=float, default=0,
                        help='the bias term to add after data actor project activation')
    parser.add_argument('--data-actor-sigmoid-scale', type=float, default=1.,
                        help='scale for the data actor sigmoid output')
    parser.add_argument('--extra-data-actor', type=str, default=None,
                        help='type of data actor [ave_emb]')
    parser.add_argument('--combine-probs', type=str, default=None,
                        help='[weight_by_size]')
    parser.add_argument('--data-actor-lr', type=eval_str_list, default=0.01,
                        help='lr for optimizing data actor')
    parser.add_argument('--data-actor-optim-step', type=int, default=1,
                        help='number of steps to optimize data actor')
    parser.add_argument('--data-actor-lr-scheduler', type=str, default=None)
    parser.add_argument('--data-actor-embed-dim', type=int, default=32,
                        help='dimension of word embedding for data actor')
    parser.add_argument('--lan-embed-dim', type=int, default=None,
                        help='dimension of language embedding for data actor')
    parser.add_argument('--data-actor-model-embed', type=int, default=0,
                        help='[0|1] whether to use model embedding')
    parser.add_argument('--data-actor-embed-grad', type=int, default=1,
                        help='[0|1] whether to optimize model embedding')
    parser.add_argument('--data-loss-lambda', type=float, default=0,
                        help='the percentage of using actual data loss')
    parser.add_argument('--data-loss-lambda-warmup-steps', type=int, default=-1)
    parser.add_argument('--data-loss-lambda-init', type=float, default=-1)
    parser.add_argument('--data-loss-lambda-final', type=float, default=-1)
    parser.add_argument('--out-score-type', type=str, default='sigmoid',
                        help='[sigmoid|exp]')
    parser.add_argument('--data-actor-share-model', action='store_true',
                        help='whether to allow data actor and main model to share the same parameters')
    parser.add_argument('--tanh-constant', type=float, default=10,
                        help='the constant multiplier for tanh output')
    parser.add_argument('--exp-constant', type=float, default=0.1,
                        help='the constant multiplier for exp output')
    parser.add_argument('--eval-bleu', action='store_true',
                        help='whether to validate on bleu score')
    parser.add_argument('--only-load-data-actor', action='store_true',
                        help='whether to only load the data actor from the checkpoint')
    parser.add_argument('--data-actor-proj-grad-only', action='store_true')
    parser.add_argument('--load-model-as-data-actor', action='store_true',
                        help='use the model as data actor')
    parser.add_argument('--grad-sim', type=str, default='cosine',
                        help='[cosine|dot_prod]')
    parser.add_argument('--dev-grad-eta', type=float, default=0.0001)
    parser.add_argument('--proj-grad-sim', type=str, default='cosine',
                        help='[cosine|dot_prod]')
    parser.add_argument('--loss-steps', type=int, default=1,
                        help='number of steps to calculate loss for grad sim')
    parser.add_argument('--scale-reward', action='store_true',
                        help='whether to scale reward by current p')
    parser.add_argument('--baseline', action='store_true',
                        help='whether to use a baseline for the reward')
    parser.add_argument('--relu-reward', action='store_true',
                        help='whether to relu the reward')
    parser.add_argument('--discount-reward', type=float, default=-1,
                        help='discount factor for reward')
    parser.add_argument('--reward-scale', type=float, default=0.0001,
                        help='scale factor of the reward')
    parser.add_argument('--language-weight', type=str, default=None,
                        help='dev language weights separated by comma')
    parser.add_argument('--data-actor-step-update', action='store_true',
                        help='whether to update at training step')
    parser.add_argument('--exact-update', action='store_true',
                        help='whether to do exact update in the approximate setting')
    parser.add_argument('--loss-weight', type=str, default=None,
                        help='[low|]')
    parser.add_argument('--discount-grad', action='store_true',
                        help='whether to use the default discount grad')
    parser.add_argument('--a0', type=float, default=0.05)
    parser.add_argument('--a1', type=float, default=0.95)
    parser.add_argument('--switch-obj-epoch', type=int, default=1,
                        help='the epoch to update val loss to trainer')
    parser.add_argument('--embedding-file', type=str, default=None,
                        help='the file path to init data actor embedding')
    parser.add_argument('--data-actor-feature-postprocess', type=str, default='last',
                        help='[tanh|average]')

    # TCS options
    parser.add_argument('--lan-dists', default=None, type=str,
                        help='comma separated numbers that indicate language distance')
    parser.add_argument('--data-condition', default='target', type=str,
                        help='[source|target] whether to condition on source or target')
    parser.add_argument('--sample-instance', action='store_true',
                        help='whether to sample for each instance in a batch for mulitlingual_data')
    parser.add_argument('--sample-tag-prob', default=-1, type=float,
                        help='probability of using tags other than the language')
    parser.add_argument('--data-actor-multilin', action='store_true',
                        help='whether to use the multilingual version of the actor')
    parser.add_argument('--utility-type', type=str, default='ave',
                        help='type of utility function [ave|min-half|median]')
    parser.add_argument('--eval-lang-pairs', type=str, default=None,
                        help='dev data keys for multilin actor')
    parser.add_argument('--no-dev', action='store_true',
                        help='do not use dev set gradient')
    parser.add_argument('--pretrain-data-actor', action='store_true',
                        help='pretrain the data actor')
    parser.add_argument('--pretrain-type', type=str, default='lan_dist',
                        help='[lan_dist|datasize]')
    parser.add_argument('--feature-type', type=str, default='ones',
                        help='[ones|valid_loss|train_loss]')
    parser.add_argument('--layerwise-dds', action='store_true',
                        help='use layerwise DDS')
    parser.add_argument('--tensorwise-dds', action='store_true')
    parser.add_argument('--dds-no-neg-reward', action='store_true',
                        help='set the negative reward for DDS to 0')
    parser.add_argument('--proj-grad', action='store_true')
    parser.add_argument('--train-on-proj', action='store_true')
    parser.add_argument('--train-proj-grad', action='store_true',
                        help='use the training grad to project')
    parser.add_argument('--train-proj-grad-sum', action='store_true',
                        help='use the sum of training grads to project')
    parser.add_argument('--save-proj-train', action='store_true',
                        help='whether to use saved moving avg grad to project')
    parser.add_argument('--remove-sample-id', action='store_true',
                        help='do not project on current language being trained')
    parser.add_argument('--proj-lan-id', type=str, default=None)
    parser.add_argument('--paramwise-proj-grad', action='store_true')
    parser.add_argument('--sample-proj-count', type=int, default=1,
                        help='number of tasks to sample for projection')
    parser.add_argument('--optim-weight-softmax-tau', type=float, default=-1,
                        help='a float in (0, 1]; smaller values make the weights more peaky')
    parser.add_argument('--optim-weight-above-one', action='store_true')
    parser.add_argument('--datasize-t', type=int, default=None,
                        help='temperature for controlling datasize sampling')
    parser.add_argument('--alpha-p', type=float, default=0,
                        help='[0-1] amount of interpolation for p')
    parser.add_argument('--num-dev-samples', type=int, default=8,
                        help='number of samples to select for the dev batch gradient; '
                             'max token is set to 1200')
    parser.add_argument('--reward-level', type=str, default='sent')
    parser.add_argument('--reward-constant', type=float, default=0.01)
    parser.add_argument('--only-optim-model-key', type=str, default=None)
    parser.add_argument('--upsample-factor', type=int, default=0)
    parser.add_argument('--data-score-label-smooth', type=str, default='none',
                        help='[no_smooth|weigted_smooth]')

    from fairseq.registry import REGISTRIES
    for registry_name, REGISTRY in REGISTRIES.items():
        parser.add_argument(
            '--' + registry_name.replace('_', '-'),
            default=REGISTRY['default'],
            choices=REGISTRY['registry'].keys(),
        )

    # Task definitions can be found under fairseq/tasks/
    from fairseq.tasks import TASK_REGISTRY
    parser.add_argument('--task', metavar='TASK', default=default_task,
                        choices=TASK_REGISTRY.keys(),
                        help='task')
    # fmt: on
    return parser

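# '--data-actor-lr' above passes eval_str_list as the argparse type. In
# fairseq this helper lives in fairseq.utils and turns a string into a list
# of floats; a minimal sketch of the behaviour the flag relies on (assumed to
# match the original's import):

def eval_str_list(x, type=float):
    # e.g. "0.01" -> [0.01], "[0.1, 0.01]" -> [0.1, 0.01]
    if x is None:
        return None
    if isinstance(x, str):
        x = eval(x)
    try:
        return list(map(type, x))
    except TypeError:
        return [type(x)]
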
def get_parser(desc, default_task='translation'):
    # Before creating the true parser, we need to import optional user module
    # in order to eagerly import custom tasks, optimizers, architectures, etc.
    usr_parser = argparse.ArgumentParser(add_help=False)
    usr_parser.add_argument('--user-dir', default=None)
    usr_args, _ = usr_parser.parse_known_args()
    import_user_module(usr_args)

    parser = argparse.ArgumentParser()
    # fmt: off
    parser.add_argument('--no-progress-bar', action='store_true',
                        help='disable progress bar')
    parser.add_argument('--log-interval', type=int, default=1000, metavar='N',
                        help='log progress every N batches (when progress bar is disabled)')
    parser.add_argument('--log-format', default=None, help='log format to use',
                        choices=['json', 'none', 'simple', 'tqdm'])
    parser.add_argument('--tensorboard-logdir', metavar='DIR', default='',
                        help='path to save logs for tensorboard, should match --logdir '
                             'of running tensorboard (default: no tensorboard logging)')
    parser.add_argument('--seed', default=1, type=int, metavar='N',
                        help='pseudo random number generator seed')
    parser.add_argument('--cpu', action='store_true',
                        help='use CPU instead of CUDA')
    parser.add_argument('--fp16', action='store_true', help='use FP16')
    parser.add_argument('--memory-efficient-fp16', action='store_true',
                        help='use a memory-efficient version of FP16 training; implies --fp16')
    parser.add_argument('--fp16-init-scale', default=2 ** 7, type=int,
                        help='default FP16 loss scale')
    parser.add_argument('--fp16-scale-window', type=int,
                        help='number of updates before increasing loss scale')
    parser.add_argument('--fp16-scale-tolerance', default=0.0, type=float,
                        help='pct of updates that can overflow before decreasing the loss scale')
    parser.add_argument('--min-loss-scale', default=1e-4, type=float, metavar='D',
                        help='minimum FP16 loss scale, after which training is stopped')
    parser.add_argument('--threshold-loss-scale', type=float,
                        help='threshold FP16 loss scale from below')
    parser.add_argument('--user-dir', default=None,
                        help='path to a python module containing custom extensions '
                             '(tasks and/or architectures)')

    # Task definitions can be found under fairseq/tasks/
    parser.add_argument('--task', metavar='TASK', default=default_task,
                        choices=TASK_REGISTRY.keys(),
                        help='task')
    # fmt: on
    parser.add_argument('--domains', default=None, nargs='+', type=str, metavar='DOMAINSPLIT',
                        help='comma separated list of data domains to use for training, valid and test '
                             '[europarl, medical]')
    parser.add_argument('--train-domains', default=None, nargs='+', type=str, metavar='TRAINSPLIT',
                        help='comma separated list of data domains to use for training '
                             '[europarl, medical]')
    parser.add_argument('--valid-domains', default=None, nargs='+', type=str, metavar='VALIDSPLIT',
                        help='comma separated list of data domains to use for valid '
                             '[europarl, medical]')
    parser.add_argument('--test-domains', default=None, nargs='+', type=str, metavar='TESTSPLIT',
                        help='comma separated list of data domains to use for test '
                             '[europarl, medical]')
    parser.add_argument('--valid-select', default=None, nargs='+', type=str, metavar='SELECTSPLIT',
                        help='comma separated list of data domains to use for valid select '
                             '[europarl, medical]')
    parser.add_argument('--num-ref', default=None, nargs='+', action=StoreDictKeyPair, metavar='NUMREFSPLIT',
                        help='comma separated list of number of references for valid and test '
                             '[1, 1]')
    return parser

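# '--num-ref' above relies on a custom argparse action named StoreDictKeyPair
# that is not defined in this snippet. A hypothetical implementation with the
# usual KEY=VALUE semantics (an assumption; the original may differ):

import argparse

class StoreDictKeyPair(argparse.Action):
    """Collect repeated KEY=VALUE tokens into a dict on the namespace."""
    def __call__(self, parser, namespace, values, option_string=None):
        d = {}
        for kv in values:          # with nargs='+', values is a list of tokens
            k, v = kv.split('=', 1)
            d[k] = v
        setattr(namespace, self.dest, d)
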
def get_parser(desc, default_task='translation'):
    parser = argparse.ArgumentParser()
    parser.add_argument('--no-progress-bar', action='store_true',
                        help='disable progress bar')
    parser.add_argument('--log-interval', type=int, default=1000, metavar='N',
                        help='log progress every N batches (when progress bar is disabled)')
    parser.add_argument('--log-format', default=None, help='log format to use',
                        choices=['json', 'none', 'simple', 'tqdm'])
    parser.add_argument('--seed', default=1, type=int, metavar='N',
                        help='pseudo random number generator seed')
    parser.add_argument('--fp16', action='store_true', help='use FP16')
    parser.add_argument('--fp16-init-scale', default=2 ** 7, type=int,
                        help='default FP16 loss scale')
    parser.add_argument('--encoder-seq', type=int, nargs='+',
                        default=[1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1],
                        help='the sequence of encoder nodes that make up the Transformer model')
    parser.add_argument('--encoder-para', type=int, nargs='+',
                        default=[1, 4, 1, 1, 4, 1, 1, 4, 1, 1, 4, 1, 1, 4, 1, 1, 4, 1],
                        help='parameter information for each encoder node')
    parser.add_argument('--decoder-seq', type=int, nargs='+',
                        default=[1, 2, 4, 1, 1, 2, 4, 1, 1, 2, 4, 1,
                                 1, 2, 4, 1, 1, 2, 4, 1, 1, 2, 4, 1],
                        help='the sequence of decoder nodes that make up the Transformer model')
    parser.add_argument('--decoder-para', type=int, nargs='+',
                        default=[1, 4, 4, 1, 1, 4, 4, 1, 1, 4, 4, 1,
                                 1, 4, 4, 1, 1, 4, 4, 1, 1, 4, 4, 1],
                        help='parameter information for each decoder node')
    parser.add_argument('--para', type=int, default=1,
                        help='the max size of parameters of an evolved transformer')

    # Task definitions can be found under fairseq/tasks/
    parser.add_argument('--task', metavar='TASK', default=default_task,
                        choices=TASK_REGISTRY.keys(),
                        help='task')
    return parser
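# Invocation sketch (illustrative only): because the node-sequence flags use
# nargs='+' with type=int, they take space-separated integers on the command
# line, e.g.
#
#   python train.py --encoder-seq 1 2 1 1 2 1 --decoder-seq 1 2 4 1
#
# which argparse collects as args.encoder_seq == [1, 2, 1, 1, 2, 1].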