def add_args(parser): """Add model-specific arguments to the parser.""" parser.add_argument( "--input-feat-per-channel", type=int, metavar="N", help="encoder input dimension per input channel", ) TransformerModel.add_args(parser) parser.add_argument('--encoder-convolutions', type=str, metavar='EXPR', help='encoder layers [(dim, kernel_size), ...]') parser.add_argument('--normalization-constant', type=float, default=1.0) parser.add_argument('--no-attn-2d', action='store_true', default=False, help="Whether to use 2d attention") parser.add_argument('--distance-penalty', type=str, default=False, choices=['log', 'gauss'], help='Add distance penalty to the encoder') parser.add_argument('--init-variance', type=float, default=1.0, help='Initialization value for variance') parser.add_argument('--ctc-compress-out', action='store_true', default=False, help="If set, compress the CTC output based on predictions") parser.add_argument('--ctc-compress-strategy', type=str, default="avg", choices=['avg', 'weighted', 'softmax'], help="Strategy to use when compressing CTC output") parser.add_argument('--freeze-pretrained', action='store_true', help='if set, all params loaded from the pretrained model are freezed')
def setUp(self):
    self.task, self.parser = get_dummy_task_and_parser()
    TransformerModel.add_args(self.parser)
    self.args = self.parser.parse_args([])
    self.args.encoder_layers = 2
    self.args.decoder_layers = 1
    logging.disable(logging.CRITICAL)
def add_args(parser): """Add model-specific arguments to the parser.""" # fmt: off TransformerModel.add_args(parser) parser.add_argument('--alignment-heads', type=int, metavar='N', help='number of attention heads to be used for ' 'pointing') parser.add_argument('--alignment-layer', type=int, metavar='I', help='layer number to be used for pointing (0 ' 'corresponding to the bottommost layer)') parser.add_argument('--source-position-markers', type=int, metavar='N', help='dictionary includes N additional items that ' 'represent an OOV token at a particular input ' 'position') parser.add_argument( '--force-generation', type=float, metavar='P', default=None, help='set the vocabulary distribution weight to P, ' 'instead of predicting it from the input (1.0 ' 'corresponding to generation, 0.0 to pointing)')
def add_args(parser): """Add model-specific arguments to the parser.""" TransformerModel.add_args(parser) parser.add_argument( "--pretrained-checkpoint", type=str, metavar="STR", ) parser.add_argument( "--init-encoder -only", action="store_true", ) parser.add_argument( "--init-decoder-only", action="store_true", ) parser.add_argument('--share-encoder-embeddings', action='store_true', help='share encoder embeddings across languages') parser.add_argument('--share-decoder-embeddings', action='store_true', help='share decoder embeddings across languages') parser.add_argument('--share-encoders', action='store_true', help='share encoders across languages') parser.add_argument('--share-decoders', action='store_true', help='share decoders across languages')
def test_export_transformer(self):
    task, parser = get_dummy_task_and_parser()
    TransformerModel.add_args(parser)
    args = parser.parse_args([])
    model = TransformerModel.build_model(args, task)
    scripted = torch.jit.script(model)
    _test_save_and_load(scripted)
def init_from_config(cls, impl, decoder_kwargs, embedding):
    module = cls(impl)
    module.embedding = embedding
    module.decoder_kwargs = decoder_kwargs
    if impl == "fairseq":
        args = {}
        # fairseq default args
        ap = ArgumentParser()
        FairseqModel.add_args(ap)
        args.update(vars(ap.parse_args([])))
        # fairseq base architecture args
        ns = Namespace(**decoder_kwargs)
        base_architecture(ns)
        args.update(vars(ns))
        # our args
        args.update(decoder_kwargs)
        namespace = Namespace(**args)
        # placeholder dictionary: FairseqDecoder only needs an object whose
        # length matches the vocabulary size of the embedding (a list, not a
        # set comprehension, which would collapse to a single element)
        dummy_dict = [0 for _ in range(embedding.weight.shape[0])]
        module.model = FairseqDecoder(namespace, dummy_dict, embedding)
    else:
        raise NotImplementedError()
    module.is_initialized = True
    return module
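# Self-contained sketch (not from the source) of the three-stage argument
# merge used in init_from_config above: parser defaults, then architecture
# defaults, then caller overrides, with later stages winning.
# toy_base_architecture is a hypothetical stand-in for fairseq's
# base_architecture().
from argparse import ArgumentParser, Namespace

ap = ArgumentParser()
ap.add_argument('--decoder-layers', type=int, default=6)
ap.add_argument('--dropout', type=float, default=0.1)

def toy_base_architecture(ns):
    # fill in any attribute the caller did not set
    ns.decoder_layers = getattr(ns, 'decoder_layers', 12)

decoder_kwargs = {'dropout': 0.3}
args = {}
args.update(vars(ap.parse_args([])))   # 1) parser defaults
ns = Namespace(**decoder_kwargs)
toy_base_architecture(ns)
args.update(vars(ns))                  # 2) architecture defaults
args.update(decoder_kwargs)            # 3) explicit overrides win
assert Namespace(**args).dropout == 0.3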
def add_args(parser):
    TransformerModel.add_args(parser)
    parser.add_argument(
        "--apply-bert-init",
        action="store_true",
        help="use custom param initialization for BERT",
    )
    parser.add_argument(
        "--early-exit",
        default="6,6,6",
        type=str,
        help="number of decoder layers for del_word, ins_mask, ins_word",
    )
    parser.add_argument(
        "--no-share-discriminator",
        action="store_true",
        help="additional decoder layers to learn deletion",
    )
    parser.add_argument(
        "--no-share-maskpredictor",
        action="store_true",
        help="additional decoder layers to learn predicting masks",
    )
    parser.add_argument(
        "--sampling-for-deletion",
        action='store_true',
        help='instead of argmax, use sampling to predict the tokens',
    )
def add_args(parser):
    TransformerModel.add_args(parser)
    parser.add_argument(
        "--apply-bert-init",
        action="store_true",
        help="use custom param initialization for BERT",
    )
    parser.add_argument(
        "--early-exit",
        default="6,6,6",
        type=str,
        help="number of decoder layers before word_del, mask_ins, word_ins",
    )
    parser.add_argument(
        "--no-share-discriminator",
        action="store_true",
        help="separate parameters for discriminator",
    )
    parser.add_argument(
        "--no-share-maskpredictor",
        action="store_true",
        help="separate parameters for mask-predictor",
    )
    parser.add_argument(
        "--share-discriminator-maskpredictor",
        action="store_true",
        help="share the parameters for both mask-predictor and discriminator",
    )
    parser.add_argument(
        "--sampling-for-deletion",
        action='store_true',
        help='instead of argmax, use sampling to predict the tokens',
    )
def add_args(parser):
    TransformerModel.add_args(parser)
    parser.add_argument(
        "--apply-bert-init",
        action="store_true",
        help="use custom param initialization for BERT",
    )
def add_args(parser):
    TransformerModel.add_args(parser)
    parser.add_argument(
        "--apply-bert-init",
        action="store_true",
        help="use custom param initialization for BERT",
    )
    # length prediction
    parser.add_argument(
        "--src-embedding-copy",
        action="store_true",
        help="copy encoder word embeddings as the initial input of the decoder",
    )
    parser.add_argument(
        "--pred-length-offset",
        action="store_true",
        help="predict the length difference between the target and source sentences",
    )
    parser.add_argument(
        "--sg-length-pred",
        action="store_true",
        help="stop the gradients back-propagated from the length predictor",
    )
    parser.add_argument(
        "--length-loss-factor",
        type=float,
        help="weights on the length prediction loss",
    )
def add_args(parser): """Add model-specific arguments to the parser.""" # fmt: off TransformerModel.add_args(parser) parser.add_argument("--encoder-conv-channels", type=str, metavar="EXPR", help="list of encoder convolution\'s out channels") parser.add_argument("--encoder-conv-kernel-sizes", type=str, metavar="EXPR", help="list of encoder convolution\'s kernel sizes") parser.add_argument("--encoder-conv-strides", type=str, metavar="EXPR", help="list of encoder convolution\'s strides") parser.add_argument("--encoder-transformer-context", type=str, metavar="EXPR", help="left/right context for time-restricted self-attention; " "can be None or a tuple of two non-negative integers/None") parser.add_argument("--decoder-input-dim", type=int, metavar="N", help="decoder input dimension (extra linear layer " "if different from decoder embed dim)") # Scheduled sampling options parser.add_argument("--scheduled-sampling-probs", type=lambda p: options.eval_str_list(p), metavar="P_1,P_2,...,P_N", default=[1.0], help="scheduled sampling probabilities of sampling the truth " "labels for N epochs starting from --start-schedule-sampling-epoch; " "all later epochs using P_N") parser.add_argument("--start-scheduled-sampling-epoch", type=int, metavar="N", default=1, help="start scheduled sampling from the specified epoch")
def add_args(parser):
    TransformerModel.add_args(parser)
    # Arguments related to parameter initialization
    parser.add_argument('--apply-bert-init', action='store_true',
                        help='use custom param initialization for BERT')
def add_args(parser): """Add model-specific arguments to the parser.""" TransformerModel.add_args(parser) #TODO: parser.add_argument( '--encoder-drop-residual', type=int, help='drop residual after self-attention in this encoder layer', )
def add_args(parser):
    TransformerModel.add_args(parser)
    parser.add_argument('--add-topic-encoder-pre', default=False, action='store_true',
                        help='add a topic encoder before the encoder stack')
    parser.add_argument('--add-topic-encoder-post', default=False, action='store_true',
                        help='add a topic encoder after the encoder stack')
    parser.add_argument('--add-topic-decoder', default=False, action='store_true',
                        help='add a topic component to the decoder')
def add_args(parser): """Add model-specific arguments to the parser.""" TransformerModel.add_args(parser) parser.add_argument( "--pretrained-deltalm-checkpoint", type=str, metavar="STR", )
def add_args(parser):
    TransformerModel.add_args(parser)
    parser.add_argument("--full-mask", action="store_true",
                        help="Full masking")
    parser.add_argument("--ignore-nat-loss", action="store_true",
                        help="Ignore NAT Loss")
def add_args(parser): """Add model-specific arguments to the parser.""" TransformerModel.add_args(parser) parser.add_argument('--img-dim', type=int, metavar='N', default=2048, help='image feature dimension')
def add_args(parser):
    TransformerModel.add_args(parser)
    parser.add_argument(
        "--apply-bert-init",
        action="store_true",
        help="use custom param initialization for BERT",
    )
    parser.add_argument("--label-tau", default=None, type=float)
def add_args(parser):
    TransformerModel.add_args(parser)
    parser.add_argument('--share-encoders', action='store_true',
                        help='share encoders across languages')
    parser.add_argument('--share-decoders', action='store_true',
                        help='share decoders across languages')
def test_export_transformer_no_token_pos_emb(self):
    task, parser = get_dummy_task_and_parser()
    TransformerModel.add_args(parser)
    args = parser.parse_args([])
    args.no_token_positional_embeddings = True
    model = TransformerModel.build_model(args, task)
    scripted = torch.jit.script(model)
    _test_save_and_load(scripted)
def add_args(parser): """Add model-specific arguments to the parser.""" TransformerModel.add_args(parser) parser.add_argument( "--decoder-lang-embed-dim", type=int, metavar="N", help="decoder language embedding dimension", )
def add_args(parser): """Add model-specific arguments to the parser.""" TransformerModel.add_args(parser) parser.add_argument( "--pretrained-roberta-checkpoint", type=str, metavar="STR", help="roberta model to use for initializing transformer encoder", )
def add_args(parser):
    """Add model-specific arguments to the parser."""
    TransformerModel.add_args(parser)
    parser.add_argument(
        "--vgg-config",
        type=str,
        help="""config in JSON format, e.g.
        '[{"in_channels": 64, "subsample": 2}, {"in_channels": 64, "subsample": 2}]'.
        If a dict is empty, default values are used.""",
    )
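# Self-contained sketch (not from the source): how a JSON-valued flag like
# --vgg-config above can be decoded once parsed. The empty dict falls back
# to defaults, as the help text describes.
import argparse
import json

parser = argparse.ArgumentParser()
parser.add_argument('--vgg-config', type=str)
args = parser.parse_args(['--vgg-config',
                          '[{"in_channels": 64, "subsample": 2}, {}]'])
blocks = json.loads(args.vgg_config)
assert blocks[0]['subsample'] == 2 and blocks[1] == {}  # empty dict -> defaults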
def add_args(parser): """Add model-specific arguments to the parser.""" TransformerModel.add_args(parser) parser.add_argument('--share-encoder-embeddings', action='store_true', help='share encoder embeddings across languages') parser.add_argument('--share-decoder-embeddings', action='store_true', help='share decoder embeddings across languages') parser.add_argument('--share-encoders', action='store_true', help='share encoders across languages') parser.add_argument('--share-decoders', action='store_true', help='share decoders across languages')
def add_args(parser): """Add model-specific arguments to the parser.""" TransformerModel.add_args(parser) parser.add_argument('--img-dim', type=int, metavar='N', default=1000, help='image feature dimension') parser.add_argument('--use-img', default=False, action='store_true', help='if set, use image features')
def add_args(parser):
    TransformerModel.add_args(parser)
    parser.add_argument(
        "--apply-bert-init",
        action="store_true",
        help="use custom param initialization for BERT",
    )
    parser.add_argument(
        "--early-exit",
        default="6,6,6",
        type=str,
        help="number of decoder layers before mask_ins, word_ins and word_del heads",
    )
def setUp(self):
    self.task, self.parser = get_dummy_task_and_parser()
    eos = self.task.tgt_dict.eos()
    src_tokens = torch.randint(3, 50, (2, 10)).long()
    src_tokens = torch.cat((src_tokens, torch.LongTensor([[eos], [eos]])), -1)
    src_lengths = torch.LongTensor([2, 10])
    self.sample = {
        "net_input": {"src_tokens": src_tokens, "src_lengths": src_lengths}
    }
    TransformerModel.add_args(self.parser)
    args = self.parser.parse_args([])
    args.encoder_layers = 2
    args.decoder_layers = 1
    self.transformer_model = TransformerModel.build_model(args, self.task)
def add_args(parser): """Add model-specific arguments to the parser.""" TransformerModel.add_args(parser) parser.add_argument( "--bottleneck-dim", default=256, type=int, help="bottleneck size of adapter", ) parser.add_argument( "--num-src-lang", default=1, type=int, help="number of unique adapters", )
def __init__(self):
    super().__init__()
    task, parser = _get_dummy_task_and_parser()
    TransformerModel.add_args(parser)
    args = parser.parse_args([])
    args.encoder_layers = 2
    args.decoder_layers = 1
    transformer_model = TransformerModel.build_model(args, task)
    self.sequence_generator = SequenceGenerator(
        [transformer_model],
        task.tgt_dict,
        beam_size=2,
        no_repeat_ngram_size=2,
        max_len_b=10,
    )
def add_args(parser): """Add model-specific arguments to the parser.""" # fmt: off TransformerModel.add_args(parser) parser.add_argument('--encoder-conv-channels', type=str, metavar='EXPR', help='list of encoder convolution\'s out channels') parser.add_argument('--encoder-conv-kernel-sizes', type=str, metavar='EXPR', help='list of encoder convolution\'s kernel sizes') parser.add_argument('--encoder-conv-strides', type=str, metavar='EXPR', help='list of encoder convolution\'s strides')