def __init__(self, args, dictionary, embed_tokens, no_encoder_attn=False):
    super().__init__(
        args, dictionary, embed_tokens, no_encoder_attn=no_encoder_attn
    )
    self.dictionary = dictionary
    self.bos = dictionary.bos()
    self.unk = dictionary.unk()
    self.eos = dictionary.eos()
    self.sampling_for_deletion = getattr(args, "sampling_for_deletion", False)
    self.embed_mask_ins = Embedding(256, self.output_embed_dim * 2, None)
    self.embed_word_del = Embedding(2, self.output_embed_dim, None)

    # del_word, ins_mask, ins_word
    self.early_exit = [int(i) for i in args.early_exit.split(',')]
    assert len(self.early_exit) == 3

    # copy layers for mask-predict/deletion
    self.layers_msk = None
    if getattr(args, "no_share_maskpredictor", False):
        self.layers_msk = nn.ModuleList([
            TransformerDecoderLayer(args, no_encoder_attn)
            for _ in range(self.early_exit[1])
        ])
    self.layers_del = None
    if getattr(args, "no_share_discriminator", False):
        self.layers_del = nn.ModuleList([
            TransformerDecoderLayer(args, no_encoder_attn)
            for _ in range(self.early_exit[0])
        ])

    if getattr(args, "share_discriminator_maskpredictor", False):
        assert getattr(args, "no_share_discriminator", False), "must set separate discriminator"
        self.layers_msk = self.layers_del
def __init__(self, args, dictionary, embed_tokens, no_encoder_attn=False):
    super().__init__(
        args, dictionary, embed_tokens, no_encoder_attn=no_encoder_attn
    )
    self.dictionary = dictionary
    self.bos = dictionary.bos()
    self.unk = dictionary.unk()
    self.eos = dictionary.eos()
    self.embed_mask_ins = Embedding(256, self.output_embed_dim * 2, None)
    self.embed_word_del = Embedding(2, self.output_embed_dim, None)

    # del_word, ins_mask, ins_word
    self.early_exit = [int(i) for i in args.early_exit.split(',')]
    assert len(self.early_exit) == 3
def build_model_decoder(cls, args, dictionary, output_dictionary=None):
    if output_dictionary is None:
        output_dictionary = dictionary

    if args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(dictionary),
            dictionary.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int),
        )
    else:
        embed_tokens = Embedding(len(dictionary), args.decoder_input_dim, dictionary.pad())

    return BiTransformerDecoder(
        args, output_dictionary, embed_tokens, no_encoder_attn=True, final_norm=False
    )
def __init__(self, args, dictionary, embed_tokens, no_encoder_attn=False):
    super().__init__(args, dictionary, embed_tokens, no_encoder_attn=no_encoder_attn)
    self.dictionary = dictionary
    self.bos = dictionary.bos()
    self.unk = dictionary.unk()
    self.eos = dictionary.eos()
    self.encoder_embed_dim = args.encoder_embed_dim
    self.max_target_positions = args.max_target_positions
    self.sg_length_pred = getattr(args, "sg_length_pred", False)
    self.pred_length_offset = getattr(args, "pred_length_offset", False)
    self.length_loss_factor = getattr(args, "length_loss_factor", 0.1)
    self.src_embedding_copy = getattr(args, "src_embedding_copy", False)
    self.embed_length = Embedding(256, self.encoder_embed_dim, None)

    self.embed_positions = nn.Embedding(
        num_embeddings=args.max_target_positions,
        embedding_dim=self.encoder_embed_dim,
        padding_idx=self.padding_idx,
    )
    self.embed_langs = nn.Embedding(
        num_embeddings=2, embedding_dim=self.encoder_embed_dim, padding_idx=None
    )
    nn.init.normal_(self.embed_positions.weight, mean=0, std=0.02)
    nn.init.normal_(self.embed_langs.weight, mean=0, std=0.02)
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_bi_lm_architecture(args)

    if not hasattr(args, 'max_source_positions'):
        args.max_source_positions = args.tokens_per_sample
    if not getattr(args, "max_target_positions", None):
        args.max_target_positions = args.tokens_per_sample

    if args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            task.dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
            max_char_len=args.max_char_len,
            char_inputs=args.char_inputs,
        )
    else:
        embed_tokens = Embedding(len(task.dictionary), args.decoder_embed_dim, task.dictionary.pad())

    logger.info(args)
    decoder = BiTransformerDecoder(args, task.output_dictionary, embed_tokens)
    return BiTransformerLanguageModel(decoder)
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_lm_architecture(args)

    if getattr(args, 'max_target_positions', None) is None:
        args.max_target_positions = getattr(args, 'tokens_per_sample', DEFAULT_MAX_TARGET_POSITIONS)

    if args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            task.source_dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(task.source_dictionary),
            task.source_dictionary.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int),
        )
    else:
        embed_tokens = Embedding(
            len(task.source_dictionary), args.decoder_input_dim, task.source_dictionary.pad()
        )

    if args.tie_adaptive_weights:
        assert args.adaptive_input
        assert args.adaptive_input_factor == args.adaptive_softmax_factor
        assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, '{} != {}'.format(
            args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
        assert args.decoder_input_dim == args.decoder_output_dim

    decoder = TransformerDecoder(
        args, task.target_dictionary, embed_tokens, no_encoder_attn=True,
    )
    return TransformerLanguageModel(decoder)
def build_embedding(dictionary, embed_dim, path=None):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    emb = Embedding(num_embeddings, embed_dim, padding_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb
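# The snippets above and below call a module-level ``Embedding`` helper instead
# of ``nn.Embedding`` directly. In fairseq-style code that helper is usually a
# thin wrapper adding normal-distributed init and a zeroed padding row. The
# sketch below is an assumption of that common definition (with a guard for the
# ``padding_idx=None`` calls seen in some snippets) and may differ slightly
# from the repo each snippet comes from.
import torch.nn as nn

def Embedding(num_embeddings, embedding_dim, padding_idx):
    m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx)
    nn.init.normal_(m.weight, mean=0, std=embedding_dim ** -0.5)
    if padding_idx is not None:
        nn.init.constant_(m.weight[padding_idx], 0)
    return m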
def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
    from fairseq import utils
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    embed_dict = utils.parse_embedding(embed_path)
    utils.print_embed_overlap(embed_dict, dictionary)
    return utils.load_embedding(embed_dict, dictionary, embed_tokens)
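# A minimal usage sketch for the loader above, assuming a fairseq Dictionary
# and a GloVe/word2vec-style text file: ``utils.parse_embedding`` skips the
# first line as a header and expects "token v1 v2 ..." on each following line.
# The file names and dimension below are hypothetical placeholders.
from fairseq.data import Dictionary

dictionary = Dictionary.load('dict.txt')      # hypothetical vocabulary file
embed_tokens = load_pretrained_embedding_from_file(
    'embeddings.vec',                         # hypothetical pretrained vectors
    dictionary,
    embed_dim=512,
)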
def __init__(self, dictionary):
    args = Namespace()
    base_lm_architecture(args)
    args.decoder_layerdrop = 0
    args.max_target_positions = getattr(args, 'tokens_per_sample', DEFAULT_MAX_TARGET_POSITIONS)

    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    emb = Embedding(num_embeddings, args.decoder_embed_dim, padding_idx)
    super().__init__(args, dictionary, emb, False)
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_lm_architecture(args)

    if hasattr(args, 'no_tie_adaptive_proj') and args.no_tie_adaptive_proj is False:
        # backward compatibility
        args.tie_adaptive_proj = True

    if not hasattr(args, 'max_source_positions'):
        args.max_source_positions = args.tokens_per_sample
    if not hasattr(args, 'max_target_positions'):
        args.max_target_positions = args.tokens_per_sample

    if args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            task.dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(task.dictionary),
            task.dictionary.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int),
        )
    else:
        embed_tokens = Embedding(len(task.dictionary), args.decoder_input_dim, task.dictionary.pad())

    if args.tie_adaptive_weights:
        assert args.adaptive_input
        assert args.adaptive_input_factor == args.adaptive_softmax_factor
        assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, '{} != {}'.format(
            args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
        assert args.decoder_input_dim == args.decoder_output_dim

    decoder = TransformerDecoder(
        args, task.output_dictionary, embed_tokens, no_encoder_attn=True, final_norm=False,
    )
    return TransformerLanguageModel(decoder)
def build_model(cls, args, vocab_size):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_lm_architecture(args)

    # No dictionary is available here, so no padding index can be passed; using
    # None explicitly is an assumption to match the three-argument Embedding
    # helper used by the other snippets.
    embed_tokens = Embedding(vocab_size, args.decoder_input_dim, None)
    decoder = TransformerDecoder(
        args, None, embed_tokens, no_encoder_attn=True,
    )
    return TransformerLanguageModel(decoder)
def build_model_input(cls, args, dictionary):
    # make sure all arguments are present in older fairseq_ext
    args.context_embeddings = getattr(args, 'context_embeddings', False)
    args.context_embeddings_layers = getattr(args, 'context_embeddings_layers', [-1])
    args.max_source_positions = args.tokens_per_sample
    args.max_target_positions = args.tokens_per_sample

    if args.context_embeddings:
        if args.context_embeddings_type == 'bert':
            embed_tokens = BERTEmbedder(
                args.context_embeddings_bert_model,
                layers=args.context_embeddings_layers)
        elif args.context_embeddings_type == 'transformers':
            embed_tokens = TransformerEmbedder(
                args.context_embeddings_bert_model,
                layers=args.context_embeddings_layers)
        else:
            raise NotImplementedError
    elif args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(dictionary),
            dictionary.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int),
        )
    else:
        args.decoder_embed_pretrained = getattr(args, 'decoder_embed_pretrained', '')
        if args.decoder_embed_pretrained:
            embed_tokens = load_pretrained_embedding_from_file(
                args.decoder_embed_pretrained, dictionary, args.decoder_input_dim)
        else:
            embed_tokens = Embedding(len(dictionary), args.decoder_input_dim, dictionary.pad())

    return embed_tokens
def build_model(cls, args, task):
    base_ulm_architecture(args)

    if getattr(args, "max_target_positions", None) is None:
        args.max_target_positions = getattr(args, "tokens_per_sample", DEFAULT_MAX_TARGET_POSITIONS)

    embed_tokens = Embedding(
        len(task.source_dictionary),
        args.decoder_input_dim,
        padding_idx=task.source_dictionary.pad(),
    )
    embed_duration = None
    if task.cfg.discrete_duration:
        embed_duration = Embedding(
            len(task.source_duration_dictionary),
            args.decoder_input_dim,
            padding_idx=0,  # duration uses 0 for padding
        )
    embed_f0 = None
    if task.cfg.discrete_f0:
        embed_f0 = Embedding(
            len(task.source_f0_dictionary),
            args.decoder_input_dim,
            padding_idx=task.source_f0_dictionary.pad(),
        )

    decoder = MultiStreamTransformerDecoder(
        args,
        task.target_dictionary,
        embed_tokens,
        [embed_duration, embed_f0],
        no_encoder_attn=True,
        channel_sizes=task.channel_sizes,
    )
    return cls(args, task, decoder)
def build_embedding(dictionary, embed_dim, path=None):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    emb = Embedding(num_embeddings, embed_dim, padding_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    # ``args`` is not a parameter of this helper; it is presumably captured
    # from the enclosing scope where the helper is defined
    if not path and args.disable_training_embeddings:
        raise ValueError(
            'Do not set --disable_training_embeddings when pretrained embeddings are not provided.')
    if args.disable_training_embeddings:
        emb.weight.requires_grad = False
    return emb
def __init__(self, args, dictionary, embed_tokens, no_encoder_attn=False):
    super().__init__(
        args, dictionary, embed_tokens, no_encoder_attn=no_encoder_attn
    )
    self.dictionary = dictionary
    self.bos = dictionary.bos()
    self.unk = dictionary.unk()
    self.eos = dictionary.eos()
    self.encoder_embed_dim = args.encoder_embed_dim
    self.sg_length_pred = getattr(args, "sg_length_pred", False)
    self.pred_length_offset = getattr(args, "pred_length_offset", False)
    self.length_loss_factor = getattr(args, "length_loss_factor", 0.1)
    self.src_embedding_copy = getattr(args, "src_embedding_copy", False)
    self.embed_length = Embedding(256, self.encoder_embed_dim, None)
def build_embedding(dictionary, embed_dim, path=None):
    """
    Copied from fairseq.models.transformer

    :param dictionary: token dictionary used to size the embedding table
    :param embed_dim: embedding dimension
    :param path: optional path to a pretrained embedding text file
    :return: the constructed embedding module
    """
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    emb = Embedding(num_embeddings, embed_dim, padding_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb
def __init__(self, state_dict, vocab_path):
    self.dictionary = Dictionary.load(vocab_path)
    if any(
        k in state_dict["model"]
        for k in ["encoder.layer_norm.weight", "layer_norm.weight"]
    ):
        self.dictionary.add_symbol("<mask>")

    cfg = state_dict["cfg"]["model"]
    self.sentemb_criterion = cfg.sentemb_criterion
    self.pad_idx = self.dictionary.pad_index
    self.bos_idx = self.dictionary.bos_index

    embed_tokens = Embedding(
        len(self.dictionary), cfg.encoder_embed_dim, self.pad_idx,
    )
    super().__init__(cfg, self.dictionary, embed_tokens)

    if "decoder.version" in state_dict["model"]:
        self._remove_decoder_layers(state_dict)
    if "layer_norm.weight" in state_dict["model"]:
        self.layer_norm = LayerNorm(cfg.encoder_embed_dim)
    self.load_state_dict(state_dict["model"])
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_bi_lm_architecture(args)

    if not hasattr(args, 'max_source_positions'):
        args.max_source_positions = args.tokens_per_sample
    if not hasattr(args, 'max_target_positions'):
        args.max_target_positions = args.tokens_per_sample

    if args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            task.dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(task.dictionary),
            task.dictionary.pad(),
            args.decoder_embed_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int),
        )
    else:
        embed_tokens = Embedding(len(task.dictionary), args.decoder_embed_dim, task.dictionary.pad())

    if args.tie_adaptive_weights:
        assert args.adaptive_input
        assert args.adaptive_input_factor == args.adaptive_softmax_factor
        assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, '{} != {}'.format(
            args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)

    print("Model args: ", args)
    decoder = BiTransformerDecoder(args, task.output_dictionary, embed_tokens)
    return BiTransformerLanguageModel(decoder)
def build_model(cls, args, task: SemparseClassificationTask):
    # Fairseq initializes models by calling the ``build_model()`` function.
    # This provides more flexibility, since the returned model instance can be
    # of a different type than the one that was called. In this case we return
    # an XlmrTransformerEncoderDecoder instance.

    # Initialize our Encoder and Decoder.
    xlmr = XlmrEncoder(task.xlmr, task.max_positions()[0])
    # encoder = LSTMEncoder(
    #     dictionary=task.source_dictionary,
    #     pretrained_embed=xlmr,
    #     embed_dim=args.xlmr_out_dim,
    #     hidden_size=args.decoder_hidden_dim,
    #     dropout_in=args.decoder_dropout,
    #     dropout_out=args.decoder_dropout
    # )
    from fairseq.models.transformer_from_pretrained_xlm import TransformerDecoderFromPretrainedXLM

    dictionary = task.output_vocab
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    embed_tokens = Embedding(num_embeddings, args.decoder_embed_dim, padding_idx)
    decoder = TransformerDecoder(args, dictionary, embed_tokens)
    # decoder = LSTMDecoder(
    #     dictionary=task.target_dictionary,
    #     encoder_output_units=args.encoder_hidden_dim,
    #     embed_dim=args.decoder_embed_dim,
    #     hidden_size=args.decoder_hidden_dim,
    #     dropout_in=args.decoder_dropout,
    #     dropout_out=args.decoder_dropout
    # )
    model = XlmrTransformerEncoderDecoder(xlmr, decoder)

    # Print the model architecture.
    print(model)

    return model
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_lm_architecture(args)

    if args.decoder_layers_to_keep:
        args.decoder_layers = len(args.decoder_layers_to_keep.split(","))

    if getattr(args, 'max_target_positions', None) is None:
        args.max_target_positions = getattr(args, 'tokens_per_sample', DEFAULT_MAX_TARGET_POSITIONS)

    embed_tokens = Embedding(
        len(task.source_dictionary), args.decoder_input_dim, task.source_dictionary.pad()
    )
    decoder = TransformerDecoder(
        args, task.target_dictionary, embed_tokens, no_encoder_attn=True,
    )
    return TransformerLanguageModel(decoder)
def build_embedding(dictionary, embed_dim):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    return Embedding(num_embeddings, embed_dim, padding_idx)
def build_embedding(cls, args, dictionary, embed_dim, path=None):
    embed_tokens = Embedding(len(dictionary), embed_dim, dictionary.pad())
    return embed_tokens
def build_model_input(cls, args, dictionary):
    # make sure all arguments are present in older fairseq_ext
    args.context_embeddings = getattr(args, 'context_embeddings', False)
    args.max_source_positions = args.tokens_per_sample
    args.max_target_positions = args.tokens_per_sample

    if args.context_embeddings:
        if args.context_embeddings_type == 'qbert':
            embed_tokens = QBERTEmbedder.from_args(args, {"dictionary": dictionary})
        elif args.context_embeddings_type == 'bert':
            assert not args.context_embeddings_use_embeddings
            embed_tokens = BERTEmbedder(args.context_embeddings_bert_model, False)
        elif args.context_embeddings_type == 'elmo':
            embed_tokens = ELMOEmbedder(
                args.context_embeddings_elmo_options,
                args.context_embeddings_elmo_weights,
                False)
        elif args.context_embeddings_type == 'flair':
            embed_tokens = FlairEmbedder(
                args.context_embeddings_flair_forward,
                args.context_embeddings_flair_backward,
                args.context_embeddings_flair_embeddings,
                False)
        else:
            raise NotImplementedError
    elif args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(dictionary),
            dictionary.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int),
        )
    else:
        def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
            from fairseq import utils
            num_embeddings = len(dictionary)
            padding_idx = dictionary.pad()
            embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
            embed_dict = utils.parse_embedding(embed_path)
            utils.print_embed_overlap(embed_dict, dictionary)
            return utils.load_embedding(embed_dict, dictionary, embed_tokens)

        if args.decoder_embed_pretrained:
            embed_tokens = load_pretrained_embedding_from_file(
                args.decoder_embed_pretrained, dictionary, args.decoder_input_dim)
            if getattr(args, 'decoder_embed_pretrained', False):
                for par in embed_tokens.parameters():
                    par.requires_grad = False
        else:
            embed_tokens = Embedding(len(dictionary), args.decoder_input_dim, dictionary.pad())

    return embed_tokens
def load_embed_tokens(dictionary, embed_dim):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    return Embedding(num_embeddings, embed_dim, padding_idx)