def module(self, vocabs: VocabDict, **kwargs) -> Optional[nn.Module]:
    vocab = vocabs[self.field]
    num_tokens_in_trn = len(vocab)
    # Build the word2vec embedding matrix aligned with the vocabulary of this field
    embed = build_word2vec_with_vocab(self.embed, vocab, self.extend_vocab, self.unk, self.lowercase,
                                      self.trainable, normalize=self.normalize)
    if self.word_dropout:
        assert vocab.unk_token, f'unk_token of vocab {self.field} has to be set in order to ' \
                                f'make use of word_dropout'
        # Never drop the padding token
        padding = []
        if vocab.pad_token:
            padding.append(vocab.pad_idx)
        word_dropout = WordDropout(self.word_dropout, vocab.unk_idx, exclude_tokens=padding)
    else:
        word_dropout = None
    return Word2VecEmbeddingModule(self.field, embed, word_dropout=word_dropout, cpu=self.cpu,
                                   second_channel=self.second_channel, num_tokens_in_trn=num_tokens_in_trn,
                                   unk_idx=vocab.unk_idx)
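
# Illustrative sketch (not part of this module): the word-dropout behaviour the code above
# relies on, i.e. each token id is replaced by the unk index with probability p, while excluded
# ids such as padding are left untouched. The function name ``drop_words`` is hypothetical.
import torch

def drop_words(token_ids: torch.Tensor, p: float, unk_idx: int, exclude: list) -> torch.Tensor:
    # sample a Bernoulli mask per token
    mask = torch.rand_like(token_ids, dtype=torch.float) < p
    # never drop excluded ids (e.g. padding)
    for idx in exclude:
        mask &= token_ids != idx
    return token_ids.masked_fill(mask, unk_idx)

# e.g. drop_words(torch.tensor([[2, 5, 7, 0]]), p=0.1, unk_idx=1, exclude=[0])
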
def __init__(self,
             transformer: Union[PreTrainedModel, str],
             transformer_tokenizer: PreTrainedTokenizer,
             average_subwords=False,
             scalar_mix: Union[ScalarMixWithDropoutBuilder, int] = None,
             word_dropout=None,
             max_sequence_length=None,
             ret_raw_hidden_states=False,
             transformer_args: Dict[str, Any] = None,
             trainable: Union[bool, Optional[Tuple[int, int]]] = True,
             training=True) -> None:
    """A pre-trained transformer encoder.

    Args:
        transformer: A ``PreTrainedModel`` or an identifier of a ``PreTrainedModel``.
        transformer_tokenizer: A ``PreTrainedTokenizer``.
        average_subwords: ``True`` to average subword representations.
        scalar_mix: Layer attention over the hidden states of all layers.
        word_dropout: Probability of randomly replacing a subword with ``[MASK]``, or a
            ``(rate, replacement)`` pair selecting a different replacement token.
        max_sequence_length: The maximum sequence length. Sequences longer than this will be handled by a
            sliding window. If ``None``, the ``max_position_embeddings`` of the transformer will be used.
        ret_raw_hidden_states: ``True`` to return hidden states of each layer.
        transformer_args: Extra arguments passed to the transformer.
        trainable: ``False`` to use static embeddings, or a ``(start, end)`` tuple to fine-tune only the
            layers whose index falls in that half-open range.
        training: ``False`` to skip loading weights from pre-trained transformers.
    """
    super().__init__()
    self.ret_raw_hidden_states = ret_raw_hidden_states
    self.average_subwords = average_subwords
    if word_dropout:
        oov = transformer_tokenizer.mask_token_id
        if isinstance(word_dropout, Sequence):
            word_dropout, replacement = word_dropout
            if replacement == 'unk':
                # English Electra has to use [UNK]
                oov = transformer_tokenizer.unk_token_id
            elif replacement == 'mask':
                # UDify uses [MASK]
                oov = transformer_tokenizer.mask_token_id
            else:
                oov = replacement
        pad = transformer_tokenizer.pad_token_id
        cls = transformer_tokenizer.cls_token_id
        sep = transformer_tokenizer.sep_token_id
        excludes = [pad, cls, sep]
        self.word_dropout = WordDropout(p=word_dropout, oov_token=oov, exclude_tokens=excludes)
    else:
        self.word_dropout = None
    if isinstance(transformer, str):
        output_hidden_states = scalar_mix is not None
        if transformer_args is None:
            transformer_args = dict()
        transformer_args['output_hidden_states'] = output_hidden_states
        transformer = AutoModel_.from_pretrained(transformer, training=training or not trainable,
                                                 **transformer_args)
    if max_sequence_length is None:
        max_sequence_length = transformer.config.max_position_embeddings
    self.max_sequence_length = max_sequence_length
    if hasattr(transformer, 'encoder') and hasattr(transformer, 'decoder'):
        # For a seq2seq model, use only its encoder
        transformer = transformer.encoder
    self.transformer = transformer
    if not trainable:
        transformer.requires_grad_(False)
    elif isinstance(trainable, tuple):
        # Freeze every layer outside the [start, end) range; index 0 is the embedding layer
        layers = []
        if hasattr(transformer, 'embeddings'):
            layers.append(transformer.embeddings)
        layers.extend(transformer.encoder.layer)
        for i, layer in enumerate(layers):
            if i < trainable[0] or i >= trainable[1]:
                layer.requires_grad_(False)
    if isinstance(scalar_mix, ScalarMixWithDropoutBuilder):
        self.scalar_mix: ScalarMixWithDropout = scalar_mix.build()
    else:
        self.scalar_mix = None
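
# Minimal usage sketch, assuming the constructor above belongs to the ``TransformerEncoder``
# class referenced further below and that the HuggingFace ``transformers`` package is installed.
# The model identifier is only an example.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
encoder = TransformerEncoder('bert-base-uncased',       # resolved via AutoModel_.from_pretrained
                             tokenizer,
                             average_subwords=True,      # average subword pieces back to word level
                             word_dropout=(0.1, 'unk'),  # replace 10% of subwords with the unk token
                             trainable=(0, 6))           # only layers with index in [0, 6) stay trainable
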
def __init__(self,
             config,
             pretrained_embed: torch.Tensor = None,
             transformer: PreTrainedModel = None,
             transformer_tokenizer: PreTrainedTokenizer = None):
    super(EncoderWithContextualLayer, self).__init__()
    self.secondary_encoder = config.get('secondary_encoder', None)
    self.config = config

    if not transformer:
        self.pad_index = config.pad_index
        self.unk_index = config.unk_index
        if config.word_dropout:
            oov = self.unk_index
            excludes = [self.pad_index]
            self.word_dropout = WordDropout(p=config.word_dropout, oov_token=oov, exclude_tokens=excludes)
        else:
            self.word_dropout = None

    if transformer:
        input_size = 0
        if self.config.transformer_lr:
            # fine-tuned transformer: its output feeds the decoder directly
            hidden_size = transformer.config.hidden_size
        else:
            # frozen transformer: its output feeds a BiLSTM on top
            input_size = transformer.config.hidden_size
            hidden_size = config.n_lstm_hidden * 2
        if config.feat == 'pos':
            self.feat_embed = nn.Embedding(num_embeddings=config.n_feats,
                                           embedding_dim=config.n_embed)
            self.embed_dropout = IndependentDropout(p=config.embed_dropout)
            if self.config.transformer_lr:
                hidden_size += config.n_embed
            else:
                input_size += config.n_embed
        if not self.config.transformer_lr:
            self.lstm = VariationalLSTM(input_size=input_size,
                                        hidden_size=config.n_lstm_hidden,
                                        num_layers=config.n_lstm_layers,
                                        dropout=config.hidden_dropout,
                                        bidirectional=True)
    else:
        # the embedding layer
        input_size = config.n_embed
        self.word_embed = nn.Embedding(num_embeddings=config.n_words,
                                       embedding_dim=config.n_embed)
        if pretrained_embed is not None:
            if not isinstance(pretrained_embed, torch.Tensor):
                pretrained_embed = torch.Tensor(pretrained_embed)
            # frozen pretrained vectors; the learned table is zero-initialized
            self.pretrained = nn.Embedding.from_pretrained(pretrained_embed)
            nn.init.zeros_(self.word_embed.weight)
        if config.feat == 'pos':
            self.feat_embed = nn.Embedding(num_embeddings=config.n_feats,
                                           embedding_dim=config.n_embed)
            self.embed_dropout = IndependentDropout(p=config.embed_dropout)
            input_size += config.n_embed
        # the word-lstm layer
        hidden_size = config.n_lstm_hidden * 2
        self.lstm = VariationalLSTM(input_size=input_size,
                                    hidden_size=config.n_lstm_hidden,
                                    num_layers=config.n_lstm_layers,
                                    dropout=config.hidden_dropout,
                                    bidirectional=True)

    self.hidden_size = hidden_size
    self.hidden_dropout = SharedDropout(p=config.hidden_dropout)

    if transformer:
        transformer = TransformerEncoder(transformer,
                                         transformer_tokenizer,
                                         config.average_subwords,
                                         word_dropout=config.word_dropout,
                                         max_sequence_length=config.max_sequence_length)
        self.transformer = transformer
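
# Design note (an assumption, not confirmed by this file): zero-initialising ``word_embed``
# alongside a frozen ``pretrained`` table is the usual biaffine-parser trick, where the
# forward pass presumably sums the two lookups so that training only learns a correction on
# top of the fixed pretrained vectors. A hypothetical sketch of that lookup:
import torch
import torch.nn as nn

def lookup_words(words: torch.Tensor, word_embed: nn.Embedding, pretrained: nn.Embedding) -> torch.Tensor:
    # both tables are indexed with the same ids; ``word_embed`` starts as all zeros,
    # so the initial representation equals the pretrained vector
    return word_embed(words) + pretrained(words)
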