# ``vocab_to_cache`` is derived from the vocabulary instead of being read
# straight out of ``params``, hence the hand-written ``from_params``.
@classmethod
def from_params(cls, vocab, params):  # type: ignore
    # pylint: disable=arguments-differ
    u"""
    Construct the embedder from a ``params`` object.

    Parameters
    ----------
    vocab :
        Vocabulary object; only consulted when ``namespace_to_cache`` is given,
        through ``get_token_to_index_vocabulary``.
    params :
        Parameter object. ``options_file`` and ``weight_file`` are required;
        ``requires_grad`` (default ``False``), ``do_layer_norm`` (default ``False``),
        ``dropout`` (default ``0.5``), ``namespace_to_cache`` (default ``None``) and
        ``projection_dim`` (default ``None``) are optional.  Any other remaining
        key is reported by ``params.assert_empty``.

    Returns
    -------
    An instance of ``cls`` built from the values above.
    """
    # Both files are handed to ``add_file_to_archive`` before their values are
    # popped (presumably so they travel with a saved model -- confirm against Params).
    for archived_key in (u'options_file', u'weight_file'):
        params.add_file_to_archive(archived_key)

    options_path = params.pop(u'options_file')
    weights_path = params.pop(u'weight_file')
    trainable = params.pop(u'requires_grad', False)
    layer_norm = params.pop_bool(u'do_layer_norm', False)
    dropout_rate = params.pop_float(u"dropout", 0.5)

    # When a namespace is supplied, every token of that namespace is collected
    # and passed on as ``vocab_to_cache``; otherwise ``None`` is passed.
    cache_namespace = params.pop(u"namespace_to_cache", None)
    if cache_namespace is None:
        cached_tokens = None
    else:
        token_to_index = vocab.get_token_to_index_vocabulary(cache_namespace)
        cached_tokens = list(token_to_index.keys())

    output_projection_dim = params.pop_int(u"projection_dim", None)
    params.assert_empty(cls.__name__)

    return cls(
        options_file=options_path,
        weight_file=weights_path,
        do_layer_norm=layer_norm,
        dropout=dropout_rate,
        requires_grad=trainable,
        projection_dim=output_projection_dim,
        vocab_to_cache=cached_tokens,
    )


# Registration is applied as a plain call and the class name is rebound to
# whatever ``register`` returns.
ElmoTokenEmbedder = TokenEmbedder.register(u"elmo_token_embedder")(
    ElmoTokenEmbedder)
else: weight = None return cls(num_embeddings=num_embeddings, embedding_dim=embedding_dim, projection_dim=projection_dim, weight=weight, padding_index=padding_index, trainable=trainable, max_norm=max_norm, norm_type=norm_type, scale_grad_by_freq=scale_grad_by_freq, sparse=sparse) Embedding = TokenEmbedder.register(u"embedding")(Embedding) def _read_pretrained_embeddings_file(file_uri, embedding_dim, vocab, namespace=u"tokens"): u""" Returns an embedding matrix for the given vocabulary using the pretrained embeddings contained in the given file. Embeddings for tokens not found in the pretrained embedding file are randomly initialized using a normal distribution with mean and standard deviation equal to those of the pretrained embeddings. We support two file formats: * text format - utf-8 encoded text file with space separated fields: [word] [dim 1] [dim 2] ...
self._dropout = torch.nn.Dropout(p=dropout) else: self._dropout = lambda x: x def get_output_dim(self): return self._encoder._module.get_output_dim() # pylint: disable=protected-access def forward(self, token_characters): # pylint: disable=arguments-differ mask = (token_characters != 0).long() return self._dropout( self._encoder(self._embedding(token_characters), mask)) # The setdefault requires a custom from_params @classmethod def from_params(cls, vocab, params): # type: ignore # pylint: disable=arguments-differ embedding_params = params.pop(u"embedding") # Embedding.from_params() uses "tokens" as the default namespace, but we need to change # that to be "token_characters" by default. embedding_params.setdefault(u"vocab_namespace", u"token_characters") embedding = Embedding.from_params(vocab, embedding_params) encoder_params = params.pop(u"encoder") encoder = Seq2VecEncoder.from_params(encoder_params) dropout = params.pop_float(u"dropout", 0.0) params.assert_empty(cls.__name__) return cls(embedding, encoder, dropout) TokenCharactersEncoder = TokenEmbedder.register(u"character_encoding")( TokenCharactersEncoder)
num_timesteps, device=get_device_of(inputs)) + vocab_size # Combine the inputs with positional encodings batch_tensor = torch.stack( [ inputs, # (batch_size, num_timesteps) positional_encodings.expand(batch_size, num_timesteps) ], dim=-1) byte_pairs_mask = inputs != 0 # Embeddings is num_output_layers x (batch_size, num_timesteps, embedding_dim) layer_activations = self._transformer(batch_tensor) # Output of scalar_mix is (batch_size, num_timesteps, embedding_dim) mix = self._scalar_mix(layer_activations, byte_pairs_mask) # These embeddings are one per byte-pair, but we want one per original _word_. # So we choose the embedding corresponding to the last byte pair for each word, # which is captured by the ``offsets`` input. range_vector = get_range_vector(batch_size, device=get_device_of(mix)).unsqueeze(1) last_byte_pair_embeddings = mix[range_vector, offsets] return last_byte_pair_embeddings OpenaiTransformerEmbedder = TokenEmbedder.register( u"openai_transformer_embedder")(OpenaiTransformerEmbedder)