Exemplo n.º 1
0
class getElmo(nn.Module):
    """Embed tokenized sentences with ELMo and project them to ``out_dim``.

    The module asks ELMo for ``layer`` output representations, mixes them
    into a single representation with a learned linear layer, and then
    projects the result to ``out_dim`` features.

    Args:
        layer: Number of ELMo output representations requested; also the
            input size of the layer-mixing linear layer.
        dropout: Dropout value forwarded to ``Elmo``.
        out_dim: Output feature size of the final projection.
        gpu: When true, the sub-modules and the character ids are moved to
            CUDA.
    """

    def __init__(self, layer=2, dropout=0, out_dim=100, gpu=True):
        super(getElmo, self).__init__()
        options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
        weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
        self.dropout = dropout
        self.gpu = gpu
        self.Elmo = Elmo(options_file, weight_file, layer, dropout=dropout)
        # NOTE: eval mode is only set here; calling .train() on this module
        # later switches the ELMo sub-module back to training mode.
        self.Elmo.eval()
        # Mixes the `layer` stacked ELMo representations into one.
        self.layers2one = nn.Linear(layer, 1)
        # 1024 is hard-coded; presumably the representation size of the
        # weights referenced above -- confirm if the weight file changes.
        self.optLinear = nn.Linear(1024, out_dim)
        if self.gpu:
            # forward() sends the character ids to CUDA, so ELMo itself has
            # to live there too, not only the two linear layers.
            self.Elmo.cuda()
            self.layers2one.cuda()
            self.optLinear.cuda()

    def forward(self, texts):
        """Return the projected ELMo embeddings and the ELMo mask.

        Args:
            texts: Batch of tokenized sentences, passed to ``batch_to_ids``.

        Returns:
            Tuple ``(elmo_fit_hidden, mask)``: the output of the final
            projection and the ``'mask'`` entry of the ELMo output.
        """
        word_idxs = batch_to_ids(texts)
        if self.gpu:
            word_idxs = word_idxs.cuda()
        elmo_embs = self.Elmo(word_idxs)
        # Stack the per-layer representations on a new trailing axis so the
        # linear layer can mix across layers.
        elmo_reps = torch.stack(elmo_embs['elmo_representations'], dim=-1)
        # Squeeze only the trailing size-1 axis produced by layers2one; a
        # bare squeeze() would also drop a batch or sequence axis of size 1.
        elmo_decrease_layer = self.layers2one(elmo_reps).squeeze(-1)
        elmo_fit_hidden = self.optLinear(elmo_decrease_layer)
        mask = elmo_embs['mask']

        return elmo_fit_hidden, mask
Exemplo n.º 2
0
class ElmoHandler(nn.Module):
    """Wrap a frozen ELMo model and project its output to ``d_content``.

    Args:
        d_content: Output feature size of the trainable projection.
        elmo_dropout: Dropout value forwarded to ``Elmo``.
        weights: Which embedding files to load; ``'eebo'`` or ``'orig'``.

    Raises:
        AssertionError: If ``weights`` is not ``'eebo'`` or ``'orig'``.
    """

    def __init__(self, d_content, elmo_dropout, weights):
        super().__init__()

        self.d_content = d_content

        # These must be plain strings: a trailing comma here would turn each
        # path into a one-element tuple and break loading of 'orig' weights.
        orig_json = "/scratch/skulick/downloaded/embeddings/elmo_2x4096_512_2048cnn_2xhighway_options.json"
        orig_hdf5 = "/scratch/skulick/downloaded/embeddings/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"

        eebo_json = "/scratch/skulick/downloaded/embeddings/eebo-options1.json"
        eebo_hdf5 = "/scratch/skulick/downloaded/embeddings/eebo-weights2.hdf5"

        assert weights in ('eebo', 'orig'), f'unknown weights {weights}'

        if weights == 'eebo':
            json_fname = eebo_json
            hdf5_fname = eebo_hdf5
        else:
            json_fname = orig_json
            hdf5_fname = orig_hdf5

        print(f'using embeddings {json_fname} {hdf5_fname}')

        self.elmo = Elmo(
            options_file=json_fname,
            weight_file=hdf5_fname,
            num_output_representations=1,
            requires_grad=False,
            do_layer_norm=False,
            keep_sentence_boundaries=True,
            dropout=elmo_dropout,
        )
        d_elmo_annotations = 1024
        # Don't train gamma parameter for ELMo - the projection can do any
        # necessary scaling
        self.elmo.scalar_mix_0.gamma.requires_grad = False

        # Reshapes the embeddings to match the model dimension, and making
        # the projection trainable appears to improve parsing accuracy
        self.project_elmo = nn.Linear(d_elmo_annotations,
                                      self.d_content,
                                      bias=False)

    def forward(self, char_idxs):
        """Return the single ELMo representation projected to ``d_content``.

        Args:
            char_idxs: Character-id tensor passed straight to ELMo.

        Returns:
            The output of ``project_elmo`` applied to the first (and only)
            ELMo representation. The ELMo mask is not returned.
        """
        elmo_out = self.elmo(char_idxs)
        elmo_rep0 = elmo_out['elmo_representations'][0]

        extra_content_annotations = self.project_elmo(elmo_rep0)
        return extra_content_annotations
Exemplo n.º 3
0
from allennlp.modules.elmo import Elmo, batch_to_ids

# Demo: embed one tokenized sentence with the pretrained 5.5B ELMo model.
weights_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5'
options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json'

# One output representation, no dropout.
elmo = Elmo(options_file, weights_file, 1, dropout=0)
text = [
    'Backgammon', 'is', 'one', 'of', 'the', 'oldest', 'known', 'board', 'games'
]

# batch_to_ids takes a batch of tokenized sentences. Passing the flat word
# list would treat every word as a sentence and every character as a token,
# so wrap the single sentence in a list.
batch = batch_to_ids([text])
print(batch)

# Named elmo_output rather than `dict` to avoid shadowing the builtin.
elmo_output = elmo(batch)

print(elmo_output['elmo_representations'])