Example #1
    def predict(self, context_tokens, answer_features, max_length, pad=False):
        input_token = variable.Variable(
            torch.LongTensor([[self.vocab.start_index]])).cuda()
        end_token = torch.LongTensor([[self.vocab.end_index]]).cuda()
        context_tokens = variable.Variable(
            torch.LongTensor(context_tokens)).cuda()
        answer_features = variable.Variable(
            torch.from_numpy(answer_features)).cuda()

        predictions = self.model.predict(input_token=input_token,
                                         context_tokens=context_tokens,
                                         end_token=end_token,
                                         answer_features=answer_features,
                                         max_length=max_length)

        if pad:
            pad_token = variable.Variable(
                torch.LongTensor([self.vocab.pad_index])).cuda()
            while len(predictions) < max_length:
                predictions.append(pad_token)

        stacked_predictions = torch.stack(predictions, 0)
        tokens = self.get_tokens_single(stacked_predictions.cpu())
        sentence = " ".join(tokens)
        return sentence, stacked_predictions
    def forward(self, inputs):
        batch_size = inputs.size()[1]
        # 2 directions per layer; each direction carries half the hidden size
        state_shape = (self.config['num_layers'] * 2, batch_size,
                       self.config['hidden_size'] // 2)
        h0 = variable.Variable(inputs.data.new(*state_shape).zero_(),
                               requires_grad=False).cuda()
        c0 = variable.Variable(inputs.data.new(*state_shape).zero_(),
                               requires_grad=False).cuda()

        outputs, (ht, ct) = self.rnn(inputs, (h0, c0))
        return outputs, ht[-2:].transpose(0,
                                          1).contiguous().view(batch_size, -1)
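# A self-contained shape sketch of the bidirectional trick above (assumed sizes,
# not from the source config): ht[-2:] holds the last layer's two directions.
import torch
from torch.autograd import variable

rnn = torch.nn.LSTM(input_size=20, hidden_size=25, num_layers=1, bidirectional=True)
x = variable.Variable(torch.randn(7, 4, 20))        # seq_len x batch x input_size
h0 = c0 = variable.Variable(torch.zeros(2, 4, 25))  # (num_layers * 2) x batch x (hidden / 2)
out, (ht, ct) = rnn(x, (h0, c0))
merged = ht[-2:].transpose(0, 1).contiguous().view(4, -1)
print(merged.size())  # (4, 50): forward and backward states concatenated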
    def combine_predictions(self, context_tokens, predictor_probs, attentions,
                            language_probs):

        max_attention_length = attentions.size(2)
        pad_size = self.config['vocab_size'] - max_attention_length
        batch_size = attentions.size(1)
        seq_size = attentions.size(0)

        context_tokens_padding = variable.Variable(torch.LongTensor(
            batch_size, pad_size).zero_(),
                                                   requires_grad=False).cuda()
        attentions_padding = variable.Variable(
            torch.zeros(batch_size, pad_size) - 1e10,
            requires_grad=False).cuda()
        stacked_context_tokens = torch.cat(
            (context_tokens, context_tokens_padding), 1)

        total_attention_results = []
        softmax_probs = predictor_probs[:, :, 0]  # gate weight: vocabulary head
        text_field_probs = predictor_probs[:, :, 1]  # gate weight: copy head

        replicated_softmax_probs = softmax_probs.unsqueeze(2)
        replicated_text_field_probs = text_field_probs.unsqueeze(2)

        dims = replicated_softmax_probs.size()

        expanded_softmax_probs = replicated_softmax_probs.expand(
            dims[0], dims[1], self.config['vocab_size'])
        expanded_text_field_probs = replicated_text_field_probs.expand(
            dims[0], dims[1], max_attention_length)

        for i in range(0, seq_size):
            selected_text_field_probs = expanded_text_field_probs[i, :, :]
            selected_attention = attentions[
                i, :, :] + selected_text_field_probs
            stacked_attentions = torch.cat(
                (selected_attention, attentions_padding), 1)

            attention_results = variable.Variable(
                torch.zeros(batch_size, self.config['vocab_size']) -
                1e10).cuda()
            attention_results.scatter_(1, stacked_context_tokens,
                                       stacked_attentions)
            total_attention_results.append(attention_results)

        stacked_attention_results = torch.stack(total_attention_results, 0)
        # log-sum-exp: the copy scores and the gated vocabulary log-probs
        # are added in probability space
        final_probs = torch.log(
            torch.exp(stacked_attention_results) +
            torch.exp(language_probs + expanded_softmax_probs))

        return final_probs
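# Numeric sanity sketch of the log-sum-exp mix above (illustrative values, plain
# torch; not taken from the model):
import torch

copy_scores = torch.log(torch.Tensor([0.2, 0.1]))   # log-mass placed on context tokens
vocab_scores = torch.log(torch.Tensor([0.3, 0.4]))  # gated vocabulary log-probs
mixed = torch.log(torch.exp(copy_scores) + torch.exp(vocab_scores))
print(mixed)  # both entries equal log(0.5): the heads add in probability space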
    def predict(self,
                input_token,
                context_tokens,
                end_token,
                answer_features,
                max_length=20,
                min_length=3):
        """
        input_token: Input token to start with 
        context_tokens: Context tokens to use
        Do greedy decoding using input token and context tokens
        """

        predicted_tokens = []
        total_loss = 0.0

        batch_first_context_tokens = context_tokens.transpose(0, 1)
        context_embeddings = self.text_field_predictor.forward_prepro(
            context_tokens, input_masks=None, answer_features=answer_features)

        state_shape = (1, self.config['hidden_size'])
        h0 = c0 = variable.Variable(
            context_embeddings.data.new(*state_shape).zero_())
        cur_states = (h0, c0)

        def step(input_token, states):
            cur_input_embedding = self.embedder(input_token)
            hidden_states, new_states = self.base_lstm.forward(cur_input_embedding, \
                states, context_embeddings)

            reshaped_hidden_states = hidden_states.view(
                -1, hidden_states.size(-1))
            predictor_probs = self.combiner(reshaped_hidden_states)

            language_probs = self.softmax_predictor(reshaped_hidden_states)
            reshaped_language_probs = language_probs.view(
                -1, language_probs.size(-1))

            _, attentions, inputs = self.text_field_predictor.forward_similarity(
                hidden_states)

            combined_predictions = self.combine_predictions_single(\
                context_tokens=batch_first_context_tokens,
                predictor_probs=predictor_probs,
                attentions=attentions,
                language_probs=reshaped_language_probs)

            score, token = torch.max(combined_predictions, 1)  # greedy argmax
            return score, token, new_states

        score, new_token, new_states = step(input_token, cur_states)

        while (not torch_utils.to_bool(new_token.data == end_token)
               or len(predicted_tokens) < min_length
               ) and len(predicted_tokens) < max_length:
            predicted_tokens.append(new_token)
            score, new_token, new_states = step(new_token, new_states)
        return predicted_tokens
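# Stop-rule sketch mirroring the while-test above (illustrative, plain torch):
# decoding continues past <end> until min_length is met, and always stops at max_length.
import torch

end = torch.LongTensor([[7]])
tok = torch.LongTensor([[7]])
emitted = [1, 2]  # two tokens emitted so far
keep_going = (not bool((tok == end).sum()) or len(emitted) < 3) and len(emitted) < 20
print(keep_going)  # True: <end> was produced, but min_length=3 is not yet reached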
    def combine_predictions_single(self, context_tokens, predictor_probs,
                                   attentions, language_probs):

        max_attention_length = attentions.size(1)
        pad_size = self.config['vocab_size'] - max_attention_length
        batch_size = attentions.size(0)

        context_tokens_padding = variable.Variable(
            torch.LongTensor(batch_size, pad_size).zero_()).cuda()
        attentions_padding = variable.Variable(
            torch.zeros(batch_size, pad_size) - 1e10).cuda()
        stacked_context_tokens = torch.cat(
            (context_tokens, context_tokens_padding), 1)

        softmax_probs = predictor_probs[:, 0]
        text_field_probs = predictor_probs[:, 1]

        replicated_softmax_probs = softmax_probs.unsqueeze(1)
        replicated_text_field_probs = text_field_probs.unsqueeze(1)

        dims = replicated_softmax_probs.size()

        expanded_softmax_probs = replicated_softmax_probs.expand(
            dims[0], self.config['vocab_size'])
        expanded_text_field_probs = replicated_text_field_probs.expand(
            dims[0], max_attention_length)

        stacked_attentions = torch.cat((attentions, attentions_padding), 1)
        attention_results = variable.Variable(
            torch.zeros(batch_size, self.config['vocab_size']) - 1e10).cuda()

        attention_results.scatter_(1, stacked_context_tokens,
                                   stacked_attentions)
        # Inference-time hard choice: return whichever head the gate scores
        # higher, rather than the soft mixture used during training.
        use_softmax_predictor = softmax_probs > text_field_probs
        if torch_utils.to_bool(use_softmax_predictor.data):
            return language_probs
        else:
            return attention_results
Example #6
import numpy as np
import torch
from torch.autograd import variable

# to_bool is assumed to come from this same helpers module
def get_index_select(masks):
    """
    Return the flat (row-major) indices of the mask positions equal to 0,
    i.e. the unpadded token positions.
    """
    num_rows = masks.size(0)
    num_cols = masks.size(1)
    new_tensor = []
    for i in range(0, num_rows):
        for j in range(0, num_cols):
            if to_bool(masks[i][j].data.cpu() == torch.LongTensor([0])):
                new_tensor.append(i * num_cols + j)

    indices = torch.from_numpy(np.array(new_tensor)).long()
    flattened_indices = variable.Variable(indices)
    return flattened_indices
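# Usage sketch for get_index_select (reuses the imports above; illustrative masks):
masks = variable.Variable(torch.LongTensor([[0, 0, 1],
                                            [0, 1, 1]]))
idx = get_index_select(masks)
print(idx.data)  # 0, 1, 3: row-major positions (i * num_cols + j) where mask == 0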
    def forward(self, inputs, contexts, answer_features):
        context_embeddings = self.text_field_predictor.forward_prepro(
            contexts, input_masks=None, answer_features=answer_features)
        input_embeddings = self.embeddings(inputs)
        batch_size = inputs.size(1)
        state_shape = (batch_size, self.config['hidden_size'])
        h0 = c0 = variable.Variable(
            input_embeddings.data.new(*state_shape).zero_()).cuda()
        cur_states = (h0, c0)

        out, hidden = self.base_lstm.forward(input_embeddings, cur_states,
                                             context_embeddings)

        h = hidden[0]
        pred = torch.squeeze(self.predictor(h))
        return pred
    def forward(self, input_tokens, context_tokens, context_masks,
                answer_features):
        self.batch_first_context_tokens = context_tokens.transpose(0, 1)
        self.context_embeddings = self.text_field_predictor.forward_prepro(
            context_tokens, context_masks, answer_features)
        self.input_embeddings = self.embedder(input_tokens)

        batch_size = input_tokens.size(1)
        token_length = input_tokens.size(0)

        state_shape = (batch_size, self.config['hidden_size'])
        h0 = c0 = variable.Variable(
            self.input_embeddings.data.new(*state_shape).zero_(),
            requires_grad=False)


        hidden_states, res = self.base_lstm.forward(self.input_embeddings, \
            (h0, c0), \
            self.context_embeddings)

        reshaped_hidden_states = hidden_states.view(batch_size * token_length,
                                                    -1)
        predictor_probs = self.combiner(reshaped_hidden_states)
        reshaped_predictor_probs = predictor_probs.view(
            token_length, batch_size, predictor_probs.size(-1))

        language_probs = self.softmax_predictor(reshaped_hidden_states)
        reshaped_language_probs = language_probs.view(token_length, batch_size,
                                                      language_probs.size(-1))

        attentions_list = []
        for i in range(0, token_length):
            _, attentions, inputs = self.text_field_predictor.forward_similarity(
                hidden_states[i, :, :])
            attentions_list.append(attentions)
        attentions_sequence = torch.stack(attentions_list, 0)

        combined_predictions = self.combine_predictions(
            context_tokens=self.batch_first_context_tokens,
            predictor_probs=reshaped_predictor_probs,
            attentions=attentions_sequence,
            language_probs=reshaped_language_probs)

        #return reshaped_language_probs
        return combined_predictions
    def step(self, batch, train=True):
        inputs = variable.Variable(torch.from_numpy(
            batch['input_tokens'])).cuda()
        desired_inputs = variable.Variable(
            torch.from_numpy(batch['desired_input_tokens'])).cuda()
        desired_input_masks = variable.Variable(
            torch.from_numpy(batch['desired_input_masks'])).cuda()
        contexts = variable.Variable(torch.from_numpy(
            batch['context_tokens'])).cuda()
        context_masks = variable.Variable(
            torch.from_numpy(batch['context_masks'])).cuda()
        answer_features = variable.Variable(
            torch.from_numpy(batch['answer_features'])).cuda()

        language_probs = self.language_model.forward(inputs, contexts,
                                                     context_masks,
                                                     answer_features)
        reshaped_inputs = desired_inputs.contiguous().view(-1)
        reshaped_language_probs = language_probs.view(
            -1, self.config['vocab_size'])

        max_likelihoods, best_indices = torch.max(language_probs, 2)
        #accuracy = torch_utils.average_accuracy(torch.squeeze(best_indices).data, desired_inputs.data)

        #predictions = self.language_wrapper.get_tokens(best_indices.cpu())
        #predictions_text = utils.transpose_join(predictions, " ")
        loss = 0
        select_indices = torch_utils.get_index_select(
            desired_input_masks).cuda()
        gathered_indices = torch.index_select(reshaped_inputs, 0,
                                              select_indices)
        gathered_probs = torch.index_select(reshaped_language_probs, 0,
                                            select_indices)

        if train:
            self.optimizer.zero_grad()
            if not self.config['finetune_embeddings']:
                # detach() is not in-place; rebind so the graph is actually cut
                inputs = inputs.detach()
                contexts = contexts.detach()
                answer_features = answer_features.detach()
            """
            batch_size = language_probs.size(1)
            for i in range(0, language_probs.size(1)):
                cur_language_probs = language_probs[:, i, :]
                cur_desired_inputs = desired_inputs[:, i]
                cur_lengths = batch['desired_input_lengths'][i]

                truncated_language_probs = cur_language_probs[0:cur_lengths, :]
                truncated_desired_inputs = cur_desired_inputs[0:cur_lengths]

                loss = self.criterion(truncated_language_probs, truncated_desired_inputs)
                if i == batch_size - 1:
                    loss.backward()
                else:
                    loss.backward(retain_variables=True)
            """

            loss = self.criterion(gathered_probs, gathered_indices)
            loss.backward()
            torch.nn.utils.clip_grad_norm(self.language_model.parameters(), 5)
            self.optimizer.step()

        return loss  #, accuracy, predictions_text
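# Masked-loss sketch (illustrative shapes, not the trainer's real data): gather only
# the unpadded flat positions, exactly as gathered_probs/gathered_indices above.
import torch
from torch import nn
from torch.autograd import variable

probs = variable.Variable(torch.log(torch.rand(6, 12)))  # (tokens * batch) x vocab
targets = variable.Variable(torch.LongTensor([1, 2, 3, 4, 5, 6]))
keep = variable.Variable(torch.LongTensor([0, 1, 3]))    # flat indices where mask == 0
loss = nn.NLLLoss()(torch.index_select(probs, 0, keep),
                    torch.index_select(targets, 0, keep))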
import torch 
import numpy as np 
from torch.autograd import variable 
from models.language_model import TextFieldPredictor, SoftmaxPredictor

config = {}
config['vocab_size'] = 12
config['embedding_size'] = 20 
config['hidden_size'] = 50
config['num_layers'] = 1
config['dropout'] = 0.0
config['batch_first'] = True 

# First, test the text field predictor
inp = variable.Variable(torch.LongTensor([[1, 2, 3], [4, 5, 6]]))
hidden = variable.Variable(torch.randn(2, config['hidden_size']))
predictor = TextFieldPredictor(config)
lstm_embeddings = predictor.forward_prepro(inp)
h_tilde, attentions, inp = predictor.forward_similarity(hidden)

inp1 = variable.Variable(torch.LongTensor(2, config['vocab_size'] - 3).zero_())
inp2 = variable.Variable(torch.zeros(2, config['vocab_size'] - 3))
stacked_inps = torch.cat((inp, inp1), 1)
stacked_attentions = torch.cat((attentions, inp2), 1)

# Second, test the softmax predictor
softmax_predictor = SoftmaxPredictor(config)
softmax_logits = softmax_predictor.forward(hidden)

res = variable.Variable(torch.zeros(2, config['vocab_size']))
res.scatter_(1, stacked_inps, stacked_attentions)
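# Appended scatter_ sanity check (illustrative): with dim=1,
# res[b][index[b][k]] = src[b][k], which is how attention scores land on vocab ids.
t = torch.zeros(1, 5)
t.scatter_(1, torch.LongTensor([[0, 2]]), torch.Tensor([[1.0, 3.0]]))
print(t)  # [[1, 0, 3, 0, 0]]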
config['load_model'] = True
config['load_path'] = 'logs/squad_saved_data/model_7_old.pyt7'

pointer_network = PointerNetwork(config).cuda()

criterion1 = nn.CrossEntropyLoss().cuda()
criterion2 = nn.CrossEntropyLoss().cuda()
optimizer = optim.Adam(pointer_network.parameters(), 1e-2)

batch = language_model_loader.get_batch(dataset_type=constants.DATASET_TRAIN,
                                        batch_size=config['batch_size'])

large_negative_number = -1e10  # masking constant, matching the -1e10 used elsewhere
while batch is not None:
    optimizer.zero_grad()
    input_lengths = variable.Variable(
        torch.from_numpy(batch['context_lengths'])).cuda()
    input_vals = variable.Variable(torch.from_numpy(
        batch['context_tokens'])).cuda()
    answer_starts = variable.Variable(torch.from_numpy(
        batch['answer_starts'])).cuda()
    answer_ends = variable.Variable(torch.from_numpy(
        batch['answer_ends'])).cuda()
    masks = variable.Variable(
        torch.from_numpy(batch['context_masks'].T).float()).cuda()

    p_start, p_end = pointer_network.forward(input_vals, input_lengths, masks)

    # Batch first
    loss = criterion1(p_start, answer_starts) + \
        criterion2(p_end, answer_ends)
    loss.backward()
    optimizer.step()

    batch = language_model_loader.get_batch(dataset_type=constants.DATASET_TRAIN,
                                            batch_size=config['batch_size'])
Example #12
import numpy as np
import torch
import torch.nn as nn
import torch.optim
from torch.autograd import variable

from models.card_model import CardModel

config = {}
config['vocab_size'] = 52
config['embedding_size'] = 23

model = CardModel(config)

emb1 = nn.Embedding(config['vocab_size'], config['embedding_size'])

desired = variable.Variable(torch.randn(3, 23))
tmp = variable.Variable(torch.LongTensor([1, 2, 3]))
tmp1 = emb1(tmp)
tmp2 = emb1(tmp)

criterion = nn.MSELoss()
loss = criterion(tmp1 + tmp2, desired)
loss.backward()
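# Quick gradient check appended to the script above: both lookups share emb1, so
# the two backward paths accumulate onto the same embedding rows.
print(emb1.weight.grad.data[1:4])  # non-zero only for the indices [1, 2, 3] used above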
Example #13
import torch
from torch import nn
from torch.autograd import variable
from torch import optim

batch_size = 25
input_size = 125
input_length = 25
hidden_size = 250
ctx_length = 230

net = LSTMAttentionDot(input_size=input_size,
                       hidden_size=hidden_size,
                       batch_first=False).cuda()

inputs = variable.Variable(torch.randn(input_length, batch_size,
                                       input_size)).cuda()
hidden = variable.Variable(torch.randn(batch_size, hidden_size)).cuda()
cell = variable.Variable(torch.randn(batch_size, hidden_size)).cuda()
context = variable.Variable(torch.randn(ctx_length, batch_size,
                                        hidden_size)).cuda()
desired = variable.Variable(torch.randn(batch_size, hidden_size)).cuda()

criterion = nn.MSELoss()

optimizer = optim.Adam(net.parameters(), lr=3e-2)

for i in range(0, 1000):
    print(i)
    optimizer.zero_grad()
    out, h = net.forward(inputs, [hidden, cell], context)
    loss = criterion(h[0], desired)
    loss.backward()
    optimizer.step()
language_model = torch_utils.load_model(load_path)
language_model = language_model.cuda()

batch_size = 3

embeddings = language_model.embedder
text_field_predictor = language_model.text_field_predictor
base_lstm = language_model.base_lstm

discriminator = LanguageDiscriminator(language_model.config, embeddings,
                                      text_field_predictor, base_lstm).cuda()

discriminator_optimizer = optim.Adam(discriminator.parameters(), lr=3e-2)
discriminator_criterion = nn.BCELoss()

contexts = variable.Variable(
    torch.LongTensor([[1, 2, 3], [2, 3, 4], [4, 5, 6]])).cuda()
answer_features = variable.Variable(
    torch.FloatTensor([[0, 0, 0], [0, 0, 0], [0, 0, 0]])).cuda()
inputs = variable.Variable(
    torch.LongTensor([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])).cuda()

desired_indices = variable.Variable(torch.FloatTensor([1, 1, 1])).cuda()

for i in range(0, 100):
    discriminator_optimizer.zero_grad()
    pred = discriminator.forward(inputs, contexts, answer_features)
    bce_loss = discriminator_criterion(pred, desired_indices)
    bce_loss.backward()

    print(bce_loss)
    discriminator_optimizer.step()
from models.language_model import LanguageModel 
import torch 
from torch import nn
from torch import optim
from torch.autograd import variable 
from helpers import torch_utils 

config = {}
config['vocab_size'] = 25 
config['hidden_size'] = 50
config['embedding_size'] = 10 
config['num_layers'] = 1
config['dropout'] = 0.0
config['batch_first'] = False

language_model = LanguageModel(config)
language_model.cuda()
# contexts: context_length x batch_size
# inputs: input_length x batch_size
# desired_inputs: input_length x batch_size

input_token = variable.Variable(torch.LongTensor([[1]])).cuda()
context_tokens = variable.Variable(
    torch.LongTensor([[2], [3], [4], [5], [6], [7], [8]])).cuda()
# zero answer features (assumed shape: context_length x batch, as in forward_prepro)
answer_features = variable.Variable(torch.zeros(7, 1)).cuda()
language_model.predict(input_token, context_tokens,
                       torch.LongTensor([[1]]).cuda(), answer_features)
Example #16
config['batch_first'] = False
config['use_pretrained_embeddings'] = False
config['finetune_embeddings'] = True

language_model = LanguageModel(config).cuda()

# contexts: context_length x batch_size
# inputs: input_length x batch_size
# desired_inputs: input_length x batch_size

optimizer = optim.Adam(language_model.parameters(), lr=3e-2)
criterion = nn.NLLLoss()

for i in range(0, 1000):
    optimizer.zero_grad()
    inputs = variable.Variable(torch.LongTensor([[1, 2, 3, 4, 5, 6, 7]] *
                                                100)).cuda()
    contexts = variable.Variable(
        torch.LongTensor([[4, 5, 6, 7, 8, 9, 10], [4, 5, 6, 7, 8, 9, 10],
                          [4, 5, 6, 7, 8, 9, 10], [4, 5, 6, 7, 8, 9,
                                                   10]])).cuda()
    context_masks = variable.Variable(
        torch.FloatTensor([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                           [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                           [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                           [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]])).cuda()
    desired_inputs = variable.Variable(
        torch.LongTensor([[1, 2, 3, 4, 5, 6, 7]] * 100)).cuda()
    input_masks = variable.Variable(
        torch.FloatTensor([[1, 1, 1, 1, 1, 1, 1]] * 100)).cuda()
    answer_features = variable.Variable(
        torch.LongTensor([[4, 5, 6, 7, 8, 9, 10], [4, 5, 6, 7, 8, 9, 10],