Exemplo n.º 1
0
    def __init__(self, batch_size=2, steps=100, validate=False):
        """Build data loaders, model, optimizer and loss for the POS task.

        Args:
            batch_size: mini-batch size; must not exceed the dataset size.
            steps: kept for interface compatibility — not used here;
                presumably consumed by the training loop (TODO confirm).
            validate: when True, hold out the last 10% of the dataset as a
                validation split; otherwise train on the full dataset.
        """
        super(BaseTask, self).__init__()
        dataset = SimplePOSTaggerDataset()
        n_total = len(dataset)
        assert batch_size <= n_total, "`batch_size` is greater than size of dataset"
        if validate:
            # Sequential 90/10 train/validation split.
            n_train = int(n_total * 0.9)
            self.train_loader = DataLoader(
                Subset(dataset, range(n_train)),
                batch_size=batch_size,
                collate_fn=collate_wrapper,
            )
            self.val_loader = DataLoader(
                Subset(dataset, range(n_train, n_total)),
                batch_size=batch_size,
                collate_fn=collate_wrapper,
            )
        else:
            self.train_loader = DataLoader(dataset,
                                           batch_size=batch_size,
                                           collate_fn=collate_wrapper)
            self.val_loader = None

        # Vocabulary is the union of all tokens observed in the dataset.
        vocab_set = set.union(*[set(v) for v in dataset.data])
        vocab_to_ix = {vocab: i for i, vocab in enumerate(vocab_set)}
        tagset_size = len(dataset.tag_to_ix)

        self.batch_size = batch_size
        self.model = LSTMTagger(6, 6, tagset_size, vocab_to_ix)
        self.optimizer = optim.SGD(self.model.parameters(), lr=1e-5)
        self.criterion = nn.NLLLoss()
        # BUG FIX: the device was hard-coded to "cuda", which raises on
        # CPU-only machines as soon as the device is used; fall back to CPU.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Exemplo n.º 2
0
def main():
    """Parse CLI arguments, build dataset/loader/model, and train for 2 epochs."""
    cli_args = parse_args()
    dataset = LabeledDataset(cli_args.train_file)
    batches = DataLoader(dataset, batch_size=cli_args.batch_size)
    tagger = LSTMTagger(len(dataset.x_vocab), len(dataset.y_vocab), 20, 64, 1)
    # Move the model to the GPU only when the module-level flag says so.
    tagger = tagger.cuda() if use_cuda else tagger
    train(tagger, batches, epochs=2)
Exemplo n.º 3
0
 def __init__(self, PATH):
     """Load pickled TEXT/LABELS fields and a trained LSTMTagger checkpoint.

     Args:
         PATH: filesystem path of the saved ``state_dict`` to restore.
     """
     # BUG FIX: `pickle.load(open(...))` leaked the file handles; use
     # context managers so they are closed deterministically.
     with open('TEXT.pkl', 'rb') as f:
         self.TEXT = pickle.load(f)
     with open('LABELS.pkl', 'rb') as f:
         self.LABELS = pickle.load(f)
     self.BATCH_SIZE = 1
     self.INPUT_DIM = len(self.TEXT.vocab)
     self.EMBEDDING_DIM = 100
     self.HIDDEN_DIM = 256
     self.OUTPUT_DIM = len(self.LABELS.vocab)

     self.criterion = nn.CrossEntropyLoss()
     self.criterion = self.criterion.to(device)

     self.PATH = PATH
     self.model = LSTMTagger(self.EMBEDDING_DIM, self.HIDDEN_DIM, self.INPUT_DIM, self.OUTPUT_DIM)
     self.model.load_state_dict(torch.load(PATH))
     self.model.to(device)
     # Inference-only object: switch off dropout/batch-norm training behavior.
     self.model.eval()
Exemplo n.º 4
0
def train():
    """Train an LSTMTagger on the POS data and report loss/accuracy per epoch.

    Reads training/dev data via `readtrain()`/`readdev()`, builds index
    mappings, pickles them to `data.pickle`, then runs `EPOCHS` epochs of
    NLL-loss training, printing loss plus train/dev accuracy after each epoch.
    Relies on module-level hyperparameters (EMBEDDING_DIM, HIDDEN_DIM,
    USE_CUDA, OPTIMIZER, LEARNING_RATE, EPOCHS).
    """
    # NOTE(review): torch.initial_seed() only *returns* the current seed;
    # it does not set one. torch.manual_seed(...) is likely what was meant.
    torch.initial_seed()
    filename = "models/model"

    print_hyperparameters()
    # Loading data and preprocessing
    print('Reading data...')
    data_raw = readtrain()
    data_raw_dev = readdev()
    print('Training Data size', len(data_raw))
    print('Dev data size', len(data_raw_dev))

    print('Preparing data...')

    # Build tag/word <-> index mappings from the training data only.
    tag_to_index, word_to_index, index_to_tag, index_to_word = prepare_embedding(
        data_raw)
    idxs = [tag_to_index, word_to_index, index_to_tag, index_to_word]

    dataset = get_loader(data_raw, idxs)
    dev_dataset = get_loader(data_raw_dev, idxs)
    '''
    data = []
    for sentence, tags in data_raw:
        sentence_in = prepare(sentence, word_to_index)
        targets = prepare(tags, tag_to_index)
        data.append((sentence_in, targets))
    '''

    #data_dev = data_raw_dev
    #    data_dev = []
    #    for sentence, tags in data_raw_dev:
    #        sentence_in = prepare(sentence, word_to_index)
    #        targets = prepare(tags, tag_to_index)
    #        data_dev.append((sentence_in, targets))
    #

    #data_batches = mini_batch(idxs, data)
    #    data_dev_batches = mini_batch(idxs, data_dev)
    # print('Training Data size', len(data))

    # Save indexes to data.pickle so predict() can run without retraining.
    with open('data.pickle', 'wb') as f:
        pickle.dump([tag_to_index, word_to_index, index_to_tag, index_to_word],
                    f, pickle.HIGHEST_PROTOCOL)

    # Create an instance of the NN
    model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_index),
                       len(tag_to_index))
    print('Source size:', len(word_to_index))
    print('Target size:', len(tag_to_index))
    # NOTE(review): size_average is deprecated in modern PyTorch
    # (reduction='mean' is the equivalent); kept for compatibility.
    loss_function = nn.NLLLoss(size_average=True)

    if USE_CUDA:
        model = model.cuda()

    if OPTIMIZER == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)
    else:
        optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    #Train
    print('Train with', len(dataset), 'batches.')
    for epoch in range(EPOCHS):
        print(f'Starting epoch {epoch}.')
        loss_sum = 0
        y_true = list()
        y_pred = list()
        for batch, lengths, targets, lengths2 in tqdm(dataset):
            model.zero_grad()
            #batch, targets, lengths = sort_batch(batch, targets, lengths)
            #pred = model(autograd.Variable(batch), lengths.cpu().numpy())
            # Legacy pre-0.4 PyTorch style: Variable wrapper + lengths array.
            pred = model(autograd.Variable(batch), lengths.cpu().numpy())
            # Flatten (batch, seq, tags) -> (batch*seq, tags) and targets
            # -> (batch*seq,) for NLLLoss.
            loss = loss_function(
                pred.view(-1,
                          pred.size()[2]),
                autograd.Variable(targets).view(-1, 1).squeeze(1))
            loss.backward()
            #for f in model.parameters():
            #    print('data is')
            #    print(f.data)
            #    print('grad is')
            #    print(f.grad)
            optimizer.step()
            # loss.data[0] is pre-0.4 syntax; modern code uses loss.item().
            loss_sum += loss.data[0]
            #print(loss.data[0])
            # NOTE(review): pred appears 3-D above (indexed by pred.size()[2]),
            # so max over dim 1 takes the max over the sequence axis, not the
            # tag axis — confirm intent (dim=2 is used elsewhere in this file).
            pred_idx = torch.max(pred, 1)[1]
            y_true += list(targets.int())
            y_pred += list(pred_idx.data.int())

        #acc = accuracy_score(y_true, y_pred)
        loss_total = loss_sum / len(dataset)

        #print('Accuracy on test:', acc, 'loss:', loss_total)
        print('>>> Loss:', loss_total)

        acc = predict(dataset, model_=model, idxs=idxs)
        print("Accuracy on train:", acc)
        acc = predict(dev_dataset, model_=model, idxs=idxs)
        print("Accuracy on dev:", acc)
Exemplo n.º 5
0
class EstTokenizer:
    """Character-level LSTM tokenizer loaded from pickled fields + checkpoint.

    Tags each character of the input with a class (0 = inside token,
    1 = token boundary, other = sentence boundary — inferred from
    `tokenize`; confirm against the training labels) and reconstructs
    tokens/sentences, optionally emitting CoNLL-U output.
    """

    def __init__(self, PATH):
        # Restore the torchtext fields the model was trained with and the
        # trained weights from `PATH`; sets the model to eval mode.
        # NOTE(review): the pickle file handles are never closed — use
        # `with open(...)` when touching this code.
        self.TEXT = pickle.load(open('TEXT.pkl', 'rb'))
        self.LABELS = pickle.load(open('LABELS.pkl', 'rb'))
        self.BATCH_SIZE = 1
        self.INPUT_DIM = len(self.TEXT.vocab)
        self.EMBEDDING_DIM = 100
        self.HIDDEN_DIM = 256
        self.OUTPUT_DIM = len(self.LABELS.vocab)

        self.criterion = nn.CrossEntropyLoss()
        self.criterion = self.criterion.to(device)

        self.PATH = PATH
        self.model = LSTMTagger(self.EMBEDDING_DIM, self.HIDDEN_DIM, self.INPUT_DIM, self.OUTPUT_DIM)
        self.model.load_state_dict(torch.load(PATH))
        self.model.to(device)
        self.model.eval()


    def __binary_accuracy(self, preds, y):
        """
        Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
        """

        # Argmax over sigmoid scores (dim 1 = class axis of flattened preds);
        # the sigmoid is monotonic so it does not change the argmax.
        _, rounded_preds = torch.max(torch.sigmoid(preds), 1)
        correct = (rounded_preds == y).float() #convert into float for division
        acc = correct.sum()/len(correct)
        return acc


    def evaluate(self, iterator):
        """Average loss and per-batch accuracy of the model over `iterator`."""

        epoch_loss = 0
        epoch_acc = 0

        self.model.eval()

        with torch.no_grad():

            for batch in iterator:

                # include_lengths=True fields yield (tensor, lengths) pairs.
                t, l = batch.text

                predictions = self.model(t, l)
                #predictions = torch.argmax(predictions, dim=2)
                # Flatten (batch, seq, classes) -> (batch*seq, classes).
                predictions = predictions.reshape(-1, predictions.size()[-1])
                predictions = predictions.float()

                labels = batch.labels.reshape(-1)
                labels = labels.long()

                loss = self.criterion(predictions, labels)
                acc = self.__binary_accuracy(predictions, labels)

                epoch_loss += loss.item()
                epoch_acc += acc.item()

        return epoch_loss / len(iterator), epoch_acc / len(iterator)


    def tokenize(self, text, output='conllu'):
        """Tokenize raw text into sentences of tokens.

        Args:
            text: newline-separated raw text; empty lines are dropped.
            output: 'conllu' for (token, space_after) pairs, anything else
                for bare token strings.

        Returns:
            A list of sentences, each a list of tokens (pairs or strings).
        """
        text = [t for t in text.split("\n") if len(t) > 0]
        examples = [data.Example().fromlist([t], fields=[('text', self.TEXT)]) for t in text]
        dataset = data.Dataset(examples, fields=[('text', self.TEXT)])
        # shuffle=False but sort_within_batch reorders by length; see note below.
        data_iter = data.BucketIterator(dataset,
            batch_size=self.BATCH_SIZE,
            sort_key=lambda x: len(x.text),
            sort_within_batch=True,
            shuffle=False,
            device=device)

        with torch.no_grad():
            preds = []
            for batch in data_iter:
                t, l = batch.text
                predictions = self.model(t, l)
                predictions = predictions.float()
                # Per-character class = argmax over the class axis (dim 2).
                _, rounded_preds = torch.max(torch.sigmoid(predictions), 2)
                preds.append(rounded_preds)

        sents = []
        tokens = []
        # NOTE(review): predictions are zipped against the lines in *reverse*
        # order (preds[::-1]) — this only lines up if the iterator yields
        # batches in reverse line order; verify.
        for item in list(zip(text, preds[::-1])):
            # `text` is rebound to the current line here, shadowing the list.
            text = item[0]
            tags = item[1]
            token = ''
            for i in tqdm(range(len(tags[0]))):
                if int(tags[0][i]) == 0:
                    # Inside a token: keep accumulating characters.
                    token += text[i]
                elif int(tags[0][i]) == 1:
                    # Token boundary: flush the accumulated token.
                    token += text[i]
                    if output == 'conllu':
                        # NOTE(review): text[i + 1] raises IndexError when the
                        # boundary falls on the last character of the line.
                        space_after = 1 if text[i + 1] == ' ' else 0
                        tokens.append((token.strip(), space_after))
                    else:
                        tokens.append(token.strip())
                    token = ''
                else:
                    # Any other tag: flush token AND close the sentence.
                    token += text[i]
                    if output == 'conllu':
                        tokens.append((token.strip(), 0))
                    else:
                        tokens.append(token.strip())
                    token = ''
                    sents.append(tokens)
                    tokens = []
        return sents


    def write_conllu(self, sents, filename='lstm_tokenizer_output.conllu'):
        """Write sentences of (token, space_after) pairs as a CoNLL-U file."""
        with open(filename, 'w', encoding='utf-8') as f:
            for s_id, sent in enumerate(sents):
                sent_text = ''
                token_lines = []
                for i, token_info in enumerate(sent):
                    token, space_after = token_info[0], token_info[1]
                    if space_after == 1:
                        sent_text += token + ' '
                        token_line = '{}\t{}\t_\t_\t_\t_\t_\t_\t_\t_'.format(i + 1, token)
                        token_lines.append(token_line)
                    else:
                        sent_text += token
                        token_line = '{}\t{}\t_\t_\t_\t_\t_\t_\t_\tSpaceAfter=No'.format(i + 1, token)
                        token_lines.append(token_line)
                f.write('# sent_id = {}\n'.format(s_id + 1))
                f.write('# text = {}\n'.format(sent_text))
                f.write('\n'.join(token_lines))
                f.write('\n\n')
Exemplo n.º 6
0
# Build vocabularies, load GloVe embeddings, and prepare train/test id lists.
get_pos_vocab(conll_train, conll_val, conll_test, output=pos_vocab_file)
vocab, embedding = load_glove(glove, dim=embed_dim, save_dir='dataset')
pos_vocab = load_pos_vocab(pos_vocab_file)

# convert words to indices for train and test respectively
words, pos = read_conll(conll_train)
# Index 1 is the OOV fallback for words missing from the GloVe vocab.
word_ids = [[vocab.get(word, 1) for word in sentence] for sentence in words]
pos_ids = [[pos_vocab.get(pos) for pos in sentence] for sentence in pos]

test_words, test_pos = read_conll(conll_test)
# BUG FIX: the test id lists were previously built from the *training*
# sentences (`words`/`pos`); use the test split read just above.
test_word_ids = [[vocab.get(word, 1) for word in sentence]
                 for sentence in test_words]
test_pos_ids = [[pos_vocab.get(p) for p in sentence] for sentence in test_pos]

embedding = torch.from_numpy(embedding).float()
model = LSTMTagger(embedding, embed_dim, 100, 2, len(pos_vocab)).to(device)
# Only optimize trainable parameters (the embedding matrix may be frozen).
optimizer = optim.Adam(
    [param for param in model.parameters() if param.requires_grad],
    lr=0.001)
criterion = nn.NLLLoss()
for epoch in range(num_epochs):
    batch = Batch(word_ids, pos_ids, batch_size=batch_size)
    total_step = len(batch)
    i = 0
    # NOTE(review): this loop body appears truncated by the source scrape —
    # no forward/backward pass follows the padding below.
    for inputs, labels in batch:
        i += 1
        pad_words_obj = PadSequence(inputs, [len(inputs), 100])
        padded_inputs = torch.Tensor(pad_words_obj.embedding).long().to(device)
        padded_inputs_lens = torch.Tensor(
            pad_words_obj.lengths).long().to(device)
Exemplo n.º 7
0
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from model import LSTMTagger
from config import LSTMConfig
from util import *
import time

# Module-level setup: config, vocab dicts, model, optimizer, loss, dataset.
configs = LSTMConfig()

word_dict = make_dict(configs.WORD_FILE)
tag_dict = make_dict(configs.TAG_FILE)
# Inverse mappings: id -> word / id -> tag.
id2word_dict = {v: k for k, v in word_dict.id.items()}
id2tag_dict = {v: k for k, v in tag_dict.id.items()}

model = LSTMTagger(configs.word_dim, configs.hidden_dim, word_dict.size,
                   tag_dict.size)
# Pinned to the second GPU (cuda:1) when CUDA is available.
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# NOTE(review): `configs.lr_decay` is passed as SGD *momentum* — the name
# suggests a decay rate, not momentum; confirm this is intentional.
optimizer = optim.SGD(model.parameters(),
                      lr=configs.lr,
                      momentum=configs.lr_decay)
loss_function = nn.NLLLoss()
data = PosDataset(configs.POS_FILE, word_dict, tag_dict)


def makeDataset(test_rate=0.2, validation_rate=0):
    """Build a DataLoader over the first `test_rate` fraction of `data`.

    Args:
        test_rate: fraction of the module-level `data` used as the test split.
        validation_rate: unused here — presumably for a validation split in
            the (not visible / possibly truncated) remainder of the function.
    """
    # Idiom fix: len(data) instead of calling data.__len__() directly.
    test_id = int(len(data) * test_rate)
    test_data = DataLoader([data[i] for i in range(test_id)],
                           batch_size=1,
                           num_workers=configs.num_workers)
Exemplo n.º 8
0
def train_model(data_folder, patience, max_epoch, model_path):
    """Train an LSTMTagger on TSV data with early stopping and save it.

    Args:
        data_folder: directory containing `_train.tsv`, `_dev.tsv`, `_test.tsv`.
        patience: early-stopping patience (epochs without val-loss improvement).
        max_epoch: maximum number of training epochs.
        model_path: where to save the trained `state_dict`.
    """
    TEXT = data.Field(tokenize=list, include_lengths=True, batch_first=True)
    LABELS = data.Field(dtype=torch.float, tokenize=list, pad_token=None, unk_token=None, batch_first=True)

    # BUG FIX: `path` was the literal string 'data_folder', silently ignoring
    # the caller-supplied directory; pass the function argument instead.
    train_data, val_data, test_data = data.TabularDataset.splits(
        path=data_folder, train='_train.tsv',
        validation='_dev.tsv', test='_test.tsv', format='tsv',
        fields=[('text', TEXT), ('labels', LABELS)], csv_reader_params={"quotechar": '|'})

    TEXT.build_vocab(train_data)
    LABELS.build_vocab(train_data)

    # Persist the fields so inference code (EstTokenizer) can reload them.
    pickle.dump(TEXT, open('TEXT.pkl', 'wb'))
    pickle.dump(LABELS, open('LABELS.pkl', 'wb'))

    BATCH_SIZE = 64
    train_iter, val_iter, test_iter = data.BucketIterator.splits(
        (train_data, val_data, test_data),
        batch_size=BATCH_SIZE,
        sort_key=lambda x: len(x.text),
        sort_within_batch=True,
        device=device)

    INPUT_DIM = len(TEXT.vocab)
    EMBEDDING_DIM = 100
    HIDDEN_DIM = 256
    OUTPUT_DIM = len(LABELS.vocab)

    model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, INPUT_DIM, OUTPUT_DIM)
    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()
    early_stop = EarlyStopping(patience=patience)

    model = model.to(device)
    criterion = criterion.to(device)

    N_EPOCHS = max_epoch

    train_losses = []
    val_losses = []

    for epoch in range(N_EPOCHS):

        try:
            train_loss, train_acc = train(model, train_iter, optimizer, criterion)
            train_losses.append(train_loss)
        except (TypeError, ValueError):
            # Best-effort: dump the traceback and stop training, but still
            # fall through to save whatever was learned so far.
            print("Exception in user code:")
            print("-"*60)
            traceback.print_exc(file=sys.stdout)
            print("-"*60)
            break
        valid_loss, valid_acc = evaluate(model, val_iter, criterion)
        val_losses.append(valid_loss)

        if early_stop.step(valid_loss):
            print('Stopped learning due to lack of progress.')
            break

        print(f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}% | Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}% |')

    torch.save(model.state_dict(), model_path)

    test_loss, test_acc = evaluate(model, test_iter, criterion)
    print(f'| Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}% |')
Exemplo n.º 9
0
def train(data, gpu):
    """Train an LSTMTagger for 100 epochs, evaluating on dev and test.

    NOTE(review): this block is Python 2 (print statements) with pre-0.4
    PyTorch idioms (autograd.Variable, loss.data.numpy()[0]).

    Args:
        data: project data object exposing alphabets, embedding dim and
            `train_Ids` instances of (words, chars, label).
        gpu: truthy to run tensors on CUDA.
    """
    # NOTE(review): the next three locals are computed but never used here.
    vocabulary_size = data.word_alphabet.size()
    label_size = data.label_alphabet.size()
    EMBEDDING_DIM = data.word_emb_dim
    HIDDEN_DIM = 100
    dropout = 0.2
    lstm_layer = 1
    bilstm = True
    use_char = True
    model = LSTMTagger(data, HIDDEN_DIM, dropout, lstm_layer, bilstm, use_char,
                       gpu)
    # NOTE(review): loss_function is defined but the loss actually comes from
    # model.neg_log_likelihood below.
    loss_function = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    # Best dev F-score seen so far (sentinel -1 so the first epoch updates it).
    best_dev = -1

    ## start training
    for idx in range(100):
        epoch_start = time.time()
        temp_start = epoch_start
        print "Epoch:", idx
        instance_count = 0
        sample_id = 0
        sample_loss = 0
        right_token = 0
        whole_token = 0
        # Reshuffle instances every epoch.
        random.shuffle(data.train_Ids)
        for words, chars, label in data.train_Ids:
            instance_count += 1
            # if instance_count > 1000:
            #     continue
            label = autograd.Variable(torch.LongTensor(label))
            if gpu:
                label = label.cuda()
            model.zero_grad()
            # model.hidden = model.init_hidden(gpu)
            # Model returns loss, raw scores, and the decoded tag sequence.
            loss, pred_score, tag_seq = model.neg_log_likelihood(
                [words, chars], label, gpu)
            # NOTE(review): these per-instance prints are extremely verbose.
            print pred_score
            print "tagseq:", tag_seq
            print "label:", label
            # loss = loss_function(pred_score, label)
            if gpu:
                pred_score = pred_score.cpu()
                label = label.cpu()
                loss = loss.cpu()
            right, whole = predict_check(pred_score, label)
            right_token += right
            whole_token += whole
            # Pre-0.4 scalar extraction; modern code uses loss.item().
            sample_loss += loss.data.numpy()[0]
            if instance_count % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(
                    "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (instance_count, temp_cost, sample_loss, right_token,
                       whole_token, (right_token + 0.) / whole_token))
                sys.stdout.flush()
                sample_loss = 0
            loss.backward()
            optimizer.step()
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print("Epoch: %s training finished. Time: %.2fs" % (idx, epoch_cost))
        acc, p, r, f = evaluate(data, model, "dev", gpu)
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        print("Dev: time:%.2fs; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" %
              (dev_cost, acc, p, r, f))
        if f > best_dev:
            print "Exceed best f, previous best f:", best_dev
            best_dev = f
        # ## decode test
        acc, p, r, f = evaluate(data, model, "test", gpu)
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        print("Test: time: %.2fs; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" %
              (test_cost, acc, p, r, f))
# Build a word -> index mapping from the toy training data.
word_to_ix = {}
for sent, tag in training_data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)
print(word_to_ix)
tag_to_ix = {'DET': 0, 'NN': 1, 'V': 2}

# These will usually be more like 32 or 64 dimensional.
# We will keep them small, so we can see how the weights change as we train.
EMBEDDING_DIM = 6
HIDDEN_DIM = 6

losses = []
loss_function = nn.NLLLoss()
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))
optimizer = optim.SGD(model.parameters(), lr=0.1)

# See what the scores are before training
# Note that element i,j of the output is the score for tag j for word i.
inputs = prepare_sequence(training_data[0][0], word_to_ix)
tag_scores = model(inputs)
print(tag_scores)
score_to_tag(tag_scores, tag_to_ix)

# NOTE(review): this training loop is truncated by the source scrape — the
# body ends after preparing the input sequence.
for epoch in range(300):
    total_loss = torch.Tensor([0])
    for sentence, tags in training_data:
        # Step 1. Prepare the inputs to be passed to the model (i.e, turn the words
        # into integer indices and wrap them in variables)
        sentence_in = prepare_sequence(sentence, word_to_ix)
# Load the corpus and expose the pieces used by the training/eval code below.
corpus = d.Corpus(args.data, device, args.batch_size, args.seq_len)
# NOTE(review): `dict` shadows the builtin `dict`; rename when convenient.
dict = corpus.dictionary
num_of_train_batches = corpus.total_num_of_train_batches


def accuracy(pred, target):
    """Count correct tag predictions, ignoring padding positions.

    ``pred`` holds per-class scores with classes on the last axis;
    ``target`` holds gold class indices, where index 2 marks padding and is
    excluded from the count. Returns ``(num_correct, num_considered)`` as
    plain Python ints.
    """
    valid = target != 2
    considered = valid.sum()
    predicted = pred.max(2)[1]
    matches = (target[valid] == predicted[valid]).sum()
    return matches.item(), considered.item()


#### train ####
# Build the tagger from CLI hyperparameters and move it to the target device.
model = LSTMTagger(args.emsize, args.nhid, args.batch_size, len(dict),
                   TAG_CLASS, args.nlayers, args.bidirect,
                   args.dropout).to(device)
# `size_average=False` is deprecated; reduction='sum' is the modern
# equivalent (sum the per-element losses instead of averaging).
loss_fn = nn.CrossEntropyLoss(reduction='sum').to(device)
optimizer = optim.Adam(model.parameters())


def evaluate():
    """Run the model on the batched test split and print overall tag accuracy."""
    model.eval()
    with torch.no_grad():
        inputs = corpus.test_data_batched
        gold = corpus.test_label_batched
        scores = model(inputs)
        n_correct, n_total = accuracy(scores, gold)
        print('accuracy = {:.4f}'.format(n_correct / n_total))

Exemplo n.º 12
0
def main():
    """Train an LSTMTagger with GloVe embeddings, early stopping, and saving.

    Loads data via `load_data()`, trains for up to `cf.MAX_EPOCHS`, stops
    early when the 10-epoch average loss stops improving, periodically runs
    `evaluate_model`, and saves the weights to 'asset/model_trained'.
    """
    progress_bar = ProgressBar()
    data_iterator, glove_embeddings, word_to_ix, ix_to_word = load_data()
    logger.info("Building model...")
    model = LSTMTagger(cf.EMBEDDING_DIM, cf.HIDDEN_DIM, len(word_to_ix),
                       cf.BATCH_SIZE, cf.MAX_SENT_LENGTH, glove_embeddings)
    # Ensure the word embeddings aren't modified during training:
    # only parameters with requires_grad=True reach the optimizer.
    optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                 model.parameters()),
                          lr=0.1)
    # NOTE(review): .cuda() is unconditional — this fails on CPU-only hosts.
    model.cuda()
    #if(cf.LOAD_PRETRAINED_MODEL):
    #	model.load_state_dict(torch.load('asset/model_trained'))
    #else:
    num_batches = len(data_iterator)
    loss_list = []  # A place to store the loss history
    for epoch in range(1, cf.MAX_EPOCHS + 1):
        epoch_start_time = time.time()
        for (i, (batch_x, batch_y)) in enumerate(data_iterator):
            # Ignore batch if it is not the same size as the others (happens at the end sometimes)
            if len(batch_x) != cf.BATCH_SIZE:
                continue
            batch_x = batch_x.to(device)
            # Step 1. Remember that Pytorch accumulates gradients.
            # We need to clear them out before each instance
            model.zero_grad()

            # Also, we need to clear out the hidden state of the LSTM,
            # detaching it from its history on the last instance.
            model.hidden = model.init_hidden()

            # Step 2. Get our inputs ready for the network, that is, turn them into
            # Tensors of word indices.
            #sentence_in = prepare_sequence(sentence, word_to_ix)
            #target = torch.tensor([word_to_ix[tag]], dtype=torch.long, device=device)

            batch_x_lengths = []
            for x in batch_x:
                batch_x_lengths.append(len(x))

            # Step 3. Run our forward pass.
            tag_scores = model(batch_x, batch_x_lengths)

            #loss = loss_function(tag_scores, batch_y)
            # Custom loss that masks padding using the sentence lengths.
            loss = modified_loss(tag_scores, batch_y, batch_x_lengths,
                                 word_to_ix)

            loss.backward()
            optimizer.step()
            progress_bar.draw_bar(i, epoch, num_batches, cf.MAX_EPOCHS,
                                  epoch_start_time)

        # NOTE(review): `loss` here is the last batch's loss tensor; if the
        # iterator yielded no full-size batch, `loss` would be undefined.
        progress_bar.draw_completed_epoch(loss, loss_list, epoch,
                                          cf.MAX_EPOCHS, epoch_start_time)

        # NOTE(review): tensors (not floats) are stored; sum()/10 still works
        # but loss.item() would be cheaper and detach from the graph.
        loss_list.append(loss)
        if epoch % 10 == 0:
            avg_loss = sum([l for l in loss_list[epoch - 10:]]) / 10
            logger.info("Average loss over past 10 epochs: %.6f" % avg_loss)
            if epoch >= 20:
                prev_avg_loss = sum(
                    [l for l in loss_list[epoch - 20:epoch - 10]]) / 10
                if (avg_loss >= prev_avg_loss):
                    logger.info(
                        "Average loss has not improved over past 10 epochs. Stopping early."
                    )
                    evaluate_model(model, ix_to_word)
                    break
        if epoch == 1 or epoch % 10 == 0 or epoch == cf.MAX_EPOCHS:
            evaluate_model(model, ix_to_word)

    logger.info("Saving model...")
    torch.save(model.state_dict(), "asset/model_trained")
    logger.info("Model saved to %s." % "asset/model_trained")
Exemplo n.º 13
0
def predict(data, model_name='', model_=None, idxs=None, out=False):
    """Compute tag accuracy of a model over batched data.

    Args:
        data: iterable of (batch, lengths, targets, lengths2) batches.
        model_name: checkpoint name; when given, a fresh LSTMTagger is built
            and the checkpoint is loaded into it.
        model_: an already-constructed model (used when model_name is empty).
        idxs: optional [tag_to_index, word_to_index, index_to_tag,
            index_to_word]; loaded from 'data.pickle' when omitted.
        out: when True, print each correctly-predicted tag.

    Returns:
        Fraction of non-special gold tags predicted correctly.

    Raises:
        ValueError: if neither model_name nor model_ is provided.
    """

    if idxs:
        [tag_to_index, word_to_index, index_to_tag, index_to_word] = idxs
    else:
        with open('data.pickle', 'rb') as f:
            [tag_to_index, word_to_index, index_to_tag,
             index_to_word] = pickle.load(f)

    if model_name:
        model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_index),
                           len(tag_to_index))
        load_checkpoint(model_name, model)
    elif not model_:
        raise ValueError('No model specified.')
    else:
        model = model_

    # Manual argmax over a sequence of score vectors -> list of tag strings.
    # (Currently unused — superseded by torch.max in the loop below.)
    def out_gen(tag_scores, index_to_tag):
        tags = []
        for word in tag_scores:
            maxval = -10000
            maxind = 0
            for i in range(word.size()[0]):
                if maxval < word.data[i]:
                    maxind = i
                    maxval = word.data[i]
            tags.append(index_to_tag[maxind])
        return tags

    correct = 0
    total = 0
    #for line in data:
    #    inputs = prepare(line[0], word_to_index)
    #    tag_scores = model(inputs)
    #    tags = out_gen(tag_scores, index_to_tag)
    for batch, lengths, targets, lengths2 in data:
        pred = model(autograd.Variable(batch), lengths.cpu().numpy())
        # Predicted tag index per position (argmax over the tag axis).
        _, pred = torch.max(pred, dim=2)
        pred = pred.data
        for p, g in zip(pred, targets):
            for idx in range(len(g)):
                # Skip special tokens entirely; stop at end-of-sentence.
                if index_to_tag[g[idx]] in [SOS, PAD]:
                    continue
                elif index_to_tag[g[idx]] == EOS:
                    break
                elif index_to_tag[g[idx]] == index_to_tag[p[idx]]:
                    correct += 1
                    if out:
                        print(index_to_tag[p[idx]], end=' ')
                    total += 1
                else:
                    total += 1
        #for pred, gold in zip(tags, line[1]):
        #    if gold in [EOS, SOS]:
        #        pass
        #    if pred == gold:
        #        correct += 1
        #    total += 1

    # NOTE(review): raises ZeroDivisionError if every tag was special.
    return correct / total