Example #1
import os

import torch


# `BatchManager` is a project-local helper that serves the data in
# fixed-size batches; its assumed interface is sketched after this function.
def train(inputs, targets, model, optimizer, batch_size=32, epochs=200):
    inputs_batch_manager = BatchManager(inputs, batch_size)
    targets_batch_manager = BatchManager(targets, batch_size)
    steps = inputs_batch_manager.steps

    for epoch in range(epochs):
        for i in range(steps):
            optimizer.zero_grad()
            batch_inputs = torch.tensor(inputs_batch_manager.next_batch(),
                                        dtype=torch.long)
            batch_targets = torch.tensor(targets_batch_manager.next_batch(),
                                         dtype=torch.long)
            logits = model(batch_inputs, batch_targets)
            # Shift the targets by one so the start token is excluded from
            # the loss; transpose the logits to (batch, vocab, seq), the
            # layout nn.CrossEntropyLoss expects.
            loss = model.loss_layer(logits.transpose(1, 2),
                                    batch_targets[:, 1:])
            loss.backward()
            optimizer.step()
        print('epoch %d, loss = %f' % (epoch, loss.item()))

    os.makedirs("models", exist_ok=True)
    torch.save(model.state_dict(), os.path.join("models", "params.pkl"))
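
Both examples rely on a BatchManager class that is not shown on this page. Below is a minimal sketch of the interface the code above assumes, inferred purely from usage (a steps attribute and a next_batch() method); the project's real implementation may differ, for example in how it shuffles or pads.

import math


class BatchManager:
    """Minimal sketch (assumption): serve `data` in fixed-size batches,
    wrapping back to the start once the data is exhausted."""

    def __init__(self, data, batch_size):
        self.data = data
        self.batch_size = batch_size
        self.steps = int(math.ceil(len(data) / batch_size))
        self._pos = 0

    def next_batch(self):
        batch = self.data[self._pos:self._pos + self.batch_size]
        self._pos += self.batch_size
        if self._pos >= len(self.data):
            self._pos = 0
        return batch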
Example #2
# Greedy-decoding evaluation: build each output sequence one token at a
# time, then count exact matches against the references. (Note that the
# name shadows the built-in eval.)
def eval(model, vocab, inputs, targets, out_len=12):
    model.eval()  # disable dropout etc. for inference
    batch_x = BatchManager(inputs, 32)
    batch_y = BatchManager(targets, 32)
    hits = 0
    total = 0
    with torch.no_grad():  # inference only; no gradient bookkeeping needed
        for i in range(batch_x.steps):
            x = torch.tensor(batch_x.next_batch(), dtype=torch.long).cuda()
            y = torch.tensor(batch_y.next_batch(), dtype=torch.long).cuda()

            # Start from an all-<pad> target sequence with <s> in slot 0.
            tgt_seq = torch.ones(x.shape[0], out_len, dtype=torch.long).cuda()
            tgt_seq *= vocab['<pad>']
            tgt_seq[:, 0] = vocab['<s>']
            for j in range(1, out_len):
                # Greedy decoding: logits[:, j - 1, :] predicts position j,
                # and under a causal decoder mask the still-padded positions
                # past j - 1 cannot influence it.
                logits = model(x, tgt_seq)
                tgt_seq[:, j] = torch.argmax(logits[:, j - 1, :], dim=-1)
            # `visualize` is a project-local helper assumed to display the
            # decoded batch and return the number of correct predictions.
            hits += visualize(x, y, tgt_seq, vocab)
            total += x.shape[0]

    print('%d/%d, accuracy=%f' % (hits, total, hits / total))
    model.train()  # restore training mode
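
For context, here is a hypothetical driver showing how train() and eval() might be wired together. Everything below is an assumption for illustration: Seq2SeqModel and the toy vocabulary are stand-ins for the project's real components. The only contract the two functions above actually require is that the model is callable as model(inputs, targets), returns per-position logits with logits[:, t] predicting targets[:, t + 1], and exposes a loss_layer such as nn.CrossEntropyLoss.

import torch
import torch.nn as nn


# Hypothetical stand-in model: any nn.Module mapping (src, tgt) to logits
# of shape (batch, tgt_len - 1, vocab_size) and carrying a `loss_layer`
# attribute satisfies the contract used by train() and eval() above.
class Seq2SeqModel(nn.Module):
    def __init__(self, vocab_size, d_model=128, pad_id=0):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, d_model)
        self.transformer = nn.Transformer(d_model, nhead=4, batch_first=True)
        self.proj = nn.Linear(d_model, vocab_size)
        self.loss_layer = nn.CrossEntropyLoss(ignore_index=pad_id)

    def forward(self, src, tgt):
        tgt_in = tgt[:, :-1]  # teacher forcing: feed all but the last token
        mask = self.transformer.generate_square_subsequent_mask(tgt_in.size(1))
        h = self.transformer(self.embed(src), self.embed(tgt_in), tgt_mask=mask)
        return self.proj(h)  # logits[:, t] predicts tgt[:, t + 1]


vocab = {'<pad>': 0, '<s>': 1, '</s>': 2}  # toy vocabulary (assumption)
model = Seq2SeqModel(vocab_size=len(vocab), pad_id=vocab['<pad>'])
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
# inputs/targets: lists of equal-length id sequences with targets starting
# with <s>; how they are built is project-specific and omitted here.
# train(inputs, targets, model, optimizer)
# eval(model, vocab, inputs, targets)   # requires a CUDA device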