import os

import torch


def train(inputs, targets, model, optimizer, batch_size=32, epochs=200):
    # BatchManager (defined elsewhere in the repo) cycles through the data in
    # fixed-size batches; .steps is the number of batches per epoch.
    inputs_batch_manager = BatchManager(inputs, batch_size)
    targets_batch_manager = BatchManager(targets, batch_size)
    steps = inputs_batch_manager.steps
    for epoch in range(epochs):
        for i in range(steps):
            optimizer.zero_grad()
            # Move each batch to the GPU, matching the CUDA usage in eval().
            batch_inputs = torch.tensor(inputs_batch_manager.next_batch(), dtype=torch.long).cuda()
            batch_targets = torch.tensor(targets_batch_manager.next_batch(), dtype=torch.long).cuda()
            logits = model(batch_inputs, batch_targets)
            # Exclude the start token from the targets: the logit at position t
            # is scored against target token t + 1.
            loss = model.loss_layer(logits.transpose(1, 2), batch_targets[:, 1:])
            loss.backward()
            optimizer.step()
            print('epoch %d, step %d/%d, loss=%f' % (epoch, i, steps, loss.item()))
        # Overwrite the checkpoint after every epoch.
        torch.save(model.state_dict(), os.path.join("models", "params.pkl"))
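
# Shape note for the loss call in train(): assuming model.loss_layer is a
# CrossEntropyLoss-style criterion (an assumption; its definition is not shown
# in this file), it expects scores shaped (batch, vocab, seq_len) and targets
# shaped (batch, seq_len), hence logits.transpose(1, 2); the targets are
# shifted by one so position t is scored against token t + 1. A minimal,
# self-contained sketch of that shape contract (illustrative only, with a
# hypothetical vocabulary size):
def _loss_shape_demo(batch=2, seq_len=12, vocab_size=1000):
    criterion = torch.nn.CrossEntropyLoss()                   # stand-in for model.loss_layer
    logits = torch.randn(batch, seq_len - 1, vocab_size)      # (batch, seq_len - 1, vocab)
    target = torch.randint(0, vocab_size, (batch, seq_len))   # (batch, seq_len), starts with <s>
    # drop the start token and move the vocab dimension to axis 1
    return criterion(logits.transpose(1, 2), target[:, 1:])
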
def eval(model, vocab, inputs, targets, out_len=12):
    model.eval()
    batch_x = BatchManager(inputs, 32)
    batch_y = BatchManager(targets, 32)
    hits = 0
    total = 0
    for i in range(batch_x.steps):
        x = torch.tensor(batch_x.next_batch(), dtype=torch.long).cuda()
        y = torch.tensor(batch_y.next_batch(), dtype=torch.long).cuda()
        # Greedy decoding: start from an all-<pad> target sequence with <s> in
        # the first position, then fill in one token per step.
        tgt_seq = torch.ones(x.shape[0], out_len, dtype=torch.long).cuda()
        tgt_seq *= vocab['<pad>']
        tgt_seq[:, 0] = vocab['<s>']
        with torch.no_grad():
            for j in range(1, out_len):
                logits = model(x, tgt_seq)
                # The logit at position j - 1 predicts the token at position j.
                tgt_seq[:, j] = torch.argmax(logits[:, j - 1, :], dim=-1)
                if j != out_len - 1:
                    # Tentatively place </s> after the generated prefix; it is
                    # overwritten on the next step.
                    tgt_seq[:, j + 1] = vocab['</s>']
        # visualize() (defined elsewhere) compares predictions against the
        # references and returns the number of exact matches in the batch.
        hits += visualize(x, y, tgt_seq, vocab)
        total += x.shape[0]
    print('%d/%d, accuracy=%f' % (hits, total, hits / total))
    model.train()
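
# A hypothetical driver sketch (not part of the original file): restore the
# checkpoint written by train() and run eval() on held-out data. Only the
# torch.load / load_state_dict calls are standard PyTorch; the model, vocab,
# and test data are assumed to be constructed elsewhere in the repo.
def _restore_and_eval(model, vocab, test_inputs, test_targets):
    state = torch.load(os.path.join("models", "params.pkl"), map_location='cuda')
    model.load_state_dict(state)
    model.cuda()
    eval(model, vocab, test_inputs, test_targets)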