Example #1
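Both examples rely on pieces that the snippet does not show: the encoder/decoder modules, a CrossEntropyLoss criterion, a device, and the number of epochs (the data loader itself is built in Example #2). The following is only a minimal sketch of that assumed setup, modelled on the common frozen-CNN-plus-LSTM captioning architecture; the layer sizes, the ResNet backbone, the greedy sample method, and the placeholder values are all assumptions rather than part of the original code:

import time
import numpy as np
import torch
import torch.nn as nn
import torchvision.models as models
from torch.nn.utils.rnn import pack_padded_sequence


class EncoderCNN(nn.Module):
    """Frozen CNN backbone with a trainable linear projection (assumed architecture)."""
    def __init__(self, embed_size):
        super().__init__()
        resnet = models.resnet152(pretrained=True)
        self.resnet = nn.Sequential(*list(resnet.children())[:-1])  # drop the final fc layer
        self.linear = nn.Linear(resnet.fc.in_features, embed_size)

    def forward(self, images):
        with torch.no_grad():                        # backbone stays frozen; only self.linear trains
            features = self.resnet(images)
        features = features.reshape(features.size(0), -1)
        return self.linear(features)


class DecoderRNN(nn.Module):
    """Embedding -> LSTM -> vocabulary logits (assumed architecture)."""
    def __init__(self, embed_size, hidden_size, vocab_size):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, batch_first=True)
        self.linear = nn.Linear(hidden_size, vocab_size)

    def forward(self, features, captions, lengths):
        # Prepend the image feature as the first "word", then pack the padded batch
        embeddings = torch.cat((features.unsqueeze(1), self.embed(captions)), 1)
        packed = pack_padded_sequence(embeddings, lengths, batch_first=True)
        hiddens, _ = self.lstm(packed)
        return self.linear(hiddens[0])               # logits aligned with the packed targets

    def sample(self, features, max_len=20):
        """Greedy decoding used by Example #2."""
        ids, states = [], None
        inputs = features.unsqueeze(1)
        for _ in range(max_len):
            hiddens, states = self.lstm(inputs, states)
            predicted = self.linear(hiddens.squeeze(1)).argmax(1)
            ids.append(predicted)
            inputs = self.embed(predicted).unsqueeze(1)
        return torch.stack(ids, 1)                   # (batch_size, max_len) word ids


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.CrossEntropyLoss()
num_epochs = 5                                       # assumed value
vocab_size = 10000                                   # placeholder; Example #2 takes it from the vocabulary
cnn = EncoderCNN(512).to(device)
rnn = DecoderRNN(512, 512, vocab_size).to(device)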
params = list(cnn.linear.parameters()) + list(rnn.parameters())
optimizer = torch.optim.Adam(params, lr=1e-3)

for epoch in range(num_epochs):
    tic = time.time()

    for i, (image, captions, lengths) in enumerate(dataset_loader):

        image = image.to(device)
        captions = captions.to(device)
        # Flatten the padded captions into the same packed layout the decoder's output uses
        targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]

        # Clear accumulated gradients on both modules before the backward pass
        cnn.zero_grad()
        rnn.zero_grad()

        # Forward pass: image -> feature vector -> caption logits over the packed time steps
        cnn_out = cnn(image)
        lstm_out = rnn(cnn_out, captions, lengths)
        loss = criterion(lstm_out, targets)
        loss.backward()
        optimizer.step()

        if i % 1000 == 0:
            print(
                'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                .format(epoch, num_epochs, i, len(dataset_loader), loss.item(),
                        np.exp(loss.item())))

    toc = time.time()
    print('epoch %d time %.2f mins' % (epoch, (toc - tic) / 60))

torch.save(cnn.state_dict(), 'cnn.pkl')
torch.save(rnn.state_dict(), 'rnn.pkl')
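Both loaders are assumed to use a collate function that sorts each batch by caption length (pack_padded_sequence expects descending lengths) and pads the captions; the evaluation loader in Example #2 additionally returns the image ids. A minimal sketch of that collate_func, with all names and the sample layout assumed:

def collate_func(batch):
    # batch: list of (image_tensor, caption_tensor, image_id) samples from the dataset
    batch.sort(key=lambda x: len(x[1]), reverse=True)   # descending caption lengths
    images, captions, image_ids = zip(*batch)
    images = torch.stack(images, 0)
    lengths = [len(cap) for cap in captions]
    padded = torch.zeros(len(captions), max(lengths), dtype=torch.long)
    for i, cap in enumerate(captions):
        padded[i, :lengths[i]] = cap                    # copy tokens, leave the rest as padding
    return images, padded, lengths, list(image_ids)     # Example #1's loader omits image_ids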
Example #2
    dataset_loader = torch.utils.data.DataLoader(dataset,  # `dataset` is built earlier (not shown)
                                                 batch_size=batch_size,
                                                 shuffle=True,
                                                 num_workers=4,
                                                 collate_fn=collate_func)

    vocab_size = vocab.index

    cnn = EncoderCNN(512).to(device)
    rnn = DecoderRNN(512, 512, vocab_size).to(device)

    cnn.load_state_dict(torch.load('cnn.pkl', map_location=device))
    rnn.load_state_dict(torch.load('rnn.pkl', map_location=device))
    cnn.eval()   # evaluation mode (affects BatchNorm/Dropout layers, if any)
    rnn.eval()

    hyp = []
    references = []
    with torch.no_grad():                              # no gradients needed while generating captions
        for i, (image, captions, lengths, image_id) in enumerate(dataset_loader):
            image = image.to(device)
            for img_id in image_id:                    # one tokenized reference per image
                references.append([caption_dict[img_id].split(' ')[1:]])
            features = cnn(image)
            ids_list = rnn.sample(features)            # greedy decoding of word ids
            ids_list = ids_list.cpu().numpy()
            for ids in ids_list:
                snt = vocab.get_sentence(ids).split()
                hyp.append(snt[1:])                    # skip the first token so hyp and references align

    # hyp and references hold variable-length token lists, so keep them as plain Python
    # lists (converting the ragged lists with np.array would fail or give an object array)
    print(len(hyp), len(references))
    print(hyp)
    print(compute_bleu(references, hyp))
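compute_bleu is presumably a corpus-level BLEU implementation that takes a list of per-image reference lists and a list of tokenized hypotheses, which is exactly what the loop above builds. If that helper is not available, NLTK's corpus_bleu accepts the same shapes and can serve as a rough cross-check; the lines below are an optional addition, not part of the original script:

# Optional sanity check: corpus-level BLEU via NLTK on the same token lists
from nltk.translate.bleu_score import corpus_bleu
print(corpus_bleu(references, hyp))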