Example #1
def main():
    with open("data/vocab.pkl", 'rb') as f:
        vocab = pickle.load(f)

    img_path = "data/flickr7k_images"
    cap_path = "data/factual_train.txt"
    styled_path = "data/humor/funny_train.txt"
    data_loader = get_data_loader(img_path, cap_path, vocab, 3)
    styled_data_loader = get_styled_data_loader(styled_path, vocab, 3)

    encoder = EncoderRNN(voc_size=60376, emb_size=300, hidden_size=300)
    decoder = FactoredLSTM(300, 512, 512, len(vocab))

    if torch.cuda.is_available():
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    # for i, (images, captions, lengths) in enumerate(data_loader):
    for i, (captions, lengths) in enumerate(styled_data_loader):
        # images = Variable(images, volatile=True)
        captions = Variable(captions.long())

        if torch.cuda.is_available():
            # images = images.cuda()
            captions = captions.cuda()

        # features = encoder(images)

        outputs = decoder(captions, features=None, mode="humorous")
        print(lengths - 1)
        print(outputs)
        print(captions[:, 1:])

        loss = masked_cross_entropy(outputs, captions[:, 1:].contiguous(), lengths - 1)

        print(loss)

        break
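
The masked_cross_entropy loss called above (and again in Example #4) is defined elsewhere in that repository. As a rough sketch of what a sequence-masked cross entropy looks like, assuming logits of shape (batch, seq_len, vocab_size), integer targets of shape (batch, seq_len) and one true length per sequence (the real helper may differ in argument order, reduction, and in how it aligns the decoder outputs with the targets):

import torch
import torch.nn.functional as F

def masked_cross_entropy(logits, target, lengths):
    # Token-level negative log-likelihood over the flattened batch.
    log_probs = F.log_softmax(logits.view(-1, logits.size(-1)), dim=-1)
    nll = -log_probs.gather(1, target.view(-1, 1)).squeeze(1)
    nll = nll.view(target.size(0), target.size(1))
    # Zero out positions past each sequence's true length and average
    # over the remaining (real) tokens only.
    max_len = target.size(1)
    lengths = torch.as_tensor(lengths, device=target.device)
    mask = (torch.arange(max_len, device=target.device).unsqueeze(0)
            < lengths.unsqueeze(1)).float()
    return (nll * mask).sum() / mask.sum()
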
Example #2 (the beginning of this snippet is not included on this page)

        args.hidden_size,
        len(dataset.out_vocab[0]),
        args.n_layers,
        args.dropout,
    )

    # Initialize optimizers and criterion
    # encoder_optimizer = optim.Adam(encoder.parameters(), lr=args.learning_rate)
    # decoder_optimizer = optim.Adam(decoder.parameters(), lr=args.learning_rate * decoder_learning_ratio)
    encoder_optimizer = optim.Adadelta(encoder.parameters())
    decoder_optimizer = optim.Adadelta(decoder.parameters())
    criterion = nn.CrossEntropyLoss()

    # Move models to GPU
    if args.USE_CUDA:
        encoder.cuda()
        decoder.cuda()

    # train(dataset,
    #      args.batch_size,
    #      args.n_epochs,
    #      encoder,
    #      decoder,
    #      encoder_optimizer,
    #      decoder_optimizer,
    #      criterion,
    #      'checkpoints/pov',
    #      lang)

    # evaluate
Example #3
            imdb_decoder = torch.load(sys.argv[2])
        else:
            imdb_encoder = torch.load(sys.argv[1],
                                      map_location={'cuda:0': 'cpu'})
            imdb_decoder = torch.load(sys.argv[2],
                                      map_location={'cuda:0': 'cpu'})
    else:
        imdb_encoder = EncoderRNN(input_lang.n_words, hidden_size,
                                  embedding_matrix)
        imdb_decoder = AttnDecoderRNN(hidden_size,
                                      output_lang.n_words,
                                      1,
                                      dropout_p=0.1)

    if use_cuda:
        imdb_encoder = imdb_encoder.cuda()
        imdb_decoder = imdb_decoder.cuda()

    trainIters(imdb_encoder,
               imdb_decoder,
               1500000,
               print_every=100,
               plot_every=100,
               learning_rate=0.01)

    # save model
    torch.save(
        imdb_encoder, 'trained_model/encoder_imdb100000_max16_glove_' +
        str(print_loss_avg) + '_' + str(maximum_norm))
    torch.save(
        imdb_decoder, 'trained_model/decoder_imdb100000_max16_glove_' +
        str(print_loss_avg) + '_' + str(maximum_norm))
Example #4
def main(args):
    torch.cuda.set_device(6)
    model_path = args.model_path
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    # load vocabulary
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    img_path = args.img_path
    factual_cap_path = args.factual_caption_path
    humorous_cap_path = args.humorous_caption_path

    # build the data loaders
    data_loader = get_data_loader(img_path, factual_cap_path, vocab,
                                  args.caption_batch_size)
    styled_data_loader = get_styled_data_loader(humorous_cap_path, vocab,
                                                args.language_batch_size)

    # build the models
    emb_dim = args.emb_dim
    hidden_dim = args.hidden_dim
    factored_dim = args.factored_dim
    vocab_size = len(vocab)
    encoder = EncoderRNN(voc_size=vocab_size,
                         emb_size=emb_dim,
                         hidden_size=emb_dim)
    decoder = FactoredLSTM(emb_dim, hidden_dim, factored_dim, vocab_size)

    if torch.cuda.is_available():
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    # loss and optimizer
    criterion = masked_cross_entropy
    cap_params = list(decoder.parameters()) + list(encoder.parameters())
    lang_params = list(decoder.S_hc.parameters()) + list(decoder.S_hf.parameters()) \
                  + list(decoder.S_hi.parameters()) + list(decoder.S_ho.parameters())
    optimizer_cap = torch.optim.Adam(cap_params, lr=args.lr_caption)
    optimizer_lang = torch.optim.Adam(lang_params, lr=args.lr_language)

    # train
    total_cap_step = len(data_loader)
    total_lang_step = len(styled_data_loader)
    epoch_num = args.epoch_num
    for epoch in range(epoch_num):
        # caption
        for i, (messages, m_lengths, targets,
                t_lengths) in enumerate(data_loader):

            messages = to_var(messages.long())
            targets = to_var(targets.long())

            # forward, backward and optimize
            decoder.zero_grad()
            encoder.zero_grad()
            output, features = encoder(messages, list(m_lengths))
            outputs = decoder(targets, features, mode="factual")
            loss = criterion(outputs[:, 1:, :].contiguous(),
                             targets[:, 1:].contiguous(), t_lengths - 1)
            loss.backward()
            optimizer_cap.step()

            # print log
            if i % args.log_step_caption == 0:
                print("Epoch [%d/%d], CAP, Step [%d/%d], Loss: %.4f" %
                      (epoch + 1, epoch_num, i, total_cap_step, loss.data[0]))

        eval_outputs(outputs, vocab)

        # language
        for i, (captions, lengths) in enumerate(styled_data_loader):
            captions = to_var(captions.long())

            # forward, backward and optimize
            decoder.zero_grad()
            outputs = decoder(captions, mode='humorous')
            loss = criterion(outputs, captions[:, 1:].contiguous(),
                             lengths - 1)
            loss.backward()
            optimizer_lang.step()

            # print log
            if i % args.log_step_language == 0:
                print("Epoch [%d/%d], LANG, Step [%d/%d], Loss: %.4f" %
                      (epoch + 1, epoch_num, i, total_lang_step, loss.data[0]))

        # save models
        torch.save(decoder.state_dict(),
                   os.path.join(model_path, 'decoder-%d.pkl' % (epoch + 1, )))

        torch.save(encoder.state_dict(),
                   os.path.join(model_path, 'encoder-%d.pkl' % (epoch + 1, )))
Example #5
        decoder_input = decoder_input.cuda() if use_cuda else decoder_input

    return decoded_words  #, decoder_attentions[:di + 1]


def evaluateRandomly(encoder, decoder, n=10):
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')


input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
noise = torch.Tensor(list(range(output_lang.n_words)))
print(random.choice(pairs))

hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size)
decoder1 = DecoderRNN(hidden_size, output_lang.n_words, 1)

if use_cuda:
    encoder1 = encoder1.cuda()
    decoder1 = decoder1.cuda()

trainIters(encoder1, decoder1, 25000, print_every=50)

evaluateRandomly(encoder1, decoder1, 20)
Example #6
def train(x,
          y,
          optimizer=optim.Adam,
          criterion=nn.MSELoss(),
          n_steps=100,
          attn_model="general",
          hidden_size=128,
          n_layers=1,
          dropout=0,
          batch_size=50,
          elr=0.001,
          dlr=0.005,
          clip=50.0,
          print_every=10,
          teacher_forcing_ratio=lambda x: 1 if x < 10 else 0):
    # Configure training/optimization
    encoder_learning_rate = elr
    decoder_learning_rate = dlr

    # Initialize models
    encoder = EncoderRNN(1, hidden_size, n_layers, dropout=dropout)
    decoder = LuongAttnDecoderRNN(attn_model,
                                  1,
                                  hidden_size,
                                  n_layers,
                                  dropout=dropout)

    # Initialize optimizers and criterion
    encoder_optimizer = optimizer(encoder.parameters(),
                                  lr=encoder_learning_rate)
    decoder_optimizer = optimizer(decoder.parameters(),
                                  lr=decoder_learning_rate)

    # Move models to GPU
    if USE_CUDA:
        encoder.cuda()
        decoder.cuda()

    # Begin!
    print_loss_total = 0
    step = 0
    while step < n_steps:
        step += 1
        # Get training data for this cycle
        batch_idx = np.random.randint(0, x.shape[1], batch_size)
        input_batches, target_batches = x[:, batch_idx], y[:, batch_idx]

        # Run the train function
        loss, _ = _train(input_batches,
                         target_batches,
                         encoder,
                         decoder,
                         encoder_optimizer,
                         decoder_optimizer,
                         criterion,
                         teacher_forcing_ratio=teacher_forcing_ratio(step),
                         clip=clip)
        # print(np.mean(np.square((output.data.cpu().numpy() - series[-20:,  batch_idx]))))
        # Keep track of loss
        print_loss_total += loss

        if step % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print_summary = '(%d %d%%) %.4f' % (step, step / n_steps * 100,
                                                print_loss_avg)
            print(print_summary)
    return encoder, decoder
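
A hypothetical call of the train function above, assuming x and y are time-major arrays holding one scalar series per column, so that x[:, i] and y[:, i] are the input and target windows of series i (the _train helper it delegates to is not shown here, so whether it expects an extra trailing feature dimension cannot be confirmed):

import numpy as np
import torch

# 400 sine waves with random phases, 120 time steps each (time-major layout).
t = np.linspace(0, 8 * np.pi, 120, dtype=np.float32)
phases = np.random.rand(400).astype(np.float32) * 2 * np.pi
series = np.stack([np.sin(t + p) for p in phases], axis=1)    # shape (120, 400)

x = torch.from_numpy(series[:100])    # first 100 steps -> encoder input
y = torch.from_numpy(series[100:])    # last 20 steps  -> decoder target

encoder, decoder = train(x, y, n_steps=200, batch_size=32, print_every=20)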