Example #1
def main():
    # Load vocabulary wrapper.
    with open(vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    encoder = EncoderCNN(4096, embed_dim)
    decoder = DecoderRNN(embed_dim, hidden_size, len(vocab), num_layers_rnn)
    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()
    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
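    # Train the decoder plus the encoder's linear projection over the precomputed fc7 features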
    params = list(decoder.parameters()) + list(encoder.linear.parameters())
    optimizer = torch.optim.Adam(params, lr=0.001)

    #load data
    with open(image_data_file, 'rb') as f:
        image_data = pickle.load(f)
    image_features = si.loadmat(image_feature_file)

    img_features = image_features['fc7'][0]
    img_features = np.concatenate(img_features)
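    # img_features should now be an (N, 4096) array of fc7 vectors, matching the 4096 passed to EncoderCNN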

    print 'data loaded'
    iteration = 0
    save_loss = []
    for i in range(10):  # epoch
        use_caption = i % 5
        print 'Epoch', i
        for x, y in make_mini_batch(img_features,
                                    image_data,
                                    use_caption=use_caption):
            word_padding, lengths = make_word_padding(y, vocab)

            x = Variable(torch.from_numpy(x).cuda())
            word_index = Variable(torch.from_numpy(word_padding).cuda())

            encoder.zero_grad()
            decoder.zero_grad()

            features = encoder(x)
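            # pack the padded captions so targets contain only real (non-pad) timesteps; [0] takes the packed data tensor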
            targets = pack_padded_sequence(word_index,
                                           lengths,
                                           batch_first=True)[0]
            outputs = decoder(features, word_index, lengths)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            if iteration % 100 == 0:
                print 'loss', loss.data[0]
                save_loss.append(loss.data[0])

            iteration += 1

        torch.save(decoder.state_dict(), 'decoder.pkl')
        torch.save(encoder.state_dict(), 'encoder.pkl')
        with open('losses.txt', 'w') as f:
            print >> f, save_loss
Example #2
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Build the models
    encoder = EncoderCNN(args.embed_size).to(device)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
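    # Only the decoder and the encoder's final linear + batch-norm layers are optimized; the CNN backbone stays frozen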
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the models
    total_step = len(data_loader)

    # For each epoch
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):

            # Set mini-batch dataset
            images = images.to(device)
            captions = captions.to(device)
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            # Forward, backward and optimize
            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                    .format(epoch, args.num_epochs, i, total_step, loss.item(),
                            np.exp(loss.item())))

            # Save the model checkpoints
            if (i + 1) % args.save_step == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args.model_path,
                                 'decoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(args.model_path,
                                 'encoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))
Example #3
def epoch_training(train_iter,
                   val_iter,
                   num_epoch=100,
                   learning_rate=1e-4,
                   hidden_size=100,
                   early_stop=False,
                   patience=2,
                   epsilon=1e-4):
    # define model
    encoder = EncoderRNN(input_size=len(EN.vocab), hidden_size=hidden_size)
    decoder = DecoderRNN(hidden_size=hidden_size, output_size=len(DE.vocab))

    # define loss criterion
    criterion = nn.NLLLoss(ignore_index=PAD_token)
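    # ignore_index=PAD_token keeps padding positions out of the loss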
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)


    res_loss = float('inf')  # best validation loss seen so far
    res_encoder = None
    res_decoder = None
    res_epoch = 0
    base_bleu = 0
    not_updated = 0

    for epoch in range(num_epoch):
        tl = train(train_iter, encoder, decoder, encoder_optimizer,
                   decoder_optimizer, criterion)
        loss, val_bleu = evaluate(val_iter, encoder, decoder, criterion)
        logging.warning('******Epoch: ' + str(epoch) + ' Training Loss: ' +
                        str(tl) + ' Validation Loss: ' + str(loss) +
                        ' Validation Bleu: ' + str(val_bleu) + '*********')
        #save the model with the highest validation BLEU
        if base_bleu <= val_bleu:
            base_bleu = val_bleu
            res_loss = loss
            res_encoder = encoder
            res_decoder = decoder
            res_epoch = epoch
            not_updated = 0
            logging.warning('Updated validation loss to ' + str(res_loss) +
                            ' with validation BLEU ' + str(base_bleu) +
                            ' at epoch ' + str(res_epoch))
        else:
            not_updated += 1
        if not_updated == patience:
            break
    print('Stop at Epoch: ' + str(res_epoch) + ", With Validation Loss: " +
          str(res_loss) + ", Validation Bleu: " + str(base_bleu))
    logging.warning('Stop at Epoch: ' + str(res_epoch) +
                    ", With Validation Loss: " + str(res_loss) +
                    ", Validation Bleu: " + str(base_bleu))
    return res_loss, res_encoder, res_decoder, base_bleu
Example #4
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing
    # Compose all preprocessing into one transform; ToTensor yields a (C, H, W) tensor with values in [0, 1] before normalization
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
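    # note: 0.5 mean/std maps pixels to (-1, 1) instead of using the ImageNet statistics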

    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build data loader
    data_loader = get_loader(args.image_dir,
                             args.caption_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    # Build the models
    encoder = EncoderCNN(args.embed_size)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(encoder.resnet.fc.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the Models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):

            # Set mini-batch dataset
            images = Variable(images)
            captions = Variable(captions)
            print("cap size %s" % str(captions.size()))
            if torch.cuda.is_available():
                images = images.cuda()
                captions = captions.cuda()
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]
            print(targets)
            # Forward, Backward and Optimize
            decoder.zero_grad()
            encoder.zero_grad()
            features = encoder(images)
            print("cnn feats %s" % str(features.size()))
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                print(
                    'Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                    % (epoch, args.num_epochs, i, total_step, loss.data[0],
                       np.exp(loss.data[0])))

            # Save the models
            if (i + 1) % args.save_step == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args.model_path,
                                 'decoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(args.model_path,
                                 'encoder-%d-%d.pkl' % (epoch + 1, i + 1)))
Example #5
def main():
    # Configuration for hyper-parameters
    config = Config()
    
    # Image preprocessing
    transform = config.train_transform
    
    # Load vocabulary wrapper
    with open(os.path.join(config.vocab_path, 'vocab.pkl'), 'rb') as f:
        vocab = pickle.load(f)

    # Build data loader
    image_path = os.path.join(config.image_path, 'train2014')
    json_path = os.path.join(config.caption_path, 'captions_train2014.json')
    train_loader = get_data_loader(image_path, json_path, vocab, 
                                   transform, config.batch_size,
                                   shuffle=True, num_workers=config.num_threads) 
    total_step = len(train_loader)

    # Build Models
    teachercnn = EncoderCNN(config.embed_size)
    teachercnn.eval()
    studentcnn = StudentCNN_Model1(config.embed_size)
    #Load the best teacher model
    teachercnn.load_state_dict(torch.load(os.path.join('../TrainedModels/TeacherCNN', config.trained_encoder))) 
    studentlstm = DecoderRNN(config.embed_size, config.hidden_size // 2,
                         len(vocab), config.num_layers // 2)

    if torch.cuda.is_available():
        teachercnn.cuda()
        studentcnn.cuda()
        studentlstm.cuda()

    # Loss and Optimizer
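    # distillation setup: MSE pulls the student CNN's features toward the teacher's; cross-entropy trains the caption decoder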
    criterion_lstm = nn.CrossEntropyLoss()
    criterion_cnn = nn.MSELoss()
    params = list(studentlstm.parameters()) + list(studentcnn.parameters())
    optimizer_lstm = torch.optim.Adam(params, lr=config.learning_rate)    
    optimizer_cnn = torch.optim.Adam(studentcnn.parameters(), lr=config.cnn_learningrate)    
    
    print('entering the training loop')

    # Train the Models
    for epoch in range(config.num_epochs):
        for i, (images, captions, lengths, img_ids) in enumerate(train_loader):
            images = Variable(images)
            captions = Variable(captions)
            if torch.cuda.is_available():
                images = images.cuda()
                captions = captions.cuda()
            targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]
            # Forward, Backward and Optimize
            optimizer_lstm.zero_grad()
            optimizer_cnn.zero_grad()
            features_tr = teachercnn(images)
            features_st = studentcnn(images)
            outputs = studentlstm(features_st, captions, lengths)
            loss = criterion_cnn(features_st, features_tr.detach()) + criterion_lstm(outputs, targets)
            loss.backward()
            optimizer_cnn.step()
            optimizer_lstm.step()
     
            # Print log info
            if i % config.log_step == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                      %(epoch, config.num_epochs, i, total_step, 
                        loss.data[0], np.exp(loss.data[0]))) 
                
            # Save the Model
            if (i+1) % config.save_step == 0:
                torch.save(studentlstm.state_dict(), 
                           os.path.join(config.student_lstm_path, 
                                        'decoder-%d-%d.pkl' %(epoch+1, i+1)))
                torch.save(studentcnn.state_dict(),
                           os.path.join(config.student_cnn_path,
                                        'encoder-%d-%d.pkl' % (epoch + 1, i + 1)))
Example #6
                             h_RNN_layers=RNN_HIDDEN_LAYERS,
                             h_RNN=RNN_HIDDEN_NODES,
                             h_FC_dim=RNN_FC,
                             dropout=DROPOUT,
                             num_classes=2).to(device)

    # Parallelize model to multiple GPUs
    if torch.cuda.device_count() > 1:
        print("Using", torch.cuda.device_count(), "GPUs!")
        cnn_encoder = nn.DataParallel(cnn_encoder)
        rnn_decoder = nn.DataParallel(rnn_decoder)
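        # nn.DataParallel wraps each model, so its layers must be reached through .module below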

        # Combine all EncoderCNN + DecoderRNN parameters
        crnn_params = list(cnn_encoder.module.fc1.parameters()) + list(cnn_encoder.module.bn1.parameters()) + \
             list(cnn_encoder.module.fc2.parameters()) + list(cnn_encoder.module.bn2.parameters()) + \
             list(cnn_encoder.module.fc3.parameters()) + list(rnn_decoder.parameters())

    elif torch.cuda.device_count() == 1:
        print("Using", torch.cuda.device_count(), "GPU!")
        # Combine all EncoderCNN + DecoderRNN parameters
        crnn_params = list(cnn_encoder.fc1.parameters()) + list(cnn_encoder.bn1.parameters()) + \
             list(cnn_encoder.fc2.parameters()) + list(cnn_encoder.bn2.parameters()) + \
             list(cnn_encoder.fc3.parameters()) + list(rnn_decoder.parameters())

    else:
        crnn_params = list(cnn_encoder.fc1.parameters()) + list(cnn_encoder.bn1.parameters()) + \
             list(cnn_encoder.fc2.parameters()) + list(cnn_encoder.bn2.parameters()) + \
             list(cnn_encoder.fc3.parameters()) + list(rnn_decoder.parameters())

    optimizer = torch.optim.Adam(crnn_params, lr=LEARNING_RATE)
Example #7
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing, normalization for the pretrained resnet
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build data loader
    data_loader = get_loader(args.image_dir,
                             args.caption_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    # Build the models
    encoder = EncoderCNN(args.embed_size).to(device)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the models
    save_in_file_loss = open(
        '/media/raid6/shivam/imagecaption/simple_cnn_img_attention/mod_epoch_loss402.txt',
        "w")
    save_in_file_perplex = open(
        '/media/raid6/shivam/imagecaption/simple_cnn_img_attention/mod_epoch_perplex402.txt',
        "w")
    save_in_file = open(
        '/media/raid6/shivam/imagecaption/simple_cnn_img_attention/mod_step_loss402.txt',
        "w")
    loss_per_epoch = {}
    perplex_per_epoch = {}
    total_step = len(data_loader)
    print('\ntotal-step\n')
    print(total_step)

    for epoch in range(args.num_epochs):

        total_loss = 0
        for i, (images, captions, lengths) in enumerate(data_loader):

            # Set mini-batch dataset
            images = images.to(device)
            captions = captions.to(device)
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            # Forward, backward and optimize
            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()  # accumulate every step so the epoch average is correct

            # Print log info
            if i % args.log_step == 0:
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                    .format(epoch, args.num_epochs, i, total_step, loss.item(),
                            np.exp(loss.item())))

                text = 'Epoch : ' + str(epoch) + '\nStep : ' + str(
                    i) + '\nLoss : ' + str(
                        loss.item()) + '\nPerplexity : ' + str(
                            np.exp(loss.item()))
                print('\ntext\n')
                print(text)
                save_in_file.write(text)

            # Save the model checkpoints

            if (i + 1) % args.save_step == 0:
                print('saving')
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args.model_path,
                                 'decoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(args.model_path,
                                 'encoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))

        loss_per_epoch[epoch + 1] = total_loss / total_step  # mean per-step loss
        loss_text = str(epoch + 1) + ' : ' + str(loss_per_epoch[epoch + 1])
        save_in_file_loss.write(loss_text)
        save_in_file_loss.write('\n')

        print('\nloss_text : ' + loss_text)

        perplex_per_epoch[epoch + 1] = np.exp(loss_per_epoch[epoch + 1])
        perplex_text = str(epoch + 1) + ' : ' + str(
            perplex_per_epoch[epoch + 1])
        save_in_file_perplex.write(perplex_text)
        save_in_file_perplex.write('\n')
        print('\nperplex_text : ' + perplex_text)

    save_in_file.close()
    save_in_file_loss.close()
    save_in_file_perplex.close()
Example #8
#number of input char types
char_vocab = len(string.printable)

# number of output classes = vocab size
numOutputClass = len(labelCorpus.dictionary)
print("Number of Classes:" + str(numOutputClass))

# Initialize models and start training
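# CharCNN encodes the raw character sequence; DecoderRNN maps its hidden state to one of numOutputClass labels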

encoder = CharCNN(char_vocab, args.hidden_size)

decoder = DecoderRNN(args.hidden_size, numOutputClass)

encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                     lr=args.learning_rate)
decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                     lr=args.learning_rate)
criterion = nn.CrossEntropyLoss()

if args.cuda:
    criterion.cuda()
    encoder.cuda()
    decoder.cuda()

start = time.time()
all_losses = []
loss_avg = 0

try:
    print("Training for %d epochs..." % args.n_epochs)
    numMiniBatches = len(linesInTrain) // args.batch_size
Example #9
def main(args):
    train_losses = []
    train_acc = []

    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing, normalization for the pretrained resnet
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    # Build data loader
    data_loader = get_loader(args.image_dir,
                             args.caption_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    # Build the models
    encoder = EncoderCNN(args.embed_size).to(device)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        losses = []
        accuracy = 0.0

        for i, (images, captions, lengths) in enumerate(data_loader):

            # Set mini-batch dataset
            images = images.to(device)
            captions = captions.to(device)
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            # Forward, backward and optimize
            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)

            # record accuracy and loss
            losses.append(loss.item())
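            # topk(1) along the vocab dimension gives the predicted word id for each packed position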
            topv, topi = outputs.topk(1, dim=1)
            targets = targets.unsqueeze(-1)
            accuracy += float((topi == targets).sum()) / targets.shape[0]
            # update params
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}, Accuracy: {:.4f}'
                    .format(epoch + 1, args.num_epochs, i, total_step,
                            loss.item(), np.exp(loss.item()),
                            accuracy / float(i + 1)))
                with open('my_train_loss_t4_resnext.txt', 'a') as fi:
                    fi.write('\n' +
                             'epoch = {}, i = {}, tr_loss = {}, acc = {}'.
                             format(epoch + 1, i + 1, loss.item(), accuracy /
                                    float(i + 1)))

            # Save the model checkpoints
            if (i + 1) % args.save_step == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(
                        args.model_path,
                        'my-decoder-{}-{}-t4-resnext.ckpt'.format(
                            epoch + 1, i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(
                        args.model_path,
                        'my-encoder-{}-{}-t4-resnext.ckpt'.format(
                            epoch + 1, i + 1)))

        train_losses.append(sum(losses) / total_step)
        train_acc.append(accuracy / total_step)

        # save losses over epoch
        f = open("train_loss.txt", "a")
        f.write(str(train_losses))
        f.close()

        # save accuracies over epoch
        f = open("train_acc.txt", "a")
        f.write(str(train_acc))
        f.close()
Example #10
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing
    # For normalization, see https://github.com/pytorch/vision#models
    # transform = transforms.Compose([
    #     transforms.RandomCrop(args.crop_size),
    #     transforms.RandomHorizontalFlip(),
    #     transforms.ToTensor(),
    #     transforms.Normalize((0.485, 0.456, 0.406),
    #                          (0.229, 0.224, 0.225))])

    transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # data_loader = get_loader(args.image_dir, args.caption_path, vocab,
    #                          transform, args.batch_size,
    #                          shuffle=True, num_workers=args.num_workers)
    sasr_data_loader = SASR_Data_Loader(vocab, transform)
    sasr_data_loader.load_data(args.data_file, args.init_flag)
    frogger_data_loader = sasr_data_loader.data_loader(
        args.batch_size, transform, shuffle=True, num_workers=args.num_workers)
    # Build the models
    encoder = EncoderCNN(args.embed_size)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    total_step = len(frogger_data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(frogger_data_loader):
            images = to_var(images, volatile=True)
            if (list(images.size())[0] != 1):
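                # batches of size 1 are skipped: a single sample breaks the encoder's batch norm in training mode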
                captions = to_var(captions)
                # print(list(images.size())[0])
                # print(captions)
                # exit(0)

                targets = pack_padded_sequence(captions,
                                               lengths,
                                               batch_first=True)[0]
                decoder.zero_grad()
                encoder.zero_grad()
                features = encoder(images)
                outputs = decoder(features, captions, lengths)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()

                # Print log info
                if i % args.log_step == 0:
                    print(
                        'Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                        % (epoch, args.num_epochs, i, total_step, loss.data[0],
                           np.exp(loss.data[0])))

                # Save the models
                if (i + 1) % args.save_step == 0:
                    torch.save(
                        decoder.state_dict(),
                        os.path.join(args.model_path,
                                     'decoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                    torch.save(
                        encoder.state_dict(),
                        os.path.join(args.model_path,
                                     'encoder-%d-%d.pkl' % (epoch + 1, i + 1)))
Example #11
def main(args):

    configure(os.path.join(args['exp_dir'], 'log_dir'))

    transform = transforms.Compose([
        transforms.RandomCrop(args['crop_size']),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    data_loader = get_loader({
        'data_dir': args['data_dir'],
        'exp_dir': args['exp_dir'],
        'raw_data_dir': args['raw_data_dir'],
        'batch_size': args['batch_size'],
        'transform': transform,
        'num_workers': args['num_workers'],
        'shuffle': args['shuffle'],
        'mode': 'train'
    })

    #    valid_data_loader=get_loader({'data_dir' : args['data_dir'],
    #                             'raw_data_dir' : args['raw_data_dir'],
    #                             'batch_size' : int(args['batch_size']/4),
    #                             'transform' : transform,
    #                             'num_workers' : args['num_workers'],
    #                             'shuffle' : args['shuffle'],
    #                             'mode':'validate'})

    args['vocab_size'] = len(Vocabulary.load_vocab(args['exp_dir']))

    encoder = EncoderCNN(args).train()
    decoder = DecoderRNN(args).train()

    if args['pretrained']:
        checkpoint_path = Checkpoint.get_latest_checkpoint(args['exp_dir'])
        checkpoint = Checkpoint.load(checkpoint_path)
        encoder.load_state_dict(checkpoint.encoder)
        decoder.load_state_dict(checkpoint.decoder)
        step = checkpoint.step
        epoch = checkpoint.epoch
        omit = True

    else:
        step = 0
        epoch = 0
        omit = False

    encoder.to(device)
    decoder.to(device)

    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    #    params=list(decoder.parameters()) + list(encoder.parameters())
    optimizer = torch.optim.Adam(params, lr=args['lr'])
    scheduler = StepLR(optimizer, step_size=40, gamma=0.1)
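    # decay the learning rate by 10x every 40 epochs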
    #    optimizer=YFOptimizer(params)

    total_step = len(data_loader)
    min_valid_loss = float('inf')

    for epoch in range(epoch, args['num_epochs']):
        scheduler.step()
        for idx, (images, captions, leng) in enumerate(data_loader):
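            # when resuming from a checkpoint, fast-forward past batches already processed in the interrupted epoch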

            if omit:
                if idx < (step - total_step * epoch):
                    logger.info(
                        'idx:{}, step:{}, epoch:{}, total_step:{}, diff:{}'.
                        format(idx, step, epoch, total_step,
                               step - total_step * epoch))
                    continue
                else:
                    omit = False

            images = images.to(device)
            captions = captions.to(device)
            targets = pack_padded_sequence(captions, leng, batch_first=True)[0]

            features = encoder(images)
            outputs = decoder(features, captions, leng)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(decoder.parameters(), 5)
            optimizer.step()

            log_value('loss', loss.item(), step)
            step += 1

            if step % args['log_step'] == 0:
                logger.info(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                    .format(epoch, args['num_epochs'], idx, total_step,
                            loss.item(), np.exp(loss.item())))

            if step % args['valid_step'] == 0:
                #                valid_loss=validate(encoder.eval(),decoder,criterion,valid_data_loader)
                #                if valid_loss<min_valid_loss:
                #                    min_valid_loss=valid_loss
                Checkpoint(encoder, decoder, optimizer, epoch,
                           step).save(args['exp_dir'])
Example #12
    target_tensor = tensor_from_sentence(output_lang, pair[1])
    return (input_tensor, target_tensor)


if __name__ == "__main__":
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    input_lang, output_lang, pairs = load_data()
    hidden_size = 256

    encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    decoder = DecoderRNN(hidden_size, output_lang.n_words).to(device)

    criterion = nn.NLLLoss()
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=0.01)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=0.01)

    training_pairs = [
        tensors_from_pair(input_lang, output_lang, random.choice(pairs))
        for i in range(75000)
    ]
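    # pre-sample 75,000 random sentence pairs; each pair is one training step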

    # two passes over the 75,000 pre-sampled pairs
    for epoch in range(2):
        for i, pair in enumerate(training_pairs):
            encoder_hidden = encoder.init_hidden().to(device)

            input_tensor = pair[0]
            output_tensor = pair[1]

            encoder_optimizer.zero_grad()
Example #13
                                             output_seq_len=out_seq_len)
    inp = Variable(torch.FloatTensor(in_seq).view(batch_size, in_seq_len, input_size))
    epoch_loss = train(inp, out_seq, encoder, decoder, enc_opt, dec_opt, criterion, teacher_forcing_prob=0.5)
    loss_total += epoch_loss
    if epoch % print_every == print_every - 1:
      print("[%d/%d] Avg. loss per epoch: %0.3f" % (epoch + 1, num_epochs, loss_total / print_every))
      loss_total = 0.0

encoder = EncoderRNN(input_size, hidden_size, n_layers=enc_layers)
for tensor in encoder.parameters():
  if len(list(tensor.size())) < 2:
    torch.nn.init.uniform_(tensor)  # 1-D tensors (biases)
  else:
    torch.nn.init.xavier_uniform_(tensor)  # weight matrices
decoder = DecoderRNN(output_size, hidden_size, n_layers=dec_layers)
for tensor in decoder.parameters():
  if len(list(tensor.size())) < 2:
    torch.nn.init.uniform_(tensor)
  else:
    torch.nn.init.xavier_uniform_(tensor)

train_epochs(1000, encoder, decoder, 0.005)

def evaluate(in_seqs, num_steps, output_size):
  '''
  Generate predictions for configurable batch_size and number 
  of steps to predict.
  '''

  inp = Variable(torch.FloatTensor(in_seqs))
  enc_hidden = encoder.initHidden(inp.size()[0])
Example #14
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)
    
    # Image preprocessing
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([ 
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(), 
        transforms.ToTensor(), 
        transforms.Normalize((0.485, 0.456, 0.406), 
                             (0.229, 0.224, 0.225))])
    
    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    
    # Build data loader
    data_loader = get_loader(args.image_dir, args.caption_path, vocab, 
                             transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers) 

    # Build the models
    encoder = EncoderCNN(args.embed_size)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, 
                         len(vocab), args.num_layers)
    
    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)
    
    # Train the Models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):
            
            # Set mini-batch dataset
            images = to_var(images, volatile=True)
            captions = to_var(captions)
            targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]
            
            # Forward, Backward and Optimize
            decoder.zero_grad()
            encoder.zero_grad()
            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                      %(epoch, args.num_epochs, i, total_step, 
                        loss.data[0], np.exp(loss.data[0]))) 
                
            # Save the models
            if (i+1) % args.save_step == 0:
                torch.save(decoder.state_dict(), 
                           os.path.join(args.model_path, 
                                        'decoder-%d-%d.pkl' %(epoch+1, i+1)))
                torch.save(encoder.state_dict(), 
                           os.path.join(args.model_path, 
                                        'encoder-%d-%d.pkl' %(epoch+1, i+1)))
Example #15
File: train.py Project: afcarl/sn
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    worker_thread_count = 1
    retry_for_failed = 2

    # Image preprocessing
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        #     transforms.RandomCrop(args.crop_size),
        #     transforms.RandomHorizontalFlip(),
        transforms.Scale(args.crop_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    #transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])

    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build data loader
    data_loader = get_loader(args.image_dir,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    # Build the models
    encoder = EncoderCNN(args.embed_size)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Loss and Optimizer
    criterion = nn.L1Loss()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the Models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):
            processed_items = []
            threads = []
            has_data_to_process = True

            def do_request(item):
                position = item['position']
                #print(position)
                #print(item)
                retry = retry_for_failed
                while retry:
                    r = requests.post('http://localhost:4567/', data=item)
                    if r.status_code == 200:
                        pil = Image.open(io.BytesIO(r.content)).convert('RGB')
                        processed_items[position] = transform(pil)
                        #print(position, processed_items[position])
                        break
                    else:
                        print("shouldb be here")
                        time.sleep(2)
                        retry -= 1

            # Set mini-batch dataset
            image_tensors = to_var(images, volatile=True)
            captions = to_var(captions)
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]
            #print(images.size())
            #print(torch.equal(images[0] ,images[1]))

            # Forward, Backward and Optimize
            decoder.zero_grad()
            encoder.zero_grad()
            features = encoder(image_tensors)
            outputs = decoder(features, captions, lengths)
            codes = []
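            # items_to_process is assumed to be a queue.Queue defined at module level and shared with the workers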

            def worker():
                while items_to_process.qsize() > 0 or has_data_to_process:
                    item = items_to_process.get()
                    if item is None:
                        break
                    do_request(item)
                    items_to_process.task_done()
                print("ended thread processing")

            for j in range(worker_thread_count):
                t = threading.Thread(target=worker)
                t.daemon = True  # thread dies when main thread (only non-daemon thread) exits.
                t.start()
                threads.append(t)
            for ii, image in enumerate(images):
                image_tensor = to_var(image.unsqueeze(0), volatile=True)
                feature = encoder(image_tensor)
                sampled_ids = decoder.sample(feature)
                sampled_ids = sampled_ids.cpu().data.numpy()
                sampled_caption = []
                for word_id in sampled_ids:
                    word = vocab.idx2word[word_id]
                    sampled_caption.append(word)
                    if word == '<end>':
                        break
                sentence = ' '.join(sampled_caption)
                payload = {'code': sentence}
                data = {'position': ii, 'code': sentence}
                items_to_process.put(data)
                processed_items.append('failed')
                codes.append(sentence)
            has_data_to_process = False
            print(codes)
            print(items_to_process.qsize())
            print(image.size())
            print("waiting for threads")
            for t in threads:
                t.join()
            print("done reassembling images")
            for t in threads:
                t.shutdown = True
                t.join()
            bad_value = False
            for pi in processed_items:
                if isinstance(pi, str) and pi == "failed":
                    bad_value = True
            if bad_value == True:
                print("failed conversion,skipping batch")
                continue
            output_tensor = torch.FloatTensor(len(processed_items), 3,
                                              images.size()[2],
                                              images.size()[3])
            for ii, image_tensor in enumerate(processed_items):
                output_tensor[ii] = processed_items[ii]
            output_var = to_var(output_tensor, False)
            target_var = to_var(images, False)
            loss = criterion(output_var, target_var)
            print("loss")
            print(loss)

            loss.backward()
            optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                print(
                    'Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                    % (epoch, args.num_epochs, i, total_step, loss.data[0],
                       np.exp(loss.data[0])))

            # Save the models
            if (i + 1) % args.save_step == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args.model_path,
                                 'decoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(args.model_path,
                                 'encoder-%d-%d.pkl' % (epoch + 1, i + 1)))
Example #16
def main(args):

    #setup tensorboard
    if args.tensorboard:
        cc = CrayonClient(hostname="localhost")
        print(cc.get_experiment_names())
        #if args.name in cc.get_experiment_names():
        try:
            cc.remove_experiment(args.name)
        except Exception:
            print("experiment didn't exist")
        cc_server = cc.create_experiment(args.name)

    # Create model directory
    full_model_path = args.model_path + "/" + args.name
    if not os.path.exists(full_model_path):
        os.makedirs(full_model_path)
    with open(full_model_path + "/parameters.json", 'w') as f:
        f.write((json.dumps(vars(args))))

    # Image preprocessing

    transform = transforms.Compose([
        transforms.Scale(args.crop_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    mini_transform = transforms.Compose(
        [transforms.ToPILImage(),
         transforms.Scale(20),
         transforms.ToTensor()])

    # Load vocabulary wrapper.
    if args.vocab_path is not None:
        with open(args.vocab_path, 'rb') as f:
            vocab = pickle.load(f)
    else:
        print("building new vocab")
        vocab = build_vocab(args.image_dir, 1, None)
        with open((full_model_path + "/vocab.pkl"), 'wb') as f:
            pickle.dump(vocab, f)

    # Build data loader
    data_loader = get_loader(args.image_dir,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)
    code_data_set = ProcessingDataset(root=args.image_dir,
                                      vocab=vocab,
                                      transform=transform)
    train_ds, val_ds = validation_split(code_data_set)
    train_loader = torch.utils.data.DataLoader(train_ds, collate_fn=collate_fn)
    test_loader = torch.utils.data.DataLoader(val_ds, collate_fn=collate_fn)
    train_size = len(train_loader)
    test_size = len(test_loader)

    # Build the models
    encoder = EncoderCNN(args.embed_size, args.train_cnn)
    print(encoder)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers)
    print(decoder)
    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    #params = list(decoder.parameters()) #+ list(encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)
    start_time = time.time()
    add_log_entry(args.name, start_time, vars(args))

    # Train the Models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):
            decoder.train()
            encoder.train()
            # Set mini-batch dataset
            image_ts = to_var(images, volatile=True)
            captions = to_var(captions)
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]
            count = images.size()[0]

            # Forward, Backward and Optimize
            decoder.zero_grad()
            encoder.zero_grad()
            features = encoder(image_ts)
            outputs = decoder(features, captions, lengths)

            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            total = targets.size(0)
            max_index = outputs.max(dim=1)[1]
            #correct = (max_index == targets).sum()
            _, predicted = torch.max(outputs.data, 1)
            correct = predicted.eq(targets.data).cpu().sum()
            accuracy = 100. * correct / total
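            # token-level top-1 accuracy over the packed (pad-free) targets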

            if args.tensorboard:
                cc_server.add_scalar_value("train_loss", loss.data[0])
                cc_server.add_scalar_value("perplexity", np.exp(loss.data[0]))
                cc_server.add_scalar_value("accuracy", accuracy)

            # Print log info
            if i % args.log_step == 0:
                print(
                    'Epoch [%d/%d], Step [%d/%d], Loss: %.4f, accuracy: %2.2f Perplexity: %5.4f'
                    % (epoch, args.num_epochs, i, total_step, loss.data[0],
                       accuracy, np.exp(loss.data[0])))

            # Save the models
            if (i + 1) % args.save_step == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(full_model_path,
                                 'decoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(full_model_path,
                                 'encoder-%d-%d.pkl' % (epoch + 1, i + 1)))
            if 1 == 2 and i % int(train_size / 10) == 0:  # evaluation block, disabled
                encoder.eval()
                #decoder.eval()
                correct = 0
                for ti, (timages, tcaptions,
                         tlengths) in enumerate(test_loader):
                    timage_ts = to_var(timages, volatile=True)
                    tcaptions = to_var(tcaptions)
                    ttargets = pack_padded_sequence(tcaptions,
                                                    tlengths,
                                                    batch_first=True)[0]
                    tfeatures = encoder(timage_ts)
                    toutputs = decoder(tfeatures, tcaptions, tlengths)
                    print(ttargets)
                    print(toutputs)
                    print(ttargets.size())
                    print(toutputs.size())
                    #correct = (ttargets.eq(toutputs[0].long())).sum()

                accuracy = 100 * correct / test_size
                print('accuracy: %.4f' % (accuracy))
                if args.tensorboard:
                    cc_server.add_scalar_value("accuracy", accuracy)

    torch.save(
        decoder.state_dict(),
        os.path.join(full_model_path,
                     'decoder-%d-%d.pkl' % (epoch + 1, i + 1)))
    torch.save(
        encoder.state_dict(),
        os.path.join(full_model_path,
                     'encoder-%d-%d.pkl' % (epoch + 1, i + 1)))
    end_time = time.time()
    print("finished training, runtime: %d", [(end_time - start_time)])
Example #17
# Initialize the encoder and decoder.
encoder = EncoderCNN(embed_size)
decoder = DecoderRNN(embed_size, hidden_size, vocab_size)

# Move models to GPU if CUDA is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
encoder.to(device)
decoder.to(device)

# Define the loss function.
criterion = nn.CrossEntropyLoss().cuda() if torch.cuda.is_available(
) else nn.CrossEntropyLoss()

# TODO #3: Specify the learnable parameters of the model.
params = list(decoder.parameters()) +\
         list(encoder.embed.parameters())  # We don't want to retrain the resnet

# TODO #4: Define the optimizer.
optimizer = torch.optim.RMSprop(params)

# Set the total number of training steps per epoch.
total_step = math.ceil(
    len(data_loader.dataset.caption_lengths) /
    data_loader.batch_sampler.batch_size)
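# each step consumes one batch of sampled captions, so steps per epoch = ceil(num_captions / batch_size)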

################################################################

import torch.utils.data as data
import numpy as np
import os
Example #18
def train(batch_size=32,
          vocab_threshold=5,
          vocab_from_file=True,
          embed_size=256,
          hidden_size=512,
          num_epochs=10,
          latest_model=None,
          cocoapi_dir="./Coco/"):
    # Keep track of train and validation losses and validation Bleu-4 scores by epoch
    train_losses = []

    # Define a transform to pre-process the training images
    transform_train = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Build data loader, applying the transforms
    train_loader = get_loader(transform=transform_train,
                              mode='train',
                              batch_size=batch_size,
                              vocab_threshold=vocab_threshold,
                              vocab_from_file=vocab_from_file,
                              cocoapi_loc=cocoapi_dir)

    # The size of the vocabulary
    vocab_size = len(train_loader.dataset.vocab)

    # Initialize the encoder and decoder
    checkpoint = None
    if latest_model:
        checkpoint = torch.load(latest_model)
    start_epoch = 1
    if checkpoint:
        train_losses = checkpoint['train_losses']
        val_losses = checkpoint['val_losses']
        start_epoch = checkpoint['epoch']
    encoder = EncoderCNN(embed_size)
    decoder = DecoderRNN(embed_size, hidden_size, vocab_size)
    if checkpoint:
        encoder.load_state_dict(checkpoint['encoder'])
        decoder.load_state_dict(checkpoint['decoder'])

    # Move models to GPU if CUDA is available
    if torch.cuda.is_available():
        torch.cuda.set_device(1)
        encoder.cuda()
        decoder.cuda()

    # Define the loss function
    loss = nn.CrossEntropyLoss().cuda() if torch.cuda.is_available(
    ) else nn.CrossEntropyLoss()

    # Specify the learnable parameters of the model
    params = list(decoder.parameters()) + list(
        encoder.embed.parameters()) + list(encoder.bn.parameters())

    # Define the optimizer
    optimizer = torch.optim.Adam(params=params, lr=0.001)
    if checkpoint:
        optimizer.load_state_dict(checkpoint['optimizer'])
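        # restoring the optimizer state preserves Adam's moment estimates across restarts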

    # Set the total number of training and validation steps per epoch
    total_train_step = math.ceil(
        len(train_loader.dataset.caption_lengths) /
        train_loader.batch_sampler.batch_size)

    start_time = time.time()
    for epoch in range(start_epoch, num_epochs + 1):
        train_loss = train_one(train_loader, encoder, decoder, loss, optimizer,
                               vocab_size, epoch, total_train_step)
        train_losses.append(train_loss)
        # Save the entire model anyway, regardless of being the best model so far or not
        filename = os.path.join("./models", "model-{}.pkl".format(epoch))
        save_epoch(filename, encoder, decoder, optimizer, train_losses, epoch)
        print("Epoch [%d/%d] took %ds" %
              (epoch, num_epochs, time.time() - start_time))
        start_time = time.time()
Example #19
#                              transform, args.batch_size,
#                              shuffle=True, num_workers=args.num_workers)

trainloader = get_loader(train_image_dir, train_caption_path,
                         vocab, transform_train, batch_size, shuffle=True, num_workers=8)

testloader = get_loader(test_image_dir, test_caption_path, vocab,
                        transform_test, batch_size, shuffle=False, num_workers=8)

checkpoints = os.listdir('checkpoint')

encoder = EncoderCNN(embed_size)
decoder = DecoderRNN(embed_size, hidden_size, len(vocab), num_layers=1)
encoder = encoder.to(device)
decoder = decoder.to(device)
params = list(decoder.parameters()) + list(encoder.linear.parameters())
optimizer = torch.optim.Adam(params, lr=learning_rate)
cur_epoch = 0
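# resume from the highest-numbered checkpoint saved under this model name, if any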

if checkpoints:
    num_checkpoint = -1
    for cp in checkpoints:
        name, num = cp[:-4].split('_')
        num = int(num)
        if name == model_name and num_checkpoint < num:
            num_checkpoint = num
    if num_checkpoint > -1:
        state_dict = torch.load('checkpoint/{}_{}.tar'.format(model_name,num_checkpoint))
        encoder.load_state_dict(state_dict['encoder_state_dict'])
        decoder.load_state_dict(state_dict['decoder_state_dict'])
        optimizer.load_state_dict(state_dict['optimizer_state_dict'])
Example #20
def main(args):

    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)

    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        # transforms.RandomCrop(args.crop_size),
        # transforms.RandomHorizontalFlip(),
        transforms.Scale(args.crop_size),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build data loader
    data_loader = get_loader(args.image_dir,
                             args.caption_path,
                             vocab,
                             args.MSCOCO_result,
                             args.coco_detection_result,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers,
                             dummy_object=99,
                             yolo=False)

    # Build the models
    encoder = EncoderCNN(args.embed_size)
    # the layout encoder's hidden state size must match the decoder's input size
    layout_encoder = LayoutEncoder(args.layout_embed_size, args.embed_size,
                                   100, args.num_layers)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers)

    if torch.cuda.is_available():
        encoder.cuda()
        layout_encoder.cuda()
        decoder.cuda()

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(layout_encoder.parameters()) + list(decoder.parameters()) + \
      list(encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the Models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths, label_seqs, location_seqs,
                visual_seqs, layout_lengths) in enumerate(data_loader):
            # Set mini-batch dataset
            images = to_var(images)
            captions = to_var(captions)
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            # Forward, Backward and Optimize
            # decoder.zero_grad()
            # layout_encoder.zero_grad()
            # encoder.zero_grad()

            # Modify this part to toggle the use of visual features

            # features = encoder(images)
            layout_encoding = layout_encoder(label_seqs, location_seqs,
                                             layout_lengths)
            # comb_features = features + layout_encoding
            comb_features = layout_encoding

            outputs = decoder(comb_features, captions, lengths)

            loss = criterion(outputs, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                print(
                    'Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                    % (epoch, args.num_epochs, i, total_step, loss.data[0],
                       np.exp(loss.data[0])))

            # Save the models
            if (i + 1) % args.save_step == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args.model_path,
                                 'decoder-%d-%d.pkl' % (epoch + 1, i + 1)))

                torch.save(
                    layout_encoder.state_dict(),
                    os.path.join(
                        args.model_path,
                        'layout_encoding-%d-%d.pkl' % (epoch + 1, i + 1)))
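Every training loop in these examples builds its targets with `pack_padded_sequence(captions, lengths, batch_first=True)[0]`. The call returns a `PackedSequence`, and indexing `[0]` takes its `.data` field: a flat tensor of all non-padding tokens in time-major order, which lines up with the decoder's packed output. A small self-contained illustration (the token values are made up):

import torch
from torch.nn.utils.rnn import pack_padded_sequence

# Two "captions" of lengths 3 and 2, padded to length 3 (0 = <pad>).
captions = torch.tensor([[4, 7, 9],
                         [5, 8, 0]])
lengths = [3, 2]

packed = pack_padded_sequence(captions, lengths, batch_first=True)
print(packed.data)  # tensor([4, 5, 7, 8, 9]) -- padding removed, time-major order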
Example #21
def main(args):
    model_name = args.model_name
    model_path = os.path.join(args.model_path,model_name)
    # Create model directory
    if not os.path.exists(model_path):
        os.makedirs(model_path)
        
    # Create results directory
    if not os.path.isdir("./results"):
        os.makedirs("./results")
    
    # Image preprocessing, normalization for the pretrained resnet
    transform = transforms.Compose([ 
        transforms.Resize(args.crop_size),
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(), 
        transforms.ToTensor(), 
        transforms.Normalize((0.485, 0.456, 0.406), 
                             (0.229, 0.224, 0.225))])
    
    # Load vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    
    #get ids
    ids = []
    with open('TrainImageIds.csv', 'r') as f:
        reader = csv.reader(f)
        trainIds = list(reader)
    trainIds = [int(i) for i in trainIds[0]]
    coco = COCO('./data/annotations/captions_train2014.json')
    for img_id in trainIds:
        for entry in coco.imgToAnns[img_id]:
            ids.append(entry['id'])
    
    
    #get val ids
    val_ids = []
    with open('ValImageIds.csv', 'r') as f:
        reader = csv.reader(f)
        valIds = list(reader)

    valIds = [int(i) for i in valIds[0]]
    coco = COCO('./data/annotations/captions_train2014.json')
    for img_id in valIds:
        for entry in coco.imgToAnns[img_id]:
            val_ids.append(entry['id'])
            
    
    
    # Build data loader
    train_loader = get_loader(args.image_dir, args.caption_path, ids, vocab, 
                             transform, args.batch_size_train,
                             shuffle=True, num_workers=args.num_workers) 
    
    val_loader = get_loader(args.val_image_dir, args.caption_path, val_ids, vocab, 
                             transform, args.batch_size_val,
                             shuffle=True, num_workers=args.num_workers) 
    
    
    # Build the models
    encoder = EncoderCNN(args.embed_size).to(device)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab), args.num_layers).to(device)
    
    # load pretrained model (optional)
#     encoder.load_state_dict(torch.load('./models/rnn/encoder-best.ckpt')) # put checkpoint name
#     decoder.load_state_dict(torch.load('./models/rnn/decoder-best.ckpt'))
    
    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)
    
    
    # Train the models
    def train(init_epoch=0):
        total_step = len(train_loader)
        
        train_losses = []
        val_losses = []
        prev_loss = float('inf')  # so the first epoch never counts as a loss increase
        loss_increase_counter = 0
        early_stop = True
        early_stop_threshold = 5
        best_model = None
        
        for epoch in range(init_epoch, args.num_epochs):
            running_loss = 0.0
            for i, (images, captions, lengths) in enumerate(train_loader):

                # Set mini-batch dataset
                images = images.to(device)
                captions = captions.to(device)
                targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]

                # Forward, backward and optimize
                features = encoder(images)
                outputs = decoder(features, captions, lengths, pretrained=args.pretrained)
                loss = criterion(outputs, targets)
                decoder.zero_grad()
                encoder.zero_grad()
                loss.backward()
                optimizer.step()
                
                running_loss += loss.item() * images.size(0)

                # Print log info
                if i % args.log_step == 0:
                    print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                          .format(epoch, args.num_epochs, i, total_step, loss.item(), np.exp(loss.item()))) 

            # Save the model checkpoints
            if (epoch+1) % args.save_step == 0:
                torch.save(decoder.state_dict(), os.path.join(
                    model_path, 'decoder-{}.ckpt'.format(epoch+1)))
                torch.save(encoder.state_dict(), os.path.join(
                    model_path, 'encoder-{}.ckpt'.format(epoch+1)))
            
            train_loss = running_loss/len(ids)
            train_losses.append(train_loss)
            val_loss = val(epoch)
            val_losses.append(val_loss)
            
            if val_loss == min(val_losses):
                torch.save(decoder.state_dict(), os.path.join(
                    model_path, 'decoder-best.ckpt'))
                torch.save(encoder.state_dict(), os.path.join(
                    model_path, 'encoder-best.ckpt'))
            
            #write results to csv
            with open("./results/{}_results.csv".format(model_name),'a+', newline='') as csv_file:
                writer = csv.writer(csv_file, delimiter=',')
                # writer.writerow(["Epoch", "Train Loss", "Val Loss"])
                writer.writerow([epoch+1, train_loss,val_loss])
            
            if val_loss > prev_loss:
                loss_increase_counter += 1
            else:
                loss_increase_counter = 0
            if early_stop and loss_increase_counter > early_stop_threshold:
                print("Early Stopping..")
                break
            prev_loss = val_loss
            
    def val(epoch):
        running_loss = 0.0
        # No gradients are needed for validation
        with torch.no_grad():
            for i, (images, captions, lengths) in enumerate(val_loader):

                images = images.to(device)
                captions = captions.to(device)
                targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]

                features = encoder(images)
                outputs = decoder(features, captions, lengths, pretrained=args.pretrained)
                loss = criterion(outputs, targets)

                running_loss += loss.item() * images.size(0)

        return running_loss / len(val_ids)
        
    train(0)
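One thing neither train() nor val() above does is switch the models between training and evaluation mode, which matters here because the encoder contains batch-norm layers. A minimal sketch of the usual toggling, assuming encoder and decoder follow standard nn.Module conventions:

encoder.train(); decoder.train()   # before a training epoch: batch norm uses batch statistics
# ... run the training epoch ...

encoder.eval(); decoder.eval()     # before validation: batch norm uses running statistics
val_loss = val(epoch)
encoder.train(); decoder.train()   # switch back before the next training epoch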
Example #22
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Load vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build data loader
    data_loader = get_loader(args.image_dir,
                             args.caption_path,
                             vocab,
                             args.dictionary,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    # Build the models
    #encoder = EncoderCNN(args.embed_size).to(device)
    dictionary = pd.read_csv(args.dictionary,
                             header=0,
                             encoding='unicode_escape',
                             error_bad_lines=False)
    dictionary = list(dictionary['keys'])

    decoder = DecoderRNN(len(dictionary), args.hidden_size, len(vocab),
                         args.num_layers).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters())
    # + list(encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (array, captions, lengths) in enumerate(data_loader):

            # Set mini-batch dataset
            array = array.to(device)
            captions = captions.to(device)
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            # Forward, backward and optimize
            #features = encoder(images)
            outputs = decoder(array, captions, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            #encoder.zero_grad()
            loss.backward()
            optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                    .format(epoch, args.num_epochs, i, total_step, loss.item(),
                            np.exp(loss.item())))

            # Save the model checkpoints
            if (i + 1) % args.save_step == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args.model_path,
                                 'decoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))
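A note on the dictionary loading above: `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0. On a current pandas the equivalent call would be:

import pandas as pd

dictionary = pd.read_csv(args.dictionary,
                         header=0,
                         encoding='unicode_escape',
                         on_bad_lines='skip')  # replaces error_bad_lines=False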
Example #23
def main():
    # load vocab Data here!

    with open('VocabData.pkl', 'rb') as f:
        VocabData = pickle.load(f)
    with open('FullImageCaps.pkl', 'rb') as f:
        FullImageCaps = pickle.load(f)
    # FullImageCaps_sub = loadData("full_image_descriptions.json")
    coco = loadCoco('captions_train2017.json')

    data = FullImageCaps + coco
    print(len(data) / 128)  # approximate number of batches per epoch at batch size 128
    recovery = sys.argv[2]
    mode = sys.argv[1]

    lmdata = LMDataset(VocabData, data)
    lmloader = lmdata.getLoader(batchSize=128, shuffle=True)
    testloader = lmdata.getLoader(batchSize=1, shuffle=False)
    embedding = torch.Tensor(lmdata.embedding)
    vocab_size = len(lmdata.wordDict)
    max_len = 100
    hidden_size = 1024
    embedding_size = 300
    max_epoch = 10
    sos_id = lmdata.sos_id
    eos_id = lmdata.eos_id
    pad_id = lmdata.pad_id

    wordDict = VocabData['word_dict']
    rev_vocab = [''] * vocab_size
    for word in wordDict:
        rev_vocab[wordDict[word]] = word

    # Build a one-hot probe vector for each word of interest
    probe_words = ['they', 'are', 'students', 'from', 'that', 'school']
    one_hot = {}
    for w in probe_words:
        vec = torch.zeros(1, vocab_size)
        vec[0, wordDict[w]] = 1
        one_hot[w] = vec

    # "they are are are are are" vs. "they are students from that school"
    strange_sentence = torch.cat([one_hot['they']] + [one_hot['are']] * 5,
                                 0).unsqueeze(0)
    regular_sentence = torch.cat([one_hot[w] for w in probe_words],
                                 0).unsqueeze(0)

    PATH = 'LMcheckpoint(1)'

    model = DecoderRNN(vocab_size,
                       max_len,
                       hidden_size,
                       embedding_size,
                       sos_id,
                       eos_id,
                       embedding_parameter=embedding,
                       rnn_cell='lstm')
    if recovery == '1':
        model = loadCheckpoint(PATH, model)
    optimizer = optim.Adam(model.parameters(), lr=0.0002)
    criterion = nn.NLLLoss(ignore_index=pad_id)
    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()
    if mode == 'train':
        train_LM(lmloader, model, optimizer, criterion, pad_id, max_epoch,
                 max_len)

    if mode == 'test':
        lm_loss = LanguageModelLoss(PATH,
                                    vocab_size,
                                    max_len,
                                    hidden_size,
                                    embedding_size,
                                    sos_id,
                                    eos_id,
                                    use_prob_vector=True)
        loss1 = lm_loss(strange_sentence)
        loss2 = lm_loss(regular_sentence)
        print(loss1.item(), loss2.item())

    sampleSentence(model, testloader, rev_vocab)
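The criterion above is `nn.NLLLoss(ignore_index=pad_id)`, so padding positions contribute nothing to the loss or the gradients. A tiny self-contained illustration (values made up):

import torch
import torch.nn as nn

pad_id = 0
criterion = nn.NLLLoss(ignore_index=pad_id)

log_probs = torch.log_softmax(torch.randn(3, 5), dim=1)  # 3 positions, 5-word vocab
targets = torch.tensor([2, 4, pad_id])                   # last position is padding

loss = criterion(log_probs, targets)  # averaged over the 2 non-padding positions only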
Example #24
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Image preprocessing
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    val_loader = get_loader('./data/val_resized2014/',
                            './data/annotations/captions_val2014.json', vocab,
                            transform, 1, False, 1)

    start_epoch = 0

    encoder_state = args.encoder
    decoder_state = args.decoder

    # Build the models
    encoder = EncoderCNN(args.embed_size)
    if not args.train_encoder:
        encoder.eval()
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers)

    if args.restart:
        encoder_state, decoder_state = 'new', 'new'

    if encoder_state == '': encoder_state = 'new'
    if decoder_state == '': decoder_state = 'new'

    if decoder_state != 'new':
        start_epoch = int(decoder_state.split('-')[1])

    print("Using encoder: {}".format(encoder_state))
    print("Using decoder: {}".format(decoder_state))

    # Build data loader
    data_loader = get_loader(args.image_dir,
                             args.caption_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)
    """ Make logfile and log output """
    with open(args.model_path + args.logfile, 'a+') as f:
        f.write("Training on vanilla loss (using new model). Started {} .\n".
                format(str(datetime.now())))
        f.write("Using encoder: new\nUsing decoder: new\n\n")

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    batch_loss = []
    batch_acc = []

    # Train the Models
    total_step = len(data_loader)
    for epoch in range(start_epoch, args.num_epochs):
        for i, (images, captions, lengths, _, _) in enumerate(data_loader):

            # Set mini-batch dataset
            images = to_var(images)  # volatile=True would block backprop through the encoder
            captions = to_var(captions)
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            # Forward, Backward and Optimize
            decoder.zero_grad()
            encoder.zero_grad()
            features = encoder(images)
            out = decoder(features, captions, lengths)
            loss = criterion(out, targets)
            batch_loss.append(loss.data[0])

            loss.backward()
            optimizer.step()

            # # Print log info
            # if i % args.log_step == 0:
            #     print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f, Val: %.5f, %.5f'
            #           %(epoch, args.num_epochs, i, total_step,
            #             loss.data[0], np.exp(loss.data[0]), acc, gt_acc))

            #     with open(args.model_path + args.logfile, 'a') as f:
            #         f.write('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f, Val: %.5f, %.5f\n'
            #               %(epoch, args.num_epochs, i, total_step,
            #                 loss.data[0], np.exp(loss.data[0]), acc, gt_acc))

            # Save the models
            if (i + 1) % args.save_step == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args.model_path,
                                 'decoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(args.model_path,
                                 'encoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                with open(args.model_path + 'training_loss.pkl', 'wb') as f:
                    pickle.dump(batch_loss, f)  # pickle requires binary mode
                with open(args.model_path + 'training_val.pkl', 'wb') as f:
                    pickle.dump(batch_acc, f)
    with open(args.model_path + args.logfile, 'a') as f:
        f.write("Training finished at {} .\n\n".format(str(datetime.now())))
Example #25
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.033, 0.032, 0.033), (0.027, 0.027, 0.027))
    ])

    # Build vocab
    vocab = build_vocab(args.root_path, threshold=0)
    vocab_path = args.vocab_path
    with open(vocab_path, 'wb') as f:
        pickle.dump(vocab, f)
    len_vocab = vocab.idx
    print(vocab.idx2word)

    # Build data loader
    data_loader = get_loader(args.root_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    # Build the models
    encoder = ResNet(ResidualBlock, [3, 3, 3], args.embed_size)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers)

    #Build atten models
    if torch.cuda.is_available():
        encoder.cuda(1)
        decoder.cuda(1)

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(encoder.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the Models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):

            # make one hot
            # cap_ = torch.unsqueeze(captions,2)
            # one_hot_ = torch.FloatTensor(captions.size(0),captions.size(1),len_vocab).zero_()
            # one_hot_caption = one_hot_.scatter_(2, cap_, 1)

            # Set mini-batch dataset
            images = to_var(images)
            captions = to_var(captions)
            #captions_ = to_var(one_hot_caption)

            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]
            # Forward, Backward and Optimize
            optimizer.zero_grad()
            features = encoder(images)
            outputs = decoder(features, captions, lengths)

            outputs = outputs.view(-1, len_vocab)

            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            #print(targets)
            #print(outputs)

            # Print log info
            if i % args.log_step == 0:
                print(
                    'Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                    % (epoch, args.num_epochs, i, total_step, loss.data[0],
                       np.exp(loss.data[0])))

                #test set accuracy
                #print(outputs.max(1)[1])
                outputs_np = outputs.max(1)[1].cpu().data.numpy()
                targets_np = targets.cpu().data.numpy()

                print(outputs_np)
                print(targets_np)

                location_match = 0
                size_match = 0
                shape_match = 0
                exact_match = 0
                # Use a separate loop variable so the outer batch index i is not shadowed
                for k in range(len(targets_np)):
                    if outputs_np[k] == targets_np[k]:
                        exact_match += 1
                    if args.batch_size <= k < args.batch_size * 2 and outputs_np[k] == targets_np[k]:
                        shape_match += 1
                    elif args.batch_size * 2 <= k < args.batch_size * 3 and outputs_np[k] == targets_np[k]:
                        location_match += 1
                    elif args.batch_size * 3 <= k < args.batch_size * 4 and outputs_np[k] == targets_np[k]:
                        size_match += 1

                print(
                    'location match : %.4f, shape match : %.4f, exact_match: %.4f'
                    % (location_match / (args.batch_size), shape_match /
                       args.batch_size, exact_match / len(targets_np)))

            # Save the models
            if (i + 1) % args.save_step == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args.model_path,
                                 'decoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(args.model_path,
                                 'encoder-%d-%d.pkl' % (epoch + 1, i + 1)))
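Several of these examples (this one, #20 and #24) call a `to_var` helper that is never shown. In pre-0.4 PyTorch code it was conventionally defined as below; a sketch, since the original definition is not included:

import torch
from torch.autograd import Variable

def to_var(x, volatile=False):
    # Conventional pre-0.4 helper: move to GPU if available and wrap in a Variable.
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x, volatile=volatile)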
Example #26
vocab_size = len(data_loader.dataset.vocab)

# Initialize the encoder and decoder. 
encoder = EncoderCNN(embed_size)
decoder = DecoderRNN(embed_size, hidden_size, vocab_size)

# Move models to GPU if CUDA is available. 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
encoder.to(device)
decoder.to(device)

# Define the loss function. 
criterion = nn.CrossEntropyLoss().cuda() if torch.cuda.is_available() else nn.CrossEntropyLoss()

# TODO #3: Specify the learnable parameters of the model.
params = list(decoder.parameters()) +\
         list(encoder.embed.parameters())  # We don't want to retrain the resnet

# TODO #4: Define the optimizer.
optimizer = torch.optim.RMSprop(params)

# Set the total number of training steps per epoch.
total_step = math.ceil(len(data_loader.dataset.caption_lengths) / data_loader.batch_sampler.batch_size)

################################################################

import torch.utils.data as data
import numpy as np
import os
import time
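The cell above only sets up the objects; the training loop itself is elided. A minimal sketch of one training step using the names defined above, assuming the loader yields (images, captions) with captions as padded token ids and that the decoder returns per-position vocabulary scores:

# One illustrative step with the encoder/decoder/criterion/optimizer defined above.
for images, captions in data_loader:
    images, captions = images.to(device), captions.to(device)

    features = encoder(images)             # (batch, embed_size)
    outputs = decoder(features, captions)  # assumed (batch, seq_len, vocab_size)

    loss = criterion(outputs.view(-1, vocab_size), captions.view(-1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    break  # one step only, for illustration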
Example #27
def main(args):
    print(args)
    epochs_since_improvement = 0

    # Create model directory
    make_dir(args.model_path)

    # Image pre-processing, normalization for the pre-trained res-net
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary wrapper
    vocab_path = args.vocab_path
    with open(vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build data loader
    train_root = args.image_dir + cfg['train']['TRAIN_DIR']
    train_json = args.caption_path + cfg['train']['train_annotation']

    val_root = args.image_dir + cfg['train']['VAL_DIR']
    val_json = args.caption_path + cfg['train']['valid_annotation']

    # After patience epochs without improvement, break training
    patience = cfg['train']['patience']
    early_stopping = EarlyStopping(patience=patience, verbose=True)

    if args.check_point and os.path.isfile(args.check_point):
        checkpoint = torch.load(args.check_point)

    old_vocab_size = 0
    if args.fine_tuning:
        encoder = checkpoint['encoder']
        decoder = checkpoint['decoder']
        print("Fine tuning with check point is {}".format(args.check_point))

        vocab, old_vocab_size = append_vocab(args.check_point_vocab, vocab)

        with open(vocab_path, 'wb') as v:
            print("Dump {} entries to vocab {}".format(vocab.idx, vocab_path))
            pickle.dump(vocab, v)
        vocab_size = len(vocab)

        # Get the decoder's previous state (4124 = vocab size of the old S19 model)
        old_embed = decoder.embed.weight.data[:4124]
        old_weight = decoder.linear.weight.data[:4124]
        old_bias = decoder.linear.bias.data[:4124]

        # Initialize new embedding and linear layers
        decoder.embed = nn.Embedding(vocab_size, args.embed_size)
        decoder.linear = nn.Linear(args.hidden_size, vocab_size)

        if args.freeze_cri or args.lwf or args.distill:
            # Assign old neurons to the newly-initialized layer, fine-tuning only should ignore this
            print(
                "Assigning old neurons of embedding and linear layer to new decoder..."
            )

            # Init by the old decoder's params (4124 is the vocab size of S19)
            decoder.embed.weight.data[:4124, :] = old_embed
            decoder.linear.weight.data[:4124] = old_weight
            decoder.linear.bias.data[:4124] = old_bias

        encoder.to(device)
        decoder.to(device)

    else:
        # Normal training procedure
        encoder = EncoderCNN(args.embed_size).to(device)
        decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                             args.num_layers).to(device)

    if args.freeze_enc:
        args.task_name += '_freeze_enc'
    elif args.freeze_dec:
        args.task_name += '_freeze_dec'
    elif args.freeze_cri:
        args.task_name += '_freeze_cri'
    elif args.lwf:
        args.task_name += '_lwf'
    elif args.distill and args.kd1:
        args.task_name += '_kd1'
    elif args.distill and args.kd2:
        args.task_name += '_kd2'

    if args.task_type == 'seq':
        args.model_path = cfg['model']['model_path_format'].format(
            args.task_type, args.task_name + '_seq', 'models')
        args.cpkt_path = cfg['model']['model_path_format'].format(
            args.task_type, args.task_name + '_seq', 'best')
    else:
        args.model_path = cfg['model']['model_path_format'].format(
            args.task_type, args.task_name, 'models')
        args.cpkt_path = cfg['model']['model_path_format'].format(
            args.task_type, args.task_name, 'best')

    # Create model directory
    make_dir(args.model_path)

    # Pseudo-labeling option
    if args.lwf:
        print("Running pseudo-labeling option...")
        # Infer pseudo-labels using previous model
        pseudo_labels = infer_caption(img_path=train_root,
                                      json_path=train_json,
                                      model=args.check_point,
                                      vocab_path=vocab_path,
                                      prediction_path=None,
                                      id2class_path=None)

        # Freeze the decoder's LSTM and the encoder for later joint optimization
        for param in decoder.lstm.parameters():
            param.requires_grad_(False)
        for param in encoder.parameters():
            param.requires_grad_(False)

        data = append_json(pseudo_labels, train_json)

        # Create a new json file from the train_json
        train_json = args.caption_path + 'captions_train_lwf.json'
        with open(train_json, 'w') as file:
            json.dump(data, file)

    # Knowledge distillation option
    if args.distill:
        print("Running knowledge distillation...")
        # Teacher networks (kept in train mode here; teachers are more commonly frozen in eval mode)
        teacher_cnn = checkpoint['encoder']
        teacher_lstm = checkpoint['decoder']
        teacher_cnn.train()
        teacher_lstm.train()

        # Initialize a totally new captioning model - Student
        encoder = EncoderCNN(args.embed_size).to(device)
        decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                             args.num_layers).to(device)

        # Student
        student_cnn = encoder
        student_lstm = decoder

        # Move teacher to cuda
        teacher_cnn.to(device)
        teacher_lstm.to(device)

        # Loss between GT caption and the prediction
        criterion_lstm = nn.CrossEntropyLoss()
        # Loss between predictions of teacher and student
        criterion_distill = nn.MSELoss()

        # Params of student
        params_st = list(student_lstm.parameters()) + list(
            student_cnn.parameters())

        optimizer_lstm = torch.optim.Adam(params_st, lr=1e-4)
        optimizer_distill = torch.optim.Adam(student_cnn.parameters(), lr=1e-5)

    if args.freeze_enc:
        print("Freeze encoder technique!")
        for param in encoder.parameters():
            param.requires_grad_(False)

    if args.freeze_dec:
        print("Freeze decoder technique!")
        for param in decoder.lstm.parameters():
            param.requires_grad_(False)

    if args.freeze_cri:
        print("Critical Freezing technique!")
        layer_idx = -1
        for child in encoder.resnet.children():
            layer_idx += 1
            if layer_idx == 0 or layer_idx == 4:  # blk 1 & 2
                for param in child.parameters():
                    param.requires_grad = False

    train_loader = get_loader(root=train_root,
                              json=train_json,
                              vocab=vocab,
                              transform=transform,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)

    val_loader = get_loader(root=val_root,
                            json=val_json,
                            vocab=vocab,
                            transform=transform,
                            batch_size=args.batch_size,
                            shuffle=True,
                            num_workers=args.num_workers)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(encoder.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # These variables are for plotting
    avg_train_losses = []
    avg_val_losses = []

    for epoch in range(args.num_epochs):

        if args.distill:
            print("Training with distillation option!")
            train_step, train_loss_step = train_distill(
                epoch,
                train_loader=train_loader,
                student_cnn=student_cnn,
                student_lstm=student_lstm,
                teacher_cnn=teacher_cnn,
                teacher_lstm=teacher_lstm,
                criterion_lstm=criterion_lstm,
                criterion_distill=criterion_distill,
                optimizer_lstm=optimizer_lstm,
                optimizer_distill=optimizer_distill)
            # Validate after an epoch
            recent_val_loss, val_step, val_loss_step = validate(
                epoch,
                val_loader=val_loader,
                encoder=student_cnn,
                decoder=student_lstm,
                criterion=criterion)
        else:

            train_step, train_loss_step = train(epoch,
                                                train_loader=train_loader,
                                                encoder=encoder,
                                                decoder=decoder,
                                                criterion=criterion,
                                                optimizer=optimizer,
                                                first_training=True,
                                                old_vocab_size=old_vocab_size)
            # Validate after an epoch
            recent_val_loss, val_step, val_loss_step = validate(
                epoch,
                val_loader=val_loader,
                encoder=encoder,
                decoder=decoder,
                criterion=criterion)
        train_loss = np.average(train_loss_step)
        val_loss = np.average(val_loss_step)

        avg_train_losses.append(train_loss)
        avg_val_losses.append(val_loss)

        # Save checkpoint
        make_dir(args.cpkt_path)
        early_stopping(args.cpkt_path, cfg['train']['data_name'], epoch,
                       epochs_since_improvement, encoder, decoder, optimizer,
                       optimizer, val_loss)  # the same optimizer fills both optimizer slots

        if early_stopping.early_stop:
            print("Early Stopping!")
            break

    if args.lwf:
        # Make all trainable
        for param in decoder.linear.parameters():
            param.requires_grad_(True)
        for param in decoder.embed.parameters():
            param.requires_grad_(True)
        for param in decoder.lstm.parameters():
            param.requires_grad_(True)
        for param in encoder.parameters():
            param.requires_grad_(True)

        print("Unfreezing parameters ...")

        print("Critical Freezing technique!")
        layer_idx = -1
        for child in encoder.resnet.children():
            layer_idx += 1
            if layer_idx == 0 or layer_idx == 4:  # blk 1 & 2
                for param in child.parameters():
                    param.requires_grad = False

        # Joint optimization starts

        early_stopping = EarlyStopping(patience=patience, verbose=True)
        for epoch in range(args.num_epochs):
            train_step, train_loss_step = train(epoch,
                                                train_loader=train_loader,
                                                encoder=encoder,
                                                decoder=decoder,
                                                criterion=criterion,
                                                optimizer=optimizer,
                                                first_training=False,
                                                old_vocab_size=old_vocab_size)
            # Validate after an epoch
            recent_val_loss, val_step, val_loss_step = validate(
                epoch,
                val_loader=val_loader,
                encoder=encoder,
                decoder=decoder,
                criterion=criterion)

            train_loss = np.average(train_loss_step)
            val_loss = np.average(val_loss_step)

            avg_train_losses.append(train_loss)
            avg_val_losses.append(val_loss)

            # Save checkpoint
            make_dir(args.cpkt_path)
            early_stopping(args.cpkt_path, cfg['train']['data_name'], epoch,
                           epochs_since_improvement, encoder, decoder,
                           optimizer, optimizer, val_loss)

            if early_stopping.early_stop:
                print("Early Stopping!")
                break
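`train_distill` is referenced above but not shown. A hedged sketch of what one distillation step might look like with the objects defined in this example; the loss combination and its equal weighting are assumptions, not the original implementation:

def train_distill_step(images, captions, lengths):
    # Hypothetical single step: ground-truth loss for the student LSTM plus an
    # MSE feature-matching loss between teacher and student CNN features.
    targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]

    student_feats = student_cnn(images)
    with torch.no_grad():
        teacher_feats = teacher_cnn(images)  # teacher provides targets only

    outputs = student_lstm(student_feats, captions, lengths)
    loss_lstm = criterion_lstm(outputs, targets)
    loss_distill = criterion_distill(student_feats, teacher_feats)

    optimizer_lstm.zero_grad()
    optimizer_distill.zero_grad()
    (loss_lstm + loss_distill).backward()
    optimizer_lstm.step()
    optimizer_distill.step()
    return loss_lstm.item(), loss_distill.item()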
Example #28
batch_size = 20
num_epochs = 100
img_dataset = ImageFolderWithPaths(root='./train', transform=transform)
dataset_loader = torch.utils.data.DataLoader(img_dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=4,
                                             collate_fn=collate_func)

vocab_size = vocab.index

cnn = EncoderCNN(512).to(device)
rnn = DecoderRNN(512, 512, vocab_size).to(device)

criterion = nn.CrossEntropyLoss()
params = list(cnn.linear.parameters()) + list(rnn.parameters())
optimizer = torch.optim.Adam(params, lr=1e-3)

for epoch in range(num_epochs):
    tic = time.time()

    for i, (image, captions, lengths) in enumerate(dataset_loader):

        image = image.to(device)
        captions = captions.to(device)
        targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]

        cnn.zero_grad()
        rnn.zero_grad()

        cnn_out = cnn(image)  # calling the module directly is preferred over .forward()
        # The original snippet is truncated here; the remaining steps follow the
        # same pattern as the other examples in this collection:
        outputs = rnn(cnn_out, captions, lengths)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
Example #29
def main():
    # Configuration for hyper-parameters

    torch.cuda.set_device(0)
    config = Config()
    # Image preprocessing
    transform = config.train_transform
    # Load vocabulary wrapper
    with open(os.path.join(config.vocab_path, 'vocab.pkl'), 'rb') as f:
        vocab = pickle.load(f)
    # Build data loader
    train_image_path = os.path.join(config.image_path, 'train2017')
    json_path = os.path.join(config.caption_path, 'captions_train2017.json')
    train_loader = get_data_loader(train_image_path,
                                   json_path,
                                   vocab,
                                   transform,
                                   config.batch_size,
                                   shuffle=False,
                                   num_workers=config.num_threads)

    val_image_path = os.path.join(config.image_path, 'val2017')
    json_path = os.path.join(config.caption_path, 'captions_val2017.json')
    val_loader = get_data_loader(val_image_path,
                                 json_path,
                                 vocab,
                                 transform,
                                 config.batch_size,
                                 shuffle=False,
                                 num_workers=config.num_threads)

    total_step = len(train_loader)

    # Build Models
    encoder = EncoderCNN(config.embed_size)
    encoder.eval()
    decoder = DecoderRNN(config.embed_size, config.hidden_size, len(vocab),
                         config.num_layers)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(encoder.resnet.fc.parameters())
    optimizer = torch.optim.Adam(params, lr=config.learning_rate)

    print('entering the training loop')
    # Train the Models

    with open('train1_log.txt', 'w') as logfile:
        logfile.write('Validation Error,Training Error\n')
        for epoch in range(0, 25):
            for i, (images, captions, lengths,
                    img_ids) in enumerate(train_loader):
                images = Variable(images)
                captions = Variable(captions)
                if torch.cuda.is_available():
                    images = images.cuda()
                    captions = captions.cuda()
                targets = pack_padded_sequence(captions,
                                               lengths,
                                               batch_first=True)[0]
                # Forward, Backward and Optimize
                optimizer.zero_grad()
                features = encoder(images)
                outputs = decoder(features, captions, lengths)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()
                # Print log info
                if i % config.log_step == 0:
                    print(
                        'Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                        % (epoch, config.num_epochs, i, total_step,
                           loss.data[0], np.exp(loss.data[0])))

                # Save the Model
                if (i + 1) % config.save_step == 0:
                    torch.save(
                        encoder.state_dict(),
                        os.path.join(config.teacher_cnn_path,
                                     'encoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                    torch.save(
                        decoder.state_dict(),
                        os.path.join(config.teacher_lstm_path,
                                     'decoder-%d-%d.pkl' % (epoch + 1, i + 1)))

            print('Just completed an epoch, initiating validation error test')
            total_val_loss = 0.0
            for j, (images, captions, lengths,
                    img_ids) in enumerate(val_loader):
                images = Variable(images)
                captions = Variable(captions)
                if torch.cuda.is_available():
                    images = images.cuda()
                    captions = captions.cuda()
                targets = pack_padded_sequence(captions,
                                               lengths,
                                               batch_first=True)[0]
                features = encoder(images)
                outputs = decoder(features, captions, lengths)
                valloss = criterion(outputs, targets)
                total_val_loss += valloss.data[0]
                if (j + 1) % 1000 == 0:
                    # Report a proper mean over the batches seen so far
                    avgvalloss = total_val_loss / (j + 1)
                    print('Average Validation Loss: %.4f' % avgvalloss)
                    logfile.write(
                        str(avgvalloss) + ',' + str(loss.data[0]) + '\n')
                    break
Example #30
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing, normalization for the pretrained resnet
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build data loader
    data_loader = get_loader(args.image_dir,
                             args.caption_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    # Build the models
    encoder = EncoderCNN(args.embed_size).to(device)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):

            # Set mini-batch dataset
            images = images.to(device)
            captions = captions.to(device)
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            # Forward, backward and optimize
            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                    .format(epoch, args.num_epochs, i, total_step, loss.item(),
                            np.exp(loss.item())))

            # Save the model checkpoints
            if (i + 1) % args.save_step == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args.model_path,
                                 'decoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(args.model_path,
                                 'encoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))
Example #31
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Load vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build data loader
    data_loader = get_caption_loader(args.caption_path,
                                     vocab,
                                     75,
                                     args.batch_size,
                                     shuffle=True,
                                     num_workers=args.num_workers)

    # Build the models
    encoder = EncoderRNN(len(vocab), args.embed_size,
                         args.hidden_size).to(device)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(
        encoder.embedding.parameters()) + list(encoder.rnn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (captions_src, captions_tgt, lengths) in enumerate(data_loader):

            # Set mini-batch dataset
            captions_src = captions_src.to(device)
            captions_tgt = captions_tgt.to(device)
            targets = pack_padded_sequence(captions_tgt,
                                           lengths,
                                           batch_first=True)[0]

            # Forward, backward and optimize
            enc_output, enc_hidden = encoder(captions_src)
            # The encoder's final hidden state seeds the decoder
            outputs = decoder(enc_hidden[:, -1:, :], captions_tgt, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                    .format(epoch, args.num_epochs, i, total_step, loss.item(),
                            np.exp(loss.item())))

            # Save the model checkpoints
            if (i + 1) % args.save_step == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args.model_path,
                                 'decoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(args.model_path,
                                 'encoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))
Example #32
# Initialize the encoder and decoder.
encoder = EncoderCNN(embed_size)
decoder = DecoderRNN(embed_size, hidden_size, vocab_size)

# Move models to GPU if CUDA is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
encoder.to(device)
decoder.to(device)

# Define the loss function.
criterion = nn.CrossEntropyLoss().cuda() if torch.cuda.is_available() else nn.CrossEntropyLoss()

# TODO #3: Specify the learnable parameters of the model.
params = list(decoder.parameters()) + list(encoder.embed.parameters())

# TODO #4: Define the optimizer.
optimizer = torch.optim.Adam(params=params, lr=0.001)

# Set the total number of training steps per epoch.
total_step = math.ceil(
    len(data_loader.dataset.caption_lengths) /
    data_loader.batch_sampler.batch_size)

# <a id='step2'></a>
# ## Step 2: Train your Model
#
# Once you have executed the code cell in **Step 1**, the training procedure below should run without issue.
#
# It is completely fine to leave the code cell below as-is without modifications to train your model.  However, if you would like to modify the code used to train the model below, you must ensure that your changes are easily parsed by your reviewer.  In other words, make sure to provide appropriate comments to describe how your code works!
Example #33
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)
    
    # Image preprocessing, normalization for the pretrained resnet
    transform = transforms.Compose([ 
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(), 
        transforms.ToTensor(), 
        transforms.Normalize((0.485, 0.456, 0.406), 
                             (0.229, 0.224, 0.225))])
    
    # Load vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    
    # Build data loader
    data_loader = get_loader(args.image_dir, args.caption_path, vocab, 
                             transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers) 

    # Build the models
    encoder = EncoderCNN(args.embed_size).to(device)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab), args.num_layers).to(device)
    
    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)
    
    # Train the models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):
            
            # Set mini-batch dataset
            images = images.to(device)
            captions = captions.to(device)
            targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]
            
            # Forward, backward and optimize
            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                      .format(epoch, args.num_epochs, i, total_step, loss.item(), np.exp(loss.item()))) 
                
            # Save the model checkpoints
            if (i+1) % args.save_step == 0:
                torch.save(decoder.state_dict(), os.path.join(
                    args.model_path, 'decoder-{}-{}.ckpt'.format(epoch+1, i+1)))
                torch.save(encoder.state_dict(), os.path.join(
                    args.model_path, 'encoder-{}-{}.ckpt'.format(epoch+1, i+1)))
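All of the `main(args)` examples above presume an argparse driver. A minimal sketch of such an entry point, with a few of the flags these scripts read; the defaults are illustrative assumptions, not taken from any particular example:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_path', type=str, default='models/')
    parser.add_argument('--vocab_path', type=str, default='data/vocab.pkl')
    parser.add_argument('--image_dir', type=str, default='data/resized2014')
    parser.add_argument('--caption_path', type=str,
                        default='data/annotations/captions_train2014.json')
    parser.add_argument('--crop_size', type=int, default=224)
    parser.add_argument('--embed_size', type=int, default=256)
    parser.add_argument('--hidden_size', type=int, default=512)
    parser.add_argument('--num_layers', type=int, default=1)
    parser.add_argument('--num_epochs', type=int, default=5)
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--num_workers', type=int, default=2)
    parser.add_argument('--learning_rate', type=float, default=0.001)
    parser.add_argument('--log_step', type=int, default=10)
    parser.add_argument('--save_step', type=int, default=1000)
    args = parser.parse_args()
    main(args)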