Example #1
class Model(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, vocab_size,
                 num_layer):
        super(Model, self).__init__()
        self.encoder = model.EncoderCNN(input_size, embedding_size)
        self.decoder = model.DecoderRNN(embedding_size, hidden_size,
                                        vocab_size, num_layer)
        self.criterion = nn.CrossEntropyLoss()
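
    # A hypothetical forward() for the wrapper above -- an assumption
    # modeled on common PyTorch captioning tutorials, not part of the
    # original snippet: encode the images, decode with teacher forcing,
    # and score against the captions packed to their true lengths.
    def forward(self, images, captions, lengths):
        features = self.encoder(images)
        outputs = self.decoder(features, captions, lengths)
        targets = nn.utils.rnn.pack_padded_sequence(
            captions, lengths, batch_first=True)[0]
        return self.criterion(outputs, targets)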
Example #2
def main(args):
  transform = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
      std=[0.229, 0.224, 0.225])
  ])

  with open(args.vocab_path, "rb") as f1, \
       open(args.batched_file_path, "rb") as f2:
    vocab = pickle.load(f1)
    batched_val_set = pickle.load(f2)

  coco_caps = COCO(args.caption_path)
  batched_val_loader = get_loader(args.image_dir,
                                  args.caption_path,
                                  batched_val_set,
                                  vocab,
                                  transform,
                                  shuffle=True,
                                  num_workers=3)

  encoder = model.EncoderCNN()
  decoder = model.DecoderRNN(512, 196, 512, 512, len(vocab), 1)
  if torch.cuda.is_available():
    encoder = encoder.cuda()
    decoder = decoder.cuda()

  checkpoint = torch.load(args.load_checkpoint)
  decoder.load_state_dict(checkpoint["state_dict"])
  checkpoint = None
  torch.cuda.empty_cache()

  for i, (images, captions, lengths, ids) in enumerate(batched_val_loader):
    if i == args.num_runs:
      break
    print("\nactual captions for batch " + str(i) + " are: ")
    annIds = coco_caps.getAnnIds(imgIds=ids)
    anns = coco_caps.loadAnns(annIds)
    for ann in anns:
      print(ann["caption"])
    images = to_var(images, volatile=True)
    captions = to_var(captions, volatile=True)
    features = encoder(images)
    results = decoder.sample(features, args.beam_size)
    print("predicted captions are: ")
    for result in results:
      candidate = [vocab(i) for i in result[1][:-1]]
      references = [nltk.tokenize.word_tokenize(ann["caption"].lower()) for ann in anns]
      score = bleu_score.sentence_bleu(references, candidate)
      print("probability: %5.4f, BLEU score: %5.4f, caption: %s" %(result[0], score, caption_id_to_string(result[1], vocab)))
Example #3
with open('vocabSet.pkl', 'rb') as f:
    vocabularySet = pickle.load(f)

print("Loaded Vocabulary Set")

with open('vocabSet2.pkl', 'rb') as f:
    vocabularySet2 = pickle.load(f)

print("Loaded Reverse Vocabulary Set")

modelsPath = "LSTM4Models/"
imagesPath = "../data/val2014/"
captionsPath = "../data/annotations/captions_val.json"

cnnEn = model.EncoderCNN(wordEmbeddings).eval()
lstmDe = model.DecoderRNN(wordEmbeddings, lstmHiddenStates, len(vocabularySet),
                          lstmLayers)
cnnEn = cnnEn.to(device)
lstmDe = lstmDe.to(device)

valData = COCO(captionsPath)

# Use pycocotools to inspect the dataset
print("Total Annotations: " + str(len(valData.anns.keys())))
print("Total Images: " + str(len(valData.imgs.keys())))

# Visualise one image's annotations
print(valData.imgToAnns[393212])

for (i, key) in enumerate(valData.imgToAnns.keys()):
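    # The source is cut off at this loop header. A plausible body (an
    # assumption, not the original code): imgToAnns maps an image id to a
    # list of annotation dicts, so each caption is ann['caption'].
    for ann in valData.imgToAnns[key]:
        print(ann['caption'])
    if i == 2:  # preview the first few images only
        break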
Example #4
batch_size = args.bs

imgh = args.imh
imgw = args.imw

embed_dim = args.embed_size
hidden_dim = args.nhid
attention_dim = args.attention_dim

transform = transforms.Compose([transforms.Resize((imgh, imgw)),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5),
                                                     (0.5, 0.5, 0.5))])
fine_tune_encoder = False
encoder = model.EncoderCNN().to(device)
encoder.fine_tune(fine_tune_encoder)
decoder = model.DecoderRNN(ntokens, embed_dim, hidden_dim, idx2word, word2idx).to(device)

loss_fn = nn.CrossEntropyLoss().to(device)

decoder_optimizer = t.optim.Adam(params=filter(lambda p: p.requires_grad, decoder.parameters()),
                                 lr=decoder_lr)
encoder_optimizer = t.optim.Adam(params=filter(lambda p: p.requires_grad, encoder.parameters()),
                                 lr=encoder_lr) if fine_tune_encoder else None

# def prepare_sequence(seq, to_ix):
#     idxs = [to_ix[w] for w in seq]
#     return t.tensor(idxs, dtype=t.long, device = device)

def batchify(data, bs):
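    # The original is cut off here. As an assumption, a conventional
    # batchify (as in PyTorch's word_language_model example) trims the
    # flat token stream to a multiple of bs and reshapes it column-wise:
    nbatch = data.size(0) // bs
    data = data.narrow(0, 0, nbatch * bs)
    return data.view(bs, -1).t().contiguous().to(device)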
Example #5
## DataLoaders provide various ways to get batches of examples.
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=args.batch_size,
                                           shuffle=True,
                                           collate_fn=dataload.collate_fn,
                                           **kwargs)
val_loader = torch.utils.data.DataLoader(val_dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         collate_fn=dataload.collate_fn,
                                         **kwargs)

## Load the proper neural network model.
if args.model == 'Pretrained':

    model.encoder = model.EncoderCNN(args.embed_dim)
    model.decoder = model.DecoderRNN(embed_size=args.embed_dim,
                                     hidden_size=args.hidden_dim,
                                     vocab_size=vocab_size,
                                     num_layers=1,
                                     max_seq_length=10)

else:
    raise Exception('Unknown model {}'.format(args.model))

## The loss function: cross-entropy.

criterion = functional.cross_entropy

## Activate CUDA if specified and available.
if args.cuda:
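    # Truncated in the source; the usual continuation moves both halves of
    # the model onto the GPU (an assumption based on the pattern above):
    model.encoder = model.encoder.cuda()
    model.decoder = model.decoder.cuda()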
Example #6
def main(args):
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    useCuda = not args.disable_cuda

    with open(args.vocab_path, 'rb') as f1, \
         open(args.batched_train_path, 'rb') as f2, \
         open(args.batched_val_path, 'rb') as f3:
        vocab = pickle.load(f1)
        batched_train_set = pickle.load(f2)
        batched_val_set = pickle.load(f3)

    batched_train_loader = get_loader(args.train_image_dir,
                                      args.train_caption_path,
                                      batched_train_set,
                                      vocab,
                                      transform,
                                      shuffle=True,
                                      num_workers=3)
    batched_val_loader = get_loader(args.val_image_dir,
                                    args.val_caption_path,
                                    batched_val_set,
                                    vocab,
                                    transform,
                                    shuffle=True,
                                    num_workers=1)
    random_val_loader = get_loader(args.val_image_dir,
                                   args.val_caption_path,
                                   batched_val_set,
                                   vocab,
                                   transform,
                                   shuffle=True,
                                   num_workers=1)

    encoder_cnn = model.EncoderCNN(args.is_normalized, useCuda=useCuda)
    decoder_rnn = model.DecoderRNN(args.embedding_dim,
                                   args.hidden_size,
                                   len(vocab),
                                   args.batch_size,
                                   dropout=args.dropout,
                                   useCuda=useCuda)
    if torch.cuda.is_available() and useCuda:
        decoder_rnn.cuda()
    loss_function = nn.NLLLoss()
    #loss_function = nn.CrossEntropyLoss()
    params = list(decoder_rnn.parameters())
    optimizer = optim.Adam(params, lr=args.encoder_lr)
    #scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=1)

    output_train_file = open(args.output_train_name, 'w')
    output_val_file = open(args.output_val_name, 'w')
    start_epoch = 0

    save_name = file_namer.make_checkpoint_name(
        args.batch_size, args.min_occurrences, args.num_epochs, args.dropout,
        args.decoder_lr, args.encoder_lr, args.embedding_dim,
        args.hidden_size, args.grad_clip, args.is_normalized
    ) if args.load_checkpoint == "" else args.load_checkpoint
    checkpoint_name = file_namer.get_checkpoint(save_name)
    if checkpoint_name is not None:
        print("loading from checkpoint " + checkpoint_name)
        checkpoint = torch.load(checkpoint_name) if useCuda else torch.load(
            checkpoint_name, map_location=lambda storage, loc: storage)
        start_epoch = checkpoint['epoch']
        decoder_rnn.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        args.load_checkpoint = checkpoint_name
        checkpoint = None
        torch.cuda.empty_cache()
    else:
        print("No existing checkpoints, starting from scratch")
        args.load_checkpoint = "No checkpoint found"

    full_return_index = mp.Value('i', 0)
    full_return_value = mp.Value('d', 0.0)
    full_val_processes = None
    for epoch in range(start_epoch, args.num_epochs):
        val_processes = None
        return_index = mp.Value('i', 0)
        return_value = mp.Value('d', 0.0)
        train_progress_bar = tqdm(iterable=batched_train_loader,
                                  desc='Epoch [%i/%i] (Train)' %
                                  (epoch, args.num_epochs))
        train_sum_loss = 0
        for i, (images, captions, _) in enumerate(train_progress_bar):
            train_sum_loss += trainer.train(encoder_cnn, decoder_rnn,
                                            loss_function, optimizer, images,
                                            captions, args.grad_clip, useCuda)
            train_progress_bar.set_postfix(loss=train_sum_loss /
                                           ((i % 100) + 1))
            if i % 100 == 0:
                output_train_file.write(
                    "%d, %5.4f\n" %
                    (epoch * len(batched_train_loader) + i,
                     train_sum_loss / 100 if i > 0 else train_sum_loss))
                if i % 1000 == 0:
                    if val_processes is not None:
                        val_processes.join()
                        output_val_file.write(
                            "%d, %5.4f\n" %
                            (return_index.value, return_value.value))
                    val_processes = mp.Process(
                        target=validate,
                        args=(random_val_loader, encoder_cnn, decoder_rnn,
                              loss_function, useCuda,
                              epoch * len(batched_train_loader) + i,
                              return_index, return_value))
                    val_processes.start()
                train_sum_loss = 0

        if full_val_processes is not None:
            full_val_processes.join()
            #scheduler.step(full_return_value.value)
            output_val_file.write(
                "End of Epoch\n%d, %5.4f\n" %
                (full_return_index.value, full_return_value.value))
        full_val_processes = mp.Process(
            target=validate_full,
            args=(batched_val_loader, encoder_cnn, decoder_rnn, loss_function,
                  useCuda, epoch, args.num_epochs, len(batched_train_loader),
                  full_return_index, full_return_value))
        full_val_processes.start()
        torch.save({'epoch': epoch + 1,
                    'state_dict': decoder_rnn.state_dict(),
                    'optimizer': optimizer.state_dict()},
                   file_namer.make_checkpoint_name(
                       args.batch_size, args.min_occurrences, epoch + 1,
                       args.dropout, args.decoder_lr, args.encoder_lr,
                       args.embedding_dim, args.hidden_size, args.grad_clip,
                       args.is_normalized))
    if full_val_processes is not None:
        full_val_processes.join()
        output_val_file.write(
            "End of Epoch\n%d, %5.4f\n" %
            (full_return_index.value, full_return_value.value))
        full_val_processes = None

    output_train_file.close()
    output_val_file.close()

    if args.plot:
        args.train_files.append(args.output_train_name)
        args.val_files.append(args.output_val_name)
        plot(args)
        args.png_files = [args.plot_name]
    if args.send_email:
        args.txt_files = [args.output_train_name, args.output_val_name]
        f = open('arguments.txt', 'w')
        for arg in sorted(vars(args)):
            # arguments we don't want sent in the email
            ignore_args = [
                'user', 'password', 'to', 'plot_name', 'train_image_dir',
                'val_image_dir', 'send_email', 'plot',
                'train_caption_path', 'val_caption_path', 'png_files',
                'txt_files', 'disable_cuda', 'body', 'output_train_name',
                'output_val_name', 'show', 'subject', 'max_batched_set_size'
            ]
            if arg not in ignore_args:
                f.write("%s: %s\n" % (arg, getattr(args, arg)))
        f.close()
        if not args.body:
            args.body = 'arguments.txt'
        else:
            args.txt_files.append('arguments.txt')
        send_email(args)
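
The validate() target handed to mp.Process above cannot return a value to the parent, which is why the shared return_index / return_value slots are passed in. A minimal sketch of what such a worker presumably looks like; the body and the trainer.evaluate helper are assumptions, not code from this repository:

def validate(loader, encoder, decoder, loss_function, useCuda, index,
             return_index, return_value):
    # Hypothetical worker body: average the loss over the loader, then
    # publish (index, loss) back to the parent via shared memory.
    total_loss = 0.0
    for images, captions, _ in loader:
        total_loss += trainer.evaluate(encoder, decoder, loss_function,
                                       images, captions, useCuda)
    return_index.value = index
    return_value.value = total_loss / len(loader)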
Example #7
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=args.batch_size,
                                           shuffle=True,
                                           collate_fn=dataload.collate_fn,
                                           **kwargs)
val_loader = torch.utils.data.DataLoader(val_dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         collate_fn=dataload.collate_fn,
                                         **kwargs)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=args.batch_size,
                                          shuffle=True,
                                          collate_fn=dataload.val_collate,
                                          **kwargs)
## Load the proper neural network model.
if args.model == 'Pretrained':
    # Problem 2 (no hidden layer, input -> output)
    model.encoder = model.EncoderCNN(10)
    model.decoder = model.DecoderRNN(encoder_dim=2048,
                                     decoder_dim=512,
                                     attention_dim=512,
                                     embed_size=512,
                                     hidden_size=args.hidden_dim,
                                     vocab_size=vocab_size,
                                     num_layers=1,
                                     max_seq_length=15)
#elif args.model == 'resnet_common':
# Problem 5 (multiple hidden layers, input -> hidden layers -> output)
#   print("sruthi check 1")
#  model = models.resnetcommon.ResnetCommon(im_size, args.hidden_dim, args.kernel_size, n_classes)

else:
    raise Exception('Unknown model {}'.format(args.model))
Example #8
with open('vocabSet2.pkl', 'rb') as f:
    vocabularySet2 = pickle.load(f)

print("Loaded Reverse Vocabulary Set")
modelPath = "models/"
imagesPath = "../data/images/"
captionsPath = "../data/annotations/captions_train.json"

# Hyperparameters (tunable)
lstmLayers = 3
lstmHiddenStates = 512
wordEmbeddings = 256
epochs = 5
batchSize = 64
learningRate = 0.001

cnn = model.EncoderCNN(wordEmbeddings).to(device)
lstm = model.DecoderRNN(wordEmbeddings, lstmHiddenStates, len(vocabularySet),
                        lstmLayers).to(device)

criterion = torch.nn.CrossEntropyLoss()
parameters = list(lstm.parameters()) + list(cnn.linear.parameters()) + list(
    cnn.bn.parameters())
optimizer = torch.optim.Adam(parameters, lr=learningRate)

# Preprocessing of image data
transform = transforms.Compose([
    transforms.RandomCrop(224),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
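
A setup like this is normally followed by a teacher-forced training loop. The sketch below is an assumption (including the data_loader name), shown only to make the optimizer/criterion wiring above concrete:

from torch.nn.utils.rnn import pack_padded_sequence

# Assumed training loop for the cnn/lstm pair configured above; a
# data_loader yielding (images, captions, lengths) is hypothetical.
for epoch in range(epochs):
    for images, captions, lengths in data_loader:
        images, captions = images.to(device), captions.to(device)
        targets = pack_padded_sequence(captions, lengths,
                                       batch_first=True)[0]
        outputs = lstm(cnn(images), captions, lengths)
        loss = criterion(outputs, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()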
Example #9
with open("../ImageCaptioner/data/vocab/vocab_occurrence_5.pkl", 'rb') as f1,\
    open("../ImageCaptioner/data/batched_data/val_batch_1.pkl", "rb") as f2:
    vocab = pickle.load(f1)
    batched_val_set = pickle.load(f2)
coco_caps = COCO("../ImageCaptioner/data/annotations/captions_val2014.json")
batched_val_loader = get_loader(
    "../ImageCaptioner/data/val2014",
    "../ImageCaptioner/data/annotations/captions_val2014.json",
    batched_val_set,
    vocab,
    transform,
    shuffle=True,
    num_workers=3)

encoder = model.EncoderCNN()
decoder = model.DecoderRNN(512, 196, 512, 512, len(vocab), 1)
if torch.cuda.is_available():
    encoder = encoder.cuda()
    decoder = decoder.cuda()

checkpoint = torch.load(
    "noNorm/model_batch_100_dims_512x512_lr_0.0001/checkpoint_25.pt")
decoder.load_state_dict(checkpoint['state_dict'])
checkpoint = None
torch.cuda.empty_cache()

for i, (images, captions, lengths, ids) in enumerate(batched_val_loader):
    if i == 1:
        break
    print("actual captions are: ")
Example #10
def main(args):
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    useCuda = not args.disable_cuda

    with open(args.vocab_path, 'rb') as vocab_path, \
        open(args.batched_train_path, 'rb') as batched_train_path, \
        open(args.batched_val_path, 'rb') as batched_val_path:
        vocab = pickle.load(vocab_path)
        batched_train_set = pickle.load(batched_train_path)
        batched_val_set = pickle.load(batched_val_path)

    batched_train_loader = get_loader(args.train_image_dir,
                                      args.train_caption_path,
                                      batched_train_set,
                                      vocab,
                                      transform,
                                      shuffle=True,
                                      num_workers=3)
    batched_val_loader = get_loader(args.val_image_dir,
                                    args.val_caption_path,
                                    batched_val_set,
                                    vocab,
                                    transform,
                                    shuffle=True,
                                    num_workers=1)
    batched_val_loader_full = get_loader(args.val_image_dir,
                                         args.val_caption_path,
                                         batched_val_set,
                                         vocab,
                                         transform,
                                         shuffle=True,
                                         num_workers=1)

    encoder_cnn = model.EncoderCNN()
    decoder_rnn = model.DecoderRNN(512,
                                   196,
                                   args.embedding_dim,
                                   args.hidden_dim,
                                   len(vocab),
                                   args.num_layers,
                                   args.dropout,
                                   useCuda=useCuda)
    if torch.cuda.is_available() and useCuda:
        encoder_cnn.cuda()
        decoder_rnn.cuda()
    loss_function = nn.NLLLoss()
    params = list(decoder_rnn.parameters())
    optimizer = optim.Adam(params, lr=args.lr)

    output_train_file = open(
        args.output_dir + "/train_" + str(args.num_epochs) + ".txt", 'w')
    output_val_file = open(
        args.output_dir + "/val_" + str(args.num_epochs) + ".txt", 'w')
    start_epoch = 0

    if args.load_checkpoint is not None:
        checkpoint = torch.load(
            args.load_checkpoint) if useCuda else torch.load(
                args.load_checkpoint,
                map_location=lambda storage, loc: storage)
        print("loading from checkpoint " + str(args.load_checkpoint))
        start_epoch = checkpoint['epoch']
        decoder_rnn.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        checkpoint = None
        torch.cuda.empty_cache()

    for epoch in range(start_epoch, args.num_epochs):
        progress_bar = tqdm(iterable=batched_train_loader,
                            desc='Epoch [%i/%i] (Train)' %
                            (epoch, args.num_epochs))
        train_sum_loss = 0
        for i, (images, captions, lengths, ids) in enumerate(progress_bar, 1):
            loss = train(images, captions, encoder_cnn, decoder_rnn,
                         loss_function, optimizer, args.grad_clip, useCuda)
            train_sum_loss += loss.data.select(0, 0)
            progress_bar.set_postfix(loss=train_sum_loss / ((i % 100) + 1))
            if i % 100 == 0:
                output_train_file.write("%d, %5.4f\n" %
                                        (epoch * len(batched_train_loader) + i,
                                         train_sum_loss / 100))
                train_sum_loss = 0
                if i % 1000 == 0:
                    temp_loss = validate(batched_val_loader, encoder_cnn,
                                         decoder_rnn, loss_function, useCuda)
                    output_val_file.write(
                        "%d, %5.4f\n" %
                        (epoch * len(batched_train_loader) + i, temp_loss))
        # end of batch
        output_train_file.write(
            "%d, %5.4f\n" % ((epoch + 1) * len(batched_train_loader),
                             train_sum_loss / len(batched_train_loader) / 100))

        val_sum_loss = 0
        val_progress_bar = tqdm(iterable=batched_val_loader_full,
                                desc='Epoch [%i/%i] (Val)' %
                                (epoch, args.num_epochs))
        for i, (images, captions, lengths,
                ids) in enumerate(val_progress_bar, 1):
            loss = evaluate(images, captions, encoder_cnn, decoder_rnn,
                            loss_function, optimizer, useCuda)
            val_sum_loss += loss.data.select(0, 0)
            val_progress_bar.set_postfix(loss=val_sum_loss / i)
        output_val_file.write("%d, %5.4f\n" %
                              ((epoch + 1) * len(batched_train_loader),
                               val_sum_loss / len(batched_val_loader_full)))

        torch.save(
            {
                'epoch': epoch + 1,
                'state_dict': decoder_rnn.state_dict(),
                'optimizer': optimizer.state_dict()
            }, args.output_dir + "/checkpoint_" + str(epoch + 1) + ".pt")

    output_train_file.close()
    output_val_file.close()
Example #11
def main():
    VOCAB = vocabulary.load_vocab(vocab_dict_path)
    print(len(VOCAB))
    dataset_raw = preprocessing.PreprocessedData(tagg_toy, im_toy,
                                                 dataset_new_folder, nlp,
                                                 VOCAB)
    # dataset_raw = preprocessing.PreprocessedData(tagged_files_paths, im_paths, dataset_new_folder, nlp, VOCAB)
    print("\nDataset lengths.... data / labels")
    print(len(dataset_raw.train_data), len(dataset_raw.labels))

    dataset_processed = dataset.ImageTextDataset(dataset_raw)
    print('\nLength of dataset')
    print(len(dataset_processed))

    U = utils.Utils()
    train_n, val_n = U.partition_numbers(.8, len(dataset_processed))
    train_set, val_set = torch.utils.data.random_split(dataset_processed,
                                                       [train_n, val_n])
    print('\nTrainset {}, valset {}'.format(train_set, val_set))

    ########################### HYPERPARAMETERS ##########################################################
    num_workers = 8 if CUDA else 0
    batch_size = 32
    embedding_dim = 64
    num_hidden_nodes = 512
    size_of_vocab = len(VOCAB)
    num_output_nodes = size_of_vocab
    num_layers = 3
    bidirection = True
    dropout = 0
    nepochs = 20
    lr = 0.001
    weight_decay = 0.00001
    #####################################################################################################

    train_dataloader = DataLoader(train_set,
                                  batch_size=batch_size,
                                  collate_fn=dataset.collate,
                                  shuffle=True,
                                  num_workers=num_workers,
                                  drop_last=False)
    val_dataloader = DataLoader(val_set,
                                batch_size=batch_size,
                                collate_fn=dataset.collate,
                                shuffle=True,
                                num_workers=num_workers,
                                drop_last=True)

    # Instantiate
    encoder = model.EncoderCNN(embedding_dim)
    decoder = model.DECODER(size_of_vocab,
                            embedding_dim,
                            num_hidden_nodes,
                            num_output_nodes,
                            num_layers,
                            bidirectional=bidirection,
                            dropout=dropout)

    # Criterion & Optimizer
    criterion = nn.CrossEntropyLoss(ignore_index=0)
    optimizer = optim.Adam(decoder.parameters(),
                           lr=lr,
                           weight_decay=weight_decay)

    # Train
    train_losses, test_losses, train_perplexities, test_perplexities = train_test.run_epochs(
        encoder, decoder, optimizer, criterion, train_dataloader,
        val_dataloader, nepochs)

    # Generate
    generate.generate_labels(dataset_raw.train_data[0:20])