Example No. 1
def evaluate():
    coco_dataset = CocoCaptions(
        root="../data/train2014/train2014", 
        annFile= train_ann_file
    )

    capgen = CaptionGenerator(coco_dataset, k=k, early_stop=train_early_stop, load_knn=load_knn)

    # evaluate
    val_dataset = CocoCaptions(
        root="../data/val2014/val2014", 
        annFile=valid_ann_file
    )

    # best_captions = list of {'image_id': img_id, 'caption': caption}
    best_captions, cap_map = capgen.evaluate(val_dataset, early_stop=val_early_stop)
    with open(res_file, 'w') as f:
        json.dump(best_captions, f)


    # evaluate best captions against gt
    coco_result = capgen.coco.coco.loadRes(res_file)
    cocoEval = COCOEvalCap(capgen.coco.coco, coco_result)
    cocoEval.params['image_id'] = coco_result.getImgIds()
    cocoEval.evaluate()

    indices = ["BLEU 1-gram", "BLEU 2-gram", "BLEU 3-gram", "BLEU 4-gram",
    		   "METEOR", "ROUGE_L", "CIDEr", "SPICE"]
    data = [cocoEval.eval['Bleu_1']] + [cocoEval.eval['Bleu_2']] + [cocoEval.eval['Bleu_3']] + [cocoEval.eval['Bleu_4']] + \
    	   [cocoEval.eval['METEOR']] + [cocoEval.eval['ROUGE_L']] + [cocoEval.eval['CIDEr']] + [cocoEval.eval['SPICE']]
    results = pd.DataFrame(columns=[f"k={k}_Train_num={train_early_stop}_Val_num={val_early_stop}"], index=indices, data=data)
    results.to_excel(out_file)
    print(f"Results saved to {out_file}")
Example No. 2
def calc_bleu():
    image_dir = "/home/roberto/Documentos/TFM-UOC/pytorch-tutorial/tutorials/03-advanced/image_captioning/data/"

    json_path = image_dir + "annotations/captions_train2014.json"  # avoid shadowing the json module
    cap = CocoCaptions(root=image_dir + "train2014/", annFile=json_path)

    print('Number of samples: ', len(cap))
    img, target = cap[0]  # load the 1st sample (a PIL image and its list of reference captions)
    print(target)
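CocoCaptions returns PIL images by default; passing a torchvision transform converts them to tensors. A minimal sketch reusing the image_dir and json_path variables above (the resize and crop sizes are arbitrary choices):

# a minimal sketch: the same dataset with a transform, so images come back as tensors
import torchvision.transforms as T
from torchvision.datasets import CocoCaptions

transform = T.Compose([T.Resize(256), T.CenterCrop(224), T.ToTensor()])
cap = CocoCaptions(root=image_dir + "train2014/", annFile=json_path, transform=transform)

img, target = cap[0]
print(img.shape)    # torch.Size([3, 224, 224])
print(len(target))  # typically 5 reference captions per image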
Example No. 3
def _get_extract_dataloader(data_path,
                            image_shape=None,
                            batch_size=1,
                            num_workers=0):
    from torch.utils.data.dataloader import DataLoader
    transform = get_transform(image_shape)

    dataset = CocoCaptions(data_path, data_path / 'captions.json', transform)
    return DataLoader(dataset, batch_size, num_workers=num_workers)
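A minimal sketch of driving the helper above. The directory layout (images with a captions.json alongside) and the assumption that get_transform ends in ToTensor are guesses about the surrounding project:

# hypothetical paths; assumes get_transform produces fixed-size tensors
from pathlib import Path

loader = _get_extract_dataloader(Path("../data/coco/train2014"),
                                 image_shape=(224, 224),
                                 batch_size=1)
images, captions = next(iter(loader))
print(images.shape)   # (1, 3, 224, 224) if the transform resizes and converts to tensors
print(len(captions))  # number of reference captions for this image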
Example No. 4
 def __init__(self,
              root,
              annFile,
              filepath,
              transform=None,
              freq_threadshold=5,
              train=True):
     self.data = CocoCaptions(root=root,
                              annFile=annFile,
                              transform=transform)
     self.vocab = Vocabulary(freq_threadshold)
     self.vocab.build_vocabulary(self.data, filepath)
     self.train = train
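A minimal sketch of instantiating the wrapper above. The class name CocoCaptionsDataset, the paths, and the vocabulary file are all hypothetical:

# the class name and every path below are assumptions
dataset = CocoCaptionsDataset(
    root="../data/train2014",
    annFile="../annotations/captions_train2014.json",
    filepath="./vocab.pkl",
    transform=None,
    freq_threadshold=5,
    train=True,
)
img, captions = dataset.data[0]  # underlying CocoCaptions sample: PIL image + caption list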
Example No. 5
import matplotlib.pyplot as plt

image_dir = "./data/"

json_path = image_dir + "annotations/captions_train2014.json"
'''
coco = COCO(json_path)
img, target, description = coco[3]
print(target)
imgplot = plt.imshow(img)
plt.show()
'''

import torchvision.transforms as transforms
from torchvision.datasets import CocoCaptions

cap = CocoCaptions(root=image_dir + "train2014/", annFile=json_path)

print('Number of samples: ', len(cap))
img, target = cap[2]  # load the 3rd sample

print(target)
imgplot = plt.imshow(img)
plt.show()
Example No. 6
def main():
    if len(sys.argv) == 2:
        sample_random = True
        load_knn = sys.argv[1]
    elif len(sys.argv) == 3:
        sample_random = False
        load_knn = sys.argv[1]
        img_path = sys.argv[2]
    else:
        raise Exception(
            f"Got {len(sys.argv)-1} args, was expecting 1 or 2 (path_to_knn-model, [img_path])"
        )

    ### CHANGE PARAMETERS HERE ###
    train_ann_file = "../annotations/annotations_trainval2014/annotations/captions_train2014.json"
    valid_ann_file = "../annotations/annotations_trainval2014/annotations/captions_val2014.json"

    coco_dataset = CocoCaptions(root="../data/train2014/train2014",
                                annFile=train_ann_file)

    k = int(load_knn.split("knn_k=")[-1].split("_num")[0])
    train_early_stop = int(load_knn.split("_num_")[-1])
    res_file = f"./results/val2014_k={k}_num_{train_early_stop}_results"
    capgen = CaptionGenerator(coco_dataset,
                              k=k,
                              early_stop=train_early_stop,
                              load_knn=load_knn)

    # get images from validation file
    if sample_random:
        val_dataset = CocoCaptions(root="../data/val2014/val2014",
                                   annFile=valid_ann_file)
        sample_imgs = []
        sample_img_ids = []
        for i in range(5):
            idx = random.choice(range(len(val_dataset)))
            img_id = val_dataset.ids[idx]
            sample_img_ids.append(img_id)
            img, caps = val_dataset[idx]
            sample_imgs.append(img)
    else:
        sample_imgs = [img_path]

    img_names = sample_img_ids if sample_random else img_path
    print(f"Getting caption prediction for images: {img_names}")
    best_captions = capgen.get_captions(sample_imgs)

    if sample_random:
        # evaluate and save results
        results, references, hypothesis = evaluate(img_names, best_captions,
                                                   capgen)

        with open(res_file, 'w') as f:
            json.dump(results, f)

        print(
            "-------------------------------------RESULTS-------------------------------------"
        )
        # evaluate results
        # coco_result = capgen.coco.coco.loadRes(res_file)
        # cocoEval = COCOEvalCap(capgen.coco.coco, coco_result)
        # cocoEval.params['image_id'] = coco_result.getImgIds()
        # output = cocoEval.evaluate()
        print(
            "--------------------------------------------------------------------------------"
        )
        for idx, img_id in enumerate(sample_img_ids):
            real_caption = references[img_id][img_id][0]
            bleu, scores = Bleu(4).compute_score(references[img_id],
                                                 hypothesis[img_id],
                                                 verbose=0)
            # bleu = scores[2] # 3-gram
            print(
                f"ID: {img_id} \n Real caption (1 of 5): {real_caption} \n Sampled caption: {best_captions[idx]} \n BLEU: {bleu}"
            )
    else:
        print(f"Sampled caption: {best_captions[0]}")
        out_path = "results/" + img_path.split("/")[-1]
        if img_path.startswith('http'):
            img = Image.open(requests.get(img_path, stream=True).raw)
        else:
            img = Image.open(img_path)
        plt.axis('off')
        plt.title(img_path)
        plt.imshow(img)
        plt.figtext(0.5,
                    0.01,
                    best_captions[0],
                    wrap=True,
                    horizontalalignment='center',
                    fontsize=12)
        plt.savefig(out_path)
        print(f"Output saved to {out_path}")
Example No. 7
    def _load_data(self, root, split, img_input_size):
        coco = CocoCaptions(root=f'{root}/{split}2014', annFile=f'{root}/annotations/captions_{split}2014.json',
                            transform=get_image_transforms(img_input_size))
        captions = self._extract_captions(coco)

        return coco, captions
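A hypothetical call site for the method above, assuming the standard COCO layout of <root>/train2014 images plus <root>/annotations and an arbitrary input size:

# hypothetical call from inside the owning class
coco, captions = self._load_data(root="../data/coco", split="train", img_input_size=224)
print(len(coco), len(captions))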
Example No. 8
NUM_IMAGES_SAVE = 4

# data
print(IMAGE_PATH)
compose = T.Compose([
    T.Resize(IMAGE_SIZE),
    T.CenterCrop(IMAGE_SIZE),
    T.ToTensor(),
])


def collate_fn(batch):
    return tuple(zip(*batch))


ds = CocoCaptions(root=IMAGE_PATH, annFile=ANNO_PATH, transform=compose)
dl = DataLoader(ds,
                BATCH_SIZE,
                shuffle=True,
                num_workers=8,
                collate_fn=collate_fn)

vae_params = dict(image_size=IMAGE_SIZE,
                  num_layers=NUM_LAYERS,
                  num_tokens=NUM_TOKENS,
                  codebook_dim=EMB_DIM,
                  hidden_dim=HID_DIM,
                  num_resnet_blocks=NUM_RESNET_BLOCKS)

vae = DiscreteVAE(**vae_params,
                  smooth_l1_loss=SMOOTH_L1_LOSS)
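Because collate_fn bypasses the default collation, each batch is a tuple of image tensors and a tuple of caption lists. A minimal sketch of inspecting one batch from the loader above:

# inspect one batch produced by dl; images are not stacked, captions stay as plain lists
images, captions = next(iter(dl))
print(len(images), images[0].shape)   # BATCH_SIZE tensors of shape (3, IMAGE_SIZE, IMAGE_SIZE)
print(len(captions[0]))               # typically 5 reference captions for the first image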
Example No. 9
 def __init__(self, root, annFile):
     CocoCaptions.__init__(self, root, annFile, transform=img_transform)
     self.pred = []
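The subclass above depends on a module-level img_transform; a minimal sketch of what it could look like (the sizes are assumptions):

# hypothetical definition of the img_transform the subclass expects
import torchvision.transforms as T

img_transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
])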