Example #1
    def __init__(self, coco, useBleu=False, useCider=False):
        self.coco = coco
        self.useBleu = useBleu
        self.useCider = useCider
        self.params = {'image_id': coco.getImgIds()}

        imgIds = self.params['image_id']
        gts = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]

        if self.useBleu:
            self.b_scorer = BleuScorer()
        if self.useCider:
            self.c_scorer = CiderScorer()

        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)

        for imgId in imgIds:
            ref = gts[imgId]

            assert (type(ref) is list)
            assert (len(ref) > 0)

            if self.useCider:
                self.c_scorer += (None, ref)

        if self.useCider:
            self.c_scorer.compute_doc_freq()
            assert (len(self.c_scorer.ctest) >= max(
                self.c_scorer.document_frequency.values()))
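The constructor above only feeds references into the CIDEr scorer; a minimal hedged sketch of the companion scoring step, assuming pycocoevalcap's CiderScorer API as exercised in Examples #27 and #32 (the captions and import path are illustrative):

from pycocoevalcap.cider.cider_scorer import CiderScorer  # assumed import path

cider_scorer = CiderScorer(n=4)
# Each += consumes a (hypothesis, list-of-references) pair of tokenized strings.
cider_scorer += ('a dog runs across the grass',
                 ['a dog is running', 'the dog runs on the grass'])
cider_scorer += ('a cat sits on a mat',
                 ['a cat is sitting on the mat', 'the cat sat on a mat'])
# compute_score() derives document frequencies from the references and
# returns the corpus mean plus one CIDEr value per hypothesis.
score, scores = cider_scorer.compute_score()
print(score, scores)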
Example #2
    def evaluate(self):

        evalRefIds = [ann['ref_id'] for ann in self.Res]

        refToGts = {}
        refToGtRanks1 = {}
        refToGtRanks2 = {}
        for ref_id in evalRefIds:
            ref = self.refer.Refs[ref_id]
            gt_sents = [sent['sent'] for sent in ref['sentences']]
            refToGts[ref_id] = gt_sents
            if self.eval_cider_r:
                gt_ranks1 = self.refer.get_rank1(ref)
                gt_ranks2 = self.refer.get_rank2(ref)
                refToGtRanks1[ref_id] = gt_ranks1
                refToGtRanks2[ref_id] = gt_ranks2

        refToRes = {ann['ref_id']: [ann['sent']] for ann in self.Res}

        tokenizer = PTBTokenizer()
        self.refToRes = tokenizer.tokenize(refToRes)
        self.refToGts = tokenizer.tokenize(refToGts)

        scorers = [(Cider(), "CIDEr")]
        for scorer, method in scorers:
            score, scores = scorer.compute_score(self.refToGts, self.refToRes)
            self.setEval(score, method)
            self.setRefToEvalRefs(scores, self.refToGts.keys(), method)
        self.setEvalRefs()
Example #3
    def evaluate(self):
        # ==================================================
        # Tokenization, remove punctuation
        # ==================================================
        print("tokenization ...")
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(self.ref)
        res = tokenizer.tokenize(self.res)

        # ==================================================
        # Set up scorers
        # ==================================================
        print "setting up scorers ..."
        scorers = [(Bleu(4), ("Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4")),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # ==================================================
        # Compute scores
        # ==================================================
        out = {}
        for scorer, method in scorers:
            print("computing %s score ..." % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, tuple):
                for sc, scs, m in zip(score, scores, method):
                    out[m] = sc
                    print("%s: %0.4f" % (m, sc))
            else:
                print("%s: %0.4f" % (method, score))
                out[method] = score

        return out
Example #4
def evaluate_captions(res: dict, gts: dict):

    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"), (Cider(), "CIDEr"),
               (Spice(), "SPICE")]
    rtn = {}
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                rtn[m] = sc
        else:
            rtn[method] = score

    return rtn
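A hedged usage sketch for evaluate_captions; the ids and captions are hypothetical, and both dicts follow the {id: [{'caption': ...}, ...]} shape that PTBTokenizer expects (compare Example #10). Note that the Meteor and Spice scorers shell out to Java, so this only runs where their jars are available:

gts = {'img1': [{'caption': 'A brown dog runs across the grass.'},
                {'caption': 'A dog is running in a park.'}]}
res = {'img1': [{'caption': 'a dog runs on grass'}]}
metrics = evaluate_captions(res, gts)
print(metrics)  # e.g. {'Bleu_1': ..., 'METEOR': ..., 'ROUGE_L': ..., 'CIDEr': ..., 'SPICE': ...}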
Example #5
def main():

    import sys
    res_path = sys.argv[1]

    gt_path = osp.join(this_dir, 'tgif-v1.0.tsv')
    test_list_path = osp.join(this_dir, 'splits', 'test.txt')

    test_keys = load_list(test_list_path)
    all_sents = load_sentences(gt_path)
    res = load_sentences(res_path)

    # make sure res has and only has single sentence
    # for all testing keys
    gts = {}
    for key in test_keys:
        gts[key] = all_sents[key]
        if key in res:
            res[key] = [res[key][0]]
        else:
            res[key] = [""]

    # =================================================
    # Convert to COCO format
    # =================================================
    gts = to_coco(gts, res.keys())
    res = to_coco(res, res.keys())

    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(),"METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")
    ]

    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % scorer.method())
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                print("%s: %0.3f" % (m, sc))
        else:
            print("%s: %0.3f" % (method, score))
Example #6
    def evaluate(self):

        gts = {}
        res = {}
        counter = 0
        for i in self.input_captions['v_preds']:
            imgId = i[self.key_name]
            if imgId not in res:
                res[imgId] = []
            res[imgId].append(i)
            gts[imgId] = self.ground_captions[imgId]

        # =================================================
        # Tokenization
        # =================================================
        if not self.no_print:
            print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        if not self.no_print:
            print('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")
                   #(Spice(), "SPICE")
                   ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            if not self.no_print:
                print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    if not self.no_print:
                        print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                if not self.no_print:
                    print("%s: %0.3f" % (method, score))
        self.setEvalImgs()

        res_diff_method = {}
        for metric, score in self.eval.items():
            score_round = '%.3f' % (score)
            res_diff_method[metric] = float(score_round)

        return res_diff_method
Example #7
    def evaluate(self):
        """
        Load the sentences from json files
        """
        def readJson(refName, candName):

            path_to_ref_file = os.path.join(self._pathToData, refName)
            path_to_cand_file = os.path.join(self._pathToData, candName)

            with open(path_to_ref_file, 'r') as f:
                ref_list = json.load(f)
            with open(path_to_cand_file, 'r') as f:
                cand_list = json.load(f)

            gts = defaultdict(list)
            res = defaultdict(list)
            # change of naming convention from ref to gts
            for l in ref_list:
                gts[l['image_id']].append({"caption": l['caption']})

            # change of naming convention from cand to res
            for l in cand_list:
                res[l['image_id']].append({"caption": l['caption']})

            return gts, res

        print('Loading Data...')
        gts, res = readJson(self._refName, self._candName)
        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(self._dfMode), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
            else:
                self.setEval(score, method)
Example #8
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # =================================================
        # Tokenization
        # =================================================
        print('===== tokenization... gts')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        print('===== tokenization... res')
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('===== computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example #9
def main():

    import sys
    res_path = sys.argv[1]

    gt_path = osp.join(this_dir, 'tgif-v1.0.tsv')
    test_list_path = osp.join(this_dir, 'splits', 'test.txt')

    test_keys = load_list(test_list_path)
    all_sents = load_sentences(gt_path)
    res = load_sentences(res_path)

    # make sure res has and only has single sentence
    # for all testing keys
    gts = {}
    for key in test_keys:
        gts[key] = all_sents[key]
        if key in res:
            res[key] = [res[key][0]]
        else:
            res[key] = [""]

    # =================================================
    # Convert to COCO format
    # =================================================
    gts = to_coco(gts, res.keys())
    res = to_coco(res, res.keys())

    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"), (Cider(), "CIDEr")]

    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % scorer.method())
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                print("%s: %0.3f" % (m, sc))
        else:
            print("%s: %0.3f" % (method, score))
Example #10
def test_tokenize():
    t = PTBTokenizer()
    tokens = t.tokenize(
        dict(id1=[
            dict(caption="Is this a good question?"),
            dict(caption="Is this a better question?")
        ],
             id2=[dict(caption="How's this question?")]))
    assert tokens == dict(
        id1=['is this a good question', 'is this a better question'],
        id2=['how \'s this question'])
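The tokenized output has exactly the {id: [sentence, ...]} shape the scorers consume, so it can be passed straight to compute_score. A minimal hedged bridge, assuming pycocoevalcap's import layout and one candidate per id:

from pycocoevalcap.cider.cider import Cider  # assumed import path

gts = {'id1': ['is this a good question', 'is this a better question'],
       'id2': ["how 's this question"]}
res = {'id1': ['is this a question'], 'id2': ['how is this question']}
score, per_id = Cider().compute_score(gts, res)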
Example #11
    def __init__(self, gts, res, df):
        print('tokenization...')
        tokenizer = PTBTokenizer('gts')
        _gts = tokenizer.tokenize(gts)
        print('tokenized refs')
        tokenizer = PTBTokenizer('res')
        _res = tokenizer.tokenize(res)
        print('tokenized cands')

        self.gts = _gts
        self.res = _res
        self.df = df
Example #12
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # =================================================
        # Tokenization
        # =================================================
        print('===== tokenization... gts')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        print('===== tokenization... res')
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('===== computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example #13
    def evaluate(self, gts=None, res=None):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        if gts is None and res is None:
            gts = {}
            res = {}
            for imgId in imgIds:
                gts[imgId] = self.coco.imgToAnns[imgId]
                res[imgId] = self.cocoRes.imgToAnns[imgId]

            print("before")
            print(gts[184321])
            print(res[184321])

            # =================================================
            # Tokenization
            # =================================================
            print('tokenization...')
            tokenizer = PTBTokenizer()
            gts = tokenizer.tokenize(gts)
            res = tokenizer.tokenize(res)

        print("after")
        print(gts[184321])
        print(res[184321])

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr"), (Spice(), "SPICE")]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
        return gts, res
Example #14
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            anno = {'image_id': imgId, 'caption': self.ref[imgId]}
            gts[imgId] = [anno]
            anno_pred = {'image_id': imgId, 'caption': self.pred[imgId]}
            res[imgId] = [anno_pred]

        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
           # (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(),"METEOR"),
           # (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example #15
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example #16
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco[imgId]  #.imgToAnns[imgId]
            res[imgId] = self.cocoRes[imgId]  #.imgToAnns[imgId]

        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            #(cBleu(4), ["cBleu_1", "cBleu_2", "cBleu_3", "cBleu_4"]),
            #(Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L")
            #(Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        final_score = 0
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
                    print("%s: %0.3f" % (m, sc))
                    final_score = sc
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
        return final_score
Example #17
    def evaluate(self):
        imgIds = self.params
        # imgIds = self.coco.getImgIds()
        gts = self.coco
        res = self.cocoRes
        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example #18
    def evaluate(self, ngram_metric):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco[imgId]#.imgToAnns[imgId]
            res[imgId] = self.cocoRes[imgId]#.imgToAnns[imgId]

        # =================================================
        # Tokenization
        # =================================================
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        if ngram_metric == 'ROUGE_L':
            scorers = [
                (Bleu(1), ["Bleu_1"]),
                (Rouge(), "ROUGE_L")
            ]
        else:
            assert ngram_metric.startswith('Bleu_')
            i = ngram_metric[len('Bleu_'):]
            assert i.isdigit()
            i = int(i)
            assert i > 0
            scorers = [
                (Bleu(i), ['Bleu_{}'.format(j) for j in range(1, i + 1)]),
            ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
        self.setEvalImgs()
        return self.evalImgs
Example #19
def eval(target, refers, scorer, tokenizer=PTBTokenizer(), use_private=False):
    """docstring for main"""
    k = 'single'

    res_single = {k:[target]}
    gts = {k:refers}

    # =================================================
    # Convert to COCO format
    # =================================================
    gts = to_coco(gts, res_single.keys())
    res = to_coco(res_single, res_single.keys())

    # =================================================
    # Tokenization
    # =================================================
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    if use_private:
        # initialize the meteor.jar
        score, scores = scorer._compute_score(gts, res)
    else:
        score, scores = scorer.compute_score(gts, res)
    print(score)
    return score, scores
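A hypothetical call for the helper above, assuming the module-level to_coco converter wraps each sentence list into the annotation format PTBTokenizer expects:

score, scores = eval('a dog runs across the grass',
                     ['a dog is running', 'the dog runs on the grass'],
                     Cider())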
Example #20
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...'%(scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    print("%s: %0.3f"%(m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                print("%s: %0.3f"%(method, score))
        self.setEvalImgs()
Example #21
    def evaluate(self):
        imgIds = self.params
        # imgIds = self.coco.getImgIds()
        gts = self.coco
        res = self.cocoRes
        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example #22
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(["-m", "stem", '-w', '1.0', '-p',
                     '0.85 0.2 0.0 0.75']), "METEOR_Stems"),
            (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"), (Cider(), "CIDEr"),
            (Spice(), "SPICE")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example #23
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]
        print('res:', res[imgId])
        # print('gt:',gts[imgId])

        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)
        print('res:', res[imgId])

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example #24
    def evaluate(self):

        evalRefIds = [ann['ref_id'] for ann in self.Res]

        refToGts = {}
        for ref_id in evalRefIds:
            ref = self.refer.Refs[ref_id]
            gt_sents = [sent['sent'] for sent in ref['sentences']]  # up to 3 expressions
            refToGts[ref_id] = gt_sents
        refToRes = {ann['ref_id']: [ann['sent']] for ann in self.Res}

        print('tokenization...')
        tokenizer = PTBTokenizer()
        self.refToRes = tokenizer.tokenize(refToRes)
        self.refToGts = tokenizer.tokenize(refToGts)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(self.refToGts, self.refToRes)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setRefToEvalRefs(scs, self.refToGts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setRefToEvalRefs(scores, self.refToGts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalRefs()
Example #25
    def evaluate(self):

        evalRefIds = [ann['ref_id'] for ann in self.Res]

        refToGts = {}
        for ref_id in evalRefIds:
            ref = self.refer.Refs[ref_id]
            gt_sents = [sent['sent'] for sent in ref['sentences']]  # up to 3 expressions
            refToGts[ref_id] = gt_sents
        refToRes = {ann['ref_id']: [ann['sent']] for ann in self.Res}

        print('tokenization...')
        tokenizer = PTBTokenizer()
        self.refToRes = tokenizer.tokenize(refToRes)
        self.refToGts = tokenizer.tokenize(refToGts)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(self.refToGts, self.refToRes)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setRefToEvalRefs(scs, self.refToGts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setRefToEvalRefs(scores, self.refToGts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalRefs()
Example #26
    def evaluate(self, gts, res):
        # =================================================
        # Tokenization
        # =================================================
        logging.info('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        logging.info('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")
                   #(Spice(), "SPICE")
                   ]

        # =================================================
        # Compute scores
        # =================================================
        bleu_4_score = 0
        for scorer, method in scorers:
            logging.info('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    if m == "Bleu_4":
                        bleu_4_score = sc
                    logging.info("%s: %0.3f" % (m, sc))
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                logging.info("%s: %0.3f" % (method, score))
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
        return bleu_4_score
Example #27
    def setup(self):
        imgIds = self.params['image_id']

        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.coco.imgToAnns[imgId]

        # Tokenize
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # print len(gts.keys), len(res.keys)

        # Setup cider
        assert (gts.keys() == res.keys())
        imgIds = gts.keys()

        cider_scorer = CiderScorer()

        for id in imgIds:
            hypo = res[id]
            ref = gts[id]

            # Sanity check.
            assert (type(hypo) is list)
            # assert(len(hypo) == 1)
            assert (type(ref) is list)
            assert (len(ref) > 0)

            cider_scorer += (hypo[0], ref)

        cider_scorer.compute_doc_freq()
        assert (len(cider_scorer.ctest) >= max(
            cider_scorer.document_frequency.values()))
        self.flag_setup = True
        self.cider_scorer = cider_scorer
Example #28
def main():
    coco_train = COCO(
        "/data/home/wuzhiron/lixin/coco14/annotations/captions_train2014.json")
    coco_val = COCO(
        "/data/home/wuzhiron/lixin/coco14/annotations/captions_val2014.json")
    # res_train = coco_train.getImgIds()
    # res_val = coco_val.getImgIds()
    # print(np.all(res_train == gts_train))
    # print(np.all(res_val == gts_val))
    # print(res_train[:10])
    # print(res_val[:10])
    # print(gts_train[:10])
    # print(gts_val[:10])
    train_imgids = pkl.load(
        open("/data/home/wuzhiron/lixin/coco14/train_imgids.pkl", 'rb'))
    val_imgids = pkl.load(
        open("/data/home/wuzhiron/lixin/coco14/val_imgids.pkl", 'rb'))

    train_caps = {}
    val_caps = {}

    for imgid in train_imgids:
        train_caps[imgid] = coco_train.imgToAnns[imgid]
    for imgid in val_imgids:
        val_caps[imgid] = coco_val.imgToAnns[imgid]

    tokenizer = PTBTokenizer()
    train_caps = tokenizer.tokenize(train_caps)
    val_caps = tokenizer.tokenize(val_caps)

    scores = np.zeros((100, 5, len(train_caps)), dtype=np.float32)
    for i in range(100):
        for j in range(5):
            scores[i][j] = compute_score(train_caps, val_caps, train_imgids,
                                         val_imgids, i, j)
        #print(".", end="")
        print("{} / 100".format(i))

    np.save("cider_scores", scores)
    return
Example #29
    def evaluate(self):

        # imgIds = self.coco.getImgIds()
        gts = dict(zip(range(0, len(self.predicted_list)),
                       self.predicted_list))
        res = dict(zip(range(0, len(self.label_list)), self.label_list))

        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.set_textid_to_eval(scs, gts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.set_textid_to_eval(scores, gts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.set_eval()
Example #30
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # =================================================
        # Tokenization
        # =================================================
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
        self.setEvalImgs()
Example #31
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [(self.Spice, "SPICE")]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example #32
    def setup(self):
        imgIds = self.params['image_id']

        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.coco.imgToAnns[imgId]

        # Tokenize
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # Setup cider
        assert(gts.keys() == res.keys())
        imgIds = gts.keys()

        cider_scorer = CiderScorer()

        for id in imgIds:
            hypo = res[id]
            ref = gts[id]

            # Sanity check.
            assert(type(hypo) is list)
            # assert(len(hypo) == 1)
            assert(type(ref) is list)
            assert(len(ref) > 0)

            cider_scorer += (hypo[0], ref)
            
        cider_scorer.compute_doc_freq()
        assert(len(cider_scorer.ctest) >= max(cider_scorer.document_frequency.values()))
        self.flag_setup = True
        self.cider_scorer = cider_scorer
Example #33
    def __init__(self, gts, res, df):
        print('tokenization...')
        tokenizer = PTBTokenizer('gts')
        _gts = tokenizer.tokenize(gts)
        print('tokenized refs')
        tokenizer = PTBTokenizer('res')
        _res = tokenizer.tokenize(res)
        print('tokenized cands')

        self.gts = _gts
        self.res = _res
        self.df = df
Example #34
class SpiceEval():
    def __init__(self):
        self.evalImgs = []
        self.eval = {}
        self.imgToEval = {}
        self.spice = Spice()
        self.tokenizer = PTBTokenizer()

    """
    The input have structure
    {'123': [{'image_id':123, 'caption': 'xxxxx'}, {'image_id':123, 'caption': 'yyy'}], ...}
    """

    def evaluate(self, gts, res):
        assert set(gts.keys()) == set(res.keys())
        imgIds = gts.keys()
        gts = self.tokenizer.tokenize(gts)
        res = self.tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================

        # =================================================
        # Compute scores
        # =================================================
        print('computing %s score...' % self.spice.method())
        score, scores = self.spice.compute_score(gts, res)
        print("%s: %0.3f" % ("spice", score))
        self.eval['spice'] = score
        print(scores)
        for imgId, score in zip(sorted(imgIds), scores):
            if imgId not in self.imgToEval:
                self.imgToEval[imgId] = {}
                self.imgToEval[imgId]["image_id"] = imgId
            self.imgToEval[imgId]["spice"] = score
        return self.eval['spice'], self.imgToEval
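A hedged usage sketch matching the docstring's input structure; the captions are hypothetical, and the Spice scorer needs its Java backend available:

evaluator = SpiceEval()
gts = {123: [{'image_id': 123, 'caption': 'a man rides a horse'},
             {'image_id': 123, 'caption': 'a person riding a horse in a field'}]}
res = {123: [{'image_id': 123, 'caption': 'a man riding a horse'}]}
overall, per_image = evaluator.evaluate(gts, res)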
Example #35
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        gts_all = {}
        gts_region_idx = {}
        for imgId in imgIds:
  
          gts[imgId] = self.ref[imgId]
          res[imgId] = self.pred[imgId]
          gts_all[imgId] = []
          
          for i,anno in enumerate(gts[imgId]):
            for cap in anno['captions']:
              gts_all[imgId].append({'image_id': anno['image_id'], 'caption': cap, 'region_id': i})
              
        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts_tokens = tokenizer.tokenize(gts_all)
        res_tokens = tokenizer.tokenize(res)
        # insert caption tokens into gts
        for imgId in imgIds:
          for tokens, cap_info in zip(gts_tokens[imgId], gts_all[imgId]):
            region_id = cap_info['region_id']
            if 'caption_tokens' not in gts[imgId][region_id]:
              gts[imgId][region_id]['caption_tokens'] = []
            gts[imgId][region_id]['caption_tokens'].append(tokens)

        

        # =================================================
        # Compute scores
        # =================================================
        # Holistic score, as in DenseCap paper: multi-to-multi matching
        print('computing Meteor score...')
        score, scores = Meteor().compute_score_m2m(gts_tokens, res_tokens)
        #self.setEval(score, method)
        #self.setImgToEvalImgs(scores, imgIds, method)
        print("Meteor: %0.3f" % score)
        #self.setEvalImgs()
        # mean ap settings, as in DenseCap paper
        overlap_ratios = [0.3,0.4,0.5,0.6,0.7]
        meteor_score_th = [0, 0.05, 0.1, 0.15, 0.2, 0.25]
        ap_matrix = np.zeros((len(overlap_ratios), len(meteor_score_th)))
        gt_region_n = sum([len(gts[imgId]) for imgId in imgIds])
        #calculate the nxm bbox overlap in one pass
        #overlap_matrices = {}
        eval_stats = {}
        gts_tokens_match = {}
        res_tokens_match = {}
        all_keys = []
        t1 = time.time()
        for imgId in imgIds:
          model_caption_locations = res[imgId]
          gt_caption_locations = gts[imgId]
          #should be sorted using predicted prob in advance
          #model_caption_locations.sort(key=lambda x:-x['log_prob'])
          ov_matrix = self.calculate_overlap_matrix(model_caption_locations, gt_caption_locations)
          match_gt_ids, match_ratios = self.bbox_match(ov_matrix)
          probs = np.array([x['prob'] for x in model_caption_locations])
          scores = np.zeros((len(res[imgId])))
          match_model_ids = np.where(match_gt_ids > -1)[0]
          match_pairs = zip(match_model_ids, match_gt_ids[match_model_ids])
        
          for model_id, gt_id in match_pairs:
            key = (imgId, model_id)
            all_keys.append(key)
            gts_tokens_match[key] = gts[imgId][gt_id]['caption_tokens']
            res_tokens_match[key] = [res_tokens[imgId][model_id]]
          #assert(gts_tokens_match.keys() == match_model_ids.tolist())
          #score_match, scores_match = Meteor().compute_score(gts_tokens_match, res_tokens_match)
          #scores[match_model_ids] = scores_match
            
              
          eval_stats[imgId] = {'match_ids': match_gt_ids, 'match_ratios': match_ratios, 'probs': probs, 'meteor_scores': scores}
        #compute meteor scores of matched regions in one pass
        score_match, scores_match = Meteor().compute_score(gts_tokens_match, res_tokens_match, imgIds=all_keys)
        for key, score in zip(all_keys, scores_match):
          eval_stats[key[0]]['meteor_scores'][key[1]] = score
        t2 = time.time()
        print('caption scoring finished, takes %f seconds' % (t2 - t1))

        all_match_ratios = np.concatenate([v['match_ratios'] for k, v in eval_stats.items()])
        all_probs = np.concatenate([v['probs'] for k, v in eval_stats.items()])
        all_scores = np.concatenate([v['meteor_scores'] for k, v in eval_stats.items()])
        prob_order = np.argsort(all_probs)[::-1]
        all_match_ratios = all_match_ratios[prob_order]
        all_scores = all_scores[prob_order]
        for rid, overlap_r in enumerate(overlap_ratios):
          for th_id, score_th in enumerate(meteor_score_th):
            # compute AP for each setting
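            # a prediction counts as a true positive only if both its box
            # overlap and its Meteor score clear the current thresholds;
            # predictions were sorted by confidence (prob_order) above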
            tp = (all_match_ratios > overlap_r) & (all_scores > score_th)
            fp = 1 - tp
            tp = tp.cumsum().astype(np.float32)
            fp = fp.cumsum().astype(np.float32)
            rec = tp / gt_region_n
            prec = tp / (fp + tp)
            ap = 0
            all_t = np.linspace(0,1,100)
            apn = len(all_t)
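            # 100-point interpolated AP: take the best precision achieved at
            # recall > t for each level t, then average over all levels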
            for t in all_t:
              mask = rec > t
              p = np.max(prec * mask)
              ap += p
            ap_matrix[rid, th_id] = ap / apn
        t3 = time.time()
        print 'mean AP computation finished, took %f seconds' % (t3 - t2)
        mean_ap = np.mean(ap_matrix) * 100  # percent
        print 'AP matrix:'
        print ap_matrix
        print "mean average precision is %0.3f" % mean_ap
Exemplo n.º 36
0
    def __init__(self):
        self.evalImgs = []
        self.eval = {}
        self.imgToEval = {}
        self.spice = Spice()
        self.tokenizer = PTBTokenizer()
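
A minimal usage sketch for this SPICE evaluator, assuming the standard coco-caption API; the image id and captions below are made up for illustration.

# assumes pycocoevalcap's PTBTokenizer and Spice, as used in the class above
gts = {'1': [{'caption': 'a dog runs across the grass'},
             {'caption': 'a brown dog running outside'}]}
res = {'1': [{'caption': 'a dog is running'}]}
tokenizer = PTBTokenizer()
score, scores = Spice().compute_score(tokenizer.tokenize(gts),
                                      tokenizer.tokenize(res))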
Exemplo n.º 37
0
    def evaluate(self):
        """
        Load the sentences from json files
        """
        def readJson(refName, candName):

            path_to_ref_file = os.path.join(refName)
            path_to_cand_file = os.path.join(self._pathToData, candName)

            ref_list = json.loads(open(path_to_ref_file, 'r').read())
            cand_list = json.loads(open(path_to_cand_file, 'r').read())
            ref_list = ref_list['annotations']

            res = defaultdict(list)

            for id_cap in cand_list:
                res[id_cap['image_id']].extend(id_cap['captions'])

            gts = defaultdict(list)

            # change of naming convention from ref to gts
            for l in ref_list:
                gts[l['image_id']].append({"caption": l['caption']})

            return gts, res

        print 'Loading Data...'
        gts, res = readJson(self._refName, self._candName)
        # =================================================
        # Set up scorers
        # =================================================
        accuracy = {}
        tokenizer = PTBTokenizer()
        token_gts = tokenizer.tokenize(gts)
        avg_cider = 0
        for image_id in res.keys():
            accuracy[image_id] = []
            new_gts = {image_id: token_gts[image_id]}
            # new_gts = {"0": [
            #     "zebras stand on grassy field with trees",
            #     "zebras stand on grassy field"
            #     # "zebras gazing"
            # ]
            # }
            for i in range(self._num):
                new_res = {image_id: [res[image_id][i]]}
                # new_res = {"0": ["grassy field with trees"]}
                scorers = [
                    # (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                    # (Meteor(),"METEOR"),
                    # (Rouge(), "ROUGE_L"),
                    (Cider(self._dfMode, self._df_file), "CIDEr"),
                    # (Spice(), "SPICE")
                ]

                # =================================================
                # Compute scores
                # =================================================
                for scorer, method in scorers:
                    print 'computing %s score...' % (scorer.method())
                    score, scores = scorer.compute_score(new_gts, new_res)
                accuracy[image_id].append(score)
            avg_cider += np.mean(accuracy[image_id])
        self.eval = accuracy
        print 'Average CIDEr: %.5f' % (avg_cider / len(accuracy))
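
Note that each accuracy[image_id] holds self._num per-candidate CIDEr scores, so the printed figure is a mean of per-image means rather than a pooled average over every candidate caption.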
Exemplo n.º 38
0
    def cross_evaluate(self):
        """
		We will evaluate how relevant is the generated expression to the ground-truth expressions,
		and how different it is to the expressions of the other objects within the same image.
		Thus, the prerequisite is the dataset is split by image_id, and each ann has multiple
		expressions, e.g., our new RefCOCO dataset whose tesing object has ~10 expressions.
		We first compute score on sc_ii = (sent_i, gd_sents_i), then compute score on 
		sc_ij = (sent_i, gd_sents_j), the margin of max(0, sc_ii - sc_ij) will be considered
		as final score.
		Speficically, we choose METEOR and CIDEr for this kind of evaluation.

		For doing so, we need to prepare ref_to_gts and ref_to_res. As we want to do cross evaluation,
		our key would be paird_id, i.e., "ref_id1_to_ref_id2", e.g, '123_456', then 
		input:
		- Gts[123_456] = [456's gd sents]
		- Res[123_456] = [123's predicted sents]. 
		return:
		- ref_to_eval[123_456] = {method: score}, which measures 123's generation over 456's gd-sents
		Note, we also compute score of 123_123
		
		We will use "sids" and "cids" to donate source_ref_ids and cross_ref_ids.
		"""
        source_ref_ids = [pred['ref_id'] for pred in self.preds]
        Preds = {pred['ref_id']: pred['sent'] for pred in self.preds}

        # construct pair_id, which is [source_ref_id]_[target_ref_id], e.g., 123_456
        Gts = {}
        Res = {}
        for source_ref_id in source_ref_ids:
            image_id = self.refer.Refs[source_ref_id]['image_id']
            cross_refs = self.refer.imgToRefs[
                image_id]  # including source_ref itself
            for cross_ref in cross_refs:
                pair_id = str(source_ref_id) + '_' + str(cross_ref['ref_id'])
                Res[pair_id] = [Preds[source_ref_id]]
                Gts[pair_id] = [
                    sent['sent'] for sent in cross_ref['sentences']
                ]

        # tokenize
        print 'tokenization...'
        tokenizer = PTBTokenizer()
        Gts = tokenizer.tokenize(Gts)
        Res = tokenizer.tokenize(Res)

        # set up scorers
        print 'setting up scorers...'
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # compute scores
        for scorer, method in scorers:
            print 'computing %s score...' % (scorer.method())
            score, scores = scorer.compute_score(Gts, Res)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEvals(scs, Gts.keys(), m)
                    print "%s: %0.3f" % (m, sc)
            else:
                self.setEvals(scores, Gts.keys(), method)
                print "%s: %0.3f" % (method, score)
Exemplo n.º 39
0
    def evaluate(self):
        """
        Load the sentences from json files
        """
        def readJson(candName, num=10):
            path_to_cand_file = os.path.join(self._pathToData, candName)
            cand_list = json.loads(open(path_to_cand_file, 'r').read())

            res = defaultdict(list)

            for id_cap in cand_list:
                res[id_cap['image_id']].extend(id_cap['captions'])

            return res

        print 'Loading Data...'
        res = readJson(self._candName)
        ratio = {}
        for im_id in res.keys():
            print 'number of images: %d\n' % len(ratio)
            cov = np.zeros([10, 10])
            for i in range(10):
                for j in range(i, 10):
                    new_gts = {}
                    new_res = {}
                    new_res[im_id] = [{'caption': res[im_id][i]}]
                    new_gts[im_id] = [{'caption': res[im_id][j]}]
                    # =================================================
                    # Set up scorers
                    # =================================================
                    print 'tokenization...'
                    tokenizer = PTBTokenizer()
                    new_gts = tokenizer.tokenize(new_gts)
                    new_res = tokenizer.tokenize(new_res)

                    # =================================================
                    # Set up scorers
                    # =================================================
                    print 'setting up scorers...'
                    scorers = [
                        # (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                        # (Meteor(),"METEOR"),
                        # (Rouge(), "ROUGE_L"),
                        (Cider(self._dfMode, self._df_file), "CIDEr")
                    ]

                    # =================================================
                    # Compute scores
                    # =================================================
                    for scorer, method in scorers:
                        print 'computing %s score...' % (scorer.method())
                        score, scores = scorer.compute_score(new_gts, new_res)

                        cov[i, j] = score
                        cov[j, i] = cov[i, j]
            u, s, v = np.linalg.svd(cov)
            r = max(s) / s.sum()
            print 'ratio=%.5f\n' % r
            ratio[im_id] = r
            if len(ratio) == 5000:
                break

        self.eval = ratio
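
As a sanity check on what the ratio measures (our reading, not stated in the code): if all ten candidate captions were identical, every pairwise CIDEr score would be equal, the self-similarity matrix would be rank one, and the top singular value would carry all the energy, giving a ratio of 1; more diverse caption sets spread the energy across singular values and yield a lower ratio.

import numpy as np

cov = np.ones((10, 10))  # ten mutually identical captions
u, s, v = np.linalg.svd(cov)
print 'ratio = %.5f' % (max(s) / s.sum())  # prints ratio = 1.00000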