def __init__(self):
    self.bleu_scorer = Bleu(4)
    self.rouge_scorer = Rouge()
    self.cider_scorer = Cider()
    self.truth = None
    remove = string.punctuation + "、。,."
    # re.escape keeps metacharacters such as ']' from breaking the character class
    self.remove_pattern = r"[{}]".format(re.escape(remove))  # create the pattern
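
A minimal sketch of the pattern in use, assuming `import re` and `import string`; the caption text is invented for illustration:

import re
import string

remove = string.punctuation + "、。,."  # ASCII plus common CJK punctuation
remove_pattern = r"[{}]".format(re.escape(remove))

caption = "a cat, sitting on a mat。"
print(re.sub(remove_pattern, "", caption))  # -> a cat sitting on a mat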
Example #2
def compute_score(gts, val_caps, train_imgids, val_imgids, i, j):
    res = {}
    for imgid in train_imgids:
        res[imgid] = [val_caps[val_imgids[i]][j]]

    scorer = Bleu(4)
    score, scores = scorer.compute_score(gts, res, train_imgids)
    return np.array(scores)
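
For context, a toy invocation of the helper above. The data shapes are read off the indexing and are assumptions, and the call presumes a Bleu fork whose compute_score accepts an explicit id list (the stock pycocoevalcap version takes only gts and res):

import numpy as np

gts = {"t1": ["a dog runs", "a dog is running"], "t2": ["a cat sleeps"]}
train_imgids = ["t1", "t2"]
val_caps = {"v1": ["a dog running fast", "a fast dog"]}
val_imgids = ["v1"]

# one row per Bleu order, one column per training image
per_image = compute_score(gts, val_caps, train_imgids, val_imgids, 0, 0)
print(per_image.shape)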
Example #3
    def evaluate(self, ngram_metric):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco[imgId]#.imgToAnns[imgId]
            res[imgId] = self.cocoRes[imgId]#.imgToAnns[imgId]

        # =================================================
        # Tokenization
        # =================================================
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        if ngram_metric == 'ROUGE_L':
            scorers = [
                (Bleu(1), ["Bleu_1"]),
                (Rouge(), "ROUGE_L")
            ]
        else:
            assert ngram_metric.startswith('Bleu_')
            i = ngram_metric[len('Bleu_'):]
            assert i.isdigit()
            i = int(i)
            assert i > 0
            scorers = [
                (Bleu(i), ['Bleu_{}'.format(j) for j in range(1, i + 1)]),
            ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
        self.setEvalImgs()
        return self.evalImgs
Example #4
def evaluator(gts, res):
    eval = {}
    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    # TODO: use spaCy for tokenization
    gts = tokenize(gts)
    res = tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        # (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        (Spice(), "SPICE")
    ]

    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % scorer.method())
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                eval[m] = sc
                print("%s: %0.3f" % (m, sc))
        else:
            eval[method] = score
            print("%s: %0.3f" % (method, score))
    return eval
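
A toy call of evaluator, assuming the tokenize helper (not shown) accepts dicts mapping an id to a list of caption strings, and that the Java dependencies behind METEOR and SPICE are installed:

gts = {"img1": ["a man riding a horse", "a person on a horse"]}
res = {"img1": ["a man rides a horse"]}
scores = evaluator(gts, res)
print(scores)  # e.g. {'Bleu_1': ..., 'METEOR': ..., 'CIDEr': ..., 'SPICE': ...}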
Example #5
    def evaluate(self, album_to_Gts, album_to_Res):
        self.album_to_Res = album_to_Res
        self.album_to_Gts = album_to_Gts

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")  # df='VIST/VIST-train-words'
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score ...' % (scorer.method()))
            score, scores = scorer.compute_score(self.album_to_Gts,
                                                 self.album_to_Res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setAlbumToEval(scs, self.album_to_Gts.keys(), m)
                    print('%s: %.3f' % (m, sc))
            else:
                self.setEval(score, method)
                self.setAlbumToEval(scores, self.album_to_Gts.keys(), method)
                print('%s: %.3f' % (method, score))

        self.setEvalAlbums()
Example #6
def evaluate(gts=None, res=None):
    # imgIds = self.coco.getImgIds()
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]

    # =================================================
    # Compute scores
    # =================================================
    res_scores = []
    for scorer, method in scorers:
        print('computing %s score...' % scorer.method())
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                res_scores.append((m, sc))
        else:
            res_scores.append((method, score))
    return res_scores
Example #7
def evaluate_captions(res: dict, gts: dict):

    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"), (Cider(), "CIDEr"),
               (Spice(), "SPICE")]
    rtn = {}
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                rtn[m] = sc
        else:
            rtn[method] = score

    return rtn
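
A usage sketch for evaluate_captions. PTBTokenizer expects each id to map to a list of {"caption": ...} dicts, and METEOR/SPICE additionally need the Java tools bundled with pycocoevalcap:

gts = {"1": [{"caption": "a dog plays in the park"},
             {"caption": "a dog running on grass"}]}
res = {"1": [{"caption": "a dog is playing outside"}]}

metrics = evaluate_captions(res, gts)
for name, value in metrics.items():
    print(name, value)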
    def evaluate(self):
        output = []
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            # (Meteor(),"METEOR"),
            # (Rouge(), "ROUGE_L"),
            # (Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            # print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(self.gts, self.res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    print("%s: %0.5f" % (m, sc))
                    output.append(sc)
            else:
                print("%s: %0.5f" % (method, score))
                output.append(score)
        return output
def language_eval(sample_seqs, gt_seqs):
    # sample_seqs: list of generated sentences, e.g. [sent, sent, ...]
    # gt_seqs: list of reference lists, e.g. [[ref1, ref2, ...], [ref1, ref2, ...], ...]
    import sys
    sys.path.append("coco-caption/pycocoevalcap/")
    from bleu.bleu import Bleu
    from cider.cider import Cider
    from meteor.meteor import Meteor
    from rouge.rouge import Rouge
    from collections import OrderedDict

    assert len(sample_seqs) == len(gt_seqs), "number of eval data is different"
    res = OrderedDict()  # res: {0: [sent], 1: [sent], ...}
    for i in range(len(sample_seqs)):  # for each sample (sentence)
        res[i] = [sample_seqs[i]]

    gts = OrderedDict()  # gts: {0: [ref1, ref2, ...], 1: [ref1, ref2, ...], ...}
    for i in range(len(gt_seqs)):
        gts[i] = list(gt_seqs[i])

    avg_bleu_score, bleu_scores = Bleu(4).compute_score(gts, res)
    avg_cider_score, cider_scores = Cider().compute_score(gts, res)
    avg_meteor_score, meteor_scores = Meteor().compute_score(gts, res)
    avg_rouge_score, rouge_scores = Rouge().compute_score(gts, res)

    print(" BLEU1:{}\n BLEU2:{}\n BLEU3:{}\n BLEU4:{}\n METEOR:{}\n ROUGE:{}\n CIDEr:{}\n"
          .format(avg_bleu_score[0], avg_bleu_score[1], avg_bleu_score[2], avg_bleu_score[3],
                  avg_meteor_score, avg_rouge_score, avg_cider_score))

    return {'BLEU': avg_bleu_score, 'METEOR': avg_meteor_score,
            'ROUGE': avg_rouge_score, 'CIDEr': avg_cider_score}
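
A minimal call, assuming a coco-caption checkout at the path added above and Java available for METEOR:

samples = ["a man is riding a horse"]
refs = [["a man rides a horse", "a person riding a horse"]]
scores = language_eval(samples, refs)
print(scores['CIDEr'])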
Example #10
    def evaluate(self):
        output = []
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # =================================================
        # Compute scores
        # =================================================
        scores_dict = {}
        #scores_dict["model_key"] = self.model_key
        for scorer, method in scorers:
            # print 'computing %s score...'%(scorer.method())
            score, scores = scorer.compute_score(self.gts, self.res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    print("%s: %0.5f" % (m, sc))
                    output.append(sc)
                    scores_dict[m] = str(sc)
            else:
                print("%s: %0.5f" % (method, score))
                output.append(score)
                scores_dict[method] = score

        return output
Example #11
class EvalCap:
    ref_list = [
        'this is a reference sentence for sentence2 which was generated by your model'
    ]
    hyp_list = ['this is sentence2 which has been generated by your model']

    refs = {idx: [lines.strip()] for (idx, lines) in enumerate(ref_list)}
    hyps = {idx: [lines.strip()] for (idx, lines) in enumerate(hyp_list)}

    scorers = [
        (Bleu(4), ['Bleu_1', 'Bleu_2', 'Bleu_3', 'Bleu_4']),
        # (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")
    ]
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(hyps, refs)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                # self.setEval(sc, m)
                print("%s: %0.3f" % (m, sc))
        else:
            # self.setEval(score, method)
            print("%s: %0.3f" % (method, score))
Example #12
    def evaluate(self):
        # ==================================================
        # Tokenization, remove punctuation
        # ==================================================
        '''
        print("tokenization ...")
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(self.ref)
        res = tokenizer.tokenize(self.res)
        '''
        gts = self.ref
        res = self.res  # tokenization above is disabled, so score the raw results
        # ==================================================
        # Set up scorers
        # ==================================================
        print("setting up scorers ...")
        scorers = [(Bleu(4), ("Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4")),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # ==================================================
        # Compute scores
        # ==================================================
        out = {}
        for scorer, method in scorers:
            print("computing %s score ..." % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, tuple):
                for sc, scs, m in zip(score, scores, method):
                    out[m] = sc
                    print("%s: %0.4f" % (m, sc))
            else:
                print("%s: %0.4f" % (method, score))
                out[method] = score

        return out
Example #13
def compute_metrics_from_files(p_path_to_reference_file,
                               p_path_to_candidate_file, p_max_bleu_order):
    """Compute BLEU-N and ROUGE-L metrics.
    IMPORTANT: No-answer reference will be excluded from calculation.

    Args:
    p_path_to_reference_file (str): path to reference file.
    p_path_to_candidate_file (str): path to candidate file.
        Both files should be in format:
            {QUERY_ID_JSON_ID: <a_query_id_int>,
             ANSWERS_JSON_ID: [<list_of_answers_string>]}
    p_max_bleu_order: the maximum n order in bleu_n calculation.

    Returns:
    dict: dictionary of {'bleu_n': <bleu_n score>, 'rouge_l': <rouge_l score>}
    """

    reference_dictionary, reference_no_answer_query_ids = \
        load_file(p_path_to_reference_file)
    candidate_dictionary, _ = load_file(p_path_to_candidate_file)

    the_ids = set(candidate_dictionary.keys())

    filtered_reference_dictionary = \
        {key: value for key, value in reference_dictionary.items()
         if key not in reference_no_answer_query_ids and key in the_ids}

    filtered_candidate_dictionary = \
        {key: value for key, value in candidate_dictionary.items()
         if key not in reference_no_answer_query_ids}

    for query_id, answers in filtered_candidate_dictionary.items():
        assert \
            len(answers) <= 1, \
            'query_id %d contains more than 1 answer \"%s\" in candidate file' % \
            (query_id, str(answers))

    reference_query_ids = set(filtered_reference_dictionary.keys())
    candidate_query_ids = set(filtered_candidate_dictionary.keys())
    common_query_ids = reference_query_ids.intersection(candidate_query_ids)
    assert (len(common_query_ids) == len(reference_query_ids)) and \
           (len(common_query_ids) == len(candidate_query_ids)), \
           'Reference and candidate files must share same query ids'

    all_scores = {}
    bleu_scores, _ = \
        Bleu(p_max_bleu_order).compute_score(filtered_reference_dictionary, \
                                             filtered_candidate_dictionary)
    for i, bleu_score in enumerate(bleu_scores):
        all_scores['bleu_%d' % (i + 1)] = bleu_score

    rouge_score, _ = Rouge().compute_score(filtered_reference_dictionary, \
                                           filtered_candidate_dictionary)
    all_scores['rouge_l'] = rouge_score

    return all_scores
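
What one input line might look like, with one JSON object per query; the literal key names are an assumption, since load_file and the *_JSON_ID constants are not shown:

{"query_id": 42, "answers": ["rouge_l rewards the longest common subsequence"]}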
    def evaluate(self):

        gts = {}
        res = {}
        counter = 0
        for i in self.input_captions['v_preds']:
            imgId = i[self.key_name]
            if imgId not in res:
                res[imgId] = []
            res[imgId].append(i)
            gts[imgId] = self.ground_captions[imgId]

        # =================================================
        # Tokenization
        # =================================================
        if not self.no_print:
            print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        if not self.no_print:
            print('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")
                   #(Spice(), "SPICE")
                   ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            if not self.no_print:
                print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    if not self.no_print:
                        print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                if not self.no_print:
                    print("%s: %0.3f" % (method, score))
        self.setEvalImgs()

        res_diff_method = {}
        for metric, score in self.eval.items():
            score_round = '%.3f' % (score)
            res_diff_method[metric] = float(score_round)

        return res_diff_method
Example #15
    def evaluate(self):
        """
        Load the sentences from json files
        """
        def readJson(refName, candName):

            path_to_ref_file = os.path.join(self._pathToData, refName)
            path_to_cand_file = os.path.join(self._pathToData, candName)

            ref_list = json.loads(open(path_to_ref_file, 'r').read())
            cand_list = json.loads(open(path_to_cand_file, 'r').read())

            gts = defaultdict(list)
            res = defaultdict(list)
            # change of naming convention from ref to gts
            for l in ref_list:
                gts[l['image_id']].append({"caption": l['caption']})

            # change of naming convention from cand to res
            for l in cand_list:
                res[l['image_id']].append({"caption": l['caption']})

            return gts, res

        print('Loading Data...')
        gts, res = readJson(self._refName, self._candName)
        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(self._dfMode), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
            else:
                self.setEval(score, method)
Example #16
    def evaluate(self):
        """
        Load the sentences from json files
        """
        def readJson():
            path_to_cand_file = os.path.join(self._pathToData, self._candName)
            cand_list = json.loads(open(path_to_cand_file, 'r').read())

            res = defaultdict(list)

            for id_cap in cand_list:
                res[id_cap['image_id']].extend(id_cap['captions'])

            return res

        print('Loading Data...')
        res = readJson()
        ratio = {}
        avg_diversity = 0
        for im_id in res.keys():
            print('number of images: %d\n' % len(ratio))
            final_score = []
            for i in range(self._num):
                new_gts = {}
                new_res = {}
                new_res[im_id] = [res[im_id][i]]
                new_gts[im_id] = [
                    res[im_id][j] for j in range(self._num) if j != i
                ]
                # =================================================
                # Set up scorers
                # =================================================
                print('setting up scorers...')
                scorers = [
                    (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                    # (Meteor(), "METEOR"),
                    # (Rouge(), "ROUGE_L"),
                    # (Cider(self._dfMode, self._df_file), "CIDEr"),
                    # (Spice(), "SPICE")
                ]

                # =================================================
                # Compute scores
                # =================================================
                for scorer, method in scorers:
                    print('computing %s score...' % scorer.method())
                    score, scores = scorer.compute_score(gts=new_gts,
                                                         res=new_res)
                final_score.append(score)
            mbleus = np.array(final_score).sum(0) / self._num
            ratio[im_id] = list(mbleus)
            avg_diversity += sum(mbleus) / 4
            if len(ratio) == 5000:
                break
        print('Average diversity: %.5f' % (avg_diversity / len(ratio)))
        self.eval = ratio
Example #17
def main():

    import sys
    res_path = sys.argv[1]

    gt_path = osp.join(this_dir, 'tgif-v1.0.tsv')
    test_list_path = osp.join(this_dir, 'splits', 'test.txt')

    test_keys = load_list(test_list_path)
    all_sents = load_sentences(gt_path)
    res = load_sentences(res_path)

    # make sure res has and only has single sentence
    # for all testing keys
    gts = {}
    for key in test_keys:
        gts[key] = all_sents[key]
        if key in res:
            res[key] = [res[key][0]]
        else:
            res[key] = [""]

    # =================================================
    # Convert to COCO format
    # =================================================
    gts = to_coco(gts, res.keys())
    res = to_coco(res, res.keys())

    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"), (Cider(), "CIDEr")]

    # =================================================
    # Compute scores
    # =================================================
    eval = {}
    for scorer, method in scorers:
        print('computing %s score...' % scorer.method())
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                print("%s: %0.3f" % (m, sc))
        else:
            print("%s: %0.3f" % (method, score))
Example #18
def init_cider_scorer(reward_type):
    # relies on a module-level `CiderD_scorer = None`, so the `or` below only
    # constructs a scorer on the first call
    global CiderD_scorer
    # CiderD_scorer = CiderD_scorer or CiderD(df=cached_tokens)
    if reward_type == 'BLEU':
        CiderD_scorer = CiderD_scorer or Bleu()
    elif reward_type == 'METEOR':
        CiderD_scorer = CiderD_scorer or Meteor()
    elif reward_type == 'ROUGE':
        CiderD_scorer = CiderD_scorer or Rouge()
    elif reward_type == 'CIDEr':
        CiderD_scorer = CiderD_scorer or Cider()
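
The `or` idiom above only works if the global already exists; a sketch of the module-level wiring this snippet presumably relies on (the None initialisation is an assumption, mirroring similar self-critical training code):

CiderD_scorer = None  # must exist before init_cider_scorer runs

init_cider_scorer('CIDEr')  # first call constructs Cider(); later calls reuse it
gts = {"1": ["a dog runs", "a dog is running"], "2": ["a cat sleeps"]}
res = {"1": ["a dog running"], "2": ["a cat is sleeping"]}
score, scores = CiderD_scorer.compute_score(gts, res)
print(score)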
Example #19
    def evaluate(self, album_to_Gts, album_to_Res):
        """
        measure is a subset of ['bleu', 'meteor', 'rouge', 'cider'];
        if measure is None, we will apply all of the above.
        """

        # # album_id -> pred story str
        # album_to_Res = {item['album_id']: [item['pred_story_str'].encode('ascii', 'ignore').decode('ascii')]
        # 				for item in self.preds }

        # # album_id -> gt story str(s)
        # album_to_Gts = {}
        # for album_id in album_to_Res.keys():
        # 	album = self.vist_sis.Albums[album_id]
        # 	gd_story_strs = []
        # 	for story_id in album['story_ids']:
        # 		gd_sent_ids = self.vist_sis.Stories[story_id]['sent_ids']
        # 		gd_story_str = ' '.join([self.vist_sis.Sents[sent_id]['text'] for sent_id in gd_sent_ids])
        # 		gd_story_str = gd_story_str.encode('ascii', 'ignore').decode('ascii')  # ignore some weird token
        # 		gd_story_strs += [gd_story_str]
        # 	album_to_Gts[album_id] = gd_story_strs

        self.album_to_Res = album_to_Res
        self.album_to_Gts = album_to_Gts

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score ...' % (scorer.method()))
            score, scores = scorer.compute_score(self.album_to_Gts,
                                                 self.album_to_Res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setAlbumToEval(scs, self.album_to_Gts.keys(), m)
                    print('%s: %.4f' % (m, sc))
            else:
                self.setEval(score, method)
                self.setAlbumToEval(scores, self.album_to_Gts.keys(), method)
                print('%s: %.4f' % (method, score))

        self.setEvalAlbums()
Example #20
    def evaluate(self):
        imgIds = self.params['image_id']
        # print(imgIds)
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        # print(len(imgIds)) ## 676476 ids; 1000 in total
        # print(self.coco.imgToAnns) ## key-value pairs
        for imgId in imgIds:
            # print(imgId)
            gts[imgId] = self.coco.imgToAnns[imgId]  ## length = 5
            # print(len(gts[imgId]))
            # print(gts[imgId])
            res[imgId] = self.cocoRes.imgToAnns[imgId]
            # print(len(res[imgId]))
            # print(res[imgId])

        # =================================================
        # Tokenization
        # =================================================
        print('===== tokenization... gts')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        print('===== tokenization... res')
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # =================================================
        # Compute scores
        # =================================================
        eval = {}
        for scorer, method in scorers:
            print('===== computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            # print(scores)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example #21
    def evaluate(self, gts=None, res=None):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        if gts is None and res is None:
            gts = {}
            res = {}
            for imgId in imgIds:
                gts[imgId] = self.coco.imgToAnns[imgId]
                res[imgId] = self.cocoRes.imgToAnns[imgId]

            print("before")
            print(gts[184321])
            print(res[184321])

            # =================================================
            # Set up scorers
            # =================================================
            print 'tokenization...'
            tokenizer = PTBTokenizer()
            gts = tokenizer.tokenize(gts)
            res = tokenizer.tokenize(res)

        print("after")
        return gts, res
        print(gts[184321])
        print(res[184321])

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr"), (Spice(), "SPICE")]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example #22
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        eval = {}
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            # test
            #gts = {1: ['a metallic refrigerator freezer sitting inside of a kitchen', 'a small kitchen with a stove and refrigerator', "a stainless steel refrigerator in a home 's kitchen", 'a kitchen with a stove and a refrigerator', 'a kitchen has a fridge a stove and a counter top']}
            #res = {1: ['a kitchen with a stove and a refrigerator']}
            #gts = {1: ['a kitchen with a stove and a refrigerator', 'a kitchen with a stove and a refrigerator']}
            # import ipdb; ipdb.set_trace()  # leftover debugging hook, disabled
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
def evaluate(hyp, ref):
    with open(hyp, 'r') as r:
        hypothesis = r.readlines()
        res = {
            k: [" ".join(v.strip().lower().split())]
            for k, v in enumerate(hypothesis)
        }
    with open(ref, 'r') as r:
        references = r.readlines()
        gts = {k: [v.strip().lower()] for k, v in enumerate(references)}
    score_Bleu, scores_Bleu = Bleu(4).compute_score(gts, res)
    print("Bleu scores: " + str(score_Bleu))
    print("Bleu_4: " + str(score_Bleu[3]))

    #score_Meteor, scores_Meteor = Meteor().compute_score(gts, res)
    #print("Meteor: ", str(score_Meteor))

    files_rouge = FilesRouge(hyp, ref)
    scores = files_rouge.get_scores(avg=True)
    print('Rouge: ' + str(scores))

    score_Cider, scores_Cider = Cider().compute_score(gts, res)
    print("Cider: " + str(score_Cider))
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco[imgId]  #.imgToAnns[imgId]
            res[imgId] = self.cocoRes[imgId]  #.imgToAnns[imgId]

        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            #(cBleu(4), ["cBleu_1", "cBleu_2", "cBleu_3", "cBleu_4"]),
            #(Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L")
            #(Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        eval = {}
        final_score = 0
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
                    print("%s: %0.3f" % (m, sc))
                    final_score = sc  # ends up as Bleu_4, the last list-style metric
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
        return final_score
Example #25
def test(loader, model, opt, vocab):
    bleu_scores = []
    write_to_txt = []

    res = {}
    gts = {}

    for batch_id, (video_input, language_feat, captions, time_seg, batch_lens,
                   duration, video_id) in enumerate(loader):

        # Convert the textual input to numeric labels
        cap_gts, cap_mask = convert_caption_labels(captions,
                                                   loader.dataset.get_vocab(),
                                                   opt['max_length'])

        video_input = video_input.cuda()
        cap_gts = torch.tensor(cap_gts).cuda().long()
        # cap_mask = cap_mask.cuda()

        with torch.no_grad():
            # Beam Search Starts From Here
            batch_hyp = translate_batch(model, video_input, opt)

        # Stack all ground-truth captions
        references = [[cap.split(' ')] for cap in captions]

        # Stack all predicted captions
        hypotheses = []
        for predict in batch_hyp:
            words = []
            if EOS in predict[0]:
                sep_id = predict[0].index(EOS)
            else:
                sep_id = -1  # no EOS found; note the slice below then drops the last token
            for word in predict[0][0:sep_id]:
                words.append(vocab[str(word)])
            hypotheses.append(words)

        # Stack all predictions for the ROUGE/METEOR scores
        res[batch_id] = [list_to_sentence(hypotheses[0])]
        gts[batch_id] = [list_to_sentence(references[0][0])]
        print(batch_id)
    avg_bleu_score, bleu_scores = Bleu(4).compute_score(gts, res)
    avg_cider_score, cider_scores = Cider().compute_score(gts, res)
    avg_meteor_score, meteor_scores = Meteor().compute_score(gts, res)
    avg_rouge_score, rouge_scores = Rouge().compute_score(gts, res)
    print('C, M, R, B:', avg_cider_score, avg_meteor_score, avg_rouge_score,
          avg_bleu_score)
Example #26
    def __init__(self,
                 references,
                 scorers=('bleu', 'rouge', 'cider', 'meteor')):
        self.scorers = {}
        for scorer in scorers:
            if scorer == 'bleu':
                self.scorers['bleu'] = Bleu(4)
            elif scorer == 'rouge':
                self.scorers['rouge'] = Rouge()
            elif scorer == 'cider':
                self.scorers['cider'] = Cider()
            elif scorer == 'meteor':
                self.scorers['meteor'] = Meteor()
            else:
                raise NotImplementedError()
        self.references = references
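
Assuming the __init__ above belongs to a small wrapper class (called CaptionScorer here purely for illustration), scoring could look like:

evaluator = CaptionScorer(references={"1": ["a man rides a horse"]},
                          scorers=['bleu', 'cider'])
hyps = {"1": ["a man riding a horse"]}
for name, scorer in evaluator.scorers.items():
    avg, _ = scorer.compute_score(evaluator.references, hyps)
    print(name, avg)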
Example #27
    def evaluate(self):
        imgIds = self.params
        # imgIds = self.coco.getImgIds()
        gts = self.coco
        res = self.cocoRes
        #print type(gts),' \r\n ',type(res)

        #print gts,' ',res
        #pdb.set_trace()
        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # =================================================
        # Compute scores
        # =================================================
        eval = {}
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]
        print('res:', res[imgId])
        # print('gt:',gts[imgId])

        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)
        #weiyu
        print('res:', res[imgId])

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example #29
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(["-m", "stem", '-w', '1.0', '-p',
                     '0.85 0.2 0.0 0.75']), "METEOR_Stems"),
            (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"), (Cider(), "CIDEr"),
            (Spice(), "SPICE")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example #30
    def evaluate(self):

        evalRefIds = [ann['ref_id'] for ann in self.Res]

        refToGts = {}
        for ref_id in evalRefIds:
            ref = self.refer.Refs[ref_id]
            gt_sents = [sent['sent'] for sent in ref['sentences']]  # up to 3 expressions
            refToGts[ref_id] = gt_sents
        refToRes = {ann['ref_id']: [ann['sent']] for ann in self.Res}

        print('tokenization...')
        tokenizer = PTBTokenizer()
        self.refToRes = tokenizer.tokenize(refToRes)
        self.refToGts = tokenizer.tokenize(refToGts)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(self.refToGts, self.refToRes)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setRefToEvalRefs(scs, self.refToGts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setRefToEvalRefs(scores, self.refToGts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalRefs()