Example No. 1
    def evaluate(self):
        output = []
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # =================================================
        # Compute scores
        # =================================================
        scores_dict = {}
        for scorer, method in scorers:
            score, scores = scorer.compute_score(self.gts, self.res)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    print("%s: %0.5f" % (m, sc))
                    output.append(sc)
                    scores_dict[m] = str(sc)
            else:
                print("%s: %0.5f" % (method, score))
                output.append(score)
                scores_dict[method] = score

        return output
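
All of the evaluate() variants on this page follow the same pycocoevalcap convention: gts (references) and res (hypotheses) are dicts mapping an arbitrary id to a list of pre-tokenized caption strings, with exactly one hypothesis per id. A minimal sketch with made-up captions:

gts = {0: ["a man rides a horse", "a person on a horse"],  # one or more references per id
       1: ["a cat sits on a mat"]}
res = {0: ["a man riding a horse"],                        # exactly one hypothesis per id
       1: ["a cat on a mat"]}
# Bleu(4).compute_score returns ([Bleu_1..Bleu_4], per-id score lists)
bleu_avg, bleu_per_id = Bleu(4).compute_score(gts, res)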
Example No. 2
def main():

    args = get_args()
    rouge_dir = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        'rouge/RELEASE-1.5.5/')

    data_path = os.path.join(args.iobasedir, 'processed/', args.dataset,
                             args.domain, args.split)
    log_path = os.path.join(args.iobasedir, 'logs')
    log_file = os.path.join(
        args.iobasedir, 'logs', 'baselines_rsumm_%s_%s_%s_%s.log' %
        (args.dataset, args.domain, args.split, str(args.summary_size)))
    mkdirp(log_path)
    set_logger(log_file)

    data_file = os.path.join(data_path, 'test0.csv')
    df = pd.read_csv(data_file,
                     sep=",",
                     quotechar='"',
                     engine='python',
                     header=None,
                     skiprows=1,
                     names=[
                         "user_id", "product_id", "rating", "review", "nouns",
                         "summary", 'time'
                     ])

    for index, row in df.iterrows():
        topic = row['user_id'] + '_' + row['product_id']
        docs = [[sent] for sent in sent_tokenize(row['review'].strip())]
        refs = [sent_tokenize(row['summary'].strip())]
        if not refs[0]:
            continue

        if not args.summary_size:
            summary_size = len(" ".join(refs[0]).split(' '))
        else:
            summary_size = int(args.summary_size)

        logger.info('Topic ID: %s', topic)
        logger.info('###')
        logger.info('Summary_len: %d', summary_size)

        rouge = Rouge(rouge_dir)
        algos = [
            'Luhn', 'LexRank', 'TextRank', 'LSA', 'KL', "ICSI", 'UB1', 'UB2'
        ]
        best_summary = []
        best_score = 0.0
        for algo in algos:
            best_summary, best_score = get_summary_scores(
                algo, docs, refs, summary_size, args.language, rouge,
                best_summary, best_score)

        rouge._cleanup()
        logger.info('###')
Example No. 3
    def evaluate(self):
        # ==================================================
        # Tokenization, remove punctuation
        # =================================================
        '''
        print("tokenization ...")
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(self.ref)
        res = tokenizer.tokenize(self.res)
        '''
        gts = self.ref
        res = self.res
        # ==================================================
        # Set up scorers
        # ==================================================
        print("setting up scorers ...")
        scorers = [(Bleu(4), ("Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4")),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # ==================================================
        # Compute scores
        # ==================================================
        out = {}
        for scorer, method in scorers:
            print("computing %s score ..." % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, tuple):
                for sc, scs, m in zip(score, scores, method):
                    out[m] = sc
                    print("%s: %0.4f" % (m, sc))
            else:
                print("%s: %0.4f" % (method, score))
                out[method] = score

        return out
Example No. 4
    def evaluate(self):
        output = []
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            # (Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            score, scores = scorer.compute_score(self.gts, self.res)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    print("%s: %0.5f" % (m, sc))
                    output.append(sc)
            else:
                print("%s: %0.5f" % (method, score))
                output.append(score)
        return output
Example No. 5
class EvalCap:
    ref_list = [
        'this is a reference sentence for sentence2 which was generated by your model'
    ]
    hyp_list = ['this is sentence2 which has been generated by your model']

    refs = {idx: [lines.strip()] for (idx, lines) in enumerate(ref_list)}
    hyps = {idx: [lines.strip()] for (idx, lines) in enumerate(hyp_list)}

    scorers = [
        (Bleu(4), ['Bleu_1', 'Bleu_2', 'Bleu_3', 'Bleu_4']),
        # (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")
    ]
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(refs, hyps)  # references first, hypotheses second
        if isinstance(method, list):
            for sc, scs, m in zip(score, scores, method):
                print("%s: %0.3f" % (m, sc))
        else:
            print("%s: %0.3f" % (method, score))
Example No. 6
    def evaluate(self, album_to_Gts, album_to_Res):
        self.album_to_Res = album_to_Res
        self.album_to_Gts = album_to_Gts

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")  # df='VIST/VIST-train-words'
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score ...' % (scorer.method()))
            score, scores = scorer.compute_score(self.album_to_Gts,
                                                 self.album_to_Res)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setAlbumToEval(scs, self.album_to_Gts.keys(), m)
                    print('%s: %.3f' % (m, sc))
            else:
                self.setEval(score, method)
                self.setAlbumToEval(scores, self.album_to_Gts.keys(), method)
                print('%s: %.3f' % (method, score))

        self.setEvalAlbums()
Example No. 7
def evaluate_captions(res: dict, gts: dict):

    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"), (Cider(), "CIDEr"),
               (Spice(), "SPICE")]
    rtn = {}
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            for sc, scs, m in zip(score, scores, method):
                rtn[m] = sc
        else:
            rtn[method] = score

    return rtn
Example No. 8
def language_eval(sample_seqs, gt_seqs):
    # sample_seqs: list of predicted sentences, one string per item
    # gt_seqs: list of reference-sentence lists, one list per item
    import sys
    sys.path.append("coco-caption/pycocoevalcap/")
    from collections import OrderedDict
    from bleu.bleu import Bleu
    from cider.cider import Cider
    from meteor.meteor import Meteor
    from rouge.rouge import Rouge

    assert len(sample_seqs) == len(gt_seqs), "number of eval data is different"
    # res: {0: [pred], 1: [pred], ...}
    res = OrderedDict((i, [sample_seqs[i]]) for i in range(len(sample_seqs)))
    # gts: {0: [ref1, ref2, ...], 1: [ref1, ref2, ...], ...}
    gts = OrderedDict((i, list(gt_seqs[i])) for i in range(len(gt_seqs)))

    avg_bleu_score, bleu_scores = Bleu(4).compute_score(gts, res)
    avg_cider_score, cider_scores = Cider().compute_score(gts, res)
    avg_meteor_score, meteor_scores = Meteor().compute_score(gts, res)
    avg_rouge_score, rouge_scores = Rouge().compute_score(gts, res)

    print(" BLEU1:{}\n BLEU2:{}\n BLEU3:{}\n BLEU4:{}\n METEOR:{}\n ROUGE:{}\n CIDEr:{}\n"
          .format(avg_bleu_score[0], avg_bleu_score[1], avg_bleu_score[2], avg_bleu_score[3],
                  avg_meteor_score, avg_rouge_score, avg_cider_score))

    return {'BLEU': avg_bleu_score, 'METEOR': avg_meteor_score,
            'ROUGE': avg_rouge_score, 'CIDEr': avg_cider_score}
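
A hedged usage sketch for language_eval: judging from res[i] = [sample_seqs[i]], each entry of sample_seqs is one predicted sentence, while each entry of gt_seqs is a list of reference strings. The data below is made up:

preds = ["a man rides a horse", "a cat on a mat"]
gt_lists = [["a man riding a horse", "a person on a horse"],
            ["a cat sits on a mat"]]
metrics = language_eval(preds, gt_lists)
print(metrics['CIDEr'])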
Example No. 9
    def __init__(self):
        self.blue_scorer = Bleu(4)
        self.rouge_scorer = Rouge()
        self.cider_scorer = Cider()
        self.truth = None
        remove = string.punctuation + "、。,."
        self.remove_pattern = r"[{}]".format(remove)  # character class matching any punctuation mark
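
A quick sketch of how remove_pattern is typically applied; the sample text is invented, and re.escape is added here for safety (the original relies on the raw character class compiling as-is):

import re
import string

remove = string.punctuation + "、。,."
pattern = r"[{}]".format(re.escape(remove))
print(re.sub(pattern, "", "Hello, world。 OK."))  # -> "Hello world OK"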
Example No. 10
def evaluate(gts=None, res=None):
    # imgIds = self.coco.getImgIds()
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]

    # =================================================
    # Compute scores
    # =================================================
    res_scores = []
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            for sc, scs, m in zip(score, scores, method):
                res_scores.append((m, sc))
        else:
            res_scores.append((method, score))
    return res_scores
Example No. 11
def compute_metrics_from_files(p_path_to_reference_file,
                               p_path_to_candidate_file, p_max_bleu_order):
    """Compute BLEU-N and ROUGE-L metrics.
    IMPORTANT: No-answer reference will be excluded from calculation.

    Args:
    p_path_to_reference_file (str): path to reference file.
    p_path_to_candidate_file (str): path to candidate file.
        Both files should be in format:
            {QUERY_ID_JSON_ID: <a_query_id_int>,
             ANSWERS_JSON_ID: [<list_of_answers_string>]}
    p_max_bleu_order: the maximum n order in bleu_n calculation.

    Returns:
    dict: dictionary of {'bleu_n': <bleu_n score>, 'rouge_l': <rouge_l score>}
    """

    reference_dictionary, reference_no_answer_query_ids = \
        load_file(p_path_to_reference_file)
    candidate_dictionary, _ = load_file(p_path_to_candidate_file)

    the_ids = set(candidate_dictionary.keys())

    filtered_reference_dictionary = \
        {key: value for key, value in reference_dictionary.items()
         if key not in reference_no_answer_query_ids and key in the_ids}

    filtered_candidate_dictionary = \
        {key: value for key, value in candidate_dictionary.items() \
                    if key not in reference_no_answer_query_ids}

    for query_id, answers in filtered_candidate_dictionary.items():
        assert \
            len(answers) <= 1, \
            'query_id %d contains more than 1 answer \"%s\" in candidate file' % \
            (query_id, str(answers))

    reference_query_ids = set(filtered_reference_dictionary.keys())
    candidate_query_ids = set(filtered_candidate_dictionary.keys())
    common_query_ids = reference_query_ids.intersection(candidate_query_ids)
    assert (len(common_query_ids) == len(reference_query_ids)) and \
           (len(common_query_ids) == len(candidate_query_ids)), \
           'Reference and candidate files must share same query ids'

    all_scores = {}
    bleu_scores, _ = \
        Bleu(p_max_bleu_order).compute_score(filtered_reference_dictionary, \
                                             filtered_candidate_dictionary)
    for i, bleu_score in enumerate(bleu_scores):
        all_scores['bleu_%d' % (i + 1)] = bleu_score

    rouge_score, _ = Rouge().compute_score(filtered_reference_dictionary, \
                                           filtered_candidate_dictionary)
    all_scores['rouge_l'] = rouge_score

    return all_scores
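
For context, both input files are JSON lines, one object per line, keyed by the QUERY_ID_JSON_ID and ANSWERS_JSON_ID constants (in the MS MARCO evaluation scripts these are typically 'query_id' and 'answers'; treat the exact key names as an assumption here). A candidate file might look like:

{"query_id": 19699, "answers": ["france"]}
{"query_id": 19700, "answers": ["paris"]}

all_scores = compute_metrics_from_files('refs.jsonl', 'cands.jsonl', 4)  # hypothetical paths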
Example No. 12

    def evaluate(self):

        gts = {}
        res = {}
        for i in self.input_captions['v_preds']:
            imgId = i[self.key_name]
            if imgId not in res:
                res[imgId] = []
            res[imgId].append(i)
            gts[imgId] = self.ground_captions[imgId]

        # =================================================
        # Tokenization
        # =================================================
        if not self.no_print:
            print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        if not self.no_print:
            print('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")
                   #(Spice(), "SPICE")
                   ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            if not self.no_print:
                print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    if not self.no_print:
                        print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                if not self.no_print:
                    print("%s: %0.3f" % (method, score))
        self.setEvalImgs()

        res_diff_method = {}
        for metric, score in self.eval.items():
            res_diff_method[metric] = round(score, 3)

        return res_diff_method
Example No. 13
    def evaluate(self):
        """
        Load the sentences from json files
        """
        def readJson(refName, candName):

            path_to_ref_file = os.path.join(self._pathToData, refName)
            path_to_cand_file = os.path.join(self._pathToData, candName)

            with open(path_to_ref_file, 'r') as f:
                ref_list = json.load(f)
            with open(path_to_cand_file, 'r') as f:
                cand_list = json.load(f)

            gts = defaultdict(list)
            res = defaultdict(list)
            # change of naming convention from ref to gts
            for l in ref_list:
                gts[l['image_id']].append({"caption": l['caption']})

            # change of naming convention from cand to res
            for l in cand_list:
                res[l['image_id']].append({"caption": l['caption']})

            return gts, res

        print('Loading Data...')
        gts, res = readJson(self._refName, self._candName)
        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(self._dfMode), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
            else:
                self.setEval(score, method)
Example No. 14
def evaluate_summary_rouge(cand, model, max_sum_len=100):
    rouge_scorer = Rouge(ROUGE_DIR, BASE_DIR, True)
    r1, r2, rl, rsu4 = rouge_scorer(cand, [model], max_sum_len)
    rouge_scorer.clean()
    dic = OrderedDict()
    dic['ROUGE-1'] = r1
    dic['ROUGE-2'] = r2
    dic['ROUGE-L'] = rl
    dic['ROUGE-SU4'] = rsu4
    return dic
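
A usage sketch; ROUGE_DIR and BASE_DIR are module-level constants not shown in this snippet, and the summary strings are invented:

candidate = "the proposed model improves summarization quality"
reference = "the model improves the quality of summaries"
scores = evaluate_summary_rouge(candidate, reference, max_sum_len=100)
for name, value in scores.items():
    print("%s: %.4f" % (name, value))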
Example No. 15
def compute_score(gts, val_caps, train_imgids, val_imgids, i, j):
    res = {}
    for imgid in train_imgids:
        res[imgid] = [val_caps[val_imgids[i]][j]]

    scorer = Rouge()
    score, scores = scorer.compute_score(gts, res, train_imgids)
    return np.array(scores)
Example No. 16
def main():

    import sys
    res_path = sys.argv[1]

    gt_path = osp.join(this_dir, 'tgif-v1.0.tsv')
    test_list_path = osp.join(this_dir, 'splits', 'test.txt')

    test_keys = load_list(test_list_path)
    all_sents = load_sentences(gt_path)
    res = load_sentences(res_path)

    # make sure res has and only has single sentence
    # for all testing keys
    gts = {}
    for key in test_keys:
        gts[key] = all_sents[key]
        if key in res:
            res[key] = [res[key][0]]
        else:
            res[key] = [""]

    # =================================================
    # Convert to COCO format
    # =================================================
    gts = to_coco(gts, res.keys())
    res = to_coco(res, res.keys())

    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"), (Cider(), "CIDEr")]

    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            for sc, scs, m in zip(score, scores, method):
                print("%s: %0.3f" % (m, sc))
        else:
            print("%s: %0.3f" % (method, score))
Example No. 17
def init_cider_scorer(reward_type):
    global CiderD_scorer
    # CiderD_scorer = CiderD_scorer or CiderD(df=cached_tokens)
    if reward_type == 'BLEU':
        CiderD_scorer = CiderD_scorer or Bleu()
    elif reward_type == 'METEOR':
        CiderD_scorer = CiderD_scorer or Meteor()
    elif reward_type == 'ROUGE':
        CiderD_scorer = CiderD_scorer or Rouge()
    elif reward_type == 'CIDEr':
        CiderD_scorer = CiderD_scorer or Cider()
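
Despite its name, CiderD_scorer ends up holding whichever scorer matches reward_type. The `CiderD_scorer or ...` idiom only works if the module defines the global beforehand; a sketch of the assumed surrounding setup:

CiderD_scorer = None  # module level; filled in lazily by init_cider_scorer

init_cider_scorer('CIDEr')
score, scores = CiderD_scorer.compute_score(gts, res)  # gts/res dicts as in the other examples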
Example No. 18
    def evaluate(self, album_to_Gts, album_to_Res):
        """
		measure is a subset of ['bleu', 'meteor', 'rouge', 'cider']
		if measure is None, we will apply all the above.
		"""

        # # album_id -> pred story str
        # album_to_Res = {item['album_id']: [item['pred_story_str'].encode('ascii', 'ignore').decode('ascii')]
        # 				for item in self.preds }

        # # album_id -> gt story str(s)
        # album_to_Gts = {}
        # for album_id in album_to_Res.keys():
        # 	album = self.vist_sis.Albums[album_id]
        # 	gd_story_strs = []
        # 	for story_id in album['story_ids']:
        # 		gd_sent_ids = self.vist_sis.Stories[story_id]['sent_ids']
        # 		gd_story_str = ' '.join([self.vist_sis.Sents[sent_id]['text'] for sent_id in gd_sent_ids])
        # 		gd_story_str = gd_story_str.encode('ascii', 'ignore').decode('ascii')  # ignore some weird token
        # 		gd_story_strs += [gd_story_str]
        # 	album_to_Gts[album_id] = gd_story_strs

        self.album_to_Res = album_to_Res
        self.album_to_Gts = album_to_Gts

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score ...' % (scorer.method()))
            score, scores = scorer.compute_score(self.album_to_Gts,
                                                 self.album_to_Res)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setAlbumToEval(scs, self.album_to_Gts.keys(), m)
                    print('%s: %.4f' % (m, sc))
            else:
                self.setEval(score, method)
                self.setAlbumToEval(scores, self.album_to_Gts.keys(), method)
                print('%s: %.4f' % (method, score))

        self.setEvalAlbums()
Example No. 19
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]  # typically five references per image
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # =================================================
        # Tokenization
        # =================================================
        print('===== tokenization... gts')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        print('===== tokenization... res')
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('===== computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example No. 20
    def evaluate(self, gts=None, res=None):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        if gts is None and res is None:
            gts = {}
            res = {}
            for imgId in imgIds:
                gts[imgId] = self.coco.imgToAnns[imgId]
                res[imgId] = self.cocoRes.imgToAnns[imgId]

            # =================================================
            # Tokenization
            # =================================================
            print('tokenization...')
            tokenizer = PTBTokenizer()
            gts = tokenizer.tokenize(gts)
            res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr"), (Spice(), "SPICE")]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example No. 21
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example No. 22

    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco[imgId]  #.imgToAnns[imgId]
            res[imgId] = self.cocoRes[imgId]  #.imgToAnns[imgId]

        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            #(cBleu(4), ["cBleu_1", "cBleu_2", "cBleu_3", "cBleu_4"]),
            #(Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L")
            #(Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        final_score = 0
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
                    print("%s: %0.3f" % (m, sc))
                    final_score = sc
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
        return final_score
Example No. 23
    def evaluate(self, ngram_metric):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco[imgId]#.imgToAnns[imgId]
            res[imgId] = self.cocoRes[imgId]#.imgToAnns[imgId]

        # =================================================
        # Tokenization
        # =================================================
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        if ngram_metric == 'ROUGE_L':
            scorers = [
                (Bleu(1), ["Bleu_1"]),
                (Rouge(), "ROUGE_L")
            ]
        else:
            assert ngram_metric.startswith('Bleu_')
            i = ngram_metric[len('Bleu_'):]
            assert i.isdigit()
            i = int(i)
            assert i > 0
            scorers = [
                (Bleu(i), ['Bleu_{}'.format(j) for j in range(1, i + 1)]),
            ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
        self.setEvalImgs()
        return self.evalImgs
Example No. 24
    def __init__(self,
                 references,
                 scorers=['bleu', 'rouge', 'cider', 'meteor']):
        self.scorers = {}
        for scorer in scorers:
            if scorer == 'bleu':
                self.scorers['bleu'] = Bleu(4)
            elif scorer == 'rouge':
                self.scorers['rouge'] = Rouge()
            elif scorer == 'cider':
                self.scorers['cider'] = Cider()
            elif scorer == 'meteor':
                self.scorers['meteor'] = Meteor()
            else:
                raise NotImplementedError()
        self.references = references
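
A sketch of how the registry built above might be consumed; this evaluate method is hypothetical, not part of the source:

    def evaluate(self, hypotheses):
        # hypotheses and self.references: {id: [sentence]} dicts as elsewhere on this page
        results = {}
        for name, scorer in self.scorers.items():
            score, _ = scorer.compute_score(self.references, hypotheses)
            results[name] = score  # note: Bleu returns a list of four values
        return results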
Example No. 25

    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example No. 26
    def evaluate(self):
        imgIds = self.params
        # imgIds = self.coco.getImgIds()
        gts = self.coco
        res = self.cocoRes

        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example No. 27
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(["-m", "stem", '-w', '1.0', '-p',
                     '0.85 0.2 0.0 0.75']), "METEOR_Stems"),
            (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"), (Cider(), "CIDEr"),
            (Spice(), "SPICE")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example No. 28
    def evaluate(self):

        evalRefIds = [ann['ref_id'] for ann in self.Res]

        refToGts = {}
        for ref_id in evalRefIds:
            ref = self.refer.Refs[ref_id]
            gt_sents = [sent['sent'] for sent in ref['sentences']]  # up to 3 expressions
            refToGts[ref_id] = gt_sents
        refToRes = {ann['ref_id']: [ann['sent']] for ann in self.Res}

        print('tokenization...')
        tokenizer = PTBTokenizer()
        self.refToRes = tokenizer.tokenize(refToRes)
        self.refToGts = tokenizer.tokenize(refToGts)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(self.refToGts, self.refToRes)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setRefToEvalRefs(scs, self.refToGts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setRefToEvalRefs(scores, self.refToGts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalRefs()
Example No. 29

def score(ref, hypo):
    """
    ref, dictionary of reference sentences (id, sentence)
    hypo, dictionary of hypothesis sentences (id, sentence)
    score, dictionary of scores
    """
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Rouge(), "ROUGE_L"),
    ]
    final_scores = {}
    for scorer, method in scorers:
        score, scores = scorer.compute_score(ref, hypo)
        if isinstance(score, list):
            for m, s in zip(method, score):
                final_scores[m] = s
        else:
            final_scores[method] = score
    return final_scores
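
Example call with toy data; both dicts map an id to a single-element list of sentences:

ref = {0: ["a man is riding a horse"], 1: ["a cat sits on the mat"]}
hypo = {0: ["a man rides a horse"], 1: ["the cat is on the mat"]}
print(score(ref, hypo))  # e.g. {'Bleu_1': ..., 'Bleu_4': ..., 'ROUGE_L': ...}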
Example No. 30
    def __init__(self,
                 iobasedir,
                 rouge_dir,
                 out=None,
                 scores_dir=None,
                 override_results_files=False,
                 pickle_store=None,
                 k=0.1):
        self.iobasedir = path.normpath(path.expanduser(iobasedir))
        # resolved_rouge_dir = path.normpath(path.expanduser(rouge_dir))
        self.rouge = Rouge(rouge_dir)
        self.k = k
        if out is None:
            self.out = None
        else:
            self.out = path.normpath(path.expanduser(out))
        if scores_dir is None:
            self.scores_storage_path = path.normpath(
                path.join(self.iobasedir, "scores_new"))
        else:
            self.scores_storage_path = path.normpath(
                path.join(self.iobasedir, scores_dir))

        if not path.exists(self.scores_storage_path):
            os.mkdir(self.scores_storage_path)

        self.override_results_switch = override_results_files

        if pickle_store is None:
            self.pickle_store = None
        else:
            p, f = path.split(path.join(self.iobasedir, pickle_store))
            if path.exists(p):
                self.pickle_store = path.join(self.iobasedir, pickle_store)
            else:
                p, f = path.split(pickle_store)
                if path.exists(p):
                    self.pickle_store = pickle_store
                else:
                    raise ValueError(
                        "Cannot resolve %s to an existing path for storing the serialized summarizer"
                        % (pickle_store))