import numpy as np
from pycocoevalcap.spice.spice import Spice


def compute_score(gts, val_caps, train_imgids, val_imgids, i, j):
    # Score the j-th caption of the i-th validation image against the
    # references of every training image.
    res = {}
    for imgid in train_imgids:
        res[imgid] = [val_caps[val_imgids[i]][j]]
    scorer = Spice()
    # Note: this call passes the image ids explicitly, which assumes a modified
    # Spice scorer; the stock pycocoevalcap version takes only (gts, res).
    score, scores = scorer.compute_score(gts, res, train_imgids)
    return np.array(scores)
from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
from pycocoevalcap.bleu.bleu import Bleu
from pycocoevalcap.meteor.meteor import Meteor
from pycocoevalcap.rouge.rouge import Rouge
from pycocoevalcap.cider.cider import Cider
from pycocoevalcap.spice.spice import Spice


def evaluate_captions(res: dict, gts: dict):
    # =================================================
    # Tokenize
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"),
               (Rouge(), "ROUGE_L"),
               (Cider(), "CIDEr"),
               (Spice(), "SPICE")]
    rtn = {}
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            for sc, scs, m in zip(score, scores, method):
                rtn[m] = sc
        else:
            rtn[method] = score
    return rtn
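# Usage sketch for evaluate_captions (hypothetical example data): both gts and
# res map an arbitrary key to a list of {'image_id', 'caption'} dicts, which is
# the format PTBTokenizer expects; res normally holds one candidate per key.
gts = {1: [{'image_id': 1, 'caption': 'a dog runs on the grass'},
           {'image_id': 1, 'caption': 'a brown dog running outside'}]}
res = {1: [{'image_id': 1, 'caption': 'a dog is running'}]}
metrics = evaluate_captions(res, gts)
print(metrics)  # e.g. {'Bleu_1': ..., 'METEOR': ..., 'CIDEr': ..., 'SPICE': ...}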
def evaluator(gts, res):
    eval = {}
    # =================================================
    # Tokenize
    # =================================================
    print('tokenization...')
    # Todo: use Spacy for tokenization
    gts = tokenize(gts)
    res = tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        # (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        (Spice(), "SPICE")
    ]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            for sc, scs, m in zip(score, scores, method):
                eval[m] = sc
                print("%s: %0.3f" % (m, sc))
        else:
            eval[method] = score
            print("%s: %0.3f" % (method, score))
    return eval
def __init__(self, coco, cocoRes):
    self.evalImgs = []
    self.eval = {}
    self.imgToEval = {}
    self.coco = coco
    self.cocoRes = cocoRes
    self.params = {'image_id': coco.getImgIds()}
    self.Spice = Spice()
def evaluate(self, gts=None, res=None):
    imgIds = self.params['image_id']
    # imgIds = self.coco.getImgIds()
    if gts is None and res is None:
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]
    # =================================================
    # Tokenize
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"),
               (Rouge(), "ROUGE_L"),
               (Cider(), "CIDEr"),
               (Spice(), "SPICE")]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, gts.keys(), m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, gts.keys(), method)
            print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
class SpiceEval():

    def __init__(self):
        self.evalImgs = []
        self.eval = {}
        self.imgToEval = {}
        self.spice = Spice()
        self.tokenizer = PTBTokenizer()

    def evaluate(self, gts, res):
        """
        The input has the structure
        {'123': [{'image_id': 123, 'caption': 'xxxxx'}, {'image_id': 123, 'caption': 'yyy'}], ...}
        """
        assert set(gts.keys()) == set(res.keys())
        imgIds = gts.keys()
        gts = self.tokenizer.tokenize(gts)
        res = self.tokenizer.tokenize(res)
        # =================================================
        # Compute the SPICE score
        # =================================================
        print('computing %s score...' % (self.spice.method()))
        score, scores = self.spice.compute_score(gts, res)
        print("%s: %0.3f" % ("spice", score))
        self.eval['spice'] = score
        for imgId, img_score in zip(sorted(imgIds), scores):
            if imgId not in self.imgToEval:
                self.imgToEval[imgId] = {}
            self.imgToEval[imgId]["image_id"] = imgId
            self.imgToEval[imgId]["spice"] = img_score
        return self.eval['spice'], self.imgToEval
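# Usage sketch for SpiceEval (hypothetical example data), following the input
# structure documented in the docstring above; note that SPICE runs a bundled
# Java backend, so a JRE must be available.
gts = {'123': [{'image_id': '123', 'caption': 'a cat sits on a mat'},
               {'image_id': '123', 'caption': 'a cat resting on a rug'}]}
res = {'123': [{'image_id': '123', 'caption': 'a cat on a mat'}]}
spice_eval = SpiceEval()
overall, per_image = spice_eval.evaluate(gts, res)
print(overall)          # corpus-level SPICE score
print(per_image['123']) # per-image entry with image_id and spice breakdown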
def evaluate(self):
    imgIds = self.params["image_id"]
    # imgIds = self.coco.getImgIds()
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = self.coco.imgToAnns[imgId]
        res[imgId] = self.cocoRes.imgToAnns[imgId]
    # =================================================
    # Tokenize
    # =================================================
    print("tokenization...")
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    print("setting up scorers...")
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        (Spice(), "SPICE"),
    ]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print("computing %s score..." % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, gts.keys(), m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, gts.keys(), method)
            print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
def evaluate(self, res, gts):
    # =================================================
    # Tokenize
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        (Spice(), "SPICE")
    ]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, gts.keys(), m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, gts.keys(), method)
            print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
def __init__(self):
    self.evalImgs = []
    self.eval = {}
    self.imgToEval = {}
    self.spice = Spice()
    self.tokenizer = PTBTokenizer()
# 1. Parse sentences using SPICE and save the parsed information into a json
#    file (adapted from create_coco_sg.py).
from pycocoevalcap.spice.spice import Spice

data_path = './data/caption_sentences.txt'
sent_list = [item for item in open(data_path, 'r')]

gts = {}
res = {}
img_ids = []
for img_id, this_sent in enumerate(sent_list):
    gts[img_id] = []
    gts[img_id].append(this_sent)
    res[img_id] = []
    res[img_id].append('place holder')
    img_ids.append(img_id)

scorer = Spice()
score, scores = scorer.compute_score(gts, res)

# 2. Extract the parsed triplets from the json file (adapted from
#    process_spice_sg.py).
from nltk.stem import WordNetLemmatizer
from functools import partial


def change_word(lem, word_ori):
    """
    Lemmatize a word, e.g. change 'holding' to 'hold' or 'cats' to 'cat'.
    """
    word_ori = word_ori.lower()
    word_change = lem.lemmatize(word_ori)
    if word_change == word_ori:
        # The original snippet was truncated here; a plausible completion
        # (assumption) is to retry lemmatizing the word as a verb.
        word_change = lem.lemmatize(word_ori, 'v')
    return word_change
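# Usage sketch for change_word (hypothetical example): bind the lemmatizer once
# with functools.partial, then normalize individual words. Requires the NLTK
# WordNet data to be downloaded.
lem = WordNetLemmatizer()
normalize = partial(change_word, lem)
print(normalize('cats'))     # -> 'cat'
print(normalize('holding'))  # -> 'hold'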
# print(comparisons_better_than_me)
for model in model_index:
    rank = len(comparisons_better_than_me.get(model, []))
    # print(idx)
    sum_all_5[model][idx] = rank
idx += 1
comparisons_better_than_me = defaultdict(list)
gts = {}
res = {}

human_ranks = [sum_all_1, sum_all_2, sum_all_3, sum_all_4, sum_all_5]
models = ['Leven-Const', 'GPT-2', 'BERT-Gen', 'UniLM', 'BART', 'T5']
scorers = [
    (Bleu(4), "Bleu_4"),
    (Meteor(), "METEOR"),
    # (Rouge(), "ROUGE_L"),
    (Cider(), "CIDEr"),
    (Spice(), "SPICE")
]
# scorers = [Bleu(4), Meteor(), Cider(), Spice()]

with codecs.open('human_eval.jsonl', encoding='utf-8') as f:
    lines = f.readlines()

for scorer, method in scorers:
    agg = compute_agreement(scorer, lines)
    print("The averaged Kendall's tau of %s with 5 human annotators is %s" % (method, str(agg)))
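# Hypothetical sketch of the agreement computation that compute_agreement
# performs: given one metric score per system and the rank a human annotator
# assigned to each system, Kendall's tau measures how well the metric ordering
# matches the human ordering. The data below is illustrative only and is not
# taken from the script above.
from scipy.stats import kendalltau

metric_scores = [0.31, 0.27, 0.40, 0.22, 0.35, 0.29]  # one score per model
human_rank = [2, 4, 1, 6, 3, 5]                        # 1 = best
# Negate ranks so that larger means better on both sides before correlating.
tau, p_value = kendalltau(metric_scores, [-r for r in human_rank])
print("Kendall's tau: %.3f (p=%.3f)" % (tau, p_value))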
def calc_spice(gts, res):
    # gts and res map each image id to a list of plain caption strings;
    # res must contain exactly one candidate caption per image id.
    spice = Spice()
    score, scores = spice.compute_score(gts, res)
    return score, scores
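# Usage sketch for calc_spice (hypothetical example data): the inputs are
# either pre-tokenized caption strings or the output of PTBTokenizer.
gts = {'391895': ['a man riding a bike down a dirt road',
                  'a person on a bicycle on a country path']}
res = {'391895': ['a man rides a bicycle on a dirt road']}
overall, per_image = calc_spice(gts, res)
print(overall)       # corpus-level SPICE F-score
print(per_image[0])  # per-image breakdown returned by the Java backend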