Example #1
    def __init__(self,
                 ground_truth_filenames=None,
                 prediction_filename=None,
                 tious=None,
                 max_proposals=1000,
                 prediction_fields=PREDICTION_FIELDS,
                 verbose=False):
        # Check that the gt and submission files exist and load them
        if not tious:
            raise IOError('Please input a valid tIoU.')
        if not ground_truth_filenames:
            raise IOError('Please input a valid ground truth file.')
        if not prediction_filename:
            raise IOError('Please input a valid prediction file.')

        self.verbose = verbose
        self.tious = tious
        self.max_proposals = max_proposals
        self.pred_fields = prediction_fields
        self.ground_truths = self.import_ground_truths(ground_truth_filenames)
        self.prediction = self.import_prediction(prediction_filename)
        self.tokenizer = PTBTokenizer()

        # Set up scorers. If not verbose, we only use the one we're
        # testing on: METEOR.
        if self.verbose:
            self.scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3",
                                       "Bleu_4"]), (Meteor(), "METEOR"),
                            (Rouge(), "ROUGE_L"), (Cider(), "CIDEr")]
        else:
            self.scorers = [(Meteor(), "METEOR")]
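Since nearly every snippet below feeds its dictionaries through PTBTokenizer.tokenize, here is a minimal sketch of the format that call expects and returns, with made-up ids and captions (the standard pycocoevalcap tokenizer also needs Java on the PATH):

from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer

# Hypothetical ids and captions; ground-truth and result dicts must share keys.
captions = {
    'vid1 0.0 3.5': [{'caption': 'A man is riding a bike.'},
                     {'caption': 'Someone rides a bicycle down the road.'}],
}

tokenizer = PTBTokenizer()
tokenized = tokenizer.tokenize(captions)
# tokenized -> {'vid1 0.0 3.5': ['a man is riding a bike',
#                                'someone rides a bicycle down the road']}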
Example #2
def generate_data_list(mode, caption_file_path):
    # get file name
    if mode == 'train':
        file_name = 'train.json'
    elif mode == 'val':
        file_name = 'val_1.json'
    else:
        print 'Invalid mode: %s' % mode
        sys.exit()

    # get timestamps and sentences
    input_dict = {}
    data = json.loads(open(os.path.join( \
            caption_file_path, file_name)).read())
    for vid, content in data.iteritems():
        sentences = content['sentences']
        timestamps = content['timestamps']
        for t, s in zip(timestamps, sentences):
            dictkey = ' '.join([vid, str(t[0]), str(t[1])])
            input_dict[dictkey] = [{'caption': remove_nonascii(s)}]

    # ptbtokenizer
    tokenizer = PTBTokenizer()
    output_dict = tokenizer.tokenize(input_dict)

    with open('%s.list' % mode, 'wb') as f:
        for id, sentence in output_dict.iteritems():
            try:
                f.write('\t'.join(id.split() + sentence) + '\n')
            except:
                pass

    print 'Generate %s.list done ...' % mode
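A sketch of how the function above might be driven and what one line of the resulting list file looks like; the path and values below are placeholders, not taken from the original repository:

# Hypothetical invocation; the caption directory is a placeholder.
generate_data_list('train', '/data/activitynet/captions')

# Each line of the resulting train.list is the dict key split back into its
# three fields plus the tokenized caption, all tab-separated, e.g.
#   v_QOlSCBRmfWY    0.83    19.86    a man is drinking a glass of water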
Example #3
 def __init__(self, logger, args, data_path, is_training, passages=None):
     super(QGData, self).__init__(logger, args, data_path, is_training,
                                  passages)
     self.qg_tokenizer = PTBTokenizer()
     self.metric = "Bleu"
     if not self.is_training:
         self.qg_tokenizer = PTBTokenizer()
Example #4
def score(gts, res, ids):
    origingts = gts
    originres = res
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    """
  scorers = [
      (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
      (Meteor(),"METEOR"),
      (Rouge(), "ROUGE_L"),
      (Cider(), "CIDEr")]
  """
    scorers = [(Meteor(), "METEOR")]
    for scorer, method in scorers:
        score, scores = scorer.compute_score(gts, res)
        print("{:<14}:\t{:0.4f}".format(method, score))

    F1_score = F1(originres, origingts)
    avg = 0.0
    for noc_word in sorted(F1_score.keys()):
        print("{:<14}:\t{:0.4f}".format(noc_word, F1_score[noc_word]))
        avg += F1_score[noc_word]

    avg = avg / len(F1_score.keys())
    print("{:<14}:\t{:0.4f}".format("Average", avg))
Example #5
def test(model_path='models/model-61', video_feat_path=video_feat_path):

    train_data, test_data = get_video_data(video_data_path, video_feat_path, train_ratio=0.7)
    test_videos = test_data['video_path'].values
    test_captions = test_data['Description'].values
    ixtoword = pd.Series(np.load('./data/ixtoword.npy').tolist())

    test_videos_unique = list()
    test_captions_list = list()
    for (video, caption) in zip(test_videos, test_captions):
        if len(test_videos_unique) == 0 or test_videos_unique[-1] != video:
            test_videos_unique.append(video)
            test_captions_list.append([caption])
        else:
            test_captions_list[-1].append(caption)

    model = Video_Caption_Generator(
            dim_image=dim_image,
            n_words=len(ixtoword),
            dim_embed=dim_embed,
            dim_hidden=dim_hidden,
            batch_size=batch_size,
            encoder_max_sequence_length=encoder_step,
            decoder_max_sentence_length=decoder_step,
            bias_init_vector=None)

    video_tf, video_mask_tf, caption_tf, probs_tf, last_embed_tf = model.build_generator()
    sess = tf.InteractiveSession()

    saver = tf.train.Saver()
    saver.restore(sess, model_path)

    scorer = Meteor()
    scorer_bleu = Bleu(4)
    GTS = defaultdict(list)
    RES = defaultdict(list)
    counter = 0

    for (video_feat_path, caption) in zip(test_videos_unique, test_captions_list):
        generated_sentence = gen_sentence(
            sess, video_tf, video_mask_tf, caption_tf, video_feat_path, ixtoword)
        print video_feat_path, generated_sentence
        #print caption

        GTS[str(counter)] = [{'image_id':str(counter),'cap_id':i,'caption':s} for i, s in enumerate(caption)]
        RES[str(counter)] = [{'image_id':str(counter),'caption':generated_sentence[:-2]+'.'}]

        #GTS[video_feat_path] = caption
        #RES[video_feat_path] = [generated_sentence[:-2] + '.']
        counter += 1
        #ipdb.set_trace()

    tokenizer = PTBTokenizer()
    GTS = tokenizer.tokenize(GTS)
    RES = tokenizer.tokenize(RES)

    score, scores = scorer.compute_score(GTS, RES)
    print "METEOR", score
    score, scores = scorer_bleu.compute_score(GTS, RES)
    print "BLEU", score
Example #6
    def evaluate(self):
        imgIds = self.params['image_id']
        gts = self.gts
        res = self.res

        # =================================================
        # Set up scorers
        # =================================================
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # =================================================
        # Compute scores
        # =================================================
        eval = {}
        for scorer, method in scorers:
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
        self.setEvalImgs()
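The setEval, setImgToEvalImgs and setEvalImgs helpers used above (and in several later snippets) are not shown; below is a plausible sketch following the usual coco-caption convention, not necessarily this repository's exact code:

    def setEval(self, score, method):
        # Store the corpus-level score for one metric.
        self.eval[method] = score

    def setImgToEvalImgs(self, scores, imgIds, method):
        # Store the per-image scores for one metric.
        for imgId, score in zip(imgIds, scores):
            if imgId not in self.imgToEval:
                self.imgToEval[imgId] = {"image_id": imgId}
            self.imgToEval[imgId][method] = score

    def setEvalImgs(self):
        # Flatten the per-image results into a list for serialization.
        self.evalImgs = [eval_img for imgId, eval_img in self.imgToEval.items()]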
Example #7
def format_for_metrics(gts, res, rev_word_map):
    """
    to generate appropriate format to use pycocoevalcap
    :param gts: groud truth list
    :param res: hypothesis list
    :param rev_word_map: reverse word map, from idxes to character
    :return:
    """
    gts_dic = {}
    for idx, sents in enumerate(gts):
        tmp = []
        for sent in sents:
            tmp.append({
                u'image_id': idx,
                u'caption': ' '.join([rev_word_map[x] for x in sent])
            })
        gts_dic[idx] = tmp[:]

    res_dic = {}
    for idx, sent in enumerate(res):
        res_dic[idx] = [{
            u'image_id': idx,
            u'caption': ' '.join([rev_word_map[x] for x in sent])
        }]

    tokenizer = PTBTokenizer()
    return tokenizer.tokenize(gts_dic), tokenizer.tokenize(res_dic)
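A minimal sketch of the inputs format_for_metrics expects, using an invented vocabulary; each ground-truth item is a list of reference sentences and each sentence is a list of indices:

rev_word_map = {0: 'a', 1: 'dog', 2: 'runs', 3: 'fast'}

gts = [[[0, 1, 2], [0, 1, 2, 3]]]   # one item with two reference sentences
res = [[0, 1, 2]]                   # one hypothesis per item

gts_tok, res_tok = format_for_metrics(gts, res, rev_word_map)
# gts_tok[0] -> ['a dog runs', 'a dog runs fast'];  res_tok[0] -> ['a dog runs']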
Example #8
    def score(self, GT, RES, IDs):
        # edited by rgh
        #self.eval = {}
        self.eval = OrderedDict()
        self.imgToEval = {}
        gts = {}
        res = {}
        for ID in IDs:
            #            print ID
            gts[ID] = GT[ID]
            res[ID] = RES[ID]
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        # edited by rgh
        # scorers = [
        #     (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        #     (Meteor(),"METEOR"),
        #     (Rouge(), "ROUGE_L"),
        #     (Cider(), "CIDEr"),
        #     #(Spice(), "SPICE")
        # ]
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Cider(), "CIDEr"),
            (Rouge(), "ROUGE_L"),
            # (Spice(), "SPICE")
        ]

        # =================================================
        # Compute scores
        # =================================================
        eval = {}
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                # added by rgh
                # for sc, scs, m in zip(score, scores, method):
                #     self.setEval(sc, m)
                #     self.setImgToEvalImgs(scs, IDs, m)
                #     print("%s: %0.3f" % (m, sc))
                self.setEval("%.4f" % score[-1], method[-1])
                self.setImgToEvalImgs(scores[-1], IDs, method[-1])
                print("%s: %0.4f" % (method[-1], score[-1]))
            else:
                self.setEval("%.4f" % score, method)
                self.setImgToEvalImgs(scores, IDs, method)
                print("%s: %0.4f" % (method, score))

        # for metric, score in self.eval.items():
        #    print '%s: %.3f'%(metric, score)
        return self.eval
Example #9
def evaluate(gts, res):
    eval = {}

    # =================================================
    # Set up scorers
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"), (Cider(), "CIDEr")]

    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                eval[m] = sc
        else:
            eval[method] = score

    return eval
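Driving the evaluate function above end-to-end with toy data (captions invented); the returned dict maps each metric name to its corpus-level score:

gts = {
    0: [{'caption': 'a man is playing a guitar'},
        {'caption': 'someone strums an acoustic guitar'}],
    1: [{'caption': 'a woman slices a tomato'}],
}
res = {
    0: [{'caption': 'a man plays a guitar'}],
    1: [{'caption': 'a woman is cutting a tomato'}],
}
results = evaluate(gts, res)
# e.g. {'Bleu_1': ..., 'Bleu_4': ..., 'METEOR': ..., 'ROUGE_L': ..., 'CIDEr': ...}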
Example #10
    def get_dcc_scores(self):

        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]
        score_dict = {}
        for scorer, method in scorers:
            print 'computing %s score...' % (scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    score_dict[m] = sc
                    print "%s: %0.3f" % (m, sc)
            else:
                score_dict[method] = score
                print "%s: %0.3f" % (method, score)

        return score_dict
Example #11
    def evaluate(self):
        # =================================================
        # Tokenization
        # =================================================
        print("Tokenization")
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(self.ground_truth)
        preds = tokenizer.tokenize(self.prediction)

        # =================================================
        # Setup scorers
        # =================================================
        print("Setting up scorers...")
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
            # (Spice(), "SPICE")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print("Computing {} score...".format(scorer.method()))
            score, scores = scorer.compute_score(gts, preds)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.eval_res[m] = sc * 100
            else:
                self.eval_res[method] = score * 100
Example #12
    def get_scorers(self):
        # from pycoco_scorers_vizseq import BLEUScorerAll
        from pycocoevalcap.bleu.bleu import Bleu

        # from pycocoevalcap.spice.spice import Spice
        from pycocoevalcap.cider.cider import Cider
        from pycocoevalcap.rouge.rouge import Rouge
        from pycocoevalcap.meteor.meteor import Meteor
        from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
        import logging
        import transformers

        transformers.tokenization_utils.logger.setLevel(logging.ERROR)
        transformers.configuration_utils.logger.setLevel(logging.ERROR)
        transformers.modeling_utils.logger.setLevel(logging.ERROR)
        Scorer_ = namedtuple("Scorer_", ["cls_fn", "to_init", "out_str"])
        self.scorer_dict = {
            "bleu":
            Scorer_(Bleu(4, verbose=0), False,
                    ["bleu@1", "bleu@2", "bleu@3", "bleu@4"]),
            "meteor":
            Scorer_(Meteor(), False, ["meteor"]),
            "cider":
            Scorer_(Cider("corpus"), False, ["cider"]),
            "rouge":
            Scorer_(Rouge(), False, ["rouge"]),
            # "spice": Scorer_(Spice(), False, ["spice"]),
            "bert_score":
            Scorer_(BertScoreSimple, True, ["bert_score"]),
        }
        self.tokenizer = PTBTokenizer()
Example #13
def score(num, DIR):
    print("Testing results on epoch ", num, " in DIR=", DIR)
    print("Loading coco annotations")
    dataDir = '.'
    dataType = 'val2014'
    algName = 'fakecap'
    annFile = '%s/annotations/captions_%s.json' % (dataDir, dataType)
    subtypes = ['results', 'evalImgs', 'eval']
    [resFile, evalImgsFile, evalFile] = [
        '%s/results/captions_%s_%s_%s.json' % (dataDir, dataType, algName, subtype)
        for subtype in subtypes]
    coco_anns = COCO(annFile)
    print("COCO anns imported")

    path = DIR + str(num) + '_test_result.tar.gz'
    save = pickle.load(open(path))
    cocoRes = {}
    coco = {}
    for key, val in save.items():
        reslst = val[u'res']
        res = []
        for data in reslst:
            if data != u'<SEND>':
                res.append(data)
            else:
                break
        res = res[1:]
        #print "RES: ",reslst
        #print "ANN: ", val[u'ann']
        #res = [word for word in res if word!=u'<SEND>'][1:]
        #print "RES FIXED: ", res

        if len(res) == 0:
            res = [u'a']  #just not to be empty, and it has low low idf
        cocoRes[key] = [{u'caption': ' '.join(res)}]

        #coco[key] = [{u'caption':' '.join(val[u'ann'][1:-1])}]
        coco[key] = coco_anns.imgToAnns[key]
    print 'examples'
    for key in coco.keys()[:5]:
        print "IMG_NUM=", key
        print "Annotation: ", '\n'.join(
            [coco[key][i][u'caption'] for i in range(len(coco[key]))])
        print "Generated data: ", ' '.join(save[key][u'res'])
        print "Cleared generation: ", cocoRes[key][0][u'caption']

    print 'tokenization...'
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(coco)
    res = tokenizer.tokenize(cocoRes)

    print 'setting up scorers...'
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"), (Cider(), "CIDEr"),
               (Spice(), "SPICE")]

    for scorer, method in scorers:
        print 'computing %s score...' % (scorer.method())
        score, scores = scorer.compute_score(gts, res)
        print(score)
Example #14
def eval_div_stats(dataset, preds_n, model_id, split):
    tokenizer = PTBTokenizer()

    capsById = {}
    for i, d in enumerate(preds_n):
        d['id'] = i
        capsById[d['image_id']] = capsById.get(d['image_id'], []) + [d]

    n_caps_perimg = len(capsById[list(capsById.keys())[0]])
    print(n_caps_perimg)
    _capsById = capsById  # save the untokenized version
    capsById = tokenizer.tokenize(capsById)

    div_1, adiv_1 = compute_div_n(capsById, 1)
    div_2, adiv_2 = compute_div_n(capsById, 2)

    globdiv_1, _ = compute_global_div_n(capsById, 1)

    print(
        'Diversity Statistics are as follows: \n Div1: %.2f, Div2: %.2f, gDiv1: %d\n'
        % (div_1, div_2, globdiv_1))

    # compute mbleu
    scorer = Bleu(4)
    all_scrs = []
    scrperimg = np.zeros((n_caps_perimg, len(capsById)))

    for i in range(n_caps_perimg):
        tempRefsById = {}
        candsById = {}
        for k in capsById:
            tempRefsById[k] = capsById[k][:i] + capsById[k][i + 1:]
            candsById[k] = [capsById[k][i]]

        score, scores = scorer.compute_score(tempRefsById, candsById)
        all_scrs.append(score)
        scrperimg[i, :] = scores[1]

    all_scrs = np.array(all_scrs)

    out = {}
    out['overall'] = {'Div1': div_1, 'Div2': div_2, 'gDiv1': globdiv_1}
    for k, score in zip(range(4), all_scrs.mean(axis=0).tolist()):
        out['overall'].update({'mBLeu_%d' % (k + 1): score})
    imgToEval = {}
    for i, imgid in enumerate(capsById.keys()):
        imgToEval[imgid] = {'mBleu_2': scrperimg[:, i].mean()}
        imgToEval[imgid]['individuals'] = []
        for j, d in enumerate(_capsById[imgid]):
            imgToEval[imgid]['individuals'].append(preds_n[d['id']])
            imgToEval[imgid]['individuals'][-1]['mBleu_2'] = scrperimg[j, i]
    out['ImgToEval'] = imgToEval

    print(
        'Mean mutual Bleu scores on this set is:\nmBLeu_1, mBLeu_2, mBLeu_3, mBLeu_4'
    )
    print(all_scrs.mean(axis=0))

    return out
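For reference, a sketch of the preds_n layout eval_div_stats assumes: a flat list with the same number of sampled captions per image, each entry carrying at least image_id and caption (values invented):

preds_n = [
    {'image_id': 42, 'caption': 'a cat sits on a couch'},
    {'image_id': 42, 'caption': 'a cat is lying on a sofa'},
    {'image_id': 42, 'caption': 'a kitten rests on the couch'},
    {'image_id': 99, 'caption': 'a boy kicks a soccer ball'},
    {'image_id': 99, 'caption': 'a child plays football'},
    {'image_id': 99, 'caption': 'a kid kicks a ball outside'},
]
# Div1/Div2 measure distinct unigrams/bigrams per image; mBLeu scores each
# caption against the remaining captions of the same image.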
Example #15
    def initializeTokenizer(self):
        groundTruthCaptions = self.actualCaptions
        predictedCaptions = self.predictedCaptions

        #Tokenize
        tokenizer = PTBTokenizer()
        self.gtc_tokens = tokenizer.tokenize(groundTruthCaptions)
        self.pc_tokens = tokenizer.tokenize(predictedCaptions)
Example #16
 def __init__(self, preds, gts, gt_vid, verbose=False):
     self.pred_keys = ['results']
     # self.pred_keys = ['results', 'version', 'external_data']
     self.verbose = verbose
     self.preds = preds
     self.gts = gts
     self.gt_vids = gt_vid
     self.tokenizer = PTBTokenizer()
Example #17
 def __init__(self):
     # The following script requires Java 1.8.0 and pycocotools installed.
     # The pycocoevalcap can be installed with pip from M4C-Captioner's Github repo
     # but has no python3 support yet.
     from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
     from pycocoevalcap.bleu.bleu import Bleu
     self.tokenizer = PTBTokenizer()
     self.scorer = Bleu(4)
Example #18
 def __init__(self):
     # The following script requires Java 1.8.0 and pycocotools installed.
     # The pycocoevalcap can be installed with pip as
     # pip install git+https://github.com/ronghanghu/coco-caption.git@python23
     # Original pycocoevalcap code is at https://github.com/tylin/coco-caption
     # but has no python3 support yet.
     from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
     from pycocoevalcap.bleu.bleu import Bleu
     self.tokenizer = PTBTokenizer()
     self.scorer = Bleu(4)
Example #19
 def __init__(self, d_model, encoder, vocab_trg, d_hidden=2048,
              n_layers=6, n_heads=8, drop_ratio=0.1):
     super().__init__()
     # self.encoder = Encoder(d_model, d_hidden, n_vocab_src, n_layers,
     #                        n_heads, drop_ratio)
     self.encoder = encoder
     self.decoder = Decoder(d_model, d_hidden, vocab_trg, n_layers,
                           n_heads, drop_ratio)
     self.n_layers = n_layers
     self.tokenizer = PTBTokenizer()
Example #20
    def evaluate_tiou(self, tiou):
        # For every prediction, find its respective references with tIoU > the passed-in threshold.
        res = {}
        gts = {}
        unique_index = 0
        for vid_id in self.prediction:
            for pred in self.prediction[vid_id]:
                res[unique_index] = [{'caption': pred['sentence']}]
                matches = []
                for gt in self.ground_truths:
                    refs = gt[vid_id]
                    for ref_i, ref_timestamp in enumerate(refs['timestamps']):
                        if self.iou(pred['timestamp'], ref_timestamp) > tiou:
                            matches.append(refs['sentences'][ref_i])
                if len(matches) == 0:
                    gts[unique_index] = [{'caption': 'abc123!@#'}]
                else:
                    gts[unique_index] = [{'caption': v} for v in matches]
                unique_index += 1

        # Set up scorers
        if self.verbose:
            print '| Tokenizing ...'
        # Suppressing tokenizer output
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # Set up scorers
        if self.verbose:
            print '| Setting up scorers ...'
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # Compute scores
        output = {}
        for scorer, method in scorers:
            if self.verbose:
                print 'computing %s score...' % (scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    output[m] = sc
                    if self.verbose:
                        print "Calculated tIoU: %1.1f, %s: %0.3f" % (tiou, m,
                                                                     sc)
            else:
                output[method] = score
                if self.verbose:
                    print "Calculated tIoU: %1.1f, %s: %0.3f" % (tiou, method,
                                                                 score)
        return output
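The self.iou helper used above is not shown; here is a plausible sketch of a temporal IoU between two [start, end] segments (an assumption about the missing helper, not the original implementation):

    def iou(self, interval_1, interval_2):
        # Temporal intersection-over-union of two [start, end] segments.
        start_1, end_1 = interval_1
        start_2, end_2 = interval_2
        intersection = max(0.0, min(end_1, end_2) - max(start_1, start_2))
        union = max(end_1, end_2) - min(start_1, start_2)
        return intersection / union if union > 0 else 0.0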
Example #21
    def score(self, GT, RES, IDs):
        self.eval = {}
        self.imgToEval = {}
        gts = {}
        res = {}
        for ID in IDs:
            #            print ID
            gts[ID] = GT[ID]
            res[ID] = RES[ID]
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
            # (Spice(), "SPICE")
        ]

        # =================================================
        # Compute scores
        # =================================================
        eval = {}
        sub_category_score = None
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            if method == 'SPICE':
                score, scores, sub_category_score = scorer.compute_score(
                    gts, res)
            else:
                score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, IDs, m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, IDs, method)
                print("%s: %0.3f" % (method, score))

        # for metric, score in self.eval.items():
        #    print '%s: %.3f'%(metric, score)
        return self.eval, sub_category_score
Example #22
def score(gts, res, ids, log_out):
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"), (Cider(), "CIDEr")]
    for scorer, method in scorers:
        # print 'computing %s score...'%(scorer.method())
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                print >> log_out, "%s: %f" % (m, sc)
        else:
            print >> log_out, "%s: %f" % (method, score)
Example #23
    def score(self, GT, RES, IDs, result_file):
        self.eval = {}
        self.imgToEval = {}
        gts = {}
        res = {}
        for ID in IDs:
            #            print ID
            gts[ID] = GT[ID]
            res[ID] = RES[ID]
        print 'tokenization...'
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print 'setting up scorers...'
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        #         result_file = '/home/anguyen/workspace/paper_src/2018.icra.v2c.source/output/' + net_id + '/prediction/score_result.txt'
        print 'RESULT FILE: ', result_file

        fwriter = open(result_file, 'w')

        # =================================================
        # Compute scores
        # =================================================
        eval = {}
        for scorer, method in scorers:
            print 'computing %s score...' % (scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, IDs, m)
                    print "%s: %0.3f" % (m, sc)
                    fwriter.write("%s %0.3f\n" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, IDs, method)
                print "%s: %0.3f" % (method, score)
                fwriter.write("%s %0.3f\n" % (method, score))

        #for metric, score in self.eval.items():
        #    print '%s: %.3f'%(metric, score)
        return self.eval
Example #24
    def score(self, GT, RES, IDs):
        self.eval = {}
        self.imgToEval = {}
        gts = {}
        res = {}
        for ID in IDs:
            gts[ID] = GT[ID]
            res[ID] = RES[ID]
        print 'tokenization...'
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)
        with open('all_samples.txt', 'w') as f:
            for i in res.keys():
                print >> f, 'valid stuff'
                print >> f, '\t'.join(res[i])
                print >> f, 'ground truth'
                print >> f, '\n'.join(gts[i])
        # =================================================
        # Set up scorers
        # =================================================
        print 'setting up scorers...'
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            #            (Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        eval = {}
        for scorer, method in scorers:
            print 'computing %s score...' % (scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, IDs, m)
                    print "%s: %0.3f" % (m, sc)
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, IDs, method)
                print "%s: %0.3f" % (method, score)

        for metric, score in self.eval.items():
            print '%s: %.3f' % (metric, score)
        return self.eval
Example #25
def create_tokcap(data_folder=DATA_FOLDER):
    cap = COCO(COCO_TRAIN_CAP_FILE)
    
    listedCapMap = {}
    for i in cap.loadAnns(cap.getAnnIds()):
        listedCapMap[i['id']] = [dict([('caption',i['caption']), ('image_id', i['image_id'])])]
    tokenizedListedCapMap = PTBTokenizer().tokenize(listedCapMap)
    
    tokcap = [] #map caption ids to a map of its tokenized caption and image id
    for i, j in tokenizedListedCapMap.iteritems():
        tokcap += [(i, dict([('caption', j[0]), ('image_id', listedCapMap[i][0]['image_id'])]))]
    
    f = open(data_folder + '/preprocessed/tokcap.json', 'w')
    json.dump(tokcap, f)
    f.close()
Example #26
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # =================================================
        # Set up scorers
        # =================================================
        print('tokenization...')

        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...'%(scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    print("%s: %0.3f"%(m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                print("%s: %0.3f"%(method, score))
        self.setEvalImgs()
Example #27
    def score(self, GT, RES, IDs):
        self.eval = {}
        self.imgToEval = {}
        gts = {}
        res = {}
        for ID in IDs:
            gts[ID] = GT[ID]
            res[ID] = RES[ID]
        print('tokenization...')
        tokenizer = PTBTokenizer()
        '''
        print("gts: ")
        for key in gts:
        	print(key)
        	for value in gts[key]:
        		print(value)
        '''
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        # =================================================
        # Compute scores
        # =================================================
        eval = {}
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, IDs, m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, IDs, method)
                print("%s: %0.3f" % (method, score))

        for metric, score in self.eval.items():
            print('%s: %.3f' % (metric, score))
        return self.eval
Example #28
def eval_self_cider(dataset, preds_n, model_id, split):
    cache_path = os.path.join('eval_results/',
                              model_id + '_' + split + '_n.json')

    coco = getCOCO(dataset)
    valids = coco.getImgIds()

    # Get Cider_scorer
    Cider_scorer = Cider(df='corpus')

    tokenizer = PTBTokenizer()
    gts = {}
    for imgId in valids:
        gts[imgId] = coco.imgToAnns[imgId]
    gts = tokenizer.tokenize(gts)

    for imgId in valids:
        Cider_scorer.cider_scorer += (None, gts[imgId])
    Cider_scorer.cider_scorer.compute_doc_freq()
    Cider_scorer.cider_scorer.ref_len = np.log(
        float(len(Cider_scorer.cider_scorer.crefs)))

    # Prepare captions
    capsById = {}
    for d in preds_n:
        capsById[d['image_id']] = capsById.get(d['image_id'], []) + [d]

    capsById = tokenizer.tokenize(capsById)
    imgIds = list(capsById.keys())
    scores = Cider_scorer.my_self_cider([capsById[_] for _ in imgIds])

    def get_div(eigvals):
        eigvals = np.clip(eigvals, 0, None)
        return -np.log(np.sqrt(eigvals[-1]) /
                       (np.sqrt(eigvals).sum())) / np.log(len(eigvals))

    sc_scores = [get_div(np.linalg.eigvalsh(_ / 10)) for _ in scores]
    score = np.mean(np.array(sc_scores))

    imgToEval = {}
    for i, image_id in enumerate(imgIds):
        imgToEval[image_id] = {
            'self_cider': sc_scores[i],
            'self_cider_mat': scores[i].tolist()
        }
    return {'overall': {'self_cider': score}, 'imgToEval': imgToEval}
Example #29
class TextCapsBleu4Evaluator:
    def __init__(self):
        # The following script requires Java 1.8.0 and pycocotools installed.
        # The pycocoevalcap can be installed with pip as
        # pip install git+https://github.com/ronghanghu/coco-caption.git@python23
        # Original pycocoevalcap code is at https://github.com/tylin/coco-caption
        # but has no python3 support yet.
        try:
            from pycocoevalcap.bleu.bleu import Bleu
            from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
        except ModuleNotFoundError:
            print(
                "Please install pycocoevalcap module using "
                "pip install git+https://github.com/ronghanghu/coco-caption.git@python23"  # noqa
            )
            raise

        self.tokenizer = PTBTokenizer()
        self.scorer = Bleu(4)

    def eval_pred_list(self, pred_list):
        # Create reference and hypotheses captions.
        gts = {}
        res = {}
        for idx, entry in enumerate(pred_list):
            gts[idx] = [{"caption": a} for a in entry["gt_answers"]]
            res[idx] = [{"caption": entry["pred_answer"]}]

        gts = self.tokenizer.tokenize(gts)
        res = self.tokenizer.tokenize(res)
        score, _ = self.scorer.compute_score(gts, res)

        bleu4 = score[3]  # score is (Bleu-1, Bleu-2, Bleu-3, Bleu-4)
        return bleu4
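A minimal usage sketch for the evaluator above, with invented answers, showing the entry format eval_pred_list expects:

evaluator = TextCapsBleu4Evaluator()
pred_list = [
    {'gt_answers': ['a stop sign on a pole', 'a red stop sign'],
     'pred_answer': 'a red stop sign on a pole'},
    {'gt_answers': ['a bottle of coca cola'],
     'pred_answer': 'a coke bottle'},
]
bleu4 = evaluator.eval_pred_list(pred_list)
print('Bleu-4: {:.4f}'.format(bleu4))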
Example #30
    def __init__(self, ground_truth_filenames=None, prediction_filename=None,
                 verbose=False, all_scorer=False):
        # Check that the gt and submission files exist and load them
        if not ground_truth_filenames:
            raise IOError('Please input a valid ground truth file.')
        if not prediction_filename:
            raise IOError('Please input a valid prediction file.')

        self.verbose = verbose
        self.all_scorer = all_scorer
        self.ground_truths = self.import_ground_truths(ground_truth_filenames)
        self.prediction = self.import_prediction(prediction_filename)
        self.tokenizer = PTBTokenizer()

        # Set up scorers. If not verbose, we only use the one we're
        # testing on: METEOR.
        if self.verbose or self.all_scorer:
            self.scorers = [
                (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                (Meteor(),"METEOR"),
                (Rouge(), "ROUGE_L"),
                (Cider(), "CIDEr")
            ]
        else:
            self.scorers = [(Meteor(), "METEOR")]
Example #31
    def __init__(self,
                 data,
                 soda_type="c",
                 tious=None,
                 scorer="Meteor",
                 verbose=False):
        #self.data = data
        self.preds = data.preds
        self.gts = data.gts
        self.gt_vids = data.gt_vids
        self.soda_type = soda_type
        self.tious = [0.0] if tious is None else tious
        self.tokenizer = PTBTokenizer()
        if scorer == "BertScore":
            from nlpeval.bert_r_score import BertScore
        self.scorer = eval(scorer)()
        self.scorer_name = scorer
        self.verbose = verbose

        if soda_type == "a":  # averaging F-measure scores with IoU threshold = 0.9, 0.7, 0.5, 0.3
            self.soda_func = self.soda_a
        elif soda_type == "b":  # F-measure, where IoU threshold is set to 0.
            self.soda_func = self.soda_b
        elif soda_type == "c":  # F-measure, utilizing the IoU x METEOR score
            self.soda_func = self.soda_c
        elif soda_type == "d":  # F-measure of IoU score
            self.soda_func = self.soda_d

            class Dummy:
                def compute_score(self, x, y):
                    return [0, 0]

            self.scorer = Dummy()
        else:
            raise NotImplementedError
Example #32
    def score(self, GT, RES, IDs):
        self.eval = {}
        self.imgToEval = {}
        gts = {}
        res = {}
        for ID in IDs:
#            print ID
            gts[ID] = GT[ID]
            res[ID] = RES[ID]
        print 'tokenization...'
        tokenizer = PTBTokenizer()
        gts  = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print 'setting up scorers...'
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        eval = {}
        for scorer, method in scorers:
            print 'computing %s score...'%(scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, IDs, m)
                    print "%s: %0.3f"%(m, sc)
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, IDs, method)
                print "%s: %0.3f"%(method, score)
                
        #for metric, score in self.eval.items():
        #    print '%s: %.3f'%(metric, score)
        return self.eval
Example #33
def create_valtokcap(data_folder=DATA_FOLDER):
    import gc
    gc.collect()
    vcap = COCO(COCO_VALID_CAP_FILE)
    valimgids, tesimgids = getValimgids(), getTesimgids()
    valcap = []
    for i in valimgids:
        valcap += vcap.imgToAnns[i]

    tescap = []
    for i in tesimgids:
        tescap += vcap.imgToAnns[i]
        
    vallistedCapMap = {}
    for i in valcap:
        vallistedCapMap[i['id']] = [dict([('caption',i['caption']), ('image_id', i['image_id'])])]
    valtokenizedListedCapMap = PTBTokenizer().tokenize(vallistedCapMap)

    teslistedCapMap = {}
    for i in tescap:
        teslistedCapMap[i['id']] = [dict([('caption',i['caption']), ('image_id', i['image_id'])])]
    testokenizedListedCapMap = PTBTokenizer().tokenize(teslistedCapMap)
    
    valtokcap = [] #map caption ids to a map of its tokenized caption and image id
    for i, j in valtokenizedListedCapMap.iteritems():
        valtokcap += [(i, dict([('caption', j[0]), ('image_id', vallistedCapMap[i][0]['image_id'])]))]

    testokcap = []
    for i, j in testokenizedListedCapMap.iteritems():
        testokcap += [(i, dict([('caption', j[0]), ('image_id', teslistedCapMap[i][0]['image_id'])]))]

    f = open(data_folder + '/preprocessed/valtokcap.json', 'w')
    json.dump(valtokcap, f)
    f.close()

    f = open(data_folder + '/preprocessed/testokcap.json', 'w')
    json.dump(testokcap, f)
    f.close()