Example #1
def language_eval_excoco(predictions, predictions_bleu, sents_label_eval,
                         loader):

    Scorer = CiderD()
    Bleu_scorer = Bleu(4)
    METEOR_scorer = Meteor()
    ROUGE_scorer = Rouge()

    c_score, _ = Scorer.compute_score(sents_label_eval, predictions)
    b_score, _ = Bleu_scorer.compute_score(sents_label_eval, predictions_bleu)
    m_score, _ = METEOR_scorer.compute_score(sents_label_eval,
                                             predictions_bleu)
    r_score, _ = ROUGE_scorer.compute_score(sents_label_eval, predictions_bleu)

    print('Evaluating {} samples'.format(len(predictions)))

    print('Bleu_1 : ' + str(b_score[0]))
    print('Bleu_2 : ' + str(b_score[1]))
    print('Bleu_3 : ' + str(b_score[2]))
    print('Bleu_4 : ' + str(b_score[3]))
    print('METEOR : ' + str(m_score))
    print('ROUGE_L : ' + str(r_score))
    print('CIDEr : ' + str(c_score))

    lang_stat = {}
    lang_stat['BLEU_1'] = b_score[0]
    lang_stat['BLEU_2'] = b_score[1]
    lang_stat['BLEU_3'] = b_score[2]
    lang_stat['BLEU_4'] = b_score[3]
    lang_stat['METEOR'] = m_score
    lang_stat['ROUGE_L'] = r_score
    lang_stat['CIDEr'] = c_score

    return lang_stat
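
Note: the function above feeds two differently shaped prediction objects because the CIDEr-D scorer in this codebase typically consumes a list of records, while BLEU/METEOR/ROUGE take per-id caption dicts. A minimal sketch of the two shapes (an assumption based on the two-argument pattern; the ids and captions are illustrative only):

# CiderD-style input: a list of {'image_id', 'caption'} records (assumed).
predictions = [{'image_id': '0', 'caption': ['a man rides a horse']}]
# Bleu/Meteor/Rouge-style input: dicts keyed by id with lists of strings.
predictions_bleu = {'0': ['a man rides a horse']}
sents_label_eval = {'0': ['a man is riding a horse', 'a person rides a horse']}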
Example #2
def test(model, dataloader, args):
    scorer = Bleu(4)
    m_scorer = Meteor()
    r_scorer = Rouge()
    hyp = []
    ref = []
    model.eval()
    gold_file = open('tmp_gold.txt', 'w')
    pred_file = open('tmp_pred.txt', 'w')
    with tqdm(dataloader, desc='Test ', mininterval=1) as tq:
        for batch in tq:
            with torch.no_grad():
                seq = model(batch, beam_size=args.beam_size)
            r = write_txt(batch, batch['tgt_text'], gold_file, args)
            h = write_txt(batch, seq, pred_file, args)
            hyp.extend(h)
            ref.extend(r)
    hyp = dict(zip(range(len(hyp)), hyp))
    ref = dict(zip(range(len(ref)), ref))
    print(hyp[0], ref[0])
    print('BLEU INP', len(hyp), len(ref))
    print('BLEU', scorer.compute_score(ref, hyp)[0])
    print('METEOR', m_scorer.compute_score(ref, hyp)[0])
    print('ROUGE_L', r_scorer.compute_score(ref, hyp)[0])
    gold_file.close()
    pred_file.close()
Example #3
File: model_base.py Project: coufon/s2t
def test(model_path='models/model-61', video_feat_path=video_feat_path):

    train_data, test_data = get_video_data(video_data_path, video_feat_path, train_ratio=0.7)
    test_videos = test_data['video_path'].values
    test_captions = test_data['Description'].values
    ixtoword = pd.Series(np.load('./data/ixtoword.npy').tolist())

    test_videos_unique = list()
    test_captions_list = list()
    for (video, caption) in zip(test_videos, test_captions):
        if len(test_videos_unique) == 0 or test_videos_unique[-1] != video:
            test_videos_unique.append(video)
            test_captions_list.append([caption])
        else:
            test_captions_list[-1].append(caption)

    model = Video_Caption_Generator(
            dim_image=dim_image,
            n_words=len(ixtoword),
            dim_embed=dim_embed,
            dim_hidden=dim_hidden,
            batch_size=batch_size,
            encoder_max_sequence_length=encoder_step,
            decoder_max_sentence_length=decoder_step,
            bias_init_vector=None)

    video_tf, video_mask_tf, caption_tf, probs_tf, last_embed_tf = model.build_generator()
    sess = tf.InteractiveSession()

    saver = tf.train.Saver()
    saver.restore(sess, model_path)

    scorer = Meteor()
    scorer_bleu = Bleu(4)
    GTS = defaultdict(list)
    RES = defaultdict(list)
    counter = 0

    for (video_feat_path, caption) in zip(test_videos_unique, test_captions_list):
        generated_sentence = gen_sentence(
            sess, video_tf, video_mask_tf, caption_tf, video_feat_path, ixtoword)
        print(video_feat_path, generated_sentence)
        # print(caption)

        GTS[str(counter)] = [{'image_id':str(counter),'cap_id':i,'caption':s} for i, s in enumerate(caption)]
        RES[str(counter)] = [{'image_id':str(counter),'caption':generated_sentence[:-2]+'.'}]

        #GTS[video_feat_path] = caption
        #RES[video_feat_path] = [generated_sentence[:-2] + '.']
        counter += 1
        #ipdb.set_trace()

    tokenizer = PTBTokenizer()
    GTS = tokenizer.tokenize(GTS)
    RES = tokenizer.tokenize(RES)

    score, scores = scorer.compute_score(GTS, RES)
    print "METEOR", score
    score, scores = scorer_bleu.compute_score(GTS, RES)
    print "BLEU", score
Example #4
def _define_metrics(gts, res):
    bleu_scorer = Bleu(n=4)
    bleu, _ = bleu_scorer.compute_score(gts=gts, res=res)

    rouge_scorer = Rouge()
    rouge, _ = rouge_scorer.compute_score(gts=gts, res=res)

    cider_scorer = Cider()
    cider, _ = cider_scorer.compute_score(gts=gts, res=res)

    meteor_scorer = Meteor()
    meteor, _ = meteor_scorer.compute_score(gts=gts, res=res)

    for i in range(4):
        bleu[i] = round(bleu[i], 4)

    return bleu, round(meteor, 4), round(rouge, 4), round(cider, 4)
Example #5
def eval(result_gts_path, result_res_path):
    with open(result_gts_path, 'r') as file:
        gts_dict = json.load(file)
    with open(result_res_path, 'r') as file:
        res_dict = json.load(file)

    bleu_score = Bleu(n=4)
    bleu, _ = bleu_score.compute_score(gts=gts_dict, res=res_dict)

    meteor_score = Meteor()
    meteor, _ = meteor_score.compute_score(gts=gts_dict, res=res_dict)

    rouge_scorer = Rouge()
    rouge, _ = rouge_scorer.compute_score(gts=gts_dict, res=res_dict)

    cider_scorer = Cider()
    cider, _ = cider_scorer.compute_score(gts=gts_dict, res=res_dict)

    return bleu, meteor, rouge, cider
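
eval() assumes both JSON files already hold the {id: [caption, ...]} mapping that compute_score expects on each side. A hypothetical pair of files illustrating that shape (the id and captions are made up):

# result_gts.json - multiple references per image id
# {"391895": ["a man riding a bike", "a person on a bicycle"]}
# result_res.json - one hypothesis per image id
# {"391895": ["a man rides a bicycle"]}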
Example #6
def calculate_metric(rnn, meteor=None):
    gts = {}
    res = {}
    lp_avg = 0.0
    lp_c = 0
    for idx in range(rnn.V_valid.shape[0]):
        iid = rnn.Id_valid[idx]
        if iid not in gts: gts[iid] = []
        #gts[iid].append(' '.join([rnn.dp.i2w[w] for w in rnn.X_valid[idx] if w != 0][::-1]))
        gts[iid] = [
            ' '.join(rnn.dp.tokens[i][::-1])
            for i in rnn.dp.img_id_to_tokens[iid]
        ]
        if iid in res: continue
        res[iid] = []
        #pos_sen, pos_att = rnn.get_sentence(rnn.V_valid[idx], senti=np.array([1.0], dtype=theano.config.floatX))
        (lp, pos_sen) = decoder_beamsearch(rnn,
                                           rnn.V_valid[idx],
                                           senti=1.0,
                                           beam_size=1)
        pos_sen = pos_sen[:-1]
        print(' '.join(pos_sen[::-1]))
        res[iid].append(' '.join(pos_sen[::-1]))
        lp_avg += np.exp(lp)
        lp_c += 1
    lp_avg /= float(lp_c)
    print("Average sentence likelihood:", lp_avg)

    bleu = Bleu()
    print("Bleu:")
    print("Positive:", bleu.compute_score(gts, res)[0])
    rouge = Rouge()
    print("Rouge:")
    print("Positive:", rouge.compute_score(gts, res)[0])
    if meteor is None:
        meteor = Meteor()
    print("Meteor:")
    mscore = meteor.compute_score(gts, res)[0]
    print("Positive:", mscore)
    return mscore
Example #7
def check_meteor_works():
    try:
        met = Meteor()
    except (AttributeError, FileNotFoundError) as e:
        print(f"Meteor couldn't start due to {e}")
        return False

    gts = {
        "datapoint1": ["hello my name is", "meteor test program"],
        "datapoint2": ["another test sentence", "this the end of the test."]
    }
    refs = {
        "datapoint1": ["is my name really meteor"],
        "datapoint2": ["probably another test sentence"]
    }
    try:
        output = met.compute_score(gts, refs)
    except (ValueError, FileNotFoundError, AttributeError) as e:
        print(f"{e.__class__.__name__}: {e}")
        met.lock.release()
        return False
    print(output)
    return True
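
The manual met.lock.release() above works because pycocoevalcap's Meteor drives a Java subprocess and guards it with an internal lock that compute_score acquires; if scoring raises mid-call, the lock stays held. A minimal sketch of a cleanup pattern that avoids touching the lock directly (close() is an assumption; it exists in some pycocoevalcap forks, while others only expose the meteor_p subprocess handle):

def safe_meteor_score(gts, res):
    met = Meteor()
    try:
        return met.compute_score(gts, res)
    finally:
        # Prefer a close() method if the fork provides one; otherwise kill
        # the underlying Java subprocess so it does not linger.
        if hasattr(met, 'close'):
            met.close()
        elif hasattr(met, 'meteor_p'):
            met.meteor_p.kill()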
Example #8
def coco_caption_metrics(predictions_list,
                         image_id_list,
                         vocabulary_path='data/vocabulary.json',
                         max_caption_length=25,
                         batch_size=32,
                         is_training=True):
    with open(vocabulary_path, 'r') as file:
        vocabulary_list = json.load(file)
    word2id = {}
    for i in range(len(vocabulary_list)):
        word2id[vocabulary_list[i]] = i
    id2word = {v: k for k, v in word2id.items()}

    with open('data/captions_gt.json', 'r') as file:
        captions_gt_dict = json.load(file)

    gts = {}
    res = {}
    for i in range(len(predictions_list)):
        for j in range(batch_size):
            sen_input = []
            for k in range(max_caption_length):
                id_input = int(predictions_list[i][k][j])
                sen_input.append(id2word[id_input])

            sen_pre = []
            for n in range(max_caption_length):
                word = sen_input[n]
                if word != '</S>':
                    sen_pre.append(word)
                else:
                    break

            str_input = ' '.join(sen_pre)
            image_id = image_id_list[i][j][0]

            # print(image_id)
            res[image_id] = [str_input]
            gts[image_id] = captions_gt_dict[str(image_id)]

    if not is_training:
        # for key in gts.keys():
        #     str_input = res[key]
        #     str_grundtruth = gts[key]
        #     print(key)
        #     print(str_input)
        #     print(str_grundtruth)
        #     print('*' * 100)

        with open('data/result/result_res.json', 'w') as file:
            json.dump(res, file)
        with open('data/result/result_gts.json', 'w') as file:
            json.dump(gts, file)
        # print('result.json get success')

    bleu_scorer = Bleu(n=4)
    bleu, _ = bleu_scorer.compute_score(gts=gts, res=res)

    rouge_scorer = Rouge()
    rouge, _ = rouge_scorer.compute_score(gts=gts, res=res)

    cider_scorer = Cider()
    cider, _ = cider_scorer.compute_score(gts=gts, res=res)

    meteor_scorer = Meteor()
    meteor, _ = meteor_scorer.compute_score(gts=gts, res=res)

    for i in range(4):
        bleu[i] = round(bleu[i], 4)
    return bleu, round(meteor, 4), round(rouge, 4), round(cider, 4)
Example #9
class RealTransformer(nn.Module):
    def __init__(self,
                 d_model,
                 encoder,
                 vocab_trg,
                 d_hidden=2048,
                 n_layers=6,
                 n_heads=8,
                 drop_ratio=0.1):
        super().__init__()
        # self.encoder = Encoder(d_model, d_hidden, n_vocab_src, n_layers,
        #                        n_heads, drop_ratio)
        self.encoder = encoder
        self.decoder = Decoder(d_model, d_hidden, vocab_trg, n_layers, n_heads,
                               drop_ratio)
        self.n_layers = n_layers
        self.tokenizer = PTBTokenizer()

    # Convert indices to the corresponding words and strip the special tokens
    def denum(self, data):
        return ' '.join(self.decoder.vocab.itos[i] for i in data).replace(
            ' <eos>', '').replace(' <pad>', '').replace(' .',
                                                        '').replace('  ', '')

    # x:(5,480,1024)  s:(5,20)  x_mask:(5,480,1)
    def forward(self, x, s, x_mask=None, sample_prob=0):
        encoding = self.encoder(x, x_mask)  # [(5,480,1024),(5,480,1024)]

        max_sent_len = 20
        if not self.training:
            if isinstance(s, list):
                hiddens, _ = self.decoder.greedy(encoding, max_sent_len)
                h = hiddens[-1]
                targets = None
            else:
                h = self.decoder(s[:, :-1].contiguous(), encoding)
                targets, h = mask(s[:, 1:].contiguous(), h)
            logits = self.decoder.out(h)
        else:
            if sample_prob == 0:
                h = self.decoder(
                    s[:, :-1].contiguous(), encoding
                )  # (5,19),[(5,480,1024),(5,480,1024)]-->(5,19,1024)
                # use the mask to drop pads in the sentence and gather the matching features
                targets, h = mask(s[:, 1:].contiguous(),
                                  h)  # targets:(63)   h:(63,1024)
                logits = self.decoder.out(h)
            else:
                model_pred = self.decoder.sampling(encoding,
                                                   s,
                                                   s.size(1) - 2,
                                                   sample_prob,
                                                   is_argmax=True)
                model_pred.detach_()
                new_y = torch.cat((Variable(
                    model_pred.data.new(s.size(0), 1).long().fill_(
                        self.decoder.vocab.stoi['<init>'])), model_pred), 1)
                h = self.decoder(new_y, encoding)
                targets, h = mask(s[:, 1:].contiguous(), h)
                logits = self.decoder.out(h)

        return logits, targets  # (63,24) / (63)

    #x: (91,480,1024)
    #x_mask: (91,480,1)
    #T:20
    def greedy(self, x, x_mask, T):
        encoding = self.encoder(x, x_mask)  # [(91,480,1024),(91,480,1024)]

        _, pred = self.decoder.greedy(encoding, T)  # (91,20)

        sent_lst = []
        for i in range(pred.data.size(0)):
            sent_lst.append(self.denum(pred.data[i]))
        return sent_lst  # (91,20)

    # --------------------------------------------------scst_loss-----------------------------------------------------#
    """
    scst_loss indicates self-critical sequence training (as in https://arxiv.org/abs/1612.00563). 
    We didn't report results w/ this training loss and hence it's deprecated. Still, we keep this 
    option out there in case people need (might need to upgrade some of the code to pytorch 0.4
    
    """

    def scst(self, x, x_mask, s):
        self.scorer = Meteor()
        encoding = self.encoder(x, x_mask)

        # greedy part
        _, pred = self.decoder.greedy(encoding, s.size(1) - 1)
        pred_greedy = []
        for i in range(pred.data.size(0)):
            pred_greedy.append(self.denum(pred.data[i]))

        del pred
        # sampling part
        model_pred = self.decoder.sampling(encoding,
                                           s,
                                           s.size(1) - 2,
                                           sample_prob=1,
                                           is_argmax=False)
        model_pred.detach_()
        new_y = torch.cat((Variable(
            model_pred.data.new(s.size(0), 1).long().fill_(
                self.decoder.vocab.stoi['<init>'])), model_pred), 1)
        h = self.decoder(new_y, encoding)
        B, T, H = h.size()
        logits = self.decoder.out(h.view(-1, H))  #.view(B, T, -1)

        mask = (s[:, 1:] != 1).float()
        _, pred_sample = torch.max(logits, -1)

        p_model = F.log_softmax(logits, dim=-1)
        logp = p_model[torch.arange(0, B * T).type(logits.data.type()).long(),
                       pred_sample.data].view(B, T)

        pred_sample = pred_sample.view(B, T)

        assert pred_sample.size(0) == len(pred_greedy), (
            'pred_sample should have the same number of sentences as in '
            'pred_greedy, got {} and {} instead'.format(B, len(pred_greedy)))
        assert pred_sample.size() == (B, T), ('pred_sample should have size (B, T)')

        pred_sample.detach_()

        # rewards
        sentence_greedy, sentence_sample, sentence_gt = {}, {}, {}
        for i in range(len(pred_greedy)):
            sentence_greedy[i] = [{'caption': pred_greedy[i]}]
            sentence_sample[i] = [{'caption': self.denum(pred_sample.data[i])}]
            sentence_gt[i] = [{'caption': self.denum(s.data[i, 1:])}]

        tok_greedy = self.tokenizer.tokenize(sentence_greedy)
        tok_sample = self.tokenizer.tokenize(sentence_sample)
        tok_gt = self.tokenizer.tokenize(sentence_gt)
        _, r_greedy = self.scorer.compute_score(tok_gt, tok_greedy)
        _, r_sample = self.scorer.compute_score(tok_gt, tok_sample)

        r_diff = [r_s - r_g for (r_s, r_g) in zip(r_sample, r_greedy)]
        r_diff = Variable(torch.Tensor(r_diff).type(logp.data.type()))

        loss = -torch.mean(torch.sum(r_diff.view(-1, 1) * logp * mask, 1))

        return loss
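
scst() above is a REINFORCE-style estimator: each sampled caption's token log-probabilities are weighted by how much its METEOR reward beats the greedy baseline (Rennie et al., https://arxiv.org/abs/1612.00563). A self-contained numeric sketch of just the weighting step, with hypothetical rewards:

import torch

r_sample = torch.tensor([0.30, 0.12])  # METEOR of sampled captions (made-up values)
r_greedy = torch.tensor([0.25, 0.20])  # METEOR of greedy baseline captions
logp = torch.tensor([[-2.1, -1.3], [-1.8, -0.9]])  # log-probs of the sampled tokens
mask = torch.ones_like(logp)           # 1 for real tokens, 0 for padding

advantage = (r_sample - r_greedy).view(-1, 1)  # positive -> reinforce, negative -> suppress
loss = -torch.mean(torch.sum(advantage * logp * mask, 1))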
Example #10
def evaluate(beam_size):
    """
    Evaluation

    :param beam_size: beam size at which to generate captions for evaluation
    :return: BLEU-4 score
    """
    # DataLoader
    loader = torch.utils.data.DataLoader(CaptionDataset(
        data_folder,
        data_name,
        'TEST',
        transform=transforms.Compose([normalize])),
                                         batch_size=1,
                                         shuffle=True,
                                         num_workers=0,
                                         pin_memory=False)

    # TODO: Batched Beam Search
    # Therefore, do not use a batch_size greater than 1 - IMPORTANT!

    # Lists to store references (true captions), and hypothesis (prediction) for each image
    # If for n images, we have n hypotheses, and references a, b, c... for each image, we need -
    # references = [[ref1a, ref1b, ref1c], [ref2a, ref2b], ...], hypotheses = [hyp1, hyp2, ...]
    references = dict()
    hypotheses = dict()

    # For each image
    for j, (image, caps, caplens, allcaps) in enumerate(
            tqdm(loader, desc="EVALUATING AT BEAM SIZE " + str(beam_size))):

        k = beam_size

        # Move to GPU device, if available
        image = image.to(device)  # (1, 3, 256, 256)

        attrs, encoder_out = encoder(image)
        attrs = attrs.expand(3, attrs_dim)

        enc_image_size = encoder_out.size(1)
        encoder_dim = encoder_out.size(3)
        encoder_out = encoder_out.view(1, -1, encoder_dim)
        num_pixels = encoder_out.size(1)
        encoder_out = encoder_out.expand(k, num_pixels, encoder_dim)
        x0 = decoder.init_x0(attrs)

        # Tensor to store top k previous words at each step; now they're just <start>
        k_prev_words = torch.LongTensor([[word_map['<start>']]] * k).to(
            device)  # (k, 1)

        # Tensor to store top k sequences; now they're just <start>
        seqs = k_prev_words  # (k, 1)

        # Tensor to store top k sequences' scores; now they're just 0
        top_k_scores = torch.zeros(k, 1).to(device)  # (k, 1)

        # Lists to store completed sequences and scores
        complete_seqs = list()
        complete_seqs_scores = list()

        # Start decoding
        step = 1
        h1, c1, h2, c2 = decoder.init_hidden_state(attrs,
                                                   encoder_out,
                                                   zero=True)
        h1, c1 = decoder.decode_step1(x0, (h1, c1))
        # s is a number less than or equal to k, because sequences are removed from this process once they hit <end>
        while True:

            embeddings = decoder.embedding(k_prev_words).squeeze(
                1)  # (s, embed_dim)

            h1, c1 = decoder.decode_step1(embeddings, (h1, c1))

            awe, _ = decoder.attention(encoder_out, h1, h2)
            # gate = decoder.sigmoid(decoder.f_beta(h2))
            # awe = gate * awe

            h2, c2 = decoder.decode_step2(torch.cat([embeddings, awe], dim=1),
                                          (h2, c2))

            scores = decoder.fc2(decoder.dropout2(h2))
            scores = F.log_softmax(scores, dim=1)

            # Add
            scores = top_k_scores.expand_as(scores) + scores  # (s, vocab_size)

            # For the first step, all k points will have the same scores (since same k previous words, h, c)
            if step == 1:
                top_k_scores, top_k_words = scores[0].topk(k, 0, True,
                                                           True)  # (s)
            else:
                # Unroll and find top scores, and their unrolled indices
                # (s) the k largest among all scores
                top_k_scores, top_k_words = scores.view(-1).topk(
                    k, 0, True, True)

            # Convert unrolled indices to actual indices of scores
            # the scores were flattened above; prev_word_inds recovers which beam each top candidate came from
            prev_word_inds = top_k_words // vocab_size  # (s)
            next_word_inds = top_k_words % vocab_size  # (s)

            # Add new words to sequences
            seqs = torch.cat(
                [seqs[prev_word_inds],
                 next_word_inds.unsqueeze(1)], dim=1)  # (s, step+1)

            # Which sequences are incomplete (didn't reach <end>)?
            incomplete_inds = [
                ind for ind, next_word in enumerate(next_word_inds)
                if next_word != word_map['<end>']
            ]
            complete_inds = list(
                set(range(len(next_word_inds))) - set(incomplete_inds))

            # Set aside complete sequences
            if len(complete_inds) > 0:
                complete_seqs.extend(seqs[complete_inds].tolist())
                complete_seqs_scores.extend(top_k_scores[complete_inds])
            k -= len(complete_inds)  # reduce beam length accordingly

            # Proceed with incomplete sequences
            if k == 0:
                break
            seqs = seqs[incomplete_inds]
            h1 = h1[prev_word_inds[incomplete_inds]]
            c1 = c1[prev_word_inds[incomplete_inds]]
            h2 = h2[prev_word_inds[incomplete_inds]]
            c2 = c2[prev_word_inds[incomplete_inds]]
            encoder_out = encoder_out[prev_word_inds[incomplete_inds]]

            top_k_scores = top_k_scores[incomplete_inds].unsqueeze(1)
            k_prev_words = next_word_inds[incomplete_inds].unsqueeze(1)

            # Break if things have been going on too long
            if step > 50:
                break
            step += 1

        i = complete_seqs_scores.index(max(complete_seqs_scores))
        seq = complete_seqs[i]

        # References
        img_caps = allcaps[0].tolist()
        img_captions = list(
            map(
                lambda c: [
                    rev_word_map[w] for w in c if w not in {
                        word_map['<start>'], word_map['<end>'], word_map[
                            '<pad>']
                    }
                ], img_caps))  # remove <start> and pads
        img_caps = [' '.join(c) for c in img_captions]
        # print(img_caps)
        references[str(j)] = img_caps

        # Hypotheses
        hypothesis = ([
            rev_word_map[w] for w in seq if w not in
            {word_map['<start>'], word_map['<end>'], word_map['<pad>']}
        ])
        hypothesis = [' '.join(hypothesis)]
        # print(hypothesis)
        hypotheses[str(j)] = hypothesis

        assert len(references) == len(hypotheses)

    # Calculate BLEU-1~BLEU4 scores
    m1 = Bleu()
    m2 = Meteor()
    m3 = Cider()
    m4 = Rouge()
    m5 = Spice()
    (score1, scores1) = m1.compute_score(references, hypotheses)
    (score2, scores2) = m2.compute_score(references, hypotheses)
    (score3, scores3) = m3.compute_score(references, hypotheses)
    (score4, scores4) = m4.compute_score(references, hypotheses)
    (score5, scores5) = m5.compute_score(references, hypotheses)

    return score1, score2, score3, score4, score5
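
The flat-index trick in the loop above is worth isolating: scores.view(-1) collapses the (beams, vocab_size) grid, so each index returned by topk encodes both the source beam and the chosen word. A standalone sketch (beam and vocab sizes are arbitrary):

import torch

vocab_size = 5
flat_scores = torch.randn(3 * vocab_size)  # 3 beams x 5 words, flattened
top_scores, flat_idx = flat_scores.topk(3)

prev_beam = flat_idx // vocab_size  # which beam each candidate extends
next_word = flat_idx % vocab_size   # which word extends it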
Example #11
    rouge, _ = rouge_obj.compute_score(wtd, wrd)

    rouges.append(rouge)

print(np.mean(rouges))

with open("%s-rouges.txt" % system, 'w') as outf:
    for r in rouges:
        outf.write(str(r) + '\n')

for i in range(len(ref1_strs)):
    word_target_dict[i] = [ref1_strs[i], ref2_strs[i]]
    word_response_dict[i] = [sys_strs[i]]

bleu_score, bleu_scores = bleu_obj.compute_score(word_target_dict,
                                                 word_response_dict)
bleu1_score, _, _, bleu4_score = bleu_score
bleu1_scores, _, _, bleu4_scores = bleu_scores
meteor_score, meteor_scores = meteor_obj.compute_score(word_target_dict,
                                                       word_response_dict)
rouge_score, rouge_scores = rouge_obj.compute_score(word_target_dict,
                                                    word_response_dict)
cider_score, cider_scores = cider_obj.compute_score(word_target_dict,
                                                    word_response_dict)

print("ROUGE-L: ", rouge_score)
print("BLEU-1: ", bleu1_score)
print("BLEU-4: ", bleu4_score)
print("METEOR: ", meteor_score)
print("CiDER: ", cider_score)
Example #12
class RealTransformer(nn.Module):  # to caption the proposal object
    # for each proposal, encoder will forward again (with mask)

    def __init__(self,
                 d_model,
                 encoder,
                 vocab_trg,
                 d_hidden=2048,
                 n_layers=6,
                 n_heads=8,
                 drop_ratio=0.1):
        super().__init__()
        # self.encoder = Encoder(d_model, d_hidden, n_vocab_src, n_layers,
        #                        n_heads, drop_ratio)
        self.encoder = encoder
        self.decoder = Decoder(d_model, d_hidden, vocab_trg, n_layers, n_heads,
                               drop_ratio)
        self.n_layers = n_layers
        self.tokenizer = PTBTokenizer()

    def denum(self, data):
        return ' '.join(self.decoder.vocab.itos[i] for i in data).replace(
            ' <eos>', '').replace(' <pad>', '').replace(' .',
                                                        '').replace('  ', '')

    def forward(self, x, s, x_mask=None, sample_prob=0):
        # s: sentence hidden state
        # x: the feature sequence
        # x_mask: the proposal mask
        encoding = self.encoder(x, x_mask)  #encode
        max_sent_len = 20  #sentence length
        if not self.training:
            # Infer Mode
            if isinstance(s, list):
                hiddens, _ = self.decoder.greedy(encoding, max_sent_len)
                h = hiddens[-1]  # take all the states of the last layer
                targets = None
            else:
                h = self.decoder(s[:, :-1].contiguous(), encoding)
                # advance one step from the existing hidden state?
                targets, h = mask(s[:, 1:].contiguous(), h)

            logits = self.decoder.out(h)
        else:
            # Training Mode
            if sample_prob == 0:  # draw everything from the ground truth
                h = self.decoder(s[:, :-1].contiguous(), encoding)
                # one step is enough
                targets, h = mask(s[:, 1:].contiguous(), h)
                """
                    def mask(targets, out):
                        mask = (targets != 1)
                        out_mask = mask.unsqueeze(-1).expand_as(out)
                        return targets[mask], out[out_mask].view(-1, out.size(-1))
                """
                # the start token is dropped
                logits = self.decoder.out(h)
            else:
                model_pred = self.decoder.sampling(encoding,
                                                   s,
                                                   s.size(1) - 2,
                                                   sample_prob,
                                                   is_argmax=True)
                model_pred.detach_()
                new_y = torch.cat((Variable(
                    model_pred.data.new(s.size(0), 1).long().fill_(
                        self.decoder.vocab.stoi['<init>'])), model_pred), 1)
                h = self.decoder(new_y, encoding)
                targets, h = mask(s[:, 1:].contiguous(), h)
                logits = self.decoder.out(h)

        return logits, targets

    def greedy(self, x, x_mask, T):
        encoding = self.encoder(x, x_mask)  # Encode Visual Content

        _, pred = self.decoder.greedy(encoding, T)
        # Get The Prediction Sentence By "Greedy Strategy"
        sent_lst = []
        for i in range(pred.data.size(0)):
            sent_lst.append(self.denum(pred.data[i]))
        return sent_lst

    def scst(self, x, x_mask, s):
        self.scorer = Meteor()
        encoding = self.encoder(x, x_mask)

        # greedy part
        _, pred = self.decoder.greedy(encoding, s.size(1) - 1)
        pred_greedy = []
        for i in range(pred.data.size(0)):
            pred_greedy.append(self.denum(pred.data[i]))

        del pred
        # sampling part
        model_pred = self.decoder.sampling(encoding,
                                           s,
                                           s.size(1) - 2,
                                           sample_prob=1,
                                           is_argmax=False)
        model_pred.detach_()
        new_y = torch.cat((Variable(
            model_pred.data.new(s.size(0), 1).long().fill_(
                self.decoder.vocab.stoi['<init>'])), model_pred), 1)
        h = self.decoder(new_y, encoding)
        B, T, H = h.size()
        logits = self.decoder.out(h.view(-1, H))  #.view(B, T, -1)

        mask = (s[:, 1:] != 1).float()
        _, pred_sample = torch.max(logits, -1)

        p_model = F.log_softmax(logits, dim=-1)
        logp = p_model[torch.arange(0, B * T).type(logits.data.type()).long(),
                       pred_sample.data].view(B, T)

        pred_sample = pred_sample.view(B, T)

        assert pred_sample.size(0) == len(pred_greedy), (
            'pred_sample should have the same number of sentences as in '
            'pred_greedy, got {} and {} instead'.format(B, len(pred_greedy)))
        assert pred_sample.size() == (B, T), ('pred_sample should have size (B, T)')

        pred_sample.detach_()

        # rewards
        sentence_greedy, sentence_sample, sentence_gt = {}, {}, {}
        for i in range(len(pred_greedy)):
            sentence_greedy[i] = [{'caption': pred_greedy[i]}]
            sentence_sample[i] = [{'caption': self.denum(pred_sample.data[i])}]
            sentence_gt[i] = [{'caption': self.denum(s.data[i, 1:])}]

        tok_greedy = self.tokenizer.tokenize(sentence_greedy)
        tok_sample = self.tokenizer.tokenize(sentence_sample)
        tok_gt = self.tokenizer.tokenize(sentence_gt)
        _, r_greedy = self.scorer.compute_score(tok_gt, tok_greedy)
        _, r_sample = self.scorer.compute_score(tok_gt, tok_sample)

        r_diff = [r_s - r_g for (r_s, r_g) in zip(r_sample, r_greedy)]
        r_diff = Variable(torch.Tensor(r_diff).type(logp.data.type()))

        loss = -torch.mean(torch.sum(r_diff.view(-1, 1) * logp * mask, 1))

        return loss
Example #13
def meteor(gts, res):
    scorer = Meteor()
    score, scores = scorer.compute_score(gts, res)
    out_file.write('METEOR = %s\n' % score)
Example #14
class CaptionEvaluator(object):
    def __init__(self, rtranslator):
        self.tokenizer = PTBTokenizer()
        self.scorer = Meteor()
        self.rtranslator = rtranslator

    def evaluate(self, gts, res):
        _, scores = self.scorer.compute_score(gts, res)
        return scores

    def build_loss(self, sl_conf, video_feat, video_len, video_mask, sent_gd,
                   model_cg):
        """
        param input_caption
        """
        return self.build_loss_v1(sl_conf, video_feat, video_len, video_mask,
                                  sent_gd, model_cg)

    def build_loss_v1(self, sl_conf, video_feat, video_len, video_mask,
                      sent_gd, model_cg):
        """
        :param sl_conf:         (batch, n_anchor)
        :param sl_gather_idx:   (batch, )
        :param video_feat:      (batch, ~, ~)
        :param video_len:       (batch, 2)
        :param video_mask:      (batch, ~, 1)
        :param model_cg:
        :return:
        """
        initial_anchors = params['anchor_list']
        n_anchors = len(initial_anchors)

        batch_size = video_feat.size(0)
        ts_seq = Variable(FloatTensor(initial_anchors).repeat(batch_size, 1))
        ts_gather_idx = Variable(
            LongTensor(range(batch_size)).unsqueeze(1).repeat(
                1, n_anchors).view(-1))
        _, sent_pred, sent_len, sent_mask = model_cg.forward(
            video_feat, video_len, video_mask, ts_seq, ts_gather_idx)
        sent_pred = sent_pred.view(batch_size, n_anchors, -1)
        cur_res = {}
        cur_gts = {}
        for idxi, gts_caption in enumerate(sent_gd):
            cur_gts[idxi] = [{
                'caption':
                remove_nonascii(
                    self.rtranslator.rtranslate(
                        gts_caption.cpu().data.numpy()))
            }]
            for idxj in range(n_anchors):
                cur_res[idxi * n_anchors + idxj] = [{
                    'caption':
                    remove_nonascii(
                        self.rtranslator.rtranslate(
                            sent_pred[idxi, idxj].cpu().data.numpy()))
                }]
        tokenize_res = self.tokenizer.tokenize(cur_res)
        tokenize_gts = self.tokenizer.tokenize(cur_gts)

        res = {
            i: {j: tokenize_res[i * n_anchors + j]
                for j in range(n_anchors)}
            for i in range(sent_gd.size(0))
        }
        gts = {
            i: {j: tokenize_gts[i]
                for j in range(n_anchors)}
            for i in range(sent_gd.size(0))
        }

        scores = []
        for i in range(sent_gd.size(0)):
            score = self.evaluate(gts[i], res[i])
            scores.append(score)

        approx_ground_truth = Variable(
            torch.from_numpy(np.array(scores).argmax(1)).cuda())
        return F.cross_entropy(sl_conf, approx_ground_truth)
Example #15
class RefineTransformer(nn.Module):

    def __init__(self, d_model, encoder, vocab_trg, d_hidden=2048,
                 n_layers=6, n_heads=8, drop_ratio=0.1):
        super().__init__()
        # self.encoder = Encoder(d_model, d_hidden, n_vocab_src, n_layers,
        #                        n_heads, drop_ratio)
        self.encoder = encoder
        self.decoder = Decoder(d_model, d_hidden, vocab_trg, n_layers,
                              n_heads, drop_ratio)
        self.n_layers = n_layers
        self.tokenizer = PTBTokenizer()

    def denum(self, data):
        return ' '.join(self.decoder.vocab.itos[i] for i in data).replace(
            ' <eos>', '').replace(' <pad>', '').replace(' .', '').replace('  ', '')

    def forward(self, x, s, x_mask=None, sample_prob=0):
        encoding = self.encoder(x, x_mask)

        max_sent_len = 20
        if not self.training:
            if isinstance(s, list):
                hiddens, _ = self.decoder.greedy(encoding, max_sent_len)
                h = hiddens[-1]
                targets = None
            else:
                h = self.decoder(s[:, :-1].contiguous(), encoding)
                targets, h = mask(s[:, 1:].contiguous(), h)
            logits = self.decoder.out(h)
        else:
            if sample_prob == 0:
                h = self.decoder(s[:, :-1].contiguous(), encoding)
                targets, h = mask(s[:, 1:].contiguous(), h)
                logits = self.decoder.out(h)
            else:
                model_pred = self.decoder.sampling(encoding, s,
                                                   s.size(1) - 2,
                                                   sample_prob,
                                                   is_argmax=True)
                model_pred.detach_()
                new_y = torch.cat((
                    Variable(model_pred.data.new(s.size(0), 1).long().fill_(
                        self.decoder.vocab.stoi['<init>'])),
                    model_pred), 1)
                h = self.decoder(new_y, encoding)
                targets, h = mask(s[:, 1:].contiguous(), h)
                logits = self.decoder.out(h)

        return logits, targets

    def greedy(self, x, x_mask, T):
        encoding = self.encoder(x, x_mask)

        _, pred = self.decoder.greedy(encoding, T)
        sent_lst = []
        for i in range(pred.data.size(0)):
            sent_lst.append(self.denum(pred.data[i]))
        return sent_lst

    def scst(self, x, x_mask, s):
        self.scorer = Meteor()
        encoding = self.encoder(x, x_mask)

        # greedy part
        _, pred = self.decoder.greedy(encoding, s.size(1)-1)
        pred_greedy = []
        for i in range(pred.data.size(0)):
            pred_greedy.append(self.denum(pred.data[i]))

        del pred
        # sampling part
        model_pred = self.decoder.sampling(encoding, s,
                                           s.size(1) - 2,
                                           sample_prob=1,
                                           is_argmax=False)
        model_pred.detach_()
        new_y = torch.cat((
            Variable(model_pred.data.new(s.size(0), 1).long().fill_(
                self.decoder.vocab.stoi['<init>'])),
            model_pred), 1)
        h = self.decoder(new_y, encoding)
        B, T, H = h.size()
        logits = self.decoder.out(h.view(-1, H)) #.view(B, T, -1)

        mask = (s[:,1:] != 1).float()
        _, pred_sample = torch.max(logits, -1)

        p_model = F.log_softmax(logits, dim=-1)
        logp = p_model[torch.arange(0,B*T).type(logits.data.type()).long(), pred_sample.data].view(B, T)

        pred_sample = pred_sample.view(B, T)

        assert pred_sample.size(0) == len(pred_greedy), (
            'pred_sample should have the same number of sentences as in '
            'pred_greedy, got {} and {} instead'.format(B, len(pred_greedy))
        )
        assert pred_sample.size() == (B, T), (
            'pred_sample should have size (B, T)'
        )

        pred_sample.detach_()

        # rewards
        sentence_greedy, sentence_sample, sentence_gt = {}, {}, {}
        for i in range(len(pred_greedy)):
            sentence_greedy[i] = [{'caption':pred_greedy[i]}]
            sentence_sample[i] = [{'caption':self.denum(pred_sample.data[i])}]
            sentence_gt[i] = [{'caption':self.denum(s.data[i,1:])}]

        tok_greedy = self.tokenizer.tokenize(sentence_greedy)
        tok_sample = self.tokenizer.tokenize(sentence_sample)
        tok_gt = self.tokenizer.tokenize(sentence_gt)
        _, r_greedy = self.scorer.compute_score(tok_gt, tok_greedy)
        _, r_sample = self.scorer.compute_score(tok_gt, tok_sample)

        r_diff = [r_s - r_g for (r_s, r_g) in zip(r_sample, r_greedy)]
        r_diff = Variable(torch.Tensor(r_diff).type(logp.data.type()))

        loss = - torch.mean(torch.sum(r_diff.view(-1,1) * logp * mask, 1))

        return loss
Example #16
    def end_epoch(self):
        path = Path(Options()["exp.dir"])

        dirname = path.joinpath("generated_sentences")
        # Create directory if it does not exist
        if not os.path.exists(dirname):
            try:
                os.makedirs(dirname)
            except OSError as exc:  # Guard against race condition
                if exc.errno != errno.EEXIST:
                    raise

        # Dump sentences to the directory
        for field in ["action", "justification"]:
            for key in ["ground_truth", "predicted"]:
                filepath = dirname.joinpath("%s_%s.txt" % (key, field))
                with open(filepath, "w") as f:
                    f.write("\n".join(self.sentences[key][field]))

        # Compute NLP quality scores (bleu, meteor, cider...)
        for field in ["action", "justification"]:
            cider = Cider()
            bleu = Bleu()
            meteor = Meteor()

            # Check if this is not empty
            if len(self.sentences["ground_truth"][field]) > 0:
                ground_truth = {
                    i: [sentence]
                    for i, sentence in enumerate(self.sentences["ground_truth"]
                                                 [field])
                }
                predicted = {
                    i: [sentence]
                    for i, sentence in enumerate(self.sentences["predicted"]
                                                 [field])
                }

                cider_score, _ = cider.compute_score(ground_truth, predicted)
                cider_score = cider_score * 100  # Convert to percentage

                bleus_score, _ = bleu.compute_score(ground_truth, predicted)
                bleu_score = bleus_score[
                    3] * 100  # Take bleu-4 and convert to percentage

                meteor_score, _ = meteor.compute_score(ground_truth, predicted)
                meteor_score = meteor_score * 100  # Convert to percentage
            else:
                # Otherwise all scores are 0
                cider_score, bleu_score, meteor_score = 0, 0, 0

            Logger().log_value('%s_epoch.cider_%s' % (self.mode, field),
                               cider_score,
                               should_print=True)
            Logger().log_value('%s_epoch.bleucoco_%s' % (self.mode, field),
                               bleu_score,
                               should_print=True)
            Logger().log_value('%s_epoch.meteorcoco_%s' % (self.mode, field),
                               meteor_score,
                               should_print=True)

        # Reset sentences
        self.sentences = {
            "ground_truth": {
                "action": [],
                "justification": []
            },
            "predicted": {
                "action": [],
                "justification": []
            }
        }
        return
Example #17
def run_load_gap_filler(pretrained_filename,
                        do_bleu=False,
                        must_have_anp=False,
                        copy_if_no_anp=False,
                        replace_adj=False,
                        get_human=False,
                        semi_human=False):
    rnn = RNNModel()
    rnn.load_model(pretrained_filename)
    rnn.conf['VAL_SPLIT'] = RNNDataProvider.TEST

    if get_human:
        id_to_caps = pickle.load(open("coco_mturk/id_to_caps.pik", "rb"))

    rnn.build_model_core()
    rnn.load_val_dataset()

    rnn.build_sentence_generator()

    rnn.build_perplexity_calculator()
    #print rnn.sample_sentence(rnn.V_valid[0])
    #print decoder_beamsearch2(rnn, rnn.V_valid[0])
    #print decoder_beamsearch(rnn, rnn.V_valid[0])

    #calculate_metric(rnn)
    #sys.exit(0)

    pos_sentence_res = []
    pos_att_res = []

    des_sentence_res = []
    des_att_res = []

    img_files = []
    img_ids = []

    id_to_sentences = {}

    seen_ids = set()
    if 'added_words' in rnn.conf:
        new_words = set([w[0] for w in rnn.conf['added_words']])
    else:
        new_words = set()
    num_ignore = 0
    num_not_ignore = 0
    for idx in range(rnn.V_valid.shape[0]):
        img_file = rnn.dp.img_id_to_filename[rnn.Id_valid[idx]]
        img_id = rnn.Id_valid[idx]
        if img_id not in id_to_sentences: id_to_sentences[img_id] = []
        #id_to_sentences[img_id].append(' '.join([rnn.dp.i2w[w] for w in rnn.X_valid[idx] if w != 0][::-1]))
        if replace_adj:
            id_to_sentences[img_id] = [
                ' '.join(do_replace_adj(rnn.dp.tokens[i])[::-1])
                for i in rnn.dp.img_id_to_tokens[img_id]
            ]
        elif get_human:
            id_to_sentences[img_id] = [
                ' '.join(rnn.dp.tokens[i][::-1])
                for i in rnn.dp.img_id_to_tokens[img_id]
            ]
            np.random.shuffle(id_to_sentences[img_id])
            print(len(id_to_sentences[img_id]))
            human_sen_pos = id_to_sentences[img_id].pop()
            print(len(id_to_sentences[img_id]))
            if not id_to_sentences[img_id]: continue
        else:
            id_to_sentences[img_id] = [
                ' '.join(rnn.dp.tokens[i][::-1])
                for i in rnn.dp.img_id_to_tokens[img_id]
            ]
        #print id_to_sentences[img_id]
        if img_id in seen_ids: continue
        seen_ids.add(img_id)
        if get_human and not semi_human:
            pos_sen = human_sen_pos.split()[::-1]
            np.random.shuffle(id_to_caps[img_id])
            des_sen = id_to_caps[img_id][0][::-1]
        else:
            lp, pos_sen, pos_att = decoder_beamsearch_with_attention(
                rnn, rnn.V_valid[idx], senti=1.0, beam_size=5)
            lp, des_sen, des_att = decoder_beamsearch_with_attention(
                rnn, rnn.V_valid[idx], senti=-1.0, beam_size=5)
            pos_sen = pos_sen[:-1]
            des_sen = des_sen[:-1]
            #des_att = des_att[:-1]
            pos_att = pos_att[:-1]
        #pos_sen, pos_att = rnn.get_sentence(rnn.V_valid[idx], senti=np.array([1.0], dtype=theano.config.floatX))
        pos_att = np.array(pos_att)
        pos_att = pos_att.flatten()
        #des_att = np.array(des_att)
        #des_att = des_att.flatten()
        des_att = np.zeros((len(des_sen), ))
        #pos_att = np.zeros((len(pos_sen),))
        if must_have_anp:
            if not sentence_has_anp(pos_sen[::-1]):
                num_ignore += 1
                continue
            num_not_ignore += 1
        if copy_if_no_anp:
            if not sentence_has_anp(pos_sen[::-1]):
                pos_sen = des_sen
        if replace_adj:
            pos_sen = do_replace_adj(pos_sen[::-1])[::-1]
            des_sen = do_replace_adj(des_sen[::-1])[::-1]

        #des_sen, des_att = rnn.get_sentence(rnn.V_valid[idx], senti=np.array([-1.0], dtype=theano.config.floatX))
        new_pos_sen = []
        for vv, a in zip(pos_sen, pos_att):
            out = vv
            col = ""
            if a > 0.75:
                col = "#FF3300"
            elif a > 0.5:
                col = "#FF5C33"
            elif a > 0.25:
                col = "#FF8566"
            #if a > 0.75:
            #    col = "#33CC33"# "#3366FF"
            #elif a > 0.5:
            #    col = "#70DB70" #"#5C85FF"
            #elif a > 0.25:
            #    col = "#ADEBAD" #"#85A3FF"
            if col:
                out = "<font style='background-color: %s'>%s</font>" % (col,
                                                                        vv)
            new_pos_sen.append(out)
        pos_sen = new_pos_sen
        print(pos_sen)
        print(pos_att)
        print(des_sen)
        print_it = False
        for v in pos_sen:
            if v in new_words:
                print_it = True
        if print_it:
            for x in list(zip(pos_sen, pos_att))[::-1]:
                print(x[0], end=' ')
            print("")
        #for x in zip(pos_sen, pos_att)[::-1]:
        #    print x[0],
        #print ""
        #for x in zip(des_sen, des_att)[::-1]:
        #    print x[0],
        #print "\n"
        pos_att = pos_att[:len(pos_sen)]
        des_att = des_att[:len(des_sen)]
        pos_sentence_res.append(pos_sen[::-1])
        pos_att_res.append(np.exp(pos_att[::-1]))
        des_sentence_res.append(des_sen[::-1])
        des_att_res.append(np.exp(des_att[::-1]))
        img_files.append(img_file)
        img_ids.append(img_id)

    output = {
        'pos_sen': pos_sentence_res,
        'pos_att': pos_att_res,
        'des_sen': des_sentence_res,
        'des_att': des_att_res,
        'img_files': img_files,
        'img_ids': img_ids
    }
    pickle.dump(output,
                open("output_data/sen_att_pos_01.pik", "wb"),
                protocol=2)

    if must_have_anp:
        print("Must have ANP % removed:",
              num_ignore / float(num_not_ignore) * 100.0)

    print("getting Positive perplexity")
    print(rnn.get_val_perplexity())
    print("got perplexity")

    print("getting Descriptive perplexity")
    print(rnn.get_val_perplexity(base=True))
    print("got perplexity")

    gts = {}
    res = {}
    fout = open("eval/output_pos", "w")
    for line, iid in zip(pos_sentence_res, img_ids):
        fout.write(' '.join(line) + '\n')
        if iid not in res: res[iid] = []
        res[iid].append(' '.join(line))
    fout.close()

    res_des = {}
    fout = open("eval/output_des", "w")
    for line, iid in zip(des_sentence_res, img_ids):
        fout.write(' '.join(line) + '\n')
        if iid not in res_des: res_des[iid] = []
        res_des[iid].append(' '.join(line))
    fout.close()

    for i in range(3):
        fout = open("eval/reference%d" % i, "w")
        for cid in img_ids:
            if cid not in gts: gts[cid] = []
            if len(id_to_sentences[cid]) > i:
                gts[cid].append(id_to_sentences[cid][i])
                fout.write(id_to_sentences[cid][i] + "\n")
            else:
                fout.write("\n")
        fout.close()

    bleu = Bleu()
    #for i in gts.keys()[:10]:
    #    print gts[i]
    #    print res_des[i]
    #    print res[i]
    #    print ""
    total_ref_sentences = 0
    for i in list(gts.keys()):
        total_ref_sentences += len(gts[i])
    print("Total ref sentences:", total_ref_sentences)
    print("Bleu:")
    print("Positive:", bleu.compute_score(gts, res)[0])
    print("Descriptive:", bleu.compute_score(gts, res_des)[0])
    rouge = Rouge()
    print("Rouge:")
    print("Positive:", rouge.compute_score(gts, res)[0])
    print("Descriptive:", rouge.compute_score(gts, res_des)[0])
    cider = Cider()
    print("Cider:")
    print("Positive:", cider.compute_score(gts, res)[0])
    print("Descriptive:", cider.compute_score(gts, res_des)[0])
    meteor = Meteor()
    print("Meteor:")
    print("Positive:", meteor.compute_score(gts, res)[0])
    print("Descriptive:", meteor.compute_score(gts, res_des)[0])
Example #18
File: test.py Project: coufon/s2t
def test(model_path='models/model-61'):
    captions = get_video_data(video_data_path_test,
                              video_feat_path_test,
                              is_test=True)
    ixtoword = pd.Series(np.load('./data/ixtoword.npy').tolist())

    model = VideoCaptionGenerator(
        dim_image=dim_image,
        n_words=len(ixtoword),
        dim_embed=dim_embed,
        dim_hidden=dim_hidden,
        batch_size=1,
        dim_obj_feats=dim_obj_feats,
        n_obj_feats=n_obj_feats,
        #encoder_max_sequence_length=encoder_step,
        decoder_max_sentence_length=decoder_step,
        bias_init_vector=None)

    _, tf_obj_feats, tf_video_mask, _, _, tf_generated_words, tf_generated_att = \
        model.build_model(is_test=True)
    sess = tf.InteractiveSession()
    saver = tf.train.Saver()
    saver.restore(sess, model_path)

    scorer = Meteor()
    scorer_bleu = Bleu(4)
    GTS = defaultdict(list)
    RES = defaultdict(list)
    counter = 0

    for vid, caption in captions.items():
        print(counter)
        if False:
            # Collect frames.
            cap = cv2.VideoCapture(os.path.join(video_path_test, vid + '.mp4'))
            frames = list()
            while True:
                ret, im = cap.read()
                if ret is False:
                    break
                frames.append(im)

        # Load meta data.
        #with open(os.path.join(meta_data_path_test, vid+'.mp4.txt'), 'r') as f:
        #    meta_data = json.load(f)
        #    all_feats = meta_data['features']

        generated_sentence, generated_att, _ = gen_sentence(
            sess, tf_video_mask, tf_obj_feats, tf_generated_words,
            tf_generated_att, vid, ixtoword)
        #generated_sentence_test, weights = gen_sentence(
        #    sess, video_tf, video_mask_tf, caption_tf, vid, ixtoword, weights_tf, 0.3)
        generated_att = [att[:, 0, 0] for att in generated_att]
        #print generated_att

        print(vid, generated_sentence[:-2])
        #plt.plot(generated_att)
        #plt.show()
        #print generated_sentence_test
        #print caption

        if False:
            words = generated_sentence.split(' ')
            feats = list()
            for i, w in enumerate(words):
                i_best_feat_list = np.argsort(generated_att[i])[::-1]
                imgs = list()
                for i_best_feat in i_best_feat_list:
                    weight = generated_att[i][i_best_feat]
                    if weight < 0.1:
                        break
                    print(w, i_best_feat)
                    if all_feats is None or len(all_feats) == 0:
                        im = cv2.resize(
                            frames[:len(frames):len(frames) // 4][i_best_feat],
                            (300, 300))
                    else:
                        feat = all_feats[i_best_feat]
                        i_frame = feat[0]
                        bbox = feat[2]
                        im = np.copy(frames[i_frame][bbox[2]:bbox[3],
                                                     bbox[0]:bbox[1]])
                        im = cv2.resize(im, (300, 300))
                    constant = cv2.copyMakeBorder(im,
                                                  10,
                                                  10,
                                                  10,
                                                  10,
                                                  cv2.BORDER_CONSTANT,
                                                  value=[0, 0, 0])
                    violet = np.zeros((30, constant.shape[1], 3), np.uint8)
                    violet[:] = (255, 255, 255)
                    vcat = cv2.vconcat((violet, constant))
                    cv2.putText(vcat, str(weight), (10, 20),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, 0)
                    imgs.append(vcat)
                if imgs:
                    final_img = cv2.hconcat(imgs)
                    cv2.imshow('test', final_img)
                    cv2.waitKey(10000)

        GTS[str(counter)] = [{
            'image_id': str(counter),
            'cap_id': i,
            'caption': ' '.join(s)
        } for i, s in enumerate(caption)]
        RES[str(counter)] = [{
            'image_id': str(counter),
            'caption': generated_sentence[:-2]
        }]

        #GTS[vid] = caption
        #RES[vid] = [generated_sentence[:-2] + '.']
        counter += 1

        #words = generated_sentence.split(' ')
        #fig = plt.figure()
        #for i in range(len(words)):
        #    w = weights[i]
        #    ax = fig.add_subplot(len(words), 1, i+1)
        #    ax.set_title(words[i])
        #    ax.plot(range(len(w)), [ww[0] for ww in w], 'b')
        #plt.show()

        #ipdb.set_trace()

    tokenizer = PTBTokenizer()
    GTS = tokenizer.tokenize(GTS)
    RES = tokenizer.tokenize(RES)

    score, scores = scorer.compute_score(GTS, RES)
    print "METEOR", score
    score, scores = scorer_bleu.compute_score(GTS, RES)
    print "BLEU", score
Example #19
File: test.py Project: aniloc111/WSDEC
import sys
sys.path.append('../third_party/densevid_eval/coco-caption')

from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
from pycocoevalcap.meteor.meteor import Meteor

src = {0: [{'caption': "this could be a good time, but not then."}]}
tgt = {0: [{'caption': "this is not good at all, time will say."}]}
src_1 = {0: ["this could be a good time, but not then."]}
tgt_1 = {0: ["this is not good at all, time will say."]}

tokenizer = PTBTokenizer()
meteor = Meteor()

src_t = tokenizer.tokenize(src)
tgt_t = tokenizer.tokenize(tgt)

score = meteor.compute_score(src_t, tgt_t)
score_1 = meteor.compute_score(src_1, tgt_1)
import pdb
pdb.set_trace()
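
The two compute_score calls contrast Meteor's input contract: it wants {id: [string, ...]} on both sides, so the {'caption': ...} records must pass through PTBTokenizer first, whereas src_1/tgt_1 are already in the flat form. A small sanity check of that shape, assuming the same contract:

for d in (src_t, tgt_t, src_1, tgt_1):
    assert all(isinstance(v, list) and all(isinstance(s, str) for s in v)
               for v in d.values()), 'Meteor expects {id: [str, ...]}'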
Example #20
File: model_new.py Project: coufon/s2t
def test(model_path='models/model-37', video_feat_path=video_feat_path):

    train_data, test_data = get_video_data(video_data_path,
                                           video_feat_path,
                                           train_ratio=0.9)
    test_videos = test_data['video_path'].values
    test_captions = test_data['Description'].values
    ixtoword = pd.Series(np.load('./data/ixtoword.npy').tolist())

    test_videos_unique = list()
    test_captions_list = list()
    for (video, caption) in zip(test_videos, test_captions):
        if len(test_videos_unique) == 0 or test_videos_unique[-1] != video:
            test_videos_unique.append(video)
            test_captions_list.append([caption])
        else:
            test_captions_list[-1].append(caption)

    model = Video_Caption_Generator(dim_image=dim_image,
                                    n_words=len(ixtoword),
                                    dim_hidden=dim_hidden,
                                    batch_size=batch_size,
                                    encoder_max_sequence_length=n_frame_step,
                                    decoder_max_sentence_length=n_frame_step,
                                    bias_init_vector=None)

    video_tf, video_mask_tf, caption_tf, probs_tf, last_embed_tf = model.build_generator(
    )
    sess = tf.InteractiveSession()

    saver = tf.train.Saver()
    saver.restore(sess, model_path)

    scorer = Meteor()
    GTS = dict()
    RES = dict()

    for (video_feat_path, caption) in zip(test_videos_unique,
                                          test_captions_list):
        print(video_feat_path)
        print(caption)
        video_feat = np.load(video_feat_path)[None, ...]
        interval_frame = video_feat.shape[1] // n_frame_step
        video_feat = video_feat[:,
                                range(0, n_frame_step *
                                      interval_frame, interval_frame), :]
        video_mask = np.ones((video_feat.shape[0], video_feat.shape[1]))

        #video_feat = sampling(video_feat, 0.3)

        generated_word_index = sess.run(caption_tf,
                                        feed_dict={
                                            video_tf: video_feat,
                                            video_mask_tf: video_mask
                                        })
        #probs_val = sess.run(probs_tf, feed_dict={video_tf:video_feat})
        #embed_val = sess.run(last_embed_tf, feed_dict={video_tf:video_feat})
        generated_words = ixtoword[generated_word_index]

        punctuation = np.argmax(np.array(generated_words) == '.') + 1
        generated_words = generated_words[:punctuation]

        generated_sentence = ' '.join(generated_words)
        print(generated_sentence)

        GTS[video_feat_path] = caption
        RES[video_feat_path] = [generated_sentence[:-2] + '.']

        score, scores = scorer.compute_score(GTS, RES)
        print(score)

        ipdb.set_trace()

    print(score)

    ipdb.set_trace()
Example #21
def meteor():
    scorer = Meteor()
    score, scores = scorer.compute_score(gts, res)
    print('METEOR = %s' % score)
Example #22
def test(model_path='models/model-61', video_feat_path=video_feat_path):
    train_data, test_data = get_video_data(video_data_path,
                                           video_feat_path,
                                           train_ratio=0.7)
    test_videos = test_data['video_path'].values
    test_captions = test_data['Description'].values
    ixtoword = pd.Series(np.load('./data/ixtoword.npy').tolist())

    test_videos_unique = list()
    test_captions_list = list()
    for (video, caption) in zip(test_videos, test_captions):
        if len(test_videos_unique) == 0 or test_videos_unique[-1] != video:
            test_videos_unique.append(video)
            test_captions_list.append([caption])
        else:
            test_captions_list[-1].append(caption)

    model = Video_Caption_Generator(dim_image=dim_image,
                                    n_words=len(ixtoword),
                                    dim_embed=dim_embed,
                                    dim_hidden=dim_hidden,
                                    batch_size=batch_size,
                                    encoder_max_sequence_length=encoder_step,
                                    decoder_max_sentence_length=decoder_step,
                                    bias_init_vector=None)

    tf_loss, tf_video, tf_video_mask, tf_obj_feats, tf_caption, tf_caption_mask, tf_probs = model.build_model(
        is_test=True)
    sess = tf.InteractiveSession()

    saver = tf.train.Saver()
    saver.restore(sess, model_path)

    scorer = Meteor()
    scorer_bleu = Bleu(4)
    GTS = defaultdict(list)
    RES = defaultdict(list)
    counter = 0

    for (vid, caption) in zip(test_videos_unique, test_captions_list):
        generated_sentence = gen_sentence(sess, tf_video, tf_video_mask,
                                          tf_obj_feats, tf_caption, vid,
                                          ixtoword, 1)
        #generated_sentence_test, weights = gen_sentence(
        #    sess, video_tf, video_mask_tf, caption_tf, vid, ixtoword, weights_tf, 0.3)

        print(vid, generated_sentence)
        #print generated_sentence_test
        #print caption

        GTS[str(counter)] = [{
            'image_id': str(counter),
            'cap_id': i,
            'caption': s
        } for i, s in enumerate(caption)]
        RES[str(counter)] = [{
            'image_id': str(counter),
            'caption': generated_sentence[:-2] + '.'
        }]

        #GTS[vid] = caption
        #RES[vid] = [generated_sentence[:-2] + '.']
        counter += 1

        #words = generated_sentence.split(' ')
        #fig = plt.figure()
        #for i in range(len(words)):
        #    w = weights[i]
        #    ax = fig.add_subplot(len(words), 1, i+1)
        #    ax.set_title(words[i])
        #    ax.plot(range(len(w)), [ww[0] for ww in w], 'b')
        #plt.show()

        ipdb.set_trace()

    tokenizer = PTBTokenizer()
    GTS = tokenizer.tokenize(GTS)
    RES = tokenizer.tokenize(RES)

    score, scores = scorer.compute_score(GTS, RES)
    print "METEOR", score
    score, scores = scorer_bleu.compute_score(GTS, RES)
    print "BLEU", score