Example #1
def __init__(self,
             thresh=0.3,
             cider_w=0.6,
             dis_vqa_reward=False,
             attention_vqa=False):
    if attention_vqa:
        self.vqa_reward = AttentionVQARewards(
            use_dis_reward=dis_vqa_reward)
    else:
        self.vqa_reward = VQARewards(
            'model/kprestval_VQA-BaseNorm/model.ckpt-26000',
            use_dis_reward=dis_vqa_reward)
    self.cider_reward = VisualFactReward()
    # self.cider_reward = IVQARewards()
    self.diversity_reward = DiversityReward()
    self.thresh = thresh
    self.cider_w = cider_w
    self.to_sentence = SentenceGenerator(trainset='trainval')
    self._num_call = long(0)
    self.print_iterval = 100
    self.language_thresh = 0.2
    self.cider_thresh = 0.05
    self.use_cider = True
    self.lm = None
    self.replay_buffer = None
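For reference, a minimal sketch (not part of the original class; the gating logic and names are assumptions drawn from the thresholds above) of how such threshold-gated rewards could be mixed:

import numpy as np

def mix_rewards(vqa_r, cider_r, language_r, cider_w=0.6,
                language_thresh=0.2, cider_thresh=0.05):
    # A sample earns reward only if its language score clears the threshold.
    legal = (language_r > language_thresh).astype(np.float32)
    # Drop negligible CIDEr scores.
    cider_r = np.where(cider_r > cider_thresh, cider_r, 0.0)
    # Convex combination of content (CIDEr) and answerability (VQA) terms.
    return legal * (cider_w * cider_r + (1.0 - cider_w) * vqa_r)

print(mix_rewards(np.array([0.9, 0.1]), np.array([0.8, 0.02]),
                  np.array([0.5, 0.1])))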
Example #2
def test():
    # Load data
    def load_data(fpath):
        d = load_hdf5(fpath)
        return d['quest_ids'], d['ans_preds']

    w = 0.8
    quest_ids, preds1 = load_data('data5/kpval_VQA-BaseNorm_scores.data')
    check_quest_ids, preds2 = load_data(
        'data5/kpval_VQA-BaseNorm_scores_flt.data')
    scores = w * preds1 + (1.0 - w) * preds2

    scores[:, -1] = -1.0
    ans_ids = scores.argmax(axis=1)

    # Create the vocabulary.
    top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)

    result = [{
        u'answer': to_sentence.index_to_top_answer(aid),
        u'question_id': qid
    } for aid, qid in zip(ans_ids, quest_ids)]

    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % (FLAGS.version, TEST_SET)
    json.dump(result, open(res_file, 'w'))
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
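The weighted-sum ensembling above is self-contained once the score matrices are loaded; a runnable sketch with synthetic scores standing in for the loaded .data files:

import numpy as np

w = 0.8
preds1 = np.random.rand(4, 2001)  # stand-in for model 1 scores
preds2 = np.random.rand(4, 2001)  # stand-in for model 2 scores
scores = w * preds1 + (1.0 - w) * preds2
scores[:, -1] = -1.0  # suppress the last class so it is never predicted
print(scores.argmax(axis=1))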
Example #3
def test():
    top_ans_file = '/import/vision-ephemeral/fl302/code/' \
                   'VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    # top_ans_file = 'data/vqa_trainval_top2000_answers.txt'
    mc_ctx = MultiChoiceQuestionManger(subset='val', load_ans=True,
                                       top_ans_file=top_ans_file)
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)
    answer_enc = mc_ctx.encoder
    # quest_ids = mc_ctx._quest_id2image_id.keys()
    # quest_ids = np.array(quest_ids)

    # qids = np.random.choice(quest_ids, size=(5,), replace=False)

    create_fn = create_reader('VAQ-CA', 'train')
    reader = create_fn(batch_size=4, subset='kprestval')
    reader.start()

    for _ in range(20):
        # inputs = reader.get_test_batch()
        inputs = reader.pop_batch()

        _, _, _, _, labels, ans_seq, ans_len, quest_ids, image_ids = inputs

        b_top_ans = answer_enc.get_top_answers(labels)
        for i, (quest_id, i_a) in enumerate(zip(quest_ids, b_top_ans)):
            print('question id: %d' % quest_id)
            gt = mc_ctx.get_gt_answer(quest_id)
            print('GT: %s' % gt)
            print('Top: %s' % i_a)
            print('SG: top: %s' % to_sentence.index_to_top_answer(labels[i]))
            seq = ans_seq[i][:ans_len[i]].tolist()
            print('SG: seq: %s\n' % to_sentence.index_to_answer(seq))

    reader.stop()
Example #4
def visualise():
    mc_ctx = MultiChoiceQuestionManger()
    to_sentence = SentenceGenerator(trainset='trainval')
    # writer = ExperimentWriter('latex/examples_replay_buffer_rescore')
    writer = ExperimentWriter('latex/examples_replay_buffer_rescore_prior')
    # d = load_json('vqa_replay_buffer/vqa_replay_low_rescore.json')
    d = load_json('vqa_replay_buffer/vqa_replay_low_rescore_prior_05_04.json')
    memory = d['memory']
    # show random 100
    keys = list(memory.keys())
    np.random.seed(123)
    np.random.shuffle(keys)
    vis_keys = keys[:100]
    for i, quest_key in enumerate(vis_keys):
        pathes = memory[quest_key]
        if len(pathes) == 0:
            continue
        # if it has valid questions
        quest_id = int(quest_key)
        image_id = mc_ctx.get_image_id(quest_id)
        gt_question = mc_ctx.get_question(quest_id)
        answer = mc_ctx.get_gt_answer(quest_id)
        head = 'Q: %s A: %s' % (gt_question, answer)
        im_file = '%s2014/COCO_%s2014_%012d.jpg' % ('val', 'val', image_id)
        im_path = os.path.join(IM_ROOT, im_file)
        questions = []
        for p in pathes.keys():
            conf1, conf2 = pathes[p]
            _tokens = [int(t) for t in p.split(' ')]
            sentence = to_sentence.index_to_question(_tokens)
            descr = '%s (%0.2f-%0.2f)' % (sentence, conf1, conf2)
            questions.append(descr)
        writer.add_result(image_id, quest_id, im_path, head, questions)
    writer.render()
Example #5
def test(checkpoint_path=None):
    batch_size = 100
    config = ModelConfig()
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build data reader
    reader = AttentionFetcher(batch_size=batch_size, subset=TEST_SET,
                              feat_type=config.feat_type, version=FLAGS.version)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % (FLAGS.version,
                                                                     FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    model.build()
    prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # Create the vocabulary.
    top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)

    ans_ids = []
    quest_ids = []

    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        generated_ans = sess.run(
            prob, feed_dict=model.fill_feed_dict(outputs[:-2]))
        generated_ans[:, -1] = 0
        top_ans = np.argmax(generated_ans, axis=1)

        ans_ids.append(top_ans)
        quest_id = outputs[-2]
        quest_ids.append(quest_id)

    quest_ids = np.concatenate(quest_ids)
    ans_ids = np.concatenate(ans_ids)
    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': qid} for aid, qid in zip(ans_ids, quest_ids)]

    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % (FLAGS.version, TEST_SET)
    json.dump(result, open(res_file, 'w'))
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
Example #6
def test(checkpoint_path=None):
    batch_size = 100
    config = ModelConfig()
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build data reader
    reader = AttentionFetcher(batch_size=batch_size,
                              subset=TEST_SET,
                              feat_type=config.feat_type,
                              version=FLAGS.version)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir %
                                             (FLAGS.version, FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    # model.set_agent_ids([0])
    model.build()
    prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # Create the vocabulary.
    top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)
    # to_sentence = SentenceGenerator(trainset='trainval')

    results = []

    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        generated_ans = sess.run(prob,
                                 feed_dict=model.fill_feed_dict(outputs[:-2]))
        generated_ans[:, -1] = 0
        ans_cand_ids = np.argsort(-generated_ans, axis=1)

        quest_ids = outputs[-2]

        for quest_id, ids in zip(quest_ids, ans_cand_ids):
            answers = []
            for k in range(_K):
                aid = ids[k]
                ans = to_sentence.index_to_top_answer(aid)
                answers.append(ans)
            res_i = {'question_id': int(quest_id), 'answers': answers}
            results.append(res_i)

    eval_recall(results)
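The top-K candidate extraction uses argsort on the negated scores; in isolation (with _K and the class count as assumptions):

import numpy as np

_K = 3
probs = np.random.rand(2, 2001)
probs[:, -1] = 0  # exclude the last (e.g. UNK) answer
# Sorting the negated scores yields indices in descending order;
# the first _K columns are the top-K answer ids per question.
print(np.argsort(-probs, axis=1)[:, :_K])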
Example #7
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s_%s.json' % (
        FLAGS.model_type.upper(), subset)
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader(FLAGS.model_type, phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    reader = create_fn(batch_size=100,
                       subset=subset,
                       version=FLAGS.test_version)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'beam')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)

        scores, pathes = post_process_prediction(scores, pathes)
        question = to_sentence.index_to_question(pathes[0])
        print('%d/%d: %s' % (i, num_batches, question))

        for quest_id, image_id, path in zip(quest_ids, image_ids, pathes):
            sentence = to_sentence.index_to_question(path)
            res_i = {
                'image_id': int(image_id),
                'question_id': int(quest_id),
                'question': sentence
            }
            results.append(res_i)

    save_json(res_file, results)
    return res_file
Example #8
def __init__(self):
    self.labels = []
    self.rerank_preds = []
    self.vqa_top_scores = []
    self.vqa_top_preds = []
    self.vqa_cands = []
    self.to_sentence = SentenceGenerator(trainset='trainval')
    self.file_stream = open('result/rerank_analysis.txt', 'w')
Example #9
def convert():
    model_name = 'ivaq_var_restval'
    checkpoint_path = 'model/var_ivqa_pretrain_restval/model.ckpt-505000'
    # build model
    from config import ModelConfig
    model_config = ModelConfig()
    model_fn = get_model_creation_fn('VAQ-Var')
    # create graph
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'beam')
        model.build()
        tf_embedding = model._answer_embed
        tf_answer_feed = model._ans
        tf_answer_len_feed = model._ans_len
        # Restore from checkpoint
        print('Restore from %s' % checkpoint_path)
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    # build reader
    top_ans_file = '/import/vision-ephemeral/fl302/code/' \
                   'VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    mc_ctx = MultiChoiceQuestionManger(subset='val',
                                       load_ans=True,
                                       top_ans_file=top_ans_file)
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)
    answer_encoder = mc_ctx.encoder

    top_answer_inds = range(2000)
    top_answers = answer_encoder.get_top_answers(top_answer_inds)

    answer_seqs = answer_encoder.encode_to_sequence(top_answers)
    for i, (ans, seq) in enumerate(zip(top_answers, answer_seqs)):
        rec_ans = to_sentence.index_to_answer(seq)
        ans = ' '.join(_tokenize_sentence(ans))
        print('%d: Raw: %s, Rec: %s' % (i + 1, ans, rec_ans))
        assert (ans == rec_ans)
    print('Checking passed')

    # extract
    print('Converting...')
    ans_arr, ans_arr_len = put_to_array(answer_seqs)
    embedding = sess.run(tf_embedding,
                         feed_dict={
                             tf_answer_feed: ans_arr.astype(np.int32),
                             tf_answer_len_feed: ans_arr_len.astype(np.int32)
                         })
    # save
    sv_file = 'data/v1_%s_top2000_lstm_embedding.h5' % model_name
    from util import save_hdf5
    save_hdf5(sv_file, {'answer_embedding': embedding})
    print('Done')
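save_hdf5 is a project utility; an equivalent sketch using h5py directly (assuming the utility simply writes each array in the dict as a dataset):

import h5py
import numpy as np

def save_hdf5(fpath, data_dict):
    # Write every array in the dict as a dataset keyed by its name.
    with h5py.File(fpath, 'w') as f:
        for key, value in data_dict.items():
            f.create_dataset(key, data=np.asarray(value))

save_hdf5('demo_embedding.h5', {'answer_embedding': np.zeros((2000, 512))})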
Example #10
def __init__(self, model_name, K=3, do_plot=True):
    self._gt_mgr = MultiChoiceQuestionManger(subset='trainval',
                                             load_ans=True)
    self._rev_map = SentenceGenerator(trainset='trainval')
    self._top_k = K
    self._do_plot = do_plot
    self._model_name = model_name
    self._cache_dir = 'att_maps/%s' % self._model_name
    mkdir_if_missing(self._cache_dir)
Example #11
def test():
    from util import unpickle
    import json
    from inference_utils.question_generator_util import SentenceGenerator
    from w2v_answer_encoder import MultiChoiceQuestionManger

    config = MLPConfig()
    model = SequenceMLP(config, phase='test')
    model.build()
    prob = model.prob

    # Load vocabulary
    to_sentence = SentenceGenerator(trainset='trainval')
    # create multiple choice question manger
    mc_manager = MultiChoiceQuestionManger(subset='trainval',
                                           answer_coding='sequence')

    sess = tf.Session()
    # Load model
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    checkpoint_path = ckpt.model_checkpoint_path
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # get data
    result = []
    dataset = unpickle('data/rescore_dev.pkl')
    for itr, datum in enumerate(dataset):
        seq_index, att_mask, label = _process_datum(datum)
        quest_id = datum['quest_id']
        quest = seq_index[0].tolist()
        feed_dict = model.fill_feed_dict([seq_index, att_mask])
        scores = sess.run(prob, feed_dict=feed_dict)
        idx = scores.argmax()
        # parse question and answer
        question = to_sentence.index_to_question([0] + quest)
        mc_ans = mc_manager.get_candidate_answers(quest_id)
        vaq_answer = mc_ans[idx]
        real_answer = mc_ans[label.argmax()]
        # add result
        result.append({u'answer': vaq_answer, u'question_id': quest_id})
        # show results
        if itr % 100 == 0:
            print('============== %d ============' % itr)
            print('question id: %d' % quest_id)
            print('question\t: %s' % question)
            print('answer\t: %s' % real_answer)
            print('VAQ answer\t: %s (%0.2f)' % (vaq_answer, scores[idx]))

    quest_ids = [res[u'question_id'] for res in result]
    # save results
    tf.logging.info('Saving results')
    res_file = 'result/rescore_dev_dev.json'
    json.dump(result, open(res_file, 'w'))
    from vqa_eval import evaluate_model
    acc = evaluate_model(res_file, quest_ids)
    print('Overall accuracy: %0.2f' % acc)
Example #12
def test(checkpoint_path=None):
    batch_size = 128

    # build data reader
    reader = Reader(batch_size=batch_size, subset=TEST_SET, phase='test', version='v1')

    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % ('v1',
                                                                     'Fusion'))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = RerankModel(phase='test', version='v1', num_cands=5)
    model.build()

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))

    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file='../iccv_vaq/data/vqa_trainval_top2000_answers.txt')

    ans_ids = []
    quest_ids = []

    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.pop_batch()
        model_preds = sess.run(model.preds, feed_dict=model.fill_feed_dict(outputs))
        local_index = model_preds.argmax(axis=1)
        # local_index = outputs[-3].argmax(axis=1)  # ivqa
        # local_index = outputs[-4].argmax(axis=1) # vqa
        top_ans = np.array([cand[idx] for idx, cand in zip(local_index, outputs[3])])

        ans_ids.append(top_ans)
        quest_id = outputs[-1]
        quest_ids.append(quest_id)

    ans_ids = np.concatenate(ans_ids)
    quest_ids = np.concatenate(quest_ids)
    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': qid} for aid, qid in zip(ans_ids, quest_ids)]

    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % ('v1', TEST_SET)
    json.dump(result, open(res_file, 'w'))
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    # ana_ctx.close()
    return res_file, quest_ids
Example #13
def vaq_decoding_greedy(checkpoint_path=None, subset='kpval'):
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s.json' % FLAGS.model_type.upper()

    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader(FLAGS.model_type, phase='test')
    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # build data reader
    reader = create_fn(batch_size=32, subset=subset)

    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir %
                                             FLAGS.model_type)
        checkpoint_path = ckpt.model_checkpoint_path

    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'greedy')
        model.build()
        saver = tf.train.Saver()

        sess = tf.Session()
        tf.logging.info('Restore from model %s' %
                        os.path.basename(checkpoint_path))
        saver.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running greedy inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)

        scores, pathes = post_process_prediction(scores, pathes)
        question = to_sentence.index_to_question(pathes[0])
        print('%d/%d: %s' % (i, num_batches, question))

        for quest_id, image_id, path in zip(quest_ids, image_ids, pathes):
            sentence = to_sentence.index_to_question(path)
            res_i = {
                'image_id': int(image_id),
                'question_id': int(quest_id),
                'question': sentence
            }
            results.append(res_i)

    save_json(res_file, results)
    return res_file
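post_process_prediction is defined elsewhere; a plausible sketch of what it does (truncating each decoded index path at an assumed end-of-sequence token; the id 2 is an assumption, not the project's value):

import numpy as np

END_TOKEN = 2  # assumed end-of-sequence id; the real value is project-specific

def post_process_prediction(scores, pathes):
    # Truncate each decoded index sequence at its first end token.
    trimmed = []
    for path in pathes:
        path = list(path)
        if END_TOKEN in path:
            path = path[:path.index(END_TOKEN)]
        trimmed.append(path)
    return scores, trimmed

_, out = post_process_prediction(None, np.array([[5, 9, 2, 0], [7, 2, 0, 0]]))
print(out)  # [[5, 9], [7]]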
Example #14
def main(_):
    # Build the inference graph.
    config = QuestionGeneratorConfig()
    reader = TFRecordDataFetcher(FLAGS.input_files, config.image_feature_key)

    g = tf.Graph()
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)
    with g.as_default():
        model = QuestionGenerator(config, phase='evaluate')
        model.build()
    # g.finalize()

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)

    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        saver = tf.train.Saver(var_list=tf.all_variables())
        saver.restore(sess, checkpoint_path)

        itr = 0
        while not reader.eof():
            outputs = reader.pop_batch()
            im_ids, quest_id, im_feat, ans_w2v, quest_ids, ans_ids = outputs
            inputs = post_processing_data(outputs)
            perplexity = sess.run(model.likelihood,
                                  feed_dict=model.fill_feed_dict(inputs))

            # generated = [generated[0]]  # sample 3
            question = to_sentence.index_to_question(quest_ids)
            answer = to_sentence.index_to_answer(ans_ids)

            print('============== %d ============' % itr)
            print('image id: %d, question id: %d' % (im_ids, quest_id))
            print('question\t: %s' % question)
            elems = question.split(' ')
            tmp = ' '.join([
                '%s (%0.2f)' % (w, p)
                for w, p in zip(elems, perplexity.flatten())
            ][:-1])
            print('question\t' + tmp)
            print('answer\t: %s' % answer)
            print('perplexity\t: %0.2f\n' % perplexity.mean())

            itr += 1
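The per-word values printed above come from model.likelihood; under the common definition, sequence perplexity is the exponential of the mean per-token negative log-likelihood. A minimal illustration (the numbers are made up):

import numpy as np

token_nll = np.array([1.2, 0.4, 2.1, 0.9])  # hypothetical per-token NLLs
print('perplexity\t: %0.2f' % np.exp(token_nll.mean()))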
Example #15
def __init__(self, ckpt_file=None):
    top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    self.to_sentence = SentenceGenerator(trainset='trainval',
                                         top_ans_file=top_ans_file)
    self.sent_encoder = SentenceEncoder()
    self.model = None
    self.sess = None
    self.name = ''
    self.top_k = 2

    self.answer_to_top_ans_id = None
Example #16
def __init__(self, thresh=0.3, cider_w=0.6, dis_vqa_reward=False):
    self.vqa_reward = VQARewards(use_dis_reward=dis_vqa_reward)
    self.cider_reward = IVQARewards()
    self.diversity_reward = DiversityReward()
    self.thresh = thresh
    self.cider_w = cider_w
    self.to_sentence = SentenceGenerator(trainset='trainval')
    self._num_call = long(0)
    self.print_iterval = 100
    self.language_thresh = 0.2
    self.cider_thresh = 0.05
    self.use_cider = True
    self.lm = None
    self.replay_buffer = None
Example #17
def __init__(self, metric='cider', gt_has_start_end_token=False,
             pred_has_start_end_token=True, use_end_token=True,
             subset='kptrain'):
    self.gt_has_start_end_token = gt_has_start_end_token
    self.pred_has_start_end_token = pred_has_start_end_token
    self.use_end_token = use_end_token
    if metric == 'cider':
        self.scorer = ciderEval('vqa_%s_idxs_end' % subset)
    elif metric == 'bleu':
        self.scorer = Bleu(n=4)
    assert (metric == 'cider')
    self.to_sentence = SentenceGenerator(trainset='trainval')
    self._num_call = long(0)
    self.print_iterval = 100
Example #18
def test_top_answer_layer():
    from inference_utils.question_generator_util import SentenceGenerator
    to_sentence = SentenceGenerator(trainset='trainval')

    def visualise_sequence(seqs, seqs_len, idx):
        seq = seqs[idx]
        seq_len = seqs_len[idx]
        vis_seq = seq[:seq_len]
        answer = to_sentence.index_to_answer(vis_seq)
        print('%s' % answer)
        return answer

    top_ans_file = 'data/top_answer2000_sequences.h5'
    answer_pool = TopAnswerDataLayer(top_ans_file, k=4)
    top_answer_list = load_top_answer_list()

    import numpy as np
    ind = np.random.randint(low=0, high=len(top_answer_list), size=[5, 4],
                            dtype=np.int32)
    top_k_ind = tf.constant(ind, dtype=tf.int32)
    t_ans_arr, t_ans_len = answer_pool.get_top_answer_sequences(top_k_ind)
    ans_arr = t_ans_arr.eval().reshape([-1, answer_pool.data_len])
    ans_len = t_ans_len.eval().reshape([-1])

    answer_ind = top_k_ind.eval().reshape([-1])

    num_test = ans_len.size
    num_passed = 0
    for i in range(num_test):
        top_ans = top_answer_list[answer_ind[i]]
        print(top_ans)
        seq_ans = visualise_sequence(ans_arr, ans_len, i)
        print('========================')
        num_passed += (seq_ans == top_ans)
    print('\nFinish test top answer layer\nPassed: %d/%d' % (num_passed, num_test))
Example #19
def main(subset):
    from multiprocessing import Process
    # params
    k = 80
    num_proc = 10
    # subset = 'kptest'
    # res_file = 'result/quest_vaq_nn.json'

    print('Creating Models')
    # sentence generator
    to_sentence = SentenceGenerator(trainset='trainval')

    # assign tasks
    val_qids, nn_ids = load_image_nn(subset)
    num = len(val_qids)

    batch_size = int(ceil(num / float(num_proc)))

    print('Launching process')

    jobs = []
    for i in range(num_proc):
        proc_range = np.arange(start=batch_size * i,
                               stop=min(batch_size * (i + 1), num),
                               dtype=np.int32)
        p = Process(target=process_worker,
                    args=(subset, i, proc_range, to_sentence))
        jobs.append(p)
        p.start()
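With float division restored (integer division would floor before ceil and drop trailing items), the ceil-based split covers every item; the partitioning logic in isolation:

import numpy as np
from math import ceil

num, num_proc = 23, 10
batch_size = int(ceil(num / float(num_proc)))  # float division so ceil matters
for i in range(num_proc):
    proc_range = np.arange(start=batch_size * i,
                           stop=min(batch_size * (i + 1), num),
                           dtype=np.int32)
    print(i, proc_range)  # later ranges may be empty, but nothing is dropped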
Example #20
def test_cst_reader():
    reader = ContrastiveDataReader(batch_size=4)
    to_sentence = SentenceGenerator(trainset='trainval')

    reader.start()
    for i in range(4):
        images, quest, quest_len, top_ans, mask = reader.pop_batch()
        questions = _parse_gt_questions(quest, quest_len)
        print('\nBatch %d' % i)
        this_batch_size = images.shape[0] // 2
        for idx in range(this_batch_size):
            print('Real: %s' % to_sentence.index_to_question(questions[idx]))
            print('Fake: %s\n' % to_sentence.index_to_question(questions[idx + this_batch_size]))
        print('Mask:')
        print(mask.astype(np.int32))
    reader.stop()
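The contrastive reader stacks real questions in the first half of the batch and their fakes in the second half; the pairing convention in isolation (with made-up ids):

import numpy as np

batch = np.array([101, 102, 201, 202])  # 2 real items followed by 2 fakes
half = batch.shape[0] // 2
for idx in range(half):
    print('Real: %d / Fake: %d' % (batch[idx], batch[idx + half]))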
Example #21
def __init__(self, thresh=0.3, cider_w=0.6):
    from mcb_reward import MCBReward
    self.to_sentence = SentenceGenerator(trainset='trainval')
    self.vqa_reward = MCBReward(self.to_sentence)
    self.cider_reward = VisualFactReward()
    # self.cider_reward = IVQARewards()
    self.diversity_reward = DiversityReward()
    self.thresh = thresh
    self.cider_w = cider_w
    self._num_call = long(0)
    self.print_iterval = 100
    self.language_thresh = 0.2
    self.cider_thresh = 0.05
    self.use_cider = True
    self.lm = None
    self.replay_buffer = None
Example #22
def main(_):
    batch_size = 4
    create_fn = create_reader('VAQ-2Att', phase='train')
    to_sentence = SentenceGenerator(trainset='trainval')

    def trim_sequence(seqs, seqs_len, idx):
        seq = seqs[idx]
        seq_len = seqs_len[idx]
        return seq[:seq_len]

    def test_reader(reader):
        reader.start()
        for i in range(5):
            inputs = reader.pop_batch()
            im, attr, capt, capt_len, ans_seq, ans_seq_len = inputs
            question = to_sentence.index_to_question(
                trim_sequence(capt, capt_len, 1))
            answer = to_sentence.index_to_answer(
                trim_sequence(ans_seq, ans_seq_len, 1))
            print('Q: %s\nA: %s\n' % (question, answer))
        reader.stop()

    print('v1:')
    reader = create_fn(batch_size, subset='kptrain', version='v1')
    test_reader(reader)
    del reader

    print('v2:')
    reader = create_fn(batch_size, subset='kptrain', version='v2')
    test_reader(reader)
    del reader
Example #23
def test(checkpoint_path=None):
    batch_size = 4
    config = ModelConfig()
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build data reader
    reader = AttentionFetcher(batch_size=batch_size,
                              subset=TEST_SET,
                              feat_type=config.feat_type,
                              version=FLAGS.version)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir %
                                             (FLAGS.version, FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    model.build()
    prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # Create the vocabulary.
    to_sentence = SentenceGenerator(
        trainset='trainval',
        top_ans_file='../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt')

    ans_ids = []
    quest_ids = []

    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        generated_ans = sess.run(prob,
                                 feed_dict=model.fill_feed_dict(outputs[:-2]))
        generated_ans[:, -1] = 0
        top_ans = np.argmax(generated_ans, axis=1)

        ans_ids.append(top_ans)
        quest_id = outputs[-2]
        quest_ids.append(quest_id)

    quest_ids = np.concatenate(quest_ids)
    ans_ids = np.concatenate(ans_ids)
    gt = reader._answer
    n1, n2 = (gt == ans_ids).sum(), gt.size
    acc = n1 / float(n2)
    print('\nAcc: %0.2f, %d/%d' % (acc * 100., n1, n2))
    return acc
Example #24
def test_rerank_reader():
    reader = RetrievalDataReader(batch_size=1, n_contrast=10, subset='train')
    reader.start()
    outputs = reader.pop_batch()
    im_feat, quest_arr, quest_len, ans_arr, ans_len = outputs
    from inference_utils.question_generator_util import SentenceGenerator
    to_sentence = SentenceGenerator(
        trainset='trainval',
        ans_vocab_file='data/vqa_trainval_question_answer_word_counts.txt',
        quest_vocab_file='data/vqa_trainval_question_answer_word_counts.txt')
    for q_seq, q_len, a_seq, a_len in zip(quest_arr, quest_len, ans_arr,
                                          ans_len):
        q_ = np.array([0] + q_seq[:q_len].tolist() + [0])
        a_ = np.array([0] + a_seq[:a_len].tolist() + [0])
        q = to_sentence.index_to_question(q_)
        a = to_sentence.index_to_answer(a_)
        print('Q: %s' % q)
        print('A: %s\n' % a)
    reader.stop()
Example #25
def __init__(self):
    self.to_sentence = SentenceGenerator(trainset='trainval')
    self.sent_encoder = SentenceEncoder()
    self.g = tf.Graph()
    self.ckpt_file = 'model/v1_var_kptrain_VAQ-VarDS/model.ckpt-3300000'
    from models.variational_ds_ivqa_model import VariationIVQAModel
    from config import ModelConfig
    config = ModelConfig()
    self._top_k = 10
    self.name = ' ------- VarIVQA ------- '

    with self.g.as_default():
        self.sess = tf.Session()
        self.model = VariationIVQAModel(config, phase='sampling_beam')
        self.model.build()
        vars = tf.trainable_variables()
        self.saver = tf.train.Saver(var_list=vars)
        self.saver.restore(self.sess, self.ckpt_file)

    self._init_image_cache()
Example #26
def test():
    from util import load_hdf5
    d = load_hdf5('data/rerank_kpval.h5')
    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=None)
    quest_ids = d['quest_ids']
    ans_ids = d['cands'][:, 0]
    # vqa_scores = d['vqa']

    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': qid} for aid, qid in zip(ans_ids, quest_ids)]

    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % ('v2', 'kpval')
    json.dump(result, open(res_file, 'w'))
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    # ana_ctx.close()
    return res_file, quest_ids
Example #27
def score_fusion():
    subset = 'kpval'
    EPS = 1e-12
    T = 3.0
    save_file = 'data/%s_vqa_vaq_rerank_%s.hdf5' % ((FLAGS.model_type).lower(), subset)
    d = load_hdf5(save_file)
    quest_ids = d['quest_ids']
    vqa_scores = d['vqa_scores']
    vaq_scores = d['vaq_scores']
    vqa_pred_labels = d['vqa_pred_labels']

    # context
    to_sentence = SentenceGenerator(trainset='trainval')

    # fusion
    ans_ids = []
    for i, (quest_id, vqa_score, vaq_score, pred_label) in enumerate(zip(quest_ids,
                                                                         vqa_scores,
                                                                         vaq_scores,
                                                                         vqa_pred_labels)):
        vaq_score = np.exp(-T * vaq_score)
        vaq_score /= (vaq_score.sum() + EPS)
        score = vaq_score * vqa_score
        idx = score.argmax()
        pred = pred_label[idx]
        # add this to result
        ans_ids.append(pred)

    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': int(qid)} for aid, qid in zip(ans_ids, quest_ids)]

    # save results
    tf.logging.info('Saving results')
    res_file = 'vaq_on_vqa_proposal_tmp.json'
    json.dump(result, open(res_file, 'w'))
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
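With the leftover debug override removed above, the fusion re-weights VQA scores by a temperature-scaled question-likelihood term; the per-question step in isolation (with made-up scores):

import numpy as np

T, EPS = 3.0, 1e-12
vaq_score = np.array([2.0, 0.5, 1.0])  # candidate question losses (lower is better)
vqa_score = np.array([0.2, 0.5, 0.3])  # candidate answer scores

weight = np.exp(-T * vaq_score)
weight /= (weight.sum() + EPS)
score = weight * vqa_score
print(score.argmax())  # best candidate after fusion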
Example #28
def main():
    # params
    k = 80
    res_file = 'result/quest_vaq_nn.json'
    # sentence generator
    to_sentence = SentenceGenerator(trainset='trainval')
    # load distances
    val_qids, nn_ids = load_image_nn()
    # create nn model
    nn_model = QuestionPool()
    num = len(val_qids)
    results = []
    for i, (v_qid, v_nn) in enumerate(zip(val_qids, nn_ids)):
        # run nn search
        t = time()
        tr_qid, tr_path = nn_model.get_candidates(v_nn[:k])
        sent = to_sentence.index_to_question(tr_path)
        print(sent)
        print('Processing %d/%d, time %0.2f sec.' % (i, num, time() - t))
        res_i = {'question_id': int(v_qid), 'question': sent}
        results.append(res_i)

    save_json(res_file, results)
    cider = evaluate_question(res_file, subset='kpval', version='v1')
Example #29
def sample_cst_questions(checkpoint_path=None, subset='kptrain'):
    model_config = ModelConfig()
    model_config.convert = FLAGS.convert
    model_config.loss_type = 'pairwise'
    model_config.top_k = 3
    batch_size = 8
    # Get model
    create_fn = create_reader(FLAGS.model_type, phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    reader = create_fn(batch_size=batch_size,
                       subset=subset,
                       version=FLAGS.test_version)

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = ContrastQuestionSampler(model_config)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')

    for i in range(num_batches):
        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        c_ans, c_ans_len, pathes, scores = model.greedy_inference(
            outputs[:-2], sess)
        scores, pathes = post_process_prediction(scores, pathes)

        k = 3
        capt, capt_len = outputs[2:4]

        gt = capt[0, :capt_len[0]]
        print('gt: %s [%s]' %
              (to_sentence.index_to_question(gt),
               to_sentence.index_to_answer(c_ans[0, :c_ans_len[0]])))
        for ix in range(k):
            question = to_sentence.index_to_question(pathes[ix])
            answer = to_sentence.index_to_answer(c_ans[ix, :c_ans_len[ix]])
            print('%s %d: %s [%s]' %
                  ('pre' if ix == 0 else 'cst', ix, question, answer))
Example #30
def ivqa_decoding_beam_search(checkpoint_path=None):
    model_config = ModelConfig()
    method = FLAGS.method
    res_file = 'result/bs_gen_%s.json' % method
    score_file = 'result/bs_vqa_scores_%s.mat' % method
    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-VVIS', phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    subset = 'kptest'
    reader = create_fn(batch_size=1, subset=subset, version=FLAGS.test_version)

    exemplar = ExemplarLanguageModel()

    if checkpoint_path is None:
        if FLAGS.checkpoint_dir:
            ckpt_dir = FLAGS.checkpoint_dir
        else:
            ckpt_dir = FLAGS.checkpoint_pat % (FLAGS.version, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling')
        model.set_num_sampling_points(1000)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

        # build language model
        language_model = LanguageModel()
        language_model.build()
        language_model.set_cache_dir('test_empty')
        # language_model.set_cache_dir('v1_var_att_lowthresh_cache_restval_VAQ-VarRL')
        language_model.set_session(sess)
        language_model.setup_model()

        # build VQA model
        vqa_model = VQAWrapper(g, sess)
    # vqa_model = MLBWrapper()
    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    batch_vqa_scores = []

    num = FLAGS.max_iters if FLAGS.max_iters > 0 else num_batches
    for i in range(num):

        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        im, _, _, top_ans, ans_tokens, ans_len = outputs[:-2]
        # pdb.set_trace()
        if top_ans == 2000:
            continue

        print('\n%d/%d' % (i, num))
        question_id = int(quest_ids[0])
        image_id = int(image_ids[0])

        t1 = time()
        pathes, scores = model.greedy_inference([im, ans_tokens, ans_len],
                                                sess)

        # find unique
        ivqa_scores, ivqa_pathes = process_one(scores, pathes)
        t2 = time()
        print('Time for sample generation: %0.2fs' % (t2 - t1))

        # apply language model
        language_model_inputs = wrap_samples_for_language_model(
            [ivqa_pathes], pad_token=model.pad_token - 1, max_length=20)
        match_gt = exemplar.query(ivqa_pathes)
        legality_scores = language_model.inference(language_model_inputs)
        legality_scores[match_gt] = 1.0
        num_keep = max(100, (legality_scores > 0.1).sum())  # no less than 100
        valid_inds = (-legality_scores).argsort()[:num_keep]

        t3 = time()
        print('Time for language model filtration: %0.2fs' % (t3 - t2))

        # for idx in valid_inds:
        #     path = ivqa_pathes[idx]
        #     sc = legality_scores[idx]
        #     sentence = to_sentence.index_to_question(path)
        #     # questions.append(sentence)
        #     print('%s (%0.3f)' % (sentence, sc))

        # apply VQA model
        sampled = [ivqa_pathes[_idx] for _idx in valid_inds]
        # vqa_scores = vqa_model.get_scores(sampled, image_id, top_ans)
        vqa_scores, is_valid = vqa_model.get_scores(sampled, im, top_ans)
        # conf_inds = (-vqa_scores).argsort()[:20]
        conf_inds = np.where(is_valid)[0]
        # pdb.set_trace()
        # conf_inds = (-vqa_scores).argsort()[:40]

        t4 = time()
        print('Time for VQA verification: %0.2fs' % (t4 - t3))

        this_mean_vqa_score = vqa_scores[conf_inds].mean()
        print('sampled: %d, unique: %d, legal: %d, gt: %d, mean score %0.2f' %
              (pathes.shape[0], len(ivqa_pathes), num_keep, match_gt.sum(),
               this_mean_vqa_score))
        batch_vqa_scores.append(this_mean_vqa_score)

        for _pid, idx in enumerate(conf_inds):
            path = sampled[idx]
            sc = vqa_scores[idx]
            sentence = to_sentence.index_to_question(path)
            aug_quest_id = question_id * 1000 + _pid
            res_i = {
                'image_id': int(image_id),
                'question_id': aug_quest_id,
                'question': sentence,
                'score': float(sc)
            }
            results.append(res_i)

    save_json(res_file, results)
    batch_vqa_scores = np.array(batch_vqa_scores, dtype=np.float32)
    mean_vqa_score = batch_vqa_scores.mean()
    from scipy.io import savemat
    savemat(score_file, {
        'scores': batch_vqa_scores,
        'mean_score': mean_vqa_score
    })
    print('BS mean VQA score: %0.3f' % mean_vqa_score)
    return res_file, mean_vqa_score
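The language-model filtering step above keeps at least 100 candidates regardless of how many clear the legality threshold; the selection logic in isolation (with synthetic scores):

import numpy as np

legality_scores = np.random.rand(500)
match_gt = np.zeros(500, dtype=bool)
match_gt[:3] = True                  # paths that match a ground-truth question
legality_scores[match_gt] = 1.0      # ground-truth matches are always legal
num_keep = max(100, int((legality_scores > 0.1).sum()))  # no fewer than 100
valid_inds = (-legality_scores).argsort()[:num_keep]
print(len(valid_inds))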