class AttentionVQARewards(object):
    """Scores sampled questions with an attention VQA model.

    The model is built inside a private ``tf.Graph`` and its trainable
    variables are restored from ``ckpt_file``. ``get_reward`` then returns,
    for every sampled question, either the model score at the reference
    top-answer index or (when ``use_dis_reward`` is set) a 0/1 indicator of
    whether that index is the model's arg-max.
    """

    def __init__(self,
                 ckpt_file='model/v1_vqa_VQA/v1_vqa_VQA_best2/model.ckpt-135000',
                 use_dis_reward=False):
        """Build the model in its own graph and restore it from ``ckpt_file``.

        :param ckpt_file: checkpoint path handed to ``Saver.restore``
        :param use_dis_reward: if true, ``get_reward`` emits 0/1 rewards
            instead of raw model scores
        """
        # Project-local imports are kept function-scoped, as in the rest
        # of this file.
        from models.vqa_soft_attention import AttentionModel
        from vqa_config import ModelConfig

        self.g = tf.Graph()
        self.ckpt_file = ckpt_file
        model_config = ModelConfig()
        self.ans2id = AnswerTokenToTopAnswer()
        self.use_dis_reward = use_dis_reward
        with self.g.as_default():
            self.sess = tf.Session()
            self.model = AttentionModel(model_config, phase='test_broadcast')
            self.model.build()
            # Only trainable variables are restored from the checkpoint.
            trainable = tf.trainable_variables()
            self.saver = tf.train.Saver(var_list=trainable)
            self.saver.restore(self.sess, ckpt_file)

    def process_answers(self, ans, ans_len):
        """Map padded answer token arrays to top-answer ids.

        :param ans: answer token array, forwarded to ``_parse_gt_questions``
        :param ans_len: answer lengths, forwarded to ``_parse_gt_questions``
        :return: whatever ``self.ans2id.get_top_answer`` yields for the
            parsed answers
        """
        parsed_answers = _parse_gt_questions(ans, ans_len)
        return self.ans2id.get_top_answer(parsed_answers)

    def get_reward(self, sampled, inputs):
        """Compute a VQA-based reward for every sampled question.

        :param sampled: per-example collection of sampled token sequences;
            each sequence starts with a start token and may end with
            ``END_TOKEN``
        :param inputs: ``[images, res5c, ans, ans_len]`` or
            ``[images, res5c, ans, ans_len, top_ans_ids]``; with four items
            the top-answer ids are derived via ``process_answers``
        :return: ``(vqa_scores, [images_aug, answer_aug, answer_len_aug,
            is_valid])`` where the ``*_aug`` arrays repeat each example's
            entry once per sampled sequence
        """
        if len(inputs) == 4:
            images, res5c, ans, ans_len = inputs
            top_ans_ids = self.process_answers(ans, ans_len)
        else:
            assert len(inputs) == 5
            images, res5c, ans, ans_len, top_ans_ids = inputs

        pathes = []
        image_rows = []
        answer_rows = []
        answer_lens = []
        answer_ids = []
        for example_idx, candidates in enumerate(sampled):
            for tokens in candidates:
                # Drop the start token, and the end token when present.
                trimmed = tokens[1:-1] if tokens[-1] == END_TOKEN else tokens[1:]
                pathes.append(trimmed)
                # Repeat the owning example's data once per sampled sequence.
                image_rows.append(images[example_idx][np.newaxis, :])
                answer_rows.append(ans[example_idx][np.newaxis, :])
                answer_lens.append(ans_len[example_idx])
                answer_ids.append(top_ans_ids[example_idx])

        # Pack the variable-length sequences and the repeated inputs.
        arr, arr_len = put_to_array(pathes)
        images_aug = np.concatenate(image_rows)
        answer_aug = np.concatenate(answer_rows).astype(np.int32)
        top_ans_ids_aug = np.array(answer_ids)
        answer_len_aug = np.array(answer_lens, dtype=np.int32)

        # NOTE(review): res5c is passed as received while every other
        # per-example input is repeated per sampled sequence -- confirm the
        # 'test_broadcast' phase expects this.
        scores = self.model.inference(self.sess, [res5c, arr, arr_len])
        if not self.use_dis_reward:
            row_ids = np.arange(scores.shape[0])
            vqa_scores = scores[row_ids, top_ans_ids_aug]
        else:
            hits = scores.argmax(axis=1) == top_ans_ids_aug
            vqa_scores = np.require(hits, np.float32)

        # presumably 2000 is the id reserved for answers outside the
        # top-answer vocabulary -- TODO confirm
        is_valid = top_ans_ids_aug != 2000
        return vqa_scores, [images_aug, answer_aug, answer_len_aug, is_valid]
Exemplo n.º 2
0
class AttentionModel(BaseVQAModel):
    """Wrapper that restores the soft-attention VQA network for inference.

    The network is built in a private ``tf.Graph`` and its trainable
    variables are restored from ``ckpt_file``.
    """

    def __init__(
            self,
            ckpt_file='model/v1_vqa_VQA/v1_vqa_VQA_best2/model.ckpt-135000'):
        """Build the network in its own graph and restore it.

        :param ckpt_file: checkpoint path handed to ``Saver.restore``
        """
        BaseVQAModel.__init__(self)
        self.g = tf.Graph()
        self.ckpt_file = ckpt_file
        # Aliased on import so the network class is not confused with this
        # wrapper class, which carries the same name.
        from models.vqa_soft_attention import AttentionModel as SoftAttentionNet
        from vqa_config import ModelConfig
        config = ModelConfig()
        self.name = ' ------- MLB-attention ------- '

        with self.g.as_default():
            self.sess = tf.Session()
            self.model = SoftAttentionNet(config, phase='test_broadcast')
            self.model.build()
            # Only trainable variables are restored from the checkpoint.
            trainable = tf.trainable_variables()
            self.saver = tf.train.Saver(var_list=trainable)
            self.saver.restore(self.sess, ckpt_file)

    def _load_image(self, image_id, feat_root=None):
        """Load the stored ``'x'`` feature array of a val2014 COCO image.

        :param image_id: integer COCO image id (zero-padded to 12 digits in
            the file name)
        :param feat_root: directory holding the ``val2014`` feature folder;
            defaults to the original hard-coded location
        :return: the array with axes reordered by ``(1, 2, 0)`` and a new
            leading axis of length 1
        """
        if feat_root is None:
            feat_root = '/usr/data/fl302/data/VQA/ResNet152/resnet_res5c'
        filename = '%s2014/COCO_%s2014_%012d.jpg' % ('val', 'val', image_id)
        # np.load on an .npz returns a lazily-read archive holding an open
        # file handle; the context manager closes it once 'x' is in memory.
        with np.load(os.path.join(feat_root, filename + '.npz')) as archive:
            feat = archive['x']
        return feat.transpose((1, 2, 0))[np.newaxis, ::]
Exemplo n.º 3
0
def create_model():
    """Build the soft-attention VQA model and restore it from checkpoint.

    The model is built in the current default graph, and the session is
    created on that same graph.

    :return: ``(sess, model)`` -- the session holding the restored
        variables and the built model
    """
    from models.vqa_soft_attention import AttentionModel
    from vqa_config import ModelConfig

    checkpoint_path = 'model/v1_vqa_VQA/v1_vqa_VQA_best2/model.ckpt-135000'

    model = AttentionModel(ModelConfig(), phase='test_broadcast')
    model.build()

    sess = tf.Session(graph=tf.get_default_graph())
    checkpoint_name = os.path.basename(checkpoint_path)
    tf.logging.info('Restore from model %s' % checkpoint_name)
    # A Saver without var_list covers all saveable variables of the graph.
    restorer = tf.train.Saver()
    restorer.restore(sess, checkpoint_path)
    return sess, model
    def __init__(self, ckpt_file='model/v1_vqa_VQA/v1_vqa_VQA_best2/model.ckpt-135000'):
        """Build the attention model in a private graph and restore it.

        :param ckpt_file: checkpoint path handed to ``Saver.restore``
        """
        self.g = tf.Graph()
        self.ckpt_file = ckpt_file
        from models.vqa_soft_attention import AttentionModel
        from vqa_config import ModelConfig
        model_config = ModelConfig()
        self.name = ' ------- MLB-attention ------- '

        with self.g.as_default():
            self.sess = tf.Session()
            self.model = AttentionModel(model_config, phase='test_broadcast')
            self.model.build()
            # Only trainable variables are restored from the checkpoint.
            trainable = tf.trainable_variables()
            self.saver = tf.train.Saver(var_list=trainable)
            self.saver.restore(self.sess, ckpt_file)
 def __init__(self,
              ckpt_file='model/v1_vqa_VQA/v1_vqa_VQA_best2/model.ckpt-135000',
              use_dis_reward=False):
     """Build the attention model in a private graph and restore it.

     :param ckpt_file: checkpoint path handed to ``Saver.restore``
     :param use_dis_reward: stored on the instance as ``use_dis_reward``
     """
     self.g = tf.Graph()
     self.ckpt_file = ckpt_file
     from models.vqa_soft_attention import AttentionModel
     from vqa_config import ModelConfig
     model_config = ModelConfig()
     self.ans2id = AnswerTokenToTopAnswer()
     self.use_dis_reward = use_dis_reward
     with self.g.as_default():
         self.sess = tf.Session()
         self.model = AttentionModel(model_config, phase='test_broadcast')
         self.model.build()
         # Only trainable variables are restored from the checkpoint.
         trainable = tf.trainable_variables()
         self.saver = tf.train.Saver(var_list=trainable)
         self.saver.restore(self.sess, ckpt_file)
Exemplo n.º 6
0
class MLBWrapper(object):
    """Scores sampled questions for one image with the attention VQA model.

    The model is built in a private ``tf.Graph`` and its trainable
    variables are restored from ``ckpt_file``.
    """

    def __init__(
            self,
            ckpt_file='model/v1_vqa_VQA/v1_vqa_VQA_best2/model.ckpt-135000'):
        """Build the model in its own graph and restore it.

        :param ckpt_file: checkpoint path handed to ``Saver.restore``
        """
        self.g = tf.Graph()
        self.ckpt_file = ckpt_file
        from models.vqa_soft_attention import AttentionModel
        from vqa_config import ModelConfig
        config = ModelConfig()
        self.name = ' ------- MLB-attention ------- '

        with self.g.as_default():
            self.sess = tf.Session()
            self.model = AttentionModel(config, phase='test_broadcast')
            self.model.build()
            # Only trainable variables are restored from the checkpoint.
            trainable = tf.trainable_variables()
            self.saver = tf.train.Saver(var_list=trainable)
            self.saver.restore(self.sess, ckpt_file)

    def _load_image(self, image_id, feat_root=None):
        """Load the stored ``'x'`` feature array of a val2014 COCO image.

        :param image_id: integer COCO image id (zero-padded to 12 digits in
            the file name)
        :param feat_root: directory holding the ``val2014`` feature folder;
            defaults to the original hard-coded location
        :return: the array with axes reordered by ``(1, 2, 0)`` and a new
            leading axis of length 1
        """
        if feat_root is None:
            feat_root = '/usr/data/fl302/data/VQA/ResNet152/resnet_res5c'
        filename = '%s2014/COCO_%s2014_%012d.jpg' % ('val', 'val', image_id)
        # np.load on an .npz returns a lazily-read archive holding an open
        # file handle; the context manager closes it once 'x' is in memory.
        with np.load(os.path.join(feat_root, filename + '.npz')) as archive:
            feat = archive['x']
        return feat.transpose((1, 2, 0))[np.newaxis, ::]

    def get_scores(self, sampled, image_id, top_ans_id):
        """Score each sampled question against one image and one answer.

        :param sampled: sequence of token sequences; each starts with a
            start token and may end with ``END_TOKEN``
        :param image_id: COCO image id whose features are loaded from disk
        :param top_ans_id: column of the model output to read
        :return: flattened array holding column ``top_ans_id`` of the model
            output, one row per sampled question
        """
        # Drop the start token, and the end token when present.
        pathes = [p[1:-1] if p[-1] == END_TOKEN else p[1:] for p in sampled]
        num_unk = len(sampled)
        arr, arr_len = put_to_array(pathes)

        # Repeat the single image's features once per sampled question.
        res5c = self._load_image(image_id)
        images_aug = np.tile(res5c, [num_unk, 1, 1, 1])

        scores = self.model.inference(self.sess, [images_aug, arr, arr_len])
        return scores[:, top_ans_id].flatten()
Exemplo n.º 7
0
class IVQARewards(object):
    """Reward computer for sampled questions.

    Rewards come from a text scorer (``'cider'`` or ``'bleu'``) and can be
    blended with a VQA-agent reward weighted by ``self.gamma``.
    """

    def __init__(self,
                 graph=None,
                 sess=None,
                 use_vqa_reward=False,
                 metric='cider'):
        """Select the scorer and, if enabled, build the VQA agent.

        :param graph: graph in which the VQA agent is built when the VQA
            reward is enabled
        :param sess: session used to run the VQA agent
        :param use_vqa_reward: request blending in the VQA-agent reward
        :param metric: ``'cider'`` or ``'bleu'``
        :raises ValueError: if ``metric`` is not one of the supported names
        """
        self.graph = graph
        self.sess = sess
        # Blend weight of the VQA reward. With gamma fixed at 0.0 the
        # expression below is never true, so the VQA branch stays off
        # regardless of use_vqa_reward.
        self.gamma = 0.0
        self.use_vqa_reward = use_vqa_reward and self.gamma > 0
        if metric == 'cider':
            self.scorer = ciderEval('v2_ivqa_train_idxs')
        elif metric == 'bleu':
            self.scorer = Bleu(n=4)
        else:
            # Fail here instead of leaving self.scorer unset, which would
            # only surface later as an AttributeError in get_reward.
            raise ValueError(
                "Unsupported metric %r; expected 'cider' or 'bleu'" % (metric,))
        if self.use_vqa_reward:
            with graph.as_default():
                self._build_vqa_agent()

    def _build_vqa_agent(self):
        """Create the VQA agent under the ``'vqa_agent'`` variable scope."""
        with tf.variable_scope('vqa_agent'):
            self.vqa_agent = VQAAgent(config=ModelConfig(), phase='test')

    def get_reward(self, sampled, gts, answers=None):
        """
        Compute rewards for sampled sentences against the ground truth
        using the scorer selected at construction time.

        :param sampled: a list of sampled samples, [seq, seq_len]
        :param gts: a list of ground-truth samples [seq, seq_len]
        :param answers: numpy.array of ground-truth top answer index
        of VQA; only read when the VQA reward is enabled
        :return: the rewards produced by ``compute_rewards``, blended with
        the VQA reward as ``(1 - gamma) * rewards + gamma * vqa_rewards``
        when that reward is enabled
        """
        rewards = compute_rewards(self.scorer, sampled, gts)
        if self.use_vqa_reward:
            vqa_rewards = self._compute_vqa_reward(sampled, answers)
            rewards = (1. - self.gamma) * rewards + self.gamma * vqa_rewards
        return rewards

    def _compute_vqa_reward(self, sampled, answers):
        """Return 1.0 where the VQA agent's arg-max equals ``answers``.

        :param sampled: list of sampled inputs; ``answers`` is appended to
            it to form the agent's feed
        :param answers: array of reference answer indices
        :return: float32 array of 0/1 rewards
        """
        probs = self.vqa_agent.prob
        feed_dict = self.vqa_agent.fill_feed_dict(sampled + [answers])
        preds = self.sess.run(probs, feed_dict=feed_dict)
        return np.equal(preds.argmax(axis=1), answers).astype(np.float32)
 def __init__(self, ckpt_file='', use_dis_reward=False,
              use_attention_model=False):
     """Build a VQA model in a private graph and restore it.

     :param ckpt_file: checkpoint path handed to ``Saver.restore``
     :param use_dis_reward: stored on the instance as ``use_dis_reward``
     :param use_attention_model: build ``AttentionModel`` when true,
         ``BaseModel`` otherwise
     """
     self.g = tf.Graph()
     self.ckpt_file = ckpt_file
     self.use_attention_model = use_attention_model
     from models.vqa_base import BaseModel
     from vqa_config import ModelConfig
     model_config = ModelConfig()
     self.ans2id = AnswerTokenToTopAnswer()
     self.use_dis_reward = use_dis_reward
     with self.g.as_default():
         self.sess = tf.Session()
         # Both variants are constructed and built the same way; only the
         # class differs. AttentionModel is resolved from module scope.
         model_cls = AttentionModel if self.use_attention_model else BaseModel
         self.model = model_cls(model_config, phase='test')
         self.model.build()
         # Only trainable variables are restored from the checkpoint.
         trainable = tf.trainable_variables()
         self.saver = tf.train.Saver(var_list=trainable)
         self.saver.restore(self.sess, ckpt_file)
Exemplo n.º 9
0
 def __init__(self):
     """Initialise the command shell and instantiate the models it uses."""
     cmd.Cmd.__init__(self)
     self.image_id = None
     # Models are constructed in this order: attention first, then vanilla.
     attention_model = AttentionModel()
     vanilla_model = VanillaModel()
     self.models = [attention_model, vanilla_model]
Exemplo n.º 10
0
 def _build_vqa_agent(self):
     """Create the VQA agent under the ``'vqa_agent'`` variable scope."""
     agent_scope = tf.variable_scope('vqa_agent')
     with agent_scope:
         agent_config = ModelConfig()
         self.vqa_agent = VQAAgent(config=agent_config, phase='test')