示例#1
0
    def __getitem__(self, idx):
        """Return the tensors for sample ``idx``.

        When ``RUN_MODE`` is 'train' or 'val' the sample is an entry of
        ``self.ans_list``; its question is looked up through
        ``self.qid_to_ques`` and the answer-related values come from
        ``self.proc_ans_and_abs``.  In any other run mode the sample is an
        entry of ``self.ques_list`` and the three answer-related values stay
        at their ``np.zeros(1)`` placeholders.

        :param idx: index into ``ans_list`` (train/val) or ``ques_list``.
        :return: ``(img_feat, ques_ix, ans, abs, loss_masks)``.  The first
            four are converted with ``torch.from_numpy``; ``loss_masks`` is
            returned exactly as produced (not converted).
        """
        # Placeholders that are returned unchanged when no answers exist.
        ans_iter = np.zeros(1)
        abs_iter = np.zeros(1)
        loss_masks = np.zeros(1)

        # NOTE: 'val' is handled together with 'train' here (both carry
        # annotations); only the remaining modes take the question-only path.
        has_answers = self.__C.RUN_MODE in ['train', 'val']
        if has_answers:
            ans = self.ans_list[idx]
            ques = self.qid_to_ques[str(ans['question_id'])]
            image_key = str(ans['image_id'])
        else:
            ques = self.ques_list[idx]
            image_key = str(ques['image_id'])

        # Image feature: either from the preloaded table or from the .npz file.
        if self.__C.PRELOAD:
            img_feat_x = self.iid_to_img_feat[image_key]
        else:
            # np.load returns an NpzFile for .npz archives; close it promptly
            # so file descriptors are not held until garbage collection.
            with np.load(self.iid_to_img_feat_path[image_key]) as img_feat:
                img_feat_x = img_feat['x'].transpose((1, 0))
        img_feat_iter = proc_img_feat(img_feat_x,
                                      self.__C.IMG_FEAT_PAD_SIZE)

        # Question tokens.
        ques_ix_iter = proc_ques(ques, self.token_to_ix,
                                 self.__C.MAX_TOKEN)

        # Answer-related values (only available with annotations).
        if has_answers:
            ans_iter, abs_iter, loss_masks = self.proc_ans_and_abs(ans)

        return torch.from_numpy(img_feat_iter), \
               torch.from_numpy(ques_ix_iter), \
               torch.from_numpy(ans_iter), \
               torch.from_numpy(abs_iter), \
               loss_masks
    def __getitem__(self, idx):
        """Return the tensors for sample ``idx``.

        In 'train' mode the sample is an annotation from ``self.ans_list``
        and its question is looked up through ``self.qid_to_ques``; otherwise
        the sample is a question from ``self.ques_list`` and the answer stays
        at its ``np.zeros(1)`` placeholder.

        :param idx: index into ``ans_list`` (train) or ``ques_list``.
        :return: ``(img_feat_iter, ques_ix_iter, ans_iter)`` as torch tensors.
        """
        # Placeholder answer returned when the run mode has no annotations.
        ans_iter = np.zeros(1)

        is_train = self.__C.RUN_MODE in ['train']
        if is_train:
            # One annotation per sample; per the original note it looks like
            # {'answers': [{'answer': ...}, ...], 'image_id': 139831,
            #  'question_id': 'VG_1293929'}.
            ans = self.ans_list[idx]
            # The matching question, e.g. {'image_id': 139831,
            # 'question': "What's the man doing?", 'question_id': 'VG_1293929'}.
            ques = self.qid_to_ques[str(ans['question_id'])]
            image_key = str(ans['image_id'])
        else:
            ques = self.ques_list[idx]
            image_key = str(ques['image_id'])

        # Image feature: preloaded table when PRELOAD is set, else the
        # per-image .npz file.
        if self.__C.PRELOAD:
            img_feat_x = self.iid_to_img_feat[image_key]
        else:
            # np.load returns an NpzFile for .npz archives; close it promptly
            # so file descriptors are not held until garbage collection.
            with np.load(self.iid_to_img_feat_path[image_key]) as img_feat:
                # Swap the two axes of the stored 'x' array.
                img_feat_x = img_feat['x'].transpose((1, 0))
        # Pad the image feature to IMG_FEAT_PAD_SIZE.
        img_feat_iter = proc_img_feat(img_feat_x,
                                      self.__C.IMG_FEAT_PAD_SIZE)

        # Convert the question to token indices (at most MAX_TOKEN).
        ques_ix_iter = proc_ques(ques, self.token_to_ix,
                                 self.__C.MAX_TOKEN)

        # Answer score vector, only available for training annotations.
        if is_train:
            ans_iter = proc_ans(ans, self.ans_to_ix)

        return torch.from_numpy(img_feat_iter), torch.from_numpy(
            ques_ix_iter), torch.from_numpy(ans_iter)
示例#3
0
    def __getitem__(self, idx):
        """Return ``(img_feat, ques_ix, ans)`` tensors for sample ``idx``.

        In 'train' mode the sample is taken from ``self.ans_list`` (with its
        question resolved through ``self.qid_to_ques``) and the answer is
        produced by ``proc_ans``.  In every other run mode the sample is taken
        from ``self.ques_list`` and the answer is a ``np.zeros(1)``
        placeholder.

        :param idx: index into ``ans_list`` (train) or ``ques_list``.
        :return: three torch tensors built with ``torch.from_numpy``.
        """
        # Placeholder answer for run modes without annotations.
        ans_iter = np.zeros(1)

        is_train = self.__C.RUN_MODE in ['train']
        if is_train:
            ans = self.ans_list[idx]
            ques = self.qid_to_ques[str(ans['question_id'])]
            image_key = str(ans['image_id'])
        else:
            ques = self.ques_list[idx]
            image_key = str(ques['image_id'])

        # Image feature: preloaded table or per-image .npz file.
        if self.__C.PRELOAD:
            img_feat_x = self.iid_to_img_feat[image_key]
        else:
            # The path table maps e.g. '551018' ->
            # '../datasets-vqa/coco_extract/val2014/COCO_val2014_000000551018.jpg.npz'.
            # np.load returns an NpzFile for .npz archives; close it promptly
            # so file descriptors are not held until garbage collection.
            with np.load(self.iid_to_img_feat_path[image_key]) as img_feat:
                # e.g. ndarray (2048, 41) -> ndarray (41, 2048)
                img_feat_x = img_feat['x'].transpose((1, 0))
        img_feat_iter = proc_img_feat(img_feat_x,
                                      self.__C.IMG_FEAT_PAD_SIZE)

        # Question tokens.
        ques_ix_iter = proc_ques(ques, self.token_to_ix,
                                 self.__C.MAX_TOKEN)

        # Answer, only available for training annotations.
        if is_train:
            ans_iter = proc_ans(ans, self.ans_to_ix)

        return torch.from_numpy(img_feat_iter), torch.from_numpy(ques_ix_iter), \
               torch.from_numpy(ans_iter)
示例#4
0
    def __getitem__(self, idx):
        """Return the 10-element sample tuple for ``self.ques_list[idx]``.

        In 'train' mode a failure while loading or processing the image
        feature is reported on stdout and the ``np.zeros(1)`` placeholder is
        used instead (best effort); in every other run mode such a failure
        propagates.  The answer is produced by ``proc_ans_oe`` in 'train'
        mode and is a ``np.zeros(1)`` placeholder otherwise.

        :param idx: index into ``self.ques_list``.
        :return: ``(img_feat, ques_ix, ans, img_id, pad, ques_id, pad, pad,
            pad, pad)`` where the first three are torch tensors, ``img_id`` /
            ``ques_id`` are read from the question record and ``pad`` is a
            ``np.zeros(1)`` array filling the unused slots.
        """
        # Placeholders for slots that this sample may not be able to fill.
        pad = np.zeros(1)
        img_feat_iter = np.zeros(1)
        ans_iter = np.zeros(1)

        ques = self.ques_list[idx]
        # Resolved outside the try block so that the error report below can
        # never fail on the very lookup it is trying to report.
        feat_path = self.iid_to_img_feat_path[str(ques['img_id'])]

        if self.__C.RUN_MODE in ['train']:
            try:
                # np.load returns an NpzFile for .npz archives; close it
                # promptly instead of waiting for garbage collection.
                with np.load(feat_path) as img_feat:
                    img_feat_x = img_feat['x'].transpose((1, 0))
                img_feat_iter = proc_img_feat(img_feat_x, self.__C.IMG_FEAT_PAD_SIZE)
            except Exception:
                # Best effort: keep training with the placeholder feature, but
                # do not swallow KeyboardInterrupt / SystemExit.
                print('false')
                print(feat_path)

            # Process question
            ques_ix_iter = proc_ques(ques, self.token_to_ix, self.__C.MAX_TOKEN, element_name='ques')

            # Process answer
            ans_iter = proc_ans_oe(ques, self.ans_to_ix)

        else:
            with np.load(feat_path) as img_feat:
                img_feat_x = img_feat['x'].transpose((1, 0))
            img_feat_iter = proc_img_feat(img_feat_x, self.__C.IMG_FEAT_PAD_SIZE)

            # Process question
            ques_ix_iter = proc_ques(ques, self.token_to_ix, self.__C.MAX_TOKEN, element_name='ques')

        return torch.from_numpy(img_feat_iter), \
               torch.from_numpy(ques_ix_iter), \
               torch.from_numpy(ans_iter), \
               ques['img_id'], \
               pad, \
               ques['ques_id'], \
               pad, pad, pad, pad
示例#5
0
    def __getitem__(self, idx):
        """Return ``(img_feat, ques_ix, ans)`` tensors for ``ques_list[idx]``.

        This variant has no annotations: the answer tensor is always built
        from a ``np.zeros(1)`` placeholder.
        NOTE(review): the original comment tagged this path as ['show'] —
        presumably a run mode; confirm against the caller.

        :param idx: index into ``self.ques_list``.
        :return: three torch tensors built with ``torch.from_numpy``.
        """
        # Placeholder answer; nothing in this method produces a real one.
        ans_iter = np.zeros(1)

        ques = self.ques_list[idx]

        # Image feature from the per-image .npz file.  np.load returns an
        # NpzFile for .npz archives; close it promptly so file descriptors are
        # not held until garbage collection.
        with np.load(self.iid_to_img_feat_path[str(ques['image_id'])]) as img_feat:
            img_feat_x = img_feat['x'].transpose((1, 0))
        img_feat_iter = proc_img_feat(img_feat_x, self.__C.IMG_FEAT_PAD_SIZE)

        # Question tokens.
        ques_ix_iter = proc_ques(ques, self.token_to_ix, self.__C.MAX_TOKEN)

        return torch.from_numpy(img_feat_iter), torch.from_numpy(ques_ix_iter), \
               torch.from_numpy(ans_iter)
示例#6
0
    def __getitem__(self, idx):
        """Return the 10-element multiple-choice sample for ``ques_list[idx]``.

        In 'train' mode a failure while loading or processing the image
        feature is reported on stdout and a ``np.zeros(1)`` placeholder is
        used instead (best effort); in every other run mode such a failure
        propagates.

        :param idx: index into ``self.ques_list``.
        :return: in 'train' mode ``(img_feat, ques_ix, ans_score, img_id, pad,
            ques_id, ans_label, ans_mc_ix, ans_ix, mc)``; otherwise
            ``(img_feat, ques_ix, pad, img_id, pad, ques_id, pad, ans_mc_ix,
            ans_ix, mc)``.  ``img_feat``, ``ques_ix``, ``ans_score``,
            ``ans_label`` and ``ans_mc_ix`` are converted with
            ``torch.from_numpy``; ``pad`` is a ``np.zeros(1)`` array and the
            remaining items are passed through unchanged.
        """
        # Filler for tuple slots that carry no data in the current mode.
        pad = np.zeros(1)

        ques = self.ques_list[idx]
        # Resolved outside the try block so that the error report below can
        # never fail on the very lookup it is trying to report.
        feat_path = self.iid_to_img_feat_path[str(ques['img_id'])]

        if self.__C.RUN_MODE in ['train']:
            # Placeholder used when the feature cannot be loaded.
            img_feat_iter = np.zeros(1)
            try:
                # np.load returns an NpzFile for .npz archives; close it
                # promptly instead of waiting for garbage collection.
                with np.load(feat_path) as img_feat:
                    img_feat_x = img_feat['x'].transpose((1, 0))
                img_feat_iter = proc_img_feat(img_feat_x, self.__C.IMG_FEAT_PAD_SIZE)
            except Exception:
                # Best effort: keep training with the placeholder feature, but
                # do not swallow KeyboardInterrupt / SystemExit.
                print('false')
                print(feat_path)

            # Process question
            ques_ix_iter = proc_ques(ques, self.token_to_ix, self.__C.MAX_TOKEN, element_name='ques')

            # proc_ans_mc yields: ans_score, ans_label, ans_mc_ix, ans_ix
            ans_iter, ans_label, ans_mc_ix, ans_ix = proc_ans_mc(ques, self.ans_to_ix, self.token_to_ix)

            return torch.from_numpy(img_feat_iter), \
                   torch.from_numpy(ques_ix_iter), \
                   torch.from_numpy(ans_iter), \
                   ques['img_id'], \
                   pad, \
                   ques['ques_id'], \
                   torch.from_numpy(ans_label), \
                   torch.from_numpy(ans_mc_ix), \
                   ans_ix, \
                   ques['mc']

        with np.load(feat_path) as img_feat:
            img_feat_x = img_feat['x'].transpose((1, 0))
        img_feat_iter = proc_img_feat(img_feat_x, self.__C.IMG_FEAT_PAD_SIZE)

        # Process question
        ques_ix_iter = proc_ques(ques, self.token_to_ix, self.__C.MAX_TOKEN, element_name='ques')

        ans_mc_ix, ans_ix = proc_ans_mc_test(ques, self.ans_to_ix, self.token_to_ix)

        return torch.from_numpy(img_feat_iter), \
               torch.from_numpy(ques_ix_iter), \
               pad, \
               ques['img_id'], \
               pad, \
               ques['ques_id'], \
               pad, \
               torch.from_numpy(ans_mc_ix), \
               ans_ix, \
               ques['mc']
示例#7
0
    def __getitem__(self, idx):
        """Return ``(img_feat, ques_ix, ans)`` tensors for sample ``idx``.

        In 'train' mode the sample is taken from ``self.ans_list`` (question
        resolved through ``self.qid_to_ques``) and the answer comes from
        ``proc_ans``; otherwise the sample is taken from ``self.ques_list``
        and the answer is a ``np.zeros(1)`` placeholder.

        When ``PRELOAD`` is off, the image feature is the horizontal
        concatenation of the 'x' array from the per-image .npz file and the
        array stored at ``<feature_path_vc>/<image_id>.npy``.  If their first
        dimensions differ, the image id is printed and the first array is
        replaced by the leading rows of ``<feature_path_bu>/<image_id>.npy``.

        :param idx: index into ``ans_list`` (train) or ``ques_list``.
        :return: three torch tensors built with ``torch.from_numpy``.
        :raises AssertionError: if the row counts still differ after the
            fallback described above.
        """
        # Placeholder answer for run modes without annotations.
        ans_iter = np.zeros(1)

        is_train = self.__C.RUN_MODE in ['train']
        if is_train:
            ans = self.ans_list[idx]
            ques = self.qid_to_ques[str(ans['question_id'])]
            image_id = ans['image_id']
        else:
            ques = self.ques_list[idx]
            image_id = ques['image_id']

        if self.__C.PRELOAD:
            img_feat_x = self.iid_to_img_feat[str(image_id)]
        else:
            # np.load returns an NpzFile for .npz archives; close it promptly
            # so file descriptors are not held until garbage collection.
            with np.load(self.iid_to_img_feat_path[str(image_id)]) as img_feat:
                img_feat_x = img_feat['x'].transpose((1, 0))
            img_feat_vc = np.load(self.feature_path_vc + '/' +
                                  str(image_id) + '.npy')

            # Explicit comparison instead of try/assert/except: assert
            # statements are stripped under ``python -O``, which would
            # silently disable the fallback below.
            if img_feat_x.shape[0] != img_feat_vc.shape[0]:
                print(image_id)
                img_feat_bu = np.load(self.feature_path_bu + '/' +
                                      str(image_id) + '.npy')
                img_feat_x = img_feat_bu[:img_feat_vc.shape[0], :]
                if img_feat_x.shape[0] != img_feat_vc.shape[0]:
                    # Same exception type the original assert raised.
                    raise AssertionError(
                        'feature row mismatch for image %s: %d vs %d' %
                        (image_id, img_feat_x.shape[0],
                         img_feat_vc.shape[0]))
            img_feat_x = np.hstack((img_feat_x, img_feat_vc))
        img_feat_iter = proc_img_feat(img_feat_x,
                                      self.__C.IMG_FEAT_PAD_SIZE)

        # Question tokens.
        ques_ix_iter = proc_ques(ques, self.token_to_ix,
                                 self.__C.MAX_TOKEN)

        # Answer, only available for training annotations.
        if is_train:
            ans_iter = proc_ans(ans, self.ans_to_ix)

        return torch.from_numpy(img_feat_iter), \
               torch.from_numpy(ques_ix_iter), \
               torch.from_numpy(ans_iter)
示例#8
0
    def __getitem__(self, idx):
        """Return the image, tokenized question and answer for sample ``idx``.

        In 'train' mode the sample is taken from ``self.ans_list`` (question
        resolved through ``self.qid_to_ques``) and the answer comes from
        ``proc_ans``; otherwise the sample is taken from ``self.ques_list``
        and the answer is a ``np.zeros(1)`` placeholder.  The question text
        is encoded with ``self.tokenizer.encode_plus`` rather than with the
        token-index table.

        :param idx: index into ``ans_list`` (train) or ``ques_list``.
        :return: ``(img_feat, input_ids, attention_mask, token_type_ids,
            ans)``, all torch tensors.
        """
        # Placeholder answer for run modes without annotations.
        ans_iter = np.zeros(1)

        is_train = self.__C.RUN_MODE in ['train']
        if is_train:
            ans = self.ans_list[idx]
            ques = self.qid_to_ques[str(ans['question_id'])]
            image_key = str(ans['image_id'])
        else:
            ques = self.ques_list[idx]
            image_key = str(ques['image_id'])

        # Image feature: preloaded table or per-image .npz file.
        if self.__C.PRELOAD:
            img_feat_x = self.iid_to_img_feat[image_key]
        else:
            # np.load returns an NpzFile for .npz archives; close it promptly
            # so file descriptors are not held until garbage collection.
            with np.load(self.iid_to_img_feat_path[image_key]) as img_feat:
                img_feat_x = img_feat['x'].transpose((1, 0))
        img_feat_iter = proc_img_feat(img_feat_x,
                                      self.__C.IMG_FEAT_PAD_SIZE)

        # Answer, only available for training annotations.
        if is_train:
            ans_iter = proc_ans(ans, self.ans_to_ix)

        # NOTE(review): if self.tokenizer is a Hugging Face tokenizer,
        # `pad_to_max_length` is deprecated in recent releases in favour of
        # `padding='max_length'`; left unchanged because the installed
        # version is not visible from here.
        inputs = self.tokenizer.encode_plus(ques['question'],
                                            add_special_tokens=True,
                                            max_length=self.__C.MAX_TOKEN,
                                            return_attention_mask=True,
                                            return_token_type_ids=True,
                                            pad_to_max_length=True)

        ques_idx = torch.tensor(inputs['input_ids'])
        attention_mask = torch.tensor(inputs['attention_mask'])
        token_type_ids = torch.tensor(inputs['token_type_ids'])

        return torch.from_numpy(
            img_feat_iter
        ), ques_idx, attention_mask, token_type_ids, torch.from_numpy(ans_iter)