def __getitem__(self, idx):
    """Build one sample for the given dataset index.

    In 'train' mode the sample is driven by ``self.ans_list[idx]`` and
    carries the processed answer. In every other run mode it is driven by
    ``self.ques_list[idx]`` and the answer slot keeps a one-element zero
    placeholder.

    :param idx: position in ``self.ans_list`` ('train') or
        ``self.ques_list`` (any other run mode).
    :return: tuple of torch tensors
        ``(img_feat_iter, ques_ix_iter, ans_iter)``.
    :raises Exception: any error raised while loading the image feature
        file is re-raised unchanged after the offending path is printed.
    """
    # Placeholder so that modes without answers still return an array.
    ans_iter = np.zeros(1)

    is_train = self.__C.RUN_MODE in ['train']
    if is_train:
        # Load the run data from list
        ans = self.ans_list[idx]
        ques = self.qid_to_ques[str(ans['question_id'])]
        record = ans
    else:
        # Load the run data from list
        ques = self.ques_list[idx]
        record = ques

    image_key = str(record['image_id'])

    # Process image feature from (.npz) file
    if self.__C.PRELOAD:
        img_feat_x = self.iid_to_img_feat[image_key]
    else:
        feat_path = self.iid_to_img_feat_path[image_key]
        try:
            img_feat = np.load(feat_path)
            img_feat_x = img_feat['x'].transpose((1, 0))
        except Exception:
            # Keep the diagnostic output, but surface the real failure
            # instead of letting it turn into an UnboundLocalError on
            # ``img_feat_x`` a few lines later.
            print('false')
            print(feat_path)
            raise

    img_feat_iter = proc_img_feat(img_feat_x, self.__C.IMG_FEAT_PAD_SIZE)

    # Process question
    ques_ix_iter = proc_ques(ques, self.token_to_ix, self.__C.MAX_TOKEN)

    if is_train:
        # Process answer
        ans_iter = proc_ans(ans, self.ans_to_ix)

    return torch.from_numpy(img_feat_iter), \
        torch.from_numpy(ques_ix_iter), \
        torch.from_numpy(ans_iter)
def __getitem__(self, idx):
    """Return one sample as a dict of torch tensors.

    For run modes 'train' and 'val' the sample is looked up through
    ``self.ans_list`` and the dict has five entries: ``img_feat``,
    ``ques_ix``, ``ans_score``, ``ans_embedding_sampled`` and
    ``ans_score_sampled``. For any other run mode the sample comes from
    ``self.ques_list`` and only ``img_feat``, ``ques_ix`` and a
    one-element zero ``ans_score`` placeholder are returned.

    :param idx: index into ``self.ans_list`` or ``self.ques_list``.
    :return: dict mapping the names above to torch tensors.
    """
    cfg = self.__C

    if cfg.RUN_MODE not in ['train', 'val']:
        # Question-only path: no annotation is available.
        question = self.ques_list[idx]

        # Image feature comes from memory or from the (.npz) file.
        if cfg.PRELOAD:
            raw_feat = self.iid_to_img_feat[str(question['image_id'])]
        else:
            archive = np.load(
                self.iid_to_img_feat_path[str(question['image_id'])])
            raw_feat = archive['x'].transpose((1, 0))

        padded_feat = proc_img_feat(raw_feat, cfg.IMG_FEAT_PAD_SIZE)
        question_ix = proc_ques(question, self.token_to_ix, cfg.MAX_TOKEN)

        return {
            "img_feat": torch.from_numpy(padded_feat),
            "ques_ix": torch.from_numpy(question_ix),
            "ans_score": torch.from_numpy(np.zeros(1)),
        }

    # Annotated path: the answer entry leads to its question.
    answer = self.ans_list[idx]
    question = self.qid_to_ques[str(answer['question_id'])]

    # Image feature comes from memory or from the (.npz) file.
    if cfg.PRELOAD:
        raw_feat = self.iid_to_img_feat[str(answer['image_id'])]
    else:
        archive = np.load(self.iid_to_img_feat_path[str(answer['image_id'])])
        raw_feat = archive['x'].transpose((1, 0))

    padded_feat = proc_img_feat(raw_feat, cfg.IMG_FEAT_PAD_SIZE)
    question_ix = proc_ques(question, self.token_to_ix, cfg.MAX_TOKEN)
    answer_score = proc_ans(answer, self.ans_to_ix)

    sampled_embedding, sampled_score = self.get_sampled_ans(answer_score)

    return {
        "img_feat": torch.from_numpy(padded_feat),
        "ques_ix": torch.from_numpy(question_ix),
        "ans_score": torch.from_numpy(answer_score),
        "ans_embedding_sampled": torch.from_numpy(sampled_embedding),
        "ans_score_sampled": torch.from_numpy(sampled_score),
    }
def __getitem__(self, idx):
    '''
    Fetch the sample at position ``idx``.

    Reads ``self.ans_list`` / ``self.qid_to_ques`` in 'train' mode and
    ``self.ques_list`` in every other run mode.

    :param idx: sample index (e.g. ``idx=0``)
    :return: torch tensors ``(img_feat_iter, ques_ix_iter, ans_iter)``;
        outside 'train' mode ``ans_iter`` is a one-element zero placeholder
    '''
    cfg = self.__C
    in_train_mode = cfg.RUN_MODE in ['train']

    # Placeholder returned as the answer outside 'train' mode.
    answer_scores = np.zeros(1)

    if in_train_mode:
        # One annotation per sample. Per the original author's note it
        # holds 'answers' (10 entries), 'image_id' and 'question_id', e.g.
        # {'answers': [{'answer': 'skatebodarding'}, ...],
        #  'image_id': 139831, 'question_id': 'VG_1293929'}
        annotation = self.ans_list[idx]
        # Matching question, e.g. {'image_id': 139831,
        #  'question': "What's the man doing?", 'question_id': 'VG_1293929'}
        question = self.qid_to_ques[str(annotation['question_id'])]
        record = annotation
    else:
        # Load the run data from list
        question = self.ques_list[idx]
        record = question

    # Process image feature from (.npz) file
    if cfg.PRELOAD:
        # Features were preloaded: take them from the in-memory mapping.
        feat = self.iid_to_img_feat[str(record['image_id'])]
    else:
        # Load the npz file of this image id directly.
        npz = np.load(self.iid_to_img_feat_path[str(record['image_id'])])
        # Swap the two axes of the stored 'x' array.
        feat = npz['x'].transpose((1, 0))

    # Image feature, processed with the configured pad size
    # (original note mentions 100 - TODO confirm against the config).
    padded_feat = proc_img_feat(feat, cfg.IMG_FEAT_PAD_SIZE)

    # Question indices from the question, token_to_ix and MAX_TOKEN.
    question_ix = proc_ques(question, self.token_to_ix, cfg.MAX_TOKEN)

    if in_train_mode:
        # Answer scores computed from the annotation and ans_to_ix.
        answer_scores = proc_ans(annotation, self.ans_to_ix)

    return (
        torch.from_numpy(padded_feat),
        torch.from_numpy(question_ix),
        torch.from_numpy(answer_scores),
    )
def __getitem__(self, idx):
    """Assemble the tensors for sample ``idx``.

    'train' mode resolves the sample through ``self.ans_list`` and also
    processes the answer. Any other run mode reads ``self.ques_list`` and
    returns a one-element zero array in the answer slot.

    :param idx: index of the requested sample.
    :return: ``(img_feat, ques_ix, ans)`` as torch tensors.
    """
    training = self.__C.RUN_MODE in ['train']

    # Resolve the question (and the annotation, when training) first.
    if training:
        ans = self.ans_list[idx]
        ques = self.qid_to_ques[str(ans['question_id'])]
        image_key = str(ans['image_id'])
    else:
        ques = self.ques_list[idx]
        image_key = str(ques['image_id'])

    # Image feature: preloaded mapping or the (.npz) file on disk.
    if self.__C.PRELOAD:
        features = self.iid_to_img_feat[image_key]
    else:
        # Original author's example: '551018' ->
        # '../datasets-vqa/coco_extract/val2014/COCO_val2014_000000551018.jpg.npz'
        stored = np.load(self.iid_to_img_feat_path[image_key])
        # Original author's example: (2048, 41) -> (41, 2048)
        features = stored['x'].transpose((1, 0))

    img_tensor_src = proc_img_feat(features, self.__C.IMG_FEAT_PAD_SIZE)

    # Process question
    ques_tensor_src = proc_ques(ques, self.token_to_ix, self.__C.MAX_TOKEN)

    # Process answer (placeholder outside 'train' mode)
    ans_tensor_src = proc_ans(ans, self.ans_to_ix) if training else np.zeros(1)

    return (
        torch.from_numpy(img_tensor_src),
        torch.from_numpy(ques_tensor_src),
        torch.from_numpy(ans_tensor_src),
    )
def __getitem__(self, idx):
    """Return image, question, answer and fact tensors for sample ``idx``.

    'train' mode reads the annotation from ``self.ans_list`` and the
    detected objects from ``self.img_obj_train``. Every other run mode
    reads ``self.ques_list`` and ``self.img_obj_val`` and leaves the
    answer as a one-element zero placeholder. The image feature is used
    as stored, without transposing or padding.

    :param idx: index of the requested sample.
    :return: ``(img_feat, ques_ix, ans, fact_idx)`` as torch tensors.
    """
    cfg = self.__C
    training = cfg.RUN_MODE in ['train']

    # Placeholder answer for modes that have no annotation.
    answer_scores = np.zeros(1)

    # Load the run data from list
    if training:
        annotation = self.ans_list[idx]
        question = self.qid_to_ques[str(annotation['question_id'])]
        record = annotation
    else:
        question = self.ques_list[idx]
        record = question

    # Image feature is taken as-is from memory or from disk.
    if cfg.PRELOAD:
        image_feat = self.iid_to_img_feat[str(record['image_id'])]
    else:
        image_feat = np.load(
            self.iid_to_img_feat_path[str(record['image_id'])])

    # Process question
    question_ix = proc_ques(question, self.token_to_ix, cfg.MAX_TOKEN)

    if training:
        # Process answer
        answer_scores = proc_ans(annotation, self.ans_to_ix)

    # Objects detected for this image, e.g. ['person', 'windows', ...]
    object_table = self.img_obj_train if training else self.img_obj_val
    detected = object_table[str(record['image_id'])]

    # Keep the top objects and pad the list up to TOP_OBJ entries.
    kept_objects = get_top_obj(detected, top=cfg.TOP_OBJ)
    kept_objects.extend(['pad_obj'] * (cfg.TOP_OBJ - len(kept_objects)))

    # For every object take its first TOP_REL relations, padded with the
    # first relation of 'pad_obj' when the object has fewer.
    filler = self.react_obj_rel['pad_obj'][0]
    facts = []
    for name in kept_objects:
        chunk = self.react_obj_rel[name][:cfg.TOP_REL]
        chunk.extend([filler] * (cfg.TOP_REL - len(chunk)))
        facts.extend(chunk)

    # Total token budget over all fact sentences.
    token_budget = cfg.FACT_TOKEN * cfg.TOP_OBJ * cfg.TOP_REL
    fact_ix = proc_fact(facts, self.token_to_ix, token_budget,
                        cfg.FACT_TOKEN)

    return (
        torch.from_numpy(image_feat),
        torch.from_numpy(question_ix),
        torch.from_numpy(answer_scores),
        torch.from_numpy(fact_ix),
    )
def __getitem__(self, idx):
    """Build one sample whose image feature is the base feature joined
    with the matching VC feature.

    In 'train' mode the sample is driven by ``self.ans_list[idx]`` and
    includes the processed answer. In every other run mode it is driven
    by ``self.ques_list[idx]`` and the answer is a one-element zero
    placeholder. When features are not preloaded, the base feature from
    the (.npz) file is stacked column-wise with the ``.npy`` array under
    ``self.feature_path_vc``. If their row counts differ, the base feature
    is re-read from ``self.feature_path_bu`` and cut to the VC row count.

    :param idx: position in ``self.ans_list`` ('train') or
        ``self.ques_list`` (any other run mode).
    :return: tuple of torch tensors
        ``(img_feat_iter, ques_ix_iter, ans_iter)``.
    :raises ValueError: if the row counts still differ after the
        fallback feature has been loaded.
    """
    # Placeholder so that modes without answers still return an array.
    ans_iter = np.zeros(1)

    is_train = self.__C.RUN_MODE in ['train']
    if is_train:
        # Load the run data from list
        ans = self.ans_list[idx]
        ques = self.qid_to_ques[str(ans['question_id'])]
        record = ans
    else:
        # Load the run data from list
        ques = self.ques_list[idx]
        record = ques

    image_id = record['image_id']
    image_key = str(image_id)

    # Process image feature from (.npz) file
    if self.__C.PRELOAD:
        img_feat_x = self.iid_to_img_feat[image_key]
    else:
        # modified by Tan Wang
        img_feat = np.load(self.iid_to_img_feat_path[image_key])
        img_feat_x = img_feat['x'].transpose((1, 0))
        img_feat_vc = np.load(self.feature_path_vc + '/' + image_key + '.npy')

        # Explicit comparison instead of try/assert/except: asserts are
        # removed under ``python -O``, which would silently skip the
        # fallback below.
        if img_feat_x.shape[0] != img_feat_vc.shape[0]:
            print(image_id)
            img_feat = np.load(
                self.feature_path_bu + '/' + image_key + '.npy')
            img_feat_x = img_feat[:img_feat_vc.shape[0], :]
            if img_feat_x.shape[0] != img_feat_vc.shape[0]:
                raise ValueError(
                    'image feature row count mismatch for image_id '
                    f'{image_id}: {img_feat_x.shape[0]} vs '
                    f'{img_feat_vc.shape[0]}')

        img_feat_x = np.hstack((img_feat_x, img_feat_vc))

    img_feat_iter = proc_img_feat(img_feat_x, self.__C.IMG_FEAT_PAD_SIZE)

    # Process question
    ques_ix_iter = proc_ques(ques, self.token_to_ix, self.__C.MAX_TOKEN)

    if is_train:
        # Process answer
        ans_iter = proc_ans(ans, self.ans_to_ix)

    return torch.from_numpy(img_feat_iter), \
        torch.from_numpy(ques_ix_iter), \
        torch.from_numpy(ans_iter)
def __getitem__(self, idx):
    """Return image features, tokenizer outputs and answer for ``idx``.

    'train' mode resolves the sample through ``self.ans_list`` and
    processes the answer. Any other run mode reads ``self.ques_list`` and
    keeps a one-element zero array as the answer. The question text is
    encoded with ``self.tokenizer.encode_plus`` rather than ``proc_ques``.

    :param idx: index of the requested sample.
    :return: ``(img_feat, input_ids, attention_mask, token_type_ids, ans)``
        as torch tensors.
    """
    training = self.__C.RUN_MODE in ['train']

    # Placeholder answer for modes that have no annotation.
    answer_scores = np.zeros(1)

    # Load the run data from list
    if training:
        annotation = self.ans_list[idx]
        ques = self.qid_to_ques[str(annotation['question_id'])]
        record = annotation
    else:
        ques = self.ques_list[idx]
        record = ques

    # Process image feature from (.npz) file
    if self.__C.PRELOAD:
        raw_feat = self.iid_to_img_feat[str(record['image_id'])]
    else:
        npz = np.load(self.iid_to_img_feat_path[str(record['image_id'])])
        raw_feat = npz['x'].transpose((1, 0))

    padded_feat = proc_img_feat(raw_feat, self.__C.IMG_FEAT_PAD_SIZE)

    if training:
        # Process answer
        answer_scores = proc_ans(annotation, self.ans_to_ix)

    # Process question with the tokenizer.
    # NOTE(review): ``pad_to_max_length`` looks like the legacy
    # HuggingFace keyword - confirm it is still accepted by the installed
    # tokenizer version before upgrading.
    encoded = self.tokenizer.encode_plus(
        ques['question'],
        add_special_tokens=True,
        max_length=self.__C.MAX_TOKEN,
        return_attention_mask=True,
        return_token_type_ids=True,
        pad_to_max_length=True,
    )

    input_ids = torch.tensor(encoded['input_ids'])
    mask = torch.tensor(encoded['attention_mask'])
    segment_ids = torch.tensor(encoded['token_type_ids'])

    return (
        torch.from_numpy(padded_feat),
        input_ids,
        mask,
        segment_ids,
        torch.from_numpy(answer_scores),
    )