def __getitem__(self, index):
    """Return the frame features and encoded caption for one sample.

    Args:
        index: Position into ``self.cap_ids``.

    Returns:
        A tuple ``(frames_tensor, cap_tensor, cap_bow, index, cap_id,
        video_id)``. ``cap_tensor`` is ``None`` when ``self.vocab`` is
        ``None``; ``cap_bow`` is ``None`` when ``self.bow2vec`` is ``None``.
    """
    cap_id = self.cap_ids[index]
    video_id = getVideoId(cap_id)

    # Video: read one feature vector per frame id and pack them together.
    frames_tensor = torch.Tensor(
        [self.visual_feat.read_one(frame_id)
         for frame_id in self.video2frames[video_id]]
    )

    # Text: the raw caption feeds both the bag-of-words and the token path.
    raw_caption = self.captions[cap_id]

    cap_bow = None
    if self.bow2vec is not None:
        bow_vec = self.bow2vec.mapping(raw_caption)
        # A None mapping result is replaced by an all-zero vector of size ndims.
        if bow_vec is None:
            cap_bow = torch.zeros(self.bow2vec.ndims)
        else:
            cap_bow = torch.Tensor(bow_vec)

    cap_tensor = None
    if self.vocab is not None:
        words = clean_str(raw_caption)
        # Token ids are wrapped with the '<start>' / '<end>' markers.
        token_ids = [self.vocab('<start>')]
        token_ids += [self.vocab(word) for word in words]
        token_ids.append(self.vocab('<end>'))
        # NOTE(review): torch.Tensor stores these ids in the default tensor
        # type, not as integers - confirm the consumer converts them if needed.
        cap_tensor = torch.Tensor(token_ids)

    return frames_tensor, cap_tensor, cap_bow, index, cap_id, video_id
def __getitem__(self, index):
    """Return the encoded caption for one sample (no video features).

    Args:
        index: Position into ``self.cap_ids``.

    Returns:
        A tuple ``(cap_tensor, cap_bow, index, cap_id)``. ``cap_tensor`` is
        ``None`` when ``self.vocab`` is ``None``; ``cap_bow`` is ``None``
        when ``self.bow2vec`` is ``None``.
    """
    cap_id = self.cap_ids[index]
    raw_caption = self.captions[cap_id]

    cap_bow = None
    if self.bow2vec is not None:
        bow_vec = self.bow2vec.mapping(raw_caption)
        # A None mapping result is replaced by an all-zero vector of size ndims.
        if bow_vec is None:
            cap_bow = torch.zeros(self.bow2vec.ndims)
        else:
            cap_bow = torch.Tensor(bow_vec)

    cap_tensor = None
    if self.vocab is not None:
        words = clean_str(raw_caption)
        # Token ids are wrapped with the '<start>' / '<end>' markers.
        token_ids = [self.vocab('<start>')]
        token_ids += [self.vocab(word) for word in words]
        token_ids.append(self.vocab('<end>'))
        # NOTE(review): torch.Tensor stores these ids in the default tensor
        # type, not as integers - confirm the consumer converts them if needed.
        cap_tensor = torch.Tensor(token_ids)

    return cap_tensor, cap_bow, index, cap_id
def preprocess(self, query, clear):
    """Split a query string into a list of words.

    Args:
        query: The query text.
        clear: When truthy, tokenize with ``clean_str``; otherwise strip the
            query and split it on whitespace.

    Returns:
        The result of ``clean_str(query)`` or the whitespace-split words.
    """
    return clean_str(query) if clear else query.strip().split()