import numpy as np
import torch
import torch.utils.data

from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.structures import BoxMode
from gulpio import GulpDirectory
from gulpio.transforms import Scale, CenterCrop, Compose, UnitNorm
from epic_kitchens.dataset.epic_dataset import EpicVideoDataset

from read_gulpio import EpicDataset

class_type = 'noun'
rgb_train = EpicVideoDataset('../../epic/data/processed/gulp/rgb_train', class_type)

transforms = Compose([])
dataset = EpicDataset(transforms)

segment_uids = list(rgb_train.gulp_dir.merged_meta_dict.keys())
example_segment = rgb_train.video_segments[10]
example_frames = rgb_train.load_frames(example_segment)

dataloader = torch.utils.data.DataLoader(dataset, batch_size=8, shuffle=True)

# Take the first clip of the first batch and reshape it to H x W x C for visualization.
for batch_num, (data, label) in enumerate(dataloader):
    frame = data[0].to('cpu').detach().numpy().copy()
    frame = frame.transpose(1, 2, 3, 0)
    frame = np.squeeze(frame)
    break

im = frame

cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
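# Sketch (assumption, not part of the original script): with the config above, a
# DefaultPredictor can be built to run detection on the extracted frame. This
# assumes `im` is an H x W x C uint8 image in the BGR channel order that
# detectron2 expects by default.
from detectron2.engine import DefaultPredictor

cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml")
predictor = DefaultPredictor(cfg)
outputs = predictor(im)
print(outputs["instances"].pred_classes)
print(outputs["instances"].pred_boxes)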
import os
import random

import numpy as np
import torch
from torch.utils import data

from epic_kitchens.dataset.epic_dataset import EpicVideoDataset


class epic_gulp(data.Dataset):
    """EPIC-Kitchens GulpIO dataset that samples num_seq blocks of seq_len frames
    per segment, with a temporal stride of `downsample` between frames."""

    def __init__(self, mode='train', transform=None, seq_len=6, num_seq=5,
                 downsample=3, class_type='verb+noun'):
        self.mode = mode
        self.transform = transform
        self.seq_len = seq_len
        self.num_seq = num_seq
        self.downsample = downsample
        self.class_type = class_type

        gulp_root = '/proj/vondrick/datasets/epic-kitchens/data/processed/gulp'
        print(os.path.join(gulp_root, 'rgb_train', self.class_type))
        self.EpicDataset = EpicVideoDataset(
            os.path.join(gulp_root, 'rgb_train'), self.class_type)

        # remove segments that are too short to hold num_seq * seq_len * downsample frames
        dataset = list(self.EpicDataset)
        rgb = []
        for i in range(len(dataset)):
            if dataset[i].num_frames > self.seq_len * self.num_seq * self.downsample:
                rgb.append(dataset[i])
        del dataset

        # random 80/20 train/val split over the remaining segments
        train_idx = random.sample(range(1, len(rgb)), int(float(len(rgb)) * 0.8))
        rgb_train = []
        rgb_val = []
        for i in range(len(rgb)):
            if i in train_idx:
                rgb_train.append(rgb[i])
            else:
                rgb_val.append(rgb[i])

        if self.mode == 'train':
            self.video_info = rgb_train
        elif self.mode in ['val']:
            self.video_info = rgb_val

    def idx_sampler(self, index):
        """Sample a (num_seq, seq_len) block of frame indices at a random start offset."""
        vlen = self.video_info[index].num_frames
        if vlen - self.num_seq * self.seq_len * self.downsample <= 0:
            return None
        n = 1
        start_idx = np.random.choice(
            range(vlen - self.num_seq * self.seq_len * self.downsample), n)
        seq_idx = np.expand_dims(np.arange(
            self.num_seq), -1) * self.downsample * self.seq_len + start_idx
        seq_idx_block = seq_idx + \
            np.expand_dims(np.arange(self.seq_len), 0) * self.downsample
        return seq_idx_block

    def __getitem__(self, index):
        idx_block = self.idx_sampler(index)
        assert idx_block.shape == (self.num_seq, self.seq_len)
        idx_block = idx_block.reshape(self.num_seq * self.seq_len)

        segment = self.EpicDataset.load_frames(self.video_info[index])
        seq = [segment[i] for i in idx_block]

        t_seq = self.transform(seq)  # apply same transform to every frame
        num_crop = None
        try:
            (C, H, W) = t_seq[0].size()
            t_seq = torch.stack(t_seq, 0)
        except:
            # multi-crop transform: each element is a list of crops
            (C, H, W) = t_seq[0][0].size()
            tmp = [torch.stack(i, 0) for i in t_seq]
            assert len(tmp) == 5
            num_crop = 5
            t_seq = torch.stack(tmp, 1)
        t_seq = t_seq.view(self.num_seq, self.seq_len, C, H, W).transpose(1, 2)

        action = torch.LongTensor([self.video_info[index].verb_class])
        noun = torch.LongTensor([self.video_info[index].noun_class])

        # OLD: return sequence only
        # return t_seq, action, noun
        # NEW: return all useful information in a dictionary
        result = {
            't_seq': t_seq,
            'idx_block': idx_block,
            'vpath': 'TODO, idk how to retrieve',
            'action': action,
            'noun': noun
        }
        return result

    def __len__(self):
        return len(self.video_info)
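# Usage sketch (assumption, not part of the original file): `transform` must map a
# list of frames to a list of C x H x W tensors so that __getitem__ can stack them.
# The ToTensor-based pipeline below is a hypothetical stand-in; the real project
# presumably uses its own clip augmentation.
if __name__ == '__main__':
    from torchvision import transforms as T

    to_tensor = T.ToTensor()

    def clip_transform(frames):
        # convert each H x W x C frame to a C x H x W float tensor in [0, 1]
        return [to_tensor(f) for f in frames]

    train_set = epic_gulp(mode='train', transform=clip_transform,
                          seq_len=6, num_seq=5, downsample=3,
                          class_type='verb+noun')
    loader = data.DataLoader(train_set, batch_size=4, shuffle=True)
    batch = next(iter(loader))
    print(batch['t_seq'].shape)    # expected: (B, num_seq, C, seq_len, H, W)
    print(batch['action'].shape, batch['noun'].shape)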