def __getitem__(self, idx):
    video_info = self.data_source[idx]
    storage_obj = self.backend.open(video_info)
    frame_inds = self.frame_sampler.sample(len(storage_obj))
    num_segs, clip_len = frame_inds.shape
    img_list = storage_obj.get_frame(frame_inds.reshape(-1))
    # transform each segment independently, then concatenate
    img_tensor_list = []
    for i in range(num_segs):
        img_tensor = self.img_transform.apply_image(
            img_list[i * clip_len:(i + 1) * clip_len])
        img_tensor_list.append(img_tensor)
    img_tensor = torch.cat(img_tensor_list, dim=0)
    # img_tensor: [M, C, H, W], where M = num_segs * clip_len
    img_tensor = img_tensor.view((num_segs, clip_len) + img_tensor.shape[1:])
    # [num_segs, C, clip_len, H, W]
    img_tensor = img_tensor.permute(0, 2, 1, 3, 4).contiguous()
    data = dict(
        imgs=DataContainer(img_tensor, stack=True, cpu_only=False))
    if not self.test_mode:
        # labels in the annotation are 1-indexed; shift to 0-indexed
        gt_label = torch.LongTensor([video_info['label']]) - 1
        data['gt_labels'] = DataContainer(
            gt_label, stack=True, pad_dims=None, cpu_only=False)
    return data
def test_format_trimap():
    ori_trimap = np.random.randint(3, size=(64, 64))
    ori_trimap[ori_trimap == 1] = 128
    ori_trimap[ori_trimap == 2] = 255

    from mmcv.parallel import DataContainer

    # without one-hot encoding: values {0, 128, 255} are mapped to {0, 1, 2}
    ori_result = dict(
        trimap=torch.from_numpy(ori_trimap.copy()), meta=DataContainer({}))
    format_trimap = FormatTrimap(to_onehot=False)
    results = format_trimap(ori_result)
    result_trimap = results['trimap']
    assert result_trimap.shape == (1, 64, 64)
    assert ((result_trimap.numpy() == 0) == (ori_trimap == 0)).all()
    assert ((result_trimap.numpy() == 1) == (ori_trimap == 128)).all()
    assert ((result_trimap.numpy() == 2) == (ori_trimap == 255)).all()

    # with one-hot encoding: each of the three classes gets its own channel
    ori_result = dict(
        trimap=torch.from_numpy(ori_trimap.copy()), meta=DataContainer({}))
    format_trimap = FormatTrimap(to_onehot=True)
    results = format_trimap(ori_result)
    result_trimap = results['trimap']
    assert result_trimap.shape == (3, 64, 64)
    assert ((result_trimap[0, ...].numpy() == 1) == (ori_trimap == 0)).all()
    assert ((result_trimap[1, ...].numpy() == 1) == (ori_trimap == 128)).all()
    assert ((result_trimap[2, ...].numpy() == 1) == (ori_trimap == 255)).all()

    assert repr(format_trimap) == format_trimap.__class__.__name__ + (
        '(to_onehot=True)')
def __getitem__(self, idx):
    video_info = self.data_source[idx]
    storage_obj = self.backend.open(video_info)
    frame_inds = self.frame_sampler.sample(len(storage_obj))
    num_segs, clip_len = frame_inds.shape
    assert num_segs == 1, f'support num_segs==1 only, got {num_segs}'
    img_list = storage_obj.get_frame(frame_inds.reshape(-1))
    img_tensor, trans_params = self.img_transform.apply_image(
        img_list, return_transform_param=True)
    img_tensor = img_tensor.view((num_segs, clip_len) + img_tensor.shape[1:])
    # [num_segs, C, clip_len, H, W]
    img_tensor = img_tensor.permute(0, 2, 1, 3, 4).contiguous()
    data = dict(
        imgs=DataContainer(img_tensor, stack=True, pad_dims=2, cpu_only=False))
    if not self.test_mode:
        # the rotation flag recorded by the transform serves as the label
        gt_label = torch.LongTensor(
            [trans_params[self.rot_trans_index]['flag']])
        data['gt_labels'] = DataContainer(
            gt_label, stack=True, pad_dims=None, cpu_only=False)
    return data
def __getitem__(self, idx):
    video_info = self.data_source[idx]
    # build video storage backend object
    storage_obj = self.backend.open(video_info)
    total_num_frames = len(storage_obj)
    assert total_num_frames > 0, "Bad data {}".format(video_info)
    frame_inds = self.clip_sampler.sample(total_num_frames)
    num_segs, clip_len = frame_inds.shape
    assert num_segs == 1
    frame_list = storage_obj.get_frame(frame_inds.reshape(-1))
    clip_tensor, trans_params = self.clip_transform.apply_image(
        frame_list, return_transform_param=True)
    # [T, C, H, W] -> [C, T, H, W]
    clip_tensor = clip_tensor.permute(1, 0, 2, 3).contiguous()
    gt_trajs = torch.FloatTensor(
        trans_params[self.mask_trans_index]['traj_rois'])
    # sample a query frame and a key frame from the same position
    im_q, sampled_position = self.sample_single_img_tensor(storage_obj)
    im_k, _ = self.sample_single_img_tensor(
        storage_obj, position=sampled_position)
    data = dict(
        img_q=DataContainer(im_q, stack=True, pad_dims=1, cpu_only=False),
        img_k=DataContainer(im_k, stack=True, pad_dims=1, cpu_only=False),
        imgs=DataContainer(clip_tensor, stack=True, pad_dims=1, cpu_only=False),
        gt_trajs=DataContainer(gt_trajs, stack=True, pad_dims=1, cpu_only=False),
    )
    storage_obj.close()
    return data
def __getitem__(self, idx):
    video_info = self.data_source[idx]
    # build video storage backend object
    storage_obj = self.backend.open(video_info)
    assert len(storage_obj) == len(video_info['gt_boxes'])
    frame_inds = self.frame_sampler.sample(len(storage_obj))
    assert frame_inds.shape[0] == 1, "Support single clip only."
    frame_inds = frame_inds.reshape(-1).astype(np.int64)
    img_list = storage_obj.get_frame(frame_inds)
    gt_info = np.array(video_info['gt_boxes'], dtype=np.int64)
    gt_labels = gt_info[frame_inds, -1]
    gt_boxes = gt_info[frame_inds, 0:4].astype(np.float32)
    img_list, gt_boxes = self.crop_wrt_gt_boxes(img_list, gt_boxes)
    # For bounding boxes whose centers fall out of scope, set the weight to zero.
    gt_ctrs = (gt_boxes[..., 0:2] + gt_boxes[..., 2:4]) * 0.5
    is_in_scope = (gt_ctrs >= 0) & (gt_ctrs <= self.x_size)
    is_in_scope = np.all(is_in_scope, axis=-1)
    gt_weights = np.ones((len(gt_labels), ), np.float32)
    gt_weights[gt_labels > 0] = 0.
    gt_weights[~is_in_scope] = 0.
    img_tensor, trans_params = self.img_transform.apply_image(
        img_list, return_transform_param=True)
    gt_boxes = self.img_transform.apply_boxes(gt_boxes, trans_params)
    gt_trajs = torch.FloatTensor(gt_boxes).unsqueeze(0)  # [1, 16, 4]
    gt_weights = torch.FloatTensor(gt_weights).unsqueeze(0)  # [1, 16]
    # [T, C, H, W] -> [C, T, H, W]
    img_tensor = img_tensor.permute(1, 0, 2, 3).contiguous()
    data = dict(
        imgs=DataContainer(img_tensor, stack=True, pad_dims=1, cpu_only=False),
        gt_trajs=DataContainer(gt_trajs, stack=True, pad_dims=1, cpu_only=False),
        gt_weights=DataContainer(
            gt_weights, stack=True, pad_dims=1, cpu_only=False),
    )
    storage_obj.close()
    return data
def __getitem__(self, idx):
    video_info = self.data_source[idx]
    # build video storage backend object
    storage_obj = self.backend.open(video_info)
    frame_inds = self.frame_sampler.sample(len(storage_obj))
    num_segs, clip_len = frame_inds.shape
    assert num_segs == 1
    img_list = storage_obj.get_frame(frame_inds.reshape(-1))
    img_tensor, trans_params = self.img_transform.apply_image(
        img_list, return_transform_param=True)
    gt_trajs = torch.FloatTensor(
        trans_params[self.mask_trans_index]['traj_rois'])
    # [T, C, H, W] -> [C, T, H, W]
    img_tensor = img_tensor.permute(1, 0, 2, 3).contiguous()
    data = dict(
        imgs=DataContainer(img_tensor, stack=True, pad_dims=1, cpu_only=False),
        gt_trajs=DataContainer(gt_trajs, stack=True, pad_dims=1, cpu_only=False),
    )
    storage_obj.close()
    return data
def __getitem__(self, idx):
    video_info = self.data_source[idx]
    # build video storage backend object
    storage_obj = self.backend.open(video_info)
    # the sampled stride index serves as the classification target
    imgs, stride_idx = self.sample_single_img_tensor(storage_obj)
    imgs = imgs.unsqueeze(0)
    gt_label = torch.LongTensor([stride_idx])
    data = dict(
        imgs=DataContainer(imgs, stack=True, pad_dims=1, cpu_only=False),
        gt_labels=DataContainer(
            gt_label, stack=True, pad_dims=None, cpu_only=False))
    storage_obj.close()
    return data
def __getitem__(self, idx):
    video_info = self.data_source[idx]
    # build video storage backend object
    storage_obj = self.backend.open(
        video_info)  # type: storage_backends.BaseStorageBackend
    num_frames = len(storage_obj)
    frame_inds = self._sample_indices(num_frames)
    # extract video frames from the backend storage according to frame indices
    img_list = storage_obj.get_frame(frame_inds.reshape(-1))
    img_list = [img.astype(np.float32) for img in img_list]
    img_tensor = self.img_transform.apply_image(img_list)  # type: torch.Tensor
    img_tensor = img_tensor.view((self.tuple_len, self.clip_len) +
                                 img_tensor.shape[1:])
    # to [N_tup, 3, T, H, W]
    img_tensor = img_tensor.permute(0, 2, 1, 3, 4)
    # randomly shuffle the clip order; the permutation index is the label
    shuffle_index = random.randint(0, self.permutations.size(0) - 1)
    shuffle_order = self.permutations[shuffle_index]
    img_tensor = img_tensor[shuffle_order].contiguous()
    gt_label = torch.LongTensor([shuffle_index])
    data = dict(
        imgs=DataContainer(img_tensor, stack=True, pad_dims=2, cpu_only=False),
        gt_labels=DataContainer(
            gt_label, stack=True, pad_dims=None, cpu_only=False))
    return data
def __getitem__(self, idx): video_info = self.data_source[idx] # build video storage backend object storage_obj = self.backend.open(video_info) total_num_frames = len(storage_obj) assert total_num_frames > 0, "Bad data {}".format(video_info) clip_tensor_with_transParam, clip_q, clip_k_p, clip_k_n, dataIndex = self.sample_query_key( storage_obj) clip_tensor, clip_tensor_trans_param = clip_tensor_with_transParam gt_trajs = torch.FloatTensor( clip_tensor_trans_param[self.mask_trans_index]['traj_rois']) data = dict( clip_q=DataContainer(clip_q, stack=True, pad_dims=1, cpu_only=False), clip_k_p=DataContainer(clip_k_p, stack=True, pad_dims=1, cpu_only=False), clip_k_n=DataContainer(clip_k_n, stack=True, pad_dims=1, cpu_only=False), dataIndex=DataContainer(dataIndex, stack=True, pad_dims=None, cpu_only=False), imgs=DataContainer(clip_tensor, stack=True, pad_dims=1, cpu_only=False), gt_trajs=DataContainer(gt_trajs, stack=True, pad_dims=1, cpu_only=False), ) storage_obj.close() return data
def collate(batch, samples_per_gpu=1, pad_size=None):
    """Puts each data field into a tensor/DataContainer with outer dimension
    batch size.

    Extend default_collate to add support for
    :type:`~mmcv.parallel.DataContainer`. There are 3 cases.

    1. cpu_only = True, e.g., meta data
    2. cpu_only = False, stack = True, e.g., image tensors
    3. cpu_only = False, stack = False, e.g., gt bboxes
    """
    if not isinstance(batch, collections.abc.Sequence):
        raise TypeError("{} is not supported.".format(type(batch)))

    if isinstance(batch[0], DataContainer):
        assert len(batch) % samples_per_gpu == 0
        stacked = []
        if batch[0].cpu_only:
            # case 1: keep the data on CPU as nested lists of per-sample objects
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
            return DataContainer(
                stacked, batch[0].stack, batch[0].padding_value, cpu_only=True)
        elif batch[0].stack:
            # case 2: pad each sample to a common (H, W), then stack per GPU
            for i in range(0, len(batch), samples_per_gpu):
                assert isinstance(batch[i].data, torch.Tensor)
                # TODO: handle tensors other than 3d
                assert batch[i].dim() == 3
                c, h, w = batch[0].size()
                for sample in batch[i:i + samples_per_gpu]:
                    assert c == sample.size(0)
                    h = max(h, sample.size(1))
                    w = max(w, sample.size(2))
                if pad_size is not None:
                    # pad to a fixed size instead of the per-batch maximum
                    h, w = pad_size[0], pad_size[1]
                padded_samples = [
                    F.pad(
                        sample.data,
                        (0, w - sample.size(2), 0, h - sample.size(1)),
                        value=sample.padding_value)
                    for sample in batch[i:i + samples_per_gpu]
                ]
                stacked.append(default_collate(padded_samples))
        else:
            # case 3: no stacking; keep per-sample tensors in lists
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
        return DataContainer(stacked, batch[0].stack, batch[0].padding_value)
    elif isinstance(batch[0], collections.abc.Sequence):
        transposed = zip(*batch)
        return [
            collate(samples, samples_per_gpu, pad_size=pad_size)
            for samples in transposed
        ]
    elif isinstance(batch[0], collections.abc.Mapping):
        return {
            key: collate([d[key] for d in batch],
                         samples_per_gpu,
                         pad_size=pad_size)
            for key in batch[0]
        }
    else:
        return default_collate(batch)
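# A minimal usage sketch for the pad_size variant of collate above, not part of
# the original code: it assumes DataContainer comes from mmcv.parallel, that
# `collate` refers to the function defined directly above, and that the tensor
# shapes and pad_size below are purely illustrative. With pad_size given, every
# 3-D image tensor is padded to that fixed (H, W) instead of the per-batch
# maximum before stacking.
import torch
from mmcv.parallel import DataContainer

samples = [
    dict(imgs=DataContainer(torch.zeros(3, 200, 180), stack=True)),
    dict(imgs=DataContainer(torch.zeros(3, 240, 220), stack=True)),
]
batch = collate(samples, samples_per_gpu=2, pad_size=(256, 256))
# both samples are padded to (256, 256) and stacked: [2, 3, 256, 256]
assert batch['imgs'].data[0].shape == (2, 3, 256, 256)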
def collate(batch, samples_per_gpu=1):
    """Puts each data field into a tensor/DataContainer with outer dimension
    batch size.

    Extend default_collate to add support for
    :type:`~mmcv.parallel.DataContainer`. There are 3 cases.

    1. cpu_only = True, e.g., meta data
    2. cpu_only = False, stack = True, e.g., image tensors
    3. cpu_only = False, stack = False, e.g., gt bboxes
    """
    if not isinstance(batch, collections.abc.Sequence):
        raise TypeError("{} is not supported.".format(type(batch)))

    if isinstance(batch[0], DataContainer):
        assert len(batch) % samples_per_gpu == 0
        stacked = []
        if batch[0].cpu_only:
            # case 1: keep the data on CPU as nested lists of per-sample objects
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
            return DataContainer(
                stacked, batch[0].stack, batch[0].padding_value, cpu_only=True)
        elif batch[0].stack:
            # case 2: pad the trailing pad_dims dimensions to the per-batch
            # maximum, then stack per GPU
            for i in range(0, len(batch), samples_per_gpu):
                assert isinstance(batch[i].data, torch.Tensor)

                if batch[i].pad_dims is not None:
                    ndim = batch[i].dim()
                    assert ndim > batch[i].pad_dims
                    max_shape = [0 for _ in range(batch[i].pad_dims)]
                    for dim in range(1, batch[i].pad_dims + 1):
                        max_shape[dim - 1] = batch[i].size(-dim)
                    for sample in batch[i:i + samples_per_gpu]:
                        # the leading (non-padded) dimensions must match exactly
                        for dim in range(0, ndim - batch[i].pad_dims):
                            assert batch[i].size(dim) == sample.size(dim)
                        for dim in range(1, batch[i].pad_dims + 1):
                            max_shape[dim - 1] = max(max_shape[dim - 1],
                                                     sample.size(-dim))
                    padded_samples = []
                    for sample in batch[i:i + samples_per_gpu]:
                        pad = [0 for _ in range(batch[i].pad_dims * 2)]
                        for dim in range(1, batch[i].pad_dims + 1):
                            pad[2 * dim - 1] = \
                                max_shape[dim - 1] - sample.size(-dim)
                        padded_samples.append(
                            F.pad(sample.data, pad, value=sample.padding_value))
                    stacked.append(default_collate(padded_samples))
                elif batch[i].pad_dims is None:
                    stacked.append(
                        default_collate([
                            sample.data
                            for sample in batch[i:i + samples_per_gpu]
                        ]))
                else:
                    raise ValueError(
                        'pad_dims should be either None or integers (1-3)')
        else:
            # case 3: no stacking; keep per-sample tensors in lists
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
        return DataContainer(stacked, batch[0].stack, batch[0].padding_value)
    elif isinstance(batch[0], collections.abc.Sequence):
        transposed = zip(*batch)
        return [collate(samples, samples_per_gpu) for samples in transposed]
    elif isinstance(batch[0], collections.abc.Mapping):
        return {
            key: collate([d[key] for d in batch], samples_per_gpu)
            for key in batch[0]
        }
    else:
        return default_collate(batch)
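# A minimal usage sketch for the pad_dims-based collate above, not part of the
# original code: it assumes DataContainer comes from mmcv.parallel, `collate`
# refers to the function defined directly above (with its module-level imports
# of collections.abc, torch.nn.functional as F, and default_collate in place),
# and the filenames, labels, and tensor shapes below are purely illustrative.
# Two samples with different spatial sizes exercise the three cases from the
# docstring.
import torch
from mmcv.parallel import DataContainer

sample_a = dict(
    imgs=DataContainer(torch.zeros(3, 224, 200), stack=True, pad_dims=2),
    gt_labels=DataContainer(torch.LongTensor([1]), stack=True, pad_dims=None),
    meta=DataContainer(dict(filename='a.mp4'), cpu_only=True))
sample_b = dict(
    imgs=DataContainer(torch.zeros(3, 216, 224), stack=True, pad_dims=2),
    gt_labels=DataContainer(torch.LongTensor([2]), stack=True, pad_dims=None),
    meta=DataContainer(dict(filename='b.mp4'), cpu_only=True))

batch = collate([sample_a, sample_b], samples_per_gpu=2)
# case 2, pad_dims=2: imgs padded to the per-batch max (H, W), then stacked
assert batch['imgs'].data[0].shape == (2, 3, 224, 224)
# case 2, pad_dims=None: labels are stacked directly without padding
assert batch['gt_labels'].data[0].shape == (2, 1)
# case 1, cpu_only=True: meta stays as nested lists of per-sample objects
assert batch['meta'].data[0][0]['filename'] == 'a.mp4'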