def __call__(self, results):
    """Convert point-cloud / segmentation fields to DataContainers.

    Per-point arrays are wrapped without batch stacking, the image is
    transposed to CHW and stacked, and 3D labels / boxes are wrapped
    according to their runtime type.
    """
    # Per-point tensors: wrap without stacking across the batch.
    unstacked_keys = ('points', 'pts_indices', 'seg_points',
                      'seg_pts_indices', 'seg_label', 'scn_coords')
    for field in unstacked_keys:
        if field in results:
            results[field] = DC(to_tensor(results[field]), stack=False)

    # Image: HWC -> CHW, contiguous, stacked along the batch dim.
    chw_img = np.ascontiguousarray(results['img'].transpose(2, 0, 1))
    results['img'] = DC(to_tensor(chw_img), stack=True)

    # gt_labels_3d may be a list (multi-view) or a single array.
    if 'gt_labels_3d' in results:
        labels = results['gt_labels_3d']
        if isinstance(labels, list):
            results['gt_labels_3d'] = DC([to_tensor(item) for item in labels])
        else:
            results['gt_labels_3d'] = DC(to_tensor(labels))

    # 3D box objects stay on CPU; raw arrays become tensors.
    if 'gt_bboxes_3d' in results:
        boxes = results['gt_bboxes_3d']
        if isinstance(boxes, BaseInstance3DBoxes):
            results['gt_bboxes_3d'] = DC(boxes, cpu_only=True)
        else:
            results['gt_bboxes_3d'] = DC(to_tensor(boxes))
    return results
def reid_format_bundle(self, results):
    """Transform and format gt_label fields in results.

    Args:
        results (dict): Result dict contains the data to convert.

    Returns:
        dict: The result dict contains the data that is formatted with
            ReID bundle.

    Raises:
        KeyError: If a key other than ``img`` / ``gt_label`` is present.
    """
    for field in results:
        if field == 'img':
            image = results[field]
            # (H, W, C) -> (C, H, W); multi-image (H, W, C, N) -> (N, C, H, W).
            order = (2, 0, 1) if image.ndim == 3 else (3, 2, 0, 1)
            image = np.ascontiguousarray(image.transpose(order))
            results['img'] = DC(to_tensor(image), stack=True)
        elif field == 'gt_label':
            results[field] = DC(
                to_tensor(results[field]), stack=True, pad_dims=None)
        else:
            raise KeyError(f'key {field} is not supported')
    return results
def __call__(self, results):
    """Pack the image (as a float CHW tensor) and masks into stacked
    DataContainers."""
    if 'img' in results:
        chw = np.ascontiguousarray(results['img'].transpose(2, 0, 1))
        results['img'] = DC(to_tensor(chw).float(), stack=True)
    if 'gt_masks' in results:
        results['gt_masks'] = DC(to_tensor(results['gt_masks']), stack=True)
    return results
def __call__(self, results):
    """Format common detection fields into DataContainers.

    The image is transposed to CHW and stacked; box/label-style arrays
    become unstacked tensors; masks stay on CPU; the semantic seg map
    gains a leading channel dim and is stacked.
    """
    if 'img' in results:
        chw = np.ascontiguousarray(results['img'].transpose(2, 0, 1))
        results['img'] = DC(to_tensor(chw), stack=True)

    tensor_keys = ('proposals', 'gt_bboxes', 'gt_coefs', 'gt_skeleton',
                   'gt_bboxes_ignore', 'gt_labels')
    for field in tensor_keys:
        if field in results:
            results[field] = DC(to_tensor(results[field]))

    if 'gt_masks' in results:
        results['gt_masks'] = DC(results['gt_masks'], cpu_only=True)
    if 'gt_semantic_seg' in results:
        # (H, W) -> (1, H, W) before stacking.
        seg = results['gt_semantic_seg'][None, ...]
        results['gt_semantic_seg'] = DC(to_tensor(seg), stack=True)
    return results
def images_to_tensor(self, results):
    """Transpose and convert images/multi-images to Tensor."""
    if 'img' in results:
        image = results['img']
        # (H, W, 3) -> (3, H, W); multi-image (H, W, 3, N) -> (N, 3, H, W).
        order = (2, 0, 1) if image.ndim == 3 else (3, 2, 0, 1)
        results['img'] = to_tensor(
            np.ascontiguousarray(image.transpose(order)))
    if 'proposals' in results:
        results['proposals'] = to_tensor(results['proposals'])
    if 'img_metas' in results:
        results['img_metas'] = DC(results['img_metas'], cpu_only=True)
    return results
def __call__(self, results):
    """Call function to transform and format common fields in results.

    Args:
        results (dict): Result dict contains the data to convert.

    Returns:
        dict: The result dict contains the data that is formatted with
            default bundle.
    """
    # Format 3D data: points and voxel-related arrays.
    if 'points' in results:
        assert isinstance(results['points'], BasePoints)
        results['points'] = DC(results['points'].tensor)
    for field in ('voxels', 'coors', 'voxel_centers', 'num_points'):
        if field in results:
            results[field] = DC(to_tensor(results[field]), stack=False)

    if self.with_gt:
        # Clean GT bboxes as the final filtering step.
        if 'gt_bboxes_3d_mask' in results:
            mask3d = results['gt_bboxes_3d_mask']
            results['gt_bboxes_3d'] = results['gt_bboxes_3d'][mask3d]
            if 'gt_names_3d' in results:
                results['gt_names_3d'] = results['gt_names_3d'][mask3d]
        if 'gt_bboxes_mask' in results:
            mask2d = results['gt_bboxes_mask']
            if 'gt_bboxes' in results:
                results['gt_bboxes'] = results['gt_bboxes'][mask2d]
            results['gt_names'] = results['gt_names'][mask2d]

        if self.with_label:
            if 'gt_names' in results:
                names = results['gt_names']
                if len(names) == 0:
                    results['gt_labels'] = np.array([], dtype=np.int64)
                elif isinstance(names[0], list):
                    # gt_labels might be a list of list in multi-view setting.
                    results['gt_labels'] = [
                        np.array([self.class_names.index(n) for n in view],
                                 dtype=np.int64)
                        for view in names
                    ]
                else:
                    results['gt_labels'] = np.array(
                        [self.class_names.index(n) for n in names],
                        dtype=np.int64)
            # We still assume one pipeline for one frame of LiDAR,
            # thus the 3D names are list[str].
            if 'gt_names_3d' in results:
                results['gt_labels_3d'] = np.array(
                    [self.class_names.index(n)
                     for n in results['gt_names_3d']],
                    dtype=np.int64)

    results = super(DefaultFormatBundle3D, self).__call__(results)
    return results
def __call__(self, results):
    """Apply the parent bundle to each frame and wrap its match indices."""
    formatted = []
    for frame in results:
        frame = super().__call__(frame)
        frame['gt_match_indices'] = DC(
            to_tensor(frame['gt_match_indices']))
        formatted.append(frame)
    return formatted
def default_format_bundle(self, results):
    """Transform and format common fields in results.

    Args:
        results (dict): Result dict contains the data to convert.

    Returns:
        dict: The result dict contains the data that is formatted with
            default bundle.
    """
    if 'img' in results:
        img = results['img']
        # (H, W, C) -> (C, H, W); multi-image (H, W, C, N) -> (N, C, H, W).
        if len(img.shape) == 3:
            img = np.ascontiguousarray(img.transpose(2, 0, 1))
        else:
            img = np.ascontiguousarray(img.transpose(3, 2, 0, 1))
        results['img'] = DC(to_tensor(img), stack=True)
    if 'padding_mask' in results:
        results['padding_mask'] = DC(
            to_tensor(results['padding_mask'].copy()), stack=True)
    for key in [
            'proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels',
            'gt_instance_ids', 'gt_match_indices'
    ]:
        if key not in results:
            continue
        results[key] = DC(to_tensor(results[key]))
    for key in ['img_metas', 'gt_masks']:
        if key in results:
            results[key] = DC(results[key], cpu_only=True)
    if 'gt_semantic_seg' in results:
        semantic_seg = results['gt_semantic_seg']
        if len(semantic_seg.shape) == 2:
            # Add a channel dim: (H, W) -> (1, H, W).
            semantic_seg = semantic_seg[None, ...]
        else:
            semantic_seg = np.ascontiguousarray(
                semantic_seg.transpose(3, 2, 0, 1))
        # BUG FIX: the transformed ``semantic_seg`` was previously computed
        # and then discarded — the raw ``results['gt_semantic_seg']`` was
        # wrapped instead, so the channel dim / transpose never applied.
        results['gt_semantic_seg'] = DC(to_tensor(semantic_seg), stack=True)
    return results
def prepare_train_img(self, idx):
    """Build a training sample with a cloned reference frame.

    Pipelines same as PairDET30Dataset: the reference image / boxes /
    labels are clones of the key frame, and track ids are generated
    manually as ``1..num_gts`` (0 is reserved for negative/background).

    Args:
        idx (int): Index of the sample.

    Returns:
        dict | None: Formatted results, or ``None`` when the sample has
            no ground-truth boxes (caller is expected to resample).
    """
    img_info = self.img_infos[idx]
    ann_info = self.get_ann_info(idx)
    results = dict(img_info=img_info, ann_info=ann_info)
    self.pre_pipeline(results)
    results = self.pipeline(results)

    # Reject empty-GT samples up front. Previously this check ran last,
    # so all the reference-frame cloning below was wasted work for
    # samples that get discarded anyway.
    if len(results['gt_bboxes'].data) == 0:
        return None

    results['ref_img'] = DC(results['img'].data.clone(), stack=True)
    # NOTE(review): the meta dict is shared with the key frame, not
    # copied — confirm downstream code treats it as read-only.
    results['ref_img_meta'] = results['img_meta']
    results['ref_bboxes'] = DC(results['gt_bboxes'].data.clone())
    results['ref_labels'] = DC(results['gt_labels'].data.clone())

    # Generate the 'trackids' field: ids 1..num_gts, 0 kept for background.
    num_gts = len(results['gt_labels'].data)
    trackids = results['gt_labels'].data.new_tensor(
        list(range(1, num_gts + 1)))
    results['gt_trackids'] = DC(to_tensor(trackids))
    results['ref_trackids'] = DC(to_tensor(trackids))
    return results
def __call__(self, results):
    """Call function to transform and format common fields in results.

    Args:
        results (dict): Result dict contains the data to convert.

    Returns:
        dict: The result dict contains the data that is formatted with
            default bundle.
    """
    if 'img' in results:
        raw = results['img']
        if isinstance(raw, list):
            # Process multiple imgs in a single frame: CHW each, then stack.
            stacked = np.ascontiguousarray(
                np.stack([im.transpose(2, 0, 1) for im in raw], axis=0))
            results['img'] = DC(to_tensor(stacked), stack=True)
        else:
            chw = np.ascontiguousarray(raw.transpose(2, 0, 1))
            results['img'] = DC(to_tensor(chw), stack=True)

    tensor_keys = ('proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels',
                   'gt_labels_3d', 'attr_labels', 'pts_instance_mask',
                   'pts_semantic_mask', 'centers2d', 'depths')
    for field in tensor_keys:
        if field not in results:
            continue
        value = results[field]
        if isinstance(value, list):
            results[field] = DC([to_tensor(item) for item in value])
        else:
            results[field] = DC(to_tensor(value))

    # 3D box objects stay on CPU; raw arrays become tensors.
    if 'gt_bboxes_3d' in results:
        boxes = results['gt_bboxes_3d']
        if isinstance(boxes, BaseInstance3DBoxes):
            results['gt_bboxes_3d'] = DC(boxes, cpu_only=True)
        else:
            results['gt_bboxes_3d'] = DC(to_tensor(boxes))

    if 'gt_masks' in results:
        results['gt_masks'] = DC(results['gt_masks'], cpu_only=True)
    if 'gt_semantic_seg' in results:
        results['gt_semantic_seg'] = DC(
            to_tensor(results['gt_semantic_seg'][None, ...]), stack=True)
    if 'bev_seg_image' in results:
        results['bev_seg_image'] = DC(
            to_tensor(results['bev_seg_image'][None, ...]), stack=True)
    return results
def __call__(self, results):
    """Call function to transform and format common fields in results.

    In addition to the stock ``DefaultFormatBundle3D`` behavior, this
    variant also filters and derives attribute fields (``gt_attr``,
    ``gt_attr_3d``) via ``self.attr_names``.

    Args:
        results (dict): Result dict contains the data to convert.

    Returns:
        dict: The result dict contains the data that is formatted with
            default bundle.
    """
    # Format 3D data: points and voxel-related arrays.
    if 'points' in results:
        assert isinstance(results['points'], BasePoints)
        results['points'] = DC(results['points'].tensor)
    for key in ['voxels', 'coors', 'voxel_centers', 'num_points']:
        if key not in results:
            continue
        results[key] = DC(to_tensor(results[key]), stack=False)

    if self.with_gt:
        # Clean GT bboxes as the final filtering step.
        if 'gt_bboxes_3d_mask' in results:
            print("Formating - DefaultFormatBundle3D - gt_bboxes_3d_mask")
            gt_bboxes_3d_mask = results['gt_bboxes_3d_mask']
            results['gt_bboxes_3d'] = results['gt_bboxes_3d'][
                gt_bboxes_3d_mask]
            if 'gt_names_3d' in results:
                results['gt_names_3d'] = results['gt_names_3d'][
                    gt_bboxes_3d_mask]
                # NOTE(review): gt_attr_3d is indexed unconditionally here —
                # confirm it is always present alongside gt_names_3d,
                # otherwise this raises KeyError.
                results['gt_attr_3d'] = results['gt_attr_3d'][
                    gt_bboxes_3d_mask]
            if 'centers2d' in results:
                results['centers2d'] = results['centers2d'][
                    gt_bboxes_3d_mask]
            if 'depths' in results:
                results['depths'] = results['depths'][gt_bboxes_3d_mask]
        if 'gt_bboxes_mask' in results:
            print("Formatting - DefaultFormatBundle3D - gt_bboxes_mask")
            gt_bboxes_mask = results['gt_bboxes_mask']
            if 'gt_bboxes' in results:
                results['gt_bboxes'] = results['gt_bboxes'][gt_bboxes_mask]
            results['gt_names'] = results['gt_names'][gt_bboxes_mask]
            results['gt_attr'] = results['gt_attr'][gt_bboxes_mask]

        if self.with_label:
            if 'gt_names' in results and len(results['gt_names']) == 0:
                # BUG FIX: this print's string literal was broken across a
                # physical line, which is a syntax error; rejoined here.
                print("Formatting - DefaultFormatBundle3D - self.with_label - gt_names and len")
                results['gt_labels'] = np.array([], dtype=np.int64)
                results['attr_labels'] = np.array([], dtype=np.int64)
                results['gt_attr'] = np.array([], dtype=np.int64)
            elif 'gt_names' in results and isinstance(
                    results['gt_names'][0], list):
                print("Formatting - DefaultFormatBundle3D - self.with_label - gt_names and isinstance")
                # gt_labels might be a list of list in multi-view setting.
                results['gt_labels'] = [
                    np.array([self.class_names.index(n) for n in res],
                             dtype=np.int64)
                    for res in results['gt_names']
                ]
                results['gt_attr'] = [
                    np.array([self.attr_names.index(n) for n in res],
                             dtype=np.int64)
                    for res in results['gt_names']
                ]
            elif 'gt_names' in results:
                print("Formatting - DefaultFormatBundle3D - self.with_label - gt_names only")
                results['gt_labels'] = np.array([
                    self.class_names.index(n) for n in results['gt_names']
                ], dtype=np.int64)
                # NOTE(review): attribute ids are looked up from gt_names
                # (class names), not a dedicated attribute-name field —
                # confirm this is intended.
                results['gt_attr'] = np.array([
                    self.attr_names.index(n) for n in results['gt_names']
                ], dtype=np.int64)
            # We still assume one pipeline for one frame of LiDAR,
            # thus the 3D names are list[str].
            if 'gt_names_3d' in results:
                print("Formatting - DefaultFormatBundle3D - gt_names_3d")
                results['gt_labels_3d'] = np.array([
                    self.class_names.index(n) for n in results['gt_names_3d']
                ], dtype=np.int64)
                results['gt_attr_3d'] = np.array([
                    self.attr_names.index(n) for n in results['gt_names_3d']
                ], dtype=np.int64)

    results = super(DefaultFormatBundle3D, self).__call__(results)
    return results
def __call__(self, results):
    """Convert each configured key from HWC to a (1, C, H, W) tensor."""
    for field in self.keys:
        chw = results[field].transpose(2, 0, 1)
        results[field] = to_tensor(np.expand_dims(chw, 0))
    return results