def restructure_ICDAR2019_dataset(root, out_dir, track="TRACKA", year="2014"):
    """Restructure the raw ICDAR2019 dataset into a COCO-style layout."""
    # start with training
    train_ann_files_path = osp.join(root, "training", track, "ground_truth")
    print(train_ann_files_path)
    train_img_prefix = osp.join(root, "training", track, "ground_truth")
    print(train_img_prefix)
    train_out_annotations = osp.join(out_dir, "annotations",
                                     f"instances_train{year}.json")
    print(train_out_annotations)
    print("Converting train annotations...")
    convert_icdar2019_to_coco(ann_files_path=train_ann_files_path,
                              out_file=train_out_annotations,
                              img_prefix=train_img_prefix)

    print("Moving train images...")
    train_img_dest = osp.join(out_dir, f"train{year}")
    os.makedirs(train_img_dest, exist_ok=True)
    train_img_list = [
        fn for fn in glob(osp.join(train_img_prefix, '*.*'))
        if osp.splitext(fn.lower())[1] in img_exts
    ]
    for fn in mmcv.track_iter_progress(train_img_list):
        os.rename(fn, osp.join(train_img_dest, osp.basename(fn)))

    # val
    val_ann_files_path = osp.join(root, "test_ground_truth", track)
    print(val_ann_files_path)
    val_img_prefix = osp.join(root, "test", track)
    print(val_img_prefix)
    val_out_annotations = osp.join(out_dir, "annotations",
                                   f"instances_val{year}.json")
    print(val_out_annotations)
    print("Converting val annotations...")
    convert_icdar2019_to_coco(ann_files_path=val_ann_files_path,
                              out_file=val_out_annotations,
                              img_prefix=val_img_prefix)

    print("Moving val images...")
    val_img_dest = osp.join(out_dir, f"val{year}")
    os.makedirs(val_img_dest, exist_ok=True)
    val_img_list = [
        fn for fn in glob(osp.join(val_img_prefix, '*.*'))
        if osp.splitext(fn.lower())[1] in img_exts
    ]
    for fn in mmcv.track_iter_progress(val_img_list):
        os.rename(fn, osp.join(val_img_dest, osp.basename(fn)))

    os.makedirs(osp.join(out_dir, "logs"), exist_ok=True)
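# A minimal driver sketch (not from the source) showing how the converter
# above might be invoked; both paths are hypothetical placeholders.
if __name__ == '__main__':
    restructure_ICDAR2019_dataset(
        root='/data/ICDAR2019_cTDaR',    # hypothetical raw dataset root
        out_dir='/data/icdar2019_coco',  # hypothetical COCO-style output dir
        track='TRACKA')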
def main():
    args = parse_args()
    assert args.out or args.show, \
        ('Please specify at least one operation (save/show the '
         'video) with the argument "--out" or "--show"')

    model = init_detector(args.config, args.checkpoint, device=args.device)

    video_reader = mmcv.VideoReader(args.video)
    video_writer = None
    if args.out:
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        video_writer = cv2.VideoWriter(
            args.out, fourcc, video_reader.fps,
            (video_reader.width, video_reader.height))

    for frame in mmcv.track_iter_progress(video_reader):
        result = inference_detector(model, frame)
        frame = model.show_result(frame, result, score_thr=args.score_thr)
        if args.show:
            cv2.namedWindow('video', 0)
            mmcv.imshow(frame, 'video', args.wait_time)
        if args.out:
            video_writer.write(frame)

    if video_writer:
        video_writer.release()
    cv2.destroyAllWindows()
def export_2d_annotation(root_path, info_path, version, mono3d=True):
    """Export 2d annotation from the info file and raw data.

    Args:
        root_path (str): Root path of the raw data.
        info_path (str): Path of the info file.
        version (str): Dataset version.
        mono3d (bool): Whether to export mono3d annotation. Default: True.
    """
    # get bbox annotations for camera
    camera_types = [
        'CAM_FRONT',
        'CAM_FRONT_RIGHT',
        'CAM_FRONT_LEFT',
        'CAM_BACK',
        'CAM_BACK_LEFT',
        'CAM_BACK_RIGHT',
    ]
    nusc_infos = mmcv.load(info_path)['infos']
    nusc = NuScenes(version=version, dataroot=root_path, verbose=True)
    # info_2d_list = []
    cat2Ids = [
        dict(id=nus_categories.index(cat_name), name=cat_name)
        for cat_name in nus_categories
    ]
    coco_ann_id = 0
    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
    for info in mmcv.track_iter_progress(nusc_infos):
        for cam in camera_types:
            cam_info = info['cams'][cam]
            coco_infos = get_2d_boxes(nusc,
                                      cam_info['sample_data_token'],
                                      visibilities=['', '1', '2', '3', '4'],
                                      mono3d=mono3d)
            (height, width, _) = mmcv.imread(cam_info['data_path']).shape
            coco_2d_dict['images'].append(
                dict(file_name=cam_info['data_path'].split('data/nuscenes/')
                     [-1],
                     id=cam_info['sample_data_token'],
                     token=info['token'],
                     cam2ego_rotation=cam_info['sensor2ego_rotation'],
                     cam2ego_translation=cam_info['sensor2ego_translation'],
                     ego2global_rotation=info['ego2global_rotation'],
                     ego2global_translation=info['ego2global_translation'],
                     cam_intrinsic=cam_info['cam_intrinsic'],
                     width=width,
                     height=height))
            for coco_info in coco_infos:
                if coco_info is None:
                    continue
                # add an empty key for coco format
                coco_info['segmentation'] = []
                coco_info['id'] = coco_ann_id
                coco_2d_dict['annotations'].append(coco_info)
                coco_ann_id += 1
    if mono3d:
        json_prefix = f'{info_path[:-4]}_mono3d'
    else:
        json_prefix = f'{info_path[:-4]}'
    mmcv.dump(coco_2d_dict, f'{json_prefix}.coco.json')
def sub_processor(lock, pid, video_list):
    """Define job for every subprocess.

    :param lock: threading lock
    :param pid: sub processor id
    :param video_list: video list assigned to each subprocess
    :return: None
    """
    if pid == 0:
        video_range = mmcv.track_iter_progress(range(len(video_list)))
    else:
        video_range = range(len(video_list))
    for i in video_range:
        video_name = video_list[i]
        # read result csv file
        df = pd.read_csv(os.path.join(result_dir, video_name + ".csv"))
        # calculate final score of proposals
        df['score'] = df.iou.values[:] * df.start.values[:] * df.end.values[:]
        if len(df) > 1:
            df = softNMS(df)
        df = df.sort_values(by="score", ascending=False)
        video_info = video_dict[video_name]
        video_duration = video_info["duration_second"]
        proposal_list = []
        for j in range(min(top_number, len(df))):
            tmp_proposal = {}
            tmp_proposal["score"] = df.score.values[j]
            tmp_proposal["segment"] = [
                max(0, df.xmin.values[j]) * video_duration,
                min(1, df.xmax.values[j]) * video_duration
            ]
            proposal_list.append(tmp_proposal)
        result_dict[video_name[2:]] = proposal_list
def main():
    args = parse_args()

    if args.output_dir is not None:
        mkdir_or_exist(args.output_dir)

    cfg = build_data_cfg(args.config, args.skip_type, args.cfg_options)
    try:
        dataset = build_dataset(cfg.data.train,
                                default_args=dict(filter_empty_gt=False))
    except TypeError:  # seg dataset doesn't have `filter_empty_gt` key
        dataset = build_dataset(cfg.data.train)
    data_infos = dataset.data_infos
    dataset_type = cfg.dataset_type

    # configure visualization mode
    vis_task = args.task  # 'det', 'seg', 'multi_modality-det', 'mono-det'

    for idx, data_info in enumerate(track_iter_progress(data_infos)):
        if dataset_type in ['KittiDataset', 'WaymoDataset']:
            data_path = data_info['point_cloud']['velodyne_path']
        elif dataset_type in [
                'ScanNetDataset', 'SUNRGBDDataset', 'ScanNetSegDataset',
                'S3DISSegDataset', 'S3DISDataset'
        ]:
            data_path = data_info['pts_path']
        elif dataset_type in ['NuScenesDataset', 'LyftDataset']:
            data_path = data_info['lidar_path']
        elif dataset_type in ['NuScenesMonoDataset']:
            data_path = data_info['file_name']
        else:
            raise NotImplementedError(
                f'unsupported dataset type {dataset_type}')

        file_name = osp.splitext(osp.basename(data_path))[0]

        if vis_task in ['det', 'multi_modality-det']:
            # show 3D bboxes on 3D point clouds
            show_det_data(idx,
                          dataset,
                          args.output_dir,
                          file_name,
                          show=args.online)
        if vis_task in ['multi_modality-det', 'mono-det']:
            # project 3D bboxes to 2D image
            show_proj_bbox_img(
                idx,
                dataset,
                args.output_dir,
                file_name,
                show=args.online,
                is_nus_mono=(dataset_type == 'NuScenesMonoDataset'))
        elif vis_task in ['seg']:
            # show 3D segmentation mask on 3D point clouds
            show_seg_data(idx,
                          dataset,
                          args.output_dir,
                          file_name,
                          show=args.online)
def _create_reduced_point_cloud(data_path,
                                info_path,
                                save_path=None,
                                back=False,
                                num_features=4,
                                front_camera_id=2):
    """Create reduced point clouds for given info.

    Args:
        data_path (str): Path of original data.
        info_path (str): Path of data info.
        save_path (str | None): Path to save reduced point cloud data.
            Default: None.
        back (bool): Whether to flip the points to back.
        num_features (int): Number of point features. Default: 4.
        front_camera_id (int): The referenced/front camera ID. Default: 2.
    """
    kitti_infos = mmcv.load(info_path)

    for info in mmcv.track_iter_progress(kitti_infos):
        pc_info = info['point_cloud']
        image_info = info['image']
        calib = info['calib']

        v_path = pc_info['velodyne_path']
        v_path = Path(data_path) / v_path
        points_v = np.fromfile(str(v_path), dtype=np.float32,
                               count=-1).reshape([-1, num_features])
        rect = calib['R0_rect']
        if front_camera_id == 2:
            P2 = calib['P2']
        else:
            P2 = calib[f'P{str(front_camera_id)}']
        Trv2c = calib['Tr_velo_to_cam']
        # first remove z < 0 points
        # keep = points_v[:, -1] > 0
        # points_v = points_v[keep]
        # then remove outside.
        if back:
            points_v[:, 0] = -points_v[:, 0]
        points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2,
                                                    image_info['image_shape'])
        if save_path is None:
            save_dir = v_path.parent.parent / (v_path.parent.stem +
                                               '_reduced')
            if not save_dir.exists():
                save_dir.mkdir()
            # keep the filename as a str so the '_back' suffix can be
            # appended (Path objects do not support `+=` with a str)
            save_filename = str(save_dir / v_path.name)
            # save_filename = str(v_path) + '_reduced'
            if back:
                save_filename += '_back'
        else:
            save_filename = str(Path(save_path) / v_path.name)
            if back:
                save_filename += '_back'
        with open(save_filename, 'w') as f:
            points_v.tofile(f)
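# Sketch (an assumption, not from the source): reading back one of the
# reduced .bin files written above. `num_features` must match the value used
# during reduction; the helper name is illustrative.
import numpy as np

def load_reduced_cloud(bin_path, num_features=4):
    """Load a reduced point cloud as an (N, num_features) float32 array."""
    return np.fromfile(bin_path, dtype=np.float32).reshape(-1, num_features)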
def export_2d_annotation(set_folder,
                         info_path,
                         ego_pose_sensor_name="imu_perfect",
                         camera_names=["cam_front"]):
    loader = DatasetLoader(set_folder)
    loader.setup()

    ego_pose_sensor, ego_pose_calib = loading_utils.load_sensor_with_calib(
        loader, ego_pose_sensor_name)
    camera_sensors, camera_calibs = loading_utils.load_sensors_with_calibs(
        loader, camera_names)

    carla_infos = mmcv.load(info_path)["infos"]
    # info_2d_list = []
    cat2Ids = [
        dict(id=carla_categories.index(cat_name), name=cat_name)
        for cat_name in carla_categories
    ]
    coco_ann_id = 0
    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
    for info in mmcv.track_iter_progress(carla_infos):
        for cam in camera_names:
            cam_info = info["cams"][cam]
            img_path = cam_info["data_path"]
            # get img path relative to dataset root
            img_path = str(
                pathlib.Path(img_path).relative_to(loader.dataset_root))
            coco_infos = get_2d_boxes(loader, info["token"], ego_pose_sensor,
                                      ego_pose_calib, camera_sensors[cam],
                                      camera_calibs[cam], img_path)
            (height, width, _) = mmcv.imread(cam_info["data_path"]).shape
            coco_2d_dict["images"].append(
                dict(
                    file_name=img_path,
                    id=info["token"],
                    width=width,
                    height=height,
                ))
            for coco_info in coco_infos:
                if coco_info is None:
                    continue
                # add an empty key for coco format
                coco_info["segmentation"] = []
                coco_info["id"] = coco_ann_id
                coco_2d_dict["annotations"].append(coco_info)
                coco_ann_id += 1
    mmcv.dump(coco_2d_dict, f"{info_path[:-4]}.coco.json", indent=4)
def load_lyft_gts(lyft, data_root, eval_split, logger=None):
    """Loads ground truth boxes from database.

    Args:
        lyft (:obj:`LyftDataset`): Lyft class in the sdk.
        data_root (str): Root of data for reading splits.
        eval_split (str): Name of the split for evaluation.
        logger (logging.Logger | str | None): Logger used for printing
            related information during evaluation. Default: None.

    Returns:
        list[dict]: List of annotation dictionaries.
    """
    split_scenes = mmcv.list_from_file(
        osp.join(data_root, f'{eval_split}.txt'))

    # Read out all sample_tokens in DB.
    sample_tokens_all = [s['token'] for s in lyft.sample]
    assert len(sample_tokens_all) > 0, 'Error: Database has no samples!'

    if eval_split == 'test':
        # Check that you aren't trying to cheat :)
        assert len(lyft.sample_annotation) > 0, \
            'Error: You are trying to evaluate on the test set ' \
            'but you do not have the annotations!'

    sample_tokens = []
    for sample_token in sample_tokens_all:
        scene_token = lyft.get('sample', sample_token)['scene_token']
        scene_record = lyft.get('scene', scene_token)
        if scene_record['name'] in split_scenes:
            sample_tokens.append(sample_token)

    all_annotations = []

    print_log('Loading ground truth annotations...', logger=logger)
    # Load annotations and filter predictions and annotations.
    for sample_token in mmcv.track_iter_progress(sample_tokens):
        sample = lyft.get('sample', sample_token)
        sample_annotation_tokens = sample['anns']
        for sample_annotation_token in sample_annotation_tokens:
            # Get label name in detection task and filter unused labels.
            sample_annotation = \
                lyft.get('sample_annotation', sample_annotation_token)
            detection_name = sample_annotation['category_name']
            if detection_name is None:
                continue
            annotation = {
                'sample_token': sample_token,
                'translation': sample_annotation['translation'],
                'size': sample_annotation['size'],
                'rotation': sample_annotation['rotation'],
                'name': detection_name,
            }
            all_annotations.append(annotation)

    return all_annotations
def _format_bbox(self, results, jsonfile_prefix=None):
    nusc_annos = {}
    mapped_class_names = self.CLASSES

    print('Start to convert detection format...')
    for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
        annos = []
        # list of NuScenesBox; lidar coordinate
        boxes = output_to_nusc_box(det)
        sample_token = self.data_infos[sample_id]['token']
        # list of NuScenesBox; global coordinate
        boxes = lidar_nusc_box_to_global(self.data_infos[sample_id], boxes,
                                         mapped_class_names,
                                         self.eval_detection_configs,
                                         self.eval_version)
        for i, box in enumerate(boxes):
            name = mapped_class_names[box.label]
            if np.sqrt(box.velocity[0]**2 + box.velocity[1]**2) > 0.2:
                if name in [
                        'car', 'truck', 'bus', 'trailer',
                        'construction_vehicle', 'vehicle'
                ]:
                    attr = 'vehicle.moving'
                elif name in ['bicycle', 'motorcycle', 'bike']:
                    attr = 'cycle.with_rider'
                else:
                    attr = ''
                    # attr = MMDAMergeCatDataset.DefaultAttribute[name]
            else:
                if name in ['pedestrian']:
                    attr = 'pedestrian.standing'
                elif name in ['bus']:
                    attr = 'vehicle.stopped'
                else:
                    attr = ''
                    # attr = MMDAMergeCatDataset.DefaultAttribute[name]

            nusc_anno = dict(sample_token=sample_token,
                             translation=box.center.tolist(),
                             size=box.wlh.tolist(),
                             rotation=box.orientation.elements.tolist(),
                             velocity=box.velocity[:2].tolist(),
                             detection_name=name,
                             detection_score=box.score,
                             attribute_name=attr)
            annos.append(nusc_anno)
        nusc_annos[sample_token] = annos
    nusc_submissions = {
        'meta': self.modality,
        'results': nusc_annos,
    }

    mmcv.mkdir_or_exist(jsonfile_prefix)
    res_path = osp.join(jsonfile_prefix, 'results_nusc.json')
    print('Results written to', res_path)
    mmcv.dump(nusc_submissions, res_path)
    return res_path
def test_track_iter_progress(capsys):
    ret = []
    for num in mmcv.track_iter_progress([1, 2, 3], bar_width=3):
        ret.append(sleep_1s(num))
    out, _ = capsys.readouterr()
    assert out == ('[   ] 0/3, elapsed: 0s, ETA:'
                   '\r[>  ] 1/3, 1.0 task/s, elapsed: 1s, ETA:     2s'
                   '\r[>> ] 2/3, 1.0 task/s, elapsed: 2s, ETA:     1s'
                   '\r[>>>] 3/3, 1.0 task/s, elapsed: 3s, ETA:     0s\n')
    assert ret == [1, 2, 3]
def test_track_iter_progress():
    out = StringIO()
    ret = []
    for num in mmcv.track_iter_progress([1, 2, 3], bar_width=3, file=out):
        ret.append(sleep_1s(num))
    assert out.getvalue() == (
        '[   ] 0/3, elapsed: 0s, ETA:'
        '\r[>  ] 1/3, 1.0 task/s, elapsed: 1s, ETA:     2s'
        '\r[>> ] 2/3, 1.0 task/s, elapsed: 2s, ETA:     1s'
        '\r[>>>] 3/3, 1.0 task/s, elapsed: 3s, ETA:     0s\n')
    assert ret == [1, 2, 3]
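# Minimal usage sketch for mmcv.track_iter_progress, mirroring the two tests
# above: it wraps any sized iterable, yields its items unchanged, and renders
# the text progress bar the tests assert against. `square_all` is illustrative.
import mmcv

def square_all(nums):
    return [n * n for n in mmcv.track_iter_progress(list(nums), bar_width=10)]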
def export_2d_annotation(root_path, info_path, version):
    """Export 2d annotation from the info file and raw data.

    Args:
        root_path (str): Root path of the raw data.
        info_path (str): Path of the info file.
        version (str): Dataset version.
    """
    warnings.warn('DeprecationWarning: 2D annotations are not used on the '
                  'Lyft dataset. The function export_2d_annotation will be '
                  'deprecated.')
    # get bbox annotations for camera
    camera_types = [
        'CAM_FRONT',
        'CAM_FRONT_RIGHT',
        'CAM_FRONT_LEFT',
        'CAM_BACK',
        'CAM_BACK_LEFT',
        'CAM_BACK_RIGHT',
    ]
    lyft_infos = mmcv.load(info_path)['infos']
    lyft = Lyft(data_path=osp.join(root_path, version),
                json_path=osp.join(root_path, version, version),
                verbose=True)
    # info_2d_list = []
    cat2Ids = [
        dict(id=lyft_categories.index(cat_name), name=cat_name)
        for cat_name in lyft_categories
    ]
    coco_ann_id = 0
    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
    for info in mmcv.track_iter_progress(lyft_infos):
        for cam in camera_types:
            cam_info = info['cams'][cam]
            coco_infos = get_2d_boxes(lyft,
                                      cam_info['sample_data_token'],
                                      visibilities=['', '1', '2', '3', '4'])
            (height, width, _) = mmcv.imread(cam_info['data_path']).shape
            coco_2d_dict['images'].append(
                dict(file_name=cam_info['data_path'],
                     id=cam_info['sample_data_token'],
                     width=width,
                     height=height))
            for coco_info in coco_infos:
                if coco_info is None:
                    continue
                # add an empty key for coco format
                coco_info['segmentation'] = []
                coco_info['id'] = coco_ann_id
                coco_2d_dict['annotations'].append(coco_info)
                coco_ann_id += 1
    mmcv.dump(coco_2d_dict, f'{info_path[:-4]}.coco.json')
def export_2d_annotation(root_path, info_path, version):
    """Export 2d annotation from the info file and raw data.

    Args:
        root_path (str): Root path of the raw data.
        info_path (str): Path of the info file.
        version (str): Dataset version.
    """
    # get bbox annotations for camera
    camera_types = [
        "CAM_FRONT",
        "CAM_FRONT_RIGHT",
        "CAM_FRONT_LEFT",
        "CAM_BACK",
        "CAM_BACK_LEFT",
        "CAM_BACK_RIGHT",
    ]
    nusc_infos = mmcv.load(info_path)["infos"]
    nusc = NuScenes(version=version, dataroot=root_path, verbose=True)
    # info_2d_list = []
    cat2Ids = [
        dict(id=nus_categories.index(cat_name), name=cat_name)
        for cat_name in nus_categories
    ]
    coco_ann_id = 0
    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
    for info in mmcv.track_iter_progress(nusc_infos):
        for cam in camera_types:
            cam_info = info["cams"][cam]
            coco_infos = get_2d_boxes(
                nusc,
                cam_info["sample_data_token"],
                visibilities=["", "1", "2", "3", "4"],
            )
            (height, width, _) = mmcv.imread(cam_info["data_path"]).shape
            coco_2d_dict["images"].append(
                dict(
                    file_name=cam_info["data_path"],
                    id=cam_info["sample_data_token"],
                    width=width,
                    height=height,
                ))
            for coco_info in coco_infos:
                if coco_info is None:
                    continue
                # add an empty key for coco format
                coco_info["segmentation"] = []
                coco_info["id"] = coco_ann_id
                coco_2d_dict["annotations"].append(coco_info)
                coco_ann_id += 1
    mmcv.dump(coco_2d_dict, f"{info_path[:-4]}.coco.json")
def convert_icdar2019_to_coco(ann_files_path, out_file, img_prefix):
    cat2label = {k: i for i, k in enumerate(['table', 'cell'])}
    annotations = []
    images = []
    obj_count = 0
    image_list = [
        osp.basename(fn) for fn in glob(osp.join(img_prefix, '*.*'))
        if osp.splitext(fn.lower())[1] in img_exts
    ]
    for idx, image_fn in enumerate(mmcv.track_iter_progress(image_list)):
        image_id = osp.splitext(osp.basename(image_fn))[0]
        # filename = f'{image_id}.jpg'  # TODO check this vs img_path
        filename = image_fn
        img_path = osp.join(img_prefix, filename)
        height, width = mmcv.imread(img_path).shape[:2]

        images.append(
            dict(id=idx, file_name=filename, height=height, width=width))

        # load annotations
        xml_path = f'{ann_files_path}/{image_id}.xml'
        tables = parse_tables_from_xml(xml_path)

        for table in tables:
            bbox = table.bbox
            area = (bbox[2]) * (bbox[3])
            poly = table.bounds

            data_anno = dict(image_id=idx,
                             id=obj_count,
                             category_id=cat2label['table'],
                             bbox=bbox,
                             area=area,
                             segmentation=[poly],
                             iscrowd=0)
            annotations.append(data_anno)
            obj_count += 1

    coco_format_json = dict(images=images,
                            annotations=annotations,
                            categories=[{
                                'id': cat2label[label],
                                'name': label
                            } for label in cat2label])
    os.makedirs(osp.dirname(out_file), exist_ok=True)
    mmcv.dump(coco_format_json, out_file)
def main():
    args = parse_args()

    if args.output_dir is not None:
        mkdir_or_exist(args.output_dir)

    cfg = retrieve_data_cfg(args.config, args.skip_type, args.cfg_options)
    if cfg.data.train['type'] == 'RepeatDataset':
        cfg.data.train.dataset['pipeline'] = get_loading_pipeline(
            cfg.train_pipeline)
    else:
        cfg.data.train['pipeline'] = get_loading_pipeline(cfg.train_pipeline)

    dataset = build_dataset(cfg.data.train,
                            default_args=dict(filter_empty_gt=False))
    # For RepeatDataset type, the infos are stored in dataset.dataset
    if cfg.data.train['type'] == 'RepeatDataset':
        dataset = dataset.dataset
    data_infos = dataset.data_infos

    for idx, data_info in enumerate(track_iter_progress(data_infos)):
        if cfg.dataset_type in ['KittiDataset', 'WaymoDataset']:
            pts_path = data_info['point_cloud']['velodyne_path']
        elif cfg.dataset_type in ['ScanNetDataset', 'SUNRGBDDataset']:
            pts_path = data_info['pts_path']
        elif cfg.dataset_type in ['NuScenesDataset', 'LyftDataset']:
            pts_path = data_info['lidar_path']
        else:
            raise NotImplementedError(
                f'unsupported dataset type {cfg.dataset_type}')
        file_name = osp.splitext(osp.basename(pts_path))[0]
        save_path = osp.join(args.output_dir,
                             f'{file_name}.png') if args.output_dir else None

        example = dataset.prepare_train_data(idx)
        points = example['points']._data.numpy()
        points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,
                                           Coord3DMode.DEPTH)
        gt_bboxes = dataset.get_ann_info(idx)['gt_bboxes_3d'].tensor
        if gt_bboxes is not None:
            gt_bboxes = Box3DMode.convert(gt_bboxes, Box3DMode.LIDAR,
                                          Box3DMode.DEPTH)

        vis = Visualizer(points, save_path='./show.png')
        vis.add_bboxes(bbox3d=gt_bboxes, bbox_color=(0, 0, 1))

        vis.show(save_path)
        del vis
def collect_image_infos(path, exclude_extensions=None):
    img_infos = []

    images_generator = mmcv.scandir(path, recursive=True)
    for image_path in mmcv.track_iter_progress(list(images_generator)):
        if exclude_extensions is None or not image_path.lower().endswith(
                exclude_extensions):
            image_path = os.path.join(path, image_path)
            img_pillow = Image.open(image_path)
            img_info = {
                'filename': image_path,
                'width': img_pillow.width,
                'height': img_pillow.height,
            }
            img_infos.append(img_info)
    return img_infos
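# Hypothetical call to the helper above: scan a directory tree for images,
# skipping GIFs. The directory name is illustrative only.
img_infos = collect_image_infos('data/my_images', exclude_extensions=('.gif',))
print(f'collected {len(img_infos)} images')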
def main():
    args = parse_args()
    assert args.out or args.show, \
        ('Please specify at least one operation (save/show the '
         'video) with the argument "--out" or "--show"')

    model = init_detector(args.config, args.checkpoint, device=args.device)

    if args.nvdecode:
        VideoCapture = ffmpegcv.VideoCaptureNV
    else:
        VideoCapture = ffmpegcv.VideoCapture
    video_origin = VideoCapture(args.video)
    img_metas = prefetch_img_metas(model.cfg,
                                   (video_origin.width, video_origin.height))
    resize_wh = img_metas['pad_shape'][1::-1]
    video_resize = VideoCapture(args.video,
                                resize=resize_wh,
                                resize_keepratio=True,
                                resize_keepratioalign='topleft',
                                pix_fmt='rgb24')
    video_writer = None
    if args.out:
        video_writer = ffmpegcv.VideoWriter(args.out, fps=video_origin.fps)

    with torch.no_grad():
        for frame_resize, frame_origin in zip(
                mmcv.track_iter_progress(video_resize), video_origin):
            data = process_img(frame_resize, img_metas, args.device)
            result = model(return_loss=False, rescale=True, **data)[0]
            frame_mask = model.show_result(frame_origin,
                                           result,
                                           score_thr=args.score_thr)
            if args.show:
                cv2.namedWindow('video', 0)
                mmcv.imshow(frame_mask, 'video', args.wait_time)
            if args.out:
                video_writer.write(frame_mask)

    if video_writer:
        video_writer.release()
    video_origin.release()
    video_resize.release()
    cv2.destroyAllWindows()
def export_2d_annotation(root_path, info_path, mono3d=True):
    """Export 2d annotation from the info file and raw data.

    Args:
        root_path (str): Root path of the raw data.
        info_path (str): Path of the info file.
        mono3d (bool): Whether to export mono3d annotation. Default: True.
    """
    # get bbox annotations for camera
    kitti_infos = mmcv.load(info_path)
    cat2Ids = [
        dict(id=kitti_categories.index(cat_name), name=cat_name)
        for cat_name in kitti_categories
    ]
    coco_ann_id = 0
    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
    from os import path as osp
    for info in mmcv.track_iter_progress(kitti_infos):
        coco_infos = get_2d_boxes(info, occluded=[0, 1, 2, 3], mono3d=mono3d)
        (height, width,
         _) = mmcv.imread(osp.join(root_path,
                                   info['image']['image_path'])).shape
        coco_2d_dict['images'].append(
            dict(file_name=info['image']['image_path'],
                 id=info['image']['image_idx'],
                 Tri2v=info['calib']['Tr_imu_to_velo'],
                 Trv2c=info['calib']['Tr_velo_to_cam'],
                 rect=info['calib']['R0_rect'],
                 cam_intrinsic=info['calib']['P2'],
                 width=width,
                 height=height))
        for coco_info in coco_infos:
            if coco_info is None:
                continue
            # add an empty key for coco format
            coco_info['segmentation'] = []
            coco_info['id'] = coco_ann_id
            coco_2d_dict['annotations'].append(coco_info)
            coco_ann_id += 1
    if mono3d:
        json_prefix = f'{info_path[:-4]}_mono3d'
    else:
        json_prefix = f'{info_path[:-4]}'
    mmcv.dump(coco_2d_dict, f'{json_prefix}.coco.json')
def convert_dir(video_root):
    '''Convert all non-mp4 videos to *.mp4'''
    videos = set(glob.glob(video_root + "/*")) - set(
        glob.glob(video_root + "/*.mp4"))
    print('Working on path: {}'.format(video_root))
    print('Videos to be converted: {}'.format(len(videos)))
    for i, video in enumerate(mmcv.track_iter_progress(videos)):
        src = video
        dirname = os.path.dirname(src)
        basename = os.path.basename(src)
        dst = os.path.join(dirname, basename[0:11] + '.mp4')
        # '2>&1' (not '2>>&1', which is invalid shell syntax) folds stderr
        # into the appended log
        cmd = 'sudo ffmpeg -y -i "{}" "{}" >> {}/ffmpeg.log 2>&1'.format(
            src, dst, os.getcwd())
        subprocess.call(cmd, shell=True)
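# A hedged alternative to the shell string above: pass ffmpeg its arguments
# as a list and let Python manage the log file, sidestepping shell-quoting
# issues ('sudo' omitted; the helper name and log path are assumptions).
def convert_one(src, dst, log_path='ffmpeg.log'):
    with open(log_path, 'a') as log:
        subprocess.call(['ffmpeg', '-y', '-i', src, dst],
                        stdout=log, stderr=subprocess.STDOUT)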
def _format_bbox(self, results, jsonfile_prefix=None):
    """Convert the results to the standard format.

    Args:
        results (list[dict]): Testing results of the dataset.
        jsonfile_prefix (str): The prefix of the output jsonfile.
            You can specify the output directory/filename by
            modifying the jsonfile_prefix. Default: None.

    Returns:
        str: Path of the output json file.
    """
    lyft_annos = {}
    mapped_class_names = self.CLASSES

    print('Start to convert detection format...')
    for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
        annos = []
        boxes = output_to_lyft_box(det)
        sample_token = self.data_infos[sample_id]['token']
        boxes = lidar_lyft_box_to_global(self.data_infos[sample_id], boxes)
        for i, box in enumerate(boxes):
            name = mapped_class_names[box.label]
            lyft_anno = dict(sample_token=sample_token,
                             translation=box.center.tolist(),
                             size=box.wlh.tolist(),
                             rotation=box.orientation.elements.tolist(),
                             name=name,
                             score=box.score)
            annos.append(lyft_anno)
        lyft_annos[sample_token] = annos
    lyft_submissions = {
        'meta': self.modality,
        'results': lyft_annos,
    }

    mmcv.mkdir_or_exist(jsonfile_prefix)
    res_path = osp.join(jsonfile_prefix, 'results_lyft.json')
    print('Results written to', res_path)
    mmcv.dump(lyft_submissions, res_path)
    return res_path
def save_proposals_result(batch_video_list, batch_result_xmin,
                          batch_result_xmax, batch_result_iou,
                          batch_result_pstart, batch_result_pend, tscale,
                          result_dir):
    """Save proposal results to csv files."""
    print('Saving results ...')
    columns = ["iou", "start", "end", "xmin", "xmax"]
    # for each batch in the video list
    for idx in mmcv.track_iter_progress(range(len(batch_video_list))):
        b_video = batch_video_list[idx]
        b_xmin = batch_result_xmin[idx]
        b_xmax = batch_result_xmax[idx]
        b_iou = batch_result_iou[idx]
        b_pstart = batch_result_pstart[idx]
        b_pend = batch_result_pend[idx]

        # for each video in the batch (named `vid` so the proposal indices
        # below do not shadow the loop variable)
        for vid in range(len(b_video)):
            tmp_video = b_video[vid]
            tmp_xmin = b_xmin[vid]
            tmp_xmax = b_xmax[vid]
            tmp_iou = b_iou[vid]
            tmp_pstart = b_pstart[vid]
            tmp_pend = b_pend[vid]
            res = []

            # save all proposal results
            for i in range(tscale):
                for j in range(i, tscale):
                    start = tmp_pstart[i]
                    end = tmp_pend[j]
                    iou = tmp_iou[i, j]
                    res.append([iou, start, end, tmp_xmin[i], tmp_xmax[j]])
            tmp_result = np.stack(res)
            tmp_df = pd.DataFrame(tmp_result, columns=columns)

            # write csv file
            tmp_df.to_csv(os.path.join(result_dir, tmp_video + '.csv'),
                          index=False)
def _create_reduced_point_cloud(data_path,
                                info_path,
                                save_path=None,
                                back=False):
    with open(info_path, 'rb') as f:
        kitti_infos = pickle.load(f)

    for info in track_iter_progress(kitti_infos):
        pc_info = info['point_cloud']
        image_info = info['image']
        calib = info['calib']

        v_path = pc_info['velodyne_path']
        v_path = Path(data_path) / v_path
        points_v = np.fromfile(str(v_path), dtype=np.float32,
                               count=-1).reshape([-1, 4])
        rect = calib['R0_rect']
        P2 = calib['P2']
        Trv2c = calib['Tr_velo_to_cam']
        # first remove z < 0 points
        # keep = points_v[:, -1] > 0
        # points_v = points_v[keep]
        # then remove outside.
        if back:
            points_v[:, 0] = -points_v[:, 0]
        points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2,
                                                    image_info['image_shape'])

        if save_path is None:
            save_dir = v_path.parent.parent / (v_path.parent.stem +
                                               '_reduced')
            if not save_dir.exists():
                save_dir.mkdir()
            # keep the filename as a str so the '_back' suffix can be
            # appended (Path objects do not support `+=` with a str)
            save_filename = str(save_dir / v_path.name)
            # save_filename = str(v_path) + '_reduced'
            if back:
                save_filename += '_back'
        else:
            save_filename = str(Path(save_path) / v_path.name)
            if back:
                save_filename += '_back'
        with open(save_filename, 'w') as f:
            points_v.tofile(f)
def labelling_video(config, checkpoint, work_dir, video, outdir, iou_thr):
    '''Pre-annotation data format:
    C(x, y) -> box center coordinates
    w -> width
    h -> height
    logs -> comment (optional)
    '''
    csv_file = str(video) + '_layout.csv'
    model = init_detector(config, checkpoint, device='cuda:0')
    print(os.path.join(work_dir, video))
    video_reader = mmcv.VideoReader(os.path.join(work_dir, video))
    print(video_reader._frame_cnt)
    if not os.path.exists(outdir):
        os.mkdir(outdir)
    layout = []
    count = 0
    for frame in mmcv.track_iter_progress(video_reader):
        result = inference_detector(model, frame)
        for i in range(len(result[0])):
            if result[0][i][4] < iou_thr:
                break
            elif result[0][i][4] >= iou_thr:
                layout.append(np.insert(result[0][i][:4], 0, count))
        # `count` is the frame index written into the 'frame' column,
        # so it advances once per frame
        count += 1
    layout_df = pd.DataFrame(layout, columns=['frame', 'x', 'y', 'x2', 'y2'])
    layout_df['w'] = abs(layout_df['x2'] - layout_df['x'])
    layout_df['h'] = abs(layout_df['y2'] - layout_df['y'])
    layout_df['logs'] = np.nan
    layout_df = layout_df.drop(columns=['x2', 'y2'])
    layout_df = layout_df.astype({
        'frame': 'int32',
        'x': 'int32',
        'y': 'int32',
        'w': 'int32',
        'h': 'int32'
    })
    layout_df.to_csv(os.path.join(work_dir, csv_file), index=False)
def _calculate_num_points_in_gt(data_path,
                                infos,
                                relative_path,
                                remove_outside=True,
                                num_features=4):
    for info in mmcv.track_iter_progress(infos):
        pc_info = info['point_cloud']
        image_info = info['image']
        calib = info['calib']
        if relative_path:
            v_path = str(Path(data_path) / pc_info['velodyne_path'])
        else:
            v_path = pc_info['velodyne_path']
        points_v = np.fromfile(v_path, dtype=np.float32,
                               count=-1).reshape([-1, num_features])
        rect = calib['R0_rect']
        Trv2c = calib['Tr_velo_to_cam']
        P2 = calib['P2']
        if remove_outside:
            points_v = box_np_ops.remove_outside_points(
                points_v, rect, Trv2c, P2, image_info['image_shape'])

        # points_v = points_v[points_v[:, 0] > 0]
        annos = info['annos']
        num_obj = len([n for n in annos['name'] if n != 'DontCare'])
        # annos = kitti.filter_kitti_anno(annos, ['DontCare'])
        dims = annos['dimensions'][:num_obj]
        loc = annos['location'][:num_obj]
        rots = annos['rotation_y'][:num_obj]
        gt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]],
                                         axis=1)
        gt_boxes_lidar = box_np_ops.box_camera_to_lidar(
            gt_boxes_camera, rect, Trv2c)
        indices = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar)
        num_points_in_gt = indices.sum(0)
        num_ignored = len(annos['dimensions']) - num_obj
        num_points_in_gt = np.concatenate(
            [num_points_in_gt, -np.ones([num_ignored])])
        annos['num_points_in_gt'] = num_points_in_gt.astype(np.int32)
def _classify_image(self):
    print('Start detecting all product images and pruning them by class:')
    # to speed up debugging, only prune the first 1000 product images
    for commodity in mmcv.track_iter_progress(
            self.reader.commodity_index_list):
        labels_in_this_commodity = {i: 0 for i in range(23)}
        imgs_in_this_commodity = list(
            self.reader.commodity_index2img_path_list[commodity])
        for img in imgs_in_this_commodity:
            result_over_thr, labels_over_thr, _ = \
                DetectionWorker.get_result_and_feats(
                    self.classify_model, img)
            self.img_boxes_label_result[img] = (result_over_thr,
                                                labels_over_thr)
            for label in labels_over_thr:
                labels_in_this_commodity[label] += 1
        # keep the two most frequent labels
        labels_in_this_commodity_list = sorted(
            labels_in_this_commodity.items(),
            key=lambda x: x[1],
            reverse=True)[:2]
        for i, item in enumerate(labels_in_this_commodity_list):
            label, appear_num = item
            if i != 0 and appear_num == 0:
                break
            # register the commodity under its predicted class label
            self.class2commoditys[label].add(commodity)

        # pick representative images to prune the commodity's image list
        present_imgs = []
        random.shuffle(imgs_in_this_commodity)
        top_labels = [label for label, _ in labels_in_this_commodity_list]
        for img in imgs_in_this_commodity:
            result_over_thr, labels_over_thr = \
                self.img_boxes_label_result[img]
            # compare label ids, not the (label, count) tuples
            if any(label in labels_over_thr for label in top_labels):
                present_imgs.append(img)
            if len(present_imgs) == 2:  # controls how many images to keep
                break
        self.reader.commodity_index2img_path_list[commodity] = present_imgs
def bbox2result_kitti2d(self,
                        net_outputs,
                        class_names,
                        pklfile_prefix=None,
                        submission_prefix=None):
    """Convert 2D detection results to kitti format for evaluation and test
    submission.

    Args:
        net_outputs (list[np.ndarray]): List of array storing the
            inferenced bounding boxes and scores.
        class_names (list[String]): A list of class names.
        pklfile_prefix (str | None): The prefix of pkl file.
        submission_prefix (str | None): The prefix of submission file.

    Returns:
        list[dict]: A list of dictionaries in the kitti format.
    """
    assert len(net_outputs) == len(self.data_infos), \
        'invalid list length of network outputs'
    det_annos = []
    print('\nConverting prediction to KITTI format')
    for i, bboxes_per_sample in enumerate(
            mmcv.track_iter_progress(net_outputs)):
        annos = []
        anno = dict(name=[],
                    truncated=[],
                    occluded=[],
                    alpha=[],
                    bbox=[],
                    dimensions=[],
                    location=[],
                    rotation_y=[],
                    score=[])
        sample_idx = self.data_infos[i]['image']['image_idx']

        num_example = 0
        for label in range(len(bboxes_per_sample)):
            bbox = bboxes_per_sample[label]
            # use `j` here so the outer sample index `i` is not shadowed
            for j in range(bbox.shape[0]):
                anno['name'].append(class_names[int(label)])
                anno['truncated'].append(0.0)
                anno['occluded'].append(0)
                anno['alpha'].append(0.0)
                anno['bbox'].append(bbox[j, :4])
                # set dimensions (height, width, length) to zero
                anno['dimensions'].append(
                    np.zeros(shape=[3], dtype=np.float32))
                # set the 3D translation to (-1000, -1000, -1000)
                anno['location'].append(
                    np.ones(shape=[3], dtype=np.float32) * (-1000.0))
                anno['rotation_y'].append(0.0)
                anno['score'].append(bbox[j, 4])
                num_example += 1

        if num_example == 0:
            annos.append(
                dict(
                    name=np.array([]),
                    truncated=np.array([]),
                    occluded=np.array([]),
                    alpha=np.array([]),
                    bbox=np.zeros([0, 4]),
                    dimensions=np.zeros([0, 3]),
                    location=np.zeros([0, 3]),
                    rotation_y=np.array([]),
                    score=np.array([]),
                ))
        else:
            anno = {k: np.stack(v) for k, v in anno.items()}
            annos.append(anno)

        annos[-1]['sample_idx'] = np.array([sample_idx] * num_example,
                                           dtype=np.int64)
        det_annos += annos

    if pklfile_prefix is not None:
        # save file in pkl format
        pklfile_path = (pklfile_prefix[:-4] if pklfile_prefix.endswith(
            ('.pkl', '.pickle')) else pklfile_prefix)
        mmcv.dump(det_annos, pklfile_path)

    if submission_prefix is not None:
        # save file in submission format
        mmcv.mkdir_or_exist(submission_prefix)
        print(f'Saving KITTI submission to {submission_prefix}')
        for i, anno in enumerate(det_annos):
            sample_idx = self.data_infos[i]['image']['image_idx']
            cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt'
            with open(cur_det_file, 'w') as f:
                bbox = anno['bbox']
                loc = anno['location']
                dims = anno['dimensions'][::-1]  # lhw -> hwl
                for idx in range(len(bbox)):
                    print(
                        '{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '
                        '{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.
                        format(
                            anno['name'][idx],
                            anno['alpha'][idx],
                            *bbox[idx],  # 4 float
                            *dims[idx],  # 3 float
                            *loc[idx],  # 3 float
                            anno['rotation_y'][idx],
                            anno['score'][idx]),
                        file=f,
                    )
        print(f'Result is saved to {submission_prefix}')

    return det_annos
def bbox2result_kitti(self,
                      net_outputs,
                      class_names,
                      pklfile_prefix=None,
                      submission_prefix=None):
    """Convert 3D detection results to kitti format for evaluation and test
    submission.

    Args:
        net_outputs (list[np.ndarray]): List of array storing the
            inferenced bounding boxes and scores.
        class_names (list[String]): A list of class names.
        pklfile_prefix (str | None): The prefix of pkl file.
        submission_prefix (str | None): The prefix of submission file.

    Returns:
        list[dict]: A list of dictionaries with the kitti format.
    """
    assert len(net_outputs) == len(self.data_infos), \
        'invalid list length of network outputs'

    if submission_prefix is not None:
        mmcv.mkdir_or_exist(submission_prefix)

    det_annos = []
    print('\nConverting prediction to KITTI format')
    for idx, pred_dicts in enumerate(
            mmcv.track_iter_progress(net_outputs)):
        annos = []
        info = self.data_infos[idx]
        sample_idx = info['image']['image_idx']
        image_shape = info['image']['image_shape'][:2]

        box_dict = self.convert_valid_bboxes(pred_dicts, info)
        anno = {
            'name': [],
            'truncated': [],
            'occluded': [],
            'alpha': [],
            'bbox': [],
            'dimensions': [],
            'location': [],
            'rotation_y': [],
            'score': []
        }
        if len(box_dict['bbox']) > 0:
            box_2d_preds = box_dict['bbox']
            box_preds = box_dict['box3d_camera']
            scores = box_dict['scores']
            box_preds_lidar = box_dict['box3d_lidar']
            label_preds = box_dict['label_preds']

            for box, box_lidar, bbox, score, label in zip(
                    box_preds, box_preds_lidar, box_2d_preds, scores,
                    label_preds):
                bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
                bbox[:2] = np.maximum(bbox[:2], [0, 0])
                anno['name'].append(class_names[int(label)])
                anno['truncated'].append(0.0)
                anno['occluded'].append(0)
                anno['alpha'].append(
                    -np.arctan2(-box_lidar[1], box_lidar[0]) + box[6])
                anno['bbox'].append(bbox)
                anno['dimensions'].append(box[3:6])
                anno['location'].append(box[:3])
                anno['rotation_y'].append(box[6])
                anno['score'].append(score)

            anno = {k: np.stack(v) for k, v in anno.items()}
            annos.append(anno)
        else:
            anno = {
                'name': np.array([]),
                'truncated': np.array([]),
                'occluded': np.array([]),
                'alpha': np.array([]),
                'bbox': np.zeros([0, 4]),
                'dimensions': np.zeros([0, 3]),
                'location': np.zeros([0, 3]),
                'rotation_y': np.array([]),
                'score': np.array([]),
            }
            annos.append(anno)

        if submission_prefix is not None:
            curr_file = f'{submission_prefix}/{sample_idx:06d}.txt'
            with open(curr_file, 'w') as f:
                bbox = anno['bbox']
                loc = anno['location']
                dims = anno['dimensions']  # lhw -> hwl (reordered below)
                # use `det_idx` so the outer sample index `idx` is not
                # shadowed
                for det_idx in range(len(bbox)):
                    print(
                        '{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} '
                        '{:.4f} {:.4f} {:.4f} '
                        '{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.format(
                            anno['name'][det_idx], anno['alpha'][det_idx],
                            bbox[det_idx][0], bbox[det_idx][1],
                            bbox[det_idx][2], bbox[det_idx][3],
                            dims[det_idx][1], dims[det_idx][2],
                            dims[det_idx][0], loc[det_idx][0],
                            loc[det_idx][1], loc[det_idx][2],
                            anno['rotation_y'][det_idx],
                            anno['score'][det_idx]),
                        file=f)

        annos[-1]['sample_idx'] = np.array([sample_idx] *
                                           len(annos[-1]['score']),
                                           dtype=np.int64)

        det_annos += annos

    if pklfile_prefix is not None:
        if not pklfile_prefix.endswith(('.pkl', '.pickle')):
            out = f'{pklfile_prefix}.pkl'
        else:
            # guard against an undefined `out` when the prefix already has
            # a pickle extension
            out = pklfile_prefix
        mmcv.dump(det_annos, out)
        print(f'Result is saved to {out}.')

    return det_annos
def main():
    """Visualize the demo images."""
    parser = ArgumentParser()
    parser.add_argument('pose_config', help='Config file for pose')
    parser.add_argument('pose_checkpoint', help='Checkpoint file for pose')
    parser.add_argument('--video-path', type=str, help='Video path')
    parser.add_argument('--show',
                        action='store_true',
                        default=False,
                        help='whether to show visualizations.')
    parser.add_argument('--out-video-root',
                        default='',
                        help='Root of the output video file. '
                        'Default not saving the visualization video.')
    parser.add_argument('--device',
                        default='cuda:0',
                        help='Device used for inference')
    parser.add_argument('--kpt-thr',
                        type=float,
                        default=0.3,
                        help='Keypoint score threshold')
    parser.add_argument('--pose-nms-thr',
                        type=float,
                        default=0.9,
                        help='OKS threshold for pose NMS')
    parser.add_argument('--radius',
                        type=int,
                        default=4,
                        help='Keypoint radius for visualization')
    parser.add_argument('--thickness',
                        type=int,
                        default=1,
                        help='Link thickness for visualization')

    args = parser.parse_args()

    assert args.show or (args.out_video_root != '')

    # build the pose model from a config file and a checkpoint file
    pose_model = init_pose_model(args.pose_config,
                                 args.pose_checkpoint,
                                 device=args.device.lower())

    dataset = pose_model.cfg.data['test']['type']
    dataset_info = pose_model.cfg.data['test'].get('dataset_info', None)
    if dataset_info is None:
        warnings.warn(
            'Please set `dataset_info` in the config.'
            'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
            DeprecationWarning)
        assert (dataset == 'BottomUpCocoDataset')
    else:
        dataset_info = DatasetInfo(dataset_info)

    # read video
    video = mmcv.VideoReader(args.video_path)
    assert video.opened, f'Failed to load video file {args.video_path}'

    if args.out_video_root == '':
        save_out_video = False
    else:
        os.makedirs(args.out_video_root, exist_ok=True)
        save_out_video = True

    if save_out_video:
        fps = video.fps
        size = (video.width, video.height)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        videoWriter = cv2.VideoWriter(
            os.path.join(args.out_video_root,
                         f'vis_{os.path.basename(args.video_path)}'), fourcc,
            fps, size)

    # optional
    return_heatmap = False

    # e.g. use ('backbone', ) to return backbone feature
    output_layer_names = None

    print('Running inference...')
    for _, cur_frame in enumerate(mmcv.track_iter_progress(video)):
        pose_results, _ = inference_bottom_up_pose_model(
            pose_model,
            cur_frame,
            dataset=dataset,
            dataset_info=dataset_info,
            pose_nms_thr=args.pose_nms_thr,
            return_heatmap=return_heatmap,
            outputs=output_layer_names)

        # show the results
        vis_frame = vis_pose_result(pose_model,
                                    cur_frame,
                                    pose_results,
                                    radius=args.radius,
                                    thickness=args.thickness,
                                    dataset=dataset,
                                    dataset_info=dataset_info,
                                    kpt_score_thr=args.kpt_thr,
                                    show=False)

        if args.show:
            cv2.imshow('Image', vis_frame)

        if save_out_video:
            videoWriter.write(vis_frame)

        if args.show and cv2.waitKey(1) & 0xFF == ord('q'):
            break

    if save_out_video:
        videoWriter.release()
    if args.show:
        cv2.destroyAllWindows()
def _fill_trainval_infos(nusc,
                         train_scenes,
                         val_scenes,
                         test=False,
                         max_sweeps=10):
    """Generate the train/val infos from the raw data.

    Args:
        nusc (:obj:`NuScenes`): Dataset class in the nuScenes dataset.
        train_scenes (list[str]): Basic information of training scenes.
        val_scenes (list[str]): Basic information of validation scenes.
        test (bool): Whether use the test mode. In the test mode, no
            annotations can be accessed. Default: False.
        max_sweeps (int): Max number of sweeps. Default: 10.

    Returns:
        tuple[list[dict]]: Information of training set and validation set
            that will be saved to the info file.
    """
    train_nusc_infos = []
    val_nusc_infos = []

    for sample in mmcv.track_iter_progress(nusc.sample):
        lidar_token = sample['data']['LIDAR_TOP']
        cam_token = sample['data']['CAM_FRONT']
        sd_rec = nusc.get('sample_data', sample['data']['LIDAR_TOP'])
        cs_record = nusc.get('calibrated_sensor',
                             sd_rec['calibrated_sensor_token'])
        pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])
        lidar_path, boxes_lidar, _ = nusc.get_sample_data(lidar_token)
        _, boxes_cam, _ = nusc.get_sample_data(cam_token)
        lidarseg = nusc.get('lidarseg', lidar_token)
        lidarseg_path = os.path.join(nusc.dataroot, lidarseg['filename'])

        mmcv.check_file_exist(lidar_path)

        info = {
            'lidar_path': lidar_path,
            'lidarseg_path': lidarseg_path,
            'token': sample['token'],
            'sweeps': [],
            'cams': dict(),
            'lidar2ego_translation': cs_record['translation'],
            'lidar2ego_rotation': cs_record['rotation'],
            'ego2global_translation': pose_record['translation'],
            'ego2global_rotation': pose_record['rotation'],
            'timestamp': sample['timestamp'],
        }

        l2e_r = info['lidar2ego_rotation']
        l2e_t = info['lidar2ego_translation']
        e2g_r = info['ego2global_rotation']
        e2g_t = info['ego2global_translation']
        l2e_r_mat = Quaternion(l2e_r).rotation_matrix
        e2g_r_mat = Quaternion(e2g_r).rotation_matrix

        # obtain 6 image's information per frame
        camera_types = [
            'CAM_FRONT',
            'CAM_FRONT_RIGHT',
            'CAM_FRONT_LEFT',
            'CAM_BACK',
            'CAM_BACK_LEFT',
            'CAM_BACK_RIGHT',
        ]
        for cam in camera_types:
            cam_token = sample['data'][cam]
            cam_path, _, cam_intrinsic = nusc.get_sample_data(cam_token)
            cam_info = obtain_sensor2top(nusc, cam_token, l2e_t, l2e_r_mat,
                                         e2g_t, e2g_r_mat, cam)
            cam_info.update(cam_intrinsic=cam_intrinsic)
            info['cams'].update({cam: cam_info})

        # obtain sweeps for a single key-frame
        sd_rec = nusc.get('sample_data', sample['data']['LIDAR_TOP'])
        sweeps = []
        while len(sweeps) < max_sweeps:
            if not sd_rec['prev'] == '':
                sweep = obtain_sensor2top(nusc, sd_rec['prev'], l2e_t,
                                          l2e_r_mat, e2g_t, e2g_r_mat,
                                          'lidar')
                sweeps.append(sweep)
                sd_rec = nusc.get('sample_data', sd_rec['prev'])
            else:
                break
        info['sweeps'] = sweeps
        # obtain annotation
        if not test:
            # full range
            # annotations = [
            #     nusc.get('sample_annotation', token)
            #     for token in sample['anns']
            # ]
            # inside camera
            valid_box_tokens = [box.token for box in boxes_cam]
            boxes_lidar_filter = [
                box for box in boxes_lidar if box.token in valid_box_tokens
            ]
            annotations = [
                nusc.get('sample_annotation', token)
                for token in valid_box_tokens
            ]
            locs = np.array([b.center
                             for b in boxes_lidar_filter]).reshape(-1, 3)
            dims = np.array([b.wlh
                             for b in boxes_lidar_filter]).reshape(-1, 3)
            rots = np.array([
                b.orientation.yaw_pitch_roll[0] for b in boxes_lidar_filter
            ]).reshape(-1, 1)
            # velocity = np.array(
            #     [nusc.box_velocity(token)[:2] for token in sample['anns']])
            velocity = np.array(
                [nusc.box_velocity(token)[:2] for token in valid_box_tokens])
            valid_flag = np.array(
                [(anno['num_lidar_pts'] + anno['num_radar_pts']) > 0
                 for anno in annotations],
                dtype=bool).reshape(-1)
            # convert velo from global to lidar
            for i in range(len(boxes_lidar_filter)):
                velo = np.array([*velocity[i], 0.0])
                velo = velo @ np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(
                    l2e_r_mat).T
                velocity[i] = velo[:2]

            names = [b.name for b in boxes_lidar_filter]
            for i in range(len(names)):
                if names[i] in NuScenesDataset.NameMapping:
                    names[i] = NuScenesDataset.NameMapping[names[i]]
            names = np.array(names)
            # we need to convert rot to SECOND format.
            gt_boxes = np.concatenate([locs, dims, -rots - np.pi / 2],
                                      axis=1)
            assert len(gt_boxes) == len(
                annotations), f'{len(gt_boxes)}, {len(annotations)}'
            info['gt_boxes'] = gt_boxes
            info['gt_names'] = names
            info['gt_velocity'] = velocity.reshape(-1, 2)
            info['num_lidar_pts'] = np.array(
                [a['num_lidar_pts'] for a in annotations])
            info['num_radar_pts'] = np.array(
                [a['num_radar_pts'] for a in annotations])
            info['valid_flag'] = valid_flag

        if sample['scene_token'] in train_scenes:
            train_nusc_infos.append(info)
        else:
            val_nusc_infos.append(info)

    return train_nusc_infos, val_nusc_infos
def create_groundtruth_database(dataset_class_name,
                                data_path,
                                info_prefix,
                                info_path=None,
                                mask_anno_path=None,
                                used_classes=None,
                                database_save_path=None,
                                db_info_save_path=None,
                                relative_path=True,
                                add_rgb=False,
                                lidar_only=False,
                                bev_only=False,
                                coors_range=None,
                                with_mask=False):
    """Given the raw data, generate the ground truth database.

    Args:
        dataset_class_name (str): Name of the input dataset.
        data_path (str): Path of the data.
        info_prefix (str): Prefix of the info file.
        info_path (str): Path of the info file. Default: None.
        mask_anno_path (str): Path of the mask_anno. Default: None.
        used_classes (list[str]): Classes have been used. Default: None.
        database_save_path (str): Path to save database. Default: None.
        db_info_save_path (str): Path to save db_info. Default: None.
        relative_path (bool): Whether to use relative path. Default: True.
        with_mask (bool): Whether to use mask. Default: False.
    """
    print(f'Create GT Database of {dataset_class_name}')
    dataset_cfg = dict(type=dataset_class_name,
                       data_root=data_path,
                       ann_file=info_path)
    if dataset_class_name == 'KittiDataset':
        file_client_args = dict(backend='disk')
        dataset_cfg.update(test_mode=False,
                           split='training',
                           modality=dict(
                               use_lidar=True,
                               use_depth=False,
                               use_lidar_intensity=True,
                               use_camera=with_mask,
                           ),
                           pipeline=[
                               dict(type='LoadPointsFromFile',
                                    coord_type='LIDAR',
                                    load_dim=4,
                                    use_dim=4,
                                    file_client_args=file_client_args),
                               dict(type='LoadAnnotations3D',
                                    with_bbox_3d=True,
                                    with_label_3d=True,
                                    file_client_args=file_client_args)
                           ])
    elif dataset_class_name == 'NuScenesDataset':
        dataset_cfg.update(use_valid_flag=True,
                           pipeline=[
                               dict(type='LoadPointsFromFile',
                                    load_dim=5,
                                    use_dim=5),
                               dict(type='LoadPointsFromMultiSweeps',
                                    coord_type='LIDAR',
                                    sweeps_num=10,
                                    use_dim=[0, 1, 2, 3, 4],
                                    pad_empty_sweeps=True,
                                    remove_close=True),
                               dict(type='LoadAnnotations3D',
                                    with_bbox_3d=True,
                                    with_label_3d=True)
                           ])
    elif dataset_class_name == 'WaymoDataset':
        file_client_args = dict(backend='disk')
        dataset_cfg.update(test_mode=False,
                           split='training',
                           modality=dict(
                               use_lidar=True,
                               use_depth=False,
                               use_lidar_intensity=True,
                               use_camera=False,
                           ),
                           pipeline=[
                               dict(type='LoadPointsFromFile',
                                    coord_type='LIDAR',
                                    load_dim=6,
                                    use_dim=5,
                                    file_client_args=file_client_args),
                               dict(type='LoadAnnotations3D',
                                    with_bbox_3d=True,
                                    with_label_3d=True,
                                    file_client_args=file_client_args)
                           ])

    dataset = build_dataset(dataset_cfg)

    if database_save_path is None:
        database_save_path = osp.join(data_path, f'{info_prefix}_gt_database')
    if db_info_save_path is None:
        db_info_save_path = osp.join(data_path,
                                     f'{info_prefix}_dbinfos_train.pkl')
    mmcv.mkdir_or_exist(database_save_path)
    all_db_infos = dict()
    if with_mask:
        coco = COCO(osp.join(data_path, mask_anno_path))
        imgIds = coco.getImgIds()
        file2id = dict()
        for i in imgIds:
            info = coco.loadImgs([i])[0]
            file2id.update({info['file_name']: i})

    group_counter = 0
    for j in track_iter_progress(list(range(len(dataset)))):
        input_dict = dataset.get_data_info(j)
        dataset.pre_pipeline(input_dict)
        example = dataset.pipeline(input_dict)
        annos = example['ann_info']
        image_idx = example['sample_idx']
        points = example['points'].tensor.numpy()
        gt_boxes_3d = annos['gt_bboxes_3d'].tensor.numpy()
        names = annos['gt_names']
        group_dict = dict()
        if 'group_ids' in annos:
            group_ids = annos['group_ids']
        else:
            group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64)
        difficulty = np.zeros(gt_boxes_3d.shape[0], dtype=np.int32)
        if 'difficulty' in annos:
            difficulty = annos['difficulty']

        num_obj = gt_boxes_3d.shape[0]
        point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d)

        if with_mask:
            # prepare masks
            gt_boxes = annos['gt_bboxes']
            img_path = osp.split(example['img_info']['filename'])[-1]
            if img_path not in file2id.keys():
                print(f'skip image {img_path} for empty mask')
                continue
            img_id = file2id[img_path]
            kins_annIds = coco.getAnnIds(imgIds=img_id)
            kins_raw_info = coco.loadAnns(kins_annIds)
            kins_ann_info = _parse_coco_ann_info(kins_raw_info)
            h, w = annos['img_shape'][:2]
            gt_masks = [
                _poly2mask(mask, h, w) for mask in kins_ann_info['masks']
            ]
            # get mask inds based on iou mapping
            bbox_iou = bbox_overlaps(kins_ann_info['bboxes'], gt_boxes)
            mask_inds = bbox_iou.argmax(axis=0)
            valid_inds = (bbox_iou.max(axis=0) > 0.5)

            # mask the image
            # use more precise crop when it is ready
            # object_img_patches = np.ascontiguousarray(
            #     np.stack(object_img_patches, axis=0).transpose(0, 3, 1, 2))
            # crop image patches using roi_align
            # object_img_patches = crop_image_patch_v2(
            #     torch.Tensor(gt_boxes),
            #     torch.Tensor(mask_inds).long(), object_img_patches)
            object_img_patches, object_masks = crop_image_patch(
                gt_boxes, gt_masks, mask_inds, annos['img'])

        for i in range(num_obj):
            filename = f'{image_idx}_{names[i]}_{i}.bin'
            abs_filepath = osp.join(database_save_path, filename)
            rel_filepath = osp.join(f'{info_prefix}_gt_database', filename)

            # save point clouds and image patches for each object
            gt_points = points[point_indices[:, i]]
            gt_points[:, :3] -= gt_boxes_3d[i, :3]

            if with_mask:
                if object_masks[i].sum() == 0 or not valid_inds[i]:
                    # Skip object for empty or invalid mask
                    continue
                img_patch_path = abs_filepath + '.png'
                mask_patch_path = abs_filepath + '.mask.png'
                mmcv.imwrite(object_img_patches[i], img_patch_path)
                mmcv.imwrite(object_masks[i], mask_patch_path)

            with open(abs_filepath, 'w') as f:
                gt_points.tofile(f)

            if (used_classes is None) or names[i] in used_classes:
                db_info = {
                    'name': names[i],
                    'path': rel_filepath,
                    'image_idx': image_idx,
                    'gt_idx': i,
                    'box3d_lidar': gt_boxes_3d[i],
                    'num_points_in_gt': gt_points.shape[0],
                    'difficulty': difficulty[i],
                }
                local_group_id = group_ids[i]
                # if local_group_id >= 0:
                if local_group_id not in group_dict:
                    group_dict[local_group_id] = group_counter
                    group_counter += 1
                db_info['group_id'] = group_dict[local_group_id]
                if 'score' in annos:
                    db_info['score'] = annos['score'][i]
                if with_mask:
                    db_info.update({'box2d_camera': gt_boxes[i]})
                if names[i] in all_db_infos:
                    all_db_infos[names[i]].append(db_info)
                else:
                    all_db_infos[names[i]] = [db_info]

    for k, v in all_db_infos.items():
        print(f'load {len(v)} {k} database infos')

    with open(db_info_save_path, 'wb') as f:
        pickle.dump(all_db_infos, f)
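# Sketch of invoking the database builder above for KITTI; the data root and
# info file are hypothetical placeholders for an already prepared dataset.
create_groundtruth_database(
    'KittiDataset',
    data_path='./data/kitti',                        # assumed KITTI root
    info_prefix='kitti',
    info_path='./data/kitti/kitti_infos_train.pkl',  # assumed info file
    relative_path=False,
    with_mask=False)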