Example #1
def restructure_ICDAR2019_dataset(root, out_dir, track="TRACKA", year="2014"):
    """
    
    """
    # start with training
    train_ann_files_path = osp.join(root, "training", track, "ground_truth")
    print(train_ann_files_path)
    train_img_prefix = osp.join(root, "training", track, "ground_truth")
    print(train_img_prefix)
    train_out_annotations = osp.join(out_dir, "annotations",
                                     f"instances_train{year}.json")
    print(train_out_annotations)

    print("Converting train annotations...")
    convert_icdar2019_to_coco(ann_files_path=train_ann_files_path,
                              out_file=train_out_annotations,
                              img_prefix=train_img_prefix)

    print("Moving train images...")
    train_img_dest = osp.join(out_dir, f"train{year}")
    os.makedirs(train_img_dest, exist_ok=True)
    train_img_list = [
        fn for fn in glob(osp.join(train_img_prefix, '*.*'))
        if osp.splitext(fn.lower())[1] in img_exts
    ]
    for fn in mmcv.track_iter_progress(train_img_list):
        os.rename(fn, osp.join(train_img_dest, osp.basename(fn)))

    # val
    val_ann_files_path = osp.join(root, "test_ground_truth", track)
    print(val_ann_files_path)
    val_img_prefix = osp.join(root, "test", track)
    print(val_img_prefix)
    val_out_annotations = osp.join(out_dir, "annotations",
                                   f"instances_val{year}.json")
    print(val_out_annotations)

    print("Converting val annotations...")
    convert_icdar2019_to_coco(ann_files_path=val_ann_files_path,
                              out_file=val_out_annotations,
                              img_prefix=val_img_prefix)

    print("Moving val images...")
    val_img_dest = osp.join(out_dir, f"val{year}")
    os.makedirs(val_img_dest, exist_ok=True)
    val_img_list = [
        fn for fn in glob(osp.join(val_img_prefix, '*.*'))
        if osp.splitext(fn.lower())[1] in img_exts
    ]
    for fn in mmcv.track_iter_progress(val_img_list):
        os.rename(fn, osp.join(val_img_dest, osp.basename(fn)))

    os.makedirs(osp.join(out_dir, "logs"), exist_ok=True)
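Example #1 relies on module-level names the snippet does not show: osp, glob, mmcv, img_exts, and convert_icdar2019_to_coco (listed as Example #14 below). A minimal sketch of the assumed context; the img_exts whitelist is a hypothetical stand-in:

import os
import os.path as osp
from glob import glob

import mmcv

# Hypothetical image-extension whitelist; the original may differ.
img_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif')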
Example #2
def main():
    args = parse_args()
    assert args.out or args.show, \
        ('Please specify at least one operation (save/show the '
         'video) with the argument "--out" or "--show"')

    model = init_detector(args.config, args.checkpoint, device=args.device)

    video_reader = mmcv.VideoReader(args.video)
    video_writer = None
    if args.out:
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        video_writer = cv2.VideoWriter(
            args.out, fourcc, video_reader.fps,
            (video_reader.width, video_reader.height))

    for frame in mmcv.track_iter_progress(video_reader):
        result = inference_detector(model, frame)
        frame = model.show_result(frame, result, score_thr=args.score_thr)
        if args.show:
            cv2.namedWindow('video', 0)
            mmcv.imshow(frame, 'video', args.wait_time)
        if args.out:
            video_writer.write(frame)

    if video_writer:
        video_writer.release()
    cv2.destroyAllWindows()
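The demo above reads several command-line flags through parse_args, which the snippet omits. A plausible sketch mirroring the flags main() actually uses (--out, --show, --score-thr, --wait-time, --device); the defaults are assumptions:

import argparse

def parse_args():
    # Hypothetical parser matching the attributes read in main().
    parser = argparse.ArgumentParser(description='MMDetection video demo')
    parser.add_argument('video', help='video file')
    parser.add_argument('config', help='config file')
    parser.add_argument('checkpoint', help='checkpoint file')
    parser.add_argument('--device', default='cuda:0', help='device used for inference')
    parser.add_argument('--score-thr', type=float, default=0.3, help='bbox score threshold')
    parser.add_argument('--out', type=str, help='output video file')
    parser.add_argument('--show', action='store_true', help='show video')
    parser.add_argument('--wait-time', type=float, default=1, help='display interval (s)')
    return parser.parse_args()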
Example #3
def export_2d_annotation(root_path, info_path, version, mono3d=True):
    """Export 2d annotation from the info file and raw data.

    Args:
        root_path (str): Root path of the raw data.
        info_path (str): Path of the info file.
        version (str): Dataset version.
        mono3d (bool): Whether to export mono3d annotation. Default: True.
    """
    # get bbox annotations for camera
    camera_types = [
        'CAM_FRONT',
        'CAM_FRONT_RIGHT',
        'CAM_FRONT_LEFT',
        'CAM_BACK',
        'CAM_BACK_LEFT',
        'CAM_BACK_RIGHT',
    ]
    nusc_infos = mmcv.load(info_path)['infos']
    nusc = NuScenes(version=version, dataroot=root_path, verbose=True)
    # info_2d_list = []
    cat2Ids = [
        dict(id=nus_categories.index(cat_name), name=cat_name)
        for cat_name in nus_categories
    ]
    coco_ann_id = 0
    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
    for info in mmcv.track_iter_progress(nusc_infos):
        for cam in camera_types:
            cam_info = info['cams'][cam]
            coco_infos = get_2d_boxes(nusc,
                                      cam_info['sample_data_token'],
                                      visibilities=['', '1', '2', '3', '4'],
                                      mono3d=mono3d)
            (height, width, _) = mmcv.imread(cam_info['data_path']).shape
            coco_2d_dict['images'].append(
                dict(file_name=cam_info['data_path'].split('data/nuscenes/')
                     [-1],
                     id=cam_info['sample_data_token'],
                     token=info['token'],
                     cam2ego_rotation=cam_info['sensor2ego_rotation'],
                     cam2ego_translation=cam_info['sensor2ego_translation'],
                     ego2global_rotation=info['ego2global_rotation'],
                     ego2global_translation=info['ego2global_translation'],
                     cam_intrinsic=cam_info['cam_intrinsic'],
                     width=width,
                     height=height))
            for coco_info in coco_infos:
                if coco_info is None:
                    continue
                # add an empty key for coco format
                coco_info['segmentation'] = []
                coco_info['id'] = coco_ann_id
                coco_2d_dict['annotations'].append(coco_info)
                coco_ann_id += 1
    if mono3d:
        json_prefix = f'{info_path[:-4]}_mono3d'
    else:
        json_prefix = f'{info_path[:-4]}'
    mmcv.dump(coco_2d_dict, f'{json_prefix}.coco.json')
Example #4
def sub_processor(lock, pid, video_list):
    """
    Define job for every subprocess
    :param lock: threading lock
    :param pid: sub processor id
    :param video_list: video list assigned to each subprocess
    :return: None
    """
    if pid == 0:
        video_range = mmcv.track_iter_progress(range(len(video_list)))
    else:
        video_range = range(len(video_list))

    for i in video_range:
        video_name = video_list[i]
        """ Read result csv file """
        df = pd.read_csv(os.path.join(result_dir, video_name + ".csv"))
        """ Calculate final score of proposals """
        df['score'] = df.iou.values[:] * df.start.values[:] * df.end.values[:]
        if len(df) > 1:
            df = softNMS(df)
        df = df.sort_values(by="score", ascending=False)
        video_info = video_dict[video_name]
        video_duration = video_info["duration_second"]
        proposal_list = []

        for j in range(min(top_number, len(df))):
            tmp_proposal = {}
            tmp_proposal["score"] = df.score.values[j]
            tmp_proposal["segment"] = [
                max(0, df.xmin.values[j]) * video_duration,
                min(1, df.xmax.values[j]) * video_duration
            ]
            proposal_list.append(tmp_proposal)
        result_dict[video_name[2:]] = proposal_list
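sub_processor reads the globals result_dir, video_dict, and top_number and writes into a shared result_dict, none of which the snippet defines. A minimal sketch of how such workers might be launched with multiprocessing; the function name and chunking are assumptions:

import multiprocessing as mp

def launch_sub_processors(video_list, num_workers=8):
    # result_dict must be a Manager dict so writes made in the
    # subprocesses are visible to the parent after join().
    global result_dict
    manager = mp.Manager()
    result_dict = manager.dict()
    lock = mp.Lock()
    chunk = (len(video_list) + num_workers - 1) // num_workers
    processes = []
    for pid in range(num_workers):
        sub_list = video_list[pid * chunk:(pid + 1) * chunk]
        p = mp.Process(target=sub_processor, args=(lock, pid, sub_list))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
    return dict(result_dict)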
Example #5
def main():
    args = parse_args()

    if args.output_dir is not None:
        mkdir_or_exist(args.output_dir)

    cfg = build_data_cfg(args.config, args.skip_type, args.cfg_options)
    try:
        dataset = build_dataset(cfg.data.train,
                                default_args=dict(filter_empty_gt=False))
    except TypeError:  # seg dataset doesn't have `filter_empty_gt` key
        dataset = build_dataset(cfg.data.train)
    data_infos = dataset.data_infos
    dataset_type = cfg.dataset_type

    # configure visualization mode
    vis_task = args.task  # 'det', 'seg', 'multi_modality-det', 'mono-det'

    for idx, data_info in enumerate(track_iter_progress(data_infos)):
        if dataset_type in ['KittiDataset', 'WaymoDataset']:
            data_path = data_info['point_cloud']['velodyne_path']
        elif dataset_type in [
                'ScanNetDataset', 'SUNRGBDDataset', 'ScanNetSegDataset',
                'S3DISSegDataset', 'S3DISDataset'
        ]:
            data_path = data_info['pts_path']
        elif dataset_type in ['NuScenesDataset', 'LyftDataset']:
            data_path = data_info['lidar_path']
        elif dataset_type in ['NuScenesMonoDataset']:
            data_path = data_info['file_name']
        else:
            raise NotImplementedError(
                f'unsupported dataset type {dataset_type}')

        file_name = osp.splitext(osp.basename(data_path))[0]

        if vis_task in ['det', 'multi_modality-det']:
            # show 3D bboxes on 3D point clouds
            show_det_data(idx,
                          dataset,
                          args.output_dir,
                          file_name,
                          show=args.online)
        if vis_task in ['multi_modality-det', 'mono-det']:
            # project 3D bboxes to 2D image
            show_proj_bbox_img(
                idx,
                dataset,
                args.output_dir,
                file_name,
                show=args.online,
                is_nus_mono=(dataset_type == 'NuScenesMonoDataset'))
        elif vis_task in ['seg']:
            # show 3D segmentation mask on 3D point clouds
            show_seg_data(idx,
                          dataset,
                          args.output_dir,
                          file_name,
                          show=args.online)
Example #6
def _create_reduced_point_cloud(data_path,
                                info_path,
                                save_path=None,
                                back=False,
                                num_features=4,
                                front_camera_id=2):
    """Create reduced point clouds for given info.

    Args:
        data_path (str): Path of original data.
        info_path (str): Path of data info.
        save_path (str | None): Path to save reduced point cloud data.
            Default: None.
        back (bool): Whether to flip the points to back.
        num_features (int): Number of point features. Default: 4.
        front_camera_id (int): The referenced/front camera ID. Default: 2.
    """
    kitti_infos = mmcv.load(info_path)

    for info in mmcv.track_iter_progress(kitti_infos):
        pc_info = info['point_cloud']
        image_info = info['image']
        calib = info['calib']

        v_path = pc_info['velodyne_path']
        v_path = Path(data_path) / v_path
        points_v = np.fromfile(
            str(v_path), dtype=np.float32,
            count=-1).reshape([-1, num_features])
        rect = calib['R0_rect']
        if front_camera_id == 2:
            P2 = calib['P2']
        else:
            P2 = calib[f'P{str(front_camera_id)}']
        Trv2c = calib['Tr_velo_to_cam']
        # first remove z < 0 points
        # keep = points_v[:, -1] > 0
        # points_v = points_v[keep]
        # then remove outside.
        if back:
            points_v[:, 0] = -points_v[:, 0]
        points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2,
                                                    image_info['image_shape'])
        if save_path is None:
            save_dir = v_path.parent.parent / (v_path.parent.stem + '_reduced')
            if not save_dir.exists():
                save_dir.mkdir()
            save_filename = save_dir / v_path.name
            # save_filename = str(v_path) + '_reduced'
            if back:
                # save_filename is a Path here; convert before appending
                save_filename = str(save_filename) + '_back'
        else:
            save_filename = str(Path(save_path) / v_path.name)
            if back:
                save_filename += '_back'
        with open(save_filename, 'wb') as f:
            points_v.tofile(f)
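A hypothetical invocation for the KITTI training split; both paths are placeholders. With save_path=None the reduced clouds land in a velodyne_reduced directory next to the originals:

_create_reduced_point_cloud(
    data_path='data/kitti',
    info_path='data/kitti/kitti_infos_train.pkl')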
Example #7
def export_2d_annotation(set_folder, info_path,
                         ego_pose_sensor_name="imu_perfect",
                         camera_names=None):
    # avoid the mutable-default-argument pitfall
    if camera_names is None:
        camera_names = ["cam_front"]

    loader = DatasetLoader(set_folder)
    loader.setup()

    ego_pose_sensor, ego_pose_calib = loading_utils.load_sensor_with_calib(
        loader, ego_pose_sensor_name)

    camera_sensors, camera_calibs = loading_utils.load_sensors_with_calibs(
        loader, camera_names)

    carla_infos = mmcv.load(info_path)["infos"]
    # info_2d_list = []
    cat2Ids = [
        dict(id=carla_categories.index(cat_name), name=cat_name)
        for cat_name in carla_categories
    ]

    coco_ann_id = 0
    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
    for info in mmcv.track_iter_progress(carla_infos):

        for cam in camera_names:
            cam_info = info["cams"][cam]

            img_path = cam_info["data_path"]

            # get img path relative to dataset root
            img_path = str(pathlib.Path(
                img_path).relative_to(loader.dataset_root))
            coco_infos = get_2d_boxes(
                loader,
                info["token"],
                ego_pose_sensor,
                ego_pose_calib,
                camera_sensors[cam],
                camera_calibs[cam],
                img_path
            )
            (height, width, _) = mmcv.imread(cam_info["data_path"]).shape
            coco_2d_dict["images"].append(
                dict(
                    file_name=img_path,
                    id=info["token"],
                    width=width,
                    height=height,
                )
            )
            for coco_info in coco_infos:
                if coco_info is None:
                    continue
                # add an empty key for coco format
                coco_info["segmentation"] = []
                coco_info["id"] = coco_ann_id
                coco_2d_dict["annotations"].append(coco_info)
                coco_ann_id += 1
    mmcv.dump(coco_2d_dict, f"{info_path[:-4]}.coco.json", indent=4)
Example #8
def load_lyft_gts(lyft, data_root, eval_split, logger=None):
    """Loads ground truth boxes from database.

    Args:
        lyft (:obj:`LyftDataset`): Lyft class in the sdk.
        data_root (str): Root of data for reading splits.
        eval_split (str): Name of the split for evaluation.
        logger (logging.Logger | str | None): Logger used for printing
            related information during evaluation. Default: None.

    Returns:
        list[dict]: List of annotation dictionaries.
    """
    split_scenes = mmcv.list_from_file(osp.join(data_root,
                                                f'{eval_split}.txt'))

    # Read out all sample_tokens in DB.
    sample_tokens_all = [s['token'] for s in lyft.sample]
    assert len(sample_tokens_all) > 0, 'Error: Database has no samples!'

    if eval_split == 'test':
        # Check that you aren't trying to cheat :)
        assert len(lyft.sample_annotation) > 0, \
            'Error: You are trying to evaluate on the test set \
             but you do not have the annotations!'

    sample_tokens = []
    for sample_token in sample_tokens_all:
        scene_token = lyft.get('sample', sample_token)['scene_token']
        scene_record = lyft.get('scene', scene_token)
        if scene_record['name'] in split_scenes:
            sample_tokens.append(sample_token)

    all_annotations = []

    print_log('Loading ground truth annotations...', logger=logger)
    # Load annotations and filter predictions and annotations.
    for sample_token in mmcv.track_iter_progress(sample_tokens):
        sample = lyft.get('sample', sample_token)
        sample_annotation_tokens = sample['anns']
        for sample_annotation_token in sample_annotation_tokens:
            # Get label name in detection task and filter unused labels.
            sample_annotation = \
                lyft.get('sample_annotation', sample_annotation_token)
            detection_name = sample_annotation['category_name']
            if detection_name is None:
                continue
            annotation = {
                'sample_token': sample_token,
                'translation': sample_annotation['translation'],
                'size': sample_annotation['size'],
                'rotation': sample_annotation['rotation'],
                'name': detection_name,
            }
            all_annotations.append(annotation)

    return all_annotations
Example #9
    def _format_bbox(self, results, jsonfile_prefix=None):
        nusc_annos = {}
        mapped_class_names = self.CLASSES

        print('Start to convert detection format...')
        for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
            annos = []
            boxes = output_to_nusc_box(
                det)  # list of NuScenesBox; lidar coordinate
            sample_token = self.data_infos[sample_id]['token']
            boxes = lidar_nusc_box_to_global(
                self.data_infos[sample_id], boxes, mapped_class_names,
                self.eval_detection_configs,
                self.eval_version)  # list of NuScenesBox; global
            for i, box in enumerate(boxes):
                name = mapped_class_names[box.label]
                if np.sqrt(box.velocity[0]**2 + box.velocity[1]**2) > 0.2:
                    if name in [
                            'car', 'truck', 'bus', 'trailer',
                            'construction_vehicle', 'vehicle'
                    ]:
                        attr = 'vehicle.moving'
                    elif name in ['bicycle', 'motorcycle', 'bike']:
                        attr = 'cycle.with_rider'
                    else:
                        attr = ''
                        # attr = MMDAMergeCatDataset.DefaultAttribute[name]
                else:
                    if name in ['pedestrian']:
                        attr = 'pedestrian.standing'
                    elif name in ['bus']:
                        attr = 'vehicle.stopped'
                    else:
                        attr = ''
                        # attr = MMDAMergeCatDataset.DefaultAttribute[name]

                nusc_anno = dict(sample_token=sample_token,
                                 translation=box.center.tolist(),
                                 size=box.wlh.tolist(),
                                 rotation=box.orientation.elements.tolist(),
                                 velocity=box.velocity[:2].tolist(),
                                 detection_name=name,
                                 detection_score=box.score,
                                 attribute_name=attr)
                annos.append(nusc_anno)
            nusc_annos[sample_token] = annos
        nusc_submissions = {
            'meta': self.modality,
            'results': nusc_annos,
        }

        mmcv.mkdir_or_exist(jsonfile_prefix)
        res_path = osp.join(jsonfile_prefix, 'results_nusc.json')
        print('Results written to', res_path)
        mmcv.dump(nusc_submissions, res_path)
        return res_path
Example #10
def test_track_iter_progress(capsys):
    ret = []
    for num in mmcv.track_iter_progress([1, 2, 3], bar_width=3):
        ret.append(sleep_1s(num))
    out, _ = capsys.readouterr()
    assert out == ('[   ] 0/3, elapsed: 0s, ETA:'
                   '\r[>  ] 1/3, 1.0 task/s, elapsed: 1s, ETA:     2s'
                   '\r[>> ] 2/3, 1.0 task/s, elapsed: 2s, ETA:     1s'
                   '\r[>>>] 3/3, 1.0 task/s, elapsed: 3s, ETA:     0s\n')
    assert ret == [1, 2, 3]
Example #11
def test_track_iter_progress():
    out = StringIO()
    ret = []
    for num in mmcv.track_iter_progress([1, 2, 3], bar_width=3, file=out):
        ret.append(sleep_1s(num))
    assert out.getvalue() == (
        '[   ] 0/3, elapsed: 0s, ETA:'
        '\r[>  ] 1/3, 1.0 task/s, elapsed: 1s, ETA:     2s'
        '\r[>> ] 2/3, 1.0 task/s, elapsed: 2s, ETA:     1s'
        '\r[>>>] 3/3, 1.0 task/s, elapsed: 3s, ETA:     0s\n')
    assert ret == [1, 2, 3]
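Both tests call a sleep_1s helper that neither snippet defines. A minimal sketch consistent with the asserted output (one second per item, hence "1.0 task/s"):

import time

def sleep_1s(num):
    # Sleep one second so the progress bar reports 1.0 task/s, then echo input.
    time.sleep(1)
    return num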
Example #12
def export_2d_annotation(root_path, info_path, version):
    """Export 2d annotation from the info file and raw data.

    Args:
        root_path (str): Root path of the raw data.
        info_path (str): Path of the info file.
        version (str): Dataset version.
    """
    warnings.warn('2D annotations are not used on the Lyft dataset. The '
                  'function export_2d_annotation will be deprecated.',
                  DeprecationWarning)
    # get bbox annotations for camera
    camera_types = [
        'CAM_FRONT',
        'CAM_FRONT_RIGHT',
        'CAM_FRONT_LEFT',
        'CAM_BACK',
        'CAM_BACK_LEFT',
        'CAM_BACK_RIGHT',
    ]
    lyft_infos = mmcv.load(info_path)['infos']
    lyft = Lyft(data_path=osp.join(root_path, version),
                json_path=osp.join(root_path, version, version),
                verbose=True)
    # info_2d_list = []
    cat2Ids = [
        dict(id=lyft_categories.index(cat_name), name=cat_name)
        for cat_name in lyft_categories
    ]
    coco_ann_id = 0
    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
    for info in mmcv.track_iter_progress(lyft_infos):
        for cam in camera_types:
            cam_info = info['cams'][cam]
            coco_infos = get_2d_boxes(lyft,
                                      cam_info['sample_data_token'],
                                      visibilities=['', '1', '2', '3', '4'])
            (height, width, _) = mmcv.imread(cam_info['data_path']).shape
            coco_2d_dict['images'].append(
                dict(file_name=cam_info['data_path'],
                     id=cam_info['sample_data_token'],
                     width=width,
                     height=height))
            for coco_info in coco_infos:
                if coco_info is None:
                    continue
                # add an empty key for coco format
                coco_info['segmentation'] = []
                coco_info['id'] = coco_ann_id
                coco_2d_dict['annotations'].append(coco_info)
                coco_ann_id += 1
    mmcv.dump(coco_2d_dict, f'{info_path[:-4]}.coco.json')
Example #13
def export_2d_annotation(root_path, info_path, version):
    """Export 2d annotation from the info file and raw data.

    Args:
        root_path (str): Root path of the raw data.
        info_path (str): Path of the info file.
        version (str): Dataset version.
    """
    # get bbox annotations for camera
    camera_types = [
        "CAM_FRONT",
        "CAM_FRONT_RIGHT",
        "CAM_FRONT_LEFT",
        "CAM_BACK",
        "CAM_BACK_LEFT",
        "CAM_BACK_RIGHT",
    ]
    nusc_infos = mmcv.load(info_path)["infos"]
    nusc = NuScenes(version=version, dataroot=root_path, verbose=True)
    # info_2d_list = []
    cat2Ids = [
        dict(id=nus_categories.index(cat_name), name=cat_name)
        for cat_name in nus_categories
    ]
    coco_ann_id = 0
    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
    for info in mmcv.track_iter_progress(nusc_infos):
        for cam in camera_types:
            cam_info = info["cams"][cam]
            coco_infos = get_2d_boxes(
                nusc,
                cam_info["sample_data_token"],
                visibilities=["", "1", "2", "3", "4"],
            )
            (height, width, _) = mmcv.imread(cam_info["data_path"]).shape
            coco_2d_dict["images"].append(
                dict(
                    file_name=cam_info["data_path"],
                    id=cam_info["sample_data_token"],
                    width=width,
                    height=height,
                )
            )
            for coco_info in coco_infos:
                if coco_info is None:
                    continue
                # add an empty key for coco format
                coco_info["segmentation"] = []
                coco_info["id"] = coco_ann_id
                coco_2d_dict["annotations"].append(coco_info)
                coco_ann_id += 1
    mmcv.dump(coco_2d_dict, f"{info_path[:-4]}.coco.json")
Example #14
def convert_icdar2019_to_coco(ann_files_path, out_file, img_prefix):

    cat2label = {k: i for i, k in enumerate(['table', 'cell'])}

    annotations = []
    images = []
    obj_count = 0

    image_list = [
        osp.basename(fn) for fn in glob(osp.join(img_prefix, '*.*'))
        if osp.splitext(fn.lower())[1] in img_exts
    ]

    for idx, image_fn in enumerate(mmcv.track_iter_progress(image_list)):
        image_id = osp.splitext(osp.basename(image_fn))[0]
        #filename = f'{image_id}.jpg' # TODO check this vs img_path
        filename = image_fn
        img_path = osp.join(img_prefix, filename)
        height, width = mmcv.imread(img_path).shape[:2]

        images.append(
            dict(id=idx, file_name=filename, height=height, width=width))

        # load annotations
        xml_path = f'{ann_files_path}/{image_id}.xml'
        tables = parse_tables_from_xml(xml_path)

        for table in tables:
            bbox = table.bbox
            area = (bbox[2]) * (bbox[3])
            poly = table.bounds

            data_anno = dict(image_id=idx,
                             id=obj_count,
                             category_id=cat2label['table'],
                             bbox=bbox,
                             area=area,
                             segmentation=[poly],
                             iscrowd=0)
            annotations.append(data_anno)
            obj_count += 1

    coco_format_json = dict(images=images,
                            annotations=annotations,
                            categories=[{
                                'id': cat2label[label],
                                'name': label
                            } for label in cat2label])
    os.makedirs(osp.dirname(out_file), exist_ok=True)
    mmcv.dump(coco_format_json, out_file)
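The converter assumes a parse_tables_from_xml helper whose records expose .bbox (COCO [x, y, w, h]) and .bounds (a flattened polygon). A sketch under those assumptions, based on the ICDAR2019 cTDaR ground-truth format in which each <table> carries a <Coords points="x1,y1 x2,y2 ..."> element; the exact tag names may differ:

import xml.etree.ElementTree as ET
from collections import namedtuple

# Hypothetical record matching the attributes Example #14 reads.
Table = namedtuple('Table', ['bbox', 'bounds'])

def parse_tables_from_xml(xml_path):
    tables = []
    root = ET.parse(xml_path).getroot()
    for table in root.iter('table'):
        points = table.find('Coords').get('points')
        pts = [tuple(map(int, p.split(','))) for p in points.split()]
        xs = [p[0] for p in pts]
        ys = [p[1] for p in pts]
        # COCO-style [x, y, w, h] plus the flattened polygon
        bbox = [min(xs), min(ys), max(xs) - min(xs), max(ys) - min(ys)]
        poly = [coord for pt in pts for coord in pt]
        tables.append(Table(bbox=bbox, bounds=poly))
    return tables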
Example #15
def main():
    args = parse_args()

    if args.output_dir is not None:
        mkdir_or_exist(args.output_dir)

    cfg = retrieve_data_cfg(args.config, args.skip_type, args.cfg_options)
    if cfg.data.train['type'] == 'RepeatDataset':
        cfg.data.train.dataset['pipeline'] = get_loading_pipeline(
            cfg.train_pipeline)
    else:
        cfg.data.train['pipeline'] = get_loading_pipeline(cfg.train_pipeline)
    dataset = build_dataset(cfg.data.train,
                            default_args=dict(filter_empty_gt=False))
    # For RepeatDataset type, the infos are stored in dataset.dataset
    if cfg.data.train['type'] == 'RepeatDataset':
        dataset = dataset.dataset
    data_infos = dataset.data_infos

    for idx, data_info in enumerate(track_iter_progress(data_infos)):
        if cfg.dataset_type in ['KittiDataset', 'WaymoDataset']:
            pts_path = data_info['point_cloud']['velodyne_path']
        elif cfg.dataset_type in ['ScanNetDataset', 'SUNRGBDDataset']:
            pts_path = data_info['pts_path']
        elif cfg.dataset_type in ['NuScenesDataset', 'LyftDataset']:
            pts_path = data_info['lidar_path']
        else:
            raise NotImplementedError(
                f'unsupported dataset type {cfg.dataset_type}')
        file_name = osp.splitext(osp.basename(pts_path))[0]
        save_path = osp.join(args.output_dir,
                             f'{file_name}.png') if args.output_dir else None

        example = dataset.prepare_train_data(idx)
        points = example['points']._data.numpy()
        points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,
                                           Coord3DMode.DEPTH)
        gt_bboxes = dataset.get_ann_info(idx)['gt_bboxes_3d'].tensor
        if gt_bboxes is not None:
            gt_bboxes = Box3DMode.convert(gt_bboxes, Box3DMode.LIDAR,
                                          Box3DMode.DEPTH)

        vis = Visualizer(points, save_path='./show.png')
        vis.add_bboxes(bbox3d=gt_bboxes, bbox_color=(0, 0, 1))

        vis.show(save_path)
        del vis
Example #16
def collect_image_infos(path, exclude_extensions=None):
    img_infos = []

    images_generator = mmcv.scandir(path, recursive=True)
    for image_path in mmcv.track_iter_progress(list(images_generator)):
        if exclude_extensions is None or not image_path.lower().endswith(
                exclude_extensions):
            image_path = os.path.join(path, image_path)
            img_pillow = Image.open(image_path)
            img_info = {
                'filename': image_path,
                'width': img_pillow.width,
                'height': img_pillow.height,
            }
            img_infos.append(img_info)
    return img_infos
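A hypothetical call that skips annotation files while scanning a directory tree; the path is a placeholder:

img_infos = collect_image_infos('data/my_images',
                                exclude_extensions=('.txt', '.json', '.xml'))
print(f'collected {len(img_infos)} images')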
Example #17
def main():
    args = parse_args()
    assert args.out or args.show, \
        ('Please specify at least one operation (save/show the '
         'video) with the argument "--out" or "--show"')

    model = init_detector(args.config, args.checkpoint, device=args.device)

    if args.nvdecode:
        VideoCapture = ffmpegcv.VideoCaptureNV
    else:
        VideoCapture = ffmpegcv.VideoCapture
    video_origin = VideoCapture(args.video)
    img_metas = prefetch_img_metas(model.cfg,
                                   (video_origin.width, video_origin.height))
    resize_wh = img_metas['pad_shape'][1::-1]
    video_resize = VideoCapture(args.video,
                                resize=resize_wh,
                                resize_keepratio=True,
                                resize_keepratioalign='topleft',
                                pix_fmt='rgb24')
    video_writer = None
    if args.out:
        video_writer = ffmpegcv.VideoWriter(args.out, fps=video_origin.fps)

    with torch.no_grad():
        for frame_resize, frame_origin in zip(
                mmcv.track_iter_progress(video_resize), video_origin):
            data = process_img(frame_resize, img_metas, args.device)
            result = model(return_loss=False, rescale=True, **data)[0]
            frame_mask = model.show_result(frame_origin,
                                           result,
                                           score_thr=args.score_thr)
            if args.show:
                cv2.namedWindow('video', 0)
                mmcv.imshow(frame_mask, 'video', args.wait_time)
            if args.out:
                video_writer.write(frame_mask)

    if video_writer:
        video_writer.release()
    video_origin.release()
    video_resize.release()

    cv2.destroyAllWindows()
Example #18
def export_2d_annotation(root_path, info_path, mono3d=True):
    """Export 2d annotation from the info file and raw data.

    Args:
        root_path (str): Root path of the raw data.
        info_path (str): Path of the info file.
        mono3d (bool): Whether to export mono3d annotation. Default: True.
    """
    # get bbox annotations for camera
    kitti_infos = mmcv.load(info_path)
    cat2Ids = [
        dict(id=kitti_categories.index(cat_name), name=cat_name)
        for cat_name in kitti_categories
    ]
    coco_ann_id = 0
    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
    from os import path as osp
    for info in mmcv.track_iter_progress(kitti_infos):
        coco_infos = get_2d_boxes(info, occluded=[0, 1, 2, 3], mono3d=mono3d)
        (height, width,
         _) = mmcv.imread(osp.join(root_path,
                                   info['image']['image_path'])).shape
        coco_2d_dict['images'].append(
            dict(file_name=info['image']['image_path'],
                 id=info['image']['image_idx'],
                 Tri2v=info['calib']['Tr_imu_to_velo'],
                 Trv2c=info['calib']['Tr_velo_to_cam'],
                 rect=info['calib']['R0_rect'],
                 cam_intrinsic=info['calib']['P2'],
                 width=width,
                 height=height))
        for coco_info in coco_infos:
            if coco_info is None:
                continue
            # add an empty key for coco format
            coco_info['segmentation'] = []
            coco_info['id'] = coco_ann_id
            coco_2d_dict['annotations'].append(coco_info)
            coco_ann_id += 1
    if mono3d:
        json_prefix = f'{info_path[:-4]}_mono3d'
    else:
        json_prefix = f'{info_path[:-4]}'
    mmcv.dump(coco_2d_dict, f'{json_prefix}.coco.json')
Example #19
def convert_dir(video_root):
    '''
    Convert all non-mp4 videos to *.mp4
    '''
    videos = set(glob.glob(video_root + "/*")) - set(
        glob.glob(video_root + "/*.mp4"))

    print('Working on path: {}'.format(video_root))
    print('Videos to be converted: {}'.format(len(videos)))

    for i, video in enumerate(mmcv.track_iter_progress(videos)):
        src = video
        dirname = os.path.dirname(src)
        basename = os.path.basename(src)
        dst = os.path.join(dirname, basename[0:11] + '.mp4')
        cmd = 'sudo ffmpeg -y -i "{}" "{}" >> {}/ffmpeg.log 2>&1'.format(
            src, dst, os.getcwd())

        subprocess.call(cmd, shell=True)
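A hypothetical driver that converts every class folder under a dataset root; the directory layout is an assumption:

import glob

for class_dir in sorted(glob.glob('data/kinetics/videos_train/*')):
    convert_dir(class_dir)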
Example #20
    def _format_bbox(self, results, jsonfile_prefix=None):
        """Convert the results to the standard format.

        Args:
            results (list[dict]): Testing results of the dataset.
            jsonfile_prefix (str): The prefix of the output jsonfile.
                You can specify the output directory/filename by
                modifying the jsonfile_prefix. Default: None.

        Returns:
            str: Path of the output json file.
        """
        lyft_annos = {}
        mapped_class_names = self.CLASSES

        print('Start to convert detection format...')
        for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
            annos = []
            boxes = output_to_lyft_box(det)
            sample_token = self.data_infos[sample_id]['token']
            boxes = lidar_lyft_box_to_global(self.data_infos[sample_id], boxes)
            for i, box in enumerate(boxes):
                name = mapped_class_names[box.label]
                lyft_anno = dict(
                    sample_token=sample_token,
                    translation=box.center.tolist(),
                    size=box.wlh.tolist(),
                    rotation=box.orientation.elements.tolist(),
                    name=name,
                    score=box.score)
                annos.append(lyft_anno)
            lyft_annos[sample_token] = annos
        lyft_submissions = {
            'meta': self.modality,
            'results': lyft_annos,
        }

        mmcv.mkdir_or_exist(jsonfile_prefix)
        res_path = osp.join(jsonfile_prefix, 'results_lyft.json')
        print('Results written to', res_path)
        mmcv.dump(lyft_submissions, res_path)
        return res_path
Example #21
def save_proposals_result(batch_video_list, batch_result_xmin,
                          batch_result_xmax, batch_result_iou,
                          batch_result_pstart, batch_result_pend, tscale,
                          result_dir):
    """ Save proposal results to csv files
    """
    print('Saving results ...')
    columns = ["iou", "start", "end", "xmin", "xmax"]
    """for each batch video list
    """
    for idx in mmcv.track_iter_progress(range(len(batch_video_list))):
        b_video = batch_video_list[idx]
        b_xmin = batch_result_xmin[idx]
        b_xmax = batch_result_xmax[idx]
        b_iou = batch_result_iou[idx]
        b_pstart = batch_result_pstart[idx]
        b_pend = batch_result_pend[idx]
        """for each video
        """
        for j in range(len(b_video)):
            tmp_video = b_video[j]
            tmp_xmin = b_xmin[j]
            tmp_xmax = b_xmax[j]
            tmp_iou = b_iou[j]
            tmp_pstart = b_pstart[j]
            tmp_pend = b_pend[j]
            res = []
            # save all proposal results: enumerate (start, end) index pairs
            for ti in range(tscale):
                for tj in range(ti, tscale):
                    start = tmp_pstart[ti]
                    end = tmp_pend[tj]
                    iou = tmp_iou[ti, tj]
                    res.append([iou, start, end, tmp_xmin[ti], tmp_xmax[tj]])
            tmp_result = np.stack(res)
            tmp_df = pd.DataFrame(tmp_result, columns=columns)
            """ write csv file 
            """
            tmp_df.to_csv(os.path.join(result_dir, tmp_video + '.csv'),
                          index=False)
Example #22
def _create_reduced_point_cloud(data_path,
                                info_path,
                                save_path=None,
                                back=False):
    with open(info_path, 'rb') as f:
        kitti_infos = pickle.load(f)

    for info in track_iter_progress(kitti_infos):
        pc_info = info['point_cloud']
        image_info = info['image']
        calib = info['calib']

        v_path = pc_info['velodyne_path']
        v_path = Path(data_path) / v_path
        points_v = np.fromfile(str(v_path), dtype=np.float32,
                               count=-1).reshape([-1, 4])
        rect = calib['R0_rect']
        P2 = calib['P2']
        Trv2c = calib['Tr_velo_to_cam']
        # first remove z < 0 points
        # keep = points_v[:, -1] > 0
        # points_v = points_v[keep]
        # then remove outside.
        if back:
            points_v[:, 0] = -points_v[:, 0]
        points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2,
                                                    image_info['image_shape'])
        if save_path is None:
            save_dir = v_path.parent.parent / (v_path.parent.stem + '_reduced')
            if not save_dir.exists():
                save_dir.mkdir()
            save_filename = save_dir / v_path.name
            # save_filename = str(v_path) + '_reduced'
            if back:
                # save_filename is a Path here; convert before appending
                save_filename = str(save_filename) + '_back'
        else:
            save_filename = str(Path(save_path) / v_path.name)
            if back:
                save_filename += '_back'
        with open(save_filename, 'wb') as f:
            points_v.tofile(f)
Example #23
def labelling_video(config, checkpoint, work_dir, video, outdir, iou_thr):
    '''
    Pre-annotation data format:
        C(x,y) -> box centre coordinates
        w -> width
        h -> height
        logs -> comment (optional)
    '''
    csv_file = str(video) + '_layout.csv'
    model = init_detector(config, checkpoint, device='cuda:0')
    print(os.path.join(work_dir, video))
    video_reader = mmcv.VideoReader(os.path.join(work_dir, video))
    print(video_reader.frame_cnt)
    if not os.path.exists(outdir):
        os.mkdir(outdir)
    layout = []
    count = 0

    for frame in mmcv.track_iter_progress(video_reader):
        result = inference_detector(model, frame)
        for i in range(len(result[0])):
            # detections are assumed score-sorted, so stop at the first one
            # below the threshold
            if result[0][i][4] < iou_thr:
                break
            layout.append(np.insert(result[0][i][:4], 0, count))
        count += 1

    layout_df = pd.DataFrame(layout, columns=['frame', 'x', 'y', 'x2', 'y2'])
    layout_df['w'] = abs(layout_df['x2'] - layout_df['x'])
    layout_df['h'] = abs(layout_df['y2'] - layout_df['y'])
    layout_df['logs'] = np.nan
    layout_df = layout_df.drop(columns=['x2', 'y2'])
    layout_df = layout_df.astype({
        'frame': 'int32',
        'x': 'int32',
        'y': 'int32',
        'w': 'int32',
        'h': 'int32'
    })
    layout_df.to_csv(os.path.join(work_dir, csv_file), index=False)
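A hypothetical invocation; the config, checkpoint, and paths are placeholders:

labelling_video('configs/faster_rcnn_r50_fpn.py', 'checkpoints/latest.pth',
                work_dir='work_dir', video='session_01.mp4',
                outdir='work_dir/frames', iou_thr=0.5)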
Example #24
def _calculate_num_points_in_gt(data_path,
                                infos,
                                relative_path,
                                remove_outside=True,
                                num_features=4):
    for info in mmcv.track_iter_progress(infos):
        pc_info = info['point_cloud']
        image_info = info['image']
        calib = info['calib']
        if relative_path:
            v_path = str(Path(data_path) / pc_info['velodyne_path'])
        else:
            v_path = pc_info['velodyne_path']
        points_v = np.fromfile(v_path, dtype=np.float32,
                               count=-1).reshape([-1, num_features])
        rect = calib['R0_rect']
        Trv2c = calib['Tr_velo_to_cam']
        P2 = calib['P2']
        if remove_outside:
            points_v = box_np_ops.remove_outside_points(
                points_v, rect, Trv2c, P2, image_info['image_shape'])

        # points_v = points_v[points_v[:, 0] > 0]
        annos = info['annos']
        num_obj = len([n for n in annos['name'] if n != 'DontCare'])
        # annos = kitti.filter_kitti_anno(annos, ['DontCare'])
        dims = annos['dimensions'][:num_obj]
        loc = annos['location'][:num_obj]
        rots = annos['rotation_y'][:num_obj]
        gt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]],
                                         axis=1)
        gt_boxes_lidar = box_np_ops.box_camera_to_lidar(
            gt_boxes_camera, rect, Trv2c)
        indices = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar)
        num_points_in_gt = indices.sum(0)
        num_ignored = len(annos['dimensions']) - num_obj
        num_points_in_gt = np.concatenate(
            [num_points_in_gt, -np.ones([num_ignored])])
        annos['num_points_in_gt'] = num_points_in_gt.astype(np.int32)
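In mmdetection3d this helper runs while the KITTI info files are being generated; a hypothetical standalone call, with paths as placeholders:

import pickle

with open('data/kitti/kitti_infos_train.pkl', 'rb') as f:
    infos = pickle.load(f)
_calculate_num_points_in_gt('data/kitti', infos, relative_path=True)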
Example #25
    def _classify_image(self):
        print('Start detecting all commodity images and pruning by class:')
        # to speed up debugging, only the first 1000 commodity images are pruned
        for commodity in mmcv.track_iter_progress(
                self.reader.commodity_index_list):
            labels_in_this_commodity = {i: 0 for i in range(23)}
            imgs_in_this_commodity = list(
                self.reader.commodity_index2img_path_list[commodity])
            for img in imgs_in_this_commodity:
                result_over_thr, labels_over_thr, _ = \
                    DetectionWorker.get_result_and_feats(
                        self.classify_model, img)
                self.img_boxes_label_result[img] = (result_over_thr,
                                                    labels_over_thr)
                for label in labels_over_thr:
                    labels_in_this_commodity[label] += 1
            labels_in_this_commodity_list = sorted(
                labels_in_this_commodity.items(),
                key=lambda x: x[1],
                reverse=True)[:2]  # keep the two most frequent labels
            for i, item in enumerate(labels_in_this_commodity_list):
                label, appear_num = item
                if i != 0 and appear_num == 0:
                    break
                # register the commodity under the class label it belongs to
                self.class2commoditys[label].add(commodity)
            # pick representative images and prune the commodity image list
            present_imgs = []
            random.shuffle(imgs_in_this_commodity)
            for img in imgs_in_this_commodity:
                result_over_thr, labels_over_thr = self.img_boxes_label_result[
                    img]
                # compare label ids, not the (label, count) tuples
                if any(label in labels_over_thr
                       for label, _ in labels_in_this_commodity_list):
                    present_imgs.append(img)
                if len(present_imgs) == 2:  # controls how many images to keep
                    break
            self.reader.commodity_index2img_path_list[commodity] = present_imgs
Example #26
    def bbox2result_kitti2d(self,
                            net_outputs,
                            class_names,
                            pklfile_prefix=None,
                            submission_prefix=None):
        """Convert 2D detection results to kitti format for evaluation and test
        submission.

        Args:
            net_outputs (list[np.ndarray]): List of array storing the \
                inferenced bounding boxes and scores.
            class_names (list[String]): A list of class names.
            pklfile_prefix (str | None): The prefix of pkl file.
            submission_prefix (str | None): The prefix of submission file.

        Returns:
            list[dict]: A list of dictionaries have the kitti format
        """
        assert len(net_outputs) == len(self.data_infos), \
            'invalid list length of network outputs'
        det_annos = []
        print('\nConverting prediction to KITTI format')
        for i, bboxes_per_sample in enumerate(
                mmcv.track_iter_progress(net_outputs)):
            annos = []
            anno = dict(name=[],
                        truncated=[],
                        occluded=[],
                        alpha=[],
                        bbox=[],
                        dimensions=[],
                        location=[],
                        rotation_y=[],
                        score=[])
            sample_idx = self.data_infos[i]['image']['image_idx']

            num_example = 0
            for label in range(len(bboxes_per_sample)):
                bbox = bboxes_per_sample[label]
                for i in range(bbox.shape[0]):
                    anno['name'].append(class_names[int(label)])
                    anno['truncated'].append(0.0)
                    anno['occluded'].append(0)
                    anno['alpha'].append(0.0)
                    anno['bbox'].append(bbox[i, :4])
                    # set dimensions (height, width, length) to zero
                    anno['dimensions'].append(
                        np.zeros(shape=[3], dtype=np.float32))
                    # set the 3D translation to (-1000, -1000, -1000)
                    anno['location'].append(
                        np.ones(shape=[3], dtype=np.float32) * (-1000.0))
                    anno['rotation_y'].append(0.0)
                    anno['score'].append(bbox[i, 4])
                    num_example += 1

            if num_example == 0:
                annos.append(
                    dict(
                        name=np.array([]),
                        truncated=np.array([]),
                        occluded=np.array([]),
                        alpha=np.array([]),
                        bbox=np.zeros([0, 4]),
                        dimensions=np.zeros([0, 3]),
                        location=np.zeros([0, 3]),
                        rotation_y=np.array([]),
                        score=np.array([]),
                    ))
            else:
                anno = {k: np.stack(v) for k, v in anno.items()}
                annos.append(anno)

            annos[-1]['sample_idx'] = np.array([sample_idx] * num_example,
                                               dtype=np.int64)
            det_annos += annos

        if pklfile_prefix is not None:
            # save file in pkl format; keep an extension so the dump
            # format can be inferred
            pklfile_path = (pklfile_prefix if pklfile_prefix.endswith(
                ('.pkl', '.pickle')) else f'{pklfile_prefix}.pkl')
            mmcv.dump(det_annos, pklfile_path)

        if submission_prefix is not None:
            # save file in submission format
            mmcv.mkdir_or_exist(submission_prefix)
            print(f'Saving KITTI submission to {submission_prefix}')
            for i, anno in enumerate(det_annos):
                sample_idx = self.data_infos[i]['image']['image_idx']
                cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt'
                with open(cur_det_file, 'w') as f:
                    bbox = anno['bbox']
                    loc = anno['location']
                    dims = anno['dimensions'][:, ::-1]  # lhw -> hwl per box
                    for idx in range(len(bbox)):
                        print(
                            '{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '
                            '{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '
                            '{:.4f} {:.4f}'.format(
                                anno['name'][idx],
                                anno['alpha'][idx],
                                *bbox[idx],  # 4 float
                                *dims[idx],  # 3 float
                                *loc[idx],  # 3 float
                                anno['rotation_y'][idx],
                                anno['score'][idx]),
                            file=f,
                        )
            print(f'Result is saved to {submission_prefix}')

        return det_annos
Example #27
    def bbox2result_kitti(self,
                          net_outputs,
                          class_names,
                          pklfile_prefix=None,
                          submission_prefix=None):
        """Convert 3D detection results to kitti format for evaluation and test
        submission.

        Args:
            net_outputs (list[np.ndarray]): List of array storing the \
                inferenced bounding boxes and scores.
            class_names (list[String]): A list of class names.
            pklfile_prefix (str | None): The prefix of pkl file.
            submission_prefix (str | None): The prefix of submission file.

        Returns:
            list[dict]: A list of dictionaries with the kitti format.
        """
        assert len(net_outputs) == len(self.data_infos), \
            'invalid list length of network outputs'
        if submission_prefix is not None:
            mmcv.mkdir_or_exist(submission_prefix)

        det_annos = []
        print('\nConverting prediction to KITTI format')
        for idx, pred_dicts in enumerate(
                mmcv.track_iter_progress(net_outputs)):
            annos = []
            info = self.data_infos[idx]
            sample_idx = info['image']['image_idx']
            image_shape = info['image']['image_shape'][:2]
            box_dict = self.convert_valid_bboxes(pred_dicts, info)
            anno = {
                'name': [],
                'truncated': [],
                'occluded': [],
                'alpha': [],
                'bbox': [],
                'dimensions': [],
                'location': [],
                'rotation_y': [],
                'score': []
            }
            if len(box_dict['bbox']) > 0:
                box_2d_preds = box_dict['bbox']
                box_preds = box_dict['box3d_camera']
                scores = box_dict['scores']
                box_preds_lidar = box_dict['box3d_lidar']
                label_preds = box_dict['label_preds']

                for box, box_lidar, bbox, score, label in zip(
                        box_preds, box_preds_lidar, box_2d_preds, scores,
                        label_preds):
                    bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
                    bbox[:2] = np.maximum(bbox[:2], [0, 0])
                    anno['name'].append(class_names[int(label)])
                    anno['truncated'].append(0.0)
                    anno['occluded'].append(0)
                    anno['alpha'].append(
                        -np.arctan2(-box_lidar[1], box_lidar[0]) + box[6])
                    anno['bbox'].append(bbox)
                    anno['dimensions'].append(box[3:6])
                    anno['location'].append(box[:3])
                    anno['rotation_y'].append(box[6])
                    anno['score'].append(score)

                anno = {k: np.stack(v) for k, v in anno.items()}
                annos.append(anno)
            else:
                anno = {
                    'name': np.array([]),
                    'truncated': np.array([]),
                    'occluded': np.array([]),
                    'alpha': np.array([]),
                    'bbox': np.zeros([0, 4]),
                    'dimensions': np.zeros([0, 3]),
                    'location': np.zeros([0, 3]),
                    'rotation_y': np.array([]),
                    'score': np.array([]),
                }
                annos.append(anno)

            if submission_prefix is not None:
                curr_file = f'{submission_prefix}/{sample_idx:06d}.txt'
                with open(curr_file, 'w') as f:
                    bbox = anno['bbox']
                    loc = anno['location']
                    dims = anno['dimensions']  # reordered lhw -> hwl in the print below

                    for idx in range(len(bbox)):
                        print(
                            '{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} '
                            '{:.4f} {:.4f} {:.4f} '
                            '{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.format(
                                anno['name'][idx], anno['alpha'][idx],
                                bbox[idx][0], bbox[idx][1], bbox[idx][2],
                                bbox[idx][3], dims[idx][1], dims[idx][2],
                                dims[idx][0], loc[idx][0], loc[idx][1],
                                loc[idx][2], anno['rotation_y'][idx],
                                anno['score'][idx]),
                            file=f)

            annos[-1]['sample_idx'] = np.array([sample_idx] *
                                               len(annos[-1]['score']),
                                               dtype=np.int64)

            det_annos += annos

        if pklfile_prefix is not None:
            if pklfile_prefix.endswith(('.pkl', '.pickle')):
                out = pklfile_prefix
            else:
                out = f'{pklfile_prefix}.pkl'
            mmcv.dump(det_annos, out)
            print(f'Result is saved to {out}.')

        return det_annos
Example #28
def main():
    """Visualize the demo images."""
    parser = ArgumentParser()
    parser.add_argument('pose_config', help='Config file for pose')
    parser.add_argument('pose_checkpoint', help='Checkpoint file for pose')
    parser.add_argument('--video-path', type=str, help='Video path')
    parser.add_argument('--show',
                        action='store_true',
                        default=False,
                        help='whether to show visualizations.')
    parser.add_argument('--out-video-root',
                        default='',
                        help='Root of the output video file. '
                        'Default not saving the visualization video.')
    parser.add_argument('--device',
                        default='cuda:0',
                        help='Device used for inference')
    parser.add_argument('--kpt-thr',
                        type=float,
                        default=0.3,
                        help='Keypoint score threshold')
    parser.add_argument('--pose-nms-thr',
                        type=float,
                        default=0.9,
                        help='OKS threshold for pose NMS')
    parser.add_argument('--radius',
                        type=int,
                        default=4,
                        help='Keypoint radius for visualization')
    parser.add_argument('--thickness',
                        type=int,
                        default=1,
                        help='Link thickness for visualization')

    args = parser.parse_args()

    assert args.show or (args.out_video_root != '')

    # build the pose model from a config file and a checkpoint file
    pose_model = init_pose_model(args.pose_config,
                                 args.pose_checkpoint,
                                 device=args.device.lower())

    dataset = pose_model.cfg.data['test']['type']
    dataset_info = pose_model.cfg.data['test'].get('dataset_info', None)
    if dataset_info is None:
        warnings.warn(
            'Please set `dataset_info` in the config. '
            'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
            DeprecationWarning)
        assert (dataset == 'BottomUpCocoDataset')
    else:
        dataset_info = DatasetInfo(dataset_info)

    # read video
    video = mmcv.VideoReader(args.video_path)
    assert video.opened, f'Failed to load video file {args.video_path}'

    if args.out_video_root == '':
        save_out_video = False
    else:
        os.makedirs(args.out_video_root, exist_ok=True)
        save_out_video = True

    if save_out_video:
        fps = video.fps
        size = (video.width, video.height)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        videoWriter = cv2.VideoWriter(
            os.path.join(args.out_video_root,
                         f'vis_{os.path.basename(args.video_path)}'), fourcc,
            fps, size)

    # optional
    return_heatmap = False

    # e.g. use ('backbone', ) to return backbone feature
    output_layer_names = None

    print('Running inference...')
    for _, cur_frame in enumerate(mmcv.track_iter_progress(video)):
        pose_results, _ = inference_bottom_up_pose_model(
            pose_model,
            cur_frame,
            dataset=dataset,
            dataset_info=dataset_info,
            pose_nms_thr=args.pose_nms_thr,
            return_heatmap=return_heatmap,
            outputs=output_layer_names)

        # show the results
        vis_frame = vis_pose_result(pose_model,
                                    cur_frame,
                                    pose_results,
                                    radius=args.radius,
                                    thickness=args.thickness,
                                    dataset=dataset,
                                    dataset_info=dataset_info,
                                    kpt_score_thr=args.kpt_thr,
                                    show=False)

        if args.show:
            cv2.imshow('Image', vis_frame)

        if save_out_video:
            videoWriter.write(vis_frame)

        if args.show and cv2.waitKey(1) & 0xFF == ord('q'):
            break

    if save_out_video:
        video_writer.release()
    if args.show:
        cv2.destroyAllWindows()
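
# Usage sketch (assumption): the flag names follow the parser options above,
# but the script name, config and checkpoint paths are placeholders.
#
#   python bottom_up_video_demo.py \
#       ${POSE_CONFIG} ${POSE_CHECKPOINT} \
#       --video-path demo.mp4 \
#       --out-video-root vis_results \
#       --kpt-thr 0.3 --pose-nms-thr 0.9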
Example No. 29
def _fill_trainval_infos(nusc,
                         train_scenes,
                         val_scenes,
                         test=False,
                         max_sweeps=10):
    """Generate the train/val infos from the raw data.

    Args:
        nusc (:obj:`NuScenes`): Dataset class in the nuScenes dataset.
        train_scenes (list[str]): Tokens of the training scenes.
        val_scenes (list[str]): Tokens of the validation scenes.
        test (bool): Whether to use test mode. In test mode, no
            annotations can be accessed. Default: False.
        max_sweeps (int): Max number of sweeps. Default: 10.

    Returns:
        tuple[list[dict]]: Information of training set and validation set
            that will be saved to the info file.
    """
    train_nusc_infos = []
    val_nusc_infos = []

    for sample in mmcv.track_iter_progress(nusc.sample):
        lidar_token = sample['data']['LIDAR_TOP']
        cam_token = sample['data']['CAM_FRONT']
        sd_rec = nusc.get('sample_data', lidar_token)
        cs_record = nusc.get('calibrated_sensor',
                             sd_rec['calibrated_sensor_token'])
        pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])
        lidar_path, boxes_lidar, _ = nusc.get_sample_data(lidar_token)
        _, boxes_cam, _ = nusc.get_sample_data(cam_token)
        lidarseg = nusc.get('lidarseg', lidar_token)
        lidarseg_path = os.path.join(nusc.dataroot, lidarseg['filename'])

        mmcv.check_file_exist(lidar_path)

        info = {
            'lidar_path': lidar_path,
            'lidarseg_path': lidarseg_path,
            'token': sample['token'],
            'sweeps': [],
            'cams': dict(),
            'lidar2ego_translation': cs_record['translation'],
            'lidar2ego_rotation': cs_record['rotation'],
            'ego2global_translation': pose_record['translation'],
            'ego2global_rotation': pose_record['rotation'],
            'timestamp': sample['timestamp'],
        }

        l2e_r = info['lidar2ego_rotation']
        l2e_t = info['lidar2ego_translation']
        e2g_r = info['ego2global_rotation']
        e2g_t = info['ego2global_translation']
        l2e_r_mat = Quaternion(l2e_r).rotation_matrix
        e2g_r_mat = Quaternion(e2g_r).rotation_matrix
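        # these rotations/translations define the lidar -> ego -> global
        # chain; obtain_sensor2top() reuses them below to express each camera
        # and each lidar sweep in the top-lidar frame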

        # obtain information for the 6 camera images of each frame
        camera_types = [
            'CAM_FRONT',
            'CAM_FRONT_RIGHT',
            'CAM_FRONT_LEFT',
            'CAM_BACK',
            'CAM_BACK_LEFT',
            'CAM_BACK_RIGHT',
        ]
        for cam in camera_types:
            cam_token = sample['data'][cam]
            cam_path, _, cam_intrinsic = nusc.get_sample_data(cam_token)
            cam_info = obtain_sensor2top(nusc, cam_token, l2e_t, l2e_r_mat,
                                         e2g_t, e2g_r_mat, cam)
            cam_info.update(cam_intrinsic=cam_intrinsic)
            info['cams'].update({cam: cam_info})

        # obtain sweeps for a single key-frame
        sd_rec = nusc.get('sample_data', lidar_token)
        sweeps = []
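        # walk backwards through the preceding lidar records until max_sweeps
        # are collected or the start of the sequence is reached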
        while len(sweeps) < max_sweeps:
            if sd_rec['prev'] != '':
                sweep = obtain_sensor2top(nusc, sd_rec['prev'], l2e_t,
                                          l2e_r_mat, e2g_t, e2g_r_mat, 'lidar')
                sweeps.append(sweep)
                sd_rec = nusc.get('sample_data', sd_rec['prev'])
            else:
                break
        info['sweeps'] = sweeps
        # obtain annotation
        if not test:
            # full range
            # annotations = [
            #     nusc.get('sample_annotation', token)
            #     for token in sample['anns']
            # ]

            # inside camera
            valid_box_tokens = [box.token for box in boxes_cam]
            boxes_lidar_filter = [
                box for box in boxes_lidar if box.token in valid_box_tokens
            ]
            annotations = [
                nusc.get('sample_annotation', token)
                for token in valid_box_tokens
            ]

            locs = np.array([b.center
                             for b in boxes_lidar_filter]).reshape(-1, 3)
            dims = np.array([b.wlh for b in boxes_lidar_filter]).reshape(-1, 3)
            rots = np.array([
                b.orientation.yaw_pitch_roll[0] for b in boxes_lidar_filter
            ]).reshape(-1, 1)
            # velocity = np.array(
            #    [nusc.box_velocity(token)[:2] for token in sample['anns']])
            velocity = np.array(
                [nusc.box_velocity(token)[:2] for token in valid_box_tokens])
            valid_flag = np.array(
                [(anno['num_lidar_pts'] + anno['num_radar_pts']) > 0
                 for anno in annotations],
                dtype=bool).reshape(-1)
            # convert velocity from the global frame to the lidar frame;
            # only the rotations are inverted, since velocity is a direction
            for i in range(len(boxes_lidar_filter)):
                velo = np.array([*velocity[i], 0.0])
                velo = velo @ np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(
                    l2e_r_mat).T
                velocity[i] = velo[:2]

            names = [b.name for b in boxes_lidar_filter]
            for i in range(len(names)):
                if names[i] in NuScenesDataset.NameMapping:
                    names[i] = NuScenesDataset.NameMapping[names[i]]
            names = np.array(names)
            # convert rotation to the SECOND box format: yaw = -yaw - pi / 2
            gt_boxes = np.concatenate([locs, dims, -rots - np.pi / 2], axis=1)
            assert len(gt_boxes) == len(
                annotations), f'{len(gt_boxes)}, {len(annotations)}'
            info['gt_boxes'] = gt_boxes
            info['gt_names'] = names
            info['gt_velocity'] = velocity.reshape(-1, 2)
            info['num_lidar_pts'] = np.array(
                [a['num_lidar_pts'] for a in annotations])
            info['num_radar_pts'] = np.array(
                [a['num_radar_pts'] for a in annotations])
            info['valid_flag'] = valid_flag

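        # note: samples whose scene is not in train_scenes fall through to val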
        if sample['scene_token'] in train_scenes:
            train_nusc_infos.append(info)
        else:
            val_nusc_infos.append(info)

    return train_nusc_infos, val_nusc_infos
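
# Usage sketch (assumption): a minimal driver with a placeholder scene split.
# Requires the nuscenes-devkit; 'v1.0-mini' and the dataroot are examples only.
from nuscenes.nuscenes import NuScenes

nusc = NuScenes(version='v1.0-mini', dataroot='data/nuscenes', verbose=True)
scene_tokens = [s['token'] for s in nusc.scene]
train_scenes, val_scenes = set(scene_tokens[:8]), set(scene_tokens[8:])
train_infos, val_infos = _fill_trainval_infos(
    nusc, train_scenes, val_scenes, test=False, max_sweeps=10)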
Example No. 30
def create_groundtruth_database(dataset_class_name,
                                data_path,
                                info_prefix,
                                info_path=None,
                                mask_anno_path=None,
                                used_classes=None,
                                database_save_path=None,
                                db_info_save_path=None,
                                relative_path=True,
                                add_rgb=False,
                                lidar_only=False,
                                bev_only=False,
                                coors_range=None,
                                with_mask=False):
    """Given the raw data, generate the ground truth database.

    Args:
        dataset_class_name (str): Name of the input dataset.
        data_path (str): Path of the data.
        info_prefix (str): Prefix of the info file.
        info_path (str): Path of the info file.
            Default: None.
        mask_anno_path (str): Path of the mask_anno.
            Default: None.
        used_classes (list[str]): Only objects of these classes are added
            to the database. Default: None (keep all classes).
        database_save_path (str): Path to save database.
            Default: None.
        db_info_save_path (str): Path to save db_info.
            Default: None.
        relative_path (bool): Whether to use relative path.
            Default: True.
        add_rgb (bool): Unused in this function. Default: False.
        lidar_only (bool): Unused in this function. Default: False.
        bev_only (bool): Unused in this function. Default: False.
        coors_range (list): Unused in this function. Default: None.
        with_mask (bool): Whether to use mask.
            Default: False.
    """
    print(f'Create GT Database of {dataset_class_name}')
    dataset_cfg = dict(type=dataset_class_name,
                       data_root=data_path,
                       ann_file=info_path)
    if dataset_class_name == 'KittiDataset':
        file_client_args = dict(backend='disk')
        dataset_cfg.update(test_mode=False,
                           split='training',
                           modality=dict(
                               use_lidar=True,
                               use_depth=False,
                               use_lidar_intensity=True,
                               use_camera=with_mask,
                           ),
                           pipeline=[
                               dict(type='LoadPointsFromFile',
                                    coord_type='LIDAR',
                                    load_dim=4,
                                    use_dim=4,
                                    file_client_args=file_client_args),
                               dict(type='LoadAnnotations3D',
                                    with_bbox_3d=True,
                                    with_label_3d=True,
                                    file_client_args=file_client_args)
                           ])

    elif dataset_class_name == 'NuScenesDataset':
        dataset_cfg.update(use_valid_flag=True,
                           pipeline=[
                               dict(type='LoadPointsFromFile',
                                    load_dim=5,
                                    use_dim=5),
                               dict(type='LoadPointsFromMultiSweeps',
                                    coord_type='LIDAR',
                                    sweeps_num=10,
                                    use_dim=[0, 1, 2, 3, 4],
                                    pad_empty_sweeps=True,
                                    remove_close=True),
                               dict(type='LoadAnnotations3D',
                                    with_bbox_3d=True,
                                    with_label_3d=True)
                           ])

    elif dataset_class_name == 'WaymoDataset':
        file_client_args = dict(backend='disk')
        dataset_cfg.update(test_mode=False,
                           split='training',
                           modality=dict(
                               use_lidar=True,
                               use_depth=False,
                               use_lidar_intensity=True,
                               use_camera=False,
                           ),
                           pipeline=[
                               dict(type='LoadPointsFromFile',
                                    coord_type='LIDAR',
                                    load_dim=6,
                                    use_dim=5,
                                    file_client_args=file_client_args),
                               dict(type='LoadAnnotations3D',
                                    with_bbox_3d=True,
                                    with_label_3d=True,
                                    file_client_args=file_client_args)
                           ])

    dataset = build_dataset(dataset_cfg)

    if database_save_path is None:
        database_save_path = osp.join(data_path, f'{info_prefix}_gt_database')
    if db_info_save_path is None:
        db_info_save_path = osp.join(data_path,
                                     f'{info_prefix}_dbinfos_train.pkl')
    mmcv.mkdir_or_exist(database_save_path)
    all_db_infos = dict()
    if with_mask:
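        # build a filename -> COCO image id lookup so that the 2D mask
        # annotations can be fetched for each frame below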
        coco = COCO(osp.join(data_path, mask_anno_path))
        imgIds = coco.getImgIds()
        file2id = dict()
        for i in imgIds:
            info = coco.loadImgs([i])[0]
            file2id.update({info['file_name']: i})

    group_counter = 0
    for j in track_iter_progress(list(range(len(dataset)))):
        input_dict = dataset.get_data_info(j)
        dataset.pre_pipeline(input_dict)
        example = dataset.pipeline(input_dict)
        annos = example['ann_info']
        image_idx = example['sample_idx']
        points = example['points'].tensor.numpy()
        gt_boxes_3d = annos['gt_bboxes_3d'].tensor.numpy()
        names = annos['gt_names']
        group_dict = dict()
        if 'group_ids' in annos:
            group_ids = annos['group_ids']
        else:
            group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64)
        difficulty = np.zeros(gt_boxes_3d.shape[0], dtype=np.int32)
        if 'difficulty' in annos:
            difficulty = annos['difficulty']

        num_obj = gt_boxes_3d.shape[0]
        point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d)
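        # point_indices is a bool array of shape (num_points, num_obj);
        # column i selects the points inside the i-th rotated 3D box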

        if with_mask:
            # prepare masks
            gt_boxes = annos['gt_bboxes']
            img_path = osp.split(example['img_info']['filename'])[-1]
            if img_path not in file2id:
                print(f'skip image {img_path} for empty mask')
                continue
            img_id = file2id[img_path]
            kins_annIds = coco.getAnnIds(imgIds=img_id)
            kins_raw_info = coco.loadAnns(kins_annIds)
            kins_ann_info = _parse_coco_ann_info(kins_raw_info)
            h, w = annos['img_shape'][:2]
            gt_masks = [
                _poly2mask(mask, h, w) for mask in kins_ann_info['masks']
            ]
            # get mask inds based on iou mapping
            bbox_iou = bbox_overlaps(kins_ann_info['bboxes'], gt_boxes)
            mask_inds = bbox_iou.argmax(axis=0)
            valid_inds = (bbox_iou.max(axis=0) > 0.5)
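            # each 2D GT box is matched to the instance mask with the highest
            # IoU; matches with IoU <= 0.5 are marked invalid and skipped later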

            # mask the image
            # use more precise crop when it is ready
            # object_img_patches = np.ascontiguousarray(
            #     np.stack(object_img_patches, axis=0).transpose(0, 3, 1, 2))
            # crop image patches using roi_align
            # object_img_patches = crop_image_patch_v2(
            #     torch.Tensor(gt_boxes),
            #     torch.Tensor(mask_inds).long(), object_img_patches)
            object_img_patches, object_masks = crop_image_patch(
                gt_boxes, gt_masks, mask_inds, annos['img'])

        for i in range(num_obj):
            filename = f'{image_idx}_{names[i]}_{i}.bin'
            abs_filepath = osp.join(database_save_path, filename)
            rel_filepath = osp.join(f'{info_prefix}_gt_database', filename)

            # save point clouds and image patches for each object
            gt_points = points[point_indices[:, i]]
            gt_points[:, :3] -= gt_boxes_3d[i, :3]

            if with_mask:
                if object_masks[i].sum() == 0 or not valid_inds[i]:
                    # Skip object for empty or invalid mask
                    continue
                img_patch_path = abs_filepath + '.png'
                mask_patch_path = abs_filepath + '.mask.png'
                mmcv.imwrite(object_img_patches[i], img_patch_path)
                mmcv.imwrite(object_masks[i], mask_patch_path)

            # gt_points.tofile() writes raw bytes, so open in binary mode
            with open(abs_filepath, 'wb') as f:
                gt_points.tofile(f)

            if (used_classes is None) or names[i] in used_classes:
                db_info = {
                    'name': names[i],
                    'path': rel_filepath,
                    'image_idx': image_idx,
                    'gt_idx': i,
                    'box3d_lidar': gt_boxes_3d[i],
                    'num_points_in_gt': gt_points.shape[0],
                    'difficulty': difficulty[i],
                }
                local_group_id = group_ids[i]
                # if local_group_id >= 0:
                if local_group_id not in group_dict:
                    group_dict[local_group_id] = group_counter
                    group_counter += 1
                db_info['group_id'] = group_dict[local_group_id]
                if 'score' in annos:
                    db_info['score'] = annos['score'][i]
                if with_mask:
                    db_info.update({'box2d_camera': gt_boxes[i]})
                if names[i] in all_db_infos:
                    all_db_infos[names[i]].append(db_info)
                else:
                    all_db_infos[names[i]] = [db_info]

    for k, v in all_db_infos.items():
        print(f'Collected {len(v)} {k} database infos')

    with open(db_info_save_path, 'wb') as f:
        pickle.dump(all_db_infos, f)
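
# Usage sketch (assumption): paths and the info-file name are placeholders
# that depend on how the KITTI info files were generated.
create_groundtruth_database(
    'KittiDataset',
    data_path='data/kitti',
    info_prefix='kitti',
    info_path='data/kitti/kitti_infos_train.pkl',
    with_mask=False)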