import os
import tempfile

import numpy as np
import pytest
import torch

# Imports below assume the mmdet3d 1.0.x package layout.
from mmdet3d.apis import show_result_meshlab
from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes, Coord3DMode,
                               DepthInstance3DBoxes, LiDARInstance3DBoxes)
from mmdet3d.models import build_detector, build_head

# _setup_seed, _get_detector_cfg and _get_head_cfg are test helpers defined
# elsewhere in the test module.


def test_fcos3d():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')

    _setup_seed(0)
    fcos3d_cfg = _get_detector_cfg(
        'fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d.py')
    self = build_detector(fcos3d_cfg).cuda()
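    # Dummy inputs: one 928x1600 image (nuScenes 900x1600 padded to a
    # multiple of 32) with 3 random ground-truth instances.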
    imgs = torch.rand([1, 3, 928, 1600], dtype=torch.float32).cuda()
    gt_bboxes = [torch.rand([3, 4], dtype=torch.float32).cuda()]
    gt_bboxes_3d = CameraInstance3DBoxes(
        torch.rand([3, 9], device='cuda'), box_dim=9)
    gt_labels = [torch.randint(0, 10, [3], device='cuda')]
    gt_labels_3d = gt_labels
    centers2d = [torch.rand([3, 2], dtype=torch.float32).cuda()]
    depths = [torch.rand([3], dtype=torch.float32).cuda()]
    attr_labels = [torch.randint(0, 9, [3], device='cuda')]
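    # cam2img is the 3x3 camera intrinsic matrix; box_type_3d declares which
    # 3D box convention the annotations and predictions use.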
    img_metas = [
        dict(
            cam2img=[[1260.8474446004698, 0.0, 807.968244525554],
                     [0.0, 1260.8474446004698, 495.3344268742088],
                     [0.0, 0.0, 1.0]],
            scale_factor=np.array([1., 1., 1., 1.], dtype=np.float32),
            box_type_3d=CameraInstance3DBoxes)
    ]

    # test forward_train
    losses = self.forward_train(imgs, img_metas, gt_bboxes, gt_labels,
                                gt_bboxes_3d, gt_labels_3d, centers2d, depths,
                                attr_labels)
    assert losses['loss_cls'] >= 0
    assert losses['loss_offset'] >= 0
    assert losses['loss_depth'] >= 0
    assert losses['loss_size'] >= 0
    assert losses['loss_rotsin'] >= 0
    assert losses['loss_centerness'] >= 0
    assert losses['loss_velo'] >= 0
    assert losses['loss_dir'] >= 0
    assert losses['loss_attr'] >= 0

    # test simple_test
    with torch.no_grad():
        results = self.simple_test(imgs, img_metas)
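    # Monocular results are stored per image under 'img_bbox'; the 9 box
    # values are center, size, yaw and 2D velocity for nuScenes.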
    boxes_3d = results[0]['img_bbox']['boxes_3d']
    scores_3d = results[0]['img_bbox']['scores_3d']
    labels_3d = results[0]['img_bbox']['labels_3d']
    attrs_3d = results[0]['img_bbox']['attrs_3d']
    assert boxes_3d.tensor.shape[0] >= 0
    assert boxes_3d.tensor.shape[1] == 9
    assert scores_3d.shape[0] >= 0
    assert labels_3d.shape[0] >= 0
    assert attrs_3d.shape[0] >= 0


def test_show_result_meshlab():
    pcd = 'tests/data/nuscenes/samples/LIDAR_TOP/n015-2018-08-02-17-16-37+' \
          '0800__LIDAR_TOP__1533201470948018.pcd.bin'
    box_3d = LiDARInstance3DBoxes(
        torch.tensor(
            [[8.7314, -1.8559, -1.5997, 0.4800, 1.2000, 1.8900, 0.0100]]))
    labels_3d = torch.tensor([0])
    scores_3d = torch.tensor([0.5])
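    # 100 dummy points with 4 channels (x, y, z plus intensity).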
    points = np.random.rand(100, 4)
    img_meta = dict(pts_filename=pcd,
                    boxes_3d=box_3d,
                    box_mode_3d=Box3DMode.LIDAR)
    data = dict(points=[[torch.tensor(points)]], img_metas=[[img_meta]])
    result = [
        dict(pts_bbox=dict(
            boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d))
    ]
    tmp_dir = tempfile.TemporaryDirectory()
    temp_out_dir = tmp_dir.name
    out_dir, file_name = show_result_meshlab(data, result, temp_out_dir)
    expected_outfile_pred = file_name + '_pred.obj'
    expected_outfile_pts = file_name + '_points.obj'
    expected_outfile_pred_path = os.path.join(out_dir, file_name,
                                              expected_outfile_pred)
    expected_outfile_pts_path = os.path.join(out_dir, file_name,
                                             expected_outfile_pts)
    assert os.path.exists(expected_outfile_pred_path)
    assert os.path.exists(expected_outfile_pts_path)
    tmp_dir.cleanup()

    # test multi-modality show
    # indoor scene
    pcd = 'tests/data/sunrgbd/points/000001.bin'
    filename = 'tests/data/sunrgbd/sunrgbd_trainval/image/000001.jpg'
    box_3d = DepthInstance3DBoxes(
        torch.tensor(
            [[-1.1580, 3.3041, -0.9961, 0.3829, 0.4647, 0.5574, 1.1213]]))
    img = np.random.randn(1, 3, 608, 832)
    k_mat = np.array([[529.5000, 0.0000, 365.0000],
                      [0.0000, 529.5000, 265.0000], [0.0000, 0.0000, 1.0000]])
    rt_mat = np.array([[0.9980, 0.0058, -0.0634], [0.0058, 0.9835, 0.1808],
                       [0.0634, -0.1808, 0.9815]])
    rt_mat = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]]) @ rt_mat.transpose(
        1, 0)
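    # depth2img projects depth-frame points to pixels: intrinsics composed
    # with the depth->camera rotation, where the permutation matrix maps
    # depth axes (x right, y front, z up) to camera axes (x right, y down,
    # z front).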
    depth2img = k_mat @ rt_mat
    img_meta = dict(filename=filename,
                    depth2img=depth2img,
                    pcd_horizontal_flip=False,
                    pcd_vertical_flip=False,
                    box_mode_3d=Box3DMode.DEPTH,
                    box_type_3d=DepthInstance3DBoxes,
                    pcd_trans=np.array([0., 0., 0.]),
                    pcd_scale_factor=1.0,
                    pts_filename=pcd,
                    transformation_3d_flow=['R', 'S', 'T'])
    data = dict(points=[[torch.tensor(points)]],
                img_metas=[[img_meta]],
                img=[img])
    result = [dict(boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d)]
    tmp_dir = tempfile.TemporaryDirectory()
    temp_out_dir = tmp_dir.name
    out_dir, file_name = show_result_meshlab(data,
                                             result,
                                             temp_out_dir,
                                             0.3,
                                             task='multi_modality-det')
    expected_outfile_pred = file_name + '_pred.obj'
    expected_outfile_pts = file_name + '_points.obj'
    expected_outfile_png = file_name + '_img.png'
    expected_outfile_proj = file_name + '_pred.png'
    expected_outfile_pred_path = os.path.join(out_dir, file_name,
                                              expected_outfile_pred)
    expected_outfile_pts_path = os.path.join(out_dir, file_name,
                                             expected_outfile_pts)
    expected_outfile_png_path = os.path.join(out_dir, file_name,
                                             expected_outfile_png)
    expected_outfile_proj_path = os.path.join(out_dir, file_name,
                                              expected_outfile_proj)
    assert os.path.exists(expected_outfile_pred_path)
    assert os.path.exists(expected_outfile_pts_path)
    assert os.path.exists(expected_outfile_png_path)
    assert os.path.exists(expected_outfile_proj_path)
    tmp_dir.cleanup()
    # outdoor scene
    pcd = 'tests/data/kitti/training/velodyne_reduced/000000.bin'
    filename = 'tests/data/kitti/training/image_2/000000.png'
    box_3d = LiDARInstance3DBoxes(
        torch.tensor(
            [[6.4495, -3.9097, -1.7409, 1.5063, 3.1819, 1.4716, 1.8782]]))
    img = np.random.randn(1, 3, 384, 1280)
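    # 4x4 lidar->image projection from the KITTI calibration
    # (P2 @ R0_rect @ Tr_velo_to_cam).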
    lidar2img = np.array(
        [[6.09695435e+02, -7.21421631e+02, -1.25125790e+00, -1.23041824e+02],
         [1.80384201e+02, 7.64479828e+00, -7.19651550e+02, -1.01016693e+02],
         [9.99945343e-01, 1.24365499e-04, 1.04513029e-02, -2.69386917e-01],
         [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])
    img_meta = dict(filename=filename,
                    pcd_horizontal_flip=False,
                    pcd_vertical_flip=False,
                    box_mode_3d=Box3DMode.LIDAR,
                    box_type_3d=LiDARInstance3DBoxes,
                    pcd_trans=np.array([0., 0., 0.]),
                    pcd_scale_factor=1.0,
                    pts_filename=pcd,
                    lidar2img=lidar2img)
    data = dict(points=[[torch.tensor(points)]],
                img_metas=[[img_meta]],
                img=[img])
    result = [
        dict(pts_bbox=dict(
            boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d))
    ]
    tmp_dir = tempfile.TemporaryDirectory()
    temp_out_dir = tmp_dir.name
    out_dir, file_name = show_result_meshlab(data,
                                             result,
                                             temp_out_dir,
                                             0.1,
                                             task='multi_modality-det')
    expected_outfile_pred = file_name + '_pred.obj'
    expected_outfile_pts = file_name + '_points.obj'
    expected_outfile_png = file_name + '_img.png'
    expected_outfile_proj = file_name + '_pred.png'
    expected_outfile_pred_path = os.path.join(out_dir, file_name,
                                              expected_outfile_pred)
    expected_outfile_pts_path = os.path.join(out_dir, file_name,
                                             expected_outfile_pts)
    expected_outfile_png_path = os.path.join(out_dir, file_name,
                                             expected_outfile_png)
    expected_outfile_proj_path = os.path.join(out_dir, file_name,
                                              expected_outfile_proj)
    assert os.path.exists(expected_outfile_pred_path)
    assert os.path.exists(expected_outfile_pts_path)
    assert os.path.exists(expected_outfile_png_path)
    assert os.path.exists(expected_outfile_proj_path)
    tmp_dir.cleanup()
    # test mono-3d show
    filename = 'tests/data/nuscenes/samples/CAM_BACK_LEFT/n015-2018-' \
               '07-18-11-07-57+0800__CAM_BACK_LEFT__1531883530447423.jpg'
    box_3d = CameraInstance3DBoxes(
        torch.tensor(
            [[6.4495, -3.9097, -1.7409, 1.5063, 3.1819, 1.4716, 1.8782]]))
    img = np.random.randn(1, 3, 384, 1280)
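    # A toy 3x3 camera intrinsic matrix for projecting boxes to the image.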
    cam2img = np.array([[100.0, 0.0, 50.0], [0.0, 100.0, 50.0],
                        [0.0, 0.0, 1.0]])
    img_meta = dict(filename=filename,
                    pcd_horizontal_flip=False,
                    pcd_vertical_flip=False,
                    box_mode_3d=Box3DMode.CAM,
                    box_type_3d=CameraInstance3DBoxes,
                    pcd_trans=np.array([0., 0., 0.]),
                    pcd_scale_factor=1.0,
                    cam2img=cam2img)
    data = dict(points=[[torch.tensor(points)]],
                img_metas=[[img_meta]],
                img=[img])
    result = [
        dict(img_bbox=dict(
            boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d))
    ]
    tmp_dir = tempfile.TemporaryDirectory()
    temp_out_dir = tmp_dir.name
    out_dir, file_name = show_result_meshlab(data,
                                             result,
                                             temp_out_dir,
                                             0.1,
                                             task='mono-det')
    expected_outfile_png = file_name + '_img.png'
    expected_outfile_proj = file_name + '_pred.png'
    expected_outfile_png_path = os.path.join(out_dir, file_name,
                                             expected_outfile_png)
    expected_outfile_proj_path = os.path.join(out_dir, file_name,
                                              expected_outfile_proj)
    assert os.path.exists(expected_outfile_png_path)
    assert os.path.exists(expected_outfile_proj_path)
    tmp_dir.cleanup()

    # test seg show
    pcd = 'tests/data/scannet/points/scene0000_00.bin'
    points = np.random.rand(100, 6)
    img_meta = dict(pts_filename=pcd)
    data = dict(points=[[torch.tensor(points)]], img_metas=[[img_meta]])
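    # Random per-point predictions over the 20 ScanNet semantic classes.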
    pred_seg = torch.randint(0, 20, (100, ))
    result = [dict(semantic_mask=pred_seg)]
    tmp_dir = tempfile.TemporaryDirectory()
    temp_out_dir = tmp_dir.name
    out_dir, file_name = show_result_meshlab(data,
                                             result,
                                             temp_out_dir,
                                             task='seg')
    expected_outfile_pred = file_name + '_pred.obj'
    expected_outfile_pts = file_name + '_points.obj'
    expected_outfile_pred_path = os.path.join(out_dir, file_name,
                                              expected_outfile_pred)
    expected_outfile_pts_path = os.path.join(out_dir, file_name,
                                             expected_outfile_pts)
    assert os.path.exists(expected_outfile_pred_path)
    assert os.path.exists(expected_outfile_pts_path)
    tmp_dir.cleanup()


def test_boxes_conversion():
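    # Axis conventions: CAM (x right, y down, z front), LIDAR (x front,
    # y left, z up), DEPTH (x right, y front, z up).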
    # test CAM to LIDAR and DEPTH
    cam_boxes = CameraInstance3DBoxes(
        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
    convert_lidar_boxes = Coord3DMode.convert(cam_boxes, Coord3DMode.CAM,
                                              Coord3DMode.LIDAR)
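    # CAM -> LIDAR maps (x, y, z)_lidar = (z, -x, -y)_cam; the box sizes
    # follow the same axis permutation without the sign flips.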

    expected_tensor = torch.tensor(
        [[-1.7501, -1.7802, -2.5162, 1.6500, 1.7500, 3.3900, 1.4800],
         [-1.6357, -8.9594, -2.4567, 1.5700, 1.5400, 4.0100, 1.6200],
         [-1.3033, -28.2967, 0.5558, 1.4800, 1.4700, 2.2300, -1.5700],
         [-1.7361, -26.6690, -21.8230, 1.4000, 1.5600, 3.4800, -1.6900],
         [-1.6218, -31.3198, -8.1621, 1.4800, 1.7400, 3.7700, 2.7900]])
    assert torch.allclose(expected_tensor, convert_lidar_boxes.tensor, 1e-3)

    convert_depth_boxes = Coord3DMode.convert(cam_boxes, Coord3DMode.CAM,
                                              Coord3DMode.DEPTH)
    expected_tensor = torch.tensor(
        [[1.7802, 1.7501, 2.5162, 1.7500, 1.6500, 3.3900, 1.4800],
         [8.9594, 1.6357, 2.4567, 1.5400, 1.5700, 4.0100, 1.6200],
         [28.2967, 1.3033, -0.5558, 1.4700, 1.4800, 2.2300, -1.5700],
         [26.6690, 1.7361, 21.8230, 1.5600, 1.4000, 3.4800, -1.6900],
         [31.3198, 1.6218, 8.1621, 1.7400, 1.4800, 3.7700, 2.7900]])
    assert torch.allclose(expected_tensor, convert_depth_boxes.tensor, 1e-3)

    # test LIDAR to CAM and DEPTH
    lidar_boxes = LiDARInstance3DBoxes(
        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
    convert_cam_boxes = Coord3DMode.convert(lidar_boxes, Coord3DMode.LIDAR,
                                            Coord3DMode.CAM)
    expected_tensor = torch.tensor(
        [[-2.5162, 1.7501, 1.7802, 3.3900, 1.6500, 1.7500, 1.4800],
         [-2.4567, 1.6357, 8.9594, 4.0100, 1.5700, 1.5400, 1.6200],
         [0.5558, 1.3033, 28.2967, 2.2300, 1.4800, 1.4700, -1.5700],
         [-21.8230, 1.7361, 26.6690, 3.4800, 1.4000, 1.5600, -1.6900],
         [-8.1621, 1.6218, 31.3198, 3.7700, 1.4800, 1.7400, 2.7900]])
    assert torch.allclose(expected_tensor, convert_cam_boxes.tensor, 1e-3)

    convert_depth_boxes = Coord3DMode.convert(lidar_boxes, Coord3DMode.LIDAR,
                                              Coord3DMode.DEPTH)
    expected_tensor = torch.tensor(
        [[-2.5162, 1.7802, -1.7501, 3.3900, 1.7500, 1.6500, 1.4800],
         [-2.4567, 8.9594, -1.6357, 4.0100, 1.5400, 1.5700, 1.6200],
         [0.5558, 28.2967, -1.3033, 2.2300, 1.4700, 1.4800, -1.5700],
         [-21.8230, 26.6690, -1.7361, 3.4800, 1.5600, 1.4000, -1.6900],
         [-8.1621, 31.3198, -1.6218, 3.7700, 1.7400, 1.4800, 2.7900]])
    assert torch.allclose(expected_tensor, convert_depth_boxes.tensor, 1e-3)

    # test DEPTH to CAM and LIDAR
    depth_boxes = DepthInstance3DBoxes(
        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
    convert_cam_boxes = Coord3DMode.convert(depth_boxes, Coord3DMode.DEPTH,
                                            Coord3DMode.CAM)
    expected_tensor = torch.tensor(
        [[1.7802, -1.7501, -2.5162, 1.7500, 1.6500, 3.3900, 1.4800],
         [8.9594, -1.6357, -2.4567, 1.5400, 1.5700, 4.0100, 1.6200],
         [28.2967, -1.3033, 0.5558, 1.4700, 1.4800, 2.2300, -1.5700],
         [26.6690, -1.7361, -21.8230, 1.5600, 1.4000, 3.4800, -1.6900],
         [31.3198, -1.6218, -8.1621, 1.7400, 1.4800, 3.7700, 2.7900]])
    assert torch.allclose(expected_tensor, convert_cam_boxes.tensor, 1e-3)

    convert_lidar_boxes = Coord3DMode.convert(depth_boxes, Coord3DMode.DEPTH,
                                              Coord3DMode.LIDAR)
    expected_tensor = torch.tensor(
        [[2.5162, -1.7802, -1.7501, 3.3900, 1.7500, 1.6500, 1.4800],
         [2.4567, -8.9594, -1.6357, 4.0100, 1.5400, 1.5700, 1.6200],
         [-0.5558, -28.2967, -1.3033, 2.2300, 1.4700, 1.4800, -1.5700],
         [21.8230, -26.6690, -1.7361, 3.4800, 1.5600, 1.4000, -1.6900],
         [8.1621, -31.3198, -1.6218, 3.7700, 1.7400, 1.4800, 2.7900]])
    assert torch.allclose(expected_tensor, convert_lidar_boxes.tensor, 1e-3)


def test_fcos_mono3d_head():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
    _setup_seed(0)
    fcos3d_head_cfg = _get_head_cfg(
        'fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d.py')
    self = build_head(fcos3d_head_cfg).cuda()

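    # Dummy FPN features for a 928x1600 input: five levels at strides
    # 8, 16, 32, 64 and 128 (spatial sizes rounded up).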
    feats = [
        torch.rand([2, 256, 116, 200], dtype=torch.float32).cuda(),
        torch.rand([2, 256, 58, 100], dtype=torch.float32).cuda(),
        torch.rand([2, 256, 29, 50], dtype=torch.float32).cuda(),
        torch.rand([2, 256, 15, 25], dtype=torch.float32).cuda(),
        torch.rand([2, 256, 8, 13], dtype=torch.float32).cuda()
    ]

    # test forward
    ret_dict = self(feats)
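    # Five output branches (cls scores, bbox preds, direction classes,
    # attributes, centerness), each holding one tensor per FPN level;
    # 10 is the number of nuScenes detection classes.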
    assert len(ret_dict) == 5
    assert len(ret_dict[0]) == 5
    assert ret_dict[0][0].shape == torch.Size([2, 10, 116, 200])

    # test loss
    gt_bboxes = [
        torch.rand([3, 4], dtype=torch.float32).cuda(),
        torch.rand([3, 4], dtype=torch.float32).cuda()
    ]
    gt_bboxes_3d = CameraInstance3DBoxes(torch.rand([3, 9], device='cuda'),
                                         box_dim=9)
    gt_labels = [torch.randint(0, 10, [3], device='cuda') for i in range(2)]
    gt_labels_3d = gt_labels
    centers2d = [
        torch.rand([3, 2], dtype=torch.float32).cuda(),
        torch.rand([3, 2], dtype=torch.float32).cuda()
    ]
    depths = [
        torch.rand([3], dtype=torch.float32).cuda(),
        torch.rand([3], dtype=torch.float32).cuda()
    ]
    attr_labels = [torch.randint(0, 9, [3], device='cuda') for i in range(2)]
    img_metas = [
        dict(cam2img=[[1260.8474446004698, 0.0, 807.968244525554],
                      [0.0, 1260.8474446004698, 495.3344268742088],
                      [0.0, 0.0, 1.0]],
             scale_factor=np.array([1., 1., 1., 1.], dtype=np.float32),
             box_type_3d=CameraInstance3DBoxes) for i in range(2)
    ]
    losses = self.loss(*ret_dict, gt_bboxes, gt_labels, gt_bboxes_3d,
                       gt_labels_3d, centers2d, depths, attr_labels, img_metas)
    assert losses['loss_cls'] >= 0
    assert losses['loss_offset'] >= 0
    assert losses['loss_depth'] >= 0
    assert losses['loss_size'] >= 0
    assert losses['loss_rotsin'] >= 0
    assert losses['loss_centerness'] >= 0
    assert losses['loss_velo'] >= 0
    assert losses['loss_dir'] >= 0
    assert losses['loss_attr'] >= 0

    # test get_boxes
    results = self.get_bboxes(*ret_dict, img_metas)
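    # One (boxes, scores, labels, attrs) tuple per image; 200 matches the
    # configured maximum number of predictions kept per image.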
    assert len(results) == 2
    assert len(results[0]) == 4
    assert results[0][0].tensor.shape == torch.Size([200, 9])
    assert results[0][1].shape == torch.Size([200])
    assert results[0][2].shape == torch.Size([200])
    assert results[0][3].shape == torch.Size([200])