示例#1
0
def project_corners_3d_to_image(corners_3d, p):
    """Computes the 3D bounding box projected onto
    image space.

    Keyword arguments:
    obj -- object file to draw bounding box
    p -- transform matrix

    Returns:
        corners : numpy array of corner points projected
        onto image space.
        face_idx: numpy array of 3D bounding box face
    """
    # index for 3d bounding box face
    # it is converted to 4x4 matrix
    face_idx = np.array([
        0,
        1,
        5,
        4,  # front face
        1,
        2,
        6,
        5,  # left face
        2,
        3,
        7,
        6,  # back face
        3,
        0,
        4,
        7
    ]).reshape((4, 4))  # right face
    return calib_utils.project_pc_to_image(corners_3d, p), face_idx
示例#2
0
def compute_orientation_3d(obj, p):
    """Computes the orientation given object and camera matrix

    Keyword arguments:
    obj -- object file to draw bounding box
    p -- transform matrix
    """

    # compute rotational matrix
    rot = np.array([[+np.cos(obj.ry), 0, +np.sin(obj.ry)], [0, 1, 0],
                    [-np.sin(obj.ry), 0, +np.cos(obj.ry)]])

    orientation3d = np.array([0.0, obj.l, 0.0, 0.0, 0.0, 0.0]).reshape(3, 2)
    orientation3d = np.dot(rot, orientation3d)

    orientation3d[0, :] = orientation3d[0, :] + obj.t[0]
    orientation3d[1, :] = orientation3d[1, :] + obj.t[1]
    orientation3d[2, :] = orientation3d[2, :] + obj.t[2]

    # only draw for boxes that are in front of the camera
    for idx in np.arange(orientation3d.shape[1]):
        if orientation3d[2, idx] < 0.1:
            return None

    return calib_utils.project_pc_to_image(orientation3d, p)
示例#3
0
def get_point_colours(points, cam_p, image):
    points_in_im = calib_utils.project_pc_to_image(points.T, cam_p)
    points_in_im_rounded = np.round(points_in_im).astype(np.int32)

    point_colours = image[points_in_im_rounded[1], points_in_im_rounded[0]]

    return point_colours
示例#4
0
def get_lidar_point_cloud_for_cam(sample_name,
                                  frame_calib,
                                  velo_dir,
                                  image_shape=None,
                                  cam_idx=2):
    """Gets the lidar point cloud in cam0 frame, and optionally returns only the
    points that are projected to an image.

    Args:
        sample_name: sample name
        frame_calib: FrameCalib frame calibration
        velo_dir: velodyne directory
        image_shape: (optional) image shape [h, w] to filter points inside image
        cam_idx: (optional) cam index (2 or 3) for filtering

    Returns:
        (3, N) point_cloud in the form [[x,...][y,...][z,...]]
    """

    # Get points in camera frame
    point_cloud = get_lidar_point_cloud(sample_name, frame_calib, velo_dir)

    # Only keep points in front of camera (positive z)
    point_cloud = point_cloud[:, point_cloud[2] > 1.0]

    if image_shape is None:
        return point_cloud

    else:

        # Project to image frame
        if cam_idx == 2:
            cam_p = frame_calib.p2
        elif cam_idx == 3:
            cam_p = frame_calib.p3
        else:
            raise ValueError('Invalid cam_idx', cam_idx)

        # Project to image
        points_in_img = calib_utils.project_pc_to_image(point_cloud,
                                                        cam_p=cam_p)
        points_in_img_rounded = np.round(points_in_img)

        # Filter based on the given image shape
        image_filter = (points_in_img_rounded[0] >= 0) & \
                       (points_in_img_rounded[0] < image_shape[1]) & \
                       (points_in_img_rounded[1] >= 0) & \
                       (points_in_img_rounded[1] < image_shape[0])

        filtered_point_cloud = point_cloud[:, image_filter].astype(np.float32)

        return filtered_point_cloud
示例#5
0
def get_viewing_angle_box_3d(box_3d, cam_p=None, version='x_offset'):
    """Calculates the viewing angle to the centroid of a box_3d

    Args:
        box_3d: box_3d in cam_0 frame
        cam_p: camera projection matrix, required if version is not 'cam_0'
        version:
            'cam_0': assuming cam_0 frame
            'x_offset': apply x_offset from camera baseline to cam_0
            'projection': project centroid to image

    Returns:
        viewing_angle: viewing angle to box centroid
    """

    if version == 'cam_0':
        # Viewing angle in cam_0 frame
        viewing_angle = np.arctan2(box_3d[0], box_3d[2])

    elif version == 'x_offset':
        # Get x offset (b_cam) from calibration: cam_mat[0, 3] = (-f_x * b_cam)
        x_offset = -cam_p[0, 3] / cam_p[0, 0]

        # Shift box_3d from cam_0 to cam_N frame
        box_x_cam = box_3d[0] - x_offset

        # Calculate viewing angle
        viewing_angle = np.arctan2(box_x_cam, box_3d[2])

    elif version == 'projection':
        # Project centroid to the image
        proj_uv = calib_utils.project_pc_to_image(box_3d[0:3].reshape(3, -1),
                                                  cam_p)

        centre_u = cam_p[0, 2]
        focal_length = cam_p[0, 0]

        centre_x = proj_uv[0][0]

        # Assume depth of 1.0 to calculate viewing angle
        # viewing_angle = atan2(i / f, 1.0)
        viewing_angle = np.arctan2((centre_x - centre_u) / focal_length, 1.0)

    else:
        raise ValueError('Invalid version', version)

    return viewing_angle
示例#6
0
def project_depths(point_cloud, cam_p, image_shape, max_depth=100.0):
    """Projects a point cloud into image space and saves depths per pixel.

    Args:
        point_cloud: (3, N) Point cloud in cam0
        cam_p: camera projection matrix
        image_shape: image shape [h, w]
        max_depth: optional, max depth for inversion

    Returns:
        projected_depths: projected depth map
    """

    # Only keep points in front of the camera
    all_points = point_cloud.T

    # Save the depth corresponding to each point
    points_in_img = calib_utils.project_pc_to_image(all_points.T, cam_p)
    points_in_img_int = np.int32(np.round(points_in_img))

    # Remove points outside image
    valid_indices = \
        (points_in_img_int[0] >= 0) & (points_in_img_int[0] < image_shape[1]) & \
        (points_in_img_int[1] >= 0) & (points_in_img_int[1] < image_shape[0])

    all_points = all_points[valid_indices]
    points_in_img_int = points_in_img_int[:, valid_indices]

    # Invert depths
    all_points[:, 2] = max_depth - all_points[:, 2]

    # Only save valid pixels, keep closer points when overlapping
    projected_depths = np.zeros(image_shape)
    valid_indices = [points_in_img_int[1], points_in_img_int[0]]
    projected_depths[valid_indices] = [
        max(projected_depths[
            points_in_img_int[1, idx], points_in_img_int[0, idx]],
            all_points[idx, 2])
        for idx in range(points_in_img_int.shape[1])]

    projected_depths[valid_indices] = \
        max_depth - projected_depths[valid_indices]

    return projected_depths.astype(np.float32)
def main():

    ####################
    # Options
    ####################

    # drive_id = '2011_09_26_drive_0001_sync'  # No moving cars, depth completion sample 0 area
    # drive_id = '2011_09_26_drive_0002_sync'  # depth completion sample 0
    # drive_id = '2011_09_26_drive_0005_sync'  # (gps bad)
    # drive_id = '2011_09_26_drive_0005_sync'  # (moving) (gps bad)
    # drive_id = '2011_09_26_drive_0009_sync'  # missing velo?
    # drive_id = '2011_09_26_drive_0011_sync'  # both moving, then traffic light)
    # drive_id = '2011_09_26_drive_0013_sync'  # both moving
    # drive_id = '2011_09_26_drive_0014_sync'  # both moving, sample 104
    # drive_id = '2011_09_26_drive_0015_sync'  # both moving
    # drive_id = '2011_09_26_drive_0017_sync'  # other only, traffic light, moving across
    # drive_id = '2011_09_26_drive_0018_sync'  # other only, traffic light, forward
    # drive_id = '2011_09_26_drive_0019_sync'  # both moving, some people at end, wonky gps
    # drive_id = '2011_09_26_drive_0020_sync'  # gps drift
    # drive_id = '2011_09_26_drive_0022_sync'  # ego mostly, then both, long
    # drive_id = '2011_09_26_drive_0023_sync'  # sample 169 (long)
    # drive_id = '2011_09_26_drive_0027_sync'  # both moving, fast, straight
    # drive_id = '2011_09_26_drive_0028_sync'  # both moving, opposite
    drive_id = '2011_09_26_drive_0029_sync'  # both moving, good gps
    # drive_id = '2011_09_26_drive_0032_sync'  # both moving, following some cars
    # drive_id = '2011_09_26_drive_0035_sync'  #
    # drive_id = '2011_09_26_drive_0036_sync'  # (long) behind red truck
    # drive_id = '2011_09_26_drive_0039_sync'  # ok, 1 moving
    # drive_id = '2011_09_26_drive_0046_sync'  # (short) only 1 moving at start
    # drive_id = '2011_09_26_drive_0048_sync'  # ok but short, no movement
    # drive_id = '2011_09_26_drive_0052_sync'  #
    # drive_id = '2011_09_26_drive_0056_sync'  #
    # drive_id = '2011_09_26_drive_0057_sync'  # (gps sinking)
    # drive_id = '2011_09_26_drive_0059_sync'  #
    # drive_id = '2011_09_26_drive_0060_sync'  #
    # drive_id = '2011_09_26_drive_0061_sync'  # ego only, ok, long, bumpy, some gps drift
    # drive_id = '2011_09_26_drive_0064_sync'  # Smart car, sample 25
    # drive_id = '2011_09_26_drive_0070_sync'  #
    # drive_id = '2011_09_26_drive_0079_sync'  #
    # drive_id = '2011_09_26_drive_0086_sync'  # (medium) uphill
    # drive_id = '2011_09_26_drive_0087_sync'  #
    # drive_id = '2011_09_26_drive_0091_sync'  #
    # drive_id = '2011_09_26_drive_0093_sync'  # Sample 50 (bad)
    # drive_id = '2011_09_26_drive_0106_sync'  # Two cyclists on right
    # drive_id = '2011_09_26_drive_0113_sync'  #
    # drive_id = '2011_09_26_drive_0117_sync'  # (long)

    # drive_id = '2011_09_28_drive_0002_sync'  # campus
    # drive_id = '2011_09_28_drive_0016_sync'  # campus
    # drive_id = '2011_09_28_drive_0021_sync'  # campus
    # drive_id = '2011_09_28_drive_0034_sync'  #
    # drive_id = '2011_09_28_drive_0037_sync'  #
    # drive_id = '2011_09_28_drive_0038_sync'  #
    # drive_id = '2011_09_28_drive_0039_sync'  # busy campus, bad gps
    # drive_id = '2011_09_28_drive_0043_sync'  #
    # drive_id = '2011_09_28_drive_0045_sync'  #
    # drive_id = '2011_09_28_drive_0179_sync'  #

    # drive_id = '2011_09_29_drive_0026_sync'  #
    # drive_id = '2011_09_29_drive_0071_sync'  #

    # drive_id = '2011_09_30_drive_0020_sync'  #
    # drive_id = '2011_09_30_drive_0027_sync'  # (long)
    # drive_id = '2011_09_30_drive_0028_sync'  # (long) bad gps
    # drive_id = '2011_09_30_drive_0033_sync'  #
    # drive_id = '2011_09_30_drive_0034_sync'  #

    # drive_id = '2011_10_03_drive_0042_sync'  #
    # drive_id = '2011_10_03_drive_0047_sync'  #

    raw_dir = os.path.expanduser('~/Kitti/raw')

    vtk_window_size = (1280, 720)

    point_cloud_source = 'lidar'

    # Load raw data
    drive_date = drive_id[0:10]
    drive_num_str = drive_id[17:21]
    raw_data = pykitti.raw(raw_dir, drive_date, drive_num_str)

    # Check that velo length matches timestamps?
    if len(raw_data.velo_files) != len(raw_data.timestamps):
        raise ValueError('velo files and timestamps have different length!')

    frame_range = (0, len(raw_data.timestamps))
    # frame_range = (0, 100)

    poses_source = 'gps'
    # poses_source = 'orbslam2'

    # camera_viewpoint = 'front'
    camera_viewpoint = 'elevated'

    ####################
    # End of Options
    ####################

    vtk_renderer = demo_utils.setup_vtk_renderer()

    vtk_render_window = demo_utils.setup_vtk_render_window(
        'Overlaid Point Cloud', vtk_window_size, vtk_renderer)

    vtk_interactor = vtk.vtkRenderWindowInteractor()
    vtk_interactor.SetRenderWindow(vtk_render_window)
    vtk_interactor.SetInteractorStyle(
        vtk_utils.ToggleActorsInteractorStyle(None, vtk_renderer))
    vtk_interactor.Initialize()

    drive_date_dir = raw_dir + '/{}'.format(drive_date)

    if point_cloud_source != 'lidar':
        raise ValueError(
            'Invalid point cloud source {}'.format(point_cloud_source))

    cam_p = raw_data.calib.P_rect_20

    if poses_source == 'gps':
        # Load poses from matlab
        poses_path = drive_date_dir + '/{}/poses.mat'.format(drive_id)
        poses_mat = scipy.io.loadmat(poses_path)
        imu_ref_poses = np.asarray(poses_mat['pose'][0])

    elif poses_source == 'orbslam2':
        # Load poses from ORBSLAM2
        imu_ref_poses = np.loadtxt(raw_data.data_path +
                                   '/poses_orbslam2.txt').reshape(-1, 3, 4)
        imu_ref_poses = np.pad(imu_ref_poses, ((0, 0), (0, 1), (0, 0)),
                               mode='constant',
                               constant_values=0)
        imu_ref_poses[:, 3, 3] = 1.0
    else:
        raise ValueError('Invalid poses_source', poses_source)

    # Read calibrations
    tf_velo_calib_imu_calib = raw_data.calib.T_velo_imu
    tf_imu_calib_velo_calib = transform_utils.invert_tf(
        tf_velo_calib_imu_calib)

    tf_cam0_calib_velo_calib = raw_data.calib.T_cam0_velo
    tf_velo_calib_cam0_calib = transform_utils.invert_tf(
        tf_cam0_calib_velo_calib)

    # Create VtkAxes
    vtk_axes = vtk.vtkAxesActor()
    vtk_axes.SetTotalLength(2, 2, 2)
    vtk_renderer.AddActor(vtk_axes)

    for frame_idx in range(*frame_range):

        # Point cloud actor wrapper
        # vtk_pc = VtkPointCloud()
        vtk_pc = VtkPointCloudGlyph()
        vtk_pc.vtk_actor.GetProperty().SetPointSize(2)

        # all_vtk_actors.append(vtk_point_cloud.vtk_actor)
        vtk_renderer.AddActor(vtk_pc.vtk_actor)

        print('{} / {}'.format(frame_idx, len(raw_data.timestamps) - 1))

        # Load next frame data
        load_start_time = time.time()
        rgb_image = np.asarray(raw_data.get_cam2(frame_idx))
        bgr_image = rgb_image[..., ::-1]

        if point_cloud_source == 'lidar':
            velo_points = get_velo_points(raw_data, frame_idx)

            # Transform point cloud to cam_0 frame
            velo_curr_points_padded = np.pad(velo_points, [[0, 0], [0, 1]],
                                             constant_values=1.0,
                                             mode='constant')

            # R_rect_00 already applied in T_cam0_velo
            # T_cam0_velo = R_rect_00 @ T_cam0_velo_unrect
            cam0_curr_pc_all_padded = raw_data.calib.T_cam0_velo @ velo_curr_points_padded.T

        else:
            raise ValueError('Invalid point cloud source')

        print('load\t\t', time.time() - load_start_time)

        # Project velodyne points
        projection_start_time = time.time()

        if point_cloud_source == 'lidar':

            points_in_img = calib_utils.project_pc_to_image(
                cam0_curr_pc_all_padded[0:3], cam_p)
            points_in_img_int = np.round(points_in_img).astype(np.int32)

            image_filter = obj_utils.points_in_img_filter(
                points_in_img_int, bgr_image.shape)

            cam0_curr_pc_padded = cam0_curr_pc_all_padded[:, image_filter]

            points_in_img_int_valid = points_in_img_int[:, image_filter]
            point_colours = bgr_image[points_in_img_int_valid[1],
                                      points_in_img_int_valid[0]]

        print('projection\t', time.time() - projection_start_time)

        # Get calibration transformations
        tf_velo_calib_imu_calib = transform_utils.invert_tf(
            tf_imu_calib_velo_calib)
        tf_cam0_calib_imu_calib = tf_cam0_calib_velo_calib @ tf_velo_calib_imu_calib

        tf_imu_calib_cam0_calib = transform_utils.invert_tf(
            tf_cam0_calib_imu_calib)

        # Get poses
        imu_ref_pose = imu_ref_poses[frame_idx]
        tf_imu_ref_imu_curr = imu_ref_pose

        # Calculate point cloud in imu_ref frame
        velo_curr_pc_padded = tf_velo_calib_cam0_calib @ cam0_curr_pc_padded
        imu_curr_pc_padded = tf_imu_calib_velo_calib @ velo_curr_pc_padded
        imu_ref_pc_padded = tf_imu_ref_imu_curr @ imu_curr_pc_padded

        # VtkPointCloud
        vtk_pc_start_time = time.time()
        vtk_pc.set_points(imu_ref_pc_padded[0:3].T, point_colours)
        print('vtk_pc\t\t', time.time() - vtk_pc_start_time)

        vtk_pc_pose = VtkPointCloudGlyph()
        vtk_pc_pose.vtk_actor.GetProperty().SetPointSize(5)
        vtk_pc_pose.set_points(np.reshape(imu_ref_pose[0:3, 3], [-1, 3]))
        vtk_renderer.AddActor(vtk_pc_pose.vtk_actor)

        # Render
        render_start_time = time.time()
        # Reset the clipping range to show all points
        vtk_renderer.ResetCameraClippingRange()
        vtk_render_window.Render()
        print('render\t\t', time.time() - render_start_time)

        # Move camera
        if camera_viewpoint == 'front':
            imu_curr_cam0_position = tf_imu_calib_cam0_calib @ [
                0.0, 0.0, 0.0, 1.0
            ]
        elif camera_viewpoint == 'elevated':
            imu_curr_cam0_position = tf_imu_calib_cam0_calib.dot(
                [0.0, -5.0, -10.0, 1.0])
        else:
            raise ValueError('Invalid camera_pos', camera_viewpoint)

        imu_ref_cam0_position = tf_imu_ref_imu_curr.dot(imu_curr_cam0_position)
        imu_curr_focal_point = tf_imu_calib_cam0_calib.dot(
            [0.0, 0.0, 20.0, 1.0])
        imu_ref_focal_point = tf_imu_ref_imu_curr.dot(imu_curr_focal_point)

        current_cam = vtk_renderer.GetActiveCamera()
        vtk_renderer.ResetCamera()
        current_cam.SetViewUp(0, 0, 1)

        current_cam.SetPosition(imu_ref_cam0_position[0:3])
        current_cam.SetFocalPoint(*imu_ref_focal_point[0:3])

        current_cam.Zoom(0.5)
        vtk_renderer.ResetCameraClippingRange()
        vtk_renderer.GetRenderWindow().Render()

        print('---')

    print('Done')

    # Keep window open
    vtk_interactor.Start()
示例#8
0
def main():
    """Demo to display point clouds and 3D bounding boxes

    Keys:
        F1: Toggle LIDAR point cloud
        F2: Toggle depth map point cloud
        F3: Toggle labels
    """

    ##############################
    # Options
    ##############################
    # sample_name = '000050'
    sample_name = '000169'

    # Area extents
    x_min = -40
    x_max = 40
    y_min = -5
    y_max = 5
    z_min = 0
    z_max = 70

    ##############################
    # End of Options
    ##############################

    area_extents = np.array([[x_min, x_max], [y_min, y_max], [z_min, z_max]],
                            dtype=np.float32)

    dataset = DatasetBuilder.build_kitti_obj_dataset(
        DatasetBuilder.KITTI_TRAINVAL)

    # Get sample info
    image = obj_utils.get_image(sample_name, dataset.image_2_dir)
    image_shape = image.shape[0:2]
    frame_calib = calib_utils.get_frame_calib(dataset.calib_dir, sample_name)

    # Get lidar points
    lidar_point_cloud = obj_utils.get_lidar_point_cloud(
        sample_name, frame_calib, dataset.velo_dir)
    lidar_point_cloud, area_filter = obj_utils.filter_pc_to_area(
        lidar_point_cloud, area_extents)

    # Filter to image
    lidar_points_in_img = calib_utils.project_pc_to_image(
        lidar_point_cloud, frame_calib.p2)
    lidar_point_cloud, image_filter = obj_utils.filter_pc_to_image(
        lidar_point_cloud, lidar_points_in_img, image_shape)
    lidar_points_in_img_int = np.floor(
        lidar_points_in_img[:, image_filter]).astype(np.int32)
    lidar_point_colours = image[lidar_points_in_img_int[1],
                                lidar_points_in_img_int[0]]

    # Get points from depth map
    depth_point_cloud = obj_utils.get_depth_map_point_cloud(
        sample_name, frame_calib, dataset.depth_dir)
    depth_point_cloud, area_filter = obj_utils.filter_pc_to_area(
        depth_point_cloud, area_extents)

    # Filter depth map points to area
    area_filter = np.reshape(area_filter, image.shape[0:2])
    depth_point_colours = image[area_filter]

    # Get bounding boxes
    gt_objects = obj_utils.read_labels(dataset.label_dir, sample_name)

    ##############################
    # Vtk Visualization
    ##############################
    vtk_pc_lidar = VtkPointCloud()
    vtk_pc_depth = VtkPointCloud()

    vtk_pc_lidar.vtk_actor.GetProperty().SetPointSize(2)
    vtk_pc_depth.vtk_actor.GetProperty().SetPointSize(2)

    vtk_pc_lidar.set_points(lidar_point_cloud.T, lidar_point_colours)
    vtk_pc_depth.set_points(depth_point_cloud.T, depth_point_colours)

    # Create VtkBoxes for boxes
    vtk_pyr_boxes = VtkPyramidBoxes()
    vtk_pyr_boxes.set_objects(gt_objects, vtk_pyr_boxes.COLOUR_SCHEME_KITTI)

    # Create Axes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Create Voxel Grid Renderer in bottom half
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.AddActor(vtk_pc_lidar.vtk_actor)
    vtk_renderer.AddActor(vtk_pc_depth.vtk_actor)

    vtk_renderer.AddActor(vtk_pyr_boxes.vtk_actor)
    vtk_renderer.AddActor(axes)
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(170.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName(
        "Point Cloud, Sample {}".format(sample_name))
    vtk_render_window.SetSize(1280, 720)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)

    # Add custom interactor to toggle actor visibilities
    custom_interactor = vtk_utils.ToggleActorsInteractorStyle([
        vtk_pc_lidar.vtk_actor, vtk_pc_depth.vtk_actor, vtk_pyr_boxes.vtk_actor
    ], vtk_renderer, current_cam, axes)

    vtk_render_window_interactor.SetInteractorStyle(custom_interactor)

    # Render in VTK
    vtk_render_window.Render()
    vtk_render_window_interactor.Start()  # Blocking