def project_corners_3d_to_image(corners_3d, p):
    """Projects 3D bounding box corners into image space.

    Args:
        corners_3d: (3, 8) array of 3D corner points in the camera frame
        p: (3, 4) camera projection matrix

    Returns:
        corners: (2, 8) array of corner points projected into image space
        face_idx: (4, 4) array of indices into the corners for each box face
    """
    # Indices into the 8 box corners for each of the 4 side faces
    face_idx = np.array([0, 1, 5, 4,    # front face
                         1, 2, 6, 5,    # left face
                         2, 3, 7, 6,    # back face
                         3, 0, 4, 7])   # right face
    face_idx = face_idx.reshape((4, 4))

    return calib_utils.project_pc_to_image(corners_3d, p), face_idx
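# Usage sketch (illustrative, not part of the original module): builds the
# eight corners of a box 10 m in front of the camera, in an order consistent
# with face_idx above (this ordering is an assumption), and projects them with
# a placeholder camera matrix. Real code would use corners computed from a
# label and a calibrated matrix such as frame_calib.p2.
def _example_project_corners_3d():
    l, w, h = 4.0, 1.6, 1.5  # box dimensions [m]
    x_c = np.array([l, l, -l, -l, l, l, -l, -l]) / 2.0
    y_c = np.array([0.0, 0.0, 0.0, 0.0, -h, -h, -h, -h])
    z_c = np.array([w, -w, -w, w, w, -w, -w, w]) / 2.0 + 10.0
    corners_3d = np.vstack([x_c, y_c, z_c])  # (3, 8)

    cam_p = np.array([[700.0, 0.0, 600.0, 0.0],
                      [0.0, 700.0, 180.0, 0.0],
                      [0.0, 0.0, 1.0, 0.0]])  # placeholder intrinsics

    corners_in_img, face_idx = project_corners_3d_to_image(corners_3d, cam_p)
    return corners_in_img, face_idx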
def compute_orientation_3d(obj, p):
    """Computes an object's orientation vector projected into image space.

    Args:
        obj: object label with rotation ry, length l, and position t
        p: (3, 4) camera projection matrix

    Returns:
        (2, 2) projected endpoints of the orientation vector, or None if the
        vector is not in front of the camera
    """
    # Compute the rotation matrix about the y-axis
    rot = np.array([[+np.cos(obj.ry), 0, +np.sin(obj.ry)],
                    [0, 1, 0],
                    [-np.sin(obj.ry), 0, +np.cos(obj.ry)]])

    # Orientation vector from the origin to (l, 0, 0), rotated and translated
    orientation3d = np.array([0.0, obj.l, 0.0, 0.0, 0.0, 0.0]).reshape(3, 2)
    orientation3d = np.dot(rot, orientation3d)
    orientation3d[0, :] = orientation3d[0, :] + obj.t[0]
    orientation3d[1, :] = orientation3d[1, :] + obj.t[1]
    orientation3d[2, :] = orientation3d[2, :] + obj.t[2]

    # Only draw for boxes that are in front of the camera
    for idx in np.arange(orientation3d.shape[1]):
        if orientation3d[2, idx] < 0.1:
            return None

    return calib_utils.project_pc_to_image(orientation3d, p)
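# Usage sketch (illustrative): compute_orientation_3d only reads ry, l, and t,
# so a SimpleNamespace stands in for a real label object here; the camera
# matrix is a placeholder, not real KITTI calibration.
def _example_compute_orientation_3d():
    from types import SimpleNamespace
    obj = SimpleNamespace(ry=np.pi / 4.0, l=4.0, t=(2.0, 1.5, 15.0))
    cam_p = np.array([[700.0, 0.0, 600.0, 0.0],
                      [0.0, 700.0, 180.0, 0.0],
                      [0.0, 0.0, 1.0, 0.0]])
    # Returns the two projected endpoints, or None if behind the camera
    return compute_orientation_3d(obj, cam_p)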
def get_point_colours(points, cam_p, image):
    """Gets image colours for a set of points projected into the image.

    Args:
        points: (N, 3) point cloud; all points must project inside the image
        cam_p: (3, 4) camera projection matrix
        image: image to sample colours from

    Returns:
        (N, 3) array of colours, one per point
    """
    points_in_im = calib_utils.project_pc_to_image(points.T, cam_p)
    points_in_im_rounded = np.round(points_in_im).astype(np.int32)

    point_colours = image[points_in_im_rounded[1], points_in_im_rounded[0]]

    return point_colours
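# Usage sketch (illustrative): sample colours for two points that project
# inside a dummy image; all values here are placeholders.
def _example_get_point_colours():
    image = np.zeros((370, 1240, 3), dtype=np.uint8)
    points = np.array([[0.0, 0.0, 10.0],
                       [1.0, 0.5, 12.0]])  # (N, 3), in front of the camera
    cam_p = np.array([[700.0, 0.0, 600.0, 0.0],
                      [0.0, 700.0, 180.0, 0.0],
                      [0.0, 0.0, 1.0, 0.0]])
    return get_point_colours(points, cam_p, image)  # (N, 3) colour values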
def get_lidar_point_cloud_for_cam(sample_name, frame_calib, velo_dir,
                                  image_shape=None, cam_idx=2):
    """Gets the lidar point cloud in cam0 frame, and optionally returns only
    the points that project into a given image.

    Args:
        sample_name: sample name
        frame_calib: FrameCalib frame calibration
        velo_dir: velodyne directory
        image_shape: (optional) image shape [h, w] to filter points inside image
        cam_idx: (optional) cam index (2 or 3) for filtering

    Returns:
        (3, N) point_cloud in the form [[x,...][y,...][z,...]]
    """
    # Get points in camera frame
    point_cloud = get_lidar_point_cloud(sample_name, frame_calib, velo_dir)

    # Only keep points at least 1.0 m in front of the camera
    point_cloud = point_cloud[:, point_cloud[2] > 1.0]

    if image_shape is None:
        return point_cloud

    # Select the projection matrix for the requested camera
    if cam_idx == 2:
        cam_p = frame_calib.p2
    elif cam_idx == 3:
        cam_p = frame_calib.p3
    else:
        raise ValueError('Invalid cam_idx', cam_idx)

    # Project to image frame
    points_in_img = calib_utils.project_pc_to_image(point_cloud, cam_p=cam_p)
    points_in_img_rounded = np.round(points_in_img)

    # Filter based on the given image shape
    image_filter = (points_in_img_rounded[0] >= 0) & \
                   (points_in_img_rounded[0] < image_shape[1]) & \
                   (points_in_img_rounded[1] >= 0) & \
                   (points_in_img_rounded[1] < image_shape[0])

    filtered_point_cloud = point_cloud[:, image_filter].astype(np.float32)

    return filtered_point_cloud
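# Usage sketch (the directories, sample name, and image shape below are
# assumptions, since this call needs KITTI data on disk):
#
#     frame_calib = calib_utils.get_frame_calib(calib_dir, '000050')
#     point_cloud = get_lidar_point_cloud_for_cam(
#         '000050', frame_calib, velo_dir, image_shape=(370, 1242), cam_idx=2)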
def get_viewing_angle_box_3d(box_3d, cam_p=None, version='x_offset'):
    """Calculates the viewing angle to the centroid of a box_3d.

    Args:
        box_3d: box_3d in cam_0 frame
        cam_p: camera projection matrix, required if version is not 'cam_0'
        version:
            'cam_0': assume cam_0 frame
            'x_offset': apply x_offset from the camera baseline to cam_0
            'projection': project the centroid into the image

    Returns:
        viewing_angle: viewing angle to the box centroid
    """
    if version == 'cam_0':
        # Viewing angle in cam_0 frame
        viewing_angle = np.arctan2(box_3d[0], box_3d[2])

    elif version == 'x_offset':
        # Get x offset (b_cam) from calibration: cam_p[0, 3] = -f_x * b_cam
        x_offset = -cam_p[0, 3] / cam_p[0, 0]

        # Shift box_3d from cam_0 to cam_N frame
        box_x_cam = box_3d[0] - x_offset

        # Calculate viewing angle
        viewing_angle = np.arctan2(box_x_cam, box_3d[2])

    elif version == 'projection':
        # Project the centroid into the image
        proj_uv = calib_utils.project_pc_to_image(
            box_3d[0:3].reshape(3, -1), cam_p)

        centre_u = cam_p[0, 2]
        focal_length = cam_p[0, 0]
        centre_x = proj_uv[0][0]

        # Assume a depth of 1.0 to calculate the viewing angle:
        # viewing_angle = atan2((centre_x - centre_u) / f, 1.0)
        viewing_angle = np.arctan2((centre_x - centre_u) / focal_length, 1.0)

    else:
        raise ValueError('Invalid version', version)

    return viewing_angle
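# Worked sketch (placeholder values): for a camera matrix with
# cam_p[0, 3] = -f_x * b_cam, the 'x_offset' and 'projection' versions should
# agree, since atan2(x - b, z) == atan2((u - c_u) / f, 1.0) for the projected
# centroid. box_3d is assumed to be in [x, y, z, l, w, h, ry] order.
def _example_viewing_angle():
    f, c_u, b_cam = 700.0, 600.0, 0.54  # focal [px], centre [px], baseline [m]
    cam_p = np.array([[f, 0.0, c_u, -f * b_cam],
                      [0.0, f, 180.0, 0.0],
                      [0.0, 0.0, 1.0, 0.0]])
    box_3d = np.array([2.0, 1.5, 20.0, 4.0, 1.6, 1.5, 0.0])
    ang_x_offset = get_viewing_angle_box_3d(box_3d, cam_p, version='x_offset')
    ang_projection = get_viewing_angle_box_3d(box_3d, cam_p, version='projection')
    return ang_x_offset, ang_projection  # expected to match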
def project_depths(point_cloud, cam_p, image_shape, max_depth=100.0):
    """Projects a point cloud into image space and saves depths per pixel.

    Args:
        point_cloud: (3, N) point cloud in cam0
        cam_p: camera projection matrix
        image_shape: image shape [h, w]
        max_depth: optional, max depth used for inversion

    Returns:
        projected_depths: projected depth map, 0.0 where no points project
    """
    # View the point cloud as (N, 3)
    all_points = point_cloud.T

    # Project all points into the image
    points_in_img = calib_utils.project_pc_to_image(all_points.T, cam_p)
    points_in_img_int = np.int32(np.round(points_in_img))

    # Remove points outside the image
    valid_indices = \
        (points_in_img_int[0] >= 0) & (points_in_img_int[0] < image_shape[1]) & \
        (points_in_img_int[1] >= 0) & (points_in_img_int[1] < image_shape[0])

    all_points = all_points[valid_indices]
    points_in_img_int = points_in_img_int[:, valid_indices]

    # Invert depths so that larger values correspond to closer points
    all_points[:, 2] = max_depth - all_points[:, 2]

    # Keep the closest point at each pixel when multiple points overlap
    projected_depths = np.zeros(image_shape)
    pixel_indices = (points_in_img_int[1], points_in_img_int[0])
    np.maximum.at(projected_depths, pixel_indices, all_points[:, 2])

    # Undo the depth inversion at valid pixels
    projected_depths[pixel_indices] = \
        max_depth - projected_depths[pixel_indices]

    return projected_depths.astype(np.float32)
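# Usage sketch (illustrative): three points, two of which land on the same
# pixel; the closer depth (5.0 m) should win there. The camera matrix is a
# placeholder.
def _example_project_depths():
    point_cloud = np.array([[0.0, 0.0, 0.0],    # x
                            [0.0, 0.0, 1.0],    # y
                            [5.0, 10.0, 8.0]])  # z (depth)
    cam_p = np.array([[700.0, 0.0, 600.0, 0.0],
                      [0.0, 700.0, 180.0, 0.0],
                      [0.0, 0.0, 1.0, 0.0]])
    depth_map = project_depths(point_cloud, cam_p, image_shape=[370, 1240])
    return depth_map  # depth_map[180, 600] == 5.0, depth_map[268, 600] == 8.0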
def main():

    ####################
    # Options
    ####################

    # drive_id = '2011_09_26_drive_0001_sync'  # No moving cars, depth completion sample 0 area
    # drive_id = '2011_09_26_drive_0002_sync'  # depth completion sample 0
    # drive_id = '2011_09_26_drive_0005_sync'  # (gps bad)
    # drive_id = '2011_09_26_drive_0005_sync'  # (moving) (gps bad)
    # drive_id = '2011_09_26_drive_0009_sync'  # missing velo?
    # drive_id = '2011_09_26_drive_0011_sync'  # both moving, then traffic light
    # drive_id = '2011_09_26_drive_0013_sync'  # both moving
    # drive_id = '2011_09_26_drive_0014_sync'  # both moving, sample 104
    # drive_id = '2011_09_26_drive_0015_sync'  # both moving
    # drive_id = '2011_09_26_drive_0017_sync'  # other only, traffic light, moving across
    # drive_id = '2011_09_26_drive_0018_sync'  # other only, traffic light, forward
    # drive_id = '2011_09_26_drive_0019_sync'  # both moving, some people at end, wonky gps
    # drive_id = '2011_09_26_drive_0020_sync'  # gps drift
    # drive_id = '2011_09_26_drive_0022_sync'  # ego mostly, then both, long
    # drive_id = '2011_09_26_drive_0023_sync'  # sample 169 (long)
    # drive_id = '2011_09_26_drive_0027_sync'  # both moving, fast, straight
    # drive_id = '2011_09_26_drive_0028_sync'  # both moving, opposite
    drive_id = '2011_09_26_drive_0029_sync'  # both moving, good gps
    # drive_id = '2011_09_26_drive_0032_sync'  # both moving, following some cars
    # drive_id = '2011_09_26_drive_0035_sync'
    # drive_id = '2011_09_26_drive_0036_sync'  # (long) behind red truck
    # drive_id = '2011_09_26_drive_0039_sync'  # ok, 1 moving
    # drive_id = '2011_09_26_drive_0046_sync'  # (short) only 1 moving at start
    # drive_id = '2011_09_26_drive_0048_sync'  # ok but short, no movement
    # drive_id = '2011_09_26_drive_0052_sync'
    # drive_id = '2011_09_26_drive_0056_sync'
    # drive_id = '2011_09_26_drive_0057_sync'  # (gps sinking)
    # drive_id = '2011_09_26_drive_0059_sync'
    # drive_id = '2011_09_26_drive_0060_sync'
    # drive_id = '2011_09_26_drive_0061_sync'  # ego only, ok, long, bumpy, some gps drift
    # drive_id = '2011_09_26_drive_0064_sync'  # Smart car, sample 25
    # drive_id = '2011_09_26_drive_0070_sync'
    # drive_id = '2011_09_26_drive_0079_sync'
    # drive_id = '2011_09_26_drive_0086_sync'  # (medium) uphill
    # drive_id = '2011_09_26_drive_0087_sync'
    # drive_id = '2011_09_26_drive_0091_sync'
    # drive_id = '2011_09_26_drive_0093_sync'  # Sample 50 (bad)
    # drive_id = '2011_09_26_drive_0106_sync'  # Two cyclists on right
    # drive_id = '2011_09_26_drive_0113_sync'
    # drive_id = '2011_09_26_drive_0117_sync'  # (long)
    # drive_id = '2011_09_28_drive_0002_sync'  # campus
    # drive_id = '2011_09_28_drive_0016_sync'  # campus
    # drive_id = '2011_09_28_drive_0021_sync'  # campus
    # drive_id = '2011_09_28_drive_0034_sync'
    # drive_id = '2011_09_28_drive_0037_sync'
    # drive_id = '2011_09_28_drive_0038_sync'
    # drive_id = '2011_09_28_drive_0039_sync'  # busy campus, bad gps
    # drive_id = '2011_09_28_drive_0043_sync'
    # drive_id = '2011_09_28_drive_0045_sync'
    # drive_id = '2011_09_28_drive_0179_sync'
    # drive_id = '2011_09_29_drive_0026_sync'
    # drive_id = '2011_09_29_drive_0071_sync'
    # drive_id = '2011_09_30_drive_0020_sync'
    # drive_id = '2011_09_30_drive_0027_sync'  # (long)
    # drive_id = '2011_09_30_drive_0028_sync'  # (long) bad gps
    # drive_id = '2011_09_30_drive_0033_sync'
    # drive_id = '2011_09_30_drive_0034_sync'
    # drive_id = '2011_10_03_drive_0042_sync'
    # drive_id = '2011_10_03_drive_0047_sync'

    raw_dir = os.path.expanduser('~/Kitti/raw')

    vtk_window_size = (1280, 720)

    point_cloud_source = 'lidar'

    # Load raw data
    drive_date = drive_id[0:10]
    drive_num_str = drive_id[17:21]
    raw_data = pykitti.raw(raw_dir, drive_date, drive_num_str)

    # Check that the number of velo files matches the number of timestamps
    if len(raw_data.velo_files) != len(raw_data.timestamps):
        raise ValueError('velo files and timestamps have different length!')

    frame_range = (0, len(raw_data.timestamps))
    # frame_range = (0, 100)

    poses_source = 'gps'
    # poses_source = 'orbslam2'

    # camera_viewpoint = 'front'
    camera_viewpoint = 'elevated'

    ####################
    # End of Options
    ####################

    vtk_renderer = demo_utils.setup_vtk_renderer()
    vtk_render_window = demo_utils.setup_vtk_render_window(
        'Overlaid Point Cloud', vtk_window_size, vtk_renderer)

    vtk_interactor = vtk.vtkRenderWindowInteractor()
    vtk_interactor.SetRenderWindow(vtk_render_window)
    vtk_interactor.SetInteractorStyle(
        vtk_utils.ToggleActorsInteractorStyle(None, vtk_renderer))
    vtk_interactor.Initialize()

    drive_date_dir = raw_dir + '/{}'.format(drive_date)

    if point_cloud_source != 'lidar':
        raise ValueError(
            'Invalid point cloud source {}'.format(point_cloud_source))

    cam_p = raw_data.calib.P_rect_20

    if poses_source == 'gps':
        # Load poses from matlab
        poses_path = drive_date_dir + '/{}/poses.mat'.format(drive_id)
        poses_mat = scipy.io.loadmat(poses_path)
        imu_ref_poses = np.asarray(poses_mat['pose'][0])
    elif poses_source == 'orbslam2':
        # Load poses from ORBSLAM2 and pad the (3, 4) poses to (4, 4)
        imu_ref_poses = np.loadtxt(
            raw_data.data_path + '/poses_orbslam2.txt').reshape(-1, 3, 4)
        imu_ref_poses = np.pad(imu_ref_poses, ((0, 0), (0, 1), (0, 0)),
                               constant_values=0, mode='constant')
        imu_ref_poses[:, 3, 3] = 1.0
    else:
        raise ValueError('Invalid poses_source', poses_source)

    # Read calibrations
    tf_velo_calib_imu_calib = raw_data.calib.T_velo_imu
    tf_imu_calib_velo_calib = transform_utils.invert_tf(
        tf_velo_calib_imu_calib)

    tf_cam0_calib_velo_calib = raw_data.calib.T_cam0_velo
    tf_velo_calib_cam0_calib = transform_utils.invert_tf(
        tf_cam0_calib_velo_calib)

    # Create VtkAxes
    vtk_axes = vtk.vtkAxesActor()
    vtk_axes.SetTotalLength(2, 2, 2)
    vtk_renderer.AddActor(vtk_axes)

    for frame_idx in range(*frame_range):

        # Point cloud actor wrapper
        # vtk_pc = VtkPointCloud()
        vtk_pc = VtkPointCloudGlyph()
        vtk_pc.vtk_actor.GetProperty().SetPointSize(2)

        # all_vtk_actors.append(vtk_point_cloud.vtk_actor)
        vtk_renderer.AddActor(vtk_pc.vtk_actor)

        print('{} / {}'.format(frame_idx, len(raw_data.timestamps) - 1))

        # Load next frame data
        load_start_time = time.time()
        rgb_image = np.asarray(raw_data.get_cam2(frame_idx))
        bgr_image = rgb_image[..., ::-1]

        if point_cloud_source == 'lidar':
            velo_points = get_velo_points(raw_data, frame_idx)

            # Transform the point cloud to cam_0 frame
            velo_curr_points_padded = np.pad(
                velo_points, [[0, 0], [0, 1]],
                constant_values=1.0, mode='constant')

            # R_rect_00 is already applied in T_cam0_velo:
            # T_cam0_velo = R_rect_00 @ T_cam0_velo_unrect
            cam0_curr_pc_all_padded = \
                raw_data.calib.T_cam0_velo @ velo_curr_points_padded.T
        else:
            raise ValueError('Invalid point cloud source')

        print('load\t\t', time.time() - load_start_time)

        # Project velodyne points
        projection_start_time = time.time()
        if point_cloud_source == 'lidar':
            points_in_img = calib_utils.project_pc_to_image(
                cam0_curr_pc_all_padded[0:3], cam_p)
            points_in_img_int = np.round(points_in_img).astype(np.int32)

            image_filter = obj_utils.points_in_img_filter(
                points_in_img_int, bgr_image.shape)

            cam0_curr_pc_padded = cam0_curr_pc_all_padded[:, image_filter]

            points_in_img_int_valid = points_in_img_int[:, image_filter]
            point_colours = bgr_image[points_in_img_int_valid[1],
                                      points_in_img_int_valid[0]]
        print('projection\t', time.time() - projection_start_time)

        # Get calibration transformations
        tf_velo_calib_imu_calib = transform_utils.invert_tf(
            tf_imu_calib_velo_calib)

        tf_cam0_calib_imu_calib = \
            tf_cam0_calib_velo_calib @ tf_velo_calib_imu_calib
        tf_imu_calib_cam0_calib = transform_utils.invert_tf(
            tf_cam0_calib_imu_calib)

        # Get poses
        imu_ref_pose = imu_ref_poses[frame_idx]
        tf_imu_ref_imu_curr = imu_ref_pose

        # Calculate the point cloud in the imu_ref frame
        velo_curr_pc_padded = tf_velo_calib_cam0_calib @ cam0_curr_pc_padded
        imu_curr_pc_padded = tf_imu_calib_velo_calib @ velo_curr_pc_padded
        imu_ref_pc_padded = tf_imu_ref_imu_curr @ imu_curr_pc_padded

        # VtkPointCloud
        vtk_pc_start_time = time.time()
        vtk_pc.set_points(imu_ref_pc_padded[0:3].T, point_colours)
        print('vtk_pc\t\t', time.time() - vtk_pc_start_time)

        # Show the current pose as a larger point
        vtk_pc_pose = VtkPointCloudGlyph()
        vtk_pc_pose.vtk_actor.GetProperty().SetPointSize(5)
        vtk_pc_pose.set_points(np.reshape(imu_ref_pose[0:3, 3], [-1, 3]))
        vtk_renderer.AddActor(vtk_pc_pose.vtk_actor)

        # Render
        render_start_time = time.time()

        # Reset the clipping range to show all points
        vtk_renderer.ResetCameraClippingRange()
        vtk_render_window.Render()
        print('render\t\t', time.time() - render_start_time)

        # Move the camera to follow the current frame
        if camera_viewpoint == 'front':
            imu_curr_cam0_position = tf_imu_calib_cam0_calib @ \
                [0.0, 0.0, 0.0, 1.0]
        elif camera_viewpoint == 'elevated':
            imu_curr_cam0_position = tf_imu_calib_cam0_calib.dot(
                [0.0, -5.0, -10.0, 1.0])
        else:
            raise ValueError('Invalid camera_viewpoint', camera_viewpoint)

        imu_ref_cam0_position = tf_imu_ref_imu_curr.dot(imu_curr_cam0_position)

        imu_curr_focal_point = tf_imu_calib_cam0_calib.dot(
            [0.0, 0.0, 20.0, 1.0])
        imu_ref_focal_point = tf_imu_ref_imu_curr.dot(imu_curr_focal_point)

        current_cam = vtk_renderer.GetActiveCamera()
        vtk_renderer.ResetCamera()
        current_cam.SetViewUp(0, 0, 1)
        current_cam.SetPosition(imu_ref_cam0_position[0:3])
        current_cam.SetFocalPoint(*imu_ref_focal_point[0:3])
        current_cam.Zoom(0.5)

        vtk_renderer.ResetCameraClippingRange()
        vtk_renderer.GetRenderWindow().Render()

        print('---')

    print('Done')

    # Keep the window open
    vtk_interactor.Start()
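# Note (sketch of an equivalent formulation, using the names from the loop
# above): the three per-point matrix products can be collapsed into a single
# cam0_curr -> imu_ref transform composed once per frame:
#
#     tf_imu_ref_cam0_curr = (tf_imu_ref_imu_curr
#                             @ tf_imu_calib_velo_calib
#                             @ tf_velo_calib_cam0_calib)
#     imu_ref_pc_padded = tf_imu_ref_cam0_curr @ cam0_curr_pc_padded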
def main():
    """Demo to display point clouds and 3D bounding boxes

    Keys:
        F1: Toggle LIDAR point cloud
        F2: Toggle depth map point cloud
        F3: Toggle labels
    """

    ##############################
    # Options
    ##############################
    # sample_name = '000050'
    sample_name = '000169'

    # Area extents
    x_min = -40
    x_max = 40
    y_min = -5
    y_max = 5
    z_min = 0
    z_max = 70

    ##############################
    # End of Options
    ##############################
    area_extents = np.array([[x_min, x_max],
                             [y_min, y_max],
                             [z_min, z_max]], dtype=np.float32)

    dataset = DatasetBuilder.build_kitti_obj_dataset(
        DatasetBuilder.KITTI_TRAINVAL)

    # Get sample info
    image = obj_utils.get_image(sample_name, dataset.image_2_dir)
    image_shape = image.shape[0:2]

    frame_calib = calib_utils.get_frame_calib(dataset.calib_dir, sample_name)

    # Get lidar points and filter them to the area extents
    lidar_point_cloud = obj_utils.get_lidar_point_cloud(
        sample_name, frame_calib, dataset.velo_dir)
    lidar_point_cloud, area_filter = obj_utils.filter_pc_to_area(
        lidar_point_cloud, area_extents)

    # Filter lidar points to the image and look up their colours
    lidar_points_in_img = calib_utils.project_pc_to_image(
        lidar_point_cloud, frame_calib.p2)
    lidar_point_cloud, image_filter = obj_utils.filter_pc_to_image(
        lidar_point_cloud, lidar_points_in_img, image_shape)
    lidar_points_in_img_int = np.floor(
        lidar_points_in_img[:, image_filter]).astype(np.int32)
    lidar_point_colours = image[lidar_points_in_img_int[1],
                                lidar_points_in_img_int[0]]

    # Get points from the depth map and filter them to the area extents
    depth_point_cloud = obj_utils.get_depth_map_point_cloud(
        sample_name, frame_calib, dataset.depth_dir)
    depth_point_cloud, area_filter = obj_utils.filter_pc_to_area(
        depth_point_cloud, area_extents)
    area_filter = np.reshape(area_filter, image.shape[0:2])
    depth_point_colours = image[area_filter]

    # Get bounding boxes
    gt_objects = obj_utils.read_labels(dataset.label_dir, sample_name)

    ##############################
    # Vtk Visualization
    ##############################
    vtk_pc_lidar = VtkPointCloud()
    vtk_pc_depth = VtkPointCloud()

    vtk_pc_lidar.vtk_actor.GetProperty().SetPointSize(2)
    vtk_pc_depth.vtk_actor.GetProperty().SetPointSize(2)

    vtk_pc_lidar.set_points(lidar_point_cloud.T, lidar_point_colours)
    vtk_pc_depth.set_points(depth_point_cloud.T, depth_point_colours)

    # Create VtkBoxes for the ground truth boxes
    vtk_pyr_boxes = VtkPyramidBoxes()
    vtk_pyr_boxes.set_objects(gt_objects, vtk_pyr_boxes.COLOUR_SCHEME_KITTI)

    # Create Axes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Create the renderer and add all actors
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.AddActor(vtk_pc_lidar.vtk_actor)
    vtk_renderer.AddActor(vtk_pc_depth.vtk_actor)
    vtk_renderer.AddActor(vtk_pyr_boxes.vtk_actor)
    vtk_renderer.AddActor(axes)
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Set up the camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(170.0)
    current_cam.Roll(180.0)

    # Zoom out to fit all points on screen
    vtk_renderer.ResetCamera()
    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Set up the render window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName(
        "Point Cloud, Sample {}".format(sample_name))
    vtk_render_window.SetSize(1280, 720)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Set up the custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)

    # Add a custom interactor to toggle actor visibilities
    custom_interactor = vtk_utils.ToggleActorsInteractorStyle([
        vtk_pc_lidar.vtk_actor,
        vtk_pc_depth.vtk_actor,
        vtk_pyr_boxes.vtk_actor,
    ], vtk_renderer, current_cam, axes)
    vtk_render_window_interactor.SetInteractorStyle(custom_interactor)

    # Render in VTK
    vtk_render_window.Render()
    vtk_render_window_interactor.Start()  # Blocking