def camera():
    k = np.float32([
        [.5, 0, 2],
        [0, .25, 3],
        [0, 0, 1],
    ])
    d = np.zeros(shape=(5, ))
    return Camera(K=k, D=d)
def render_pointcloud_on_image(img, camera, Xw, cmap=MPL_JET_CMAP, norm_depth=10, dilation=3):
    """Render pointcloud on image.

    Parameters
    ----------
    img: np.ndarray
        Image to render the point cloud onto.

    camera: Camera
        Camera object with appropriately set extrinsics wrt world.

    Xw: np.ndarray (N x 3)
        3D point cloud (x, y, z) in the world coordinate.

    cmap: matplotlib.colors.Colormap
        Colormap used for visualizing the inverse depth.

    norm_depth: float, default: 10
        Depth value to normalize (inverse) pointcloud depths in color mapping.

    dilation: int, default: 3
        Dilation factor applied on each point.

    Returns
    -------
    img: np.array
        Image with rendered point cloud.
    """
    # Move point cloud to the camera's (C) reference frame from the world (W)
    Xc = camera.p_cw * Xw
    # Project the points as if they were in the camera's frame of reference
    uv = Camera(K=camera.K).project(Xc)
    # Colorize the point cloud based on depth
    z_c = Xc[:, 2]
    zinv_c = 1. / (z_c + 1e-6)
    zinv_c *= norm_depth
    colors = (cmap(np.clip(zinv_c, 0., 1.0))[:, :3] * 255).astype(np.uint8)
    # Create an empty image to overlay
    H, W, _ = img.shape
    vis = np.zeros_like(img)
    in_view = np.logical_and.reduce([(uv >= 0).all(axis=1), uv[:, 0] < W, uv[:, 1] < H, z_c > 0])
    uv, colors = uv[in_view].astype(int), colors[in_view]
    vis[uv[:, 1], uv[:, 0]] = colors  # pylint: disable=unsupported-assignment-operation
    # Dilate visualization so that the points render clearly
    vis = cv2.dilate(vis, np.ones((dilation, dilation)))
    mask = (vis > 0).astype(np.uint8)
    return (1 - mask) * img + mask * vis
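# Hedged usage sketch for the cmap/dilation variant above. It follows the identity-camera
# pattern used by `render_pointcloud_and_box_onto_rgb` further below: the points are assumed
# to already be expressed in the camera frame, so the default (identity) extrinsics are kept.
# The helper name and arguments are illustrative, not part of the library API.
def example_overlay_points_in_camera_frame(img, K, Xc):
    # Xc: (N x 3) points already in the camera frame; K: 3x3 intrinsics of the same camera.
    cam_identity = Camera(K=K)
    return render_pointcloud_on_image(img, cam_identity, Xc, cmap=MPL_JET_CMAP, norm_depth=10, dilation=3)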
def get_depth_from_point_cloud(dataset, scene_idx, sample_idx_in_scene, cam_datum_idx_in_sample,
                               pc_datum_idx_in_sample):
    """Generate the depth map in the camera view using the provided point cloud datum within the sample.

    Parameters
    ----------
    dataset: dgp.dataset.BaseDataset
        Inherited base dataset to augment with depth data.

    scene_idx: int
        Index of the scene.

    sample_idx_in_scene: int
        Index of the sample within the scene at scene_idx.

    cam_datum_idx_in_sample: int
        Index of the camera datum within the sample.

    pc_datum_idx_in_sample: int
        Index of the point cloud datum within the sample.

    Returns
    -------
    depth: np.ndarray
        Depth map from the camera's viewpoint.
    """
    # Get point cloud datum and load it
    pc_datum = dataset.get_datum(scene_idx, sample_idx_in_scene, pc_datum_idx_in_sample)
    pc_datum_type = pc_datum.datum.WhichOneof('datum_oneof')
    assert pc_datum_type == 'point_cloud', 'Depth cannot be generated from {}'.format(pc_datum_type)
    pc_datum_data = dataset.get_point_cloud_from_datum(scene_idx, sample_idx_in_scene, pc_datum_idx_in_sample)
    X_W = pc_datum_data['pose'] * pc_datum_data['point_cloud']
    # Get target camera datum for projection
    cam_datum = dataset.get_datum(scene_idx, sample_idx_in_scene, cam_datum_idx_in_sample)
    cam_datum_type = cam_datum.datum.WhichOneof('datum_oneof')
    assert cam_datum_type == 'image', 'Depth cannot be projected onto {}'.format(cam_datum_type)
    cam_datum_data = dataset.get_image_from_datum(scene_idx, sample_idx_in_scene, cam_datum_idx_in_sample)
    p_WC = cam_datum_data['pose']
    camera = Camera(K=cam_datum_data['intrinsics'], p_cw=p_WC.inverse())
    (W, H) = cam_datum_data['rgb'].size[:2]
    return generate_depth_map(camera, X_W, (H, W))
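# Hedged usage sketch for `get_depth_from_point_cloud`. It assumes a DGP-style scene dataset
# indexed by (scene, sample, datum); the hard-coded indices and the assumed datum ordering
# (camera first, lidar second) are illustrative only and depend on the dataset configuration.
def example_project_lidar_to_depth(dataset):
    scene_idx, sample_idx_in_scene = 0, 0
    cam_idx, lidar_idx = 0, 1  # assumed ordering of datums within the sample
    depth = get_depth_from_point_cloud(dataset, scene_idx, sample_idx_in_scene, cam_idx, lidar_idx)
    # The projection is sparse, so most pixels stay at zero; report statistics on valid pixels only.
    valid = depth > 0
    print('coverage: {:.1f}%, median depth: {:.2f} m'.format(100.0 * valid.mean(), np.median(depth[valid])))
    return depth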
def generate_depth_map(self, sample_idx, datum_idx, filename):
    """
    Generates the depth map for a camera by projecting LiDAR information.
    It also caches the depth map following the DGP folder structure, so it is not recalculated.

    Parameters
    ----------
    sample_idx : int
        Sample index

    datum_idx : int
        Datum index

    filename : str
        Filename used for loading / saving

    Returns
    -------
    depth : np.array [H, W]
        Depth map for that datum in that sample
    """
    # Generate depth filename
    filename = '{}/{}.npz'.format(
        os.path.dirname(self.path), filename.format('depth/{}'.format(self.depth_type)))
    # Load and return if exists
    if os.path.exists(filename):
        return np.load(filename, allow_pickle=True)['depth']
    # Otherwise, create, save and return
    else:
        # Get pointcloud
        scene_idx, sample_idx_in_scene, _ = self.dataset.dataset_item_index[sample_idx]
        pc_datum_idx_in_sample = self.dataset.get_datum_index_for_datum_name(
            scene_idx, sample_idx_in_scene, self.depth_type)
        pc_datum_data = self.dataset.get_point_cloud_from_datum(
            scene_idx, sample_idx_in_scene, pc_datum_idx_in_sample)
        # Create camera
        camera_rgb = self.get_current('rgb', datum_idx)
        camera_pose = self.get_current('pose', datum_idx)
        camera_intrinsics = self.get_current('intrinsics', datum_idx)
        camera = Camera(K=camera_intrinsics, p_cw=camera_pose.inverse())
        # Generate depth map
        world_points = pc_datum_data['pose'] * pc_datum_data['point_cloud']
        depth = generate_depth_map(camera, world_points, camera_rgb.size[::-1])
        # Save depth map
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        np.savez_compressed(filename, depth=depth)
        # Return depth map
        return depth
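# Hedged illustration of the cache-path convention used by `generate_depth_map` above.
# The concrete dataset path, depth_type and per-datum filename template below are made up for
# the example; only the string formatting mirrors what the method actually does.
path = '/data/ddad/ddad.json'                                  # stands in for self.path
depth_type = 'lidar'                                           # stands in for self.depth_type
template = 'scene_01/{}/CAMERA_01/15616458296936670'           # assumed per-datum filename template
cache_file = '{}/{}.npz'.format(
    os.path.dirname(path), template.format('depth/{}'.format(depth_type)))
# -> '/data/ddad/scene_01/depth/lidar/CAMERA_01/15616458296936670.npz'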
def render_pointcloud_on_image(img, camera, Xw, colormap='jet', percentile=80):
    """Render pointcloud on image.

    Parameters
    ----------
    img: np.ndarray
        Image to render the point cloud onto.

    camera: Camera
        Camera object with appropriately set extrinsics wrt world.

    Xw: np.ndarray (N x 3)
        3D point cloud (x, y, z) in the world coordinate.

    colormap: str, default: jet
        Colormap used for visualizing the inverse depth.

    percentile: float, default: 80
        Use this percentile to normalize the inverse depth.

    Returns
    -------
    img: np.array
        Image with rendered point cloud.
    """
    cmap = get_cmap(colormap)
    # Move point cloud to the camera's (C) reference frame from the world (W)
    Xc = camera.p_cw * Xw
    # Project the points as if they were in the camera's frame of reference
    uv = Camera(K=camera.K).project(Xc)
    # Colorize the point cloud based on depth
    z_c = Xc[:, 2]
    zinv_c = 1. / (z_c + 1e-6)
    zinv_c /= np.percentile(zinv_c, percentile)
    colors = (cmap(np.clip(zinv_c, 0., 1.0))[:, :3] * 255).astype(np.uint8)
    # Create an empty image to overlay
    H, W, _ = img.shape
    vis = np.zeros_like(img)
    in_view = np.logical_and.reduce([(uv >= 0).all(axis=1), uv[:, 0] < W, uv[:, 1] < H, z_c > 0])
    uv, colors = uv[in_view].astype(int), colors[in_view]
    vis[uv[:, 1], uv[:, 0]] = colors
    # Dilate visualization so that the points render clearly
    vis = cv2.dilate(vis, np.ones((5, 5)))
    return np.maximum(vis, img)
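# Hedged usage sketch for the percentile-normalized variant above. The datum dictionary layout
# ('rgb', 'intrinsics', 'pose') is an assumption based on how cameras are built elsewhere in
# this module; the helper name is illustrative only.
def example_overlay_lidar(cam_datum, Xw):
    # Xw: (N x 3) world-frame point cloud; the camera pose maps world points into the camera frame.
    img = np.array(cam_datum['rgb']).copy()
    camera = Camera(K=cam_datum['intrinsics'], p_cw=cam_datum['pose'].inverse())
    return render_pointcloud_on_image(img, camera, Xw, colormap='jet', percentile=95)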
def camera2():
    # Same intrinsics as `camera()` above (fx=.5, fy=.25, cx=2, cy=3), built via the
    # `from_params` convenience constructor instead of an explicit K matrix.
    return Camera.from_params(.5, .25, 2, 3)
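# Hedged sketch of a consistency check for the two helpers above, assuming they are plain
# constructor functions (no pytest.fixture decorator is shown) and that `Camera.K` exposes the
# 3x3 intrinsics matrix, as its use in the rendering functions suggests.
def test_camera_helpers_share_intrinsics():
    np.testing.assert_allclose(camera().K, camera2().K)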
def render_pointcloud_and_box_onto_rgb(camera_datums, pts_world_coord=None, id_to_name=None):
    """Project LiDAR point cloud to 2D rgb and render projected points overlapping on top of rgb.

    Parameters
    ----------
    camera_datums: list of OrderedDict
        "datum_name": str
            Sensor name from which the data was collected.

        "rgb": list of PIL.Image (mode=RGB)
            List of image in RGB format. List is in order of `selected_datums` that are images.

        "intrinsics": np.ndarray
            Camera intrinsics.

        "pose": Pose
            Ego pose of datum.

        "bounding_box_2d": np.ndarray dtype=np.float32
            Tensor containing bounding boxes for this sample (x, y, w, h) in absolute pixel coordinates.

        "bounding_box_3d": list of BoundingBox3D
            3D Bounding boxes for this sample specified in this point cloud sensor's reference
            frame. (i.e. this provides the bounding box (B) in the sensor's (S) reference frame `box_SB`).

    pts_world_coord: numpy.ndarray, default: None
        Accumulated point clouds from all lidar sensors in a single shared coordinate frame.

    id_to_name: OrderedDict, default: None
        Object class id (int) to name (str).

    Returns
    -------
    images_2d: dict
        Image Datum name to RGB image with bounding boxes.

    images_3d: dict
        Image Datum name to point clouds and bounding boxes overlapping on top of the RGB image.
    """
    images_2d, images_3d = {}, {}
    for d_cam in camera_datums:
        # Ensure the datum names are consistent with the query
        # Render the image
        img = np.array(d_cam['rgb']).copy()
        vis_2d = print_status(img, d_cam['datum_name'])
        vis_3d = vis_2d.copy()

        # Render 3d annotations in the camera
        cam_identity = Camera(K=d_cam['intrinsics'])
        # Only render bounding boxes if available
        if 'bounding_box_3d' in d_cam:
            for bbox3d in d_cam['bounding_box_3d']:
                vis_2d = bbox3d.render_on_image(cam_identity, vis_2d)

        # Render 2d annotations
        if 'bounding_box_2d' in d_cam:
            classes = [id_to_name[cid] for cid in d_cam['class_ids']] if id_to_name else None
            vis_3d = render_bbox2d_on_image(vis_3d, d_cam['bounding_box_2d'], texts=classes)

        # Render 3d points into image
        if pts_world_coord is not None:
            # Move points from the world (W) frame to the camera (C) frame.
            p_WC = d_cam['pose']
            pts_camera_coord = p_WC.inverse() * pts_world_coord
            vis_3d = render_pointcloud_on_image(vis_3d, cam_identity, pts_camera_coord)

        images_2d[d_cam['datum_name']] = vis_2d
        images_3d[d_cam['datum_name']] = vis_3d
    return images_2d, images_3d
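# Hedged usage sketch for `render_pointcloud_and_box_onto_rgb`. Accumulating lidar returns into a
# single world-frame cloud is shown schematically; the 'pose' / 'point_cloud' field names follow
# the pattern used by the depth functions above, while the surrounding dataset API is an assumption.
def example_render_sample(camera_datums, lidar_datums, id_to_name):
    # Move each lidar's points into the world frame, then stack them into one cloud.
    pts_world_coord = np.concatenate(
        [d['pose'] * d['point_cloud'] for d in lidar_datums], axis=0)
    images_2d, images_3d = render_pointcloud_and_box_onto_rgb(
        camera_datums, pts_world_coord=pts_world_coord, id_to_name=id_to_name)
    return images_2d, images_3d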
def render_radar_pointcloud_on_image(img,
                                     camera,
                                     point_cloud,
                                     cmap=MPL_JET_CMAP,
                                     norm_depth=10,
                                     velocity=None,
                                     velocity_scale=1,
                                     velocity_max_pix=.05):
    """Render radar pointcloud on image.

    Parameters
    ----------
    img: np.ndarray
        Image to render the point cloud onto.

    camera: Camera
        Camera object with appropriately set extrinsics wrt world.

    point_cloud: np.ndarray (N x 8)
        Point cloud in spherical coordinates of radar sensor frame.

    cmap: matplotlib.colors.Colormap
        Colormap used for visualizing the inverse depth.

    norm_depth: float, default: 10
        Depth value to normalize (inverse) pointcloud depths in color mapping.

    velocity: np.ndarray with shape (N, 3), default: None
        Velocity vector of points.

    velocity_scale: float
        Factor to scale the velocity vector by.

    velocity_max_pix: float
        Maximum length of the rendered velocity vector, as a fraction of the image width.

    Returns
    -------
    img: np.array
        Image with rendered point cloud.
    """
    if len(point_cloud) == 0:
        return img
    # Move point cloud to the camera's (C) reference frame from the world (W)
    Xc = camera.p_cw * point_cloud
    # Project the points as if they were in the camera's frame of reference
    uv = Camera(K=camera.K).project(Xc)
    # Colorize the point cloud based on depth
    z_c = Xc[:, 2]
    zinv_c = 1. / (z_c + 1e-6)
    zinv_c *= norm_depth
    colors = (cmap(np.clip(zinv_c, 0., 1.0))[:, :3] * 255)
    # Keep only points that project inside the image and lie in front of the camera
    H, W, _ = img.shape
    in_view = np.logical_and.reduce([(uv >= 0).all(axis=1), uv[:, 0] < W, uv[:, 1] < H, z_c > 0])
    uv, colors = uv[in_view].astype(int), colors[in_view]
    for row, color in zip(uv, colors):
        cx, cy = row
        cv2.circle(img, (cx, cy), 10, color, thickness=3)

    def clip_norm(v, x):
        M = np.linalg.norm(v)
        if M == 0:
            return v
        return np.clip(M, 0, x) * v / M

    if velocity is not None:
        tail = point_cloud + velocity_scale * velocity
        uv_tail = Camera(K=camera.K).project(camera.p_cw * tail)
        uv_tail = uv_tail[in_view].astype(int)
        for row, row_tail, color in zip(uv, uv_tail, colors):
            v_2d = row_tail - row
            v_2d = clip_norm(v_2d, velocity_max_pix * W)
            cx, cy = row
            cx2, cy2 = row + v_2d.astype(int)
            cx2 = np.clip(cx2, 0, W - 1)
            cy2 = np.clip(cy2, 0, H - 1)
            cv2.arrowedLine(img, (cx, cy), (cx2, cy2), color, thickness=2, line_type=cv2.LINE_AA)
    return img
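# Hedged usage sketch for `render_radar_pointcloud_on_image`. The extrinsics composition mirrors
# the one used in `visualize_cameras` below (radar -> body -> camera); the radar/camera datum field
# names ('extrinsics', 'point_cloud', 'velocity') are assumptions based on that function.
def example_overlay_radar(cam_datum, radar_datum):
    img = np.array(cam_datum['rgb']).copy()
    p_camera_radar = cam_datum['extrinsics'].inverse() * radar_datum['extrinsics']
    return render_radar_pointcloud_on_image(
        img,
        Camera(K=cam_datum['intrinsics'], p_cw=p_camera_radar),
        radar_datum['point_cloud'],
        velocity=radar_datum['velocity'])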
def visualize_cameras(
    camera_datums,
    id_to_name,
    lidar_datums=None,
    rgb_resize_factor=1.0,
    # `BoundingBox3D` kwargs
    bbox3d_font_scale=1.0,
    bbox3d_line_thickness=4,
    # `render_pointcloud_on_image` kwargs
    pc_rgb_cmap=MPL_JET_CMAP,
    pc_rgb_norm_depth=10,
    pc_rgb_dilation=3,
    radar_datums=None,
):
    """Create camera visualization that shows 3D bounding boxes, and optionally projected pointcloud.

    Parameters
    ----------
    camera_datums: List[OrderedDict]
        List of camera datums as a dictionary.

    id_to_name: OrderedDict
        Mapping from class IDs to class names.

    lidar_datums: List[OrderedDict] or None, default: None
        List of lidar datums as a dictionary. If given, then draw pointcloud contained in all datums.

    rgb_resize_factor: float, default: 1.0
        Resize images by this factor before tiling them into a single panel.

    bbox3d_font_scale: float, default: 1.0
        Font scale used for text labels.

    bbox3d_line_thickness: int, default: 4
        Thickness of lines used for drawing 3D bounding boxes.

    pc_rgb_cmap: matplotlib.colors.Colormap, default: MPL_JET_CMAP
        Colormap used for visualizing the inverse depth of the projected pointcloud.

    pc_rgb_norm_depth: int, default: 10
        Depth value to normalize (inverse) pointcloud depths in color mapping.

    pc_rgb_dilation: int, default: 3
        Dilation factor applied on each point in pointcloud.

    radar_datums: List[OrderedDict], default: None
        List of radar datums to visualize.

    Returns
    -------
    rgb_viz: List[np.ndarray]
        List of camera visualization images, one for each camera.
    """
    rgb_viz = []
    for cam_datum in camera_datums:
        rgb = np.array(cam_datum['rgb']).copy()

        if lidar_datums is not None:
            # 1. Render pointcloud
            for lidar_datum in lidar_datums:
                # lidar -> body -> camera
                p_LC = cam_datum['extrinsics'].inverse() * lidar_datum['extrinsics']
                rgb = render_pointcloud_on_image(
                    rgb,
                    Camera(K=cam_datum['intrinsics'], p_cw=p_LC),
                    lidar_datum['point_cloud'],
                    cmap=pc_rgb_cmap,
                    norm_depth=pc_rgb_norm_depth,
                    dilation=pc_rgb_dilation
                )

        if radar_datums is not None:
            # 2. Render radar pointcloud
            for radar_datum in radar_datums:
                # radar -> body -> camera
                p_LC = cam_datum['extrinsics'].inverse() * radar_datum['extrinsics']
                rgb = render_radar_pointcloud_on_image(
                    rgb,
                    Camera(K=cam_datum['intrinsics'], p_cw=p_LC),
                    radar_datum['point_cloud'],
                    norm_depth=pc_rgb_norm_depth,
                    velocity=radar_datum['velocity']
                )

        # 3. Render 3D bboxes
        # for bbox3d, class_id in zip(cam_datum['bounding_box_3d'], cam_datum['class_ids']):
        if 'bounding_box_3d' in cam_datum:
            for bbox3d in cam_datum['bounding_box_3d']:
                class_name = id_to_name[bbox3d.class_id]
                rgb = bbox3d.render(
                    rgb,
                    Camera(K=cam_datum['intrinsics']),
                    line_thickness=bbox3d_line_thickness,
                    class_name=class_name,
                    font_scale=bbox3d_font_scale,
                )

        rgb_viz.append(cv2.resize(rgb, None, fx=rgb_resize_factor, fy=rgb_resize_factor))
    return rgb_viz
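# Hedged usage sketch for `visualize_cameras`: render every camera of one synchronized sample and
# tile the per-camera overlays into a single panel. Writing the panel with OpenCV and tiling with
# cv2.hconcat assume all cameras share the same resolution; the helper name and output path are
# illustrative, not part of the library.
def example_visualize_sample(camera_datums, lidar_datums, radar_datums, id_to_name):
    rgb_viz = visualize_cameras(
        camera_datums,
        id_to_name,
        lidar_datums=lidar_datums,
        radar_datums=radar_datums,
        rgb_resize_factor=0.5)
    panel = cv2.hconcat(rgb_viz)
    # The images are in RGB order (they come from PIL), so convert before writing with OpenCV.
    cv2.imwrite('cameras_viz.png', cv2.cvtColor(panel, cv2.COLOR_RGB2BGR))
    return panel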