def get_point_colours(points, cam_p, image):
    """Gets the colour of each point from its projected image position."""
    points_in_im = calib_utils.project_pc_to_image(points.T, cam_p)
    points_in_im_rounded = np.round(points_in_im).astype(np.int32)

    point_colours = image[points_in_im_rounded[1], points_in_im_rounded[0]]

    return point_colours
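
# Hedged usage sketch (not from the original source): get_point_colours
# expects (N, 3) points that already project inside the image, e.g. a point
# cloud pre-filtered with get_lidar_point_cloud_for_cam below.
#
#   point_cloud = get_lidar_point_cloud_for_cam(
#       sample_name, frame_calib, velo_dir, image_shape=image.shape[0:2])
#   point_colours = get_point_colours(point_cloud.T, frame_calib.p2, image)
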
def compute_orientation_3d(obj, p):
    """Computes the orientation vector given an object and the camera matrix

    Keyword arguments:
    obj -- object file to draw bounding box
    p -- camera projection matrix
    """

    # Compute rotation matrix around the y-axis
    rot = np.array([[+np.cos(obj.ry), 0, +np.sin(obj.ry)],
                    [0, 1, 0],
                    [-np.sin(obj.ry), 0, +np.cos(obj.ry)]])

    # Two endpoints: the centroid and a point obj.l ahead along +x
    orientation3d = np.array([0.0, obj.l, 0.0, 0.0, 0.0, 0.0]).reshape(3, 2)
    orientation3d = np.dot(rot, orientation3d)

    # Translate to the object's position
    orientation3d[0, :] = orientation3d[0, :] + obj.t[0]
    orientation3d[1, :] = orientation3d[1, :] + obj.t[1]
    orientation3d[2, :] = orientation3d[2, :] + obj.t[2]

    # Only draw for boxes that are in front of the camera
    for idx in np.arange(orientation3d.shape[1]):
        if orientation3d[2, idx] < 0.1:
            return None

    return calib_utils.project_pc_to_image(orientation3d, p)
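
# Hedged sketch (illustration only, not part of the original file): a sanity
# check of the yaw rotation used above. With a made-up length, ry = 0 leaves
# the heading vector along +x, and ry = -pi / 2 turns it to +z.
def _orientation_rotation_sketch():
    import numpy as np

    def heading(ry, l=4.0):
        rot = np.array([[+np.cos(ry), 0, +np.sin(ry)],
                        [0, 1, 0],
                        [-np.sin(ry), 0, +np.cos(ry)]])
        return rot.dot(np.array([l, 0.0, 0.0]))

    assert np.allclose(heading(0.0), [4.0, 0.0, 0.0])
    assert np.allclose(heading(-np.pi / 2), [0.0, 0.0, 4.0])
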
def postprocess_cen_x(pred_box_2d, pred_box_3d, cam_p):
    """Post-processes the centroid x position by projecting the predicted
    3D box, finding the ratio of the centroid position to the projected box
    width, and applying this ratio to the 2D box to find a new u position,
    which is then back-projected to a new centroid x.

    Args:
        pred_box_2d: 2D box in box_2d format [y1, x1, y2, x2]
        pred_box_3d: 3D box in box_3d format [x, y, z, l, w, h, ry]
        cam_p: camera projection matrix

    Returns:
        new_cen_x: post-processed centroid x position
    """
    focal_length = cam_p[0, 0]
    centre_u = cam_p[0, 2]

    # Project corners
    pred_box_3d_corners = obj_utils.compute_box_3d_corners(pred_box_3d)
    pred_box_corners_uv = calib_utils.project_pc_to_image(
        pred_box_3d_corners, cam_p)

    # Project centroid
    pred_cen_pc = pred_box_3d[0:3, np.newaxis]
    pred_cen_uv = calib_utils.project_pc_to_image(pred_cen_pc, cam_p)

    # Find min and max u of the projected corners
    pred_box_min_u = np.amin(pred_box_corners_uv[0])
    pred_box_max_u = np.amax(pred_box_corners_uv[0])

    # Find centroid u ratio
    pred_box_w = pred_box_max_u - pred_box_min_u
    pred_box_cen_u_ratio = (pred_cen_uv[0] - pred_box_min_u) / pred_box_w

    # Find new u from the original 2D detection
    pred_box_w = pred_box_2d[3] - pred_box_2d[1]
    pred_box_u = pred_box_2d[1] + pred_box_cen_u_ratio * pred_box_w

    # Calculate new centroid x
    i = pred_box_u - centre_u

    # Similar triangles ratio (x / i = z / f)
    pred_cen_z = pred_box_3d[2]
    ratio = pred_cen_z / focal_length
    new_cen_x = i * ratio

    return new_cen_x
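
# Hedged sketch (made-up camera values): a minimal numeric check of the
# similar-triangles relation used above, x = (u - c_u) * z / f.
def _similar_triangles_sketch():
    import numpy as np

    f, c_u = 700.0, 600.0  # assumed focal length and principal point (px)
    x, z = 2.0, 20.0       # assumed centroid position in the camera frame (m)

    # Forward pinhole projection of the centroid: u = f * x / z + c_u
    u = f * x / z + c_u

    # Back-projection as done at the end of postprocess_cen_x
    recovered_x = (u - c_u) * (z / f)
    assert np.isclose(recovered_x, x)
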
def proj_points(xz_dist, centroid_y, viewing_angle, cam2_inst_points_local,
                cam_p, rotate_view=True):
    """Projects instance points based on the estimated transformation
    calculated from the xz distance and viewing angle.

    Args:
        xz_dist: distance along the viewing angle
        centroid_y: box centroid y
        viewing_angle: viewing angle
        cam2_inst_points_local: (N, 3) instance points
        cam_p: (3, 4) camera projection matrix
        rotate_view: bool whether to rotate by the viewing angle

    Returns:
        points_uv: (2, N) The points projected in u, v coordinates
        valid_points_mask: (N) Mask of valid points
    """
    guess_x = xz_dist * np.sin(viewing_angle)
    guess_y = centroid_y
    guess_z = xz_dist * np.cos(viewing_angle)

    # Rotate predicted instance points to the viewing angle
    # and translate to the guessed centroid
    rot_mat = transform_utils.np_get_tr_mat(viewing_angle, (0.0, 0.0, 0.0))
    t_mat = transform_utils.np_get_tr_mat(0.0, [guess_x, guess_y, guess_z])

    if rotate_view:
        cam2_points_rotated = transform_utils.apply_tr_mat_to_points(
            rot_mat, cam2_inst_points_local)
    else:
        cam2_points_rotated = cam2_inst_points_local

    cam2_points_global = transform_utils.apply_tr_mat_to_points(
        t_mat, cam2_points_rotated)

    # Get valid points mask
    valid_points_mask = np.sum(np.abs(cam2_points_rotated), axis=1) > 0.1

    # Shift points into the cam0 frame for projection.
    # Get x offset (b_cam) from calibration: cam_mat[0, 3] = (-f_x * b_cam)
    x_offset = -cam_p[0, 3] / cam_p[0, 0]

    # Shift points from the cam2 frame to the cam0 frame
    cam0_points_global = (cam2_points_global + [x_offset, 0, 0]) \
        * valid_points_mask.reshape(-1, 1)

    # Project back to the image
    pred_points_in_img = calib_utils.project_pc_to_image(
        cam0_points_global.T, cam_p) * valid_points_mask

    return pred_points_in_img, valid_points_mask
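
# Hedged sketch (made-up calibration values): how the cam2 -> cam0 x offset
# falls out of a projection matrix of the form cam_mat[0, 3] = (-f_x * b_cam),
# as extracted in proj_points above.
def _baseline_offset_sketch():
    import numpy as np

    f_x, b_cam = 700.0, 0.06  # assumed focal length (px) and baseline (m)
    cam_p = np.array([[f_x, 0.0, 600.0, -f_x * b_cam],
                      [0.0, f_x, 180.0, 0.0],
                      [0.0, 0.0, 1.0, 0.0]])

    # Same extraction as in proj_points: recovers the baseline in metres
    x_offset = -cam_p[0, 3] / cam_p[0, 0]
    assert np.isclose(x_offset, b_cam)
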
def get_lidar_point_cloud_for_cam(sample_name, frame_calib, velo_dir,
                                  image_shape=None, cam_idx=2):
    """Gets the lidar point cloud in cam0 frame, and optionally returns only
    the points that are projected to an image.

    Args:
        sample_name: sample name
        frame_calib: FrameCalib frame calibration
        velo_dir: velodyne directory
        image_shape: (optional) image shape [h, w] to filter points inside image
        cam_idx: (optional) cam index (2 or 3) for filtering

    Returns:
        (3, N) point_cloud in the form [[x,...][y,...][z,...]]
    """
    # Get points in camera frame
    point_cloud = get_lidar_point_cloud(sample_name, frame_calib, velo_dir)

    # Only keep points in front of the camera (z > 1.0)
    point_cloud = point_cloud[:, point_cloud[2] > 1.0]

    if image_shape is None:
        return point_cloud

    else:
        # Get the projection matrix for the requested camera
        if cam_idx == 2:
            cam_p = frame_calib.p2
        elif cam_idx == 3:
            cam_p = frame_calib.p3
        else:
            raise ValueError('Invalid cam_idx', cam_idx)

        # Project to image frame
        points_in_img = calib_utils.project_pc_to_image(point_cloud,
                                                        cam_p=cam_p)
        points_in_img_rounded = np.round(points_in_img)

        # Filter based on the given image shape
        image_filter = (points_in_img_rounded[0] >= 0) & \
                       (points_in_img_rounded[0] < image_shape[1]) & \
                       (points_in_img_rounded[1] >= 0) & \
                       (points_in_img_rounded[1] < image_shape[0])

        filtered_point_cloud = point_cloud[:, image_filter].astype(np.float32)

        return filtered_point_cloud
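
# Hedged sketch (made-up pixel coordinates): the image-bounds filter used
# above, for a [h, w] = [4, 6] image. Coordinates are rounded before the
# comparison, so a point at u = 5.6 would round out of bounds.
def _image_bounds_filter_sketch():
    import numpy as np

    image_shape = [4, 6]
    points_in_img = np.array([[-1.0, 2.0, 4.9, 6.0],   # u
                              [0.0, 3.0, 3.4, 1.0]])   # v
    rounded = np.round(points_in_img)

    image_filter = (rounded[0] >= 0) & (rounded[0] < image_shape[1]) & \
                   (rounded[1] >= 0) & (rounded[1] < image_shape[0])
    assert image_filter.tolist() == [False, True, True, False]
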
def get_viewing_angle_box_3d(box_3d, cam_p=None, version='x_offset'):
    """Calculates the viewing angle to the centroid of a box_3d

    Args:
        box_3d: box_3d in cam_0 frame
        cam_p: camera projection matrix, required if version is not 'cam_0'
        version:
            'cam_0': assuming cam_0 frame
            'x_offset': apply x_offset from camera baseline to cam_0
            'projection': project centroid to image

    Returns:
        viewing_angle: viewing angle to box centroid
    """
    format_checker.check_box_3d_format(box_3d)

    if version == 'cam_0':
        # Viewing angle in cam_0 frame
        viewing_angle = np.arctan2(box_3d[0], box_3d[2])

    elif version == 'x_offset':
        # Get x offset (b_cam) from calibration: cam_mat[0, 3] = (-f_x * b_cam)
        x_offset = -cam_p[0, 3] / cam_p[0, 0]

        # Shift box_3d from cam_0 to cam_N frame
        box_x_cam = box_3d[0] - x_offset

        # Calculate viewing angle
        viewing_angle = np.arctan2(box_x_cam, box_3d[2])

    elif version == 'projection':
        # Project centroid to the image
        proj_uv = calib_utils.project_pc_to_image(
            box_3d[0:3].reshape(3, -1), cam_p)

        centre_u = cam_p[0, 2]
        focal_length = cam_p[0, 0]
        centre_x = proj_uv[0][0]

        # Assume a depth of 1.0 to calculate the viewing angle
        # viewing_angle = atan2(i / f, 1.0)
        viewing_angle = np.arctan2((centre_x - centre_u) / focal_length, 1.0)

    else:
        raise ValueError('Invalid version', version)

    return viewing_angle
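
# Hedged sketch (made-up camera values): for an ideal pinhole with no
# baseline term (cam_p[0, 3] == 0), the 'cam_0' and 'projection' versions
# above agree, since (u - c_u) / f == x / z and
# atan2(x / z, 1.0) == atan2(x, z) for z > 0.
def _viewing_angle_consistency_sketch():
    import numpy as np

    f, c_u = 700.0, 600.0
    x, z = 3.0, 15.0

    angle_cam_0 = np.arctan2(x, z)

    u = f * x / z + c_u  # ideal pinhole projection
    angle_projection = np.arctan2((u - c_u) / f, 1.0)

    assert np.isclose(angle_cam_0, angle_projection)
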
def project_depths(point_cloud, cam_p, image_shape, max_depth=100.0):
    """Projects a point cloud into image space and saves depths per pixel.

    Args:
        point_cloud: (3, N) point cloud in cam0
        cam_p: camera projection matrix
        image_shape: image shape [h, w]
        max_depth: optional, max depth used for the depth inversion

    Returns:
        projected_depths: projected depth map
    """
    all_points = point_cloud.T

    # Save the depth corresponding to each point
    points_in_img = calib_utils.project_pc_to_image(point_cloud, cam_p)
    points_in_img_int = np.int32(np.round(points_in_img))

    # Remove points outside the image
    valid_indices = \
        (points_in_img_int[0] >= 0) & (points_in_img_int[0] < image_shape[1]) & \
        (points_in_img_int[1] >= 0) & (points_in_img_int[1] < image_shape[0])

    all_points = all_points[valid_indices]
    points_in_img_int = points_in_img_int[:, valid_indices]

    # Invert depths so that larger values correspond to closer points
    all_points[:, 2] = max_depth - all_points[:, 2]

    # Only save valid pixels. Sort by inverted depth so that closer points
    # are written last and overwrite farther points at the same pixel
    projected_depths = np.zeros(image_shape)
    order = np.argsort(all_points[:, 2])
    projected_depths[points_in_img_int[1, order],
                     points_in_img_int[0, order]] = all_points[order, 2]

    # Revert the inversion for valid pixels only
    valid_pixels = (points_in_img_int[1], points_in_img_int[0])
    projected_depths[valid_pixels] = \
        max_depth - projected_depths[valid_pixels]

    return projected_depths.astype(np.float32)
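
# Hedged sketch (made-up depths): why sorting by inverted depth before the
# fancy-index assignment keeps the closest point. NumPy writes duplicate
# indices in order, so the last (closest) value wins at a shared pixel.
def _depth_buffer_sketch():
    import numpy as np

    max_depth = 100.0
    depths = np.array([30.0, 10.0])  # two points landing on the same pixel
    inverted = max_depth - depths    # [70, 90]

    us = np.array([0, 0])
    vs = np.array([0, 0])

    depth_map = np.zeros((1, 1))
    order = np.argsort(inverted)     # farther point is written first
    depth_map[vs[order], us[order]] = inverted[order]

    assert max_depth - depth_map[0, 0] == 10.0  # closest depth is kept
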
def project_corners_3d_to_image(corners_3d, p):
    """Computes the 3D bounding box projected onto image space.

    Keyword arguments:
    corners_3d -- (3, 8) corners of the 3D bounding box
    p -- camera projection matrix

    Returns:
        corners : numpy array of corner points projected onto image space.
        face_idx: numpy array of 3D bounding box face indices, one face per row
    """
    # Indices of the 3D bounding box faces
    face_idx = np.array([0, 1, 5, 4,   # front face
                         1, 2, 6, 5,   # right face
                         2, 3, 7, 6,   # back face
                         3, 0, 4, 7]).reshape((4, 4))  # left face

    return calib_utils.project_pc_to_image(corners_3d, p), face_idx
def test_tf_project_pc_to_image(self):
    """Check that tf_project_pc_to_image matches the numpy version"""
    dataset = DatasetBuilder.build_kitti_dataset(
        DatasetBuilder.KITTI_TRAINVAL)

    np.random.seed(12345)
    point_cloud_batch = np.random.rand(32, 3, 2304)

    frame_calib = calib_utils.get_frame_calib(dataset.calib_dir, '000050')
    cam_p = frame_calib.p2

    exp_proj_uv = [
        calib_utils.project_pc_to_image(point_cloud, cam_p)
        for point_cloud in point_cloud_batch
    ]

    tf_proj_uv = calib_utils.tf_project_pc_to_image(
        point_cloud_batch, cam_p, 32)

    with self.test_session() as sess:
        proj_uv_out = sess.run(tf_proj_uv)

    np.testing.assert_allclose(exp_proj_uv, proj_uv_out)
def main():
    ##############################
    # Options
    ##############################
    point_cloud_source = 'depth_2_multiscale'

    samples_to_use = None  # None for all samples

    dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_TRAINVAL)

    out_instance_dir = 'outputs/instance_2_{}'.format(point_cloud_source)

    required_classes = [
        'Car',
        'Pedestrian',
        'Cyclist',
        'Van',
        'Truck',
        'Person_sitting',
        'Tram',
        'Misc',
    ]
    ##############################
    # End of Options
    ##############################

    # Create instance folder
    os.makedirs(out_instance_dir, exist_ok=True)

    # Get frame ids to process
    if samples_to_use is None:
        samples_to_use = dataset.get_sample_names()

    # Begin instance mask generation
    for sample_idx, sample_name in enumerate(samples_to_use):

        sys.stdout.write(
            '\r{} / {} Generating {} instances for sample {}'.format(
                sample_idx, dataset.num_samples - 1,
                point_cloud_source, sample_name))

        # Get image
        image = obj_utils.get_image(sample_name, dataset.image_2_dir)
        image_shape = image.shape[0:2]

        # Get calibration
        frame_calib = calib_utils.get_frame_calib(dataset.calib_dir,
                                                  sample_name)

        # Get point cloud
        if point_cloud_source.startswith('depth'):
            point_cloud = obj_utils.get_depth_map_point_cloud(
                sample_name, frame_calib, dataset.depth_dir)
        elif point_cloud_source == 'velo':
            point_cloud = obj_utils.get_lidar_point_cloud_for_cam(
                sample_name, frame_calib, dataset.velo_dir, image_shape)
        else:
            raise ValueError('Invalid point cloud source', point_cloud_source)

        # Filter labels according to the required classes
        obj_labels = obj_utils.read_labels(dataset.kitti_label_dir,
                                           sample_name)
        obj_labels, _ = obj_utils.filter_labels_by_class(
            obj_labels, required_classes)

        # Get 2D and 3D bounding boxes from labels
        gt_boxes_2d = [
            box_3d_encoder.object_label_to_box_2d(obj_label)
            for obj_label in obj_labels
        ]
        gt_boxes_3d = [
            box_3d_encoder.object_label_to_box_3d(obj_label)
            for obj_label in obj_labels
        ]

        instance_image = np.full(image_shape, 255, dtype=np.uint8)

        # Start instance index at 0 and generate instance masks for all boxes
        inst_idx = 0
        for obj_label, box_2d, box_3d in zip(obj_labels,
                                             gt_boxes_2d, gt_boxes_3d):

            # Apply inflation and offset to box_3d
            modified_box_3d = modify_box_3d(box_3d, obj_label)

            # Get points in 3D box
            box_points, mask = obj_utils.points_in_box_3d(
                modified_box_3d, point_cloud.T)

            # Get points in 2D box
            points_in_im = calib_utils.project_pc_to_image(
                box_points.T, cam_p=frame_calib.p2)

            mask_2d = \
                (points_in_im[0] >= box_2d[1]) & \
                (points_in_im[0] <= box_2d[3]) & \
                (points_in_im[1] >= box_2d[0]) & \
                (points_in_im[1] <= box_2d[2])

            if point_cloud_source.startswith('depth'):
                # Depth map point clouds are ordered, so recover the pixel
                # position of each point from the flattened mask
                mask_points_in_im = np.where(mask.reshape(image_shape))
                mask_points_in_im = [
                    mask_points_in_im[0][mask_2d],
                    mask_points_in_im[1][mask_2d]
                ]
                instance_pixels = np.asarray(
                    [mask_points_in_im[1], mask_points_in_im[0]])
            elif point_cloud_source == 'velo':
                # Lidar points are unordered, so use the projected positions
                instance_pixels = np.int32(np.round(points_in_im[:, mask_2d]))

            # Guarantee that indices don't exceed the image dimensions
            instance_pixels[0, :] = np.clip(instance_pixels[0, :],
                                            0, image_shape[1] - 1)
            instance_pixels[1, :] = np.clip(instance_pixels[1, :],
                                            0, image_shape[0] - 1)

            instance_image[instance_pixels[1, :],
                           instance_pixels[0, :]] = np.uint8(inst_idx)
            inst_idx += 1

        # Write the instance image to the output directory
        cv2.imwrite(out_instance_dir + '/{}.png'.format(sample_name),
                    instance_image,
                    [cv2.IMWRITE_PNG_COMPRESSION, 1])
def project_to_image_space(box_3d, calib_p2,
                           truncate=False, image_size=None,
                           discard=True, discard_before_truncation=True):
    """Projects a box_3d into image space

    Args:
        box_3d: single box_3d to project
        calib_p2: stereo calibration p2 matrix
        truncate: if True, 2D projections are truncated to be inside the image
        image_size: [w, h] must be provided if truncate is True,
            used for truncation
        discard: if True, discard boxes that are truncated over a
            certain amount
        discard_before_truncation: if True, discard boxes that are larger than
            80% of the image in width OR height BEFORE truncation. If False,
            discard boxes that are larger than 80% of the width AND height
            AFTER truncation.

    Returns:
        Projected box in image space [x1, y1, x2, y2]
        Returns None if the box is not inside the image
    """
    format_checker.check_box_3d_format(box_3d)

    obj_label = box_3d_encoder.box_3d_to_object_label(box_3d)
    corners_3d = obj_utils.compute_obj_label_corners_3d(obj_label)

    projected = calib_utils.project_pc_to_image(corners_3d, calib_p2)

    x1 = np.amin(projected[0])
    y1 = np.amin(projected[1])
    x2 = np.amax(projected[0])
    y2 = np.amax(projected[1])

    img_box = np.array([x1, y1, x2, y2])

    if truncate:
        if image_size is None:
            raise ValueError('Image size must be provided')

        image_w = image_size[0]
        image_h = image_size[1]

        # Discard invalid boxes (outside image space)
        if img_box[0] > image_w or \
                img_box[1] > image_h or \
                img_box[2] < 0 or \
                img_box[3] < 0:
            return None

        # Discard boxes that are larger than 80% of the image width OR height
        if discard and discard_before_truncation:
            img_box_w = img_box[2] - img_box[0]
            img_box_h = img_box[3] - img_box[1]
            if img_box_w > (image_w * 0.8) or img_box_h > (image_h * 0.8):
                return None

        # Truncate remaining boxes into image space
        if img_box[0] < 0:
            img_box[0] = 0
        if img_box[1] < 0:
            img_box[1] = 0
        if img_box[2] > image_w:
            img_box[2] = image_w
        if img_box[3] > image_h:
            img_box[3] = image_h

        # Discard boxes that cover most of the image after truncation
        if discard and not discard_before_truncation:
            img_box_w = img_box[2] - img_box[0]
            img_box_h = img_box[3] - img_box[1]
            if img_box_w > (image_w * 0.8) and img_box_h > (image_h * 0.8):
                return None

    return img_box
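
# Hedged sketch (made-up box and image sizes): the 80% discard rules used
# above. Before truncation a box is discarded if its width OR height exceeds
# 80% of the image; after truncation, both must exceed 80% for a discard.
def _discard_rule_sketch():
    image_w, image_h = 1242.0, 375.0
    img_box_w, img_box_h = 1100.0, 200.0  # wide but short box

    # Before truncation: discarded, since the width alone exceeds 80%
    assert (img_box_w > image_w * 0.8) or (img_box_h > image_h * 0.8)

    # After truncation: kept, since the height does not also exceed 80%
    assert not ((img_box_w > image_w * 0.8) and (img_box_h > image_h * 0.8))
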