def parse_annotations_3d_proto(annotation_file, json_category_id_to_contiguous_id):
    """Parse annotations from BoundingBox3DAnnotations structure.

    Parameters
    ----------
    annotation_file: str
        Path to JSON file containing annotations for 3D bounding boxes

    json_category_id_to_contiguous_id: dict
        Lookup from COCO style JSON id's to contiguous id's

    Returns
    -------
    tuple holding:
        boxes: list of BoundingBox3D
            Tensor containing bounding boxes for this sample
            (pose.quat.qw, pose.quat.qx, pose.quat.qy, pose.quat.qz,
            pose.tvec.x, pose.tvec.y, pose.tvec.z, width, length, height)
            in absolute scale

        class_ids: np.int64 array
            Numpy array containing class ids (aligned with ``boxes``)

        instance_ids: dict
            Map from instance_id to tuple of (box, class_id)
    """
    # *CAVEAT*: `attributes` field is defined in proto, but not being read here.
    # TODO: read attributes (see above); change outputs of all function calls.
    annotations = open_pbobject(annotation_file, BoundingBox3DAnnotations)
    boxes, class_ids, instance_ids = [], [], {}
    for i, ann in enumerate(annotations.annotations):
        boxes.append(
            BoundingBox3D(
                Pose.from_pose_proto(ann.box.pose),
                np.float32([ann.box.width, ann.box.length, ann.box.height]),
                ann.num_points, ann.box.occlusion, ann.box.truncation))
        class_ids.append(json_category_id_to_contiguous_id[ann.class_id])
        instance_ids[ann.instance_id] = (boxes[i], class_ids[i])
    return boxes, class_ids, instance_ids
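
# Illustrative usage sketch (the annotation path and the category-id mapping below are
# hypothetical, not part of this module; they only show the expected call shape):
#
#   json_category_id_to_contiguous_id = {1: 0, 2: 1, 3: 2}
#   boxes, class_ids, instance_ids = parse_annotations_3d_proto(
#       'scene_dir/bounding_box_3d/000000.json', json_category_id_to_contiguous_id)
#   assert len(boxes) == len(class_ids)  # boxes and class ids stay aligned by index
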
def get_point_cloud_from_datum(self, scene_idx, sample_idx_in_scene, datum_idx_in_sample):
    """Get the sample point cloud data from point cloud datum.

    Parameters
    ----------
    scene_idx: int
        Index of the scene.

    sample_idx_in_scene: int
        Index of the sample within the scene at scene_idx.

    datum_idx_in_sample: int
        Index of datum within sample datum keys.

    Returns
    -------
    data: OrderedDict
        "timestamp": int
            Timestamp of the point cloud in microseconds.

        "datum_name": str
            Sensor name from which the data was collected.

        "extrinsics": Pose
            Sensor extrinsics with respect to the vehicle frame.

        "point_cloud": np.ndarray (N x 3)
            Point cloud in the local/world (L) frame returning X, Y and Z
            coordinates. The local frame is consistent across multiple
            timesteps in a scene.

        "extra_channels": np.ndarray (N x M)
            Remaining channels from point_cloud (i.e. lidar intensity I or pixel colors RGB).

        "pose": Pose
            Pose of sensor with respect to the world/global/local frame
            (reference frame that is initialized at start-time), i.e. this
            provides the ego-pose `pose_WS` where S refers to the point cloud
            sensor (S).

        "bounding_box_3d": list of BoundingBox3D
            3D bounding boxes for this sample specified in this point cloud
            sensor's reference frame, i.e. this provides the bounding box (B)
            in the sensor's (S) reference frame `box_SB`.

        "class_ids": np.ndarray dtype=np.int64
            Tensor containing class ids (aligned with ``bounding_box_3d``).

        "instance_ids": np.ndarray dtype=np.int64
            Tensor containing instance ids (aligned with ``bounding_box_3d``).
    """
    datum = self.get_datum(scene_idx, sample_idx_in_scene, datum_idx_in_sample)
    assert datum.datum.WhichOneof('datum_oneof') == 'point_cloud'

    # Determine the ego-pose of the lidar sensor (S) with respect to the world (W) @ t=Ts.
    pose_WS_Ts = Pose.from_pose_proto(datum.datum.point_cloud.pose) \
        if hasattr(datum.datum.point_cloud, 'pose') else Pose()

    # Get sensor extrinsics for the datum name.
    pose_VS = self.get_sensor_extrinsics(
        self.get_sample(scene_idx, sample_idx_in_scene).calibration_key, datum.id.name)

    # Points are described in the lidar sensor's (S) frame captured at the
    # corresponding lidar timestamp (Ts).
    X_S, annotations = self.load_datum_and_annotations(scene_idx, sample_idx_in_scene, datum_idx_in_sample)
    data = OrderedDict({
        "timestamp": datum.id.timestamp.ToMicroseconds(),
        "datum_name": datum.id.name,
        "extrinsics": pose_VS,
        "pose": pose_WS_Ts,
        "point_cloud": X_S[:, :3],
        "extra_channels": X_S[:, 3:],
    })

    # Extract 3D bounding box labels, if requested. Also check whether the
    # BOUNDING_BOX_3D annotation exists, since some datasets have sparse annotations.
    if "bounding_box_3d" in self.requested_annotations and "bounding_box_3d" in annotations:
        annotation_data = load_bounding_box_3d_annotations(
            annotations, self.get_scene_directory(scene_idx), self.json_category_id_to_contiguous_id)
        data.update(annotation_data)
    return data
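
# Illustrative usage sketch (assumes `dataset` is an instance of this loader class and that
# the datum at the given indices is a point cloud; all indices below are hypothetical):
#
#   pc_datum = dataset.get_point_cloud_from_datum(scene_idx=0, sample_idx_in_scene=0, datum_idx_in_sample=0)
#   points = pc_datum["point_cloud"]          # (N, 3) X, Y, Z coordinates
#   channels = pc_datum["extra_channels"]     # (N, M) remaining channels, e.g. lidar intensity
#   pose_WS = pc_datum["pose"]                # ego-pose of the lidar sensor (S) at its timestamp
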
def get_image_from_datum(self, scene_idx, sample_idx_in_scene, datum_idx_in_sample):
    """Get the sample image data from image datum.

    Parameters
    ----------
    scene_idx: int
        Index of the scene.

    sample_idx_in_scene: int
        Index of the sample within the scene at scene_idx.

    datum_idx_in_sample: int
        Index of datum within sample datum keys.

    Returns
    -------
    data: OrderedDict
        "timestamp": int
            Timestamp of the image in microseconds.

        "datum_name": str
            Sensor name from which the data was collected.

        "rgb": PIL.Image (mode=RGB)
            Image in RGB format.

        "intrinsics": np.ndarray
            Camera intrinsics.

        "extrinsics": Pose
            Camera extrinsics with respect to the vehicle frame.

        "pose": Pose
            Pose of sensor with respect to the world/global/local frame
            (reference frame that is initialized at start-time), i.e. this
            provides the ego-pose `pose_WC`.

        "bounding_box_2d": np.ndarray dtype=np.float32
            Tensor containing bounding boxes for this sample
            (x, y, w, h) in absolute pixel coordinates.

        "bounding_box_3d": list of BoundingBox3D
            3D bounding boxes for this sample specified in this camera's
            reference frame, i.e. this provides the bounding box (B) in the
            camera's (C) reference frame `box_CB`.

        "class_ids": np.ndarray dtype=np.int64
            Tensor containing class ids (aligned with ``bounding_box_2d`` and ``bounding_box_3d``).

        "instance_ids": np.ndarray dtype=np.int64
            Tensor containing instance ids (aligned with ``bounding_box_2d`` and ``bounding_box_3d``).
    """
    datum = self.get_datum(scene_idx, sample_idx_in_scene, datum_idx_in_sample)
    assert datum.datum.WhichOneof('datum_oneof') == 'image'

    # Get camera calibration and extrinsics for the datum name.
    sample = self.get_sample(scene_idx, sample_idx_in_scene)
    camera = self.get_camera_calibration(sample.calibration_key, datum.id.name)
    pose_VC = self.get_sensor_extrinsics(sample.calibration_key, datum.id.name)

    # Get ego-pose for the image (at the corresponding image timestamp t=Tc).
    pose_WC_Tc = Pose.from_pose_proto(datum.datum.image.pose) \
        if hasattr(datum.datum.image, 'pose') else Pose()

    # Populate data for the image datum.
    image, annotations = self.load_datum_and_annotations(scene_idx, sample_idx_in_scene, datum_idx_in_sample)
    data = OrderedDict({
        "timestamp": datum.id.timestamp.ToMicroseconds(),
        "datum_name": datum.id.name,
        "rgb": image,
        "intrinsics": camera.K,
        "extrinsics": pose_VC,
        "pose": pose_WC_Tc
    })

    # Extract 2D/3D bounding box labels, if requested. Also check whether the
    # BOUNDING_BOX_2D and BOUNDING_BOX_3D annotations exist, since some datasets
    # have sparse annotations.
    if self.requested_annotations:
        ann_root_dir = self.get_scene_directory(scene_idx)
        # TODO: Load the datum based on the type, no need to hardcode these conditions.
        # In particular, figure out how to handle joint conditions like this:
        if "bounding_box_2d" in self.requested_annotations and "bounding_box_3d" in self.requested_annotations \
           and "bounding_box_2d" in annotations and "bounding_box_3d" in annotations:
            annotation_data = load_aligned_bounding_box_annotations(
                annotations, ann_root_dir, self.json_category_id_to_contiguous_id)
        elif "bounding_box_2d" in self.requested_annotations and "bounding_box_2d" in annotations:
            annotation_data = load_bounding_box_2d_annotations(
                annotations, ann_root_dir, self.json_category_id_to_contiguous_id)
        elif "bounding_box_3d" in self.requested_annotations and "bounding_box_3d" in annotations:
            annotation_data = load_bounding_box_3d_annotations(
                annotations, ann_root_dir, self.json_category_id_to_contiguous_id)
        else:
            annotation_data = {}
        data.update(annotation_data)
    return data
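
# Illustrative usage sketch (assumes `dataset` is an instance of this loader class created
# with `requested_annotations` including "bounding_box_3d"; indices are hypothetical):
#
#   im_datum = dataset.get_image_from_datum(scene_idx=0, sample_idx_in_scene=0, datum_idx_in_sample=1)
#   rgb, K = im_datum["rgb"], im_datum["intrinsics"]   # PIL image and camera intrinsics matrix
#   boxes_CB = im_datum.get("bounding_box_3d", [])     # 3D boxes in the camera (C) frame, if annotated
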