Example #1
0
 def testKITTIObjToBBoxAndInverse(self):
     """Round-trips every labeled object through BBox3D and back to KITTI."""
     calib = kitti_data.LoadCalibrationFile(self._calib_file)
     # Both transforms depend only on the fixed calibration, so hoist them.
     cam_to_velo = kitti_data.CameraToVeloTransformation(calib)
     velo_to_cam = kitti_data.VeloToCameraTransformation(calib)
     for kitti_obj in kitti_data.LoadLabelFile(self._label_file):
         box3d = kitti_data._KITTIObjectToBBox3D(kitti_obj, cam_to_velo)
         loc, dims, rot_y = kitti_data.BBox3DToKITTIObject(box3d, velo_to_cam)
         # The inverse conversion must reproduce the original annotation.
         self.assertAllClose(kitti_obj['location'], loc)
         self.assertAllClose(kitti_obj['dimensions'], dims)
         self.assertAllClose(kitti_obj['rotation_y'], rot_y)
Example #2
0
    def testVeloToImagePlaneTransformation(self):
        """Projects one 3D box into the image and compares to the 2D GT box."""
        objects = kitti_data.LoadLabelFile(self._label_file)
        calib = kitti_data.LoadCalibrationFile(self._calib_file)

        # Only apply to object 0.
        first_obj = objects[0]
        bbox3d = kitti_data._KITTIObjectToBBox3D(
            first_obj, kitti_data.CameraToVeloTransformation(calib))

        # Convert to corners in our canonical space.
        corners_t = geometry.BBoxCorners(
            tf.constant([[bbox3d]], dtype=tf.float32))
        with self.session():
            corners = self.evaluate(corners_t)
        corners = corners.reshape([8, 3])

        # Append homogeneous coordinates, then map into the image plane.
        homogeneous = np.concatenate([corners, np.ones((8, 1))], axis=-1)
        velo_to_img = kitti_data.VeloToImagePlaneTransformation(calib)
        projected = homogeneous.dot(velo_to_img.T)

        # Normalize by the last (depth) coordinate to get pixel locations.
        projected[:, :2] /= projected[:, 2:3]

        # Axis-aligned 2D box around the projected corners:
        # [left, top, right, bottom].
        bbox = [
            np.min(projected[:, 0]),
            np.min(projected[:, 1]),
            np.max(projected[:, 0]),
            np.max(projected[:, 1]),
        ]

        # This should correspond to the GT bbox in the object's 'bbox' field;
        # atol=0.1 since values should agree to the nearest pixel.
        self.assertAllClose(bbox, first_obj['bbox'], atol=0.1)
Example #3
0
 def testVeloToCamAndCamToVeloAreInverses(self):
     """Checks the two 4x4 rigid transforms compose to the identity."""
     calib = kitti_data.LoadCalibrationFile(self._calib_file)
     forward = kitti_data.VeloToCameraTransformation(calib)
     backward = kitti_data.CameraToVeloTransformation(calib)
     composed = np.matmul(backward, forward)
     self.assertAllClose(composed, np.eye(4))
Example #4
0
def _ReadObjectDataset(root_dir, frame_names):
    """Reads and parses KITTI dataset files into a list of TFExample protos.

    Expects the standard KITTI object-detection layout under `root_dir`:
    'image_2' (PNG images), 'calib' (calibration text files), 'velodyne'
    (point cloud .bin files) and, for training splits only, 'label_2'
    (label text files).

    Args:
        root_dir: Directory containing the KITTI subdirectories listed above.
        frame_names: Sequence of frame basenames without extension,
            e.g. '000123'.

    Returns:
        A list of tf.train.Example protos, one per entry in `frame_names`.
    """
    examples = []

    total_frames = len(frame_names)
    for frame_index, frame_name in enumerate(frame_names):
        image_file_path = os.path.join(root_dir, 'image_2',
                                       frame_name + '.png')
        calib_file_path = os.path.join(root_dir, 'calib', frame_name + '.txt')
        velo_file_path = os.path.join(root_dir, 'velodyne',
                                      frame_name + '.bin')
        label_file_path = os.path.join(root_dir, 'label_2',
                                       frame_name + '.txt')

        example = tf.train.Example()
        feature = example.features.feature

        # Frame information.
        # NOTE(review): the bytes_list assignments in this function store str
        # values ('PNG', frame_name, labels), which assumes Python 2 str ==
        # bytes; under Python 3 these would need explicit .encode() calls.
        feature['image/source_id'].bytes_list.value[:] = [frame_name]

        # 2D image data. Read in binary mode and close the handle promptly
        # (previously the handle leaked and used the default text mode,
        # which corrupts binary PNG data on some platforms/runtimes).
        with tf.gfile.Open(image_file_path, 'rb') as image_file:
            encoded_image = image_file.read()
        feature['image/encoded'].bytes_list.value[:] = [encoded_image]
        image = np.array(Image.open(io.BytesIO(encoded_image)))
        # KITTI camera images are expected to be RGB with shape (H, W, 3).
        assert image.ndim == 3
        assert image.shape[2] == 3
        image_width = image.shape[1]
        image_height = image.shape[0]
        feature['image/width'].int64_list.value[:] = [image_width]
        feature['image/height'].int64_list.value[:] = [image_height]
        feature['image/format'].bytes_list.value[:] = ['PNG']

        # 3D velodyne point data, flattened to x0,y0,z0,x1,y1,z1,... order.
        velo_dict = kitti_data.LoadVeloBinFile(velo_file_path)
        point_list = velo_dict['xyz'].ravel().tolist()
        feature['pointcloud/xyz'].float_list.value[:] = point_list
        reflectance_list = velo_dict['reflectance'].ravel().tolist()
        feature[
            'pointcloud/reflectance'].float_list.value[:] = reflectance_list

        # Object data.
        calib_dict = kitti_data.LoadCalibrationFile(calib_file_path)
        if tf.gfile.Exists(label_file_path):
            # Load object labels for training data.
            object_dicts = kitti_data.LoadLabelFile(label_file_path)
            object_dicts = kitti_data.AnnotateKITTIObjectsWithBBox3D(
                object_dicts, calib_dict)
        else:
            # No object labels for test data. Use an empty list (was `{}`)
            # to match the list type LoadLabelFile returns.
            object_dicts = []

        # Pre-size per-object columns, filled by index below.
        num_objects = len(object_dicts)
        xmins = [None] * num_objects
        xmaxs = [None] * num_objects
        ymins = [None] * num_objects
        ymaxs = [None] * num_objects
        labels = [None] * num_objects
        has_3d_infos = [None] * num_objects

        # 3D info.
        occlusions = [None] * num_objects
        truncations = [None] * num_objects
        xyzs = [None] * num_objects
        dim_xyzs = [None] * num_objects
        phis = [None] * num_objects

        for object_index, object_dict in enumerate(object_dicts):
            # 'bbox' is [left, top, right, bottom] in image pixels.
            xmins[object_index] = object_dict['bbox'][0]
            xmaxs[object_index] = object_dict['bbox'][2]
            ymins[object_index] = object_dict['bbox'][1]
            ymaxs[object_index] = object_dict['bbox'][3]
            labels[object_index] = object_dict['type']
            has_3d_infos[object_index] = 1 if object_dict['has_3d_info'] else 0
            occlusions[object_index] = object_dict['occluded']
            truncations[object_index] = object_dict['truncated']
            # 'bbox3d' packs [x, y, z, dim_x, dim_y, dim_z, phi].
            xyzs[object_index] = object_dict['bbox3d'][:3]
            dim_xyzs[object_index] = object_dict['bbox3d'][3:6]
            phis[object_index] = object_dict['bbox3d'][6]

        feature['object/image/bbox/xmin'].float_list.value[:] = xmins
        feature['object/image/bbox/xmax'].float_list.value[:] = xmaxs
        feature['object/image/bbox/ymin'].float_list.value[:] = ymins
        feature['object/image/bbox/ymax'].float_list.value[:] = ymaxs
        feature['object/label'].bytes_list.value[:] = labels
        feature['object/has_3d_info'].int64_list.value[:] = has_3d_infos
        feature['object/occlusion'].int64_list.value[:] = occlusions
        feature['object/truncation'].float_list.value[:] = truncations
        xyzs = np.array(xyzs).ravel().tolist()
        feature['object/velo/bbox/xyz'].float_list.value[:] = xyzs
        dim_xyzs = np.array(dim_xyzs).ravel().tolist()
        feature['object/velo/bbox/dim_xyz'].float_list.value[:] = dim_xyzs
        feature['object/velo/bbox/phi'].float_list.value[:] = phis

        # Transformation matrices, stored row-major as flat float lists.
        velo_to_image_plane = kitti_data.VeloToImagePlaneTransformation(
            calib_dict)
        feature['transform/velo_to_image_plane'].float_list.value[:] = (
            velo_to_image_plane.ravel().tolist())
        velo_to_camera = kitti_data.VeloToCameraTransformation(calib_dict)
        feature['transform/velo_to_camera'].float_list.value[:] = (
            velo_to_camera.ravel().tolist())
        cam_to_velo = kitti_data.CameraToVeloTransformation(calib_dict)
        feature['transform/camera_to_velo'].float_list.value[:] = (
            cam_to_velo.ravel().tolist())

        examples.append(example)
        if frame_index % 100 == 0:
            logging.info('Processed frame %d of %d.', frame_index,
                         total_frames)

    return examples