Example #1
    def CornerLoss(self, gt_bboxes, predicted_bboxes, symmetric=True):
        """Corner regularization loss.

        This function computes the corner loss, an alternative regression loss
        for box residuals. This was used in the Frustum-PointNets paper [1].

        We compute the 8 corners of both the predicted and ground truth boxes
        and apply a SmoothedL1 loss between corresponding corners. Because the
        corners jointly capture position, size, and heading, this loss
        encourages the model to maximize the IoU of its predictions.

        [1] Frustum PointNets for 3D Object Detection from RGB-D Data
            https://arxiv.org/pdf/1711.08488.pdf

        Args:
          gt_bboxes: tf.float32 of shape [..., 7] which contains (x, y, z, dx,
            dy, dz, phi), corresponding to ground truth bbox parameters.
          predicted_bboxes: tf.float32 of same shape as gt_bboxes containing
            predicted bbox parameters.
          symmetric: boolean. If True, computes the minimum of the corner loss
            with respect to both the gt box and the gt box rotated 180 degrees.

        Returns:
          tf.float32 Tensor of shape [...] where each entry contains the corner
          loss for the corresponding bbox.
        """
        bbox_shape = py_utils.GetShape(gt_bboxes)
        batch_size = bbox_shape[0]

        gt_bboxes = tf.reshape(gt_bboxes, [batch_size, -1, 7])
        predicted_bboxes = tf.reshape(predicted_bboxes, [batch_size, -1, 7])

        gt_corners = geometry.BBoxCorners(gt_bboxes)
        predicted_corners = geometry.BBoxCorners(predicted_bboxes)
        corner_dist = tf.norm(predicted_corners - gt_corners, axis=-1)
        huber_loss = self.ScaledHuberLoss(labels=tf.zeros_like(corner_dist),
                                          predictions=corner_dist)
        huber_loss = tf.reduce_sum(huber_loss, axis=-1)

        if symmetric:
            # Compute the loss assuming the ground truth is flipped 180, and
            # take the minimum of the two losses.
            rot = tf.constant([[[0., 0., 0., 0., 0., 0., np.pi]]],
                              dtype=tf.float32)
            rotated_gt_bboxes = gt_bboxes + rot
            rotated_gt_corners = geometry.BBoxCorners(rotated_gt_bboxes)
            rotated_corner_dist = tf.norm(predicted_corners -
                                          rotated_gt_corners,
                                          axis=-1)
            rotated_huber_loss = self.ScaledHuberLoss(
                labels=tf.zeros_like(rotated_corner_dist),
                predictions=rotated_corner_dist)
            rotated_huber_loss = tf.reduce_sum(rotated_huber_loss, axis=-1)
            huber_loss = tf.minimum(huber_loss, rotated_huber_loss)

        huber_loss = tf.reshape(huber_loss, bbox_shape[:-1])
        return huber_loss
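For intuition, here is a minimal self-contained NumPy sketch of the same computation. It is a hypothetical standalone rewrite: box_corners and huber below are stand-ins for geometry.BBoxCorners and self.ScaledHuberLoss, assuming the center-based corner convention exercised by the tests in Example #2.

import numpy as np

def box_corners(box):
    # 8 corners of a (x, y, z, dx, dy, dz, phi) box, centered on (x, y, z)
    # and rotated by phi about the z axis.
    x, y, z, dx, dy, dz, phi = box
    xs = np.array([1, 1, -1, -1, 1, 1, -1, -1]) * dx / 2.
    ys = np.array([1, -1, -1, 1, 1, -1, -1, 1]) * dy / 2.
    zs = np.array([1, 1, 1, 1, -1, -1, -1, -1]) * dz / 2.
    c, s = np.cos(phi), np.sin(phi)
    return np.stack([c * xs - s * ys + x, s * xs + c * ys + y, zs + z], axis=-1)

def huber(d, delta=1.0):
    # Smooth-L1 on non-negative corner distances.
    return np.where(d < delta, 0.5 * d ** 2, delta * (d - 0.5 * delta))

def corner_loss(gt, pred, symmetric=True):
    dist = np.linalg.norm(box_corners(pred) - box_corners(gt), axis=-1)
    loss = huber(dist).sum()
    if symmetric:
        flipped = gt + np.array([0., 0., 0., 0., 0., 0., np.pi])
        flipped_dist = np.linalg.norm(
            box_corners(pred) - box_corners(flipped), axis=-1)
        loss = min(loss, huber(flipped_dist).sum())
    return loss

# A prediction whose heading is off by almost exactly 180 degrees incurs
# nearly no loss, because the symmetric min matches it to the flipped gt.
gt = np.array([0., 0., 0., 4., 2., 1.5, 0.])
pred = gt + np.array([0., 0., 0., 0., 0., 0., np.pi - 0.01])
print(corner_loss(gt, pred))                   # ~0.002
print(corner_loss(gt, pred, symmetric=False))  # ~32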
Example #2
    def testBBoxCorners(self):
        # Create four bounding boxes, two identical ones in each batch element.
        #
        # This tests that both the batch dimension and multiple boxes per
        # element are handled properly.
        bboxes = tf.constant([[[1, 2, 3, 4, 3, 6, 0.], [1, 2, 3, 4, 3, 6, 0.]],
                              [[1, 2, 3, 4, 3, 6, np.pi / 2.],
                               [1, 2, 3, 4, 3, 6, np.pi / 2.]]])
        corners = geometry.BBoxCorners(bboxes)
        with self.session() as sess:
            corners_np = sess.run(corners)
            self.assertEqual((2, 2, 8, 3), corners_np.shape)

            # Extrema of first two boxes are ([-1, 3], [0.5, 3.5], [0, 6])
            for i in [0, 1]:
                self.assertAllClose(-1, np.min(corners_np[0, i, :, 0]))
                self.assertAllClose(3, np.max(corners_np[0, i, :, 0]))
                self.assertAllClose(0.5, np.min(corners_np[0, i, :, 1]))
                self.assertAllClose(3.5, np.max(corners_np[0, i, :, 1]))
                self.assertAllClose(0, np.min(corners_np[0, i, :, 2]))
                self.assertAllClose(6, np.max(corners_np[0, i, :, 2]))

            # Extrema of second two boxes are ([-0.5, 2.5], [0, 4], [0, 6])
            # because each is the first box rotated by 90 degrees.
            for i in [0, 1]:
                self.assertAllClose(-0.5, np.min(corners_np[1, i, :, 0]))
                self.assertAllClose(2.5, np.max(corners_np[1, i, :, 0]))
                self.assertAllClose(0, np.min(corners_np[1, i, :, 1]))
                self.assertAllClose(4, np.max(corners_np[1, i, :, 1]))
                self.assertAllClose(0, np.min(corners_np[1, i, :, 2]))
                self.assertAllClose(6, np.max(corners_np[1, i, :, 2]))
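The expected extrema can be cross-checked by hand: rotating a box by phi about z gives world-axis half-extents that mix the box's own half-extents. A small NumPy sketch, independent of geometry.BBoxCorners:

import numpy as np

def extrema(box):
    # Axis-aligned extrema of the corners of a (x, y, z, dx, dy, dz, phi) box.
    x, y, z, dx, dy, dz, phi = box
    hx = abs(np.cos(phi)) * dx / 2. + abs(np.sin(phi)) * dy / 2.
    hy = abs(np.sin(phi)) * dx / 2. + abs(np.cos(phi)) * dy / 2.
    return (x - hx, x + hx), (y - hy, y + hy), (z - dz / 2., z + dz / 2.)

print(extrema([1, 2, 3, 4, 3, 6, 0.]))         # ((-1, 3), (0.5, 3.5), (0, 6))
print(extrema([1, 2, 3, 4, 3, 6, np.pi / 2]))  # ((-0.5, 2.5), (0, 4), (0, 6)),
                                               # up to float noise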
Example #3
    def testVeloToImagePlaneTransformation(self):
        objects = kitti_data.LoadLabelFile(self._label_file)
        calib = kitti_data.LoadCalibrationFile(self._calib_file)

        # Only apply to object 0.
        obj = objects[0]
        bbox3d = kitti_data._KITTIObjectToBBox3D(
            obj, kitti_data.CameraToVeloTransformation(calib))

        # Convert to corners in our canonical space.
        corners = geometry.BBoxCorners(
            tf.constant([[bbox3d]], dtype=tf.float32))
        with self.session():
            corners_np = self.evaluate(corners)
        corners_np = corners_np.reshape([8, 3])

        # Add a homogeneous coordinate.
        corners_np = np.concatenate([corners_np, np.ones((8, 1))], axis=-1)

        # Apply the velo to image plane transformation.
        velo_to_img = kitti_data.VeloToImagePlaneTransformation(calib)
        corners_np = np.dot(corners_np, velo_to_img.T)

        # Divide by the last coordinate to recover pixel locations.
        corners_np[:, 0] /= corners_np[:, 2]
        corners_np[:, 1] /= corners_np[:, 2]

        # Obtain 2D bbox.
        min_x = np.min(corners_np[:, 0])
        max_x = np.max(corners_np[:, 0])
        min_y = np.min(corners_np[:, 1])
        max_y = np.max(corners_np[:, 1])
        bbox = [min_x, min_y, max_x, max_y]  # left, top, right, bottom.

        # This should correspond to the GT bbox in obj['bbox'].
        # We use atol=0.1 here since they should be close to the nearest pixel.
        self.assertAllClose(bbox, obj['bbox'], atol=0.1)
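The velo-to-image-plane step generalizes to any [3, 4] projection matrix: append a homogeneous 1, multiply, and divide by the third coordinate. A hedged NumPy sketch with a toy pinhole matrix; the real matrix from kitti_data.VeloToImagePlaneTransformation also folds in the velodyne-to-camera extrinsics:

import numpy as np

def project_to_image(points_xyz, velo_to_img):
    # points_xyz: [N, 3]; velo_to_img: [3, 4] projection matrix.
    n = points_xyz.shape[0]
    homog = np.concatenate([points_xyz, np.ones((n, 1))], axis=-1)  # [N, 4]
    uvw = homog @ velo_to_img.T                                     # [N, 3]
    return uvw[:, :2] / uvw[:, 2:3]  # perspective divide -> pixels

# Toy pinhole camera: focal length 720, principal point (640, 360), looking
# down +z. Purely illustrative, not a KITTI calibration.
toy = np.array([[720., 0., 640., 0.],
                [0., 720., 360., 0.],
                [0., 0., 1., 0.]])
print(project_to_image(np.array([[1., 0.5, 10.]]), toy))  # [[712., 396.]]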
Example #4
    def _Extract(self, features):
        p = self.params

        source_id = py_utils.HasShape(features['image/source_id'], [])
        xmin = _Dense(features['object/image/bbox/xmin'])
        xmax = _Dense(features['object/image/bbox/xmax'])
        ymin = _Dense(features['object/image/bbox/ymin'])
        ymax = _Dense(features['object/image/bbox/ymax'])

        # 2d bounding box in image coordinates.
        bboxes = tf.stack([ymin, xmin, ymax, xmax], axis=1)
        bboxes_count = tf.shape(bboxes)[0]
        bboxes = py_utils.PadOrTrimTo(bboxes, [p.max_num_objects, 4])

        bboxes_padding = 1.0 - py_utils.PadOrTrimTo(tf.ones([bboxes_count]),
                                                    [p.max_num_objects])

        dim_xyz = tf.reshape(_Dense(features['object/velo/bbox/dim_xyz']),
                             [-1, 3])
        loc_xyz = tf.reshape(_Dense(features['object/velo/bbox/xyz']), [-1, 3])
        phi = tf.reshape(_Dense(features['object/velo/bbox/phi']), [-1, 1])
        # bboxes_3d is in [x, y, z, dx, dy, dz, phi].
        bboxes_3d = tf.concat([loc_xyz, dim_xyz, phi], axis=1)

        cx, cy, _, dx, dy, _, _ = tf.unstack(bboxes_3d, num=7, axis=-1)
        bboxes_td = tf.stack(
            [cy - dy / 2, cx - dx / 2, cy + dy / 2, cx + dx / 2], axis=-1)
        bboxes_td = py_utils.PadOrTrimTo(bboxes_td, [p.max_num_objects, 4])

        has_3d_info = tf.cast(_Dense(features['object/has_3d_info']),
                              tf.float32)
        bboxes_3d_mask = py_utils.PadOrTrimTo(has_3d_info, [p.max_num_objects])
        bboxes_td_mask = bboxes_3d_mask

        # Fill in difficulties from bounding box height, truncation and occlusion.
        bb_height = ymax - ymin
        box_image_height = py_utils.PadOrTrimTo(bb_height, [p.max_num_objects])
        box_image_height *= bboxes_3d_mask

        # Occlusion level in [0, 3]: 0 = fully visible, 1 = partly occluded,
        # 2 = largely occluded, 3 = unknown.
        occlusion = tf.reshape(_Dense(features['object/occlusion']), [-1])
        occlusion = tf.cast(occlusion, tf.float32)
        occlusion = py_utils.PadOrTrimTo(occlusion, [p.max_num_objects])
        occlusion *= bboxes_3d_mask

        # Truncation: 0 -> not truncated, 1.0 -> truncated
        truncation = tf.reshape(_Dense(features['object/truncation']), [-1])
        truncation = py_utils.PadOrTrimTo(truncation, [p.max_num_objects])
        truncation *= bboxes_3d_mask

        difficulties = ComputeKITTIDifficulties(box_image_height, occlusion,
                                                truncation)
        difficulties = py_utils.PadOrTrimTo(difficulties, [p.max_num_objects])

        # Add a batch axis to call BBoxCorners, and take the first result back.
        bbox3d_corners = geometry.BBoxCorners(bboxes_3d[tf.newaxis, ...])[0]

        # Project the 3D bbox to the image plane.
        velo_to_image_plane = features['transform/velo_to_image_plane']
        bboxes3d_proj_to_image_plane = geometry.PointsToImagePlane(
            tf.reshape(bbox3d_corners, [-1, 3]), velo_to_image_plane)

        # Output is [num_objects, 8 corners per object, (x, y)].
        bboxes3d_proj_to_image_plane = tf.reshape(bboxes3d_proj_to_image_plane,
                                                  [-1, 8, 2])
        bboxes3d_proj_to_image_plane = py_utils.PadOrTrimTo(
            bboxes3d_proj_to_image_plane, [p.max_num_objects, 8, 2])

        texts = features['object/label'].values
        labels = ops.static_map_string_int(x=texts,
                                           keys=self.KITTI_CLASS_NAMES)

        labels = py_utils.PadOrTrimTo(labels, [p.max_num_objects])
        texts = py_utils.PadOrTrimTo(texts, [p.max_num_objects])

        # Filter labels by setting bboxes_padding, bboxes_3d_mask, and
        # bboxes_td_mask appropriately.
        if p.filter_labels is not None:
            valid_labels = tf.constant([p.filter_labels])
            bbox_mask = tf.reduce_any(tf.equal(tf.expand_dims(labels, 1),
                                               valid_labels),
                                      axis=1)
            bbox_mask = tf.cast(bbox_mask, tf.float32)
            bboxes_padding = 1 - bbox_mask * (1 - bboxes_padding)
            filtered_bboxes_3d_mask = bboxes_3d_mask * bbox_mask
            bboxes_td_mask *= bbox_mask
        else:
            filtered_bboxes_3d_mask = bboxes_3d_mask

        # Placeholder for counting the number of laser points that reside within
        # each 3-d bounding box. This must be filled in outside of this function
        # based on the loaded 3-d laser points.
        bboxes_3d_num_points = tf.zeros([p.max_num_objects], dtype=tf.int32)
        bboxes_3d_num_points = py_utils.PadOrTrimTo(bboxes_3d_num_points,
                                                    [p.max_num_objects])

        # Pad bboxes_3d.
        bboxes_3d = py_utils.PadOrTrimTo(bboxes_3d, [p.max_num_objects, 7])

        return py_utils.NestedMap(
            source_id=source_id,
            bboxes_count=bboxes_count,
            bboxes=bboxes,
            bboxes_padding=bboxes_padding,
            bboxes_3d=bboxes_3d,
            bboxes_3d_mask=filtered_bboxes_3d_mask,
            unfiltered_bboxes_3d_mask=bboxes_3d_mask,
            bboxes3d_proj_to_image_plane=bboxes3d_proj_to_image_plane,
            bboxes_td=bboxes_td,
            bboxes_td_mask=bboxes_td_mask,
            bboxes_3d_num_points=bboxes_3d_num_points,
            labels=labels,
            texts=texts,
            box_image_height=box_image_height,
            occlusion=occlusion,
            truncation=truncation,
            difficulties=difficulties)
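ComputeKITTIDifficulties is not shown here. For reference, a sketch of the standard KITTI evaluation cutoffs it presumably encodes; the thresholds are the official ones, but the return values below are illustrative and may not match this codebase's integer encoding:

def kitti_difficulty(box_image_height, occlusion, truncation):
    # Official KITTI eval cutoffs: minimum 2D box height in pixels,
    # maximum occlusion level, and maximum truncation fraction.
    if box_image_height >= 40 and occlusion <= 0 and truncation <= 0.15:
        return 'easy'
    if box_image_height >= 25 and occlusion <= 1 and truncation <= 0.30:
        return 'moderate'
    if box_image_height >= 25 and occlusion <= 2 and truncation <= 0.50:
        return 'hard'
    return 'ignored'  # below even the 'hard' cutoffs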
Example #5
  def ProcessOutputs(self, input_batch, model_outputs):
    """Produce additional decoder outputs for KITTI.

    Args:
      input_batch: A .NestedMap of the inputs to the model.
      model_outputs: A .NestedMap of the outputs of the model, including::
        - per_class_predicted_bboxes: [batch, num_classes, num_boxes, 7] float
          Tensor with per class 3D (7 DOF) bounding boxes.
        - per_class_predicted_bbox_scores: [batch, num_classes, num_boxes] float
          Tensor with per class, per box scores.
        - per_class_valid_mask: [batch, num_classes, num_boxes] masking Tensor
          indicating which boxes were still kept after NMS for each class.

    Returns:
      A NestedMap of additional decoder outputs needed for
      PostProcessDecodeOut.
    """
    p = self.params
    per_class_predicted_bboxes = model_outputs.per_class_predicted_bboxes
    batch_size, num_classes, num_boxes, _ = py_utils.GetShape(
        per_class_predicted_bboxes)
    flattened_num_boxes = num_classes * num_boxes

    input_labels = input_batch.decoder_copy.labels
    input_lasers = input_batch.decoder_copy.lasers
    input_images = input_batch.decoder_copy.images

    with tf.device('/cpu:0'):
      # Convert the predicted bounding box points to their corners
      # and then project them to the image plane.
      #
      # This output can be used to:
      #
      # A) Visualize bounding boxes (2d or 3d) on the camera image.
      #
      # B) Compute the height of the predicted boxes to filter 'too small' boxes
      #    as is done in the KITTI eval.
      predicted_bboxes = tf.reshape(per_class_predicted_bboxes,
                                    [batch_size, flattened_num_boxes, 7])
      bbox_corners = geometry.BBoxCorners(predicted_bboxes)
      bbox_corners = py_utils.HasShape(bbox_corners,
                                       [batch_size, flattened_num_boxes, 8, 3])
      utils_3d = detection_3d_lib.Utils3D()
      bbox_corners_image = utils_3d.CornersToImagePlane(
          bbox_corners, input_images.velo_to_image_plane)
      bbox_corners_image = py_utils.HasShape(
          bbox_corners_image, [batch_size, flattened_num_boxes, 8, 2])

      # Clip the bounding box corners so they remain within
      # the image coordinates.
      bbox2d_corners_image_clipped = self._BBox2DImage(bbox_corners_image,
                                                       input_images)
      bbox2d_corners_image_clipped = py_utils.HasShape(
          bbox2d_corners_image_clipped, [batch_size, flattened_num_boxes, 4])

      # Compute the frustum mask to filter out bounding boxes that
      # are 'outside the frustum'.
      frustum_mask = self._CreateFrustumMask(bbox_corners_image,
                                             bbox2d_corners_image_clipped,
                                             input_images.height,
                                             input_images.width)

      # Reshape all of these back to [batch_size, num_classes, num_boxes, ...]
      bbox_corners_image = tf.reshape(
          bbox_corners_image, [batch_size, num_classes, num_boxes, 8, 2])

      bbox2d_corners_image_clipped = tf.reshape(
          bbox2d_corners_image_clipped, [batch_size, num_classes, num_boxes, 4])
      frustum_mask = tf.reshape(frustum_mask,
                                [batch_size, num_classes, num_boxes])

    ret = py_utils.NestedMap({
        # For mAP eval
        'source_ids': input_labels.source_id,
        'difficulties': input_labels.difficulties,
        'num_points_in_bboxes': input_batch.labels.bboxes_3d_num_points,
        # For exporting.
        'velo_to_image_plane': input_images.velo_to_image_plane,
        'velo_to_camera': input_images.velo_to_camera,
        # Predictions.
        'bbox_corners_image': bbox_corners_image,
        'bbox2d_corners_image': bbox2d_corners_image_clipped,
        'frustum_mask': frustum_mask,
        # Ground truth.
        'bboxes_3d': input_labels.bboxes_3d,
        'bboxes_3d_mask': input_labels.bboxes_3d_mask,
        'unfiltered_bboxes_3d_mask': input_labels.unfiltered_bboxes_3d_mask,
        'labels': input_labels.labels,
    })

    laser_sample = self._SampleLaserForVisualization(
        input_lasers.points_xyz, input_lasers.points_padding)
    ret.update(laser_sample)

    if p.summarize_boxes_on_image:
      ret.camera_images = input_images.image
    return ret
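_BBox2DImage and _CreateFrustumMask are not shown. The 2D-box step plausibly reduces the 8 projected corners to an axis-aligned box clipped to the image; here is a hypothetical NumPy stand-in (the real helper may order or clip the coordinates differently):

import numpy as np

def bbox2d_from_corners(corners_image, height, width):
    # corners_image: [N, 8, 2] projected (x, y) corners.
    # Returns [N, 4] (ymin, xmin, ymax, xmax) clipped to the image bounds.
    x, y = corners_image[..., 0], corners_image[..., 1]
    return np.stack([
        np.clip(y.min(axis=-1), 0., height - 1.),
        np.clip(x.min(axis=-1), 0., width - 1.),
        np.clip(y.max(axis=-1), 0., height - 1.),
        np.clip(x.max(axis=-1), 0., width - 1.),
    ], axis=-1)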
Example #6
  def CornerLoss(self, gt_bboxes, predicted_bboxes):
    """Corner regularization loss.

    This function computes the corner loss, an alternative regression loss
    for box residuals. This was used in the Frustum-PointNets paper [1].

    We compute the 8 corners of both the predicted and ground truth boxes
    and apply a SmoothedL1 loss between corresponding corners. Because the
    corners jointly capture position, size, and heading, this loss
    encourages the model to maximize the IoU of its predictions.

    [1] Frustum PointNets for 3D Object Detection from RGB-D Data
        https://arxiv.org/pdf/1711.08488.pdf

    TODO(bcyang): support arbitrary input shapes [..., 7].

    Args:
      gt_bboxes: tf.float32 of shape [batch_size, num_centers,
        num_anchor_bboxes_per_center, 7] which contains (x, y, z, dx, dy, dz,
        phi), corresponding to ground truth bbox parameters.
      predicted_bboxes: tf.float32 of same shape as gt_bboxes containing
        predicted bbox parameters.

    Returns:
      tf.float32 Tensor of shape [batch_size, num_centers,
      num_anchor_bboxes_per_center] where each entry contains the corner loss
      for the corresponding bbox.
    """
    batch_size, num_centers, num_anchor_bboxes_per_center = py_utils.GetShape(
        gt_bboxes, 3)
    gt_bboxes = py_utils.HasShape(
        gt_bboxes, [batch_size, num_centers, num_anchor_bboxes_per_center, 7])
    predicted_bboxes = py_utils.HasShape(
        predicted_bboxes,
        [batch_size, num_centers, num_anchor_bboxes_per_center, 7])

    gt_bboxes = tf.reshape(
        gt_bboxes, [batch_size, num_centers * num_anchor_bboxes_per_center, 7])
    predicted_bboxes = tf.reshape(
        predicted_bboxes,
        [batch_size, num_centers * num_anchor_bboxes_per_center, 7])
    rot = tf.constant([[[0., 0., 0., 0., 0., 0., np.pi]]], dtype=tf.float32)
    rotated_gt_bboxes = gt_bboxes + rot

    gt_corners = geometry.BBoxCorners(gt_bboxes)
    rotated_gt_corners = geometry.BBoxCorners(rotated_gt_bboxes)
    predicted_corners = geometry.BBoxCorners(predicted_bboxes)

    corner_dist = tf.norm(predicted_corners - gt_corners, axis=-1)
    rotated_corner_dist = tf.norm(
        predicted_corners - rotated_gt_corners, axis=-1)
    total_dist = tf.reduce_sum(corner_dist, axis=-1)
    rotated_total_dist = tf.reduce_sum(rotated_corner_dist, axis=-1)
    min_dist = tf.minimum(total_dist, rotated_total_dist)

    huber_loss = self.ScaledHuberLoss(
        labels=tf.zeros_like(total_dist), predictions=min_dist)
    huber_loss = tf.reshape(
        huber_loss, [batch_size, num_centers, num_anchor_bboxes_per_center])

    return huber_loss
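Note a subtle difference from the symmetric branch in Example #1: there, the Huber loss is applied per corner and then summed; here, the raw corner distances are summed first and the Huber loss is applied to the total. Once distances exceed the Huber delta the two reductions disagree, as this sketch with a generic Huber (not lingvo's ScaledHuberLoss) shows:

import numpy as np

def huber(d, delta=1.0):
    return np.where(d < delta, 0.5 * d ** 2, delta * (d - 0.5 * delta))

d = np.full(8, 2.0)       # 8 corner distances, all past delta
print(huber(d).sum())     # per-corner, then sum (Example #1): 12.0
print(huber(d.sum()))     # sum, then Huber (this example): 15.5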