def testCornerLoss(self):
  """Checks CornerLoss values for five predictions against a unit GT box."""
  utils_3d = detection_3d_lib.Utils3D()

  # Every prediction is scored against the same unit box at the origin.
  unit_box = [0., 0., 0., 1., 1., 1., 0.]
  gt_bboxes = tf.constant([[[unit_box] * 5]])
  predicted_bboxes = tf.constant([[[
      [0., 0., 0., 1., 1., 1., 0.],  # Same as GT
      [0., 0., 0., 1., 1., 1., np.pi],  # Opposite heading
      [0., 0., 0., 1., 1., 1., np.pi / 2.],  # 90-deg rotation
      [1., 1., 1., 1., 1., 1., 0],  # Different center
      [0., 0., 0., 2., 2., 2., 0],  # Different size
  ]]])

  # One expected value per prediction, listed in the same order as above.
  loss_same_as_gt = 0.
  loss_opposite_heading = 0.
  loss_quarter_turn = 8. * (1 - 0.5)
  loss_shifted_center = 8. * (np.sqrt(3.) - 0.5)
  loss_doubled_size = 8. * ((np.sqrt(0.5 * 0.5 * 3)**2) * 0.5)
  expected_loss = [[[
      loss_same_as_gt,
      loss_opposite_heading,
      loss_quarter_turn,
      loss_shifted_center,
      loss_doubled_size,
  ]]]

  corner_loss = utils_3d.CornerLoss(gt_bboxes, predicted_bboxes)
  with self.session() as sess:
    self.assertAllClose(sess.run(corner_loss), expected_loss)
def testResidualsToBBoxesNegPiToPi(self):
  """Checks ResidualsToBBoxes with the heading range set to [-pi, pi]."""
  utils_3d = detection_3d_lib.Utils3D()

  # The two anchors differ only in the sign of their heading.
  anchor_bboxes = tf.constant(
      [
          [1, 2, 3, 4, 3, 6, 0.2],
          [1, 2, 3, 4, 3, 6, -0.2],
      ],
      dtype=tf.float32)

  # Both rows share the location/dimension residuals; only the heading
  # residual (+pi vs. -pi) differs.
  shared_residuals = [
      1. / 5, 20. / 5, 300. / 6, 0.,
      np.log(9. / 3.), np.log(12. / 6.),
  ]
  residuals = tf.constant(
      [shared_residuals + [np.pi], shared_residuals + [-np.pi]],
      dtype=tf.float32)  # pyformat: disable

  # 0.2 + pi and -0.2 - pi are expected to come back as -pi + 0.2 and
  # pi - 0.2 respectively.
  expected_predicted_bboxes = np.asarray([
      [2, 22, 303, 4, 9, 12, -np.pi + 0.2],
      [2, 22, 303, 4, 9, 12, np.pi - 0.2],
  ])

  predicted_bboxes = utils_3d.ResidualsToBBoxes(
      anchor_bboxes, residuals, min_angle_rad=-np.pi, max_angle_rad=np.pi)

  with self.session() as sess:
    bboxes_value = sess.run(predicted_bboxes)
    self.assertAllClose(bboxes_value, expected_predicted_bboxes)
def testNMSIndices(self):
  """Checks BatchedNMSIndices on two overlapping boxes and one isolated box."""
  utils_3d = detection_3d_lib.Utils3D()

  # Three anchor boxes: the first two largely overlap, the third overlaps
  # with neither of them.
  #
  # A batch size of 1 is used together with the Batched version so that both
  # functions get tested.
  anchor_bboxes = tf.constant(
      [[
          [1, 2, 3, 4, 3, 6, 0.],
          [1, 2, 2, 4, 3, 6, 0.],
          [10, 20, 30, 4, 3, 6, 0.],
      ]],
      dtype=tf.float32)

  # Treat them all as high scores.
  scores = tf.constant([[0.7, 0.8, 0.6]])

  with self.session() as sess:
    indices_op, mask_op = utils_3d.BatchedNMSIndices(anchor_bboxes, scores)
    indices, mask = sess.run([indices_op, mask_op])

    # One box is filtered out.
    self.assertEqual(2, np.sum(mask))

    # The survivors are the second box (it outscores the first one it
    # overlaps with) and the last box (which overlaps with nothing).
    self.assertAllEqual([[1, 2, 0]], indices)

    # Flip the scores so that the first box is expected to win instead, and
    # drop the last box's score to 0.0 so that the default score threshold
    # filters it out too.
    scores_2 = tf.constant([[0.8, 0.7, 0.0]])
    indices_op, mask_op = utils_3d.BatchedNMSIndices(anchor_bboxes, scores_2)
    indices, mask = sess.run([indices_op, mask_op])
    self.assertEqual(1, np.sum(mask))
    self.assertAllEqual([[0, 0, 0]], indices)
def testAssignAnchorsWithPadding(self):
  """Checks AssignAnchors when the last two ground-truth boxes are masked."""
  utils_3d = detection_3d_lib.Utils3D()
  anchor_bboxes = tf.constant([
      [0, 0, 0, 1, 2, 3, 0],
      [1, 1, 1, 3, 4, 5, 0.5],
      [1, 1, 1, 1, 2, 3, 0],
      [2, 2, 2, 3, 4, 5, 0.5],
  ])
  # Ground truth is every anchor nudged by a small constant.
  gt_bboxes = anchor_bboxes + 0.05
  gt_bboxes_labels = tf.constant([1, 2, 3, 4])
  # A mask value of 0 marks the last two ground-truth boxes as padding.
  gt_bboxes_mask = tf.constant([1, 1, 0, 0])

  assigned_anchors = utils_3d.AssignAnchors(
      anchor_bboxes, gt_bboxes, gt_bboxes_labels, gt_bboxes_mask)

  with self.session() as sess:
    assigned, gt_values = sess.run((assigned_anchors, gt_bboxes))

    # Last two boxes are padded, thus not assigned.
    self.assertAllEqual(assigned.assigned_gt_labels, [1, 2, 0, 0])
    self.assertAllEqual(assigned.assigned_gt_bbox[0:2, :], gt_values[0:2, :])

    # Anchors at index 2 and 3 should match the dummy bbox.
    dummy_bbox = [0, 0, 0, 1, 1, 1, 0]
    for padded_idx in (2, 3):
      self.assertAllEqual(assigned.assigned_gt_bbox[padded_idx, :],
                          dummy_bbox)

    # First two are foreground, last two are background.
    self.assertAllEqual(assigned.assigned_cls_mask, [1, 1, 1, 1])
    self.assertAllEqual(assigned.assigned_reg_mask, [1, 1, 0, 0])

    self.assertAllEqual(assigned.assigned_gt_similarity_score.shape, [4])
def _MultiClassOrientedDecodeWithNMS(predicted_bboxes,
                                     classification_scores,
                                     nms_iou_threshold,
                                     score_threshold,
                                     max_boxes_per_class=None):
  """Runs oriented, per-class NMS over predicted boxes and their scores.

  Args:
    predicted_bboxes: [batch_size, num_boxes, 7] float Tensor containing
      predicted bounding box coordinates.
    classification_scores: [batch_size, num_boxes, num_classes] float Tensor
      containing predicted classification scores for each box.
    nms_iou_threshold: IoU threshold used to decide whether two boxes overlap
      for purposes of suppression. Either a float or a list of len
      num_classes.
    score_threshold: The score threshold passed to NMS that allows NMS to
      quickly ignore irrelevant boxes. Either a float or a list of len
      num_classes. It is strongly recommended that the score for non-active
      classes (like background) be set to 1 so they are discarded.
    max_boxes_per_class: The maximum number of boxes per example to emit. If
      None, this value is set to num_boxes from the shape of predicted_bboxes.

  Returns:
    A tuple (bbox_indices, predicted_bboxes, bbox_scores, valid_mask):

    - bbox_indices: Indices of the boxes selected after NMS. Tensor of shape
      [batch_size, num_classes, max_boxes_per_class].
    - predicted_bboxes: Filtered bboxes after NMS of shape
      [batch_size, num_classes, max_boxes_per_class, 7].
    - bbox_scores: A float32 Tensor with the score for each box of shape
      [batch_size, num_classes, max_boxes_per_class].
    - valid_mask: A float32 Tensor with 1/0 values indicating the validity of
      each box. 1 indicates valid, and 0 invalid. Tensor of shape
      [batch_size, num_classes, max_boxes_per_class].
  """
  utils_3d = detection_3d_lib.Utils3D()

  # Validate input shapes and recover the batch / box / class counts.
  predicted_bboxes = py_utils.HasShape(predicted_bboxes, [-1, -1, 7])
  batch_size, box_count, _ = py_utils.GetShape(predicted_bboxes)
  classification_scores = py_utils.HasShape(classification_scores,
                                            [batch_size, box_count, -1])
  _, _, class_count = py_utils.GetShape(classification_scores)

  # Without an explicit cap, every input box may be emitted for each class.
  boxes_per_class = (
      box_count if max_boxes_per_class is None else max_boxes_per_class)

  # Compute NMS for every sample in the batch.
  selected_indices, selected_scores, selected_mask = (
      utils_3d.BatchedOrientedNMSIndices(
          predicted_bboxes,
          classification_scores,
          nms_iou_threshold=nms_iou_threshold,
          score_threshold=score_threshold,
          max_boxes_per_class=boxes_per_class))

  # TODO(bencaine): Consider optimizing away the tf.tile or make upstream
  # changes to make predicted boxes include a class dimension.

  # Replicate the boxes once per class, then look up the original box for
  # each index selected by NMS.
  per_class_bboxes = tf.tile(predicted_bboxes[:, tf.newaxis, :, :],
                             [1, class_count, 1, 1])
  selected_bboxes = tf.array_ops.batch_gather(per_class_bboxes,
                                              selected_indices)
  return selected_indices, selected_bboxes, selected_scores, selected_mask
def testCornersToImagePlane(self):
  """Checks the static output shape of CornersToImagePlane."""
  utils_3d = detection_3d_lib.Utils3D()
  batch, num_boxes = 4, 50

  # Random 3D corners (8 per box) and one random 3x4 matrix per example.
  corners = tf.random.uniform([batch, num_boxes, 8, 3])
  velo_to_image_plane = tf.random.uniform([batch, 3, 4])

  projected = utils_3d.CornersToImagePlane(corners, velo_to_image_plane)

  # Each corner is expected to come back with two coordinates.
  self.assertEqual([batch, num_boxes, 8, 2], projected.shape)
def __init__(self, params):
  """Creates the 3D utility helper and the child layers named in `params`."""
  super().__init__(params)
  p = self.params
  self._utils = detection_3d_lib.Utils3D()

  # These children are always created, in this order.
  always_built = (
      'input_featurizer',
      'backbone',
      'class_detector',
      'regression_detector',
  )
  for child_name in always_built:
    self.CreateChild(child_name, getattr(p, child_name))

  # The direction classifier is only built when its weight is positive.
  if p.direction_classifier_weight > 0.0:
    self.CreateChild('direction_classifier', p.direction_classifier)
def testZeroResiduals(self):
  """Checks that all-zero residuals reproduce the anchor box."""
  utils_3d = detection_3d_lib.Utils3D()
  anchor = [[1, 2, 3, 4, 3, 6, 0]]
  anchor_bboxes = tf.constant(anchor, dtype=tf.float32)
  zero_residuals = tf.zeros((1, 7))

  # With nothing to add, the prediction should equal the anchor itself.
  expected_predicted_bboxes = np.asarray(anchor)

  predicted_bboxes = utils_3d.ResidualsToBBoxes(anchor_bboxes, zero_residuals)

  with self.session() as sess:
    self.assertAllClose(sess.run(predicted_bboxes), expected_predicted_bboxes)
def __init__(self, params):
  """Initializes the model base and validates the per-class loss weights.

  Args:
    params: The params for this layer. `per_class_loss_weight` and
      `num_classes` are read from it.

  Raises:
    ValueError: If `per_class_loss_weight` does not have exactly
      `num_classes` entries, or if its first entry (the background class)
      is not 0.
  """
  super(ModelBase, self).__init__(params)
  p = self.params
  self._utils = detection_3d_lib.Utils3D()

  # There must be exactly one loss weight per class.
  if len(p.per_class_loss_weight) != p.num_classes:
    raise ValueError('Need `per_class_loss_weight` to be of len equal '
                     'to the number of classes.')

  # Index 0 is treated as the background class and must carry zero weight.
  if p.per_class_loss_weight[0] != 0.0:
    raise ValueError('Background class should be assigned 0 weight. '
                     'per_class_loss_weight={}'.format(
                         p.per_class_loss_weight))
def __init__(self, params):
  """Creates the 3D utility helper and child layers under the model's scope."""
  super(ModelV1, self).__init__(params)
  p = self.params
  self._utils = detection_3d_lib.Utils3D()

  # All children are created inside a variable scope named after the model.
  with tf.variable_scope(p.name):
    always_built = (
        'featurizer',
        'backbone',
        'class_detector',
        'regression_detector',
    )
    for child_name in always_built:
      self.CreateChild(child_name, getattr(p, child_name))

    # The direction classifier is only built when its weight is positive.
    if p.direction_classifier_weight > 0.0:
      self.CreateChild('direction_classifier', p.direction_classifier)
def testResidualsToBBoxPhiFloorMod(self):
  """Checks that the predicted heading is wrapped by ResidualsToBBoxes."""
  utils_3d = detection_3d_lib.Utils3D()

  # The anchor heading is pi and the only non-zero residual is 1.0 on phi.
  anchor_bboxes = tf.constant([[1, 2, 3, 4, 3, 6, np.pi]], dtype=tf.float32)
  residuals = tf.constant([[0, 0, 0, 0, 0, 0, 1.0]], dtype=tf.float32)

  # The returned phi is expected to be floormod'ed w.r.t. pi, so pi + 1.0
  # should come back as 1.0.
  expected_predicted_bboxes = np.asarray([[1, 2, 3, 4, 3, 6, 1.]])

  predicted_bboxes = utils_3d.ResidualsToBBoxes(anchor_bboxes, residuals)

  with self.session() as sess:
    self.assertAllClose(sess.run(predicted_bboxes), expected_predicted_bboxes)
def testMakeAnchorBoxesWithRotation(self):
  """Checks MakeAnchorBoxes with two centers and two rotated anchor configs."""
  utils_3d = detection_3d_lib.Utils3D()

  centers = tf.constant([[0, 0, 0], [1, 1, 1]], dtype=tf.float32)
  dimensions = tf.constant([[1, 2, 3], [3, 4, 5]], dtype=tf.float32)
  offsets = tf.constant([[0, 0, 0], [1, 1, 1]], dtype=tf.float32)
  rotations = tf.constant([0, 0.5])

  anchor_bboxes = utils_3d.MakeAnchorBoxes(
      anchor_centers=centers,
      anchor_box_dimensions=dimensions,
      anchor_box_offsets=offsets,
      anchor_box_rotations=rotations)

  # Expected entry [i][j] is center i shifted by offset j, with the
  # dimensions and rotation of anchor config j.
  expected_anchor_bboxes = [
      [[0, 0, 0, 1, 2, 3, 0], [1, 1, 1, 3, 4, 5, 0.5]],
      [[1, 1, 1, 1, 2, 3, 0], [2, 2, 2, 3, 4, 5, 0.5]],
  ]

  with self.session() as sess:
    self.assertAllEqual(sess.run(anchor_bboxes), expected_anchor_bboxes)
def testAssignAnchorsWithoutForceMatch(self):
  """Checks AssignAnchors when force_match is turned off."""
  utils_3d = detection_3d_lib.Utils3D()
  anchor_bboxes = tf.constant(
      [
          [0, 1, 1, 2, 2, 2, 0],  # Ignored
          [-1, 1, 1, 2, 2, 2, 0],  # Background
          [0.9, 1, 1, 2, 2, 2, 0],  # Foreground
          [5, 5, 5, 1, 1, 2, 0],  # Background, since no force match
      ],
      dtype=tf.float32)

  # Only the first gt box is expected to be assigned. The second would need
  # a forced match (disabled below), and the third is far from every anchor.
  gt_bboxes = tf.constant(
      [
          [1, 1, 1, 2, 2, 2, 0],
          [5, 5, 5, 2, 2, 2, 0],
          [10, 10, 10, 2, 2, 2, 0],
      ],
      dtype=tf.float32)
  gt_bboxes_labels = tf.constant([1, 2, 3])
  gt_bboxes_mask = tf.constant([1, 1, 1])

  assigned_anchors = utils_3d.AssignAnchors(
      anchor_bboxes,
      gt_bboxes,
      gt_bboxes_labels,
      gt_bboxes_mask,
      foreground_assignment_threshold=0.5,
      background_assignment_threshold=0.25,
      force_match=False)

  with self.session() as sess:
    assigned, gt_bboxes = sess.run((assigned_anchors, gt_bboxes))

    # Only the third anchor is matched (to gt box 0); the rest get -1.
    self.assertAllEqual(assigned.assigned_gt_idx, [-1, -1, 0, -1])
    self.assertAllEqual(assigned.assigned_gt_labels, [0, 0, 1, 0])

    # Unmatched anchors are expected to carry the dummy bbox.
    dummy_bbox = [0, 0, 0, 1, 1, 1, 0]
    self.assertAllEqual(assigned.assigned_gt_bbox, [
        dummy_bbox,
        dummy_bbox,
        [1, 1, 1, 2, 2, 2, 0],
        dummy_bbox,
    ])

    # The ignored anchor is masked out of classification; only the
    # foreground anchor takes part in regression.
    self.assertAllEqual(assigned.assigned_cls_mask, [0, 1, 1, 1])
    self.assertAllEqual(assigned.assigned_reg_mask, [0, 0, 1, 0])

    self.assertAllEqual(assigned.assigned_gt_similarity_score.shape, [4])
def testCreateDenseCoordinates(self):
  """Checks CreateDenseCoordinates for one, two and three dimensions."""
  utils_3d = detection_3d_lib.Utils3D()

  # NOTE(review): each range tuple looks like (start, stop, num_steps) with
  # both endpoints included -- confirm against CreateDenseCoordinates.
  line_coords = utils_3d.CreateDenseCoordinates([(0.5, 1.5, 3)])
  with self.session() as sess:
    self.assertAllEqual(sess.run(line_coords), [[0.5], [1.0], [1.5]])

  # Two dimensions with two steps each give every (x, y) combination.
  grid_coords = utils_3d.CreateDenseCoordinates([(0, 1, 2), (1, 2, 2)])
  with self.session() as sess:
    self.assertAllEqual(
        sess.run(grid_coords), [[0, 1], [0, 2], [1, 1], [1, 2]])

  # For three dimensions only the static shape is verified.
  volume_coords = utils_3d.CreateDenseCoordinates([(0, 1, 5), (1, 2, 5),
                                                   (0, 10, 5)])
  self.assertAllEqual(volume_coords.shape, [5 * 5 * 5, 3])
def testCreateDenseCoordinatesCenterInCell(self):
  """Checks CreateDenseCoordinates when center_in_cell=True."""
  utils_3d = detection_3d_lib.Utils3D()

  # [0, 3] split into 3 cells is expected to yield the cell centers.
  line_coords = utils_3d.CreateDenseCoordinates([(0., 3., 3)],
                                                center_in_cell=True)
  with self.session():
    self.assertAllEqual(self.evaluate(line_coords), [[0.5], [1.5], [2.5]])

  # Two cells per dimension: every combination of per-axis cell centers.
  grid_coords = utils_3d.CreateDenseCoordinates([(0, 1, 2), (1, 2, 2)],
                                                center_in_cell=True)
  expected_grid = [
      [0.25, 1.25],
      [0.25, 1.75],
      [0.75, 1.25],
      [0.75, 1.75],
  ]
  with self.session():
    self.assertAllEqual(self.evaluate(grid_coords), expected_grid)
def testScaledHuberLoss(self):
  """Checks ScaledHuberLoss below, exactly at, and above delta."""
  utils_3d = detection_3d_lib.Utils3D()
  delta = 0.8
  labels = tf.constant([1, 2, 3], dtype=tf.float32)

  # The absolute errors are 0.4 (less than delta), 0.8 (exactly delta) and
  # 1.0 (more than delta), respectively.
  predictions = tf.constant([1.4, 1.2, 4.0], dtype=tf.float32)

  loss_below_delta = 1. / delta * 0.5 * (0.4)**2
  loss_at_delta = 0.5 * delta
  loss_above_delta = 1.0 - 0.5 * delta
  expected_loss = [loss_below_delta, loss_at_delta, loss_above_delta]

  huber_loss = utils_3d.ScaledHuberLoss(labels, predictions, delta=delta)
  with self.session() as sess:
    self.assertAllClose(sess.run(huber_loss), expected_loss)
def testResidualsToBBoxes(self):
  """Checks ResidualsToBBoxes on a single anchor with non-zero residuals."""
  utils_3d = detection_3d_lib.Utils3D()
  anchor_bboxes = tf.constant([[1, 2, 3, 4, 3, 6, 0]], dtype=tf.float32)

  # Residuals chosen so that the decoded box is [2, 22, 303, 4, 9, 12, 0.5].
  residual_row = [
      1. / 5,
      20. / 5,
      300. / 6,
      0.,
      np.log(9. / 3.),
      np.log(12. / 6.),
      0.5,
  ]
  residuals = tf.constant([residual_row], dtype=tf.float32)  # pyformat: disable
  expected_predicted_bboxes = np.asarray([[2, 22, 303, 4, 9, 12, 0.5]])

  predicted_bboxes = utils_3d.ResidualsToBBoxes(anchor_bboxes, residuals)

  with self.session() as sess:
    self.assertAllClose(sess.run(predicted_bboxes), expected_predicted_bboxes)
def testCornerLossAsym(self):
  """Checks CornerLoss with symmetric=False.

  With symmetric=False, a prediction whose heading is opposite to the ground
  truth is expected to produce a non-zero loss, while an identical box still
  produces zero.
  """
  utils_3d = detection_3d_lib.Utils3D()
  gt_bboxes = tf.constant([[[
      [0., 0., 0., 1., 1., 1., 0.],
      [0., 0., 0., 1., 1., 1., 0.],
  ]]])
  predicted_bboxes = tf.constant([[[
      [0., 0., 0., 1., 1., 1., 0.],  # Same as GT
      [0., 0., 0., 1., 1., 1., np.pi],  # Opposite heading
  ]]])
  expected_loss = [[[
      0.,
      8 * (np.sqrt(2) - 0.5),
  ]]]
  loss = utils_3d.CornerLoss(gt_bboxes, predicted_bboxes, symmetric=False)
  with self.session() as sess:
    actual_loss = sess.run(loss)
    # A leftover debug print of the loss used to sit here; the assertion
    # below already reports both values on failure.
    self.assertAllClose(actual_loss, expected_loss)
def testCornerLoss(self):
  """Checks that CornerLoss yields one value per box for five box pairs."""
  utils_3d = detection_3d_lib.Utils3D()

  # Every prediction is scored against the same unit box at the origin.
  unit_box = [0., 0., 0., 1., 1., 1., 0.]
  gt_bboxes = tf.constant([[[unit_box] * 5]])
  predicted_bboxes = tf.constant([[[
      [0., 0., 0., 1., 1., 1., 0.],  # Same as GT
      [0., 0., 0., 1., 1., 1., np.pi],  # Opposite heading
      [0., 0., 0., 1., 1., 1., np.pi / 2.],  # 90-deg rotation
      [1., 1., 1., 1., 1., 1., 0],  # Different center
      [0., 0., 0., 2., 2., 2., 0],  # Different size
  ]]])

  corner_loss = utils_3d.CornerLoss(gt_bboxes, predicted_bboxes)
  with self.session():
    loss_value = self.evaluate(corner_loss)
    # Only the shape is verified here, not the individual loss values.
    self.assertEqual(loss_value.shape, (1, 1, 5))
def testLocalizationResiduals(self):
  """Checks LocalizationResiduals for one anchor / ground-truth pair."""
  utils_3d = detection_3d_lib.Utils3D()
  anchor_bboxes = tf.constant([[1, 2, 3, 4, 3, 6, 0]], dtype=tf.float32)
  gt_bboxes = tf.constant([[2, 22, 303, 4, 9, 12, 0.5]], dtype=tf.float32)

  # diagonal_xy = 5 [since sqrt(3^2 + 4^2) = 5]
  expected_row = [
      1. / 5,
      20. / 5,
      300. / 6,
      0.,
      np.log(9. / 3.),
      np.log(12. / 6.),
      0.5,
  ]
  expected_residuals = np.asarray([expected_row])

  residuals = utils_3d.LocalizationResiduals(anchor_bboxes, gt_bboxes)

  with self.session() as sess:
    self.assertAllClose(sess.run(residuals), expected_residuals)
def __init__(self, params):
  """Creates the 3D utility helper and the output decoder child."""
  super(PointDetectorBase, self).__init__(params)
  self._utils_3d = detection_3d_lib.Utils3D()

  layer_params = self.params
  # The decoder is created inside a variable scope named after this layer.
  with tf.variable_scope(layer_params.name):
    self.CreateChild('output_decoder', layer_params.output_decoder)
def __init__(self, params):
  """Creates the 3D utility helper and the output decoder child."""
  super().__init__(params)
  self._utils_3d = detection_3d_lib.Utils3D()

  layer_params = self.params
  self.CreateChild('output_decoder', layer_params.output_decoder)
def testOrientedNMSIndices(self):
  """Checks BatchedOrientedNMSIndices with scalar and per-class thresholds.

  The same boxes and scores are run twice: first with a single IoU / score
  threshold shared by all classes, then with one threshold per class passed
  as lists.
  """
  utils_3d = detection_3d_lib.Utils3D()

  # Assignments and IoU scores calculated offline.
  bboxes_data = tf.constant(
      [[
          [10.35, 8.429, -1.003, 3.7, 1.64, 1.49, 1.582],
          [10.35, 8.429, -1.003, 3.7, 1.64, 1.49, 0.0],  # box 0 rotated
          [11.5, 8.429, -1.003, 3.7, 1.64, 1.49, 1.0],  # Rotated to overlap
          [13.01, 8.149, -0.953, 4.02, 1.55, 1.52, 1.592],
          [13.51, 8.39, -1.0, 4.02, 1.55, 1.52, 1.592],  # Slight translation
          [13.51, 8.39, -1.0, 1.0, 1.0, 1.52, 1.592],  # Smaller box
          [13.51, 8.39, -1.0, 1.0, 1.0, 1.52, 1.9],  # Smaller box
      ]],
      dtype=tf.float32)

  # Notes on the data:
  # Let's say we have 3 classes and a thresh of 0.1
  # Keep box [0, 3] for class 0
  # Keep box [6] only for class 1
  # Keep box [2] for class 2
  scores_data = tf.constant(
      [[
          [0.9, 0.1, 0.0],
          [0.89, 0.1, 0.01],
          [0.5, 0.01, 0.49],
          [0.8, 0.1, 0.1],
          [0.79, 0.11, 0.2],
          [0.2, 0.8, 0.1],
          [0.1, 0.9, 0.0],
      ]],
      dtype=tf.float32)

  with self.session() as sess:
    outputs = utils_3d.BatchedOrientedNMSIndices(
        bboxes_data,
        scores_data,
        nms_iou_threshold=0.1,
        score_threshold=0.3,
        max_boxes_per_class=5)
    indices, scores, valid_mask = sess.run(outputs)

    # Builtin `bool` is used because the `np.bool` alias was deprecated in
    # NumPy 1.20 and removed in 1.24.
    class_masks = [
        valid_mask[0, cls_idx, :].astype(bool) for cls_idx in range(3)
    ]

    # Check the correct number of valid results per class
    self.assertEqual(class_masks[0].sum(), 2)
    self.assertEqual(class_masks[1].sum(), 1)
    self.assertEqual(class_masks[2].sum(), 1)

    # Check the results for each class
    self.assertAllEqual(indices[0, 0, class_masks[0]], [0, 3])
    self.assertAllClose(scores[0, 0, class_masks[0]], [0.9, 0.8])

    self.assertAllEqual(indices[0, 1, class_masks[1]], [6])
    self.assertAllClose(scores[0, 1, class_masks[1]], [0.9])

    self.assertAllEqual(indices[0, 2, class_masks[2]], [2])
    self.assertAllClose(scores[0, 2, class_masks[2]], [0.49])

    # Use a list of score thresholds instead. Raising the class 0 threshold
    # to 0.899 leaves only box 0 (score 0.9) for that class.
    outputs = utils_3d.BatchedOrientedNMSIndices(
        bboxes_data,
        scores_data,
        nms_iou_threshold=[0.1, 0.1, 0.1],
        score_threshold=[0.899, 0.5, 0.3],
        max_boxes_per_class=5)
    indices, scores, valid_mask = sess.run(outputs)

    class_masks = [
        valid_mask[0, cls_idx, :].astype(bool) for cls_idx in range(3)
    ]

    # Check the correct number of valid results per class
    self.assertEqual(class_masks[0].sum(), 1)
    self.assertEqual(class_masks[1].sum(), 1)
    self.assertEqual(class_masks[2].sum(), 1)

    # Check the results for each class
    self.assertAllEqual(indices[0, 0, class_masks[0]], [0])
    self.assertAllClose(scores[0, 0, class_masks[0]], [0.9])

    self.assertAllEqual(indices[0, 1, class_masks[1]], [6])
    self.assertAllClose(scores[0, 1, class_masks[1]], [0.9])

    self.assertAllEqual(indices[0, 2, class_masks[2]], [2])
    self.assertAllClose(scores[0, 2, class_masks[2]], [0.49])
def ProcessOutputs(self, input_batch, model_outputs):
  """Produce additional decoder outputs for KITTI.

  Args:
    input_batch: A .NestedMap of the inputs to the model.
    model_outputs: A .NestedMap of the outputs of the model, including::
      - per_class_predicted_bboxes: [batch, num_classes, num_boxes, 7] float
        Tensor with per class 3D (7 DOF) bounding boxes.
      - per_class_predicted_bbox_scores: [batch, num_classes, num_boxes] float
        Tensor with per class, per box scores.
      - per_class_valid_mask: [batch, num_classes, num_boxes] masking Tensor
        indicating which boxes were still kept after NMS for each class.

  Returns:
    A NestedMap of additional decoder outputs needed for
    PostProcessDecodeOut.
  """
  p = self.params
  per_class_bboxes = model_outputs.per_class_predicted_bboxes
  batch_size, num_classes, num_boxes, _ = py_utils.GetShape(per_class_bboxes)
  # The class and box axes are folded into one for the geometry ops below.
  boxes_per_example = num_classes * num_boxes

  decoder_copy = input_batch.decoder_copy
  input_labels = decoder_copy.labels
  input_lasers = decoder_copy.lasers
  input_images = decoder_copy.images

  with tf.device('/cpu:0'):
    # Convert the predicted bounding boxes to their corners and then project
    # those corners to the image plane.
    #
    # This output can be used to:
    #
    # A) Visualize bounding boxes (2d or 3d) on the camera image.
    #
    # B) Compute the height of the predicted boxes to filter 'too small'
    #    boxes as is done in the KITTI eval.
    flat_bboxes = tf.reshape(per_class_bboxes,
                             [batch_size, boxes_per_example, 7])
    corners_3d = py_utils.HasShape(
        geometry.BBoxCorners(flat_bboxes),
        [batch_size, boxes_per_example, 8, 3])

    utils_3d = detection_3d_lib.Utils3D()
    corners_image = py_utils.HasShape(
        utils_3d.CornersToImagePlane(corners_3d,
                                     input_images.velo_to_image_plane),
        [batch_size, boxes_per_example, 8, 2])

    # Clip the bounding box corners so they remain within the image
    # coordinates.
    clipped_bbox2d = py_utils.HasShape(
        self._BBox2DImage(corners_image, input_images),
        [batch_size, boxes_per_example, 4])

    # Compute the frustum mask to filter out bounding boxes that are
    # 'outside the frustum'.
    frustum_mask = self._CreateFrustumMask(corners_image, clipped_bbox2d,
                                           input_images.height,
                                           input_images.width)

    # Reshape all of these back to [batch_size, num_classes, num_boxes, ...]
    per_class_dims = [batch_size, num_classes, num_boxes]
    corners_image = tf.reshape(corners_image, per_class_dims + [8, 2])
    clipped_bbox2d = tf.reshape(clipped_bbox2d, per_class_dims + [4])
    frustum_mask = tf.reshape(frustum_mask, per_class_dims)

  ret = py_utils.NestedMap({
      # For mAP eval
      'source_ids': input_labels.source_id,
      'difficulties': input_labels.difficulties,
      # NOTE(review): this reads input_batch.labels rather than the
      # decoder_copy labels used by the other entries -- confirm intended.
      'num_points_in_bboxes': input_batch.labels.bboxes_3d_num_points,
      # For exporting.
      'velo_to_image_plane': input_images.velo_to_image_plane,
      'velo_to_camera': input_images.velo_to_camera,
      # Predictions.
      'bbox_corners_image': corners_image,
      'bbox2d_corners_image': clipped_bbox2d,
      'frustum_mask': frustum_mask,
      # Ground truth.
      'bboxes_3d': input_labels.bboxes_3d,
      'bboxes_3d_mask': input_labels.bboxes_3d_mask,
      'unfiltered_bboxes_3d_mask': input_labels.unfiltered_bboxes_3d_mask,
      'labels': input_labels.labels,
  })

  # Merge in the laser points sampled for visualization.
  ret.update(
      self._SampleLaserForVisualization(input_lasers.points_xyz,
                                        input_lasers.points_padding))

  # Camera images are only forwarded when box summaries on images are on.
  if p.summarize_boxes_on_image:
    ret.camera_images = input_images.image
  return ret
def _SingleClassDecodeWithNMS(predicted_bboxes,
                              classification_scores,
                              nms_iou_threshold,
                              score_threshold,
                              max_boxes_per_class=None):
  """Runs class-agnostic NMS on predicted boxes and their scores.

  Args:
    predicted_bboxes: [batch_size, num_boxes, 7] float Tensor containing
      predicted bounding box coordinates.
    classification_scores: [batch_size, num_boxes, num_classes] float Tensor
      containing predicted classification scores for each box.
    nms_iou_threshold: IoU threshold used to decide whether two boxes overlap
      for purposes of suppression. Must be a float.
    score_threshold: The score threshold passed to NMS that allows NMS to
      quickly ignore irrelevant boxes. Must be a float.
    max_boxes_per_class: The maximum number of boxes per example to emit. If
      None, this value is set to num_boxes from the shape of predicted_bboxes.

  Returns:
    A tuple (predicted_bboxes, bbox_scores, valid_mask):

    - predicted_bboxes: Filtered bboxes after NMS of shape
      [batch_size, num_classes, max_boxes_per_class, 7].
    - bbox_scores: A float32 Tensor with the score for each box of shape
      [batch_size, num_classes, max_boxes_per_class].
    - valid_mask: A float32 Tensor with 1/0 values indicating the validity of
      each box. 1 indicates valid, and 0 invalid. Tensor of shape
      [batch_size, num_classes, max_boxes_per_class].

  Raises:
    ValueError: If `nms_iou_threshold` or `score_threshold` is not a float.
  """
  utils_3d = detection_3d_lib.Utils3D()

  # Validate input shapes and recover the batch / box / class counts.
  predicted_bboxes = py_utils.HasShape(predicted_bboxes, [-1, -1, 7])
  batch_size, box_count, _ = py_utils.GetShape(predicted_bboxes)
  classification_scores = py_utils.HasShape(classification_scores,
                                            [batch_size, box_count, -1])
  _, _, class_count = py_utils.GetShape(classification_scores)

  # Per-class threshold lists are not supported by this variant.
  if not isinstance(nms_iou_threshold, float):
    raise ValueError('Single class NMS only supports a scalar '
                     '`nms_iou_threshold`.')
  if not isinstance(score_threshold, float):
    raise ValueError('Single class NMS only supports a scalar '
                     '`score_threshold`.')

  # Without an explicit cap, every input box may be emitted.
  box_limit = (
      box_count if max_boxes_per_class is None else max_boxes_per_class)

  # TODO(jngiam): Change to be per-class bboxes, and hence, per-class NMS, and
  # per-class thresholding.
  # [batch, num_predicted_boxes]
  best_scores = tf.reduce_max(classification_scores, axis=-1)

  # The most likely label of a box is the class with the highest score.
  best_labels = tf.argmax(classification_scores, axis=-1)

  # Boxes whose most likely class is background (label 0) get a score of
  # zero so that background boxes don't dominate the NMS.
  is_foreground = tf.cast(best_labels > 0, tf.float32)
  nms_scores = best_scores * is_foreground

  # Compute NMS for every sample in the batch.
  nms_indices, valid_mask = utils_3d.BatchedNMSIndices(
      predicted_bboxes,
      nms_scores,
      nms_iou_threshold=nms_iou_threshold,
      score_threshold=score_threshold,
      max_num_boxes=box_limit)

  # Reorder the box data and scores according to the NMS result.
  kept_bboxes = tf.array_ops.batch_gather(predicted_bboxes, nms_indices)
  kept_scores = tf.array_ops.batch_gather(classification_scores, nms_indices)

  # Reformat the NMS output to match MultiClassOrientedDecodeWithNMS, whose
  # outputs all lead with [batch_size, num_classes, max_boxes_per_class].
  # Since this NMS is not class specific, boxes and masks are tiled
  # num_classes times and the scores are transposed so that the class axis
  # comes right after the batch axis.
  kept_bboxes = tf.tile(kept_bboxes[:, tf.newaxis, :, :],
                        [1, class_count, 1, 1])
  kept_scores = py_utils.HasShape(
      tf.transpose(kept_scores, (0, 2, 1)),
      [batch_size, class_count, box_limit])
  valid_mask = tf.tile(valid_mask[:, tf.newaxis, :], [1, class_count, 1])
  return kept_bboxes, kept_scores, valid_mask