def _MultiClassOrientedDecodeWithNMS(predicted_bboxes,
                                     classification_scores,
                                     nms_iou_threshold,
                                     score_threshold,
                                     max_boxes_per_class=None):
  """Runs oriented per-class NMS over predicted bounding boxes / logits.

  Args:
    predicted_bboxes: [batch_size, num_boxes, 7] float Tensor containing
      predicted bounding box coordinates.
    classification_scores: [batch_size, num_boxes, num_classes] float Tensor
      containing predicted classification scores for each box.
    nms_iou_threshold: IoU threshold used to decide whether two boxes overlap
      for purposes of suppression. Either a float or a list of len
      num_classes.
    score_threshold: Score threshold passed to NMS so it can quickly discard
      irrelevant boxes. Either a float or a list of len num_classes. It is
      strongly recommended that the score for non-active classes (like
      background) be set to 1 so they are discarded.
    max_boxes_per_class: Maximum number of boxes per example to emit. When
      None, this defaults to num_boxes from the shape of predicted_bboxes.

  Returns:
    A tuple of:

    - predicted_bboxes: Filtered bboxes after NMS of shape
      [batch_size, num_classes, max_boxes_per_class, 7].
    - bbox_scores: A float32 Tensor with the score for each box of shape
      [batch_size, num_classes, max_boxes_per_class].
    - valid_mask: A float32 Tensor with 1/0 values indicating the validity of
      each box. 1 indicates valid, and 0 invalid. Tensor of shape
      [batch_size, num_classes, max_boxes_per_class].
  """
  nms_util = detection_3d_lib.Utils3D()

  predicted_bboxes = py_utils.HasShape(predicted_bboxes, [-1, -1, 7])
  batch_size, num_boxes, _ = py_utils.GetShape(predicted_bboxes)
  classification_scores = py_utils.HasShape(
      classification_scores, [batch_size, num_boxes, -1])
  num_classes = py_utils.GetShape(classification_scores)[2]

  if max_boxes_per_class is None:
    max_boxes_per_class = num_boxes

  # Run oriented NMS independently on every example in the batch.
  bbox_indices, bbox_scores, valid_mask = nms_util.BatchedOrientedNMSIndices(
      predicted_bboxes,
      classification_scores,
      nms_iou_threshold=nms_iou_threshold,
      score_threshold=score_threshold,
      max_boxes_per_class=max_boxes_per_class)

  # TODO(bencaine): Consider optimizing away the tf.tile or make upstream
  # changes to make predicted boxes include a class dimension.
  # Replicate the boxes across classes, then pull out the original box that
  # each NMS-selected index refers to.
  tiled_bboxes = tf.tile(predicted_bboxes[:, tf.newaxis, :, :],
                         [1, num_classes, 1, 1])
  gathered_bboxes = tf.batch_gather(tiled_bboxes, bbox_indices)
  return gathered_bboxes, bbox_scores, valid_mask
def SegmentPool3D(points,
                  point_features,
                  pooling_idx,
                  closest_idx,
                  pooling_method='max'):
  """Performs {min/max/average} pooling over a pointcloud given indices.

  This should be functionally identical when using max to the MaxPool3D
  function, except it turns out to be much more memory efficient on a TPU,
  and supports min/max/mean.

  Args:
    points: A float tf.Tensor of shape [N, P1, 3] with point locations.
    point_features: A float tf.Tensor of shape [N, P1, C] with point features.
    pooling_idx: A tf.int32 tf.Tensor of shape [N, P2] with the index of which
      points we want to keep. Each value should be in the range [0, P1].
    closest_idx: A tf.int32 tf.Tensor of shape [N, P1] representing which
      sampled point is closest to each original point. Each value should be
      in the range of [0, P2].
    pooling_method: A string for which pooling function to use. Should be one
      of {'min', 'max', 'mean'}.

  Returns:
    A tuple of:

    - pooled_points: A float tf.Tensor of shape [N, P2, 3] with the pooled
      point locations.
    - pooled_features: A float tf.Tensor of shape [N, P2, C] with the pooled
      features.

  Raises:
    ValueError: If pooling_method is not one of {min/max/mean}.
  """
  pooling_fns = {
      'min': tf.unsorted_segment_min,
      'max': tf.unsorted_segment_max,
      'mean': tf.unsorted_segment_mean,
  }
  if pooling_method not in pooling_fns:
    raise ValueError('`pooling_method` must be one of {}.'.format(
        pooling_fns.keys()))
  segment_fn = pooling_fns[pooling_method]

  # Shape validation.
  points = py_utils.HasShape(points, [-1, -1, 3])
  n, p1 = py_utils.GetShape(points, 2)
  point_features = py_utils.HasShape(point_features, [n, p1, -1])
  _, _, c = py_utils.GetShape(point_features)
  pooling_idx = py_utils.HasShape(pooling_idx, [n, -1])
  _, p2 = py_utils.GetShape(pooling_idx)
  closest_idx = py_utils.HasShape(closest_idx, [n, p1])

  # Keep only the sampled output point locations.
  pooled_points = tf.batch_gather(points, pooling_idx)

  # unsorted_segment_X currently has no batch dimension support, so map the
  # segment pooling over examples in the batch.
  def _PoolOneExample(args):
    features, segment_ids = args
    return segment_fn(features, segment_ids, num_segments=p2)

  pooled_features = tf.map_fn(
      fn=_PoolOneExample,
      elems=(point_features, closest_idx),
      dtype=tf.float32)

  return (py_utils.HasShape(pooled_points, [n, p2, 3]),
          py_utils.HasShape(pooled_features, [n, p2, c]))
def MaxPool3D(points, point_features, pooling_idx, closest_idx):
  """Apply max pooling to a point cloud with computed sampling indices.

  sampled_idx and closest_idx are the outputs of a sampler such as
  FurthestPointSampler.

  The pooling operation results in a point cloud with fewer points, where the
  pooled points are specified by pooling_idx. Each element of pooling_idx
  contains an integer in the range [0, P1) containing the index of the point
  in points/points_features.

  Max pooling is performed by assigning each point to its closest pooled
  point, and then taking a max over the features of points assigned. We assume
  that this mapping is provided by closest_idx, where each element should
  contain an integer in the range [0, P2) containing the index of the pooled
  point that each point is assigned to.

  Note: This logic for pooling assumes that there will be at least
  one value > 0 per sampled region for each feature, otherwise it will return
  0. Additionally, it does a reduce over a masked version of the features, so
  mean and min would not work without a change in the logic.

  Args:
    points: a floating point tf.Tensor with shape [N, P1, 3]
    point_features: a floating point tf.Tensor with shape [N, P1, C]
    pooling_idx: A tf.int32 tf.Tensor of shape [N, P2] with the index of which
      points we want to keep. Each value should be in the range [0, P1].
    closest_idx: A tf.int32 tf.Tensor of shape [N, P1] representing which
      sampled point is closest to each original point. Each value should be
      in the range of [0, P2].

  Returns:
    A tuple of tf.Tensors (pooled_points, pooled_features).

    pooled_points has shape [N, P2, 3] representing the locations of each
    selected point. P2 corresponds to num_pooled_points.

    pooled_features has shape [N, P2, C] representing the pooled features at
    each point.
  """
  n, p1 = py_utils.GetShape(points, 2)
  point_features = py_utils.HasShape(point_features, [n, p1, -1])
  pooling_idx = py_utils.HasShape(pooling_idx, [n, -1])
  _, p2 = py_utils.GetShape(pooling_idx)
  _, _, c = py_utils.GetShape(point_features, 3)

  # Gather the locations of the selected (pooled) points.
  pooled_points = tf.batch_gather(points, pooling_idx)

  # One-hot assignment of each original point to its closest pooled point.
  assignment = tf.one_hot(closest_idx, p2)  # [N, P1, P2]
  assignment = tf.transpose(assignment, [2, 0, 1])  # [P2, N, P1]

  def _PoolOnePooledPoint(one_hot_slice):
    # one_hot_slice is [N, P1]: the mask of points assigned to this pooled
    # point. Note: this method of pooling assumes there will be a value > 0
    # and will only work with max under this condition.
    expanded = tf.tile(
        tf.reshape(one_hot_slice, [n, p1, 1]), [1, 1, c])
    return tf.reduce_max(expanded * point_features, axis=1)

  # A map_fn over the pooled points is more memory efficient than a dense op.
  pooled_point_features = tf.map_fn(_PoolOnePooledPoint, assignment)
  # [P2, N, C] -> [N, P2, C]
  pooled_point_features = tf.transpose(pooled_point_features, [1, 0, 2])

  return pooled_points, pooled_point_features
def AssignAnchors(self,
                  anchor_bboxes,
                  gt_bboxes,
                  gt_bboxes_labels,
                  gt_bboxes_mask,
                  foreground_assignment_threshold=0.5,
                  background_assignment_threshold=0.35,
                  background_class_id=0,
                  force_match=True,
                  similarity_fn=None):
  """Assigns anchors to bboxes using a similarity function (SSD-based).

  Each anchor box is assigned to the top matching ground truth box.
  Ground truth boxes can be assigned to multiple anchor boxes.

  Assignments can result in 3 outcomes:

  - Positive assignment (if score >= foreground_assignment_threshold):
    assigned_gt_labels will reflect the assigned box label and
    assigned_cls_mask will be set to 1.0
  - Background assignment (if score <= background_assignment_threshold):
    assigned_gt_labels will be background_class_id and assigned_cls_mask will
    be set to 1.0
  - Ignore assignment (otherwise):
    assigned_gt_labels will be background_class_id and assigned_cls_mask will
    be set to 0.0

  The detection loss function would usually:

  - Use assigned_cls_mask for weighting the classification loss. The mask
    is set such that the loss applies to foreground and background
    assignments only - ignored anchors will be set to 0.
  - Use assigned_reg_mask for weighting the regression loss. The mask is set
    such that the loss applies to foreground assignments only.

  The thresholds (foreground_assignment_threshold and
  background_assignment_threshold) should be tuned per dataset.

  TODO(jngiam): Consider having a separate threshold for regression boxes; a
  separate threshold is used in PointRCNN.

  Args:
    anchor_bboxes: tf.float32. [A, 7], where [..., :] corresponds to box
      parameters (x, y, z, dx, dy, dz, r).
    gt_bboxes: tf.float32. [G, 7], where [..., :] corresponds to ground truth
      box parameters (x, y, z, dx, dy, dz, r).
    gt_bboxes_labels: tensor with shape [G]. Ground truth labels for each
      bounding box.
    gt_bboxes_mask: tensor with shape [G]. Mask for ground truth boxes, 1 iff
      the gt_bbox is a real bbox.
    foreground_assignment_threshold: Similarity score threshold for assigning
      foreground bounding boxes; scores need to be >=
      foreground_assignment_threshold to be assigned to foreground.
    background_assignment_threshold: Similarity score threshold for assigning
      background bounding boxes; scores need to be <=
      background_assignment_threshold to be assigned to background.
    background_class_id: class id to be assigned to anchors_gt_class if no
      anchor boxes match.
    force_match: Boolean specifying if force matching is enabled. If
      force matching is enabled, then matched anchors which are also the
      highest scoring with a ground-truth box are considered foreground
      matches as long as their similarity score > 0.
    similarity_fn: Function that computes the a similarity score (e.g., IOU)
      between pairs of bounding boxes. This function should take in two
      tensors corresponding to anchor and ground-truth bboxes, and return a
      matrix [A, G] with the similarity score between each pair of bboxes. The
      score must be non-negative, with greater scores representing more
      similar. The fore/background_assignment_thresholds will be applied to
      this score to determine if the an anchor is foreground, background or
      ignored. If set to None, the function will default to
      IOU2DRotatedBoxes.

  Returns:
    NestedMap with the following keys

    - assigned_gt_idx: shape [A] index corresponding to the index of the
      assigned ground truth box. Anchors not assigned to a ground truth box
      will have the index set to -1.
    - assigned_gt_bbox: shape [A, 7] bbox parameters assigned to each anchor.
    - assigned_gt_similarity_score: shape [A] (iou) score between the anchor
      and the gt bbox.
    - assigned_gt_labels: shape [A] label assigned to bbox.
    - assigned_cls_mask: shape [A] mask for classification loss per anchor.
      This should be 1.0 if the anchor has a foreground or background
      assignment; otherwise, it will be assigned to 0.0.
    - assigned_reg_mask: shape [A] mask for regression loss per anchor. This
      should be 1.0 if the anchor has a foreground assignment; otherwise, it
      will be assigned to 0.0. Note: background anchors do not have regression
      targets.
  """
  if similarity_fn is None:
    similarity_fn = self.IOU2DRotatedBoxes

  # Shape validation.
  anchor_bboxes = py_utils.HasShape(anchor_bboxes, [-1, 7])
  num_anchor_bboxes, _ = py_utils.GetShape(anchor_bboxes, 2)
  gt_bboxes = py_utils.HasShape(gt_bboxes, [-1, 7])
  num_gt_bboxes, _ = py_utils.GetShape(gt_bboxes, 2)

  # Compute similarity score and reduce max by anchors and by ground-truth.
  similarity_score = similarity_fn(anchor_bboxes, gt_bboxes)
  similarity_score = py_utils.HasShape(similarity_score,
                                       [num_anchor_bboxes, num_gt_bboxes])

  # Reduce over ground-truth boxes, so we have the max score per anchor.
  anchor_max_score = tf.reduce_max(similarity_score, axis=1)
  anchor_max_idx = tf.argmax(similarity_score, axis=1)

  if force_match:
    # Reduce over anchors, so we have the max score per ground truth box.
    gt_max_score = tf.reduce_max(similarity_score, axis=0, keepdims=True)

    # Force matches occur when the top matching gt bbox for an anchor is the
    # top matching anchor for the gt bbox. When force matching, we match
    # these boxes as long as their similarity score exceeds 0.
    # Padded gt boxes (gt_bboxes_mask == 0) can never be force matches.
    force_matches = (
        tf.equal(similarity_score, gt_max_score)
        & tf.equal(similarity_score, anchor_max_score[..., tf.newaxis])
        & tf.greater(similarity_score, 0.)
        & tf.cast(gt_bboxes_mask[tf.newaxis, ...], tf.bool))
    force_match_indicator = tf.reduce_any(force_matches, axis=1)
    force_match_idx = tf.argmax(tf.cast(force_matches, tf.int32), axis=1)

    # In assigning foreground/background anchors later, force_match_indicator
    # is used to determine which anchors are force foreground, and the index
    # assigned will be taken from anchor_max_idx.

    # Force matchers must also be the max scoring gt bbox per anchor.
    # We overwrite anchor_max_idx to ensure that the right match is done.
    anchor_max_idx = tf.where(force_match_indicator, force_match_idx,
                              anchor_max_idx)

  # Ensure that max score boxes are not padded boxes by setting score to 0
  # for boxes that are padded.
  gathered_mask = tf.batch_gather(gt_bboxes_mask, anchor_max_idx)
  anchor_max_score = tf.where(
      tf.equal(gathered_mask, 1), anchor_max_score,
      tf.zeros_like(anchor_max_score))

  # Boolean tensors corresponding to whether an anchor is background or
  # foreground based on thresholding.
  background_anchors = tf.less_equal(anchor_max_score,
                                     background_assignment_threshold)
  foreground_anchors = tf.greater_equal(anchor_max_score,
                                        foreground_assignment_threshold)

  if force_match:
    # Background anchors are below threshold and not force matches.
    background_anchors &= ~force_match_indicator
    # Foreground anchors are above thresholds or force matches.
    foreground_anchors |= force_match_indicator

  # Add dummy background bbox to gt_boxes to facilitate batch gather: all
  # non-foreground anchors will gather this entry.
  dummy_bbox = tf.constant([[0, 0, 0, 1, 1, 1, 0]], dtype=tf.float32)

  # Since we are concatenating the dummy bbox, the index corresponds to the
  # number of boxes.
  dummy_bbox_idx = py_utils.GetShape(gt_bboxes, 1)[0]
  gt_bboxes = tf.concat([gt_bboxes, dummy_bbox], axis=0)
  gt_bboxes_labels = tf.concat([gt_bboxes_labels, [background_class_id]],
                               axis=0)

  # Gather indices so that all foreground boxes are gathered from gt_bboxes,
  # while all background and ignore boxes gather the dummy_bbox.
  anchor_gather_idx = tf.where(
      foreground_anchors, anchor_max_idx,
      tf.constant(
          dummy_bbox_idx,
          shape=py_utils.GetShape(anchor_max_idx),
          dtype=anchor_max_idx.dtype))

  # Gather the bboxes and weights.
  assigned_gt_bbox = tf.batch_gather(gt_bboxes, anchor_gather_idx)
  assigned_gt_labels = tf.batch_gather(gt_bboxes_labels, anchor_gather_idx)

  # Set masks for classification and regression losses.
  assigned_cls_mask = tf.cast(background_anchors | foreground_anchors,
                              tf.float32)
  assigned_reg_mask = tf.cast(foreground_anchors, tf.float32)

  # Set assigned_gt_idx such that dummy boxes have idx = -1.
  assigned_gt_idx = tf.where(
      tf.equal(anchor_gather_idx, dummy_bbox_idx),
      tf.ones_like(anchor_gather_idx) * -1, anchor_gather_idx)
  assigned_gt_idx = tf.cast(assigned_gt_idx, tf.int32)

  return py_utils.NestedMap(
      assigned_gt_idx=assigned_gt_idx,
      assigned_gt_bbox=assigned_gt_bbox,
      assigned_gt_similarity_score=anchor_max_score,
      assigned_gt_labels=assigned_gt_labels,
      assigned_cls_mask=assigned_cls_mask,
      assigned_reg_mask=assigned_reg_mask)
def _SingleClassDecodeWithNMS(predicted_bboxes,
                              classification_scores,
                              nms_iou_threshold,
                              score_threshold,
                              max_boxes_per_class=None):
  """Performs class-agnostic NMS on predicted bounding boxes / logits.

  Args:
    predicted_bboxes: [batch_size, num_boxes, 7] float Tensor containing
      predicted bounding box coordinates.
    classification_scores: [batch_size, num_boxes, num_classes] float Tensor
      containing predicted classification scores for each box.
    nms_iou_threshold: IoU threshold used to decide whether two boxes overlap
      for purposes of suppression. Must be a scalar float.
    score_threshold: Score threshold passed to NMS so it can quickly discard
      irrelevant boxes. Must be a scalar float.
    max_boxes_per_class: Maximum number of boxes per example to emit. When
      None, this defaults to num_boxes from the shape of predicted_bboxes.

  Returns:
    A tuple of:

    - predicted_bboxes: Filtered bboxes after NMS of shape
      [batch_size, num_classes, max_boxes_per_class, 7].
    - bbox_scores: A float32 Tensor with the score for each box of shape
      [batch_size, num_classes, max_boxes_per_class].
    - valid_mask: A float32 Tensor with 1/0 values indicating the validity of
      each box. 1 indicates valid, and 0 invalid. Tensor of shape
      [batch_size, num_classes, max_boxes_per_class].

  Raises:
    ValueError: If nms_iou_threshold or score_threshold is not a scalar
      float.
  """
  nms_util = detection_3d_lib.Utils3D()

  predicted_bboxes = py_utils.HasShape(predicted_bboxes, [-1, -1, 7])
  batch_size, num_boxes, _ = py_utils.GetShape(predicted_bboxes)
  classification_scores = py_utils.HasShape(
      classification_scores, [batch_size, num_boxes, -1])
  num_classes = py_utils.GetShape(classification_scores)[2]

  if not isinstance(nms_iou_threshold, float):
    raise ValueError('Single class NMS only supports a scalar '
                     '`nms_iou_threshold`.')
  if not isinstance(score_threshold, float):
    raise ValueError('Single class NMS only supports a scalar '
                     '`score_threshold`.')

  if max_boxes_per_class is None:
    max_boxes_per_class = num_boxes

  # TODO(jngiam): Change to be per-class bboxes, and hence, per-class NMS,
  # and per-class thresholding.

  # Each box is scored by its single highest class score: [batch, num_boxes].
  nms_scores = tf.reduce_max(classification_scores, axis=-1)

  # The most likely label is the argmax over the sigmoid class outputs.
  likely_labels = tf.argmax(classification_scores, axis=-1)

  # Zero out the NMS score for boxes whose most likely class is background,
  # so background boxes do not dominate the NMS.
  nms_scores *= tf.to_float(likely_labels > 0)

  # Run NMS on every example in the batch.
  nms_indices, valid_mask = nms_util.BatchedNMSIndices(
      predicted_bboxes,
      nms_scores,
      nms_iou_threshold=nms_iou_threshold,
      score_threshold=score_threshold,
      max_num_boxes=max_boxes_per_class)

  # Reorder the box data and logits according to the NMS selection.
  predicted_bboxes = tf.batch_gather(predicted_bboxes, nms_indices)
  classification_scores = tf.batch_gather(classification_scores, nms_indices)

  # Reformat the outputs to match MultiClassOrientedDecodeWithNMS, whose
  # results have a leading shape of [batch_size, num_classes,
  # max_boxes_per_class]. Since this NMS is class agnostic, tile the boxes
  # and validity mask across classes, and transpose the scores so classes
  # lead.
  predicted_bboxes = tf.tile(predicted_bboxes[:, tf.newaxis, :, :],
                             [1, num_classes, 1, 1])
  classification_scores = tf.transpose(classification_scores, (0, 2, 1))
  classification_scores = py_utils.HasShape(
      classification_scores, [batch_size, num_classes, max_boxes_per_class])
  valid_mask = tf.tile(valid_mask[:, tf.newaxis, :], [1, num_classes, 1])

  return predicted_bboxes, classification_scores, valid_mask
def FProp(self, theta, input_data):
  """Samples and groups an input point cloud.

  Args:
    theta: A NestedMap object containing weights' values of this layer and
      its children layers.
    input_data: A NestedMap object containing 'points', 'features', 'padding'
      Tensors, all of type tf.float32.
      'points': Shape [N, P1, 3]
      'features': Shape [N, P1, F]
      'padding': Shape [N, P1] where 0 indicates real, 1 indicates padded.

  Returns:
    A NestedMap consisting of the following two NestedMaps,
      grouped_points: consists of the grouped points, features and padding.
      query_points: consists of the sampled points and padding.
  """
  p = self.params
  features = input_data.features
  batch, num_points, feature_dims = py_utils.GetShape(features)
  points = py_utils.HasShape(input_data.points, [batch, num_points, 3])
  padding = py_utils.HasShape(input_data.padding, [batch, num_points])

  # Sample query points with farthest point sampling.
  sampled_idx, _ = car_lib.FarthestPointSampler(
      points, padding, num_sampled_points=p.num_samples)
  query_points = car_lib.MatmulGather(points,
                                      tf.expand_dims(sampled_idx, -1))
  query_points = tf.squeeze(query_points, -2)

  # Group each query point with its neighborhood within the ball radius.
  grouped_idx, grouped_padding = car_lib.NeighborhoodIndices(
      points,
      query_points,
      p.group_size,
      points_padding=padding,
      max_distance=p.ball_radius,
      sample_neighbors_uniformly=p.sample_neighbors_uniformly)

  grouped_points = car_lib.MatmulGather(points, grouped_idx)
  # Express grouped point locations relative to their query point.
  grouped_points -= tf.expand_dims(query_points, -2)
  grouped_features = car_lib.MatmulGather(features, grouped_idx)

  # Padding associated with the sampled query points.
  query_padding = tf.batch_gather(padding, sampled_idx)

  # Verify the shapes of output tensors.
  query_points = py_utils.HasShape(query_points, [batch, p.num_samples, 3])
  query_padding = py_utils.HasShape(query_padding, [batch, p.num_samples])
  grouped_features = py_utils.HasShape(
      grouped_features, [batch, p.num_samples, p.group_size, feature_dims])
  grouped_padding = py_utils.HasShape(
      grouped_padding, [batch, p.num_samples, p.group_size])

  return py_utils.NestedMap({
      'grouped_points':
          py_utils.NestedMap(
              points=grouped_points,
              features=grouped_features,
              padding=grouped_padding),
      'query_points':
          py_utils.NestedMap(points=query_points, padding=query_padding),
  })