def nms(bboxes,
        bbox_scores,
        nms_iou_threshold=0.7,
        nms_score_threshold=0.00,
        max_nms_boxes=200,
        use_oriented_per_class_nms=True):
  """Runs `detection_decoder.DecodeWithNMS` on single-class box predictions.

  The scores are padded with an all-zero column at class index 0 so that the
  real scores sit at class index 1. Only the class-1 slice of the decoder
  output is returned.

  Args:
    bboxes: Box tensor; reshaped to [batch_size, -1, 7], where batch_size is
      the leading dimension reported by `get_shape`.
    bbox_scores: Score tensor; reshaped to [batch_size, -1, 1].
    nms_iou_threshold: Forwarded to the decoder as `nms_iou_threshold`.
    nms_score_threshold: Forwarded to the decoder as `score_threshold`.
    max_nms_boxes: Forwarded to the decoder as `max_boxes_per_class`.
    use_oriented_per_class_nms: Forwarded to the decoder unchanged.

  Returns:
    A tuple (nms_bboxes, nms_bbox_scores, nms_valid_mask) reshaped to
    [batch_size, -1, 7], [batch_size, -1] and [batch_size, -1] respectively;
    the mask is cast to int32.
  """
  num_batches = get_shape(bboxes)[0]
  flat_boxes = tf.reshape(bboxes, [num_batches, -1, 7])
  real_scores = tf.reshape(bbox_scores, [num_batches, -1, 1])

  # Prepend an all-zero score column so the real scores become class 1.
  zero_scores = tf.zeros_like(real_scores)
  padded_scores = tf.concat([zero_scores, real_scores], axis=-1)

  # NOTE(review): three values are unpacked here, whereas other DecodeWithNMS
  # call sites in this file unpack four (with a leading index tensor) --
  # confirm against the decoder version this code targets.
  decoded = detection_decoder.DecodeWithNMS(
      flat_boxes,
      padded_scores,
      nms_iou_threshold=nms_iou_threshold,
      score_threshold=nms_score_threshold,
      max_boxes_per_class=max_nms_boxes,
      use_oriented_per_class_nms=use_oriented_per_class_nms)
  all_cls_boxes, all_cls_scores, all_cls_mask = decoded

  # Keep only class index 1, i.e. the slice that carries the real scores.
  kept_boxes = tf.reshape(all_cls_boxes[:, 1, :, :], [num_batches, -1, 7])
  kept_scores = tf.reshape(all_cls_scores[:, 1, :], [num_batches, -1])
  kept_mask = tf.reshape(all_cls_mask[:, 1, :], [num_batches, -1])
  kept_mask = tf.cast(kept_mask, tf.dtypes.int32)
  return kept_boxes, kept_scores, kept_mask
def testDecoderWithOrientedPerClassNMS(self):
  """Checks DecodeWithNMS output shapes and mask counts with per-class NMS.

  Thresholds are passed as per-class lists, with
  `use_oriented_per_class_nms=True`.
  """
  batch_size = 4
  num_preds = 8
  num_classes = 10

  # An example of setting the score threshold high and IOU threshold low
  # for classes we don't care about: only class 1 gets permissive values.
  score_threshold = [0.05 if c == 1 else 1.0 for c in range(num_classes)]
  nms_iou_threshold = [0.5 if c == 1 else 0.0 for c in range(num_classes)]

  with tf.Graph().as_default():
    tf.random.set_seed(12345)
    # The two random ops are created in this order on purpose: reordering
    # them could change the seeded values.
    predicted_bboxes = tf.random.normal([batch_size, num_preds, 7])
    classification_scores = tf.random.uniform(
        [batch_size, num_preds, num_classes], minval=0, maxval=1)

    idxs, bboxes, bbox_scores, valid_mask = detection_decoder.DecodeWithNMS(
        predicted_bboxes,
        classification_scores,
        nms_iou_threshold=nms_iou_threshold,
        score_threshold=score_threshold,
        use_oriented_per_class_nms=True)

    fetch_list = [
        predicted_bboxes, classification_scores, idxs, bboxes, bbox_scores,
        valid_mask
    ]
    with self.session():
      (input_bboxes, input_scores, output_idxs, output_bboxes, output_scores,
       mask) = self.evaluate(fetch_list)

    per_class_shape = (batch_size, num_classes, num_preds)
    shape_checks = (
        ((batch_size, num_preds, 7), input_bboxes),
        (per_class_shape, output_idxs),
        ((batch_size, num_classes, num_preds, 7), output_bboxes),
        ((batch_size, num_preds, num_classes), input_scores),
        (per_class_shape, output_scores),
        (per_class_shape, mask),
    )
    for expected_shape, array in shape_checks:
      self.assertEqual(expected_shape, array.shape)

    # For each class, the number of valid mask entries must equal both the
    # number of input scores and the number of output scores that exceed
    # that class's score threshold.
    for cls_idx, cls_threshold in enumerate(score_threshold):
      num_valid = mask[:, cls_idx, :].sum()
      self.assertEqual(num_valid,
                       (input_scores[:, :, cls_idx] > cls_threshold).sum())
      self.assertEqual(num_valid,
                       (output_scores[:, cls_idx, :] > cls_threshold).sum())
def Inference(self):
  """Builds the inference graph.

  The 'default' subgraph fetches the per-class NMS outputs:
    per_class_predicted_bboxes: Boxes returned by DecodeWithNMS.
    per_class_predicted_bbox_scores: Scores returned by DecodeWithNMS,
      multiplied by the valid mask.
    per_class_valid_mask: Valid mask returned by DecodeWithNMS.

  Returns:
    A dictionary whose values are a tuple of fetches and feeds.
  """
  p = self.params
  with tf.name_scope('inference'):
    placeholders = self._Placeholders()
    predictions = self.ComputePredictions(self.theta, placeholders)
    boxes_logits = self._BBoxesAndLogits(placeholders, predictions)
    scores = tf.sigmoid(boxes_logits.classification_logits)

    # The leading return value (the NMS indices) is not needed here.
    _, nms_bboxes, nms_scores, nms_mask = detection_decoder.DecodeWithNMS(
        boxes_logits.predicted_bboxes,
        scores,
        nms_iou_threshold=p.nms_iou_threshold,
        score_threshold=p.nms_score_threshold,
        max_boxes_per_class=p.max_nms_boxes,
        use_oriented_per_class_nms=p.use_oriented_per_class_nms)
    # Zero the scores of entries the mask marks as invalid.
    masked_scores = nms_scores * nms_mask

    # TODO(vrv): Fix the inference graph for KITTI, since we need
    # to apply frustum clipping. This requires customizing the
    # inference placeholders for each model.
    fetches = {
        'per_class_predicted_bboxes': nms_bboxes,
        'per_class_predicted_bbox_scores': masked_scores,
        'per_class_valid_mask': nms_mask
    }
    feeds = dict(placeholders.FlattenItems())
  return {'default': (fetches, feeds)}
def testDecoderSingleClassNMS(self):
  """Checks DecodeWithNMS output shapes with scalar thresholds.

  Runs with `use_oriented_per_class_nms=False`; in that mode the returned
  indices are asserted to have no class dimension.
  """
  batch_size = 4
  num_preds = 8
  num_classes = 10
  score_threshold = 0.05
  nms_iou_threshold = 0.5

  with tf.Graph().as_default():
    tf.random.set_seed(12345)
    # The two random ops are created in this order on purpose: reordering
    # them could change the seeded values.
    predicted_bboxes = tf.random.normal([batch_size, num_preds, 7])
    classification_scores = tf.random.uniform(
        [batch_size, num_preds, num_classes], minval=0, maxval=1)

    idxs, bboxes, bbox_scores, valid_mask = detection_decoder.DecodeWithNMS(
        predicted_bboxes,
        classification_scores,
        nms_iou_threshold=nms_iou_threshold,
        score_threshold=score_threshold,
        use_oriented_per_class_nms=False)

    fetch_list = [
        predicted_bboxes, classification_scores, idxs, bboxes, bbox_scores,
        valid_mask
    ]
    with self.session():
      (input_bboxes, input_scores, output_idxs, output_bboxes, output_scores,
       mask) = self.evaluate(fetch_list)

    per_class_shape = (batch_size, num_classes, num_preds)
    shape_checks = (
        ((batch_size, num_preds, 7), input_bboxes),
        # Indices carry no class dimension in this mode.
        ((batch_size, num_preds), output_idxs),
        ((batch_size, num_classes, num_preds, 7), output_bboxes),
        ((batch_size, num_preds, num_classes), input_scores),
        (per_class_shape, output_scores),
        (per_class_shape, mask),
    )
    for expected_shape, array in shape_checks:
      self.assertEqual(expected_shape, array.shape)
def Decode(self, input_batch):
  """Decodes an input batch: predicts boxes, runs NMS, and packs the outputs.

  Args:
    input_batch: The input batch passed to ComputePredictions,
      _BBoxesAndLogits, ComputeLoss and the output decoder. When
      `p.decode_include_residuals` is set, its anchor_localization_residuals,
      assigned_gt_labels and anchor_bboxes fields are read as well.

  Returns:
    A NestedMap with the per-class NMS boxes, masked scores, valid mask and
    visualization weights, optionally the gathered per-class residual
    tensors, whatever `self.output_decoder.ProcessOutputs` adds, and
    `global_step`.
  """
  p = self.params
  predictions = self.ComputePredictions(self.theta, input_batch)
  boxes_logits = self._BBoxesAndLogits(input_batch, predictions)
  predicted_bboxes = boxes_logits.predicted_bboxes
  batch_size, num_bboxes, _ = py_utils.GetShape(predicted_bboxes, 3)
  logits = py_utils.HasShape(boxes_logits.classification_logits,
                             [batch_size, num_bboxes, p.num_classes])
  scores = tf.sigmoid(logits)

  # The loss is computed only for its per-example dict, which may carry a
  # score scaler to apply to the classification scores.
  _, per_example_dict = self.ComputeLoss(self.theta, predictions, input_batch)
  if 'score_scaler' in per_example_dict:
    scores *= per_example_dict['score_scaler']

  # NOTE(review): the extent of this device scope was reconstructed from the
  # comment grouping of the original code -- confirm which ops belong in it.
  with tf.device('/cpu:0'):
    # Decode the predicted bboxes, performing NMS.
    nms_idxs, nms_bboxes, nms_scores, nms_mask = (
        detection_decoder.DecodeWithNMS(
            predicted_bboxes,
            scores,
            nms_iou_threshold=p.nms_iou_threshold,
            score_threshold=p.nms_score_threshold,
            max_boxes_per_class=p.max_nms_boxes,
            use_oriented_per_class_nms=p.use_oriented_per_class_nms))

    # nms_mask is a [batch, num_classes, num_boxes] Tensor that indicates
    # which boxes were selected by NMS. Each example will have a different
    # number of chosen bboxes, so the mask is present to allow us to keep the
    # boxes as a batched dense Tensor.
    #
    # We mask the scores by nms_mask so that none of the unselected boxes
    # will be interpreted as valid.
    nms_scores *= nms_mask
    checked_scores = py_utils.HasShape(
        nms_scores, [batch_size, p.num_classes, p.max_nms_boxes])

    # For top down visualization, filter boxes whose scores are not above the
    # visualization threshold.
    above_threshold = tf.greater_equal(
        checked_scores, p.visualization_classification_threshold)
    visualization_weights = tf.where(above_threshold, checked_scores,
                                     tf.zeros_like(checked_scores))

  model_outputs = py_utils.NestedMap({
      'per_class_predicted_bboxes': nms_bboxes,
      'per_class_predicted_bbox_scores': nms_scores,
      'per_class_valid_mask': nms_mask,
  })

  decoder_outputs = py_utils.NestedMap({
      'per_class_predicted_bboxes': nms_bboxes,
      'per_class_predicted_bbox_scores': nms_scores,
      'per_class_valid_mask': nms_mask,
      'visualization_weights': visualization_weights,
  })

  if p.decode_include_residuals:
    # Including the residuals in the decoder output makes it possible to save
    # the outputs for further analysis. Note that we ensure that the outputs
    # match the per-class NMS output format of [batch, num_classes, ...].

    def _ReshapeGather(tensor):
      """Reshapes tensor and then gathers using the nms indices."""
      gathered = tf.gather(
          tf.reshape(tensor, [batch_size, num_bboxes, -1]),
          nms_idxs,
          batch_dims=1)
      if p.use_oriented_per_class_nms:
        return gathered
      # Tile so that the data fits the expected per class shape of
      # [batch_size, num_classes, ...]. When *not* using oriented NMS, the
      # num_classes dimension will be missing since the indices will not
      # have it.
      return tf.tile(gathered[:, tf.newaxis, :, :], [1, p.num_classes, 1, 1])

    decoder_outputs.update({
        'per_class_gt_residuals':
            _ReshapeGather(input_batch.anchor_localization_residuals),
        'per_class_gt_labels':
            _ReshapeGather(input_batch.assigned_gt_labels),
        'per_class_residuals':
            _ReshapeGather(predictions.residuals),
        'per_class_logits':
            _ReshapeGather(predictions.classification_logits),
        'per_class_anchor_boxes':
            _ReshapeGather(input_batch.anchor_bboxes),
    })

  decoder_outputs.update(
      self.output_decoder.ProcessOutputs(input_batch, model_outputs))

  # Produce global step as an output (which is the step
  # of the checkpoint being decoded.)
  decoder_outputs.global_step = py_utils.GetGlobalStep()
  return decoder_outputs
def Decode(self, input_batch):
  """Decodes an input batch: predicts boxes, runs NMS, and packs the outputs.

  Args:
    input_batch: The input batch passed to _BBoxesAndLogits and to the output
      decoder.

  Returns:
    A NestedMap with the per-class NMS boxes, masked scores, valid mask and
    visualization weights, whatever `self.output_decoder.ProcessOutputs`
    adds, and `global_step`.
  """
  p = self.params
  boxes_logits = self._BBoxesAndLogits(input_batch)
  predicted_bboxes = boxes_logits.predicted_bboxes
  batch_size, num_bboxes, _ = py_utils.GetShape(predicted_bboxes, 3)
  logits = py_utils.HasShape(boxes_logits.classification_logits,
                             [batch_size, num_bboxes, p.num_classes])
  scores = tf.sigmoid(logits)

  # Score scaler.
  if 'score_scaler' in boxes_logits:
    scores *= boxes_logits.score_scaler

  # NOTE(review): the extent of this device scope was reconstructed from the
  # comment grouping of the original code -- confirm which ops belong in it.
  with tf.device('/cpu:0'):
    # Decode the predicted bboxes, performing NMS.
    # NOTE(review): three values are unpacked here, whereas other
    # DecodeWithNMS call sites in this file unpack four (with a leading index
    # tensor) -- confirm against the decoder version this code targets.
    nms_bboxes, nms_scores, nms_mask = detection_decoder.DecodeWithNMS(
        predicted_bboxes,
        scores,
        nms_iou_threshold=p.nms_iou_threshold,
        score_threshold=p.nms_score_threshold,
        max_boxes_per_class=p.max_nms_boxes,
        use_oriented_per_class_nms=p.use_oriented_per_class_nms)

    # nms_mask is a [batch, num_classes, num_boxes] Tensor that indicates
    # which boxes were selected by NMS. Each example will have a different
    # number of chosen bboxes, so the mask is present to allow us to keep the
    # boxes as a batched dense Tensor.
    #
    # We mask the scores by nms_mask so that none of the unselected boxes
    # will be interpreted as valid.
    nms_scores *= nms_mask
    checked_scores = py_utils.HasShape(
        nms_scores, [batch_size, p.num_classes, p.max_nms_boxes])

    # For top down visualization, filter boxes whose scores are not above the
    # visualization threshold.
    above_threshold = tf.greater_equal(
        checked_scores, p.visualization_classification_threshold)
    visualization_weights = tf.where(above_threshold, checked_scores,
                                     tf.zeros_like(checked_scores))

  model_outputs = py_utils.NestedMap({
      'per_class_predicted_bboxes': nms_bboxes,
      'per_class_predicted_bbox_scores': nms_scores,
      'per_class_valid_mask': nms_mask,
  })

  decoder_outputs = py_utils.NestedMap({
      'per_class_predicted_bboxes': nms_bboxes,
      'per_class_predicted_bbox_scores': nms_scores,
      'per_class_valid_mask': nms_mask,
      'visualization_weights': visualization_weights,
  })

  decoder_outputs.update(
      self.output_decoder.ProcessOutputs(input_batch, model_outputs))

  # Produce global step as an output (which is the step
  # of the checkpoint being decoded.)
  decoder_outputs.global_step = py_utils.GetGlobalStep()
  return decoder_outputs