def test_sort_by_field_descending_order(self): exp_corners = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]] exp_scores = [.95, .9, .75, .6, .5, .3] exp_weights = [.2, .45, .6, .75, .8, .92] shuffle = [2, 4, 0, 5, 1, 3] corners = tf.constant([exp_corners[i] for i in shuffle], tf.float32) boxes = box_list.BoxList(corners) boxes.add_field( 'scores', tf.constant([exp_scores[i] for i in shuffle], tf.float32)) boxes.add_field( 'weights', tf.constant([exp_weights[i] for i in shuffle], tf.float32)) sort_by_score = box_list_ops.sort_by_field(boxes, 'scores') with self.test_session() as sess: corners_out, scores_out, weights_out = sess.run([ sort_by_score.get(), sort_by_score.get_field('scores'), sort_by_score.get_field('weights') ]) self.assertAllClose(corners_out, exp_corners) self.assertAllClose(scores_out, exp_scores) self.assertAllClose(weights_out, exp_weights)
def get_box_loss(pred, gt, num_classes): lamd_coord = 5.0 xmin = pred[:, 0] - pred[:, 2] / 2 xmax = pred[:, 0] + pred[:, 2] / 2 ymin = pred[:, 1] - pred[:, 3] / 2 ymax = pred[:, 1] + pred[:, 3] / 2 boxes1 = tf.transpose(tf.stack([ymin, xmin, ymax, xmax])) xmin = gt[0] - gt[2] / 2 xmax = gt[0] + gt[2] / 2 ymin = gt[1] - gt[3] / 2 ymax = gt[1] + gt[3] / 2 boxes2 = tf.transpose(tf.stack([ymin, xmin, ymax, xmax])) boxes2 = tf.expand_dims(boxes2, 0) boxes_obj1 = box_list.BoxList(boxes1) boxes_obj2 = box_list.BoxList(boxes2) iou_score = box_list_ops.iou(boxes_obj2, boxes_obj1) boxes_obj1.add_field('scores', iou_score[0]) boxes_obj1.add_field('classes', pred[:, 5:]) boxes_obj1.add_field('objness', pred[:, 4]) boxes_obj1 = box_list_ops.sort_by_field(boxes_obj1, 'scores') matched_info = boxes_obj1.get_center_coordinates_and_sizes()[0] pred_cen = matched_info[0:2] pred_size = tf.sqrt(matched_info[2:4]) gt_center = tf.stack([gt[1], gt[0]]) gt_size = tf.stack([gt[3], gt[2]]) geo_loss = tf.losses.mean_squared_error( gt_center, pred_cen) + tf.losses.mean_squared_error( gt_size, pred_size) geo_loss = lamd_coord * geo_loss pred_confi = boxes_obj1.get_field('objness')[0] - 1 confi_loss = pred_confi * pred_confi predi_class = boxes_obj1.get_field('classes')[0] box_class = gt[4:] class_loss = tf.losses.mean_squared_error(box_class, predi_class) return confi_loss + geo_loss + class_loss
def test_sort_by_field_invalid_inputs(self): corners = tf.constant( [4 * [0.0], 4 * [0.5], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]]) misc = tf.constant([[.95, .9], [.5, .3]], tf.float32) weights = tf.constant([.1, .2], tf.float32) boxes = box_list.BoxList(corners) boxes.add_field('misc', misc) boxes.add_field('weights', weights) with self.test_session() as sess: with self.assertRaises(ValueError): box_list_ops.sort_by_field(boxes, 'area') with self.assertRaises(ValueError): box_list_ops.sort_by_field(boxes, 'misc') with self.assertRaisesWithPredicateMatch( errors.InvalidArgumentError, 'Incorrect field size'): sess.run(box_list_ops.sort_by_field(boxes, 'weights').get())
def multiclass_non_max_suppression(boxes, scores, score_thresh, iou_thresh, max_size_per_class, max_total_size=0, clip_window=None, change_coordinate_frame=False, masks=None, boundaries=None, pad_to_max_output_size=False, additional_fields=None, scope=None): """Multi-class version of non maximum suppression. This op greedily selects a subset of detection bounding boxes, pruning away boxes that have high IOU (intersection over union) overlap (> thresh) with already selected boxes. It operates independently for each class for which scores are provided (via the scores field of the input box_list), pruning boxes with score less than a provided threshold prior to applying NMS. Please note that this operation is performed on *all* classes, therefore any background classes should be removed prior to calling this function. Selected boxes are guaranteed to be sorted in decreasing order by score (but the sort is not guaranteed to be stable). Args: boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either number of classes or 1 depending on whether a separate box is predicted per class. scores: A [k, num_classes] float32 tensor containing the scores for each of the k detections. The scores have to be non-negative when pad_to_max_output_size is True. score_thresh: scalar threshold for score (low scoring boxes are removed). iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap with previously selected boxes are removed). max_size_per_class: maximum number of retained boxes per class. max_total_size: maximum number of boxes retained over all classes. By default returns all boxes retained after capping boxes per class. clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max] representing the window to clip and normalize boxes to before performing non-max suppression. change_coordinate_frame: Whether to normalize coordinates after clipping relative to clip_window (this can only be set to True if a clip_window is provided) masks: (optional) a [k, q, mask_height, mask_width] float32 tensor containing box masks. `q` can be either number of classes or 1 depending on whether a separate mask is predicted per class. boundaries: (optional) a [k, q, boundary_height, boundary_width] float32 tensor containing box boundaries. `q` can be either number of classes or 1 depending on whether a separate boundary is predicted per class. pad_to_max_output_size: If true, the output nmsed boxes are padded to be of length `max_size_per_class`. Defaults to false. additional_fields: (optional) If not None, a dictionary that maps keys to tensors whose first dimensions are all of size `k`. After non-maximum suppression, all tensors corresponding to the selected boxes will be added to resulting BoxList. scope: name scope. Returns: A tuple of sorted_boxes and num_valid_nms_boxes. The sorted_boxes is a BoxList holds M boxes with a rank-1 scores field representing corresponding scores for each box with scores sorted in decreasing order and a rank-1 classes field representing a class label for each box. The num_valid_nms_boxes is a 0-D integer tensor representing the number of valid elements in `BoxList`, with the valid elements appearing first. Raises: ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have a valid scores field. """ if not 0 <= iou_thresh <= 1.0: raise ValueError('iou_thresh must be between 0 and 1') if scores.shape.ndims != 2: raise ValueError('scores field must be of rank 2') if scores.shape[1].value is None: raise ValueError('scores must have statically defined second ' 'dimension') if boxes.shape.ndims != 3: raise ValueError('boxes must be of rank 3.') if not (boxes.shape[1].value == scores.shape[1].value or boxes.shape[1].value == 1): raise ValueError('second dimension of boxes must be either 1 or equal ' 'to the second dimension of scores') if boxes.shape[2].value != 4: raise ValueError('last dimension of boxes must be of size 4.') if change_coordinate_frame and clip_window is None: raise ValueError('if change_coordinate_frame is True, then a clip_window' 'must be specified.') with tf.name_scope(scope, 'MultiClassNonMaxSuppression'): num_scores = tf.shape(scores)[0] num_classes = scores.get_shape()[1] selected_boxes_list = [] num_valid_nms_boxes_cumulative = tf.constant(0) per_class_boxes_list = tf.unstack(boxes, axis=1) if masks is not None: per_class_masks_list = tf.unstack(masks, axis=1) if boundaries is not None: per_class_boundaries_list = tf.unstack(boundaries, axis=1) boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1 else [0] * num_classes.value) for class_idx, boxes_idx in zip(range(num_classes), boxes_ids): per_class_boxes = per_class_boxes_list[boxes_idx] boxlist_and_class_scores = box_list.BoxList(per_class_boxes) class_scores = tf.reshape( tf.slice(scores, [0, class_idx], tf.stack([num_scores, 1])), [-1]) boxlist_and_class_scores.add_field(fields.BoxListFields.scores, class_scores) if masks is not None: per_class_masks = per_class_masks_list[boxes_idx] boxlist_and_class_scores.add_field(fields.BoxListFields.masks, per_class_masks) if boundaries is not None: per_class_boundaries = per_class_boundaries_list[boxes_idx] boxlist_and_class_scores.add_field(fields.BoxListFields.boundaries, per_class_boundaries) if additional_fields is not None: for key, tensor in additional_fields.items(): boxlist_and_class_scores.add_field(key, tensor) if pad_to_max_output_size: max_selection_size = max_size_per_class selected_indices, num_valid_nms_boxes = ( tf.image.non_max_suppression_padded( boxlist_and_class_scores.get(), boxlist_and_class_scores.get_field(fields.BoxListFields.scores), max_selection_size, iou_threshold=iou_thresh, score_threshold=score_thresh, pad_to_max_output_size=True)) else: max_selection_size = tf.minimum(max_size_per_class, boxlist_and_class_scores.num_boxes()) selected_indices = tf.image.non_max_suppression( boxlist_and_class_scores.get(), boxlist_and_class_scores.get_field(fields.BoxListFields.scores), max_selection_size, iou_threshold=iou_thresh, score_threshold=score_thresh) num_valid_nms_boxes = tf.shape(selected_indices)[0] selected_indices = tf.concat( [selected_indices, tf.zeros(max_selection_size-num_valid_nms_boxes, tf.int32)], 0) nms_result = box_list_ops.gather(boxlist_and_class_scores, selected_indices) # Make the scores -1 for invalid boxes. valid_nms_boxes_indx = tf.less( tf.range(max_selection_size), num_valid_nms_boxes) nms_scores = nms_result.get_field(fields.BoxListFields.scores) nms_result.add_field(fields.BoxListFields.scores, tf.where(valid_nms_boxes_indx, nms_scores, -1*tf.ones(max_selection_size))) num_valid_nms_boxes_cumulative += num_valid_nms_boxes nms_result.add_field( fields.BoxListFields.classes, (tf.zeros_like( nms_result.get_field(fields.BoxListFields.scores)) + class_idx)) selected_boxes_list.append(nms_result) selected_boxes = box_list_ops.concatenate(selected_boxes_list) sorted_boxes = box_list_ops.sort_by_field(selected_boxes, fields.BoxListFields.scores) if clip_window is not None: # When pad_to_max_output_size is False, it prunes the boxes with zero # area. sorted_boxes = box_list_ops.clip_to_window( sorted_boxes, clip_window, filter_nonoverlapping=not pad_to_max_output_size) # Set the scores of boxes with zero area to -1 to keep the default # behaviour of pruning out zero area boxes. sorted_boxes_size = tf.shape(sorted_boxes.get())[0] non_zero_box_area = tf.cast(box_list_ops.area(sorted_boxes), tf.bool) sorted_boxes_scores = tf.where( non_zero_box_area, sorted_boxes.get_field(fields.BoxListFields.scores), -1*tf.ones(sorted_boxes_size)) sorted_boxes.add_field(fields.BoxListFields.scores, sorted_boxes_scores) num_valid_nms_boxes_cumulative = tf.reduce_sum( tf.cast(tf.greater_equal(sorted_boxes_scores, 0), tf.int32)) sorted_boxes = box_list_ops.sort_by_field(sorted_boxes, fields.BoxListFields.scores) if change_coordinate_frame: sorted_boxes = box_list_ops.change_coordinate_frame( sorted_boxes, clip_window) if max_total_size: max_total_size = tf.minimum(max_total_size, sorted_boxes.num_boxes()) sorted_boxes = box_list_ops.gather(sorted_boxes, tf.range(max_total_size)) num_valid_nms_boxes_cumulative = tf.where( max_total_size > num_valid_nms_boxes_cumulative, num_valid_nms_boxes_cumulative, max_total_size) # Select only the valid boxes if pad_to_max_output_size is False. if not pad_to_max_output_size: sorted_boxes = box_list_ops.gather( sorted_boxes, tf.range(num_valid_nms_boxes_cumulative)) return sorted_boxes, num_valid_nms_boxes_cumulative
def multiclass_non_max_suppression(boxes, scores, score_thresh, iou_thresh, max_size_per_class, max_total_size=0, clip_window=None, change_coordinate_frame=False, masks=None, boundaries=None, additional_fields=None, scope=None): """Multi-class version of non maximum suppression. This op greedily selects a subset of detection bounding boxes, pruning away boxes that have high IOU (intersection over union) overlap (> thresh) with already selected boxes. It operates independently for each class for which scores are provided (via the scores field of the input box_list), pruning boxes with score less than a provided threshold prior to applying NMS. Please note that this operation is performed on *all* classes, therefore any background classes should be removed prior to calling this function. Args: boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either number of classes or 1 depending on whether a separate box is predicted per class. scores: A [k, num_classes] float32 tensor containing the scores for each of the k detections. score_thresh: scalar threshold for score (low scoring boxes are removed). iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap with previously selected boxes are removed). max_size_per_class: maximum number of retained boxes per class. max_total_size: maximum number of boxes retained over all classes. By default returns all boxes retained after capping boxes per class. clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max] representing the window to clip and normalize boxes to before performing non-max suppression. change_coordinate_frame: Whether to normalize coordinates after clipping relative to clip_window (this can only be set to True if a clip_window is provided) masks: (optional) a [k, q, mask_height, mask_width] float32 tensor containing box masks. `q` can be either number of classes or 1 depending on whether a separate mask is predicted per class. boundaries: (optional) a [k, q, boundary_height, boundary_width] float32 tensor containing box boundaries. `q` can be either number of classes or 1 depending on whether a separate boundary is predicted per class. additional_fields: (optional) If not None, a dictionary that maps keys to tensors whose first dimensions are all of size `k`. After non-maximum suppression, all tensors corresponding to the selected boxes will be added to resulting BoxList. scope: name scope. Returns: a BoxList holding M boxes with a rank-1 scores field representing corresponding scores for each box with scores sorted in decreasing order and a rank-1 classes field representing a class label for each box. Raises: ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have a valid scores field. """ if not 0 <= iou_thresh <= 1.0: raise ValueError('iou_thresh must be between 0 and 1') if scores.shape.ndims != 2: raise ValueError('scores field must be of rank 2') if scores.shape[1].value is None: raise ValueError('scores must have statically defined second ' 'dimension') if boxes.shape.ndims != 3: raise ValueError('boxes must be of rank 3.') if not (boxes.shape[1].value == scores.shape[1].value or boxes.shape[1].value == 1): raise ValueError('second dimension of boxes must be either 1 or equal ' 'to the second dimension of scores') if boxes.shape[2].value != 4: raise ValueError('last dimension of boxes must be of size 4.') if change_coordinate_frame and clip_window is None: raise ValueError('if change_coordinate_frame is True, then a clip_window' 'must be specified.') with tf.name_scope(scope, 'MultiClassNonMaxSuppression'): num_boxes = tf.shape(boxes)[0] num_scores = tf.shape(scores)[0] num_classes = scores.get_shape()[1] length_assert = tf.Assert( tf.equal(num_boxes, num_scores), ['Incorrect scores field length: actual vs expected.', num_scores, num_boxes]) selected_boxes_list = [] per_class_boxes_list = tf.unstack(boxes, axis=1) if masks is not None: per_class_masks_list = tf.unstack(masks, axis=1) if boundaries is not None: per_class_boundaries_list = tf.unstack(boundaries, axis=1) boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1 else [0] * num_classes) for class_idx, boxes_idx in zip(range(num_classes), boxes_ids): per_class_boxes = per_class_boxes_list[boxes_idx] boxlist_and_class_scores = box_list.BoxList(per_class_boxes) with tf.control_dependencies([length_assert]): class_scores = tf.reshape( tf.slice(scores, [0, class_idx], tf.stack([num_scores, 1])), [-1]) boxlist_and_class_scores.add_field(fields.BoxListFields.scores, class_scores) if masks is not None: per_class_masks = per_class_masks_list[boxes_idx] boxlist_and_class_scores.add_field(fields.BoxListFields.masks, per_class_masks) if boundaries is not None: per_class_boundaries = per_class_boundaries_list[boxes_idx] boxlist_and_class_scores.add_field(fields.BoxListFields.boundaries, per_class_boundaries) if additional_fields is not None: for key, tensor in additional_fields.items(): boxlist_and_class_scores.add_field(key, tensor) boxlist_filtered = box_list_ops.filter_greater_than( boxlist_and_class_scores, score_thresh) if clip_window is not None: boxlist_filtered = box_list_ops.clip_to_window( boxlist_filtered, clip_window) if change_coordinate_frame: boxlist_filtered = box_list_ops.change_coordinate_frame( boxlist_filtered, clip_window) max_selection_size = tf.minimum(max_size_per_class, boxlist_filtered.num_boxes()) selected_indices = tf.image.non_max_suppression( boxlist_filtered.get(), boxlist_filtered.get_field(fields.BoxListFields.scores), max_selection_size, iou_threshold=iou_thresh) nms_result = box_list_ops.gather(boxlist_filtered, selected_indices) nms_result.add_field( fields.BoxListFields.classes, (tf.zeros_like( nms_result.get_field(fields.BoxListFields.scores)) + class_idx)) selected_boxes_list.append(nms_result) selected_boxes = box_list_ops.concatenate(selected_boxes_list) sorted_boxes = box_list_ops.sort_by_field(selected_boxes, fields.BoxListFields.scores) if max_total_size: max_total_size = tf.minimum(max_total_size, sorted_boxes.num_boxes()) sorted_boxes = box_list_ops.gather(sorted_boxes, tf.range(max_total_size)) return sorted_boxes