Python decode_boxes示例，utils.box_utils.decode_boxes Python示例

示例#1

0

显示文件

文件： postprocess.py 项目： qing0991/tpu

    def __call__(self, box_outputs, class_outputs, anchor_boxes, image_shape):
        """Decode, clip and post-process predictions from every pyramid level.

        Args:
          box_outputs: per-level encoded box predictions, indexed by level.
          class_outputs: per-level class predictions, indexed by level.
          anchor_boxes: per-level anchor boxes, indexed by level.
          image_shape: image size handed to `box_utils.clip_boxes`.

        Returns:
          The tuple `(nmsed_boxes, nmsed_scores, nmsed_classes,
          valid_detections)` produced by `self._generate_detections`, with 1
          added to `nmsed_classes`.
        """
        per_level_boxes = []
        per_level_scores = []
        levels = range(self._min_level, self._max_level + 1)
        for level in levels:
            level_class_outputs = class_outputs[level]
            batch_size = tf.shape(level_class_outputs)[0]

            # Score activation; this also drops the implicit background class.
            level_scores = _apply_score_activation(
                level_class_outputs, self._num_classes, self._score_activation)

            # Anchors are shared across the batch, and one-stage detectors
            # only support class-agnostic box regression, so both tensors are
            # flattened to [batch_size, -1, 4] before decoding.
            flat_shape = [batch_size, -1, 4]
            level_anchors = tf.reshape(anchor_boxes[level], flat_shape)
            level_encoded = tf.reshape(box_outputs[level], flat_shape)
            level_decoded = box_utils.decode_boxes(level_encoded,
                                                   level_anchors)

            # Keep the decoded boxes inside the image.
            per_level_boxes.append(
                box_utils.clip_boxes(level_decoded, image_shape))
            per_level_scores.append(level_scores)

        all_boxes = tf.concat(per_level_boxes, axis=1)
        all_scores = tf.concat(per_level_scores, axis=1)
        all_boxes = tf.expand_dims(all_boxes, axis=2)

        detections = self._generate_detections(all_boxes, all_scores)
        nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = detections
        # Class index 0 is reserved for background, so shift by one.
        nmsed_classes += 1
        return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections

示例#2

0

显示文件

文件： instance_segmentation.py 项目： waterbearbee/single-network-panoptic-segmentation

            def _decode_and_nms_fn(inputs, anchors):
                """Decode boxes against `anchors` and keep the NMS survivors.

                `inputs[0]` holds the encoded boxes and `inputs[1]` the box
                scores. Decoded boxes are resized with `img_shape`, clipped to
                the image, and filtered with `tf.image.non_max_suppression`
                using column 1 of the softmaxed scores.

                Returns:
                  A `(boxes, probs)` pair gathered at the kept indices; the
                  boxes come from the clipped (not the yxyx-converted) tensor.
                """
                with tf.variable_scope("DecodeAndApplyNMS"):
                    encoded = inputs[0]
                    raw_scores = inputs[1]

                    decoded = box_utils.decode_boxes(encoded, anchors)
                    resized = box_utils.resize_normalized_boxes(
                        decoded, img_shape[0], img_shape[1])
                    clipped = box_utils.clip_to_img_boundaries(
                        resized, image_shape=img_shape)

                    probs = slim.softmax(raw_scores)

                    # NMS is fed a converted copy of the boxes; the outputs
                    # below are still gathered from `clipped`.
                    nms_boxes = box_utils.convert_xyxy_to_yxyx_format(clipped)

                    kept = tf.image.non_max_suppression(
                        boxes=nms_boxes,
                        scores=probs[:, 1],
                        max_output_size=self.rpn_nms_num_samples,
                        iou_threshold=self.rpn_nms_iou_th)

                    kept_boxes = tf.gather(clipped, kept)
                    kept_probs = tf.gather(probs, kept)
                    return kept_boxes, kept_probs

示例#3

0

显示文件

文件： postprocess_ops.py 项目： vishalbelsare/tpu

    def __call__(self, box_outputs, class_outputs, anchor_boxes, image_shape):
        """Decode multilevel predictions and optionally run detection NMS.

        Args:
          box_outputs: per-level encoded box predictions, indexed by level;
            each is unpacked as a rank-4 tensor whose last dimension is
            `num_anchors_per_location * 4`.
          class_outputs: per-level class predictions, indexed by level.
          anchor_boxes: per-level anchor boxes, indexed by level.
          image_shape: image size handed to `box_utils.clip_boxes`.

        Returns:
          If `self._apply_nms` is falsy, a dict with keys `raw_boxes`,
          `raw_encoded_boxes` and `raw_scores`. Otherwise a dict with keys
          `num_detections`, `detection_boxes`, `detection_classes` (already
          shifted by 1) and `detection_scores`.
        """
        decoded_levels = []
        encoded_levels = []
        score_levels = []
        for level in range(self._min_level, self._max_level + 1):
            static_shape = box_outputs[level].get_shape().as_list()
            _, feature_h, feature_w, num_predicted_corners = static_shape
            anchors_per_location = num_predicted_corners // 4
            class_depth = class_outputs[level].get_shape().as_list()[-1]
            num_classes = class_depth // anchors_per_location
            num_anchors = feature_h * feature_w * anchors_per_location

            level_scores = tf.reshape(class_outputs[level],
                                      [-1, num_anchors, num_classes])
            if self._apply_sigmoid:
                # Score transformation.
                level_scores = tf.sigmoid(level_scores)

            # Drop the implicit background class (index 0 on the last axis).
            level_scores = tf.slice(level_scores, [0, 0, 1], [-1, -1, -1])

            # Anchors are shared across the batch, and one-stage detectors
            # only support class-agnostic box regression.
            box_shape = [-1, num_anchors, 4]
            level_anchors = tf.reshape(anchor_boxes[level], box_shape)
            level_encoded = tf.reshape(box_outputs[level], box_shape)
            encoded_levels.append(level_encoded)
            level_decoded = box_utils.decode_boxes(level_encoded,
                                                   level_anchors)

            # Keep the decoded boxes inside the image.
            level_decoded = box_utils.clip_boxes(level_decoded, image_shape)

            decoded_levels.append(level_decoded)
            score_levels.append(level_scores)

        all_boxes = tf.expand_dims(tf.concat(decoded_levels, axis=1), axis=2)
        all_encoded = tf.concat(encoded_levels, axis=1)
        all_scores = tf.concat(score_levels, axis=1)

        if not self._apply_nms:
            return {
                'raw_boxes': all_boxes,
                'raw_encoded_boxes': all_encoded,
                'raw_scores': all_scores,
            }

        detections = self._generate_detections(all_boxes, all_scores)
        nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = detections

        # Class index 0 is reserved for background, so shift by one.
        nmsed_classes += 1

        return {
            'num_detections': valid_detections,
            'detection_boxes': nmsed_boxes,
            'detection_classes': nmsed_classes,
            'detection_scores': nmsed_scores,
        }

示例#4

0

显示文件

文件： cascade_maskrcnn_model.py 项目： vishalbelsare/tpu

    def _box_outputs_to_rois(self, box_outputs, rois, correct_class,
                             image_info, regression_weights):
        """Turn the latest box head outputs into RoIs for the next cascade stage.

        Args:
          box_outputs: `tensor` with the boxes predicted by the most recent
            frcnn head. The predictions are relative to the anchors/rois, so
            they are decoded here before being reused as rois.
          rois: `tensor`, the rois that were fed to the frcnn head.
          correct_class: `tensor` of classes each box should be predicted
            for. Only used when `self._class_agnostic_bbox_pred` is falsy, to
            pick one box out of the per-class predictions.
          image_info: `list`, the height and width of the input image.
          regression_weights: `list`, weights used for l1 loss in bounding
            box regression; forwarded to `box_utils.decode_boxes`.

        Returns:
          new_rois: decoded and clipped rois for the next frcnn layer in the
            cascade.
        """
        new_rois = box_outputs
        if not self._class_agnostic_bbox_pred:
            batch_size, num_rois, flat_box_dim = (
                box_outputs.get_shape().as_list())
            num_classes = flat_box_dim // 4
            per_class_boxes = tf.reshape(
                box_outputs, [batch_size, num_rois, num_classes, 4])

            # correct_class has shape [batch_size, num_rois]; the one-hot
            # mask is tiled to [batch_size, num_rois, num_classes, 4] so that
            # multiplying and summing over the class axis keeps exactly the
            # box predicted for the requested class.
            class_one_hot = tf.one_hot(
                correct_class, num_classes, dtype=box_outputs.dtype)
            class_mask = tf.tile(
                tf.expand_dims(class_one_hot, -1), [1, 1, 1, 4])
            new_rois = tf.reduce_sum(per_class_boxes * class_mask, axis=2)
        new_rois = tf.cast(new_rois, tf.float32)

        # At this point new_rois still holds relative center coordinates and
        # log-scale offsets; decoding yields the x/y min/max values that roi
        # operations need.
        decoded_rois = box_utils.decode_boxes(new_rois,
                                              rois,
                                              weights=regression_weights)
        return box_utils.clip_boxes(decoded_rois, image_info)

示例#5

0

显示文件

    def __call__(self, box_outputs, class_outputs, anchor_boxes):
        """Decode predictions from every pyramid level and generate detections.

        Unlike a clipping variant, the decoded boxes are passed on as-is and
        the class indices returned by `self._generate_detections` are not
        shifted.

        Args:
          box_outputs: per-level encoded box predictions, indexed by level.
          class_outputs: per-level class predictions, indexed by level.
          anchor_boxes: per-level anchor boxes, indexed by level.

        Returns:
          The tuple `(nmsed_boxes, nmsed_scores, nmsed_classes,
          valid_detections)` produced by `self._generate_detections`.
        """
        per_level_boxes = []
        per_level_scores = []
        for level in range(self._min_level, self._max_level + 1):
            level_class_outputs = class_outputs[level]
            batch_size = tf.shape(level_class_outputs)[0]
            level_scores = _apply_score_activation(
                level_class_outputs, self._num_classes, self._score_activation)

            # Anchors are shared across the batch, and one-stage detectors
            # only support class-agnostic box regression.
            flat_shape = [batch_size, -1, 4]
            level_anchors = tf.reshape(anchor_boxes[level], flat_shape)
            level_encoded = tf.reshape(box_outputs[level], flat_shape)

            per_level_boxes.append(
                box_utils.decode_boxes(level_encoded, level_anchors))
            per_level_scores.append(level_scores)

        all_boxes = tf.concat(per_level_boxes, axis=1)
        all_scores = tf.concat(per_level_scores, axis=1)
        all_boxes = tf.expand_dims(all_boxes, axis=2)

        detections = self._generate_detections(all_boxes, all_scores)
        nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = detections
        return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections

示例#6

0

显示文件

文件： roi_ops.py 项目： yuezha01/tpu

def multilevel_propose_rois(rpn_boxes,
                            rpn_scores,
                            anchor_boxes,
                            image_shape,
                            rpn_pre_nms_top_k=2000,
                            rpn_post_nms_top_k=1000,
                            rpn_nms_threshold=0.7,
                            rpn_score_threshold=0.0,
                            rpn_min_size_threshold=0.0,
                            decode_boxes=True,
                            clip_boxes=True,
                            use_batched_nms=False,
                            apply_sigmoid_to_score=True):
    """Proposes RoIs given a group of candidates from different FPN levels.

    The following describes the steps:
      1. For each individual level:
        a. Apply sigmoid transform if specified.
        b. Decode boxes if specified.
        c. Clip boxes if specified.
        d. Filter small boxes and those fall outside image if specified.
        e. Apply pre-NMS filtering including pre-NMS top k and score
           thresholding.
        f. Apply NMS.
      2. Aggregate post-NMS boxes from each level.
      3. Apply an overall top k to generate the final selected RoIs.

    Args:
      rpn_boxes: a dict with keys representing FPN levels and values
        representing box tensors of shape
        [batch_size, feature_h, feature_w, num_anchors * 4].
      rpn_scores: a dict with keys representing FPN levels and values
        representing logit tensors of shape
        [batch_size, feature_h, feature_w, num_anchors].
      anchor_boxes: a dict with keys representing FPN levels and values
        representing anchor box tensors of shape
        [batch_size, feature_h, feature_w, num_anchors * 4].
      image_shape: a tensor of shape [batch_size, 2] where the last dimension
        are [height, width] of the scaled image. Only read when `clip_boxes`
        is True or `rpn_min_size_threshold` is positive.
      rpn_pre_nms_top_k: an integer of top scoring RPN proposals *per level*
        to keep before applying NMS. Default: 2000.
      rpn_post_nms_top_k: an integer of top scoring RPN proposals *in total*
        to keep after applying NMS. Default: 1000.
      rpn_nms_threshold: a float between 0 and 1 representing the IoU
        threshold used for NMS. If 0.0, no NMS is applied. Default: 0.7.
      rpn_score_threshold: a float between 0 and 1 representing the minimal
        box score to keep before applying NMS. This is often used as a
        pre-filtering step for better performance. If 0, no filtering is
        applied. Default: 0.
      rpn_min_size_threshold: a float representing the minimal box size in
        each side (w.r.t. the scaled image) to keep before applying NMS. This
        is often used as a pre-filtering step for better performance. If 0,
        no filtering is applied. Default: 0.
      decode_boxes: a boolean indicating whether `rpn_boxes` needs to be
        decoded using `anchor_boxes`. If False, use `rpn_boxes` directly and
        ignore `anchor_boxes`. Default: True.
      clip_boxes: a boolean indicating whether boxes are first clipped to the
        scaled image size before applying NMS. If False, no clipping is
        applied. Default: True.
      use_batched_nms: a boolean indicating whether NMS is applied in batch
        using `tf.image.combined_non_max_suppression`. Currently only
        available in CPU/GPU. Default: False.
      apply_sigmoid_to_score: a boolean indicating whether apply sigmoid to
        `rpn_scores` before applying NMS. Default: True.

    Returns:
      selected_rois: the box coordinates of the selected proposals w.r.t. the
        scaled image, as returned by `box_utils.top_k_boxes` (presumably
        [batch_size, K, 4] with K = min(total candidates,
        rpn_post_nms_top_k) -- confirm against `top_k_boxes`).
      selected_roi_scores: the scores of the selected proposals (presumably
        [batch_size, K] -- confirm against `top_k_boxes`).
    """
    with tf.name_scope('multilevel_propose_rois'):
        rois = []
        roi_scores = []

        # `image_shape` is only consumed by clipping and min-size filtering.
        # Expand it exactly once, before the level loop: expanding it inside
        # the loop would stack one more axis on every additional level.
        if clip_boxes or rpn_min_size_threshold > 0.0:
            image_shape = tf.expand_dims(image_shape, axis=1)

        for level in sorted(rpn_scores.keys()):
            with tf.name_scope('level_%d' % level):
                _, feature_h, feature_w, num_anchors_per_location = (
                    rpn_scores[level].get_shape().as_list())

                num_boxes = feature_h * feature_w * num_anchors_per_location
                this_level_scores = tf.reshape(rpn_scores[level],
                                               [-1, num_boxes])
                this_level_boxes = tf.reshape(rpn_boxes[level],
                                              [-1, num_boxes, 4])
                this_level_anchors = tf.cast(
                    tf.reshape(anchor_boxes[level], [-1, num_boxes, 4]),
                    dtype=this_level_scores.dtype)

                if apply_sigmoid_to_score:
                    this_level_scores = tf.sigmoid(this_level_scores)

                if decode_boxes:
                    this_level_boxes = box_utils.decode_boxes(
                        this_level_boxes, this_level_anchors)
                if clip_boxes:
                    this_level_boxes = box_utils.clip_boxes(
                        this_level_boxes, image_shape)

                if rpn_min_size_threshold > 0.0:
                    this_level_boxes, this_level_scores = (
                        box_utils.filter_boxes(
                            this_level_boxes, this_level_scores, image_shape,
                            rpn_min_size_threshold))

                if rpn_nms_threshold > 0.0:
                    # A level cannot yield more candidates than it has boxes.
                    this_level_pre_nms_top_k = min(num_boxes,
                                                   rpn_pre_nms_top_k)
                    if use_batched_nms:
                        this_level_rois, this_level_roi_scores, _, _ = (
                            tf.image.combined_non_max_suppression(
                                tf.expand_dims(this_level_boxes, axis=2),
                                tf.expand_dims(this_level_scores, axis=-1),
                                max_output_size_per_class=(
                                    this_level_pre_nms_top_k),
                                max_total_size=rpn_post_nms_top_k,
                                iou_threshold=rpn_nms_threshold,
                                score_threshold=rpn_score_threshold,
                                pad_per_class=False,
                                clip_boxes=False))
                    else:
                        if rpn_score_threshold > 0.0:
                            this_level_boxes, this_level_scores = (
                                box_utils.filter_boxes_by_scores(
                                    this_level_boxes, this_level_scores,
                                    rpn_score_threshold))
                        this_level_boxes, this_level_scores = (
                            box_utils.top_k_boxes(
                                this_level_boxes,
                                this_level_scores,
                                k=this_level_pre_nms_top_k))
                        this_level_roi_scores, this_level_rois = (
                            nms.sorted_non_max_suppression_padded(
                                this_level_scores,
                                this_level_boxes,
                                max_output_size=rpn_post_nms_top_k,
                                iou_threshold=rpn_nms_threshold))
                else:
                    # NMS disabled: keep the top-scoring boxes of this level.
                    # The candidates are `this_level_boxes` (no RoIs exist
                    # yet at this point), and k is clamped to the number of
                    # boxes, mirroring the pre-NMS clamp above.
                    this_level_post_nms_top_k = min(num_boxes,
                                                    rpn_post_nms_top_k)
                    this_level_rois, this_level_roi_scores = (
                        box_utils.top_k_boxes(
                            this_level_boxes,
                            this_level_scores,
                            k=this_level_post_nms_top_k))

                rois.append(this_level_rois)
                roi_scores.append(this_level_roi_scores)

        rois = tf.concat(rois, axis=1)
        roi_scores = tf.concat(roi_scores, axis=1)

        with tf.name_scope('top_k_rois'):
            # The aggregated candidate count may be below the requested
            # post-NMS budget, so clamp the final k as well.
            _, num_valid_rois = roi_scores.get_shape().as_list()
            overall_top_k = min(num_valid_rois, rpn_post_nms_top_k)

            selected_rois, selected_roi_scores = box_utils.top_k_boxes(
                rois, roi_scores, k=overall_top_k)

        return selected_rois, selected_roi_scores

示例#7

0

显示文件

文件： postprocess_ops.py 项目： vishalbelsare/tpu

    def __call__(self,
                 box_outputs,
                 class_outputs,
                 anchor_boxes,
                 image_shape,
                 regression_weights=None,
                 bbox_per_class=True,
                 distill_class_outputs=None):
        """Generate final detections.

        Args:
          box_outputs: a tensor of shape of [batch_size, K, num_classes * 4]
            representing the class-specific box coordinates relative to
            anchors.
          class_outputs: a tensor of shape of [batch_size, K, num_classes]
            representing the class logits before applying score activation.
          anchor_boxes: a tensor of shape of [batch_size, K, 4] representing
            the corresponding anchor boxes w.r.t `box_outputs`.
          image_shape: a tensor of shape of [batch_size, 2] storing the image
            height and width w.r.t. the scaled image, i.e. the same image
            space as `box_outputs` and `anchor_boxes`.
          regression_weights: A list of four float numbers to scale
            coordinates. Falls back to [10.0, 10.0, 5.0, 5.0] when None.
          bbox_per_class: A `bool`. If True, perform per-class box regression.
          distill_class_outputs: a float tensor of shape of
            [batch_size, K, num_classes-1] representing the distilled class
            logits before applying score activation, without the background
            class. Only read when `self._feat_distill` is 'double_branch'.

        Returns:
          If `self._apply_nms` is falsy, a dict with keys `raw_boxes` and
          `raw_scores`. Otherwise a dict built from the output of
          `self._generate_detections` with keys `num_detections`,
          `detection_boxes`, `detection_classes` (shifted by 1 so that index
          0 stays reserved for background) and `detection_scores`.
        """
        logits_shape = tf.shape(class_outputs)
        num_locations = logits_shape[1]
        num_classes = logits_shape[-1]

        def _drop_background(tensor):
            # Slices away index 0 of the last axis (the background class).
            return tf.slice(tensor, [0, 0, 1], [-1, -1, -1])

        if self._discard_background:
            # Background is removed *before* the softmax normalisation.
            class_outputs = tf.nn.softmax(
                _drop_background(class_outputs), axis=-1)
        else:
            # Background takes part in the softmax and is removed afterwards.
            class_outputs = _drop_background(
                tf.nn.softmax(class_outputs, axis=-1))

        if self._feat_distill == 'double_branch':
            # [B, num_rois, num_classes]
            distill_probs = tf.nn.softmax(distill_class_outputs, axis=-1)
            blended = ((1.0 - self._rare_mask) * distill_probs +
                       self._rare_mask * class_outputs)
            # Cube root of the product of the three score terms.
            class_outputs = tf.pow(
                distill_probs * class_outputs * blended, 1.0 / 3.0)

        if bbox_per_class:
            num_detections = num_locations * (num_classes - 1)
            per_class_boxes = tf.reshape(
                box_outputs, [-1, num_locations, num_classes, 4])
            # Drop the boxes regressed for the background class.
            per_class_boxes = tf.slice(
                per_class_boxes, [0, 0, 1, 0], [-1, -1, -1, -1])
            # Every foreground class shares the same anchor.
            tiled_anchors = tf.tile(
                tf.expand_dims(anchor_boxes, axis=2),
                [1, 1, num_classes - 1, 1])
            box_outputs = tf.reshape(per_class_boxes,
                                     [-1, num_detections, 4])
            anchor_boxes = tf.reshape(tiled_anchors, [-1, num_detections, 4])

        # Box decoding.
        if regression_weights is None:
            regression_weights = [10.0, 10.0, 5.0, 5.0]
        decoded_boxes = box_utils.decode_boxes(box_outputs,
                                               anchor_boxes,
                                               weights=regression_weights)

        # Box clipping.
        decoded_boxes = box_utils.clip_boxes(decoded_boxes, image_shape)

        if bbox_per_class:
            decoded_boxes = tf.reshape(
                decoded_boxes, [-1, num_locations, num_classes - 1, 4])
        else:
            decoded_boxes = tf.expand_dims(decoded_boxes, axis=2)

        if not self._apply_nms:
            return {
                'raw_boxes': decoded_boxes,
                'raw_scores': class_outputs,
            }

        detections = self._generate_detections(decoded_boxes, class_outputs)
        nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = detections

        # Class index 0 is reserved for background, so shift by one.
        nmsed_classes += 1

        return {
            'num_detections': valid_detections,
            'detection_boxes': nmsed_boxes,
            'detection_classes': nmsed_classes,
            'detection_scores': nmsed_scores,
        }

示例#8

0

显示文件

文件： postprocess_ops.py 项目： shimacos37/tpu_experiment

  def __call__(self, box_outputs, class_outputs, anchor_boxes, image_shape):
    """Generate final detections.

    Args:
      box_outputs: a tensor of shape of [batch_size, K, num_classes * 4]
        representing the class-specific box coordinates relative to anchors.
      class_outputs: a tensor of shape of [batch_size, K, num_classes]
        representing the class logits before applying score activation.
      anchor_boxes: a tensor of shape of [batch_size, K, 4] representing the
        corresponding anchor boxes w.r.t `box_outputs`.
      image_shape: a tensor of shape of [batch_size, 2] storing the image height
        and width w.r.t. the scaled image, i.e. the same image space as
        `box_outputs` and `anchor_boxes`.

    Returns:
      nmsed_boxes: `float` Tensor of shape [batch_size, max_total_size, 4]
        representing top detected boxes in [y1, x1, y2, x2].
      nmsed_scores: `float` Tensor of shape [batch_size, max_total_size]
        representing sorted confidence scores for detected boxes. The values are
        between [0, 1].
      nmsed_classes: `int` Tensor of shape [batch_size, max_total_size]
        representing classes for detected boxes, shifted by 1 so that index 0
        stays reserved for background.
      valid_detections: `int` Tensor of shape [batch_size] only the top
        `valid_detections` boxes are valid detections.
    """
    class_probs = tf.nn.softmax(class_outputs, axis=-1)

    # Dynamic dimensions, read before the background class is removed.
    probs_shape = tf.shape(class_probs)
    batch_size = probs_shape[0]
    num_locations = probs_shape[1]
    num_classes = probs_shape[-1]
    num_detections = num_locations * (num_classes - 1)

    # Removes the background class (index 0) from scores and boxes alike.
    class_probs = tf.slice(class_probs, [0, 0, 1], [-1, -1, -1])
    per_class_shape = tf.stack(
        [batch_size, num_locations, num_classes, 4], axis=-1)
    per_class_boxes = tf.reshape(box_outputs, per_class_shape)
    per_class_boxes = tf.slice(
        per_class_boxes, [0, 0, 1, 0], [-1, -1, -1, -1])

    # Every foreground class shares the same anchor.
    tiled_anchors = tf.tile(
        tf.expand_dims(anchor_boxes, axis=2), [1, 1, num_classes - 1, 1])

    flat_boxes = tf.reshape(
        per_class_boxes, tf.stack([batch_size, num_detections, 4], axis=-1))
    flat_anchors = tf.reshape(
        tiled_anchors, tf.stack([batch_size, num_detections, 4], axis=-1))

    # Box decoding.
    decoded_boxes = box_utils.decode_boxes(
        flat_boxes, flat_anchors, weights=[10.0, 10.0, 5.0, 5.0])

    # Box clipping.
    decoded_boxes = box_utils.clip_boxes(decoded_boxes, image_shape)

    # Restore the per-class axis expected by the detection generator.
    foreground_shape = tf.stack(
        [batch_size, num_locations, num_classes - 1, 4], axis=-1)
    decoded_boxes = tf.reshape(decoded_boxes, foreground_shape)

    detections = self._generate_detections(decoded_boxes, class_probs)
    nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = detections

    # Adds 1 to offset the background class which has index 0.
    nmsed_classes += 1

    return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections

示例#9

0

显示文件

文件： bbox_transform.py 项目： wavce/letsdet

 def __call__(self, proposals, delta):
     """Decode `delta` against `proposals` using this object's weights.

     Note the argument swap: `box_utils.decode_boxes` receives the deltas
     first and the proposals second, followed by `self.weights`.
     """
     decoded = box_utils.decode_boxes(delta, proposals, self.weights)
     return decoded

示例#10

0

显示文件

文件： instance_segmentation.py 项目： waterbearbee/single-network-panoptic-segmentation

            def _decode_and_nms_fn(inputs):
                """Decode, threshold and NMS-filter boxes, then pad the result.

                `inputs[0]` holds the encoded boxes, `inputs[1]` the box
                scores and `inputs[2]` the rois used for decoding.

                Returns:
                  A tuple `(boxes, classes, probs, num_boxes)`. The first
                  three are padded/reshaped to a leading dimension of
                  `pad_num` (the number of input rows); `num_boxes` comes
                  from `box_utils.pad_boxes_and_return_num`.
                """
                with tf.variable_scope("DecodeAndApplyNMS"):
                    encoded = inputs[0]
                    raw_scores = inputs[1]
                    rois = inputs[2]

                    probs = slim.softmax(raw_scores)
                    classes = tf.argmax(probs, axis=1)
                    # Best score among the non-background columns only.
                    best_probs = tf.reduce_max(probs[..., 1:], axis=1)
                    class_mask = tf.cast(
                        tf.one_hot(classes,
                                   depth=self.params.num_things_classes + 1),
                        tf.bool)

                    pad_num = tf.shape(classes)[0]

                    # Keep, for every row, only the entries selected by the
                    # arg-max class mask, laid out as rows of 4 values.
                    encoded_selected = tf.reshape(
                        tf.boolean_mask(encoded, class_mask), [-1, 4])

                    # Decode boxes.
                    decoded = box_utils.decode_boxes(
                        encoded_selected,
                        rois,
                        scale_factors=self.roi_encoder_scales)

                    # Resize and clip boxes to the image boundaries.
                    resized = box_utils.resize_normalized_boxes(
                        decoded, img_shape[0], img_shape[1])
                    clipped = box_utils.clip_to_img_boundaries(
                        resized, image_shape=img_shape)

                    # Keep only boxes scoring above the threshold.
                    above_threshold = tf.greater(
                        best_probs, self.params.det_nms_score_th)
                    keep = tf.reshape(tf.where(above_threshold), [-1])
                    clipped = tf.gather(clipped, keep)
                    best_probs = tf.gather(best_probs, keep)

                    # Subtract the background class from the predicted
                    # classes.
                    classes = tf.gather(classes, keep) - 1

                    nms_boxes = box_utils.convert_xyxy_to_yxyx_format(clipped)

                    survivors = tf.image.non_max_suppression(
                        boxes=nms_boxes,
                        scores=best_probs,
                        max_output_size=pad_num,
                        iou_threshold=self.params.det_nms_iou_th)

                    boxes_out = tf.gather(clipped, survivors)
                    probs_out = tf.gather(best_probs, survivors)
                    class_out = tf.gather(classes, survivors)

                    # Pad everything back to `pad_num` entries.
                    boxes_pad, num_boxes = box_utils.pad_boxes_and_return_num(
                        boxes_out, pad_num)
                    probs_pad = tf.pad(probs_out, [[0, pad_num - num_boxes]])
                    class_pad = tf.pad(class_out, [[0, pad_num - num_boxes]])

                    boxes_pad = tf.reshape(boxes_pad, [pad_num, 4])
                    probs_pad = tf.reshape(probs_pad, [pad_num])
                    class_pad = tf.reshape(class_pad, [pad_num])

                    return boxes_pad, class_pad, probs_pad, num_boxes