Example #1
def resize_and_crop_boxes(boxes, image_scale, output_size, offset):
    """Resizes boxes to output size with scale and offset.

  Args:
    boxes: `Tensor` of shape [N, 4] representing ground truth boxes.
    image_scale: 2D float `Tensor` representing scale factors that apply to
      [height, width] of input image.
    output_size: 2D `Tensor` or `int` representing [height, width] of target
      output image size.
    offset: 2D `Tensor` representing top-left corner [y0, x0] to crop scaled
      boxes.

  Returns:
    boxes: `Tensor` of shape [N, 4] representing the scaled boxes.
  """
    with tf.name_scope('resize_and_crop_boxes'):
        # Adjusts box coordinates based on image_scale and offset.
        boxes *= tf.tile(tf.expand_dims(image_scale, axis=0), [1, 2])
        boxes -= tf.tile(tf.expand_dims(offset, axis=0), [1, 2])
        # Clips the boxes.
        boxes = box_ops.clip_boxes(boxes, output_size)
        return boxes
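A minimal usage sketch for the function above; the scale, output size, and
offset values are illustrative assumptions, and `box_ops` is assumed to be
`official.vision.ops.box_ops` from the TF Model Garden:

import tensorflow as tf
from official.vision.ops import box_ops

# Hypothetical preprocessing: an image scaled by 1.6 per side, then cropped
# with its top-left corner at (y0, x0) = (0, 64).
boxes = tf.constant([[10., 20., 200., 300.]])  # [N, 4] in [y1, x1, y2, x2]
image_scale = tf.constant([1.6, 1.6])          # scale for [height, width]
output_size = tf.constant([640., 640.])        # target [height, width]
offset = tf.constant([0., 64.])                # crop top-left [y0, x0]

scaled_boxes = resize_and_crop_boxes(boxes, image_scale, output_size, offset)
# Each row is multiplied by [1.6, 1.6, 1.6, 1.6], shifted by [0, 64, 0, 64],
# and finally clipped to the 640x640 output canvas.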
Example #2
def undo_info(boxes: tf.Tensor,
              num_detections: int,
              info: tf.Tensor,
              expand: bool = True) -> tf.Tensor:
  """Clip and normalize boxes for serving."""

  mask = tf.sequence_mask(num_detections, maxlen=tf.shape(boxes)[1])
  boxes = tf.cast(tf.expand_dims(mask, axis=-1), boxes.dtype) * boxes

  if expand:
    info = tf.cast(tf.expand_dims(info, axis=0), boxes.dtype)
  inshape = tf.expand_dims(info[:, 1, :], axis=1)
  ogshape = tf.expand_dims(info[:, 0, :], axis=1)
  scale = tf.expand_dims(info[:, 2, :], axis=1)
  offset = tf.expand_dims(info[:, 3, :], axis=1)

  boxes = box_ops.denormalize_boxes(boxes, inshape)
  boxes += tf.tile(offset, [1, 1, 2])
  boxes /= tf.tile(scale, [1, 1, 2])
  boxes = box_ops.clip_boxes(boxes, ogshape)
  boxes = box_ops.normalize_boxes(boxes, ogshape)
  return boxes
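An illustrative call with hypothetical shapes and values. Per the slices above,
each per-image `info` tensor packs four [height, width] rows: [0] original
image shape, [1] network input shape, [2] preprocessing scale, [3] crop offset:

import tensorflow as tf

boxes = tf.random.uniform([1, 100, 4])  # normalized [batch, num_boxes, 4]
num_detections = tf.constant([10])
info = tf.constant([[480., 640.],   # original image shape
                    [512., 512.],   # network input shape
                    [0.8, 0.8],     # scale applied during preprocessing
                    [16., 0.]])     # crop offset [y0, x0]

# With expand=True, the single [4, 2] info tensor is broadcast over the batch.
serving_boxes = undo_info(boxes, num_detections, info, expand=True)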
Example #3
    def _decode_multilevel_outputs(
            self,
            raw_boxes: Mapping[str, tf.Tensor],
            raw_scores: Mapping[str, tf.Tensor],
            anchor_boxes: Mapping[str, tf.Tensor],
            image_shape: tf.Tensor,
            raw_attributes: Optional[Mapping[str, tf.Tensor]] = None):
        """Collects dict of multilevel boxes, scores, attributes into lists."""
        boxes = []
        scores = []
        if raw_attributes:
            attributes = {att_name: [] for att_name in raw_attributes.keys()}
        else:
            attributes = {}

        levels = list(raw_boxes.keys())
        min_level = int(min(levels))
        max_level = int(max(levels))
        for i in range(min_level, max_level + 1):
            raw_boxes_i = raw_boxes[str(i)]
            raw_scores_i = raw_scores[str(i)]
            batch_size = tf.shape(raw_boxes_i)[0]
            (_, feature_h_i, feature_w_i, num_anchors_per_locations_times_4
             ) = raw_boxes_i.get_shape().as_list()
            num_locations = feature_h_i * feature_w_i
            num_anchors_per_locations = num_anchors_per_locations_times_4 // 4
            num_classes = (raw_scores_i.get_shape().as_list()[-1] //
                           num_anchors_per_locations)

            # Applies score transformation and removes the implicit background class.
            scores_i = tf.sigmoid(
                tf.reshape(raw_scores_i, [
                    batch_size, num_locations * num_anchors_per_locations,
                    num_classes
                ]))
            scores_i = tf.slice(scores_i, [0, 0, 1], [-1, -1, -1])

            # Box decoding.
            # The anchor boxes are shared for all data in a batch.
            # A one-stage detector only supports class-agnostic box regression.
            anchor_boxes_i = tf.reshape(
                anchor_boxes[str(i)],
                [batch_size, num_locations * num_anchors_per_locations, 4])
            raw_boxes_i = tf.reshape(
                raw_boxes_i,
                [batch_size, num_locations * num_anchors_per_locations, 4])
            boxes_i = box_ops.decode_boxes(raw_boxes_i, anchor_boxes_i)

            # Box clipping.
            boxes_i = box_ops.clip_boxes(boxes_i,
                                         tf.expand_dims(image_shape, axis=1))

            boxes.append(boxes_i)
            scores.append(scores_i)

            if raw_attributes:
                for att_name, raw_att in raw_attributes.items():
                    attribute_size = (
                        raw_att[str(i)].get_shape().as_list()[-1] //
                        num_anchors_per_locations)
                    att_i = tf.reshape(raw_att[str(i)], [
                        batch_size, num_locations * num_anchors_per_locations,
                        attribute_size
                    ])
                    attributes[att_name].append(att_i)

        boxes = tf.concat(boxes, axis=1)
        boxes = tf.expand_dims(boxes, axis=2)
        scores = tf.concat(scores, axis=1)

        if raw_attributes:
            for att_name in raw_attributes.keys():
                attributes[att_name] = tf.concat(attributes[att_name], axis=1)
                attributes[att_name] = tf.expand_dims(attributes[att_name],
                                                      axis=2)

        return boxes, scores, attributes
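A shape walkthrough for a single level of the method above, using illustrative
numbers (9 anchors per location on a 64x64 feature map):

import tensorflow as tf

batch_size, feature_h, feature_w, num_anchors = 2, 64, 64, 9
raw_boxes_i = tf.zeros([batch_size, feature_h, feature_w, num_anchors * 4])
num_locations = feature_h * feature_w  # 4096

# The per-level [B, H, W, A * 4] box tensor is flattened to [B, H * W * A, 4],
# matching the reshape performed inside the loop.
flat_boxes = tf.reshape(raw_boxes_i,
                        [batch_size, num_locations * num_anchors, 4])
print(flat_boxes.shape)  # (2, 36864, 4)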
Example #4
    def __call__(self,
                 raw_boxes: tf.Tensor,
                 raw_scores: tf.Tensor,
                 anchor_boxes: tf.Tensor,
                 image_shape: tf.Tensor,
                 regression_weights: Optional[List[float]] = None,
                 bbox_per_class: bool = True):
        """Generates final detections.

    Args:
      raw_boxes: A `tf.Tensor` of shape of `[batch_size, K, num_classes * 4]`
        representing the class-specific box coordinates relative to anchors.
      raw_scores: A `tf.Tensor` of shape of `[batch_size, K, num_classes]`
        representing the class logits before applying score activation.
      anchor_boxes: A `tf.Tensor` of shape of `[batch_size, K, 4]` representing
        the corresponding anchor boxes w.r.t `box_outputs`.
      image_shape: A `tf.Tensor` of shape of `[batch_size, 2]` storing the image
        height and width w.r.t. the scaled image, i.e. the same image space as
        `box_outputs` and `anchor_boxes`.
      regression_weights: A list of four float numbers to scale coordinates.
      bbox_per_class: A `bool`. If True, perform per-class box regression.

    Returns:
      If `apply_nms` = True, the return is a dictionary with keys:
        `detection_boxes`: A `float` tf.Tensor of shape
          [batch, max_num_detections, 4] representing top detected boxes in
          [y1, x1, y2, x2].
        `detection_scores`: A `float` `tf.Tensor` of shape
          [batch, max_num_detections] representing sorted confidence scores for
          detected boxes. The values are between [0, 1].
        `detection_classes`: An `int` tf.Tensor of shape
          [batch, max_num_detections] representing classes for detected boxes.
        `num_detections`: An `int` tf.Tensor of shape [batch]. Only the first
          `num_detections` boxes are valid detections.
      If `apply_nms` = False, the return is a dictionary with keys:
        `decoded_boxes`: A `float` tf.Tensor of shape [batch, num_raw_boxes, 4]
          representing all the decoded boxes.
        `decoded_box_scores`: A `float` tf.Tensor of shape
          [batch, num_raw_boxes] representing scores of all the decoded boxes.
    """
        box_scores = tf.nn.softmax(raw_scores, axis=-1)

        # Removes the background class.
        box_scores_shape = tf.shape(box_scores)
        box_scores_shape_list = box_scores.get_shape().as_list()
        batch_size = box_scores_shape[0]
        num_locations = box_scores_shape_list[1]
        num_classes = box_scores_shape_list[-1]

        box_scores = tf.slice(box_scores, [0, 0, 1], [-1, -1, -1])

        if bbox_per_class:
            num_detections = num_locations * (num_classes - 1)
            raw_boxes = tf.reshape(raw_boxes,
                                   [batch_size, num_locations, num_classes, 4])
            raw_boxes = tf.slice(raw_boxes, [0, 0, 1, 0], [-1, -1, -1, -1])
            anchor_boxes = tf.tile(tf.expand_dims(anchor_boxes, axis=2),
                                   [1, 1, num_classes - 1, 1])
            raw_boxes = tf.reshape(raw_boxes, [batch_size, num_detections, 4])
            anchor_boxes = tf.reshape(anchor_boxes,
                                      [batch_size, num_detections, 4])

        # Box decoding.
        decoded_boxes = box_ops.decode_boxes(raw_boxes,
                                             anchor_boxes,
                                             weights=regression_weights)

        # Box clipping
        decoded_boxes = box_ops.clip_boxes(decoded_boxes,
                                           tf.expand_dims(image_shape, axis=1))

        if bbox_per_class:
            decoded_boxes = tf.reshape(
                decoded_boxes, [batch_size, num_locations, num_classes - 1, 4])
        else:
            decoded_boxes = tf.expand_dims(decoded_boxes, axis=2)

        if not self._config_dict['apply_nms']:
            return {
                'decoded_boxes': decoded_boxes,
                'decoded_box_scores': box_scores,
            }

        # Optionally force NMS to run on the CPU.
        if self._config_dict['use_cpu_nms']:
            nms_context = tf.device('cpu:0')
        else:
            nms_context = contextlib.nullcontext()

        with nms_context:
            if self._config_dict['nms_version'] == 'batched':
                (nmsed_boxes, nmsed_scores, nmsed_classes,
                 valid_detections) = (_generate_detections_batched(
                     decoded_boxes, box_scores,
                     self._config_dict['pre_nms_score_threshold'],
                     self._config_dict['nms_iou_threshold'],
                     self._config_dict['max_num_detections']))
            elif self._config_dict['nms_version'] == 'v1':
                (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections,
                 _) = _generate_detections_v1(
                     decoded_boxes,
                     box_scores,
                     pre_nms_top_k=self._config_dict['pre_nms_top_k'],
                     pre_nms_score_threshold=(
                         self._config_dict['pre_nms_score_threshold']),
                     nms_iou_threshold=self._config_dict['nms_iou_threshold'],
                     max_num_detections=(
                         self._config_dict['max_num_detections']),
                     soft_nms_sigma=self._config_dict['soft_nms_sigma'])
            elif self._config_dict['nms_version'] == 'v2':
                (nmsed_boxes, nmsed_scores, nmsed_classes,
                 valid_detections) = _generate_detections_v2(
                     decoded_boxes,
                     box_scores,
                     pre_nms_top_k=self._config_dict['pre_nms_top_k'],
                     pre_nms_score_threshold=(
                         self._config_dict['pre_nms_score_threshold']),
                     nms_iou_threshold=self._config_dict['nms_iou_threshold'],
                     max_num_detections=self._config_dict['max_num_detections'])
            else:
                raise ValueError('NMS version {} not supported.'.format(
                    self._config_dict['nms_version']))

        # Adds 1 to offset the background class which has index 0.
        nmsed_classes += 1

        return {
            'num_detections': valid_detections,
            'detection_boxes': nmsed_boxes,
            'detection_classes': nmsed_classes,
            'detection_scores': nmsed_scores,
        }
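An illustrative invocation, assuming `generator` is an instance of the
surrounding class configured with `apply_nms=True`; the shapes and class count
below are hypothetical:

import tensorflow as tf

batch, num_rois, num_classes = 2, 1000, 91  # 91 includes the background class
outputs = generator(
    raw_boxes=tf.random.uniform([batch, num_rois, num_classes * 4]),
    raw_scores=tf.random.uniform([batch, num_rois, num_classes]),
    anchor_boxes=tf.random.uniform([batch, num_rois, 4]),
    image_shape=tf.constant([[640., 640.], [640., 640.]]),
    regression_weights=[10.0, 10.0, 5.0, 5.0],
    bbox_per_class=True)
# outputs['detection_boxes'] has shape [batch, max_num_detections, 4], and
# outputs['detection_classes'] is shifted by +1 to account for background.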
Example #5
def _multilevel_propose_rois(raw_boxes: Mapping[str, tf.Tensor],
                             raw_scores: Mapping[str, tf.Tensor],
                             anchor_boxes: Mapping[str, tf.Tensor],
                             image_shape: tf.Tensor,
                             pre_nms_top_k: int = 2000,
                             pre_nms_score_threshold: float = 0.0,
                             pre_nms_min_size_threshold: float = 0.0,
                             nms_iou_threshold: float = 0.7,
                             num_proposals: int = 1000,
                             use_batched_nms: bool = False,
                             decode_boxes: bool = True,
                             clip_boxes: bool = True,
                             apply_sigmoid_to_score: bool = True):
    """Proposes RoIs given a group of candidates from different FPN levels.

  The following describes the steps:
    1. For each individual level:
      a. Apply sigmoid transform if specified.
      b. Decode boxes if specified.
      c. Clip boxes if specified.
      d. Filter out small boxes and those falling outside the image if specified.
      e. Apply pre-NMS filtering including pre-NMS top k and score thresholding.
      f. Apply NMS.
    2. Aggregate post-NMS boxes from each level.
    3. Apply an overall top k to generate the final selected RoIs.

  Args:
    raw_boxes: A `dict` with keys representing FPN levels and values
      representing box tensors of shape
      [batch_size, feature_h, feature_w, num_anchors * 4].
    raw_scores: A `dict` with keys representing FPN levels and values
      representing logit tensors of shape
      [batch_size, feature_h, feature_w, num_anchors].
    anchor_boxes: A `dict` with keys representing FPN levels and values
      representing anchor box tensors of shape
      [batch_size, feature_h * feature_w * num_anchors, 4].
    image_shape: A `tf.Tensor` of shape [batch_size, 2] where the last dimension
      is [height, width] of the scaled image.
    pre_nms_top_k: An `int` of top scoring RPN proposals *per level* to keep
      before applying NMS. Default: 2000.
    pre_nms_score_threshold: A `float` between 0 and 1 representing the minimal
      box score to keep before applying NMS. This is often used as a
      pre-filtering step for better performance. Default: 0, no filtering is
      applied.
    pre_nms_min_size_threshold: A `float` representing the minimal box size in
      each side (w.r.t. the scaled image) to keep before applying NMS. This is
      often used as a pre-filtering step for better performance. Default: 0, no
      filtering is applied.
    nms_iou_threshold: A `float` between 0 and 1 representing the IoU threshold
      used for NMS. If 0.0, no NMS is applied. Default: 0.7.
    num_proposals: An `int` of top scoring RPN proposals *in total* to keep
      after applying NMS. Default: 1000.
    use_batched_nms: A `bool` indicating whether NMS is applied in batch using
      `tf.image.combined_non_max_suppression`. Currently only available on
      CPU/GPU. Default is False.
    decode_boxes: A `bool` indicating whether `raw_boxes` needs to be decoded
      using `anchor_boxes`. If False, use `raw_boxes` directly and ignore
      `anchor_boxes`. Default is True.
    clip_boxes: A `bool` indicating whether boxes are first clipped to the
      scaled image size before applying NMS. If False, no clipping is applied
      and `image_shape` is ignored. Default is True.
    apply_sigmoid_to_score: A `bool` indicating whether to apply sigmoid to
      `raw_scores` before applying NMS. Default is True.

  Returns:
    selected_rois: A `tf.Tensor` of shape [batch_size, num_proposals, 4],
      representing the box coordinates of the selected proposals w.r.t. the
      scaled image.
    selected_roi_scores: A `tf.Tensor` of shape [batch_size, num_proposals, 1],
      representing the scores of the selected proposals.
  """
    with tf.name_scope('multilevel_propose_rois'):
        rois = []
        roi_scores = []
        image_shape = tf.expand_dims(image_shape, axis=1)
        for level in sorted(raw_scores.keys()):
            with tf.name_scope('level_%s' % level):
                _, feature_h, feature_w, num_anchors_per_location = (
                    raw_scores[level].get_shape().as_list())

                num_boxes = feature_h * feature_w * num_anchors_per_location
                this_level_scores = tf.reshape(raw_scores[level],
                                               [-1, num_boxes])
                this_level_boxes = tf.reshape(raw_boxes[level],
                                              [-1, num_boxes, 4])
                this_level_anchors = tf.cast(tf.reshape(
                    anchor_boxes[level], [-1, num_boxes, 4]),
                                             dtype=this_level_scores.dtype)

                if apply_sigmoid_to_score:
                    this_level_scores = tf.sigmoid(this_level_scores)

                if decode_boxes:
                    this_level_boxes = box_ops.decode_boxes(
                        this_level_boxes, this_level_anchors)
                if clip_boxes:
                    this_level_boxes = box_ops.clip_boxes(
                        this_level_boxes, image_shape)

                if pre_nms_min_size_threshold > 0.0:
                    this_level_boxes, this_level_scores = box_ops.filter_boxes(
                        this_level_boxes, this_level_scores, image_shape,
                        pre_nms_min_size_threshold)

                this_level_pre_nms_top_k = min(num_boxes, pre_nms_top_k)
                this_level_post_nms_top_k = min(num_boxes, num_proposals)
                if nms_iou_threshold > 0.0:
                    if use_batched_nms:
                        this_level_rois, this_level_roi_scores, _, _ = (
                            tf.image.combined_non_max_suppression(
                                tf.expand_dims(this_level_boxes, axis=2),
                                tf.expand_dims(this_level_scores, axis=-1),
                                max_output_size_per_class=(
                                    this_level_pre_nms_top_k),
                                max_total_size=this_level_post_nms_top_k,
                                iou_threshold=nms_iou_threshold,
                                score_threshold=pre_nms_score_threshold,
                                pad_per_class=False,
                                clip_boxes=False))
                    else:
                        if pre_nms_score_threshold > 0.0:
                            this_level_boxes, this_level_scores = (
                                box_ops.filter_boxes_by_scores(
                                    this_level_boxes, this_level_scores,
                                    pre_nms_score_threshold))
                        this_level_boxes, this_level_scores = box_ops.top_k_boxes(
                            this_level_boxes,
                            this_level_scores,
                            k=this_level_pre_nms_top_k)
                        this_level_roi_scores, this_level_rois = (
                            nms.sorted_non_max_suppression_padded(
                                this_level_scores,
                                this_level_boxes,
                                max_output_size=this_level_post_nms_top_k,
                                iou_threshold=nms_iou_threshold))
                else:
                    this_level_rois, this_level_roi_scores = box_ops.top_k_boxes(
                        this_level_boxes,
                        this_level_scores,
                        k=this_level_post_nms_top_k)

                rois.append(this_level_rois)
                roi_scores.append(this_level_roi_scores)

        all_rois = tf.concat(rois, axis=1)
        all_roi_scores = tf.concat(roi_scores, axis=1)

        with tf.name_scope('top_k_rois'):
            _, num_valid_rois = all_roi_scores.get_shape().as_list()
            overall_top_k = min(num_valid_rois, num_proposals)

            selected_rois, selected_roi_scores = box_ops.top_k_boxes(
                all_rois, all_roi_scores, k=overall_top_k)

        return selected_rois, selected_roi_scores
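A minimal two-level sketch of the proposal function above; all tensor contents
are random placeholders, and the level keys, feature sizes, and anchor count
are illustrative assumptions:

import tensorflow as tf

batch = 2
raw_boxes = {'2': tf.random.uniform([batch, 64, 64, 3 * 4]),
             '3': tf.random.uniform([batch, 32, 32, 3 * 4])}
raw_scores = {'2': tf.random.uniform([batch, 64, 64, 3]),
              '3': tf.random.uniform([batch, 32, 32, 3])}
anchor_boxes = {'2': tf.random.uniform([batch, 64 * 64 * 3, 4]),
                '3': tf.random.uniform([batch, 32 * 32 * 3, 4])}
image_shape = tf.constant([[512., 512.], [512., 512.]])

selected_rois, selected_roi_scores = _multilevel_propose_rois(
    raw_boxes, raw_scores, anchor_boxes, image_shape,
    pre_nms_top_k=2000, num_proposals=1000)
# selected_rois: [2, 1000, 4]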
Example #6
  def _call_box_outputs(
      self, images: tf.Tensor,
      image_shape: tf.Tensor,
      anchor_boxes: Optional[Mapping[str, tf.Tensor]] = None,
      gt_boxes: Optional[tf.Tensor] = None,
      gt_classes: Optional[tf.Tensor] = None,
      training: Optional[bool] = None) -> Tuple[
          Mapping[str, tf.Tensor], Mapping[str, tf.Tensor]]:
    """Implementation of the Faster-RCNN logic for boxes."""
    model_outputs = {}

    # Feature extraction.
    (backbone_features,
     decoder_features) = self._get_backbone_and_decoder_features(images)

    # Region proposal network.
    rpn_scores, rpn_boxes = self.rpn_head(decoder_features)

    model_outputs.update({
        'backbone_features': backbone_features,
        'decoder_features': decoder_features,
        'rpn_boxes': rpn_boxes,
        'rpn_scores': rpn_scores
    })

    # Generate anchor boxes for this batch if not provided.
    if anchor_boxes is None:
      _, image_height, image_width, _ = images.get_shape().as_list()
      anchor_boxes = anchor.Anchor(
          min_level=self._config_dict['min_level'],
          max_level=self._config_dict['max_level'],
          num_scales=self._config_dict['num_scales'],
          aspect_ratios=self._config_dict['aspect_ratios'],
          anchor_size=self._config_dict['anchor_size'],
          image_size=(image_height, image_width)).multilevel_boxes
      for l in anchor_boxes:
        anchor_boxes[l] = tf.tile(
            tf.expand_dims(anchor_boxes[l], axis=0),
            [tf.shape(images)[0], 1, 1, 1])

    # Generate RoIs.
    current_rois, _ = self.roi_generator(rpn_boxes, rpn_scores, anchor_boxes,
                                         image_shape, training)

    next_rois = current_rois
    all_class_outputs = []
    for cascade_num in range(len(self.roi_sampler)):
      # In Cascade R-CNN we want the later heads to use different regression
      # weights, since the predicted deltas become smaller and smaller.
      regression_weights = self._cascade_layer_to_weights[cascade_num]
      current_rois = next_rois

      (class_outputs, box_outputs, model_outputs, matched_gt_boxes,
       matched_gt_classes, matched_gt_indices,
       current_rois) = self._run_frcnn_head(
           features=decoder_features,
           rois=current_rois,
           gt_boxes=gt_boxes,
           gt_classes=gt_classes,
           training=training,
           model_outputs=model_outputs,
           cascade_num=cascade_num,
           regression_weights=regression_weights)
      all_class_outputs.append(class_outputs)

      # Generate RoIs for the next cascade head, if there is one.
      if cascade_num < len(self.roi_sampler) - 1:
        next_rois = box_ops.decode_boxes(
            tf.cast(box_outputs, tf.float32),
            current_rois,
            weights=regression_weights)
        next_rois = box_ops.clip_boxes(next_rois,
                                       tf.expand_dims(image_shape, axis=1))

    if not training:
      if self._config_dict['cascade_class_ensemble']:
        class_outputs = tf.add_n(all_class_outputs) / len(all_class_outputs)

      detections = self.detection_generator(
          box_outputs,
          class_outputs,
          current_rois,
          image_shape,
          regression_weights,
          bbox_per_class=(not self._config_dict['class_agnostic_bbox_pred']))
      model_outputs.update({
          'cls_outputs': class_outputs,
          'box_outputs': box_outputs,
      })
      if self.detection_generator.get_config()['apply_nms']:
        model_outputs.update({
            'detection_boxes': detections['detection_boxes'],
            'detection_scores': detections['detection_scores'],
            'detection_classes': detections['detection_classes'],
            'num_detections': detections['num_detections']
        })
      else:
        model_outputs.update({
            'decoded_boxes': detections['decoded_boxes'],
            'decoded_box_scores': detections['decoded_box_scores']
        })

    intermediate_outputs = {
        'matched_gt_boxes': matched_gt_boxes,
        'matched_gt_indices': matched_gt_indices,
        'matched_gt_classes': matched_gt_classes,
        'current_rois': current_rois,
    }
    return (model_outputs, intermediate_outputs)
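A small sketch of the anchor-generation fallback in the method above, assuming
`anchor` is `official.vision.ops.anchor` from the TF Model Garden; the
configuration values are illustrative:

from official.vision.ops import anchor

anchors = anchor.Anchor(
    min_level=2, max_level=6, num_scales=1,
    aspect_ratios=[0.5, 1.0, 2.0], anchor_size=8.0,
    image_size=(640, 640)).multilevel_boxes
# Each level maps to a [feature_h, feature_w, num_anchors_per_location * 4]
# tensor, e.g. level '2' would be (160, 160, 12) with 3 anchors per location,
# which the loop above then tiles across the batch dimension.
for level, boxes_l in anchors.items():
  print(level, boxes_l.shape)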
Example #7
def affine_warp_boxes(affine, boxes, output_size, box_history):
  """Applies random rotation, random perspective change and random translation.

  and random scaling to the boxes.

  Args:
    affine: A `Tensor` for the augmenting matrix for the boxes.
    boxes: A `Tensor` for the boxes.
    output_size: A `list` of two integers, a two-element vector or a tensor such
      that all but the last dimensions are `broadcastable` to `boxes`. The last
      dimension is 2, which represents [height, width].
    box_history: A `Tensor` for the boxes history, which are the boxes that
      undergo the same augmentations as `boxes`, but no clipping was applied. We
      can keep track of how much changes are done to the boxes by keeping track
      of this tensor.

  Returns:
    clipped_boxes: A `Tensor` representing the augmented boxes.
    box_history: A `Tensor` representing the augmented box_history.
  """

  def _get_corners(box):
    """Get the corner of each box as a tuple of (x, y) coordinates."""
    ymi, xmi, yma, xma = tf.split(box, 4, axis=-1)
    tl = tf.concat([xmi, ymi], axis=-1)
    bl = tf.concat([xmi, yma], axis=-1)
    tr = tf.concat([xma, ymi], axis=-1)
    br = tf.concat([xma, yma], axis=-1)
    return tf.concat([tl, bl, tr, br], axis=-1)

  def _corners_to_boxes(corner):
    """Convert (x, y) corners back into boxes [ymin, xmin, ymax, xmax]."""
    corner = tf.reshape(corner, [-1, 4, 2])
    y = corner[..., 1]
    x = corner[..., 0]
    y_min = tf.reduce_min(y, axis=-1)
    x_min = tf.reduce_min(x, axis=-1)
    y_max = tf.reduce_max(y, axis=-1)
    x_max = tf.reduce_max(x, axis=-1)
    return tf.stack([y_min, x_min, y_max, x_max], axis=-1)

  def _aug_boxes(affine_matrix, box):
    """Apply an affine transformation matrix M to the boxes augment boxes."""
    corners = _get_corners(box)
    corners = tf.reshape(corners, [-1, 4, 2])
    z = tf.expand_dims(tf.ones_like(corners[..., 1]), axis=-1)
    corners = tf.concat([corners, z], axis=-1)

    corners = tf.transpose(
        tf.matmul(affine_matrix, corners, transpose_b=True), perm=(0, 2, 1))

    corners, p = tf.split(corners, [2, 1], axis=-1)
    corners /= p
    corners = tf.reshape(corners, [-1, 8])
    box = _corners_to_boxes(corners)
    return box

  boxes = _aug_boxes(affine, boxes)
  box_history = _aug_boxes(affine, box_history)

  clipped_boxes = bbox_ops.clip_boxes(boxes, output_size)
  return clipped_boxes, box_history
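A minimal sketch with a hypothetical translation-only affine transform (a point
(x, y) maps to (x + 10, y + 20)); the [3, 3] matrix is tiled to one transform
per box so the batched matmul inside _aug_boxes lines up:

import tensorflow as tf

boxes = tf.constant([[50., 40., 150., 140.],
                     [10., 10., 60., 80.]])  # [N, 4] [ymin, xmin, ymax, xmax]
affine = tf.constant([[1., 0., 10.],
                      [0., 1., 20.],
                      [0., 0., 1.]])
affine = tf.tile(affine[tf.newaxis], [tf.shape(boxes)[0], 1, 1])  # [N, 3, 3]
output_size = tf.constant([480., 640.])      # clip to [height, width]

# box_history starts as a copy of the boxes; it receives the same warp but is
# returned without clipping.
warped, history = affine_warp_boxes(affine, boxes, output_size, boxes)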