Пример #1
0
    def _get_rpn_samples(self, match_results):
        """Subsamples anchors and derives per-anchor RPN score labels.

        A non-static `BalancedPositiveNegativeSampler` is built with
        `self._rpn_fg_fraction` as its positive fraction and is asked to
        subsample the candidate anchors with a batch size of
        `self._rpn_batch_size_per_im`.

        Args:
          match_results: An integer tensor with shape [N] holding the matching
            result of every anchor. (1) match_results[i] >= 0 means anchor i is
            matched with row match_results[i]. (2) match_results[i] = -1 means
            anchor i is not matched. (3) match_results[i] = -2 means anchor i
            is ignored.

        Returns:
          A tuple `(score_targets, positive_labels, negative_labels)` of int32
          tensors shaped like `match_results`:
          score_targets: 1 where the anchor is a sampled positive, 0 where it
            is a sampled negative, and -1 everywhere else (don't care).
          positive_labels: 2 where the anchor is a sampled positive, else 0.
          negative_labels: 1 where the anchor is a sampled negative, else 0.
        """
        label_shape = match_results.shape
        fg_bg_sampler = (
            balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
                positive_fraction=self._rpn_fg_fraction, is_static=False))

        # Candidates are every anchor that is not ignored (value > -2); the
        # matched ones (value > -1) are the positives among them.
        is_candidate = tf.greater(match_results, -2)
        is_matched = tf.greater(match_results, -1)
        is_sampled = fg_bg_sampler.subsample(
            is_candidate, self._rpn_batch_size_per_im, is_matched)

        def _mark(mask, value):
            # int32 tensor holding `value` where `mask` is set and 0 elsewhere.
            return tf.where(
                mask,
                tf.constant(value, dtype=tf.int32, shape=label_shape),
                tf.constant(0, dtype=tf.int32, shape=label_shape))

        positive_labels = _mark(tf.logical_and(is_sampled, is_matched), 2)
        negative_labels = _mark(
            tf.logical_and(is_sampled, tf.logical_not(is_matched)), 1)

        # Starting from -1 everywhere: -1 + 2 = 1 for sampled positives,
        # -1 + 1 = 0 for sampled negatives, and -1 is left for the rest.
        score_targets = (
            tf.fill(label_shape, -1) + positive_labels + negative_labels)
        return score_targets, positive_labels, negative_labels
  def label_anchors_lrtb(self, gt_boxes, gt_labels):
    """Builds per-level score, box, lrtb and centerness targets for anchors.

    Args:
      gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
        For each row, it stores [y0, x0, y1, x1] for four corners of a box.
      gt_labels: An integer tensor with shape [N, 1] representing groundtruth
        classes.

    Returns:
      score_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensors with
        shape [height_l, width_l, num_anchors]. The height_l and width_l
        represent the dimension of class logits at l-th level.
      box_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensors with
        shape [height_l, width_l, num_anchors * 4]. The height_l and
        width_l represent the dimension of bounding box regression output at
        l-th level.
      lrtb_targets_dict: Same structure as box_targets_dict, except the
        regression targets are converted from xyhw to lrtb format. Rows whose
        smallest lrtb value is not strictly positive, or whose anchor is not
        matched, are zeroed out.
      center_targets_dict: Same structure as score_targets_dict, except the
        scores are centerness values ranging from 0 to 1. Anchors that were
        not picked by the centerness sampler carry -1.
    """
    groundtruth = box_list.BoxList(gt_boxes)
    anchors = box_list.BoxList(self._anchor.boxes)

    def _encode_lrtb(target_box_list):
      # Encodes the given boxes against the anchors with unit weights; a new
      # weights list is created on every call.
      return box_utils.encode_boxes_lrtb(
          target_box_list.data['boxes'],
          anchors.data['boxes'],
          weights=[1.0, 1.0, 1.0, 1.0])

    # The discarded slots (cls_targets, cls_weights, box_weights) are unused.
    (_, _, box_targets, _, matches,
     matched_gt, matched_mask,
     center_matched_gt, center_matched_mask,
     matched_ious) = self._target_assigner.assign(
         anchors, groundtruth, gt_labels)

    # lrtb regression targets: keep a row only if all of its values are
    # strictly positive and the anchor is matched.
    lrtb_targets, _ = _encode_lrtb(matched_gt)
    smallest_offset = tf.reduce_min(lrtb_targets, axis=-1)
    row_is_usable = tf.logical_and(
        tf.greater(smallest_offset, 0.), matched_mask)
    # Repeat the per-row flag across columns so it matches lrtb_targets.
    keep_lrtb = tf.tile(
        tf.expand_dims(row_is_usable, axis=1),
        [1, tf.shape(lrtb_targets)[1]])
    lrtb_targets = tf.where(
        keep_lrtb, lrtb_targets, tf.zeros_like(lrtb_targets))

    # Anchor-to-groundtruth IoU values, with non-positive entries set to 0.
    clipped_ious = tf.where(
        tf.greater(matched_ious, 0.0),
        matched_ious,
        tf.zeros_like(matched_ious))

    # Centerness targets come from the second output of the lrtb encoder.
    _, center_targets = _encode_lrtb(center_matched_gt)

    # Positive/negative sampling for centerness: candidates are the
    # center-matched anchors plus anchors whose IoU is below the unmatched
    # threshold; the center-matched anchors are the positives.
    samples_per_im = self._num_center_samples_per_im
    center_sampler = (
        balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
            positive_fraction=(1. - 1. / samples_per_im),
            is_static=False))
    center_candidates = tf.logical_or(
        center_matched_mask,
        tf.less(clipped_ious, self._center_unmatched_iou_threshold))
    picked_for_center = center_sampler.subsample(
        center_candidates, samples_per_im, center_matched_mask)
    center_targets = tf.where(
        picked_for_center,
        center_targets,
        (-1) * tf.ones_like(center_targets))

    # score_targets holds the subsampled positive and negative anchors.
    score_targets, _, _ = self._get_rpn_samples(matches.match_results)

    # Unpack every flat target tensor into its per-level dictionary.
    return tuple(
        self._anchor.unpack_labels(flat_targets)
        for flat_targets in (
            score_targets, box_targets, lrtb_targets, center_targets))
def assign_and_sample_proposals(proposed_boxes,
                                gt_boxes,
                                gt_classes,
                                num_samples_per_image=512,
                                mix_gt_boxes=True,
                                fg_fraction=0.25,
                                fg_iou_thresh=0.5,
                                bg_iou_thresh_hi=0.5,
                                bg_iou_thresh_lo=0.0):
    """Matches proposals to groundtruth and subsamples a fixed number of RoIs.

    Given `proposed_boxes`, `gt_boxes`, and `gt_classes`, the function produces
    the final `num_samples_per_image` RoIs as follows:
      1. Optionally appends the groundtruth boxes to the proposals.
      2. Matches every box to a groundtruth box and class via `box_matching`,
         which also yields the matched IoU.
      3. Marks boxes as foreground / background / ignored from the matched
         IoU, and resets the class and groundtruth index of background boxes
         to 0.
      4. Runs a balanced positive/negative sampler per image and gathers the
         boxes, matched groundtruth boxes, classes and indices of the samples.

    The batch dimension must be statically known because it drives a Python
    loop over the images.

    Args:
      proposed_boxes: a tensor of shape of [batch_size, N, 4]. N is the number
        of proposals before groundtruth assignment. The last dimension is the
        box coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax]
        format.
      gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4]. The
        coordinates of gt_boxes are in the pixel coordinates of the scaled
        image. This tensor might have padding of values -1 indicating the
        invalid box coordinates.
      gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES].
        This tensor might have paddings with values of -1 indicating the
        invalid classes.
      num_samples_per_image: an integer, the RoI minibatch size per image.
      mix_gt_boxes: a bool indicating whether to mix the groundtruth boxes
        into the proposals before sampling.
      fg_fraction: a float, the target fraction of the RoI minibatch that is
        labeled foreground (i.e., class > 0).
      fg_iou_thresh: a float; an RoI is treated as foreground when its matched
        IoU is strictly greater than this value.
      bg_iou_thresh_hi: a float; an RoI is treated as background (class = 0)
        when its matched IoU lies in [bg_iou_thresh_lo, bg_iou_thresh_hi).
      bg_iou_thresh_lo: a float, the inclusive lower bound of the background
        IoU interval described above.

    Returns:
      sampled_rois: a tensor of shape of [batch_size, K, 4], representing the
        coordinates of the sampled RoIs, where K is the number of the sampled
        RoIs, i.e. K = num_samples_per_image.
      sampled_gt_boxes: a tensor of shape of [batch_size, K, 4], storing the
        box coordinates of the matched groundtruth boxes of the sampled RoIs.
      sampled_gt_classes: a tensor of shape of [batch_size, K], storing the
        classes of the matched groundtruth boxes of the sampled RoIs
        (0 for RoIs matched as background).
      sampled_gt_indices: a tensor of shape of [batch_size, K], storing the
        indices of the matched groundtruth boxes in the original `gt_boxes`
        tensor as reported by `box_matching`; entries of RoIs matched as
        background are reset to 0.
    """

    with tf.name_scope('sample_proposals'):
        boxes = (
            tf.concat([proposed_boxes, gt_boxes], axis=1)
            if mix_gt_boxes else proposed_boxes)

        (matched_gt_boxes, matched_gt_classes, matched_gt_indices, matched_iou,
         _) = box_matching(boxes, gt_boxes, gt_classes)

        # Note the strict comparison: an IoU exactly equal to fg_iou_thresh is
        # not foreground.
        is_foreground = tf.greater(matched_iou, fg_iou_thresh)
        is_background = tf.logical_and(
            tf.greater_equal(matched_iou, bg_iou_thresh_lo),
            tf.less(matched_iou, bg_iou_thresh_hi))
        is_ignored = tf.less(matched_iou, 0.0)

        # Background boxes get class 0 and groundtruth index 0.
        matched_gt_classes = tf.where(
            is_background,
            tf.zeros_like(matched_gt_classes),
            matched_gt_classes)
        matched_gt_indices = tf.where(
            is_background,
            tf.zeros_like(matched_gt_indices),
            matched_gt_indices)

        # Only foreground or background boxes that are not ignored may be
        # sampled.
        sample_candidates = tf.logical_and(
            tf.logical_or(is_foreground, is_background),
            tf.logical_not(is_ignored))

        fg_bg_sampler = (
            balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
                positive_fraction=fg_fraction, is_static=True))

        # The sampler is applied image by image, then the results are stacked
        # back into a [batch_size, num_boxes] indicator.
        batch_size, _ = sample_candidates.get_shape().as_list()
        sampled_indicators = tf.stack([
            fg_bg_sampler.subsample(sample_candidates[image_id],
                                    num_samples_per_image,
                                    is_foreground[image_id])
            for image_id in range(batch_size)
        ])

        # top_k over the 0/1 indicator yields the positions of the sampled
        # boxes for every image.
        _, sampled_indices = tf.nn.top_k(
            tf.cast(sampled_indicators, dtype=tf.int32),
            k=num_samples_per_image,
            sorted=True)

        # Pair each sampled position with its batch index for gather_nd.
        indices_shape = tf.shape(sampled_indices)
        batch_column = tf.expand_dims(tf.range(indices_shape[0]), axis=-1)
        batch_indices = batch_column * tf.ones(
            [1, indices_shape[-1]], dtype=tf.int32)
        gather_nd_indices = tf.stack([batch_indices, sampled_indices], axis=-1)

        # Order of the result: rois, gt boxes, gt classes, gt indices.
        return tuple(
            tf.gather_nd(per_box_values, gather_nd_indices)
            for per_box_values in (boxes, matched_gt_boxes,
                                   matched_gt_classes, matched_gt_indices))