def _get_rpn_samples(self, match_results):
    """Subsamples anchors and turns match results into RPN score targets.

    A `BalancedPositiveNegativeSampler` is asked for
    `self._rpn_batch_size_per_im` samples with a positive fraction of
    `self._rpn_fg_fraction`. Anchors that are not picked by the sampler end
    up labelled as "don't care".

    Args:
      match_results: An integer tensor with shape [N] representing the
        matching results of anchors.
        (1) match_results[i]>=0, meaning that column i is matched with row
            match_results[i].
        (2) match_results[i]=-1, meaning that column i is not matched.
        (3) match_results[i]=-2, meaning that column i is ignored.
        The static shape of this tensor is used to build the label
        constants.

    Returns:
      A tuple `(score_targets, positive_labels, negative_labels)` of int32
      tensors, each with the shape of `match_results`:
        score_targets: (1) score_targets[i]=1, the anchor is a positive
          sample. (2) score_targets[i]=0, negative. (3) score_targets[i]=-1,
          the anchor is don't care (ignore).
        positive_labels: 2 where the anchor is a sampled positive, 0
          elsewhere.
        negative_labels: 1 where the anchor is a sampled negative, 0
          elsewhere.
    """
    sampler = (
        balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
            positive_fraction=self._rpn_fg_fraction, is_static=False))

    label_shape = match_results.shape

    def _int_const(value):
        # Constant int32 tensor shaped like `match_results`.
        return tf.constant(value, dtype=tf.int32, shape=label_shape)

    # Candidates are all anchors that are not ignored (> -2); among those,
    # the foreground anchors are the ones matched to a groundtruth (> -1).
    #   positives = candidate & foreground
    #   negatives = candidate & !foreground
    #   ignore    = !candidate
    is_candidate = tf.greater(match_results, -2)
    is_foreground = tf.greater(match_results, -1)
    is_sampled = sampler.subsample(
        is_candidate, self._rpn_batch_size_per_im, is_foreground)

    sampled_foreground = tf.logical_and(is_sampled, is_foreground)
    positive_labels = tf.where(
        sampled_foreground, _int_const(2), _int_const(0))

    sampled_background = tf.logical_and(
        is_sampled, tf.logical_not(is_foreground))
    negative_labels = tf.where(
        sampled_background, _int_const(1), _int_const(0))

    # Every anchor starts at -1 (ignore). Adding 2 lifts sampled positives
    # to 1 and adding 1 lifts sampled negatives to 0.
    ignore_labels = tf.fill(label_shape, -1)
    score_targets = ignore_labels + positive_labels + negative_labels
    return score_targets, positive_labels, negative_labels
def assign_and_sample_proposals(proposed_boxes,
                                gt_boxes,
                                gt_classes,
                                num_samples_per_image=512,
                                mix_gt_boxes=True,
                                fg_fraction=0.25,
                                fg_iou_thresh=0.5,
                                bg_iou_thresh_hi=0.5,
                                bg_iou_thresh_lo=0.0):
    """Assigns groundtruth to proposals and subsamples a fixed set of RoIs.

    Given `proposed_boxes`, `gt_boxes`, and `gt_classes`, the function
    produces `num_samples_per_image` RoIs per image as follows:
      1. Optionally appends the groundtruth boxes to the proposals.
      2. Matches every box against the groundtruth via `box_matching`,
         which yields the matched groundtruth box, class, index and IoU.
      3. Marks boxes as foreground / background from the matched IoU and
         resets class and groundtruth index of background boxes to 0.
      4. Runs a balanced positive/negative sampler per image and gathers
         the boxes and targets of the selected entries.

    Args:
      proposed_boxes: a tensor of shape of [batch_size, N, 4]. N is the
        number of proposals before groundtruth assignment. The last
        dimension is the box coordinates w.r.t. the scaled images in
        [ymin, xmin, ymax, xmax] format.
      gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4].
        The coordinates of gt_boxes are in the pixel coordinates of the
        scaled image. This tensor might have padding of values -1
        indicating the invalid box coordinates.
      gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES].
        This tensor might have paddings with values of -1 indicating the
        invalid classes.
      num_samples_per_image: an integer, the RoI minibatch size per image.
      mix_gt_boxes: a bool indicating whether the groundtruth boxes are
        concatenated after the proposals (along axis 1) before sampling.
      fg_fraction: a float, the target fraction of the RoI minibatch that
        is labeled foreground (i.e., class > 0).
      fg_iou_thresh: a float IoU threshold; a box is treated as foreground
        when its matched IoU is strictly greater than this value.
      bg_iou_thresh_hi: a float, exclusive upper IoU bound for background
        (class = 0 if overlap in [LO, HI)).
      bg_iou_thresh_lo: a float, inclusive lower IoU bound for background
        (class = 0 if overlap in [LO, HI)).

    Returns:
      sampled_rois: a tensor of shape of [batch_size, K, 4], representing
        the coordinates of the sampled RoIs, where K is the number of the
        sampled RoIs, i.e. K = num_samples_per_image.
      sampled_gt_boxes: a tensor of shape of [batch_size, K, 4], storing
        the box coordinates of the matched groundtruth boxes of the sampled
        RoIs.
      sampled_gt_classes: a tensor of shape of [batch_size, K], storing the
        classes of the matched groundtruth boxes of the sampled RoIs.
      sampled_gt_indices: a tensor of shape of [batch_size, K], storing the
        indices of the sampled groundtruth boxes in the original `gt_boxes`
        tensor, i.e.
        gt_boxes[sampled_gt_indices[:, i]] = sampled_gt_boxes[:, i].
        Background RoIs carry index 0 here.
    """
    with tf.name_scope('sample_proposals'):
        boxes = (
            tf.concat([proposed_boxes, gt_boxes], axis=1)
            if mix_gt_boxes else proposed_boxes)

        (matched_gt_boxes, matched_gt_classes, matched_gt_indices,
         matched_iou, _) = box_matching(boxes, gt_boxes, gt_classes)

        is_foreground = tf.greater(matched_iou, fg_iou_thresh)
        above_bg_lo = tf.greater_equal(matched_iou, bg_iou_thresh_lo)
        below_bg_hi = tf.less(matched_iou, bg_iou_thresh_hi)
        is_background = tf.logical_and(above_bg_lo, below_bg_hi)
        is_ignored = tf.less(matched_iou, 0.0)

        # Background boxes get class 0 and groundtruth index 0.
        matched_gt_classes = tf.where(
            is_background,
            tf.zeros_like(matched_gt_classes),
            matched_gt_classes)
        matched_gt_indices = tf.where(
            is_background,
            tf.zeros_like(matched_gt_indices),
            matched_gt_indices)

        # Only foreground or background boxes that are not ignored may be
        # drawn by the sampler.
        fg_or_bg = tf.logical_or(is_foreground, is_background)
        candidates = tf.logical_and(fg_or_bg, tf.logical_not(is_ignored))

        sampler = (
            balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
                positive_fraction=fg_fraction, is_static=True))

        # The sampler works on one image at a time, so the batch dimension
        # has to be statically known to unroll it here.
        batch_size, _ = candidates.get_shape().as_list()
        per_image_picks = [
            sampler.subsample(
                candidates[image_id],
                num_samples_per_image,
                is_foreground[image_id])
            for image_id in range(batch_size)
        ]
        picked = tf.stack(per_image_picks)

        # Picked entries become 1 and the rest 0, so top_k returns the
        # positions of the picked boxes first.
        _, picked_positions = tf.nn.top_k(
            tf.cast(picked, dtype=tf.int32),
            k=num_samples_per_image,
            sorted=True)

        # Pair each picked position with its batch index for gather_nd.
        positions_shape = tf.shape(picked_positions)
        batch_column = tf.expand_dims(tf.range(positions_shape[0]), axis=-1)
        ones_row = tf.ones([1, positions_shape[-1]], dtype=tf.int32)
        batch_ids = batch_column * ones_row
        gather_indices = tf.stack([batch_ids, picked_positions], axis=-1)

        sampled_rois = tf.gather_nd(boxes, gather_indices)
        sampled_gt_boxes = tf.gather_nd(matched_gt_boxes, gather_indices)
        sampled_gt_classes = tf.gather_nd(matched_gt_classes, gather_indices)
        sampled_gt_indices = tf.gather_nd(matched_gt_indices, gather_indices)

        return (sampled_rois, sampled_gt_boxes, sampled_gt_classes,
                sampled_gt_indices)
def label_anchors_lrtb(self, gt_boxes, gt_labels):
    """Builds per-level score, box, lrtb and centerness targets for anchors.

    Args:
      gt_boxes: A float tensor with shape [N, 4] representing groundtruth
        boxes. For each row, it stores [y0, x0, y1, x1] for four corners of
        a box.
      gt_labels: An integer tensor with shape [N, 1] representing
        groundtruth classes.

    Returns:
      score_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors]. The height_l and width_l
        represent the dimension of class logits at l-th level.
      box_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors * 4]. The height_l and
        width_l represent the dimension of bounding box regression output at
        l-th level.
      lrtb_targets_dict: Same structure as box_targets_dict, except the
        regression targets are converted from xyhw to lrtb format. Entries
        whose smallest lrtb component is not positive, or whose anchor is
        not matched, are zeroed.
      center_targets_dict: Same structure as score_targets_dict, except the
        scores are centerness values ranging from 0 to 1. Anchors that are
        not picked by the centerness sampler are set to -1.
    """
    gt_box_list = box_list.BoxList(gt_boxes)
    anchor_box_list = box_list.BoxList(self._anchor.boxes)

    # The assigner also returns cls_targets, cls_weights and box_weights;
    # they are not needed here.
    (_, _, box_targets, _, matches,
     matched_gt_box_list, matched_anchors_mask,
     center_matched_gt_box_list, center_matched_anchors_mask,
     matched_ious) = self._target_assigner.assign(
         anchor_box_list, gt_box_list, gt_labels)

    anchor_boxes = anchor_box_list.data['boxes']

    # lrtb regression targets against the matched groundtruth boxes.
    lrtb_targets, _ = box_utils.encode_boxes_lrtb(
        matched_gt_box_list.data['boxes'],
        anchor_boxes,
        weights=[1.0] * 4)
    min_side_positive = tf.greater(tf.reduce_min(lrtb_targets, -1), 0.)
    lrtb_keep = tf.logical_and(min_side_positive, matched_anchors_mask)
    # Repeat the per-anchor mask along the last axis so that it has the same
    # shape as lrtb_targets.
    lrtb_keep = tf.tile(
        tf.expand_dims(lrtb_keep, 1), [1, tf.shape(lrtb_targets)[1]])
    lrtb_targets = tf.where(
        lrtb_keep, lrtb_targets, tf.zeros_like(lrtb_targets))

    # Anchor-to-groundtruth IoU, with non-positive values replaced by 0.
    iou_targets = tf.where(
        tf.greater(matched_ious, 0.0),
        matched_ious,
        tf.zeros_like(matched_ious))

    # Centerness targets against the center-matched groundtruth boxes.
    _, center_targets = box_utils.encode_boxes_lrtb(
        center_matched_gt_box_list.data['boxes'],
        anchor_boxes,
        weights=[1.0] * 4)

    # Subsample centerness targets: center-matched anchors act as positives,
    # anchors with IoU below the unmatched threshold as additional
    # candidates.
    center_batch_size = self._num_center_samples_per_im
    center_sampler = (
        balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
            positive_fraction=(1. - 1. / center_batch_size),
            is_static=False))
    center_candidates = tf.logical_or(
        center_matched_anchors_mask,
        tf.less(iou_targets, self._center_unmatched_iou_threshold))
    center_picked = center_sampler.subsample(
        center_candidates, center_batch_size, center_matched_anchors_mask)
    center_targets = tf.where(
        center_picked,
        center_targets,
        (-1) * tf.ones_like(center_targets))

    # Subsampled positive / negative / ignore labels for the anchors.
    score_targets, _, _ = self._get_rpn_samples(matches.match_results)

    # Split every flat target tensor into its per-level dictionary, in the
    # order: score, box, lrtb, centerness.
    return tuple(
        self._anchor.unpack_labels(flat_targets)
        for flat_targets in (
            score_targets, box_targets, lrtb_targets, center_targets))