def _get_proposals_single(self, rpn_probs, rpn_deltas, anchors, valid_flags, img_shape, with_probs):
    '''Turn the RPN outputs of a single image into region proposals.

    Args
    ---
        rpn_probs: [num_anchors]
        rpn_deltas: [num_anchors, (dy, dx, log(dh), log(dw))]
        anchors: [num_anchors, (y1, x1, y2, x2)] anchors defined in
            pixel coordinates.
        valid_flags: [num_anchors]. Entries that cast to False are dropped.
        img_shape: np.ndarray. [2]. (img_height, img_width)
        with_probs: bool. Append the score of each proposal as a last column.

    Returns
    ---
        proposals: [num_proposals, (y1, x1, y2, x2)] in normalized
            coordinates; one extra score column when with_probs is true.
    '''
    img_h, img_w = img_shape

    # Keep only the anchors flagged as valid.
    mask = tf.cast(valid_flags, tf.bool)
    scores = tf.boolean_mask(rpn_probs, mask)
    deltas = tf.boolean_mask(rpn_deltas, mask)
    boxes = tf.boolean_mask(anchors, mask)

    # Only the best-scoring anchors (at most 6000) go through decoding and
    # NMS, which keeps the suppression step cheap.
    top_n = min(6000, boxes.shape[0])
    order = tf.nn.top_k(scores, top_n, sorted=True).indices
    scores = tf.gather(scores, order)
    deltas = tf.gather(deltas, order)
    boxes = tf.gather(boxes, order)

    # Decode the deltas into boxes and clip them to the image window.
    refined = transforms.delta2bbox(
        boxes, deltas, self.target_means, self.target_stds)
    clip_window = tf.constant([0., 0., img_h, img_w], dtype=tf.float32)
    refined = transforms.bbox_clip(refined, clip_window)

    # Scale pixel coordinates by the image size.
    scale = tf.constant([img_h, img_w, img_h, img_w], dtype=tf.float32)
    normalized = refined / scale

    # Non-maximum suppression on the normalized boxes.
    selected = tf.image.non_max_suppression(
        normalized, scores, self.proposal_count, self.nms_threshold)
    proposals = tf.gather(normalized, selected)

    if not with_probs:
        return proposals

    selected_scores = tf.expand_dims(tf.gather(scores, selected), axis=1)
    return tf.concat([proposals, selected_scores], axis=1)
def _get_bboxes_single(self, rcnn_probs, rcnn_deltas, rois, img_shape):
    '''Turn the R-CNN head outputs of a single image into final detections.

    Args
    ---
        rcnn_probs: [num_rois, num_classes]
        rcnn_deltas: [num_rois, num_classes, (dy, dx, log(dh), log(dw))]
        rois: [num_rois, (y1, x1, y2, x2)]
        img_shape: np.ndarray. [2]. (img_height, img_width)

    Returns
    ---
        detections: [num_detections, (y1, x1, y2, x2, class_id, score)].
            Boxes are scaled by the image size and clipped to the image
            window. At most self.max_instances rows, ordered by
            descending score.
    '''
    img_h, img_w = img_shape

    def _common_indices(first, second):
        '''Return the values that occur in both 1-D index tensors.'''
        shared = tf.sets.intersection(
            tf.expand_dims(first, 0), tf.expand_dims(second, 0))
        return tf.sparse.to_dense(shared)[0]

    # Top class of every ROI, with its probability and its box deltas.
    best_class = tf.argmax(rcnn_probs, axis=1, output_type=tf.int32)
    roi_class_pairs = tf.stack(
        [tf.range(rcnn_probs.shape[0]), best_class], axis=1)
    best_score = tf.gather_nd(rcnn_probs, roi_class_pairs)
    best_deltas = tf.gather_nd(rcnn_deltas, roi_class_pairs)

    # Decode the class-specific deltas, scale by the image size and clip.
    # NOTE(review): the scaling implies rois arrive in normalized
    # coordinates - confirm against the caller.
    boxes = transforms.delta2bbox(
        rois, best_deltas, self.target_means, self.target_stds)
    boxes = boxes * tf.constant([img_h, img_w, img_h, img_w], dtype=tf.float32)
    clip_window = tf.constant(
        [0., 0., img_h * 1., img_w * 1.], dtype=tf.float32)
    boxes = transforms.bbox_clip(boxes, clip_window)

    # Candidates: non-background ROIs (class id > 0) ...
    candidates = tf.where(best_class > 0)[:, 0]
    # ... that also reach the confidence threshold, when one is set.
    if self.min_confidence:
        confident = tf.where(best_score >= self.min_confidence)[:, 0]
        candidates = _common_indices(candidates, confident)

    # Per-class NMS.
    # 1. Gather the candidate data once.
    cand_classes = tf.gather(best_class, candidates)
    cand_scores = tf.gather(best_score, candidates)
    cand_boxes = tf.gather(boxes, candidates)
    present_classes = tf.unique(cand_classes)[0]

    def _nms_for_class(class_id):
        '''Run NMS on the candidates of one class; return ROI indices.'''
        members = tf.where(tf.equal(cand_classes, class_id))[:, 0]
        survivors = tf.image.non_max_suppression(
            tf.gather(cand_boxes, members),
            tf.gather(cand_scores, members),
            max_output_size=self.max_instances,
            iou_threshold=self.nms_threshold)
        # Translate positions within the class back to original ROI indices.
        return tf.gather(candidates, tf.gather(members, survivors))

    # 2. Run NMS for every class that has at least one candidate.
    per_class = [
        _nms_for_class(present_classes[i])
        for i in range(present_classes.shape[0])
    ]
    if per_class:
        after_nms = tf.concat(per_class, axis=0)
    else:
        after_nms = tf.zeros([0,], tf.int64)

    # 3. Keep the candidates that survived NMS.
    final = _common_indices(candidates, after_nms)

    # Keep at most max_instances detections, best scores first.
    final_scores = tf.gather(best_score, final)
    limit = tf.minimum(tf.shape(final_scores)[0], self.max_instances)
    ranking = tf.nn.top_k(final_scores, k=limit, sorted=True)[1]
    final = tf.gather(final, ranking)

    return tf.concat([
        tf.gather(boxes, final),
        tf.cast(tf.gather(best_class, final), tf.float32)[..., tf.newaxis],
        tf.gather(best_score, final)[..., tf.newaxis],
    ], axis=1)
def _get_proposals_single(self, rpn_probs, rpn_deltas, anchors, img_shape, batch_ind, with_probs):
    '''Calculate the proposals of one image and tag them with its batch index.

    Args
    ---
        rpn_probs: [num_anchors]
        rpn_deltas: [num_anchors, (dy, dx, log(dh), log(dw))]
        anchors: [num_anchors, (y1, x1, y2, x2)] anchors defined in
            pixel coordinates.
        img_shape: [2]. (img_height, img_width)
        batch_ind: int. Index of the image inside the batch.
        with_probs: bool. Append the score of each proposal as a last column.

    Returns
    ---
        proposals: [proposal_count, (batch_ind, y1, x1, y2, x2)] with the
            box in normalized coordinates; one extra score column when
            with_probs is true. Rows beyond the boxes kept by NMS are zero
            padding (their batch_ind column is still filled in).
    '''
    # Improve performance: only the best-scoring anchors (at most 6000)
    # go through decoding and NMS.
    pre_nms_limit = min(6000, anchors.shape[0])
    ix = tf.nn.top_k(rpn_probs, pre_nms_limit, sorted=True).indices
    rpn_probs = tf.gather(rpn_probs, ix)
    rpn_deltas = tf.gather(rpn_deltas, ix)
    anchors = tf.gather(anchors, ix)

    # Get refined anchors and clip them to the image window (0, 0, H, W).
    proposals = transforms.delta2bbox(
        anchors, rpn_deltas, self.target_means, self.target_stds)
    window = tf.concat([
        tf.constant([0., 0.], dtype=tf.float32),
        tf.cast(img_shape, tf.float32)
    ], axis=0)
    proposals = transforms.bbox_clip(proposals, window)

    # Normalize. The divisor must follow the (y1, x1, y2, x2) layout, i.e.
    # (H, W, H, W). tf.tile gives exactly that, whereas tf.repeat would
    # give (H, H, W, W) and mis-scale x1 / y2 on non-square images.
    # The cast lets an integer img_shape divide the float boxes.
    scale = tf.cast(tf.tile(img_shape, [2]), proposals.dtype)
    proposals = proposals / scale

    # NMS
    indices = tf.image.non_max_suppression(
        proposals, rpn_probs, self.proposal_count, self.nms_threshold)
    proposals = tf.gather(proposals, indices)

    if with_probs:
        proposal_probs = tf.expand_dims(tf.gather(rpn_probs, indices), axis=1)
        proposals = tf.concat([proposals, proposal_probs], axis=1)

    # Pad with zero rows so every image yields proposal_count rows.
    padding = tf.maximum(self.proposal_count - tf.shape(proposals)[0], 0)
    proposals = tf.pad(proposals, [(0, padding), (0, 0)])

    # Prepend the batch index column. The row count is read dynamically
    # because the padding amount above is itself a tensor.
    num_rows = tf.shape(proposals)[0]
    batch_inds = (tf.ones([num_rows, 1], dtype=proposals.dtype)
                  * tf.cast(batch_ind, proposals.dtype))
    proposals = tf.concat([batch_inds, proposals], axis=1)

    return proposals