Пример #1
0
class ProposalLayer(KE.Layer):
    """
        Receives anchor scores and selects a subset to pass as proposals
        to the second stage. Filtering is done based on anchor scores and
        non-max suppression to remove overlaps. It also applies bounding
        box refinement deltas to anchors.

        Inputs:
            rpn_probs: [batch, num_anchors, (bg prob, fg prob)]
            rpn_bbox: [batch, num_anchors, (dy, dx, log(dh), log(dw))]
            anchors: [batch, num_anchors, (y1, x1, y2, x2)] anchors in normalized coordinates

        Returns:
            Proposals in normalized coordinates [batch, rois, (y1, x1, y2, x2)]
    """
    def __init__(self, proposal_count, nms_threshold, batch_size, **kwargs):
        super(ProposalLayer, self).__init__(**kwargs)

        self.proposal_count = proposal_count
        self.nms_threshold = nms_threshold
        self.batch_size = batch_size

        self.misc_utils = MiscUtils()
        self.bbox_utils = BboxUtil()

        pass

    def call(self, inputs):
        """
            这里的 call 方法,会被 __init__() 方法回调
        :param inputs:
        :return:
        """
        # Box Scores. Use the foreground class confidence. [Batch, num_rois, 1]
        scores = inputs[0][:, :, 1]
        # Box deltas [batch, num_rois, 4]
        deltas = inputs[1]
        rpn_bbox_std_dev = np.array(cfg.COMMON.RPN_BBOX_STD_DEV)
        deltas = deltas * np.reshape(rpn_bbox_std_dev, [1, 1, 4])
        # Anchors
        anchors = inputs[2]

        # Improve performance by trimming to top anchors by score
        # and doing the rest on the smaller subset.
        pre_nms_limit = tf.minimum(cfg.COMMON.PRE_NMS_LIMIT,
                                   tf.shape(anchors)[1])
        ix = tf.nn.top_k(scores,
                         pre_nms_limit,
                         sorted=True,
                         name="top_anchors").indices

        scores = self.misc_utils.batch_slice([scores, ix],
                                             lambda x, y: tf.gather(x, y),
                                             self.batch_size)
        deltas = self.misc_utils.batch_slice([deltas, ix],
                                             lambda x, y: tf.gather(x, y),
                                             self.batch_size)
        pre_nms_anchors = self.misc_utils.batch_slice(
            [anchors, ix],
            lambda a, x: tf.gather(a, x),
            self.batch_size,
            names=["pre_nms_anchors"])

        # Apply deltas to anchors to get refined anchors.
        # [batch, N, (y1, x1, y2, x2)]
        boxes = self.misc_utils.batch_slice(
            [pre_nms_anchors, deltas],
            lambda x, y: self.bbox_utils.apply_box_deltas_graph(x, y),
            self.batch_size,
            names=["refined_anchors"])

        # Clip to image boundaries. Since we're in normalized coordinates,
        # clip to 0..1 range. [batch, N, (y1, x1, y2, x2)]
        window = np.array([0, 0, 1, 1], dtype=np.float32)
        boxes = self.misc_utils.batch_slice(
            boxes,
            lambda x: self.bbox_utils.clip_boxes_graph(x, window),
            self.batch_size,
            names=["refined_anchors_clipped"])

        # Filter out small boxes
        # According to Xinlei Chen's paper, this reduces detection accuracy
        # for small objects, so we're skipping it.

        # Non-max suppression
        def nms(boxes, scores):
            indices = tf.image.non_max_suppression(
                boxes,
                scores,
                self.proposal_count,
                self.nms_threshold,
                name="rpn_non_max_suppression")
            proposals = tf.gather(boxes, indices)
            # Pad if needed
            padding = tf.maximum(self.proposal_count - tf.shape(proposals)[0],
                                 0)
            proposals = tf.pad(proposals, [(0, padding), (0, 0)])
            return proposals

        proposals = self.misc_utils.batch_slice([boxes, scores], nms,
                                                self.batch_size)

        return proposals

    def compute_output_shape(self, input_shape):
        return (None, self.proposal_count, 4)
Пример #2
0
def refine_detections_graph(rois, probs, deltas, window):
    """
        Refine classified proposals and filter overlaps and return final detections.
    :param rois: [N, (y1, x1, y2, x2)] in normalized coordinates
    :param probs: [N, num_classes]. Class probabilities.
    :param deltas: [N, num_classes, (dy, dx, log(dh), log(dw))]. Class-specific bounding box deltas.
    :param window: (y1, x1, y2, x2) in normalized coordinates.
                   The part of the image that contains the image excluding the padding.
    :return: Returns detections shaped:
            [num_detections, (y1, x1, y2, x2, class_id, score)] where coordinates are normalized.
    """
    # Class IDs per ROI
    class_ids = tf.argmax(probs, axis=1, output_type=tf.int32)
    # Class probability of the top class of each ROI
    indices = tf.stack([tf.range(probs.shape[0]), class_ids], axis=1)
    class_scores = tf.gather_nd(probs, indices)
    # Class-specific bounding box deltas
    deltas_specific = tf.gather_nd(deltas, indices)
    # Apply bounding box deltas
    # Shape: [boxes, (y1, x1, y2, x2)] in normalized coordinates
    bbox_utils = BboxUtil()
    refined_rois = bbox_utils.apply_box_deltas_graph(
        rois, deltas_specific * cfg.COMMON.BBOX_STD_DEV)
    # Clip boxes to image window
    refined_rois = bbox_utils.clip_boxes_graph(refined_rois, window)

    # TODO: Filter out boxes with zero area

    # Filter out background boxes
    keep = tf.where(class_ids > 0)[:, 0]
    # Filter out low confidence boxes
    defection_min_confidence = cfg.COMMON.DETECTION_MIN_CONFIDENCE
    if defection_min_confidence:
        conf_keep = tf.where(class_scores >= defection_min_confidence)[:, 0]
        keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                        tf.expand_dims(conf_keep, 0))
        keep = tf.sparse_tensor_to_dense(keep)[0]

    # Apply per-class NMS
    # 1. Prepare variables
    pre_nms_class_ids = tf.gather(class_ids, keep)
    pre_nms_scores = tf.gather(class_scores, keep)
    pre_nms_rois = tf.gather(refined_rois, keep)
    unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]

    def nms_keep_map(class_id):
        """Apply Non-Maximum Suppression on ROIs of the given class."""

        defection_max_instances = cfg.TEST.DETECTION_MAX_INSTANCES
        # Indices of ROIs of the given class
        ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]
        # Apply NMS
        class_keep = tf.image.non_max_suppression(
            tf.gather(pre_nms_rois, ixs),
            tf.gather(pre_nms_scores, ixs),
            max_output_size=defection_max_instances,
            iou_threshold=cfg.TEST.DETECTION_NMS_THRESHOLD)
        # Map indices
        class_keep = tf.gather(keep, tf.gather(ixs, class_keep))
        # Pad with -1 so returned tensors have the same shape
        gap = defection_max_instances - tf.shape(class_keep)[0]
        class_keep = tf.pad(class_keep, [(0, gap)],
                            mode='CONSTANT',
                            constant_values=-1)
        # Set shape so map_fn() can infer result shape
        class_keep.set_shape([defection_max_instances])
        return class_keep

    # 2. Map over class IDs
    nms_keep = tf.map_fn(nms_keep_map,
                         unique_pre_nms_class_ids,
                         dtype=tf.int64)
    # 3. Merge results into one list, and remove -1 padding
    nms_keep = tf.reshape(nms_keep, [-1])
    nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])
    # 4. Compute intersection between keep and nms_keep
    keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                    tf.expand_dims(nms_keep, 0))
    keep = tf.sparse_tensor_to_dense(keep)[0]
    # Keep top detections
    roi_count = cfg.TEST.DETECTION_MAX_INSTANCES
    class_scores_keep = tf.gather(class_scores, keep)
    num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count)
    top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1]
    keep = tf.gather(keep, top_ids)

    # Arrange output as [N, (y1, x1, y2, x2, class_id, score)]
    # Coordinates are normalized.
    detections = tf.concat([
        tf.gather(refined_rois, keep),
        tf.to_float(tf.gather(class_ids, keep))[..., tf.newaxis],
        tf.gather(class_scores, keep)[..., tf.newaxis]
    ],
                           axis=1)

    # Pad with zeros if detections < DETECTION_MAX_INSTANCES
    gap = cfg.TEST.DETECTION_MAX_INSTANCES - tf.shape(detections)[0]
    detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")
    return detections