示例#1
0
class DetectionLayer(KE.Layer):
    """
        Takes classified proposal boxes and their bounding box deltas and
        returns the final detection boxes.
        Returns:
            [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] where
            coordinates are normalized.
    """
    def __init__(self, batch_size, **kwargs):
        super(DetectionLayer, self).__init__(**kwargs)
        self.batch_size = batch_size
        self.detection_max_instances = cfg.TEST.DETECTION_MAX_INSTANCES
        self.image_utils = ImageUtils()
        self.bbox_utils = BboxUtil()
        self.misc_utils = MiscUtils()

    def call(self, inputs):
        rois = inputs[0]
        mrcnn_class = inputs[1]
        mrcnn_bbox = inputs[2]
        image_meta = inputs[3]

        # Get windows of images in normalized coordinates. Windows are the area
        # in the image that excludes the padding.
        # Use the shape of the first image in the batch to normalize the window
        # because we know that all images get resized to the same size.
        m = self.image_utils.parse_image_meta_graph(image_meta)
        image_shape = m['image_shape'][0]
        window = self.bbox_utils.norm_boxes_graph(m['window'], image_shape[:2])

        # Run detection refinement graph on each item in the batch
        detections_batch = self.misc_utils.batch_slice(
            [rois, mrcnn_class, mrcnn_bbox, window],
            lambda x, y, w, z: refine_detections_graph(x, y, w, z),
            self.batch_size)

        # Reshape output
        # [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] in
        # normalized coordinates
        return tf.reshape(detections_batch,
                          [self.batch_size, self.detection_max_instances, 6])

    def compute_output_shape(self, input_shape):
        return (None, self.detection_max_instances, 6)
示例#2
0
class DetectionTargetLayer(KE.Layer):
    """
        Subsamples proposals and generates target box refinement, class_ids, and masks for each.
        Inputs:
            proposals: [batch, N, (y1, x1, y2, x2)] in normalized coordinates. Might
               be zero padded if there are not enough proposals.
            gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs.
            gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized
                      coordinates.
            gt_masks: [batch, height, width, MAX_GT_INSTANCES] of boolean type

        Returns: Target ROIs and corresponding class IDs, bounding box shifts, and masks.
            rois: [batch, TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized
                  coordinates
            target_class_ids: [batch, TRAIN_ROIS_PER_IMAGE]. Integer class IDs.
            target_deltas: [batch, TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw)]
            target_mask: [batch, TRAIN_ROIS_PER_IMAGE, height, width]
                         Masks cropped to bbox boundaries and resized to neural
                         network output size.
        Note: Returned arrays might be zero padded if not enough target ROIs.
    """
    def __init__(self, batch_size, **kwargs):
        super(DetectionTargetLayer, self).__init__(**kwargs)
        self.batch_size = batch_size
        self.misc_utils = MiscUtils()

        self.rois_per_image = cfg.TRAIN.ROIS_PER_IMAGE
        self.mask_shape = cfg.TRAIN.MASK_SHAPE
        pass

    def call(self, inputs):
        """
            这里的 call 方法,会被 __init__() 方法回调
        :param inputs: 参数如下所示
        :return:
        """
        proposals = inputs[0]
        gt_class_ids = inputs[1]
        gt_boxes = inputs[2]
        gt_masks = inputs[3]

        # Slice the batch and run a graph for each slice
        # TODO: Rename target_bbox to target_deltas for clarity
        names = ["rois", "target_class_ids", "target_bbox", "target_mask"]
        outputs = self.misc_utils.batch_slice(
            [proposals, gt_class_ids, gt_boxes, gt_masks],
            lambda w, x, y, z: self.misc_utils.detection_targets_graph(
                w, x, y, z),
            self.batch_size,
            names=names)
        return outputs

    def compute_output_shape(self, input_shape):
        return [
            (None, self.rois_per_image, 4),  # rois
            (None, self.rois_per_image),  # class_ids
            (None, self.rois_per_image, 4),  # deltas
            (None, self.rois_per_image, self.mask_shape[0], self.mask_shape[1]
             )  # masks
        ]

    def compute_mask(self, inputs, mask=None):
        return [None, None, None, None]
示例#3
0
class ProposalLayer(KE.Layer):
    """
        Receives anchor scores and selects a subset to pass as proposals
        to the second stage. Filtering is done based on anchor scores and
        non-max suppression to remove overlaps. It also applies bounding
        box refinement deltas to anchors.

        Inputs:
            rpn_probs: [batch, num_anchors, (bg prob, fg prob)]
            rpn_bbox: [batch, num_anchors, (dy, dx, log(dh), log(dw))]
            anchors: [batch, num_anchors, (y1, x1, y2, x2)] anchors in normalized coordinates

        Returns:
            Proposals in normalized coordinates [batch, rois, (y1, x1, y2, x2)]
    """
    def __init__(self, proposal_count, nms_threshold, batch_size, **kwargs):
        super(ProposalLayer, self).__init__(**kwargs)

        self.proposal_count = proposal_count
        self.nms_threshold = nms_threshold
        self.batch_size = batch_size

        self.misc_utils = MiscUtils()
        self.bbox_utils = BboxUtil()

        pass

    def call(self, inputs):
        """
            这里的 call 方法,会被 __init__() 方法回调
        :param inputs:
        :return:
        """
        # Box Scores. Use the foreground class confidence. [Batch, num_rois, 1]
        scores = inputs[0][:, :, 1]
        # Box deltas [batch, num_rois, 4]
        deltas = inputs[1]
        rpn_bbox_std_dev = np.array(cfg.COMMON.RPN_BBOX_STD_DEV)
        deltas = deltas * np.reshape(rpn_bbox_std_dev, [1, 1, 4])
        # Anchors
        anchors = inputs[2]

        # Improve performance by trimming to top anchors by score
        # and doing the rest on the smaller subset.
        pre_nms_limit = tf.minimum(cfg.COMMON.PRE_NMS_LIMIT,
                                   tf.shape(anchors)[1])
        ix = tf.nn.top_k(scores,
                         pre_nms_limit,
                         sorted=True,
                         name="top_anchors").indices

        scores = self.misc_utils.batch_slice([scores, ix],
                                             lambda x, y: tf.gather(x, y),
                                             self.batch_size)
        deltas = self.misc_utils.batch_slice([deltas, ix],
                                             lambda x, y: tf.gather(x, y),
                                             self.batch_size)
        pre_nms_anchors = self.misc_utils.batch_slice(
            [anchors, ix],
            lambda a, x: tf.gather(a, x),
            self.batch_size,
            names=["pre_nms_anchors"])

        # Apply deltas to anchors to get refined anchors.
        # [batch, N, (y1, x1, y2, x2)]
        boxes = self.misc_utils.batch_slice(
            [pre_nms_anchors, deltas],
            lambda x, y: self.bbox_utils.apply_box_deltas_graph(x, y),
            self.batch_size,
            names=["refined_anchors"])

        # Clip to image boundaries. Since we're in normalized coordinates,
        # clip to 0..1 range. [batch, N, (y1, x1, y2, x2)]
        window = np.array([0, 0, 1, 1], dtype=np.float32)
        boxes = self.misc_utils.batch_slice(
            boxes,
            lambda x: self.bbox_utils.clip_boxes_graph(x, window),
            self.batch_size,
            names=["refined_anchors_clipped"])

        # Filter out small boxes
        # According to Xinlei Chen's paper, this reduces detection accuracy
        # for small objects, so we're skipping it.

        # Non-max suppression
        def nms(boxes, scores):
            indices = tf.image.non_max_suppression(
                boxes,
                scores,
                self.proposal_count,
                self.nms_threshold,
                name="rpn_non_max_suppression")
            proposals = tf.gather(boxes, indices)
            # Pad if needed
            padding = tf.maximum(self.proposal_count - tf.shape(proposals)[0],
                                 0)
            proposals = tf.pad(proposals, [(0, padding), (0, 0)])
            return proposals

        proposals = self.misc_utils.batch_slice([boxes, scores], nms,
                                                self.batch_size)

        return proposals

    def compute_output_shape(self, input_shape):
        return (None, self.proposal_count, 4)