示例#1
0
    def testClippingOfProposals(self):
        """Check that the NMS proposals are already clipped to the image.

        Runs the RPN proposal helper, then re-applies `clip_boxes` to the
        first four columns of `nms_proposals` and asserts that clipping
        leaves them unchanged.
        """
        # Four identical ground-truth boxes.
        gt_boxes = np.array([[10, 10, 20, 22]] * 4)
        all_anchors = np.array([
            [11, 13, 12, 16],
            [10, 10, 20, 22],
            [11, 13, 12, 28],
            [7, 13, 34, 30],
        ])
        rpn_cls_prob = np.array([
            [0.3, 0.7],
            [0.4, 0.6],
            [0.9, 0.1],
            [0.8, 0.2],
        ])

        prediction = self._run_rpn_proposal(
            all_anchors, rpn_cls_prob, self.config, gt_boxes=gt_boxes)
        nms_boxes = prediction['nms_proposals'][:, :4]

        # Small graph that clips whatever boxes are fed in.
        im_size_ph = tf.placeholder(tf.float32, shape=(2, ))
        boxes_ph = tf.placeholder(tf.float32, shape=(nms_boxes.shape))
        clip_op = clip_boxes(boxes_ph, im_size_ph)

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            clipped = sess.run(
                clip_op,
                feed_dict={boxes_ph: nms_boxes, im_size_ph: self.im_size})

        # Clipping again must be a no-op if the proposals were clipped.
        self.assertAllEqual(nms_boxes, clipped)
示例#2
0
    def _build(self, cls_prob, loc_pred, all_anchors, im_shape):
        """Build per-class detections from anchor scores and regressions.

        For every foreground class the anchors are filtered by
        `min_prob_threshold`, decoded with `loc_pred`, clipped to the image,
        stripped of empty boxes and passed through NMS. The survivors of all
        classes are concatenated and the `total_max_detections` most
        confident ones are returned.

        Args:
            cls_prob: A softmax probability for each anchor where the idx = 0
                is the background class (which we should ignore).
                Shape (total_anchors, num_classes + 1)
            loc_pred: A Tensor with the regression output for each anchor.
                Its shape should be (total_anchors, 4).
            all_anchors: A Tensor with the anchors bounding boxes of shape
                (total_anchors, 4), having (x_min, y_min, x_max, y_max) for
                each anchor.
            im_shape: A Tensor with the image shape in format (height, width).

        Returns:
            prediction_dict with the following keys:
                objects: The top-k boxes after filtering and per-class NMS.
                    Shape (final_num_proposals, 4), where
                    final_num_proposals is unknown before-hand (it depends
                    on NMS). Each row is (x_min, y_min, x_max, y_max).
                labels: Class id (background excluded, so the first
                    foreground class is 0) of each row of `objects`.
                    Shape (final_num_proposals,)
                probs: Class probability of each row of `objects`.
                    Shape (final_num_proposals,)
                raw_proposals: The decoded proposals, before clipping and
                    area filtering, of the last class handled by the loop.
                anchors: The anchor each row of `objects` was decoded from.
        """
        selected_boxes = []
        selected_probs = []
        selected_labels = []
        selected_anchors = []  # For debugging

        for class_id in range(self._num_classes):
            # Get the confidences for this class (+ 1 is to ignore background)
            class_cls_prob = cls_prob[:, class_id + 1]

            # Filter by min_prob_threshold
            min_prob_filter = tf.greater_equal(class_cls_prob,
                                               self._min_prob_threshold)
            class_cls_prob = tf.boolean_mask(class_cls_prob, min_prob_filter)
            class_loc_pred = tf.boolean_mask(loc_pred, min_prob_filter)
            anchors = tf.boolean_mask(all_anchors, min_prob_filter)

            # Using the loc_pred and the anchors, we generate the proposals.
            # NOTE(review): `raw_proposals` is rebound on every iteration, so
            # the value returned below belongs to the last class only.
            raw_proposals = decode(anchors, class_loc_pred, self._variances)
            # Clip boxes to image.
            clipped_proposals = clip_boxes(raw_proposals, im_shape)

            # Filter proposals that have an non-valid area.
            (x_min, y_min, x_max, y_max) = tf.unstack(clipped_proposals,
                                                      axis=1)
            proposal_filter = tf.greater(
                tf.maximum(x_max - x_min, 0.) * tf.maximum(y_max - y_min, 0.),
                0.)
            class_proposals = tf.boolean_mask(clipped_proposals,
                                              proposal_filter)
            class_loc_pred = tf.boolean_mask(class_loc_pred, proposal_filter)
            class_cls_prob = tf.boolean_mask(class_cls_prob, proposal_filter)
            proposal_anchors = tf.boolean_mask(anchors, proposal_filter)

            # Log results of filtering non-valid area proposals
            total_anchors = tf.shape(all_anchors)[0]
            total_proposals = tf.shape(class_proposals)[0]
            total_raw_proposals = tf.shape(raw_proposals)[0]
            # Number of proposals dropped by the area filter (non-negative).
            tf.summary.scalar('invalid_proposals',
                              total_raw_proposals - total_proposals, ['ssd'])
            # Fraction of all the anchors that produced a valid proposal.
            tf.summary.scalar(
                'valid_proposals_ratio',
                tf.cast(total_proposals, tf.float32) /
                tf.cast(total_anchors, tf.float32), ['ssd'])

            # We have to use the TensorFlow's bounding box convention to use
            # the included function for NMS.
            # After gathering results we should normalize it back.
            class_proposal_tf = change_order(class_proposals)

            # Apply class NMS.
            class_selected_idx = tf.image.non_max_suppression(
                class_proposal_tf,
                class_cls_prob,
                self._class_max_detections,
                iou_threshold=self._class_nms_threshold)

            # Using NMS resulting indices, gather values from Tensors.
            class_proposal_tf = tf.gather(class_proposal_tf,
                                          class_selected_idx)
            class_cls_prob = tf.gather(class_cls_prob, class_selected_idx)
            # The anchors must go through the same selection, otherwise
            # their rows stop lining up with the boxes and the top-k gather
            # at the end would pick unrelated anchors.
            proposal_anchors = tf.gather(proposal_anchors,
                                         class_selected_idx)

            # We append values to a regular list which will later be
            # transformed to a proper Tensor.
            selected_boxes.append(class_proposal_tf)
            selected_probs.append(class_cls_prob)
            # In the case of the class_id, since it is a loop on classes, we
            # already have a fixed class_id. We use `tf.tile` to create that
            # Tensor with the total number of indices returned by the NMS.
            selected_labels.append(
                tf.tile([class_id], [tf.shape(class_selected_idx)[0]]))
            selected_anchors.append(proposal_anchors)

        # We use concat (axis=0) to generate a Tensor where the rows are
        # stacked on top of each other
        proposals_tf = tf.concat(selected_boxes, axis=0)
        # Return to the original convention.
        proposals = change_order(proposals_tf)
        proposal_label = tf.concat(selected_labels, axis=0)
        proposal_label_prob = tf.concat(selected_probs, axis=0)
        proposal_anchors = tf.concat(selected_anchors, axis=0)

        # Get topK detections of all classes.
        k = tf.minimum(self._total_max_detections,
                       tf.shape(proposal_label_prob)[0])
        top_k = tf.nn.top_k(proposal_label_prob, k=k)
        top_k_proposal_label_prob = top_k.values
        top_k_proposals = tf.gather(proposals, top_k.indices)
        top_k_proposal_label = tf.gather(proposal_label, top_k.indices)
        top_k_proposal_anchors = tf.gather(proposal_anchors, top_k.indices)

        return {
            'objects': top_k_proposals,
            'labels': top_k_proposal_label,
            'probs': top_k_proposal_label_prob,
            'raw_proposals': raw_proposals,
            'anchors': top_k_proposal_anchors,
        }
示例#3
0
    def _build(self, proposals, bbox_pred, cls_prob, im_shape):
        """Turn RPN proposals and RCNN predictions into final detections.

        Each proposal is assigned its most probable class; background and
        low-probability proposals are dropped. The remaining proposals are
        decoded with the deltas of their class, clipped to the image and
        stripped of empty boxes. NMS is then applied per class and the
        `total_max_detections` most confident results are kept.

        Args:
            proposals: Tensor with the RPN proposals bounding boxes.
                Shape (num_proposals, 4). Where num_proposals is less than
                POST_NMS_TOP_N (We don't know exactly beforehand)
            bbox_pred: Tensor with the RCNN delta predictions for each proposal
                for each class. Shape (num_proposals, 4 * num_classes)
            cls_prob: A softmax probability for each proposal where the idx = 0
                is the background class (which we should ignore).
                Shape (num_proposals, num_classes + 1)
            im_shape: Image shape handed to `clip_boxes` for clipping the
                decoded objects.

        Returns:
            A dict with the following keys:
                raw_objects: The decoded objects before clipping and area
                    filtering.
                objects: Shape (final_num_proposals, 4)
                    Where final_num_proposals is unknown before-hand (it
                    depends on NMS). The 4-length Tensor for each
                    corresponds to: (x_min, y_min, x_max, y_max).
                proposal_label: Shape (final_num_proposals,)
                proposal_label_prob: Shape (final_num_proposals,)
                selected_boxes: List with one Tensor per class holding the
                    boxes kept by NMS, still in the order produced by
                    `change_order`.
                selected_probs: List with one Tensor per class holding the
                    probabilities of `selected_boxes`.
                selected_labels: List with one Tensor per class holding the
                    class id repeated once per selected box.
        """
        # First we want get the most probable label for each proposal
        # We still have the background on idx 0 so we subtract 1 to the idxs.
        proposal_label = tf.argmax(cls_prob, axis=1) - 1
        # Get the probability for the selected label for each proposal.
        proposal_label_prob = tf.reduce_max(cls_prob, axis=1)

        # We are going to use only the non-background proposals.
        non_background_filter = tf.greater_equal(proposal_label, 0)
        # Filter proposals with less than threshold probability.
        min_prob_filter = tf.greater_equal(proposal_label_prob,
                                           self._min_prob_threshold)
        proposal_filter = tf.logical_and(non_background_filter,
                                         min_prob_filter)

        total_proposals = tf.shape(proposals)[0]

        equal_shapes = tf.assert_equal(
            tf.shape(proposals)[0],
            tf.shape(bbox_pred)[0])
        with tf.control_dependencies([equal_shapes]):
            # Filter all tensors for getting all non-background proposals.
            proposals = tf.boolean_mask(proposals, proposal_filter)
            proposal_label = tf.boolean_mask(proposal_label, proposal_filter)
            proposal_label_prob = tf.boolean_mask(proposal_label_prob,
                                                  proposal_filter)
            bbox_pred = tf.boolean_mask(bbox_pred, proposal_filter)

        filtered_proposals = tf.shape(proposals)[0]

        tf.summary.scalar('background_or_low_prob_proposals',
                          total_proposals - filtered_proposals, ['rcnn'])

        # Create one hot with labels for using it to filter bbox_predictions.
        label_one_hot = tf.one_hot(proposal_label, depth=self._num_classes)
        # Flatten label_one_hot to get
        # (num_non_background_proposals * num_classes, 1) for filtering.
        label_one_hot_flatten = tf.cast(tf.reshape(label_one_hot, [-1]),
                                        tf.bool)
        # Flatten bbox_predictions getting
        # (num_non_background_proposals * num_classes, 4).
        bbox_pred_flatten = tf.reshape(bbox_pred, [-1, 4])

        equal_shapes = tf.assert_equal(
            tf.shape(bbox_pred_flatten)[0],
            tf.shape(label_one_hot_flatten)[0])
        with tf.control_dependencies([equal_shapes]):
            # Control same number of dimensions between bbox and mask.
            bbox_pred = tf.boolean_mask(bbox_pred_flatten,
                                        label_one_hot_flatten)

        # Using the bbox_pred and the proposals we generate the objects.
        raw_objects = decode(proposals, bbox_pred)
        # Clip boxes to image.
        clipped_objects = clip_boxes(raw_objects, im_shape)

        # Filter objects that have an non-valid area. The comparison has to
        # be strict: the product of two values clamped at zero is never
        # negative, so `>= 0` would let every object through.
        (x_min, y_min, x_max, y_max) = tf.unstack(clipped_objects, axis=1)
        object_filter = tf.greater(
            tf.maximum(x_max - x_min, 0.0) * tf.maximum(y_max - y_min, 0.0),
            0.0)

        total_raw_objects = tf.shape(raw_objects)[0]
        objects = tf.boolean_mask(clipped_objects, object_filter)
        proposal_label = tf.boolean_mask(proposal_label, object_filter)
        proposal_label_prob = tf.boolean_mask(proposal_label_prob,
                                              object_filter)

        total_objects = tf.shape(objects)[0]

        # Number of objects dropped by the area filter (non-negative).
        tf.summary.scalar('invalid_proposals',
                          total_raw_objects - total_objects, ['rcnn'])

        # Fraction of the incoming proposals that became a valid object.
        tf.summary.scalar(
            'valid_proposals_ratio',
            tf.cast(total_objects, tf.float32) /
            tf.cast(total_proposals, tf.float32), ['rcnn'])

        # We have to use the TensorFlow's bounding box convention to use the
        # included function for NMS.
        # After gathering results we should normalize it back.
        objects_tf = change_order(objects)

        selected_boxes = []
        selected_probs = []
        selected_labels = []
        # For each class we want to filter those objects and apply NMS to them.
        for class_id in range(self._num_classes):
            # Filter objects Tensors with class.
            class_filter = tf.equal(proposal_label, class_id)
            class_objects_tf = tf.boolean_mask(objects_tf, class_filter)
            class_prob = tf.boolean_mask(proposal_label_prob, class_filter)

            # Apply class NMS.
            class_selected_idx = tf.image.non_max_suppression(
                class_objects_tf,
                class_prob,
                self._class_max_detections,
                iou_threshold=self._class_nms_threshold)

            # Using NMS resulting indices, gather values from Tensors.
            class_objects_tf = tf.gather(class_objects_tf, class_selected_idx)
            class_prob = tf.gather(class_prob, class_selected_idx)

            # We append values to a regular list which will later be
            # transformed to a proper Tensor.
            selected_boxes.append(class_objects_tf)
            selected_probs.append(class_prob)
            # In the case of the class_id, since it is a loop on classes, we
            # already have a fixed class_id. We use `tf.tile` to create that
            # Tensor with the total number of indices returned by the NMS.
            selected_labels.append(
                tf.tile([class_id], [tf.shape(class_selected_idx)[0]]))

        # We use concat (axis=0) to generate a Tensor where the rows are
        # stacked on top of each other
        objects_tf = tf.concat(selected_boxes, axis=0)
        # Return to the original convention.
        objects = change_order(objects_tf)
        proposal_label = tf.concat(selected_labels, axis=0)
        proposal_label_prob = tf.concat(selected_probs, axis=0)

        # Get topK detections of all classes.
        k = tf.minimum(self._total_max_detections,
                       tf.shape(proposal_label_prob)[0])
        top_k = tf.nn.top_k(proposal_label_prob, k=k)
        top_k_proposal_label_prob = top_k.values
        top_k_objects = tf.gather(objects, top_k.indices)
        top_k_proposal_label = tf.gather(proposal_label, top_k.indices)

        return {
            'raw_objects': raw_objects,
            'objects': top_k_objects,
            'proposal_label': top_k_proposal_label,
            'proposal_label_prob': top_k_proposal_label_prob,
            'selected_boxes': selected_boxes,
            'selected_probs': selected_probs,
            'selected_labels': selected_labels,
        }
示例#4
0
    def testClippingOfProposals(self):
        """Exercise proposal clipping with `clip_after_nms` off and on.

        With `clip_after_nms` set to False the `unsorted_proposals` must
        equal the clipped version of `proposals_unclipped`; with it set to
        True they must equal `proposals_unclipped` itself. In both cases the
        resulting `proposals` have to lie inside the image.
        """
        gt_boxes = np.array([
            [0, 0, 10, 12],
            [10, 10, 20, 22],
            [10, 10, 20, 22],
            [30, 25, 39, 39],
        ])
        all_anchors = np.array([
            [-20, -10, 12, 6],
            [2, -10, 20, 20],
            [0, 0, 12, 16],
            [2, -10, 20, 2],
        ])
        rpn_cls_prob = np.array([
            [0.3, 0.7],
            [0.4, 0.6],
            [0.3, 0.7],
            [0.1, 0.9],
        ])
        # All-zero deltas: decoding inside RPNProposal leaves the anchors
        # untouched, so the proposals reach the clipping step unclipped.
        rpn_bbox_pred = np.array([[0, 0, 0, 0]] * 4)

        def run_proposal(proposal_config):
            # Same inputs for both scenarios, only the config differs.
            return self._run_rpn_proposal(
                all_anchors, rpn_cls_prob, proposal_config,
                gt_boxes=gt_boxes, rpn_bbox_pred=rpn_bbox_pred)

        def clip_to_image(boxes):
            # Build a tiny clipping graph for `boxes` and evaluate it.
            im_size_ph = tf.placeholder(tf.float32, shape=(2,))
            boxes_ph = tf.placeholder(tf.float32, shape=(boxes.shape))
            clip_op = clip_boxes(boxes_ph, im_size_ph)
            with self.test_session() as sess:
                return sess.run(clip_op, feed_dict={
                    boxes_ph: boxes,
                    im_size_ph: self.im_size
                })

        def assert_inside_image(boxes):
            # Every coordinate must be non-negative and below the bound
            # built from the image size repeated twice.
            self.assertTrue((boxes >= 0).all())
            self.assertTrue(
                (boxes < np.array(self.im_size + self.im_size)).all()
            )

        config = EasyDict(self.config)

        # Before NMS
        config['clip_after_nms'] = False
        results_before = run_proposal(config)
        clipped_before = clip_to_image(results_before['proposals_unclipped'])

        # Check we clip proposals right after filtering the invalid area ones.
        self.assertAllEqual(
            results_before['unsorted_proposals'], clipped_before)
        assert_inside_image(results_before['proposals'])

        # After NMS
        config['clip_after_nms'] = True
        results_after = run_proposal(config)
        # The clipped result is not compared against anything here; the call
        # only runs `clip_boxes` on these proposals.
        clip_to_image(results_after['proposals_unclipped'])

        # Check we don't clip proposals in the beginning of the function.
        self.assertAllEqual(
            results_after['unsorted_proposals'],
            results_after['proposals_unclipped']
        )
        assert_inside_image(results_after['proposals'])
示例#5
0
    def _build(self, rpn_cls_prob, rpn_bbox_pred, all_anchors, im_shape):
        """Select the RPN proposals to hand over to the next stage.

        Anchors are optionally filtered to those inside the image, decoded
        with `rpn_bbox_pred`, stripped of empty boxes, sorted by foreground
        score and reduced with NMS. Clipping to the image happens either
        before the top-k selection or after NMS depending on
        `clip_after_nms`.

        Args:
            rpn_cls_prob: A Tensor with the softmax output for each anchor.
                Its shape should be (total_anchors, 2), with the probability of
                being background and the probability of being foreground for
                each anchor.
            rpn_bbox_pred: A Tensor with the regression output for each anchor.
                Its shape should be (total_anchors, 4).
            all_anchors: A Tensor with the anchors bounding boxes of shape
                (total_anchors, 4), having (x_min, y_min, x_max, y_max) for
                each anchor.
            im_shape: A Tensor with the image shape in format (height, width).

        Returns:
            prediction_dict with the following keys:
                nms_proposals: A Tensor with the final selected proposed
                    bounding boxes. Its shape is (total_nms_proposals, 5):
                    a batch index column (always zero) followed by the four
                    box coordinates.
                nms_proposals_scores: A Tensor with the probability of being an
                    object for that proposal. Its shape is
                    (total_nms_proposals,).
            and, only in debug mode:
                scores:  A Tensor with the scores of the proposals contained
                    in `proposals` and `proposals_unclipped`.
                proposals: A Tensor with all the valid area RPN proposals, this
                    tensor is returned in debug mode and is used for
                    testing, the proposals are clipped if `clip_after_nms` is
                    set to False.
                proposals_unclipped: Same as proposals but the proposals in
                    this tensor are never clipped.
                top_k_proposals: The `pre_nms_top_n` best scored rows of
                    `proposals`.
                top_k_scores: The scores of `top_k_proposals`.
                all_proposals: A Tensor with all the proposals, including the
                    ones with zero or negative area.
        """
        # Scores are extracted from the second scalar of the cls probability.
        # cls_probability is a softmax of (background, foreground).
        scores = rpn_cls_prob[:, 1]
        # Force flatten the scores (it should be already be flatten).
        scores = tf.reshape(scores, [-1])

        if self._filter_outside_anchors:
            with tf.name_scope('filter_outside_anchors'):
                (x_min_anchor, y_min_anchor,
                 x_max_anchor, y_max_anchor) = tf.unstack(all_anchors, axis=1)

                anchor_filter = tf.logical_and(
                    tf.logical_and(
                        tf.greater_equal(x_min_anchor, 0),
                        tf.greater_equal(y_min_anchor, 0)
                    ),
                    tf.logical_and(
                        tf.less(x_max_anchor, im_shape[1]),
                        tf.less(y_max_anchor, im_shape[0])
                    )
                )
                anchor_filter = tf.reshape(anchor_filter, [-1])
                all_anchors = tf.boolean_mask(
                    all_anchors, anchor_filter, name='filter_anchors')
                rpn_bbox_pred = tf.boolean_mask(rpn_bbox_pred, anchor_filter)
                scores = tf.boolean_mask(scores, anchor_filter)

        # Decode boxes
        all_proposals = decode(all_anchors, rpn_bbox_pred)

        # Filter proposals with negative or zero area.
        (x_min, y_min, x_max, y_max) = tf.unstack(
            all_proposals, axis=1
        )
        proposal_filter = tf.greater(
            tf.maximum(x_max - x_min, 0.0) * tf.maximum(y_max - y_min, 0.0),
            0.0
        )
        proposal_filter = tf.reshape(proposal_filter, [-1])

        # Filter proposals and scores.
        total_proposals = tf.shape(scores)[0]
        scores = tf.boolean_mask(
            scores, proposal_filter,
            name='filter_invalid_scores'
        )
        proposals = tf.boolean_mask(
            all_proposals, proposal_filter,
            name='filter_invalid_proposals'
        )
        if self._debug:
            # Keep a handle on the proposals before any clipping is applied.
            proposals_unclipped = tf.identity(proposals)

        if not self._clip_after_nms:
            # Clip proposals to the image.
            proposals = clip_boxes(proposals, im_shape)

        filtered_proposals = tf.shape(scores)[0]

        tf.summary.scalar(
            'valid_proposals_ratio',
            (
                tf.cast(filtered_proposals, tf.float32) /
                tf.cast(total_proposals, tf.float32)
            ), ['rpn'])

        tf.summary.scalar(
            'invalid_proposals', total_proposals - filtered_proposals, ['rpn'])

        # Get top `pre_nms_top_n` indices by sorting the proposals by score.
        k = tf.minimum(self._pre_nms_top_n, tf.shape(scores)[0])
        top_k = tf.nn.top_k(scores, k=k)
        top_k_scores = top_k.values

        top_k_proposals = tf.gather(proposals, top_k.indices)
        # We reorder the proposals into TensorFlows bounding box order for
        # `tf.image.non_max_supression` compatibility.
        proposals_tf_order = change_order(top_k_proposals)

        # We cut the pre_nms filter in pure TF version and go straight into
        # NMS.
        # The scores are flattened with `reshape` instead of `squeeze`:
        # squeezing a single remaining score would turn it into a scalar,
        # while NMS requires a 1-D scores Tensor.
        selected_indices = tf.image.non_max_suppression(
            proposals_tf_order, tf.reshape(top_k_scores, [-1]),
            self._post_nms_top_n, iou_threshold=self._nms_threshold
        )

        # Selected_indices is a smaller tensor, we need to extract the
        # proposals and scores using it.
        nms_proposals = tf.gather(
            proposals_tf_order, selected_indices, name='gather_nms_proposals'
        )
        nms_proposals_scores = tf.gather(
            top_k_scores, selected_indices, name='gather_nms_proposals_scores'
        )

        # We switch back again to the regular bbox encoding.
        nms_proposals = change_order(nms_proposals)

        if self._clip_after_nms:
            # Clip proposals to the image after NMS.
            nms_proposals = clip_boxes(nms_proposals, im_shape)

        # Adds batch number for consistency and multi image batch support.
        batch_inds = tf.zeros(
            (tf.shape(nms_proposals)[0], 1), dtype=tf.float32
        )
        nms_proposals = tf.concat([batch_inds, nms_proposals], axis=1)

        pred = {
            'nms_proposals': tf.stop_gradient(nms_proposals),
            'nms_proposals_scores': tf.stop_gradient(nms_proposals_scores),
        }

        if self._debug:
            pred.update({
                'proposals': proposals,
                'scores': scores,
                'proposals_unclipped': proposals_unclipped,
                'top_k_proposals': top_k_proposals,
                'top_k_scores': top_k_scores,
                'all_proposals': all_proposals,
            })

        return pred
示例#6
0
def patch_image(image, bboxes=None, offset_height=0, offset_width=0,
                target_height=None, target_width=None):
    """Gets a patch using tf.image.crop_to_bounding_box and adjusts bboxes

    The patch is resized back to the size of the original image. Boxes whose
    center falls outside the patch are dropped; the remaining ones are
    shifted, clipped to the patch and rescaled along with the image.

    If patching would leave us with zero bboxes, we return the image and bboxes
    unchanged.

    Args:
        image: Float32 Tensor with shape (H, W, 3).
        bboxes: Tensor with the ground-truth boxes. Shaped (total_boxes, 5).
            The last element in each box is the category label.
        offset_height: Height of the upper-left corner of the patch with
            respect to the original image. Non-negative.
        offset_width: Width of the upper-left corner of the patch with respect
            to the original image. Non-negative.
        target_height: Height of the patch. If set to none, it will be the
            maximum (tf.shape(image)[0] - offset_height - 1). Positive.
        target_width: Width of the patch. If set to none, it will be the
            maximum (tf.shape(image)[1] - offset_width - 1). Positive.

    Returns:
        A dict with the following keys:
            image: Patch of the original image, resized to the original
                image size.
            bboxes: Adjusted bboxes (only those whose centers are inside the
                patch). The key isn't set if bboxes is None.
    """
    # TODO: make this function safe with respect to senseless inputs (i.e
    # having an offset_height that's larger than tf.shape(image)[0], etc.)
    # As of now we only use it inside random_patch, which already makes sure
    # the arguments are legal.
    im_shape = tf.shape(image)
    if target_height is None:
        target_height = (im_shape[0] - offset_height - 1)
    if target_width is None:
        target_width = (im_shape[1] - offset_width - 1)

    new_image = tf.image.crop_to_bounding_box(
        image,
        offset_height=offset_height, offset_width=offset_width,
        target_height=target_height, target_width=target_width
    )
    patch_shape = tf.shape(new_image)

    # Return if we didn't have bboxes.
    if bboxes is None:
        # Resize the patch to the original image's size. This is to make sure
        # we respect restrictions in image size in the models.
        new_image_resized = tf.image.resize_images(
            new_image, im_shape[:2],
            method=tf.image.ResizeMethod.BILINEAR
        )
        return_dict = {'image': new_image_resized}
        return return_dict

    # Now we will remove all bboxes whose centers are not inside the cropped
    # image.

    # First get the x and y coordinates of the center of each of the
    # bboxes.
    bboxes_center_x = tf.reduce_mean(
        tf.concat(
            [
                # bboxes[:, 0] would give a Tensor with shape (total_boxes,).
                # Slicing with 0:1 keeps the shape (total_boxes, 1) so the
                # two columns can be concatenated side by side.
                bboxes[:, 0:1],
                bboxes[:, 2:3]
            ],
            axis=1
        ),
        # Average per box. Without the axis the mean would collapse to a
        # single scalar computed over all the boxes.
        axis=1
    )
    bboxes_center_y = tf.reduce_mean(
        tf.concat(
            [
                bboxes[:, 1:2],
                bboxes[:, 3:4]
            ],
            axis=1
        ),
        axis=1
    )

    # Now we get a boolean tensor holding for each of the bboxes' centers
    # whether they are inside the patch.
    center_x_is_inside = tf.logical_and(
        tf.greater(
            bboxes_center_x,
            offset_width
        ),
        tf.less(
            bboxes_center_x,
            tf.add(target_width, offset_width)
        )
    )
    center_y_is_inside = tf.logical_and(
        tf.greater(
            bboxes_center_y,
            offset_height
        ),
        tf.less(
            bboxes_center_y,
            tf.add(target_height, offset_height)
        )
    )
    center_is_inside = tf.logical_and(
        center_x_is_inside,
        center_y_is_inside
    )

    # Now we mask the bboxes, removing all those whose centers are outside
    # the patch.
    masked_bboxes = tf.boolean_mask(bboxes, center_is_inside)
    # We move the bboxes to the patch's coordinate system; they are clipped
    # in the next step.
    new_bboxes_unclipped = tf.concat(
        [
            tf.subtract(masked_bboxes[:, 0:1], offset_width),
            tf.subtract(masked_bboxes[:, 1:2], offset_height),
            tf.subtract(masked_bboxes[:, 2:3], offset_width),
            tf.subtract(masked_bboxes[:, 3:4], offset_height),
        ],
        axis=1,
    )
    # Finally, we clip the boxes and add back the labels.
    new_bboxes = tf.concat(
        [
            tf.to_int32(
                clip_boxes(
                    new_bboxes_unclipped,
                    imshape=patch_shape[:2]
                ),
            ),
            masked_bboxes[:, 4:]
        ],
        axis=1
    )
    # Now resize the image to the original size and adjust bboxes accordingly
    new_image_resized = tf.image.resize_images(
        new_image, im_shape[:2],
        method=tf.image.ResizeMethod.BILINEAR
    )
    # adjust_bboxes requires height and width values with dtype=float32
    new_bboxes_resized = adjust_bboxes(
        new_bboxes,
        old_height=tf.to_float(patch_shape[0]),
        old_width=tf.to_float(patch_shape[1]),
        new_height=tf.to_float(im_shape[0]),
        new_width=tf.to_float(im_shape[1])
    )

    # Finally, set up the return dict, but only update the image and bboxes if
    # our patch has at least one bbox in it.
    update_condition = tf.greater_equal(
        tf.shape(new_bboxes_resized)[0],
        1
    )
    return_dict = {}
    return_dict['image'] = tf.cond(
        update_condition,
        lambda: new_image_resized,
        lambda: image
    )
    return_dict['bboxes'] = tf.cond(
        update_condition,
        lambda: new_bboxes_resized,
        lambda: bboxes
    )
    return return_dict
示例#7
0
    def _build(self, rpn_cls_prob, rpn_bbox_pred, all_anchors, im_shape):
        """Select the final RPN proposals from the raw per-anchor outputs.

        Steps, in order: take the foreground score of every anchor,
        optionally drop the anchors that are not completely inside the image,
        turn anchors plus regression output into proposals through `decode`,
        discard proposals with a low score or a non-positive area, keep the
        best `self._pre_nms_top_n` by score and optionally run NMS. Clipping
        to the image is done either right after filtering or after NMS,
        depending on `self._clip_after_nms`.

        Args:
            rpn_cls_prob: Tensor with the softmax output for each anchor,
                expected shape (total_anchors, 2): background probability
                first, foreground probability second.
            rpn_bbox_pred: Tensor with the regression output for each anchor,
                expected shape (total_anchors, 4).
            all_anchors: Tensor with the anchor boxes, expected shape
                (total_anchors, 4), each row being
                (x_min, y_min, x_max, y_max).
            im_shape: Tensor with the image shape as (height, width).

        Returns:
            Dict with the keys:
                proposals: Tensor with the selected boxes, one
                    (x_min, y_min, x_max, y_max) row per proposal.
                scores: Rank-1 Tensor with the foreground probability of
                    each selected proposal, aligned with `proposals`.
            When `self._debug` is set, the intermediate tensors
            (`sorted_top_*`, `unsorted_*`, `all_*`, `proposals_unclipped`)
            are included as well.
        """
        # Column 1 of the (background, foreground) softmax is the objectness
        # score; the reshape only guarantees a rank-1 tensor.
        all_scores = tf.reshape(rpn_cls_prob[:, 1], [-1])

        if self._filter_outside_anchors:
            with tf.name_scope('filter_outside_anchors'):
                anchor_coords = tf.unstack(all_anchors, axis=1)
                (x_min_anchor, y_min_anchor,
                 x_max_anchor, y_max_anchor) = anchor_coords

                # An anchor survives only if both of its corners are inside
                # the image; im_shape is (height, width).
                left_ok = tf.greater_equal(x_min_anchor, 0)
                top_ok = tf.greater_equal(y_min_anchor, 0)
                origin_inside = tf.logical_and(left_ok, top_ok)
                right_ok = tf.less(x_max_anchor, im_shape[1])
                bottom_ok = tf.less(y_max_anchor, im_shape[0])
                corner_inside = tf.logical_and(right_ok, bottom_ok)
                anchor_filter = tf.reshape(
                    tf.logical_and(origin_inside, corner_inside), [-1]
                )

                # Apply the same mask to anchors, deltas and scores so they
                # stay aligned row by row.
                all_anchors = tf.boolean_mask(
                    all_anchors, anchor_filter, name='filter_anchors'
                )
                rpn_bbox_pred = tf.boolean_mask(rpn_bbox_pred, anchor_filter)
                all_scores = tf.boolean_mask(all_scores, anchor_filter)

        # Anchors + regression output -> proposals.
        all_proposals = decode(all_anchors, rpn_bbox_pred)

        # Mask of proposals reaching the minimum foreground probability.
        min_prob_filter = tf.greater_equal(
            all_scores, self._min_prob_threshold
        )

        # Mask of proposals with a strictly positive area. Negative sides
        # are clamped to zero first so two negatives cannot yield a
        # positive product.
        x_min, y_min, x_max, y_max = tf.unstack(all_proposals, axis=1)
        clamped_width = tf.maximum(x_max - x_min, 0.0)
        clamped_height = tf.maximum(y_max - y_min, 0.0)
        zero_area_filter = tf.greater(clamped_width * clamped_height, 0.0)

        proposal_filter = tf.logical_and(zero_area_filter, min_prob_filter)

        # Drop the rejected proposals together with their scores.
        all_proposals_total = tf.shape(all_scores)[0]
        unsorted_scores = tf.boolean_mask(
            all_scores, proposal_filter, name='filtered_scores'
        )
        unsorted_proposals = tf.boolean_mask(
            all_proposals, proposal_filter, name='filtered_proposals'
        )
        if self._debug:
            # Snapshot taken before any clipping happens.
            proposals_unclipped = tf.identity(unsorted_proposals)

        if not self._clip_after_nms:
            # Clip before the top-k selection and NMS.
            unsorted_proposals = clip_boxes(unsorted_proposals, im_shape)

        filtered_proposals_total = tf.shape(unsorted_scores)[0]

        valid_ratio = (
            tf.cast(filtered_proposals_total, tf.float32) /
            tf.cast(all_proposals_total, tf.float32)
        )
        tf.summary.scalar('valid_proposals_ratio', valid_ratio, ['rpn'])

        invalid_total = all_proposals_total - filtered_proposals_total
        tf.summary.scalar('invalid_proposals', invalid_total, ['rpn'])

        # Keep at most `pre_nms_top_n` proposals, best score first.
        k = tf.minimum(self._pre_nms_top_n, tf.shape(unsorted_scores)[0])
        top_k = tf.nn.top_k(unsorted_scores, k=k)

        sorted_top_proposals = tf.gather(unsorted_proposals, top_k.indices)
        sorted_top_scores = top_k.values

        if self._apply_nms:
            with tf.name_scope('nms'):
                # `tf.image.non_max_suppression` expects TensorFlow's own
                # coordinate order, so convert, run NMS and convert back.
                proposals_tf_order = change_order(sorted_top_proposals)
                flat_scores = tf.reshape(sorted_top_scores, [-1])
                selected_indices = tf.image.non_max_suppression(
                    proposals_tf_order,
                    flat_scores,
                    self._post_nms_top_n,
                    iou_threshold=self._nms_threshold
                )

                # Pick the surviving boxes and scores by index.
                nms_proposals_tf_order = tf.gather(
                    proposals_tf_order, selected_indices,
                    name='gather_nms_proposals'
                )
                proposals = change_order(nms_proposals_tf_order)
                scores = tf.gather(
                    sorted_top_scores, selected_indices,
                    name='gather_nms_proposals_scores'
                )
        else:
            proposals = sorted_top_proposals
            scores = sorted_top_scores

        if self._clip_after_nms:
            # Clip only the proposals that made it through NMS.
            proposals = clip_boxes(proposals, im_shape)

        pred = {'proposals': proposals, 'scores': scores}

        if self._debug:
            pred['sorted_top_scores'] = sorted_top_scores
            pred['sorted_top_proposals'] = sorted_top_proposals
            pred['unsorted_proposals'] = unsorted_proposals
            pred['unsorted_scores'] = unsorted_scores
            pred['all_proposals'] = all_proposals
            pred['all_scores'] = all_scores
            # Rows of proposals_unclipped line up with unsorted_scores.
            pred['proposals_unclipped'] = proposals_unclipped

        return pred
示例#8
0
    def _build(self, rpn_cls_prob, rpn_bbox_pred, all_anchors, im_shape):
        """Build the proposal-selection part of the RPN graph.

        Foreground scores are read from `rpn_cls_prob`, anchors may be
        limited to those fully inside the image, `decode` produces one
        proposal per anchor, and proposals are then filtered (score
        threshold, positive area), ranked (top `self._pre_nms_top_n`) and,
        when `self._apply_nms` is set, reduced through non-max suppression.
        `self._clip_after_nms` decides whether boxes are clipped to the image
        before ranking or after NMS.

        Args:
            rpn_cls_prob: Softmax output per anchor; expected shape
                (total_anchors, 2) holding (background, foreground)
                probabilities.
            rpn_bbox_pred: Regression output per anchor; expected shape
                (total_anchors, 4).
            all_anchors: Anchor boxes; expected shape (total_anchors, 4) with
                rows (x_min, y_min, x_max, y_max).
            im_shape: Image shape as (height, width).

        Returns:
            A dict containing:
                proposals: the selected boxes, rows of
                    (x_min, y_min, x_max, y_max).
                scores: rank-1 tensor with the foreground probability of
                    each row in `proposals`.
            plus, when `self._debug` is set, the intermediate tensors of the
            pipeline.
        """
        # Foreground probability is the second softmax column; flatten it
        # defensively (it is expected to be rank 1 already).
        foreground_prob = rpn_cls_prob[:, 1]
        all_scores = tf.reshape(foreground_prob, [-1])

        if self._filter_outside_anchors:
            with tf.name_scope('filter_outside_anchors'):
                x0, y0, x1, y1 = tf.unstack(all_anchors, axis=1)

                # Top-left corner must not be negative ...
                min_corner_inside = tf.logical_and(
                    tf.greater_equal(x0, 0), tf.greater_equal(y0, 0)
                )
                # ... and bottom-right corner must be below (width, height).
                max_corner_inside = tf.logical_and(
                    tf.less(x1, im_shape[1]), tf.less(y1, im_shape[0])
                )
                inside = tf.logical_and(min_corner_inside, max_corner_inside)
                anchor_filter = tf.reshape(inside, [-1])

                all_anchors = tf.boolean_mask(
                    all_anchors, anchor_filter, name='filter_anchors')
                rpn_bbox_pred = tf.boolean_mask(rpn_bbox_pred, anchor_filter)
                all_scores = tf.boolean_mask(all_scores, anchor_filter)

        all_proposals = decode(all_anchors, rpn_bbox_pred)

        # Score criterion: at least the configured minimum probability.
        min_prob_filter = tf.greater_equal(
            all_scores, self._min_prob_threshold)

        # Area criterion: width * height > 0 with negative sides treated as
        # zero.
        x_min, y_min, x_max, y_max = tf.unstack(all_proposals, axis=1)
        box_width = tf.maximum(x_max - x_min, 0.0)
        box_height = tf.maximum(y_max - y_min, 0.0)
        box_area = box_width * box_height
        zero_area_filter = tf.greater(box_area, 0.0)

        proposal_filter = tf.logical_and(zero_area_filter, min_prob_filter)

        all_proposals_total = tf.shape(all_scores)[0]
        unsorted_scores = tf.boolean_mask(
            all_scores, proposal_filter, name='filtered_scores')
        unsorted_proposals = tf.boolean_mask(
            all_proposals, proposal_filter, name='filtered_proposals')

        if self._debug:
            # Copy of the filtered proposals before they can be clipped.
            proposals_unclipped = tf.identity(unsorted_proposals)

        clip_before_nms = not self._clip_after_nms
        if clip_before_nms:
            unsorted_proposals = clip_boxes(unsorted_proposals, im_shape)

        filtered_proposals_total = tf.shape(unsorted_scores)[0]

        # Summaries: share of proposals kept and number of proposals dropped.
        kept_fraction = (
            tf.cast(filtered_proposals_total, tf.float32) /
            tf.cast(all_proposals_total, tf.float32)
        )
        tf.summary.scalar('valid_proposals_ratio', kept_fraction, ['rpn'])
        dropped = all_proposals_total - filtered_proposals_total
        tf.summary.scalar('invalid_proposals', dropped, ['rpn'])

        # Rank by score; k can never exceed the number of proposals left.
        available = tf.shape(unsorted_scores)[0]
        k = tf.minimum(self._pre_nms_top_n, available)
        top_k = tf.nn.top_k(unsorted_scores, k=k)

        sorted_top_scores = top_k.values
        sorted_top_proposals = tf.gather(unsorted_proposals, top_k.indices)

        if not self._apply_nms:
            proposals, scores = sorted_top_proposals, sorted_top_scores
        else:
            with tf.name_scope('nms'):
                # Convert to the coordinate order expected by
                # `tf.image.non_max_suppression`.
                proposals_tf_order = change_order(sorted_top_proposals)
                selected_indices = tf.image.non_max_suppression(
                    proposals_tf_order,
                    tf.reshape(sorted_top_scores, [-1]),
                    self._post_nms_top_n,
                    iou_threshold=self._nms_threshold,
                )

                # Gather the boxes NMS kept, then restore our own order.
                kept_tf_order = tf.gather(
                    proposals_tf_order, selected_indices,
                    name='gather_nms_proposals')
                proposals = change_order(kept_tf_order)
                scores = tf.gather(
                    sorted_top_scores, selected_indices,
                    name='gather_nms_proposals_scores')

        if self._clip_after_nms:
            proposals = clip_boxes(proposals, im_shape)

        pred = {
            'proposals': proposals,
            'scores': scores,
        }

        if self._debug:
            debug_tensors = {
                'sorted_top_scores': sorted_top_scores,
                'sorted_top_proposals': sorted_top_proposals,
                'unsorted_proposals': unsorted_proposals,
                'unsorted_scores': unsorted_scores,
                'all_proposals': all_proposals,
                'all_scores': all_scores,
                # Scores for proposals_unclipped are in unsorted_scores.
                'proposals_unclipped': proposals_unclipped,
            }
            pred.update(debug_tensors)

        return pred
示例#9
0
    def testClippingOfProposals(self):
        """Test clipping of proposals before and after NMS.

        The regression output is all zeros so that the proposals fed into the
        clipping step are expected to match the (partly out-of-image)
        anchors. The proposal layer is run twice, once with
        `clip_after_nms` disabled and once with it enabled, and in both cases
        the final proposals must lie inside the image.
        """
        gt_boxes = np.array([
            [0, 0, 10, 12],
            [10, 10, 20, 22],
            [10, 10, 20, 22],
            [30, 25, 39, 39],
        ])
        all_anchors = np.array([
            [-20, -10, 12, 6],
            [2, -10, 20, 20],
            [0, 0, 12, 16],
            [2, -10, 20, 2],
        ])
        rpn_cls_prob = np.array([
            [0.3, 0.7],
            [0.4, 0.6],
            [0.3, 0.7],
            [0.1, 0.9],
        ])
        # All zeros, so that decoding inside the proposal layer leaves the
        # anchors unchanged and we get unclipped proposals.
        rpn_bbox_pred = np.array([
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
        ])

        # Proposals are (x_min, y_min, x_max, y_max) while the image size is
        # (height, width), so the exclusive upper bound per coordinate is
        # (width, height, width, height). Concatenating im_size with itself
        # would pair x with the height and y with the width, which is only
        # right for square images.
        im_height, im_width = self.im_size
        upper_bounds = np.array([im_width, im_height, im_width, im_height])

        config = EasyDict(self.config)

        # --- Clipping before NMS ---
        config['clip_after_nms'] = False
        results_before = self._run_rpn_proposal(all_anchors,
                                                rpn_cls_prob,
                                                config,
                                                gt_boxes=gt_boxes,
                                                rpn_bbox_pred=rpn_bbox_pred)
        im_size = tf.placeholder(tf.float32, shape=(2, ))
        proposals_unclipped = tf.placeholder(
            tf.float32, shape=(results_before['proposals_unclipped'].shape))
        clip_bboxes_tf = clip_boxes(proposals_unclipped, im_size)

        with self.test_session() as sess:
            clipped_proposals = sess.run(
                clip_bboxes_tf,
                feed_dict={
                    proposals_unclipped: results_before['proposals_unclipped'],
                    im_size: self.im_size
                })

        # Check we clip proposals right after filtering the invalid area ones.
        self.assertAllEqual(results_before['unsorted_proposals'],
                            clipped_proposals)

        # Check all NMS proposals have values inside the image boundaries.
        proposals = results_before['proposals']
        self.assertTrue((proposals >= 0).all())
        self.assertTrue((proposals < upper_bounds).all())

        # --- Clipping after NMS ---
        config['clip_after_nms'] = True
        results_after = self._run_rpn_proposal(all_anchors,
                                               rpn_cls_prob,
                                               config,
                                               gt_boxes=gt_boxes,
                                               rpn_bbox_pred=rpn_bbox_pred)

        # Check we don't clip proposals in the beginning of the function.
        # (No separate clip_boxes run is needed here: nothing is compared
        # against an independently clipped copy in this mode.)
        self.assertAllEqual(results_after['unsorted_proposals'],
                            results_after['proposals_unclipped'])

        # Check all NMS proposals have values inside the image boundaries.
        proposals = results_after['proposals']
        self.assertTrue((proposals >= 0).all())
        self.assertTrue((proposals < upper_bounds).all())
示例#10
0
    def _build(self, rpn_cls_prob, rpn_bbox_pred, all_anchors, im_shape):
        """Convert RPN class/regression outputs into final proposals.

        Args:
            rpn_cls_prob: A Tensor with the softmax output for each anchor,
                i.e. the class probabilities predicted by the RPN. Its shape
                should be (total_anchors, 2), with the probability of being
                background and the probability of being foreground for each
                anchor.
            rpn_bbox_pred: A Tensor with the regression output for each
                anchor, i.e. the boxes predicted by the RPN. Its shape
                should be (total_anchors, 4).
            all_anchors: A Tensor with the anchors fed to the RPN, of shape
                (total_anchors, 4), having (x_min, y_min, x_max, y_max) for
                each anchor.
            im_shape: A Tensor with the image shape in format
                (height, width).

        Returns:
            prediction_dict with the following keys:
                proposals: A Tensor with the final selected proposed
                    bounding boxes, one (x_min, y_min, x_max, y_max) row
                    per proposal.
                scores: A rank-1 Tensor with the probability of being an
                    object for each proposal.
            If `self._debug` is set, the intermediate tensors are returned
            under additional keys.
        """
        # The score is the second scalar of the class probability, which is
        # a softmax over (background, foreground).
        objectness = rpn_cls_prob[:, 1]
        # Force-flatten the scores; they should be flat already, so this is
        # only a safeguard.
        all_scores = tf.reshape(objectness, [-1])

        if self._filter_outside_anchors:
            with tf.name_scope('filter_outside_anchors'):
                # Split the (total_anchors, 4) tensor along axis 1 into four
                # tensors of length total_anchors, one per coordinate.
                unstacked_anchors = tf.unstack(all_anchors, axis=1)
                (x_min_anchor, y_min_anchor, x_max_anchor,
                 y_max_anchor) = unstacked_anchors

                # Out-of-bounds check. x runs horizontally and y vertically,
                # and im_shape is (height, width), so im_shape[0] bounds y
                # and im_shape[1] bounds x. An anchor is kept only when both
                # its top-left and bottom-right corners are inside the
                # image.
                min_corner_ok = tf.logical_and(
                    tf.greater_equal(x_min_anchor, 0),
                    tf.greater_equal(y_min_anchor, 0))
                max_corner_ok = tf.logical_and(
                    tf.less(x_max_anchor, im_shape[1]),
                    tf.less(y_max_anchor, im_shape[0]))
                anchor_filter = tf.logical_and(min_corner_ok, max_corner_ok)
                anchor_filter = tf.reshape(anchor_filter, [-1])

                # Keep the in-bounds anchors along with their predicted
                # boxes and scores.
                all_anchors = tf.boolean_mask(
                    all_anchors, anchor_filter, name='filter_anchors')
                rpn_bbox_pred = tf.boolean_mask(rpn_bbox_pred, anchor_filter)
                all_scores = tf.boolean_mask(all_scores, anchor_filter)

        # Decode boxes: combine the reference anchors with the predicted
        # offsets to obtain the predicted box coordinates.
        all_proposals = decode(all_anchors, rpn_bbox_pred)

        # Boolean tensor marking the proposals whose score is greater than
        # or equal to the probability threshold.
        min_prob_filter = tf.greater_equal(
            all_scores, self._min_prob_threshold)

        # Boolean tensor marking the proposals with a positive area, which
        # requires x_max > x_min and y_max > y_min.
        x_min, y_min, x_max, y_max = tf.unstack(all_proposals, axis=1)
        side_x = tf.maximum(x_max - x_min, 0.0)
        side_y = tf.maximum(y_max - y_min, 0.0)
        zero_area_filter = tf.greater(side_x * side_y, 0.0)

        # Positions that satisfy both criteria, i.e. the valid entries.
        proposal_filter = tf.logical_and(zero_area_filter, min_prob_filter)

        # The two boolean_mask calls below yield the scores and proposals
        # that are kept.
        all_proposals_total = tf.shape(all_scores)[0]
        unsorted_scores = tf.boolean_mask(
            all_scores, proposal_filter, name='filtered_scores')
        unsorted_proposals = tf.boolean_mask(
            all_proposals, proposal_filter, name='filtered_proposals')
        if self._debug:
            proposals_unclipped = tf.identity(unsorted_proposals)

        # When clipping is not deferred until after NMS, clip the proposals
        # to the image here, before NMS.
        if not self._clip_after_nms:
            unsorted_proposals = clip_boxes(unsorted_proposals, im_shape)

        filtered_proposals_total = tf.shape(unsorted_scores)[0]

        valid_ratio = (tf.cast(filtered_proposals_total, tf.float32) /
                       tf.cast(all_proposals_total, tf.float32))
        tf.summary.scalar('valid_proposals_ratio', valid_ratio, ['rpn'])

        invalid_count = all_proposals_total - filtered_proposals_total
        tf.summary.scalar('invalid_proposals', invalid_count, ['rpn'])

        # Get top `pre_nms_top_n` indices by sorting the proposals by score;
        # the minimum keeps k from exceeding the number of proposals.
        remaining = tf.shape(unsorted_scores)[0]
        k = tf.minimum(self._pre_nms_top_n, remaining)
        # Values and indices of the k largest entries.
        top_k = tf.nn.top_k(unsorted_scores, k=k)

        # Gather the matching proposals by index and take their scores.
        sorted_top_proposals = tf.gather(unsorted_proposals, top_k.indices)
        sorted_top_scores = top_k.values

        if self._apply_nms:
            with tf.name_scope('nms'):
                # Reorder the proposals into TensorFlow's bounding box order
                # for `tf.image.non_max_suppression` compatibility.
                proposals_tf_order = change_order(sorted_top_proposals)
                nms_scores = tf.reshape(sorted_top_scores, [-1])
                # Prune boxes that have a high IoU with previously selected
                # boxes.
                selected_indices = tf.image.non_max_suppression(
                    proposals_tf_order,
                    nms_scores,
                    self._post_nms_top_n,
                    iou_threshold=self._nms_threshold)

                # selected_indices is a smaller tensor; use it to extract
                # the proposals and scores.
                nms_proposals_tf_order = tf.gather(
                    proposals_tf_order,
                    selected_indices,
                    name='gather_nms_proposals')

                # Switch back to the regular bbox encoding.
                proposals = change_order(nms_proposals_tf_order)
                scores = tf.gather(
                    sorted_top_scores,
                    selected_indices,
                    name='gather_nms_proposals_scores')
        else:
            proposals = sorted_top_proposals
            scores = sorted_top_scores

        # Clip proposals to the image after NMS.
        if self._clip_after_nms:
            proposals = clip_boxes(proposals, im_shape)

        pred = {'proposals': proposals, 'scores': scores}

        if self._debug:
            pred['sorted_top_scores'] = sorted_top_scores
            pred['sorted_top_proposals'] = sorted_top_proposals
            pred['unsorted_proposals'] = unsorted_proposals
            pred['unsorted_scores'] = unsorted_scores
            pred['all_proposals'] = all_proposals
            pred['all_scores'] = all_scores
            # proposals_unclipped has the unsorted_scores scores.
            pred['proposals_unclipped'] = proposals_unclipped

        return pred
示例#11
0
文件: image.py 项目: Mesitis/luminoth
def patch_image(image,
                bboxes=None,
                offset_height=0,
                offset_width=0,
                target_height=None,
                target_width=None):
    """Gets a patch using tf.image.crop_to_bounding_box and adjusts bboxes

    The patch is resized (bilinear) back to the size of the original image.
    Boxes whose centers fall outside the patch are dropped; the remaining
    ones are shifted into patch coordinates, clipped to the patch and then
    rescaled to the resized image.

    If patching would leave us with zero bboxes, we return the image and bboxes
    unchanged.

    Args:
        image: Float32 Tensor with shape (H, W, 3).
        bboxes: Tensor with the ground-truth boxes. Shaped (total_boxes, 5).
            The last element in each box is the category label.
        offset_height: Height of the upper-left corner of the patch with
            respect to the original image. Non-negative.
        offset_width: Width of the upper-left corner of the patch with respect
            to the original image. Non-negative.
        target_height: Height of the patch. If set to none, it will be the
            maximum (tf.shape(image)[0] - offset_height - 1). Positive.
        target_width: Width of the patch. If set to none, it will be the
            maximum (tf.shape(image)[1] - offset_width - 1). Positive.

    Returns:
        Dict with the keys:
            image: Patch of the original image, resized to the original
                image's size.
            bboxes: Adjusted bboxes (only those whose centers are inside the
                patch). The key isn't set if bboxes is None.
    """
    # TODO: make this function safe with respect to senseless inputs (i.e
    # having an offset_height that's larger than tf.shape(image)[0], etc.)
    # As of now we only use it inside random_patch, which already makes sure
    # the arguments are legal.
    im_shape = tf.shape(image)
    if target_height is None:
        target_height = im_shape[0] - offset_height - 1
    if target_width is None:
        target_width = im_shape[1] - offset_width - 1

    new_image = tf.image.crop_to_bounding_box(
        image,
        offset_height=offset_height,
        offset_width=offset_width,
        target_height=target_height,
        target_width=target_width,
    )
    patch_shape = tf.shape(new_image)

    # Return if we didn't have bboxes.
    if bboxes is None:
        # Resize the patch to the original image's size. This is to make sure
        # we respect restrictions in image size in the models.
        new_image_resized = tf.image.resize_images(
            new_image, im_shape[:2], method=tf.image.ResizeMethod.BILINEAR)
        return_dict = {"image": new_image_resized}
        return return_dict

    # Now we will remove all bboxes whose centers are not inside the cropped
    # image.

    # First get the x and y coordinates of the center of each of the bboxes.
    # Slicing with 0:1 / 2:3 (instead of indexing with 0 / 2) keeps a
    # trailing dimension of size 1, so the two columns can be concatenated
    # side by side and averaged row-wise. The reduction must be along axis 1
    # for both coordinates: without it the mean collapses to a single scalar
    # over all boxes and the filter no longer works per box.
    bboxes_center_x = tf.reduce_mean(
        tf.concat([bboxes[:, 0:1], bboxes[:, 2:3]], axis=1),
        axis=1,
    )
    bboxes_center_y = tf.reduce_mean(
        tf.concat([bboxes[:, 1:2], bboxes[:, 3:4]], axis=1),
        axis=1,
    )

    # Now we get a boolean tensor holding for each of the bboxes' centers
    # whether they are inside the patch.
    center_x_is_inside = tf.logical_and(
        tf.greater(bboxes_center_x, offset_width),
        tf.less(bboxes_center_x, tf.add(target_width, offset_width)))
    center_y_is_inside = tf.logical_and(
        tf.greater(bboxes_center_y, offset_height),
        tf.less(bboxes_center_y, tf.add(target_height, offset_height)))
    center_is_inside = tf.logical_and(center_x_is_inside, center_y_is_inside)

    # Now we mask the bboxes, removing all those whose centers are outside
    # the patch.
    masked_bboxes = tf.boolean_mask(bboxes, center_is_inside)
    # We move the bboxes to the right place, clipping them if
    # necessary.
    new_bboxes_unclipped = tf.concat(
        [
            tf.subtract(masked_bboxes[:, 0:1], offset_width),
            tf.subtract(masked_bboxes[:, 1:2], offset_height),
            tf.subtract(masked_bboxes[:, 2:3], offset_width),
            tf.subtract(masked_bboxes[:, 3:4], offset_height),
        ],
        axis=1,
    )
    # Finally, we clip the boxes and add back the labels.
    new_bboxes = tf.concat(
        [
            tf.to_int32(
                clip_boxes(new_bboxes_unclipped, imshape=patch_shape[:2])),
            masked_bboxes[:, 4:],
        ],
        axis=1,
    )
    # Now resize the image to the original size and adjust bboxes accordingly
    new_image_resized = tf.image.resize_images(
        new_image, im_shape[:2], method=tf.image.ResizeMethod.BILINEAR)
    # adjust_bboxes requires height and width values with dtype=float32
    new_bboxes_resized = adjust_bboxes(
        new_bboxes,
        old_height=tf.to_float(patch_shape[0]),
        old_width=tf.to_float(patch_shape[1]),
        new_height=tf.to_float(im_shape[0]),
        new_width=tf.to_float(im_shape[1]),
    )

    # Finally, set up the return dict, but only update the image and bboxes if
    # our patch has at least one bbox in it.
    update_condition = tf.greater_equal(tf.shape(new_bboxes_resized)[0], 1)
    return_dict = {}
    return_dict["image"] = tf.cond(update_condition, lambda: new_image_resized,
                                   lambda: image)
    return_dict["bboxes"] = tf.cond(update_condition,
                                    lambda: new_bboxes_resized, lambda: bboxes)
    return return_dict
    def _build(self, proposals, bbox_pred, cls_prob, im_shape):
        """
        Args:
            proposals: Tensor with the RPN proposals bounding boxes.
                Shape (num_proposals, 4). Where num_proposals is less than
                POST_NMS_TOP_N (We don't know exactly beforehand)
            bbox_pred: Tensor with the RCNN delta predictions for each proposal
                for each class. Shape (num_proposals, 4 * num_classes)
            cls_prob: A softmax probability for each proposal where the idx = 0
                is the background class (which we should ignore).
                Shape (num_proposals, num_classes + 1)

        Returns:
            objects:
                Shape (final_num_proposals, 4)
                Where final_num_proposals is unknown before-hand (it depends on
                NMS). The 4-length Tensor for each corresponds to:
                (x_min, y_min, x_max, y_max).
            objects_label:
                Shape (final_num_proposals,)
            objects_label_prob:
                Shape (final_num_proposals,)

        """
        with tf.variable_scope("build_without_filter"):
            without_filter_dict = self.build_without_filter(
                proposals, bbox_pred, cls_prob, im_shape
            )

        selected_boxes = []
        selected_probs = []
        selected_labels = []

        # For each class, take the proposals with the class-specific
        # predictions (class scores and bbox regression) and filter accordingly
        # (valid area, min probability score and NMS).
        for class_id in range(self._num_classes):
            # Apply the class-specific transformations to the proposals to
            # obtain the current class' prediction.
            class_prob = cls_prob[:, class_id + 1]  # 0 is background class.
            class_bboxes = bbox_pred[:, (4 * class_id):(4 * class_id + 4)]
            raw_class_objects = decode(
                proposals,
                class_bboxes,
                variances=self._variances,
            )

            # Clip bboxes so they don't go out of the image.
            class_objects = clip_boxes(raw_class_objects, im_shape)

            # Filter objects based on the min probability threshold and on them
            # having a valid area.
            prob_filter = tf.greater_equal(
                class_prob, self._min_prob_threshold
            )

            (x_min, y_min, x_max, y_max) = tf.unstack(class_objects, axis=1)
            area_filter = tf.greater(
                tf.maximum(x_max - x_min, 0.0)
                * tf.maximum(y_max - y_min, 0.0),
                0.0
            )

            object_filter = tf.logical_and(area_filter, prob_filter)

            class_objects = tf.boolean_mask(class_objects, object_filter)
            class_prob = tf.boolean_mask(class_prob, object_filter)

            # We have to use the TensorFlow's bounding box convention to use
            # the included function for NMS.
            class_objects_tf = change_order(class_objects)

            # Apply class NMS.
            class_selected_idx = tf.image.non_max_suppression(
                class_objects_tf, class_prob, self._class_max_detections,
                iou_threshold=self._class_nms_threshold
            )

            # Using NMS resulting indices, gather values from Tensors.
            class_objects_tf = tf.gather(class_objects_tf, class_selected_idx)
            class_prob = tf.gather(class_prob, class_selected_idx)

            # Revert to our bbox convention.
            class_objects = change_order(class_objects_tf)

            # We append values to a regular list which will later be
            # transformed to a proper Tensor.
            selected_boxes.append(class_objects)
            selected_probs.append(class_prob)
            # In the case of the class_id, since it is a loop on classes, we
            # already have a fixed class_id. We use `tf.tile` to create that
            # Tensor with the total number of indices returned by the NMS.
            selected_labels.append(
                tf.tile([class_id], [tf.shape(class_selected_idx)[0]])
            )

        # We use concat (axis=0) to generate a Tensor where the rows are
        # stacked on top of each other
        objects = tf.concat(selected_boxes, axis=0)
        proposal_label = tf.concat(selected_labels, axis=0)
        proposal_label_prob = tf.concat(selected_probs, axis=0)

        tf.summary.histogram(
            'proposal_cls_scores', proposal_label_prob, ['rcnn']
        )

        # Get top-k detections of all classes.
        k = tf.minimum(
            self._total_max_detections,
            tf.shape(proposal_label_prob)[0]
        )
        top_k = tf.nn.top_k(proposal_label_prob, k=k)
        top_k_proposal_label_prob = top_k.values
        top_k_objects = tf.gather(objects, top_k.indices)
        top_k_proposal_label = tf.gather(proposal_label, top_k.indices)


        return {
            'objects': top_k_objects,
            'proposal_label': top_k_proposal_label,
            'proposal_label_prob': top_k_proposal_label_prob,
            'selected_boxes': selected_boxes,
            'selected_probs': selected_probs,
            'selected_labels': selected_labels,

            "without_filter_dict": without_filter_dict
        }
    def build_without_filter(self, proposals, bbox_pred, cls_prob, im_shape,
                             min_prob_threshold=0.7, min_area=76654.0):
        """Build per-class detections using explicit score and area cut-offs.

        For every class the proposals are decoded with that class' bbox
        deltas, clipped to the image, filtered by probability and box area,
        and passed through NMS. The surviving detections of all classes are
        then concatenated and the top-k by probability are kept.

        The cut-offs are arguments rather than instance attributes; their
        defaults reproduce the values that used to be hard-coded here.

        Args:
            proposals: Tensor with the proposal bounding boxes, one row of
                four coordinates per proposal.
            bbox_pred: Tensor with the per-class delta predictions for each
                proposal; columns ``4 * class_id`` to ``4 * class_id + 3``
                belong to class ``class_id``.
            cls_prob: Tensor with the class probabilities for each proposal;
                column 0 is the background class and is ignored.
            im_shape: Image shape, forwarded to ``clip_boxes``.
            min_prob_threshold: A detection is kept only if its class
                probability is greater than or equal to this value.
            min_area: A detection is kept only if the area of its clipped
                box is strictly greater than this value.

        Returns:
            dict with the keys:
                objects: Boxes of the top-k detections over all classes.
                proposal_label: Class id of each top-k detection.
                proposal_label_prob: Probability of each top-k detection.
                selected_boxes: List with one Tensor of post-NMS boxes per
                    class.
                selected_probs: List with one Tensor of post-NMS
                    probabilities per class.
                selected_labels: List with one Tensor of class ids per class.
        """
        selected_boxes = []
        selected_probs = []
        selected_labels = []

        # For each class, take the proposals with the class-specific
        # predictions (class scores and bbox regression) and filter accordingly
        # (minimum area, minimum probability score and NMS).
        for class_id in range(self._num_classes):
            # Apply the class-specific transformations to the proposals to
            # obtain the current class' prediction.
            class_prob = cls_prob[:, class_id + 1]  # 0 is background class.
            class_bboxes = bbox_pred[:, (4 * class_id):(4 * class_id + 4)]
            raw_class_objects = decode(
                proposals,
                class_bboxes,
                variances=self._variances,
            )

            # Clip bboxes so they don't go out of the image.
            class_objects = clip_boxes(raw_class_objects, im_shape)

            # Keep only detections that are confident enough ...
            prob_filter = tf.greater_equal(class_prob, min_prob_threshold)

            # ... and whose clipped box is large enough. The `tf.maximum`
            # calls guard against inverted boxes yielding a positive product.
            (x_min, y_min, x_max, y_max) = tf.unstack(class_objects, axis=1)
            area_filter = tf.greater(
                tf.maximum(x_max - x_min, 0.0)
                * tf.maximum(y_max - y_min, 0.0),
                min_area
            )

            object_filter = tf.logical_and(area_filter, prob_filter)

            class_objects = tf.boolean_mask(class_objects, object_filter)
            class_prob = tf.boolean_mask(class_prob, object_filter)

            # We have to use the TensorFlow's bounding box convention to use
            # the included function for NMS.
            class_objects_tf = change_order(class_objects)

            # Apply class NMS.
            class_selected_idx = tf.image.non_max_suppression(
                class_objects_tf, class_prob, self._class_max_detections,
                iou_threshold=self._class_nms_threshold
            )

            # Using NMS resulting indices, gather values from Tensors.
            class_objects_tf = tf.gather(class_objects_tf, class_selected_idx)
            class_prob = tf.gather(class_prob, class_selected_idx)

            # Revert to our bbox convention.
            class_objects = change_order(class_objects_tf)

            # We append values to a regular list which will later be
            # transformed to a proper Tensor.
            selected_boxes.append(class_objects)
            selected_probs.append(class_prob)
            # In the case of the class_id, since it is a loop on classes, we
            # already have a fixed class_id. We use `tf.tile` to create that
            # Tensor with the total number of indices returned by the NMS.
            selected_labels.append(
                tf.tile([class_id], [tf.shape(class_selected_idx)[0]])
            )

        # We use concat (axis=0) to generate a Tensor where the rows are
        # stacked on top of each other
        objects = tf.concat(selected_boxes, axis=0)
        proposal_label = tf.concat(selected_labels, axis=0)
        proposal_label_prob = tf.concat(selected_probs, axis=0)

        tf.summary.histogram(
            'proposal_cls_scores', proposal_label_prob, ['rcnn']
        )

        # Get top-k detections of all classes. `k` is capped by the number of
        # detections actually available so `top_k` never over-asks.
        k = tf.minimum(
            self._total_max_detections,
            tf.shape(proposal_label_prob)[0]
        )
        top_k = tf.nn.top_k(proposal_label_prob, k=k)
        top_k_proposal_label_prob = top_k.values
        top_k_objects = tf.gather(objects, top_k.indices)
        top_k_proposal_label = tf.gather(proposal_label, top_k.indices)

        return {
            'objects': top_k_objects,
            'proposal_label': top_k_proposal_label,
            'proposal_label_prob': top_k_proposal_label_prob,
            'selected_boxes': selected_boxes,
            'selected_probs': selected_probs,
            'selected_labels': selected_labels,
        }
示例#14
0
    def _build(self, cls_prob, loc_pred, all_anchors, im_shape):
        """Turn per-anchor predictions into the final per-class detections.

        Args:
            cls_prob: A softmax probability for each anchor where the idx = 0
                is the background class (which we should ignore).
                Shape (total_anchors, num_classes + 1)
            loc_pred: A Tensor with the regression output (predicted offsets
                and scales) for each anchor.
                Its shape should be (total_anchors, 4).
            all_anchors: A Tensor with the anchors bounding boxes of shape
                (total_anchors, 4), having (x_min, y_min, x_max, y_max) for
                each anchor.
            im_shape: A Tensor with the image shape in format (height, width).
        Returns:
            prediction_dict with the following keys:
                objects: The detections kept after the probability filter,
                    the area filter, per-class NMS and the final top-k.
                    Its shape is (final_num_proposals, 4), where
                    final_num_proposals is unknown before-hand (it depends on
                    NMS). The 4-length Tensor for each corresponds to:
                    (x_min, y_min, x_max, y_max).
                labels: Its shape is (final_num_proposals,)
                probs: Its shape is (final_num_proposals,)
                raw_proposals: The anchors adjusted using loc_pred, before
                    clipping. NOTE(review): only the Tensor built for the
                    last class of the loop is returned.
                anchors: The anchor each entry of `objects` was decoded
                    from. Its shape is (final_num_proposals, 4).
        """
        selected_boxes = []
        selected_probs = []
        selected_labels = []
        selected_anchors = []  # For debugging

        # Loop invariant: number of anchors fed to this layer.
        total_anchors = tf.shape(all_anchors)[0]

        # For each class: keep the predictions above the minimum probability,
        # decode them into boxes, clip them to the image, drop the ones with
        # an empty area and run NMS on what is left.
        for class_id in range(self._num_classes):
            # Get the confidences for this class (+ 1 is to ignore background)
            class_cls_prob = cls_prob[:, class_id + 1]

            # Filter by min_prob_threshold
            min_prob_filter = tf.greater_equal(class_cls_prob,
                                               self._min_prob_threshold)
            class_cls_prob = tf.boolean_mask(class_cls_prob, min_prob_filter)
            class_loc_pred = tf.boolean_mask(loc_pred, min_prob_filter)
            anchors = tf.boolean_mask(all_anchors, min_prob_filter)

            # Using the loc_pred and the anchors, we generate the proposals.
            raw_proposals = decode(anchors, class_loc_pred, self._variances)

            # Clip boxes to image.
            clipped_proposals = clip_boxes(raw_proposals, im_shape)

            # Filter proposals that have an non-valid area.
            (x_min, y_min, x_max, y_max) = tf.unstack(clipped_proposals,
                                                      axis=1)
            proposal_filter = tf.greater(
                tf.maximum(x_max - x_min, 0.) * tf.maximum(y_max - y_min, 0.),
                0.)
            # Apply the same mask to boxes, offsets, scores and anchors so
            # that their rows stay aligned.
            class_proposals = tf.boolean_mask(clipped_proposals,
                                              proposal_filter)
            class_loc_pred = tf.boolean_mask(class_loc_pred, proposal_filter)
            class_cls_prob = tf.boolean_mask(class_cls_prob, proposal_filter)
            proposal_anchors = tf.boolean_mask(anchors, proposal_filter)

            # Log results of filtering non-valid area proposals.
            # `total_raw_proposals` counts the boxes that passed the
            # probability threshold; `total_proposals` the ones that also
            # have a valid area.
            total_proposals = tf.shape(class_proposals)[0]
            total_raw_proposals = tf.shape(raw_proposals)[0]

            # Number of boxes dropped by the area filter (raw - valid, so the
            # value is non-negative).
            tf.summary.scalar('invalid_proposals',
                              total_raw_proposals - total_proposals, ['ssd'])
            # Fraction of the anchors that produced a valid proposal for this
            # class.
            tf.summary.scalar(
                'valid_proposals_ratio',
                tf.cast(total_proposals, tf.float32) /
                tf.cast(total_anchors, tf.float32), ['ssd'])

            # We have to use the TensorFlow's bounding box convention to use
            # the included function for NMS.
            # After gathering results we should normalize it back.
            class_proposal_tf = change_order(class_proposals)

            # Apply class NMS. The returned indices select the boxes that
            # this class keeps.
            class_selected_idx = tf.image.non_max_suppression(
                class_proposal_tf,
                class_cls_prob,
                self._class_max_detections,
                iou_threshold=self._class_nms_threshold)

            # Using NMS resulting indices, gather values from Tensors. The
            # anchors are gathered too: the top-k indices computed below
            # address the post-NMS concatenation, so every per-class list
            # must hold post-NMS rows.
            class_proposal_tf = tf.gather(class_proposal_tf,
                                          class_selected_idx)
            class_cls_prob = tf.gather(class_cls_prob, class_selected_idx)
            proposal_anchors = tf.gather(proposal_anchors, class_selected_idx)

            # We append values to a regular list which will later be
            # transformed to a proper Tensor.
            selected_boxes.append(class_proposal_tf)
            selected_probs.append(class_cls_prob)

            # In the case of the class_id, since it is a loop on classes, we
            # already have a fixed class_id. We use `tf.tile` to create that
            # Tensor with the total number of indices returned by the NMS.
            selected_labels.append(
                tf.tile([class_id], [tf.shape(class_selected_idx)[0]]))
            selected_anchors.append(proposal_anchors)

        # We use concat (axis=0) to generate a Tensor where the rows are
        # stacked on top of each other
        # (num_proposals, 4)
        proposals_tf = tf.concat(selected_boxes, axis=0)
        # Return to the original convention.
        proposals = change_order(proposals_tf)
        # (num_proposals, )
        proposal_label = tf.concat(selected_labels, axis=0)
        # (num_proposals, )
        proposal_label_prob = tf.concat(selected_probs, axis=0)
        # (num_proposals, 4)
        proposal_anchors = tf.concat(selected_anchors, axis=0)

        # Get topK detections of all classes.
        k = tf.minimum(self._total_max_detections,
                       tf.shape(proposal_label_prob)[0])

        # All the concatenated Tensors share the same row order (class by
        # class), so the indices of the top-k probabilities can be used to
        # gather from each of them.
        top_k = tf.nn.top_k(proposal_label_prob, k=k)

        top_k_proposal_label_prob = top_k.values
        top_k_proposals = tf.gather(proposals, top_k.indices)
        top_k_proposal_label = tf.gather(proposal_label, top_k.indices)
        top_k_proposal_anchors = tf.gather(proposal_anchors, top_k.indices)

        return {
            'objects': top_k_proposals,
            'labels': top_k_proposal_label,
            'probs': top_k_proposal_label_prob,
            'raw_proposals': raw_proposals,
            'anchors': top_k_proposal_anchors,
        }
示例#15
0
    def _build(self, proposals, bbox_pred, cls_prob, im_shape):
        """Turn RCNN predictions into the final detections.

        Each proposal is assigned its most probable label; background and
        low-probability proposals are dropped, the remaining ones are decoded
        with the deltas of their label, clipped, filtered by area, and passed
        through per-class NMS and a final top-k.

        Args:
            proposals: Tensor with the RPN proposals bounding boxes.
                Shape (num_proposals, 4). Where num_proposals is less than
                POST_NMS_TOP_N (We don't know exactly beforehand)
            bbox_pred: Tensor with the RCNN delta predictions for each proposal
                for each class. Shape (num_proposals, 4 * num_classes)
            cls_prob: A softmax probability for each proposal where the idx = 0
                is the background class (which we should ignore).
                Shape (num_proposals, num_classes + 1)
            im_shape: Image shape, forwarded to `clip_boxes`.

        Returns:
            dict with the keys:
                raw_objects: Decoded boxes before clipping and area filtering.
                objects:
                    Shape (final_num_proposals, 4)
                    Where final_num_proposals is unknown before-hand (it
                    depends on NMS). The 4-length Tensor for each corresponds
                    to: (x_min, y_min, x_max, y_max).
                proposal_label:
                    Shape (final_num_proposals,)
                proposal_label_prob:
                    Shape (final_num_proposals,)
                selected_boxes, selected_probs, selected_labels: Lists with
                    one post-NMS Tensor per class. The boxes in
                    `selected_boxes` are in the order produced by
                    `change_order` (the one used for NMS).

        """
        # First we want get the most probable label for each proposal
        # We still have the background on idx 0 so we subtract 1 to the idxs.
        proposal_label = tf.argmax(cls_prob, axis=1) - 1
        # Get the probability for the selected label for each proposal.
        proposal_label_prob = tf.reduce_max(cls_prob, axis=1)

        # We are going to use only the non-background proposals.
        non_background_filter = tf.greater_equal(proposal_label, 0)
        # Filter proposals with less than threshold probability.
        min_prob_filter = tf.greater_equal(
            proposal_label_prob, self._min_prob_threshold
        )
        proposal_filter = tf.logical_and(
            non_background_filter, min_prob_filter
        )

        total_proposals = tf.shape(proposals)[0]

        equal_shapes = tf.assert_equal(
            tf.shape(proposals)[0], tf.shape(bbox_pred)[0]
        )
        with tf.control_dependencies([equal_shapes]):
            # Filter all tensors for getting all non-background proposals.
            proposals = tf.boolean_mask(
                proposals, proposal_filter)
            proposal_label = tf.boolean_mask(
                proposal_label, proposal_filter)
            proposal_label_prob = tf.boolean_mask(
                proposal_label_prob, proposal_filter)
            bbox_pred = tf.boolean_mask(
                bbox_pred, proposal_filter)

        filtered_proposals = tf.shape(proposals)[0]

        tf.summary.scalar(
            'background_or_low_prob_proposals',
            total_proposals - filtered_proposals,
            ['rcnn']
        )

        # Create one hot with labels for using it to filter bbox_predictions.
        label_one_hot = tf.one_hot(proposal_label, depth=self._num_classes)
        # Flatten label_one_hot to get
        # (num_non_background_proposals * num_classes, 1) for filtering.
        label_one_hot_flatten = tf.cast(
            tf.reshape(label_one_hot, [-1]), tf.bool
        )
        # Flatten bbox_predictions getting
        # (num_non_background_proposals * num_classes, 4).
        bbox_pred_flatten = tf.reshape(bbox_pred, [-1, 4])

        equal_shapes = tf.assert_equal(
            tf.shape(bbox_pred_flatten)[0], tf.shape(label_one_hot_flatten)[0]
        )
        with tf.control_dependencies([equal_shapes]):
            # Control same number of dimensions between bbox and mask.
            bbox_pred = tf.boolean_mask(
                bbox_pred_flatten, label_one_hot_flatten)

        # Using the bbox_pred and the proposals we generate the objects.
        raw_objects = decode(proposals, bbox_pred)
        # Clip boxes to image.
        clipped_objects = clip_boxes(raw_objects, im_shape)

        # Filter objects that have an non-valid area. The comparison has to
        # be strict: the product of two non-negative terms is always >= 0, so
        # `greater_equal` would never discard anything.
        (x_min, y_min, x_max, y_max) = tf.unstack(clipped_objects, axis=1)
        object_filter = tf.greater(
            tf.maximum(x_max - x_min, 0.0) * tf.maximum(y_max - y_min, 0.0),
            0.0
        )

        total_raw_objects = tf.shape(raw_objects)[0]
        objects = tf.boolean_mask(
            clipped_objects, object_filter)
        proposal_label = tf.boolean_mask(
            proposal_label, object_filter)
        proposal_label_prob = tf.boolean_mask(
            proposal_label_prob, object_filter)

        total_objects = tf.shape(objects)[0]

        # Number of boxes dropped by the area filter (raw - valid, so the
        # value is non-negative).
        tf.summary.scalar(
            'invalid_proposals',
            total_raw_objects - total_objects, ['rcnn']
        )

        # Fraction of the incoming proposals that ended up as valid objects.
        valid_proposals_ratio = (
            tf.cast(total_objects, tf.float32) /
            tf.cast(total_proposals, tf.float32)
        )

        tf.summary.scalar(
            'valid_proposals_ratio', valid_proposals_ratio, ['rcnn']
        )

        # We have to use the TensorFlow's bounding box convention to use the
        # included function for NMS.
        # After gathering results we should normalize it back.
        objects_tf = change_order(objects)

        selected_boxes = []
        selected_probs = []
        selected_labels = []
        # For each class we want to filter those objects and apply NMS to them.
        for class_id in range(self._num_classes):
            # Filter objects Tensors with class.
            class_filter = tf.equal(proposal_label, class_id)
            class_objects_tf = tf.boolean_mask(objects_tf, class_filter)
            class_prob = tf.boolean_mask(proposal_label_prob, class_filter)

            # Apply class NMS.
            class_selected_idx = tf.image.non_max_suppression(
                class_objects_tf, class_prob, self._class_max_detections,
                iou_threshold=self._class_nms_threshold
            )

            # Using NMS resulting indices, gather values from Tensors.
            class_objects_tf = tf.gather(class_objects_tf, class_selected_idx)
            class_prob = tf.gather(class_prob, class_selected_idx)

            # We append values to a regular list which will later be transform
            # to a proper Tensor.
            selected_boxes.append(class_objects_tf)
            selected_probs.append(class_prob)
            # In the case of the class_id, since it is a loop on classes, we
            # already have a fixed class_id. We use `tf.tile` to create that
            # Tensor with the total number of indices returned by the NMS.
            selected_labels.append(
                tf.tile([class_id], [tf.shape(class_selected_idx)[0]])
            )

        # We use concat (axis=0) to generate a Tensor where the rows are
        # stacked on top of each other
        objects_tf = tf.concat(selected_boxes, axis=0)
        # Return to the original convention.
        objects = change_order(objects_tf)
        proposal_label = tf.concat(selected_labels, axis=0)
        proposal_label_prob = tf.concat(selected_probs, axis=0)

        # Get topK detections of all classes.
        k = tf.minimum(
            self._total_max_detections,
            tf.shape(proposal_label_prob)[0]
        )
        top_k = tf.nn.top_k(proposal_label_prob, k=k)
        top_k_proposal_label_prob = top_k.values
        top_k_objects = tf.gather(objects, top_k.indices)
        top_k_proposal_label = tf.gather(proposal_label, top_k.indices)

        return {
            'raw_objects': raw_objects,
            'objects': top_k_objects,
            'proposal_label': top_k_proposal_label,
            'proposal_label_prob': top_k_proposal_label_prob,
            'selected_boxes': selected_boxes,
            'selected_probs': selected_probs,
            'selected_labels': selected_labels,
        }
示例#16
0
    def _build(self, proposals, bbox_pred, cls_prob, im_shape):
        """Produce the final detections from the RPN and RCNN outputs.

        Args:
            proposals: Output of the RPN. Tensor with the RPN proposals
                bounding boxes. Shape (num_proposals, 4). Where num_proposals
                is less than POST_NMS_TOP_N (We don't know exactly beforehand)
            bbox_pred: Output of the RCNN. Tensor with the delta predictions
                (offsets and scales) for each proposal for each class.
                Shape (num_proposals, 4 * num_classes)
            cls_prob: Output of the RCNN. A softmax probability for each
                proposal where the idx = 0 is the background class (which we
                should ignore). Shape (num_proposals, num_classes + 1)
            im_shape: Image shape, forwarded to `clip_boxes`.

        Returns:
            dict with the keys:
                objects:
                    Coordinates of the boxes that are finally kept.
                    Shape (final_num_proposals, 4)
                    Where final_num_proposals is unknown before-hand (it
                    depends on NMS). The 4-length Tensor for each corresponds
                    to: (x_min, y_min, x_max, y_max).
                proposal_label:
                    Shape (final_num_proposals,)
                proposal_label_prob:
                    Shape (final_num_proposals,)
                selected_boxes, selected_probs, selected_labels: Lists with
                    one post-NMS Tensor per class.
        """
        boxes_per_class = []
        probs_per_class = []
        labels_per_class = []

        # Every class is handled independently: pick its scores and deltas,
        # decode and clip the boxes, discard low-score / empty boxes, and
        # finally run NMS.
        for class_id in range(self._num_classes):
            # Column 0 of `cls_prob` is the background, hence the + 1.
            scores = cls_prob[:, class_id + 1]
            first_col = 4 * class_id
            deltas = bbox_pred[:, first_col:first_col + 4]

            # Combine the RPN proposals (reference boxes) with the RCNN deltas
            # of this class to obtain the predicted boxes.
            decoded = decode(
                proposals,  # (num_proposals, 4)
                deltas,  # (num_proposals, 4)
                variances=self._variances,
            )

            # Clipping keeps every box, it only trims what falls outside of
            # the image. (num_proposals, 4)
            boxes = clip_boxes(decoded, im_shape)

            # Mask of the boxes whose score reaches the minimum threshold.
            score_ok = tf.greater_equal(scores, self._min_prob_threshold)

            # Mask of the boxes with a strictly positive area. Negative
            # widths / heights are zeroed first so that two negative sides
            # cannot multiply into a positive area.
            x_min, y_min, x_max, y_max = tf.unstack(boxes, axis=1)
            width = tf.maximum(x_max - x_min, 0.0)
            height = tf.maximum(y_max - y_min, 0.0)
            area_ok = tf.greater(width * height, 0.0)

            # Both conditions must hold.
            keep = tf.logical_and(area_ok, score_ok)
            boxes = tf.boolean_mask(boxes, keep)
            scores = tf.boolean_mask(scores, keep)

            # The NMS op expects TensorFlow's coordinate order.
            boxes_tf = change_order(boxes)

            # Indices of the boxes surviving NMS; at most
            # `self._class_max_detections` per class.
            kept_idx = tf.image.non_max_suppression(
                boxes_tf,
                scores,
                self._class_max_detections,
                iou_threshold=self._class_nms_threshold,
            )

            boxes_tf = tf.gather(boxes_tf, kept_idx)
            scores = tf.gather(scores, kept_idx)

            # Back to our own coordinate order.
            boxes = change_order(boxes_tf)

            # Collect what is left for this class; the lists are turned into
            # Tensors once the loop is done.
            boxes_per_class.append(boxes)
            probs_per_class.append(scores)

            # The label is constant inside the loop, so repeat `class_id`
            # once per surviving box to line up with `probs_per_class`.
            num_kept = tf.shape(kept_idx)[0]
            labels_per_class.append(tf.tile([class_id], [num_kept]))

        # Stack the per-class results row-wise.
        objects = tf.concat(boxes_per_class, axis=0)
        proposal_label = tf.concat(labels_per_class, axis=0)
        proposal_label_prob = tf.concat(probs_per_class, axis=0)

        tf.summary.histogram(
            'proposal_cls_scores', proposal_label_prob, ['rcnn']
        )

        # Keep the k most confident detections over all classes; k is capped
        # by the number of detections available.
        num_detections = tf.shape(proposal_label_prob)[0]
        k = tf.minimum(self._total_max_detections, num_detections)
        best = tf.nn.top_k(proposal_label_prob, k=k)
        best_probs = best.values
        best_objects = tf.gather(objects, best.indices)
        best_labels = tf.gather(proposal_label, best.indices)

        return {
            'objects': best_objects,
            'proposal_label': best_labels,
            'proposal_label_prob': best_probs,
            'selected_boxes': boxes_per_class,
            'selected_probs': probs_per_class,
            'selected_labels': labels_per_class,
        }