Example #1
    def _get_iou(self, bbox1_val, bbox2_val):
        """Get IoU for two sets of bounding boxes.

        It also checks that both implementations return the same result before
        returning.

        Args:
            bbox1_val: Array of shape (total_bbox1, 4).
            bbox2_val: Array of shape (total_bbox2, 4).

        Returns:
            iou: Array of shape (total_bbox1, total_bbox2)
        """
        bbox1 = tf.placeholder(tf.float32, (None, 4))
        bbox2 = tf.placeholder(tf.float32, (None, 4))
        iou = bbox_overlap_tf(bbox1, bbox2)

        with self.test_session() as sess:
            iou_val_tf = sess.run(iou, feed_dict={
                bbox1: np.array(bbox1_val),
                bbox2: np.array(bbox2_val),
            })

        iou_val_np = bbox_overlap(np.array(bbox1_val), np.array(bbox2_val))
        self.assertAllClose(iou_val_np, iou_val_tf)
        return iou_val_tf
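
For reference, the IoU that the helper above checks can be computed by hand; this is a minimal plain-Python sketch assuming (x_min, y_min, x_max, y_max) boxes and no +1 pixel convention (the bbox_overlap implementations under test may differ on that detail):

def iou_reference(box_a, box_b):
    # Intersection rectangle, clamped to zero when the boxes are disjoint.
    ix_min, iy_min = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix_max, iy_max = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    intersection = max(ix_max - ix_min, 0.0) * max(iy_max - iy_min, 0.0)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return intersection / (area_a + area_b - intersection)

print(iou_reference([0., 0., 10., 10.], [5., 5., 15., 15.]))  # 25 / 175 = ~0.1429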
Example #2
    def _get_iou(self, bbox1_val, bbox2_val):
        """Get IoU for two sets of bounding boxes.

        It also checks that both implementations return the same result before
        returning.

        Args:
            bbox1_val: Array of shape (total_bbox1, 4).
            bbox2_val: Array of shape (total_bbox2, 4).

        Returns:
            iou: Array of shape (total_bbox1, total_bbox2)
        """
        bbox1 = tf.placeholder(tf.float32, (None, 4))
        bbox2 = tf.placeholder(tf.float32, (None, 4))
        iou = bbox_overlap_tf(bbox1, bbox2)

        with self.test_session() as sess:
            iou_val_tf = sess.run(iou,
                                  feed_dict={
                                      bbox1: np.array(bbox1_val),
                                      bbox2: np.array(bbox2_val),
                                  })

        iou_val_np = bbox_overlap(np.array(bbox1_val), np.array(bbox2_val))
        self.assertAllClose(iou_val_np, iou_val_tf)
        return iou_val_tf
Example #3
def single_tf_iou_filter(gt_boxes, main_part_label=1):
    main_part_gt_boxes = tf.boolean_mask(
        gt_boxes, tf.equal(gt_boxes[..., 1], main_part_label))
    not_main_part_gt_boxes = tf.boolean_mask(
        gt_boxes, tf.logical_not(tf.equal(gt_boxes[..., 1], main_part_label)))

    #### IoU matrix of shape [num-main, num-part]
    iou_tensor = bbox_overlap_tf(
        main_part_gt_boxes[:, -4:], not_main_part_gt_boxes[:, -4:])
    # Count how many main-part boxes intersect at least one other part box.
    has_intersection = tf.sign(tf.reduce_sum(
        tf.cast(tf.greater(iou_tensor, 0.0), tf.float32), axis=1))
    total_intersection_num = tf.reduce_sum(tf.reshape(has_intersection, [-1]), axis=0)

    return total_intersection_num
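
A hedged usage sketch for single_tf_iou_filter, assuming TF 1.x, that bbox_overlap_tf is in scope, and that each gt_boxes row is laid out as (id, label, x_min, y_min, x_max, y_max), so column 1 holds the part label and the last four columns hold the coordinates:

import tensorflow as tf

gt = tf.constant([
    [0., 1., 0., 0., 10., 10.],    # main part (label == 1)
    [1., 2., 5., 5., 15., 15.],    # other part, overlaps the main part
    [2., 2., 50., 50., 60., 60.],  # other part, no overlap at all
])

count = single_tf_iou_filter(gt, main_part_label=1)
with tf.Session() as sess:
    # Prints 1.0: exactly one main-part box intersects some other part box.
    print(sess.run(count))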
Example #4
    def _build(self, all_anchors, gt_boxes, im_shape):
        """
        We compare anchors to GT and using the minibatch size and the different
        config settings (clobber, foreground fraction, etc), we end up with
        training targets *only* for the elements we want to use in the batch,
        while everything else is ignored.

        Basically what it does is, first generate the targets for all (valid)
        anchors, and then start subsampling the positive (foreground) and the
        negative ones (background) based on the number of samples of each type
        that we want.

        Args:
            all_anchors:
                A Tensor with all the bounding boxes coords of the anchors.
                Its shape should be (num_anchors, 4).
            gt_boxes:
                A Tensor with the ground truth bounding boxes of the image of
                the batch being processed. Its shape should be (num_gt, 5).
                The last dimension is used for the label.
            im_shape:
                Shape of the original image (height, width), used to define
                anchor targets with respect to gt_boxes.

        Returns:
            Tuple of the tensors of:
                labels: (1, 0, -1) for each anchor.
                    Shape (num_anchors, 1)
                bbox_targets: 4-d bbox targets as specified by the paper.
                    Shape (num_anchors, 4)
                max_overlaps: Max IoU overlap with ground truth boxes.
                    Shape (num_anchors, 1)
        """
        # Keep only the coordinates of gt_boxes
        gt_boxes = gt_boxes[:, :4]
        all_anchors = all_anchors[:, :4]

        # Only keep anchors inside the image
        (x_min_anchor, y_min_anchor,
         x_max_anchor, y_max_anchor) = tf.unstack(all_anchors, axis=1)

        anchor_filter = tf.logical_and(
            tf.logical_and(
                tf.greater_equal(x_min_anchor, -self._allowed_border),
                tf.greater_equal(y_min_anchor, -self._allowed_border)
            ),
            tf.logical_and(
                tf.less(x_max_anchor, im_shape[1] + self._allowed_border),
                tf.less(y_max_anchor, im_shape[0] + self._allowed_border)
            )
        )

        # We (force) reshape the filter so that we can use it as a boolean mask
        anchor_filter = tf.reshape(anchor_filter, [-1])
        # Filter anchors.
        anchors = tf.boolean_mask(
            all_anchors, anchor_filter, name='filter_anchors')

        # Generate array with the labels for all_anchors.
        labels = tf.fill((tf.gather(tf.shape(all_anchors), [0])), -1)
        labels = tf.boolean_mask(labels, anchor_filter, name='filter_labels')

        # Intersection over union (IoU) overlap between the anchors and the
        # ground truth boxes.
        overlaps = bbox_overlap_tf(tf.to_float(anchors), tf.to_float(gt_boxes))

        # Generate array with the IoU value of the closest GT box for each
        # anchor.
        max_overlaps = tf.reduce_max(overlaps, axis=1)
        if not self._clobber_positives:
            # Assign bg labels first so that positive labels can clobber them.
            # First we get an array with True where IoU is less than
            # self._negative_overlap
            negative_overlap_nonzero = tf.less(
                max_overlaps, self._negative_overlap)

            # Finally we set 0 at True indices
            labels = tf.where(
                condition=negative_overlap_nonzero,
                x=tf.zeros(tf.shape(labels)), y=tf.to_float(labels)
            )
        # Get the value of the max IoU for the closest anchor for each gt.
        gt_max_overlaps = tf.reduce_max(overlaps, axis=0)

        # Find all the indices that match (at least one, but could be more).
        gt_argmax_overlaps = tf.squeeze(tf.equal(overlaps, gt_max_overlaps))
        gt_argmax_overlaps = tf.where(gt_argmax_overlaps)[:, 0]
        # Eliminate duplicate indices.
        gt_argmax_overlaps, _ = tf.unique(gt_argmax_overlaps)
        # Order the indices for sparse_to_dense compatibility
        gt_argmax_overlaps, _ = tf.nn.top_k(
            gt_argmax_overlaps, k=tf.shape(gt_argmax_overlaps)[-1])
        gt_argmax_overlaps = tf.reverse(gt_argmax_overlaps, [0])

        # Foreground label: for each ground-truth box, the anchor with highest overlap.
        # When the argmax matches several anchors we use all of them (for consistency).
        # We set 1 at gt_argmax_overlaps_cond indices
        gt_argmax_overlaps_cond = tf.sparse_to_dense(
            gt_argmax_overlaps, tf.shape(labels, out_type=tf.int64),
            True, default_value=False
        )

        labels = tf.where(
            condition=gt_argmax_overlaps_cond,
            x=tf.ones(tf.shape(labels)), y=tf.to_float(labels)
        )

        # Foreground label: above threshold Intersection over Union (IoU)
        # First we get an array with True where IoU is greater or equal than
        # self._positive_overlap
        positive_overlap_inds = tf.greater_equal(
            max_overlaps, self._positive_overlap)
        # Finally we set 1 at True indices
        labels = tf.where(
            condition=positive_overlap_inds,
            x=tf.ones(tf.shape(labels)), y=labels
        )

        if self._clobber_positives:
            # Assign background labels last so that negative labels can clobber
            # positives. First we get an array with True where IoU is less than
            # self._negative_overlap
            negative_overlap_nonzero = tf.less(
                max_overlaps, self._negative_overlap)
            # Finally we set 0 at True indices
            labels = tf.where(
                condition=negative_overlap_nonzero,
                x=tf.zeros(tf.shape(labels)), y=labels
            )

        # Subsample positive labels if we have too many
        def subsample_positive():
            # Shuffle the foreground indices
            disable_fg_inds = tf.random_shuffle(fg_inds, seed=self._seed)
            # Select the indices that we have to ignore, this is
            # `tf.shape(fg_inds)[0] - num_fg` because we want to get only
            # `num_fg` foreground labels.
            disable_place = (tf.shape(fg_inds)[0] - num_fg)
            disable_fg_inds = disable_fg_inds[:disable_place]
            # Order the indices for sparse_to_dense compatibility
            disable_fg_inds, _ = tf.nn.top_k(
                disable_fg_inds, k=tf.shape(disable_fg_inds)[-1])
            disable_fg_inds = tf.reverse(disable_fg_inds, [0])
            disable_fg_inds = tf.sparse_to_dense(
                disable_fg_inds, tf.shape(labels, out_type=tf.int64),
                True, default_value=False
            )
            # Put -1 to ignore the anchors in the selected indices
            return tf.where(
                condition=tf.squeeze(disable_fg_inds),
                x=tf.to_float(tf.fill(tf.shape(labels), -1)), y=labels
            )

        num_fg = tf.to_int32(self._foreground_fraction * self._minibatch_size)
        # Get foreground indices, get True in the indices where we have a one.
        fg_inds = tf.equal(labels, 1)
        # We get only the indices where we have True.
        fg_inds = tf.squeeze(tf.where(fg_inds), axis=1)
        fg_inds_size = tf.size(fg_inds)
        # Condition to check whether we have too many positive labels.
        subsample_positive_cond = fg_inds_size > num_fg
        # Check the condition and subsample positive labels.
        labels = tf.cond(
            subsample_positive_cond,
            true_fn=subsample_positive, false_fn=lambda: labels
        )

        # Subsample negative labels if we have too many
        def subsample_negative():
            # Shuffle the background indices
            disable_bg_inds = tf.random_shuffle(bg_inds, seed=self._seed)

            # Select the indices that we have to ignore, this is
            # `tf.shape(bg_inds)[0] - num_bg` because we want to get only
            # `num_bg` background labels.
            disable_place = (tf.shape(bg_inds)[0] - num_bg)
            disable_bg_inds = disable_bg_inds[:disable_place]
            # Order the indices for sparse_to_dense compatibility
            disable_bg_inds, _ = tf.nn.top_k(
                disable_bg_inds, k=tf.shape(disable_bg_inds)[-1])
            disable_bg_inds = tf.reverse(disable_bg_inds, [0])
            disable_bg_inds = tf.sparse_to_dense(
                disable_bg_inds, tf.shape(labels, out_type=tf.int64),
                True, default_value=False
            )
            # Put -1 to ignore the anchors in the selected indices
            return tf.where(
                condition=tf.squeeze(disable_bg_inds),
                x=tf.to_float(tf.fill(tf.shape(labels), -1)), y=labels
            )

        # Recalculate the foreground indices after (maybe) disabling some of them

        # Get foreground indices, get True in the indices where we have a one.
        fg_inds = tf.equal(labels, 1)
        # We get only the indices where we have True.
        fg_inds = tf.squeeze(tf.where(fg_inds), axis=1)
        fg_inds_size = tf.size(fg_inds)

        num_bg = tf.to_int32(self._minibatch_size - fg_inds_size)
        # Get background indices, get True in the indices where we have a zero.
        bg_inds = tf.equal(labels, 0)
        # We get only the indices where we have True.
        bg_inds = tf.squeeze(tf.where(bg_inds), axis=1)
        bg_inds_size = tf.size(bg_inds)
        # Condition to check whether we have too many background labels.
        subsample_negative_cond = bg_inds_size > num_bg
        # Check the condition and subsample background labels.
        labels = tf.cond(
            subsample_negative_cond,
            true_fn=subsample_negative, false_fn=lambda: labels
        )

        # Return bbox targets with shape (anchors.shape[0], 4).

        # Find the closest gt box for each anchor.
        argmax_overlaps = tf.argmax(overlaps, axis=1)
        # Eliminate duplicates.
        argmax_overlaps_unique, _ = tf.unique(argmax_overlaps)
        # Filter the gt_boxes.
        # We get only the indices where we have "inside anchors".
        anchor_filter_inds = tf.where(anchor_filter)
        gt_boxes = tf.gather(gt_boxes, argmax_overlaps)

        bbox_targets = encode_tf(anchors, gt_boxes)

        # For the anchors that aren't foreground, we ignore the bbox_targets.
        anchor_foreground_filter = tf.equal(labels, 1)
        bbox_targets = tf.where(
            condition=anchor_foreground_filter,
            x=bbox_targets, y=tf.zeros_like(bbox_targets)
        )

        # We unroll "inside anchors" value for all anchors (for shape
        # compatibility).

        # We complete the missed indices with zeros
        # (because scatter_nd has zeros as default).
        bbox_targets = tf.scatter_nd(
            indices=tf.to_int32(anchor_filter_inds),
            updates=bbox_targets,
            shape=tf.shape(all_anchors)
        )

        labels_scatter = tf.scatter_nd(
            indices=tf.to_int32(anchor_filter_inds),
            updates=labels,
            shape=[tf.shape(all_anchors)[0]]
        )
        # We have to put -1 to ignore the indices with 0 generated by
        # scatter_nd, otherwise it will be considered as background.
        labels = tf.where(
            condition=anchor_filter, x=labels_scatter,
            y=tf.to_float(tf.fill(tf.shape(labels_scatter), -1))
        )

        max_overlaps = tf.scatter_nd(
            indices=tf.to_int32(anchor_filter_inds),
            updates=max_overlaps,
            shape=[tf.shape(all_anchors)[0]]
        )

        return labels, bbox_targets, max_overlaps
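
The labeling rule implemented above can be condensed into a short NumPy sketch (the 0.7/0.3 thresholds are assumed for illustration; border filtering and subsampling are omitted), which may help when reading the TensorFlow version:

import numpy as np

def label_anchors(overlaps, positive_overlap=0.7, negative_overlap=0.3):
    # overlaps: (num_anchors, num_gt) IoU matrix for the anchors kept inside the image.
    labels = np.full(overlaps.shape[0], -1.0)        # start with "ignore"
    max_overlaps = overlaps.max(axis=1)
    # Background first, so foreground can clobber it (clobber_positives=False).
    labels[max_overlaps < negative_overlap] = 0.0
    # For each gt, every anchor tying its best overlap becomes foreground.
    labels[np.any(overlaps == overlaps.max(axis=0), axis=1)] = 1.0
    # Any anchor above the positive threshold is also foreground.
    labels[max_overlaps >= positive_overlap] = 1.0
    return labels

overlaps = np.array([[0.8, 0.0],
                     [0.2, 0.4],
                     [0.1, 0.05]])
print(label_anchors(overlaps))  # [1. 1. 0.]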
Example #5
    def _build(self, proposals, gt_boxes):
        """
        Args:
            proposals: A Tensor with the RPN bounding boxes proposals.
                The shape of the Tensor is (num_proposals, 4).
            gt_boxes: A Tensor with the ground truth boxes for the image.
                The shape of the Tensor is (num_gt, 5), having the truth label
                as the last value for each box.
        Returns:
            proposals_label: Either a truth value of the proposals (a value
                between 0 and num_classes, with 0 being background), or -1 when
                the proposal is to be ignored in the minibatch.
                The shape of the Tensor is (num_proposals, 1).
            bbox_targets: A bounding box regression target for each of the
                proposals that have a label greater than zero. For every
                other proposal we return zeros.
                The shape of the Tensor is (num_proposals, 4).
        """
        overlaps = bbox_overlap_tf(proposals, gt_boxes[:, :4])
        # overlaps now contains (num_proposals, num_gt_boxes) with the IoU of
        # proposal P and ground truth box G in overlaps[P, G]

        # We are going to label each proposal based on the IoU with
        # `gt_boxes`. Start by filling the labels with -1, marking them as
        # ignored.
        proposals_label_shape = tf.gather(tf.shape(proposals), [0])
        proposals_label = tf.fill(
            dims=proposals_label_shape,
            value=-1.
        )
        # For each overlap there are three possible outcomes for labelling:
        #  if max(iou) < config.background_threshold_low then we ignore.
        #  elif max(iou) <= config.background_threshold_high then we label
        #      background.
        #  elif max(iou) > config.foreground_threshold then we label with
        #      the highest IoU in overlap.
        #
        # max_overlaps gets, for each proposal, the highest IoU it has with
        # any of the gt_boxes.
        max_overlaps = tf.reduce_max(overlaps, axis=1)

        iou_is_high_enough_for_bg = tf.greater_equal(
            max_overlaps, self._background_threshold_low
        )
        iou_is_not_too_high_for_bg = tf.less(
            max_overlaps, self._background_threshold_high
        )
        bg_condition = tf.logical_and(
            iou_is_high_enough_for_bg, iou_is_not_too_high_for_bg
        )
        proposals_label = tf.where(
            condition=bg_condition,
            x=tf.zeros_like(proposals_label, dtype=tf.float32),
            y=proposals_label
        )

        # Get the index of the best gt_box for each proposal.
        overlaps_best_gt_idxs = tf.argmax(overlaps, axis=1)
        # Having the index of the gt bbox with the best label we need to get
        # the label for each gt box and sum it one because 0 is used for
        # background.
        best_fg_labels_for_proposals = tf.add(
            tf.gather(gt_boxes[:, 4], overlaps_best_gt_idxs),
            1.
        )
        iou_is_fg = tf.greater_equal(
            max_overlaps, self._foreground_threshold
        )
        best_proposals_idxs = tf.argmax(overlaps, axis=0)

        # Set the indices in best_proposals_idxs to True, and the rest to
        # false.
        # tf.sparse_to_dense is used because we know the set of indices which
        # we want to set to True, and we know the rest of the indices
        # should be set to False. That's exactly the use case of
        # tf.sparse_to_dense.
        is_best_box = tf.sparse_to_dense(
            sparse_indices=tf.reshape(best_proposals_idxs, [-1]),
            sparse_values=True, default_value=False,
            output_shape=tf.cast(proposals_label_shape, tf.int64),
            validate_indices=False
        )
        # We update proposals_label with the value in
        # best_fg_labels_for_proposals only when the box is foreground.
        proposals_label = tf.where(
            condition=iou_is_fg,
            x=best_fg_labels_for_proposals,
            y=proposals_label
        )
        # Now we need to find the proposals that are the best for each of the
        # gt_boxes. We overwrite the previous proposals_label with this
        # because setting the best proposal for each gt_box has priority.
        best_proposals_gt_labels = tf.sparse_to_dense(
            sparse_indices=tf.reshape(best_proposals_idxs, [-1]),
            sparse_values=gt_boxes[:, 4] + 1,
            default_value=0.,
            output_shape=tf.cast(proposals_label_shape, tf.int64),
            validate_indices=False,
            name="get_right_labels_for_bestboxes"
        )
        proposals_label = tf.where(
            condition=is_best_box,
            x=best_proposals_gt_labels,
            y=proposals_label,
            name="update_labels_for_bestbox_proposals"
        )

        # proposals_label now has a value in [0, num_classes + 1] for
        # proposals we are going to use and -1 for the ones we should ignore.
        # But we still need to make sure we don't have a number of proposals
        # higher than minibatch_size * foreground_fraction.
        max_fg = int(self._foreground_fraction * self._minibatch_size)
        fg_condition = tf.logical_or(
            iou_is_fg, is_best_box
        )
        fg_inds = tf.where(
            condition=fg_condition
        )

        def disable_some_fgs():
            # We want to delete a randomly-selected subset of fg_inds of
            # size `fg_inds.shape[0] - max_fg`.
            # We shuffle along the dimension 0 and then we get the first
            # num_fg_inds - max_fg indices and we disable them.
            shuffled_inds = tf.random_shuffle(fg_inds, seed=self._seed)
            disable_place = (tf.shape(fg_inds)[0] - max_fg)
            # This function should never run if num_fg_inds <= max_fg, so we
            # add an assertion to catch the wrong behaviour if it happens.
            integrity_assertion = tf.assert_positive(
                disable_place,
                message="disable_place in disable_some_fgs is negative."
            )
            with tf.control_dependencies([integrity_assertion]):
                disable_inds = shuffled_inds[:disable_place]
            is_disabled = tf.sparse_to_dense(
                sparse_indices=disable_inds,
                sparse_values=True, default_value=False,
                output_shape=tf.cast(proposals_label_shape, tf.int64),
                # We are shuffling the indices, so they may not be ordered.
                validate_indices=False
            )
            return tf.where(
                condition=is_disabled,
                # We set it to -label for debugging purposes.
                x=tf.negative(proposals_label),
                y=proposals_label
            )
        # Disable some fgs if we have too many foregrounds.
        proposals_label = tf.cond(
            tf.greater(tf.shape(fg_inds)[0], max_fg),
            true_fn=disable_some_fgs,
            false_fn=lambda: proposals_label
        )

        total_fg_in_batch = tf.shape(
            tf.where(
                condition=tf.greater(proposals_label, 0)
            )
        )[0]

        # Now we want to do the same for backgrounds.
        # We calculate up to how many backgrounds we desire based on the
        # final number of foregrounds and the total desired batch size.
        max_bg = self._minibatch_size - total_fg_in_batch

        # We can't use bg_condition because some of the proposals that satisfy
        # the IoU conditions to be background may have been labeled as
        # foreground due to them being the best proposal for a certain gt_box.
        bg_mask = tf.equal(proposals_label, 0)
        bg_inds = tf.where(
            condition=bg_mask,
        )

        def disable_some_bgs():
            # Mutatis mutandis, all comments from disable_some_fgs apply.
            shuffled_inds = tf.random_shuffle(bg_inds, seed=self._seed)
            disable_place = (tf.shape(bg_inds)[0] - max_bg)
            integrity_assertion = tf.assert_non_negative(
                disable_place,
                message="disable_place in disable_some_bgs is negative."
            )
            with tf.control_dependencies([integrity_assertion]):
                disable_inds = shuffled_inds[:disable_place]
            is_disabled = tf.sparse_to_dense(
                sparse_indices=disable_inds,
                sparse_values=True, default_value=False,
                output_shape=tf.cast(proposals_label_shape, tf.int64),
                validate_indices=False
            )
            return tf.where(
                condition=is_disabled,
                x=tf.fill(
                    dims=proposals_label_shape,
                    value=-1.
                ),
                y=proposals_label
            )

        proposals_label = tf.cond(
            tf.greater_equal(tf.shape(bg_inds)[0], max_bg),
            true_fn=disable_some_bgs,
            false_fn=lambda: proposals_label
        )

        """
        Next step is to calculate the proper targets for the proposals labeled
        based on the values of the ground-truth boxes.
        We have to use only the proposals labeled >= 1, each matching with
        the proper gt_boxes
        """

        # Get the ids of the proposals that matter for bbox_target comparison.
        is_proposal_with_target = tf.greater(
            proposals_label, 0
        )
        proposals_with_target_idx = tf.where(
            condition=is_proposal_with_target
        )
        # Get the corresponding ground truth box only for the proposals with
        # target.
        gt_boxes_idxs = tf.gather(
            overlaps_best_gt_idxs,
            proposals_with_target_idx
        )
        # Get the values of the ground truth boxes.
        proposals_gt_boxes = tf.gather_nd(
            gt_boxes[:, :4], gt_boxes_idxs
        )
        # We create the same array but with the proposals
        proposals_with_target = tf.gather_nd(
            proposals,
            proposals_with_target_idx
        )
        # We create our targets with bbox_transform
        bbox_targets_nonzero = encode(
            proposals_with_target,
            proposals_gt_boxes,
        )
        # TODO: We should normalize it in order for bbox_targets to have zero
        # mean and unit variance according to the paper.

        # We unmap targets to proposal_labels (containing the length of
        # proposals)
        bbox_targets = tf.scatter_nd(
            indices=proposals_with_target_idx,
            updates=bbox_targets_nonzero,
            shape=tf.cast(tf.shape(proposals), tf.int64)
        )

        return proposals_label, bbox_targets
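
The threshold logic of this _build can likewise be summarized in NumPy; a simplified sketch with assumed thresholds (background in [0.1, 0.5), foreground at 0.5) and without the minibatch subsampling:

import numpy as np

def label_proposals(overlaps, gt_labels, bg_low=0.1, bg_high=0.5, fg_thresh=0.5):
    labels = np.full(overlaps.shape[0], -1.0)      # ignored by default
    max_overlaps = overlaps.max(axis=1)
    best_gt = overlaps.argmax(axis=1)
    # Background: max IoU in [bg_low, bg_high).
    labels[(max_overlaps >= bg_low) & (max_overlaps < bg_high)] = 0.0
    # Foreground: max IoU above fg_thresh gets the matching gt class (+1 keeps 0 for bg).
    fg = max_overlaps >= fg_thresh
    labels[fg] = gt_labels[best_gt[fg]] + 1.0
    # The best proposal for each gt is forced to foreground with that gt's class.
    labels[overlaps.argmax(axis=0)] = gt_labels + 1.0
    return labels

overlaps = np.array([[0.7, 0.2],
                     [0.3, 0.4],
                     [0.05, 0.0]])
gt_labels = np.array([2., 0.])
print(label_proposals(overlaps, gt_labels))  # [ 3.  1. -1.]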
Example #6
    def _build(self, proposals, gt_boxes):
        """
        Args:
            proposals: A Tensor with the RPN bounding boxes proposals.
                The shape of the Tensor is (num_proposals, 4).
            gt_boxes: A Tensor with the ground truth boxes for the image.
                The shape of the Tensor is (num_gt, 5), having the truth label
                as the last value for each box.
        Returns:
            proposals_label: Either a truth value of the proposals (a value
                between 0 and num_classes, with 0 being background), or -1 when
                the proposal is to be ignored in the minibatch.
                The shape of the Tensor is (num_proposals, 1).
            bbox_targets: A bounding box regression target for each of the
                proposals that have a label greater than zero. For every
                other proposal we return zeros.
                The shape of the Tensor is (num_proposals, 4).
        """
        overlaps = bbox_overlap_tf(proposals, gt_boxes[:, :4])
        # overlaps now contains (num_proposals, num_gt_boxes) with the IoU of
        # proposal P and ground truth box G in overlaps[P, G]

        # We are going to label each proposal based on the IoU with
        # `gt_boxes`. Start by filling the labels with -1, marking them as
        # ignored.
        proposals_label_shape = tf.gather(tf.shape(proposals), [0])
        proposals_label = tf.fill(
            dims=proposals_label_shape,
            value=-1.
        )
        # For each overlap there are three possible outcomes for labelling:
        #  if max(iou) < config.background_threshold_low then we ignore.
        #  elif max(iou) <= config.background_threshold_high then we label
        #      background.
        #  elif max(iou) > config.foreground_threshold then we label with
        #      the highest IoU in overlap.
        #
        # max_overlaps gets, for each proposal, the highest IoU it has with
        # any of the gt_boxes.
        max_overlaps = tf.reduce_max(overlaps, axis=1)

        iou_is_high_enough_for_bg = tf.greater_equal(
            max_overlaps, self._background_threshold_low
        )
        iou_is_not_too_high_for_bg = tf.less(
            max_overlaps, self._background_threshold_high
        )
        bg_condition = tf.logical_and(
            iou_is_high_enough_for_bg, iou_is_not_too_high_for_bg
        )
        proposals_label = tf.where(
            condition=bg_condition,
            x=tf.zeros_like(proposals_label, dtype=tf.float32),
            y=proposals_label
        )

        # Get the index of the best gt_box for each proposal.
        overlaps_best_gt_idxs = tf.argmax(overlaps, axis=1)
        # Having the index of the gt bbox with the best label we need to get
        # the label for each gt box and sum it one because 0 is used for
        # background.
        best_fg_labels_for_proposals = tf.add(
            tf.gather(gt_boxes[:, 4], overlaps_best_gt_idxs),
            1.
        )
        iou_is_fg = tf.greater_equal(
            max_overlaps, self._foreground_threshold
        )
        best_proposals_idxs = tf.argmax(overlaps, axis=0)

        # Set the indices in best_proposals_idxs to True, and the rest to
        # false.
        # tf.sparse_to_dense is used because we know the set of indices which
        # we want to set to True, and we know the rest of the indices
        # should be set to False. That's exactly the use case of
        # tf.sparse_to_dense.
        is_best_box = tf.sparse_to_dense(
            sparse_indices=tf.reshape(best_proposals_idxs, [-1]),
            sparse_values=True, default_value=False,
            output_shape=tf.cast(proposals_label_shape, tf.int64),
            validate_indices=False
        )
        # We update proposals_label with the value in
        # best_fg_labels_for_proposals only when the box is foreground.
        proposals_label = tf.where(
            condition=iou_is_fg,
            x=best_fg_labels_for_proposals,
            y=proposals_label
        )
        # Now we need to find the proposals that are the best for each of the
        # gt_boxes. We overwrite the previous proposals_label with this
        # because setting the best proposal for each gt_box has priority.
        best_proposals_gt_labels = tf.sparse_to_dense(
            sparse_indices=tf.reshape(best_proposals_idxs, [-1]),
            sparse_values=gt_boxes[:, 4] + 1,
            default_value=0.,
            output_shape=tf.cast(proposals_label_shape, tf.int64),
            validate_indices=False,
            name="get_right_labels_for_bestboxes"
        )
        proposals_label = tf.where(
            condition=is_best_box,
            x=best_proposals_gt_labels,
            y=proposals_label,
            name="update_labels_for_bestbox_proposals"
        )

        # proposals_label now has a value in [0, num_classes + 1] for
        # proposals we are going to use and -1 for the ones we should ignore.
        # But we still need to make sure we don't have a number of proposals
        # higher than minibatch_size * foreground_fraction.
        max_fg = int(self._foreground_fraction * self._minibatch_size)
        fg_condition = tf.logical_or(
            iou_is_fg, is_best_box
        )
        fg_inds = tf.where(
            condition=fg_condition
        )

        def disable_some_fgs():
            # We want to delete a randomly-selected subset of fg_inds of
            # size `fg_inds.shape[0] - max_fg`.
            # We shuffle along the dimension 0 and then we get the first
            # num_fg_inds - max_fg indices and we disable them.
            shuffled_inds = tf.random_shuffle(fg_inds, seed=self._seed)
            disable_place = (tf.shape(fg_inds)[0] - max_fg)
            # This function should never run if num_fg_inds <= max_fg, so we
            # add an assertion to catch the wrong behaviour if it happens.
            integrity_assertion = tf.assert_positive(
                disable_place,
                message="disable_place in disable_some_fgs is negative."
            )
            with tf.control_dependencies([integrity_assertion]):
                disable_inds = shuffled_inds[:disable_place]
            is_disabled = tf.sparse_to_dense(
                sparse_indices=disable_inds,
                sparse_values=True, default_value=False,
                output_shape=tf.cast(proposals_label_shape, tf.int64),
                # We are shuffling the indices, so they may not be ordered.
                validate_indices=False
            )
            return tf.where(
                condition=is_disabled,
                # We set it to -label for debugging purposes.
                x=tf.negative(proposals_label),
                y=proposals_label
            )
        # Disable some fgs if we have too many foregrounds.
        proposals_label = tf.cond(
            tf.greater(tf.shape(fg_inds)[0], max_fg),
            true_fn=disable_some_fgs,
            false_fn=lambda: proposals_label
        )

        total_fg_in_batch = tf.shape(
            tf.where(
                condition=tf.greater(proposals_label, 0)
            )
        )[0]

        # Now we want to do the same for backgrounds.
        # We calculate up to how many backgrounds we desire based on the
        # final number of foregrounds and the total desired batch size.
        max_bg = self._minibatch_size - total_fg_in_batch

        # We can't use bg_condition because some of the proposals that satisfy
        # the IoU conditions to be background may have been labeled as
        # foreground due to them being the best proposal for a certain gt_box.
        bg_mask = tf.equal(proposals_label, 0)
        bg_inds = tf.where(
            condition=bg_mask,
        )

        def disable_some_bgs():
            # Mutatis mutandis, all comments from disable_some_fgs apply.
            shuffled_inds = tf.random_shuffle(bg_inds, seed=self._seed)
            disable_place = (tf.shape(bg_inds)[0] - max_bg)
            integrity_assertion = tf.assert_non_negative(
                disable_place,
                message="disable_place in disable_some_bgs is negative."
            )
            with tf.control_dependencies([integrity_assertion]):
                disable_inds = shuffled_inds[:disable_place]
            is_disabled = tf.sparse_to_dense(
                sparse_indices=disable_inds,
                sparse_values=True, default_value=False,
                output_shape=tf.cast(proposals_label_shape, tf.int64),
                validate_indices=False
            )
            return tf.where(
                condition=is_disabled,
                x=tf.fill(
                    dims=proposals_label_shape,
                    value=-1.
                ),
                y=proposals_label
            )

        proposals_label = tf.cond(
            tf.greater_equal(tf.shape(bg_inds)[0], max_bg),
            true_fn=disable_some_bgs,
            false_fn=lambda: proposals_label
        )

        """
        Next step is to calculate the proper targets for the proposals labeled
        based on the values of the ground-truth boxes.
        We have to use only the proposals labeled >= 1, each matching with
        the proper gt_boxes
        """

        # Get the ids of the proposals that matter for bbox_target comparison.
        is_proposal_with_target = tf.greater(
            proposals_label, 0
        )
        proposals_with_target_idx = tf.where(
            condition=is_proposal_with_target
        )
        # Get the corresponding ground truth box only for the proposals with
        # target.
        gt_boxes_idxs = tf.gather(
            overlaps_best_gt_idxs,
            proposals_with_target_idx
        )
        # Get the values of the ground truth boxes.
        proposals_gt_boxes = tf.gather_nd(
            gt_boxes[:, :4], gt_boxes_idxs
        )
        # We create the same array but with the proposals
        proposals_with_target = tf.gather_nd(
            proposals,
            proposals_with_target_idx
        )
        # We create our targets with bbox_transform.
        bbox_targets_nonzero = encode(
            proposals_with_target,
            proposals_gt_boxes,
            variances=self._variances,
        )

        # We unmap targets to proposal_labels (containing the length of
        # proposals)
        bbox_targets = tf.scatter_nd(
            indices=proposals_with_target_idx,
            updates=bbox_targets_nonzero,
            shape=tf.cast(tf.shape(proposals), tf.int64)
        )

        return proposals_label, bbox_targets
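
A minimal sketch of the tf.scatter_nd "unmap" used at the end of these examples (TF 1.x assumed): values computed only for a subset of rows are written back into a full-sized tensor, with zeros everywhere else:

import tensorflow as tf

kept_idx = tf.constant([[0], [3]], dtype=tf.int32)       # rows that received targets
kept_targets = tf.constant([[1., 1., 1., 1.],
                            [2., 2., 2., 2.]])
full = tf.scatter_nd(indices=kept_idx, updates=kept_targets, shape=[5, 4])
with tf.Session() as sess:
    print(sess.run(full))  # rows 0 and 3 are filled, rows 1, 2 and 4 stay zero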
Example #7
    def generate_PartDetector_features(self, input_image, input_feature,
                                       gt_boxes, only_main_part_boxes=False):
        assert only_main_part_boxes in [True, False]
        main_part_label = self._main_part_label

        image_h, image_w = tf.split(tf.shape(input_image)[0:2], num_or_size_splits=2)
        feature_h, feature_w = tf.split(tf.shape(input_feature)[1:3], num_or_size_splits=2)

        main_part_gt_boxes = tf.boolean_mask(
            gt_boxes, tf.reshape(tf.equal(gt_boxes[..., -1], main_part_label), [-1]))
        if not only_main_part_boxes:
            not_main_part_gt_boxes = tf.boolean_mask(
                gt_boxes, tf.reshape(
                    tf.logical_not(tf.equal(gt_boxes[..., -1], main_part_label)),
                    [-1]))

            iou_tensor = bbox_overlap_tf(
                main_part_gt_boxes[:, :4], not_main_part_gt_boxes[:, :4])

            reproduce_iou = iou_tensor > tf.constant(0.0, dtype=tf.float32)
            intersection_indexes = tf.where(reproduce_iou)
            intersection_indexes = tf.cast(intersection_indexes, dtype=tf.int32)

        #### total shape [1 + 24 * 24 * 256 + 7 * 5] = [147492]
        def single_patch_image(patch_dict, image_resize=(24, 24),
                               bboxes_padding_range=7.0):
            image = patch_dict["image"]
            im_shape = tf.shape(image)
            shape_prod = im_shape[0] * im_shape[1] * im_shape[2]

            image = tf.cond(
                tf.greater(shape_prod, 0),
                true_fn=lambda : tf.image.resize_images(tf.expand_dims(image, 0), size=image_resize),
                false_fn=lambda : tf.zeros(shape=[1 ,24, 24, 256], dtype=tf.float32)
            )

            image = tf.layers.max_pooling2d(
                inputs=image,
                pool_size=(2, 2), strides=(1, 1),
                padding='same',
            )
            image_flatten = tf.reshape(image, [-1])

            if not only_main_part_boxes:
                bboxes = tf.cast(patch_dict["bboxes"], dtype=tf.float32)
                bboxes = bboxes[:tf.cast(bboxes_padding_range, tf.int32), ...]

                num_bboxes = tf.cast(tf.shape(bboxes)[0], tf.float32)
                bboxes_padding = tf.concat(
                    [bboxes,
                     tf.zeros(shape=[tf.cast(bboxes_padding_range - num_bboxes,
                                             dtype=tf.int32), 5])],
                    axis=0)
                bboxes_flatten = tf.reshape(bboxes_padding, [-1])
                num_bboxes = tf.reshape(num_bboxes, [-1])

                return tf.concat([num_bboxes, image_flatten, bboxes_flatten], axis=0)

            return image_flatten

        def single_map(main_index):
            t4 = tf.reshape(tf.cast(main_part_gt_boxes[main_index][:4], tf.int32), [-1])
            #return t4

            if not only_main_part_boxes:
                # Non-main-part boxes that intersect this main-part box.
                part_idxs = tf.reshape(tf.gather(
                    intersection_indexes[:, -1],
                    tf.where(tf.equal(intersection_indexes[:, 0], main_index))), [-1])
                bbox = tf.cast(tf.gather(not_main_part_gt_boxes, part_idxs), tf.int32)
                bbox = bbox[:, :5]
                patch_bbox_conclusion = patch_image(
                    image=input_image, bboxes=bbox,
                    offset_width=t4[0], offset_height=t4[1],
                    target_width=t4[2] - t4[0] + 1, target_height=t4[3] - t4[1] + 1
                )
                bboxes_patched = patch_bbox_conclusion["bboxes"]
            else:
                bboxes_patched = None

            # Map the main-part box from image coordinates to feature-map
            # coordinates before slicing the backbone feature.
            img_h_f = tf.cast(image_h, tf.float32)
            img_w_f = tf.cast(image_w, tf.float32)
            feat_h_f = tf.cast(feature_h, tf.float32)
            feat_w_f = tf.cast(feature_w, tf.float32)
            begin_y = tf.reshape(
                tf.cast(tf.cast(t4[1], tf.float32) / img_h_f * feat_h_f, tf.int32), [])
            begin_x = tf.reshape(
                tf.cast(tf.cast(t4[0], tf.float32) / img_w_f * feat_w_f, tf.int32), [])
            size_y = tf.reshape(
                tf.cast(tf.cast(t4[3] - t4[1], tf.float32) / img_h_f * feat_h_f, tf.int32), [])
            size_x = tf.reshape(
                tf.cast(tf.cast(t4[2] - t4[0], tf.float32) / img_w_f * feat_w_f, tf.int32), [])
            patch_feature_conclusion = tf.slice(
                input_feature,
                begin=[0, begin_y, begin_x, 0],
                size=[-1, size_y, size_x, -1])

            feature_patched = tf.squeeze(patch_feature_conclusion, 0)

            patch_conclusion = {
                "image": feature_patched,
                "bboxes": bboxes_patched
            }

            #### When only_main_part_boxes is True, single_tensor is just the
            #### flattened feature patch, so the result has 5 + 24 * 24 * 256
            #### = 147461 values.
            single_tensor = single_patch_image(patch_conclusion)

            #### Prepend the 5 values of the main-part gt box itself.
            concat_tensor = tf.concat(
                [tf.reshape(main_part_gt_boxes[main_index], [-1]), single_tensor],
                axis=0)
            return concat_tensor

        if only_main_part_boxes:
            return tf.map_fn(
                single_map,
                tf.cast(tf.range(tf.shape(main_part_gt_boxes)[0]), tf.int32),
                dtype=tf.float32)
        # If no main-part box intersects any other part box there is nothing
        # to map over, so return an empty tensor of the expected flattened width.
        return tf.cond(
            tf.greater(
                tf.reduce_sum(tf.reshape(tf.cast(reproduce_iou, tf.float32), [-1])),
                0.0),
            true_fn=lambda: tf.map_fn(
                single_map, intersection_indexes[:, 0], dtype=tf.float32),
            false_fn=lambda: tf.zeros([0, 147497], dtype=tf.float32)
        )
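
A minimal sketch of the coordinate scaling single_map performs before tf.slice, with a made-up 480x640 image and 30x40 feature map (TF 1.x assumed):

import tensorflow as tf

feature = tf.zeros([1, 30, 40, 256])
x_min, y_min, x_max, y_max = 64, 96, 320, 240             # box in image pixels
img_h, img_w, feat_h, feat_w = 480., 640., 30., 40.

begin_y = tf.cast(y_min / img_h * feat_h, tf.int32)             # 6
begin_x = tf.cast(x_min / img_w * feat_w, tf.int32)             # 4
size_y = tf.cast((y_max - y_min) / img_h * feat_h, tf.int32)    # 9
size_x = tf.cast((x_max - x_min) / img_w * feat_w, tf.int32)    # 16
patch = tf.slice(feature, begin=[0, begin_y, begin_x, 0],
                 size=[-1, size_y, size_x, -1])

with tf.Session() as sess:
    print(sess.run(tf.shape(patch)))  # [ 1  9 16 256]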
Example #8
    def _build(self, probs, all_anchors, gt_boxes):
        """
        Args:
            probs: A Tensor with the predicted class probabilities for each
                anchor (background at index 0), used to mine hard negatives.
            all_anchors: A Tensor with anchors for all of SSD's features.
                The shape of the Tensor is (num_anchors, 4).
            gt_boxes: A Tensor with the ground truth boxes for the image.
                The shape of the Tensor is (num_gt, 5), having the truth label
                as the last value for each box.
        Returns:
            class_targets: Either a truth value of the anchor (a value
                between 0 and num_classes, with 0 being background), or -1 when
                the anchor is to be ignored in the minibatch.
                The shape of the Tensor is (num_anchors, 1).
            bbox_offsets_targets: A bounding box regression target for each of
                the anchors that have a label greater than zero. For every
                other anchor we return zeros.
                The shape of the Tensor is (num_anchors, 4).
        """

        all_anchors = tf.cast(all_anchors, tf.float32)
        gt_boxes = tf.cast(gt_boxes, tf.float32)

        # We are going to label each anchor based on the IoU with
        # `gt_boxes`. Start by filling the labels with -1, marking them as
        # unknown.
        anchors_label_shape = tf.gather(tf.shape(all_anchors), [0])
        anchors_label = tf.fill(dims=anchors_label_shape, value=-1.)

        overlaps = bbox_overlap_tf(all_anchors, gt_boxes[:, :4])
        max_overlaps = tf.reduce_max(overlaps, axis=1)

        # Get the index of the best gt_box for each anchor.
        best_gtbox_for_anchors_idx = tf.argmax(overlaps, axis=1)

        # Having the index of the gt bbox with the best label we need to get
        # the label for each gt box and sum 1 to it because 0 is used for
        # background.
        best_fg_labels_for_anchors = tf.add(
            tf.gather(gt_boxes[:, 4], best_gtbox_for_anchors_idx), 1.)
        iou_is_fg = tf.greater_equal(max_overlaps, self._foreground_threshold)
        # We update anchors_label with the value in
        # best_fg_labels_for_anchors only when the box is foreground.
        # TODO: Replace with a sparse_to_dense with -1 default_value
        anchors_label = tf.where(condition=iou_is_fg,
                                 x=best_fg_labels_for_anchors,
                                 y=anchors_label)

        best_anchor_idxs = tf.argmax(overlaps, axis=0)
        is_best_box = tf.sparse_to_dense(sparse_indices=best_anchor_idxs,
                                         sparse_values=True,
                                         default_value=False,
                                         output_shape=tf.cast(
                                             anchors_label_shape, tf.int64),
                                         validate_indices=False)

        # Now we need to find the anchors that are the best for each of the
        # gt_boxes. We overwrite the previous anchors_label with this
        # because setting the best anchor for each gt_box has priority.
        best_anchors_gt_labels = tf.sparse_to_dense(
            sparse_indices=best_anchor_idxs,
            sparse_values=gt_boxes[:, 4] + 1,
            default_value=-1,
            output_shape=tf.cast(anchors_label_shape, tf.int64),
            validate_indices=False,
            name="get_right_labels_for_bestboxes")
        anchors_label = tf.where(condition=is_best_box,
                                 x=best_anchors_gt_labels,
                                 y=anchors_label,
                                 name="update_labels_for_bestbox_anchors")

        # Use the worst backgrounds (the bgs whose probability of being fg is
        # the greatest).
        cls_probs = probs[:, 1:]
        max_cls_probs = tf.reduce_max(cls_probs, axis=1)

        # Exclude boxes with IOU > `background_threshold_high` with any GT.
        iou_less_than_bg_tresh_high_filter = tf.less_equal(
            max_overlaps, self._background_threshold_high)
        bg_anchors = tf.less_equal(anchors_label, 0)
        bg_overlaps_filter = tf.logical_and(iou_less_than_bg_tresh_high_filter,
                                            bg_anchors)

        max_cls_probs = tf.where(
            condition=bg_overlaps_filter,
            x=max_cls_probs,
            y=tf.fill(dims=anchors_label_shape, value=-1.),
        )

        # We calculate up to how many backgrounds we desire based on the
        # final number of foregrounds and the hard negative mining ratio.
        num_fg_mask = tf.greater(anchors_label, 0.0)
        num_fg = tf.cast(tf.count_nonzero(num_fg_mask), tf.float32)

        num_bg = tf.cast(num_fg * self._hard_negative_ratio, tf.int32)
        top_k_bg = tf.nn.top_k(max_cls_probs, k=num_bg)

        set_bg = tf.sparse_to_dense(sparse_indices=top_k_bg.indices,
                                    sparse_values=True,
                                    default_value=False,
                                    output_shape=anchors_label_shape,
                                    validate_indices=False)

        anchors_label = tf.where(condition=set_bg,
                                 x=tf.fill(dims=anchors_label_shape, value=0.),
                                 y=anchors_label)

        # Next step is to calculate the proper bbox targets for the labeled
        # anchors based on the values of the ground-truth boxes.
        # We have to use only the anchors labeled >= 1, each matching with
        # the proper gt_boxes

        # Get the ids of the anchors that matter for bbox_target comparison.
        is_anchor_with_target = tf.greater(anchors_label, 0)
        anchors_with_target_idx = tf.where(condition=is_anchor_with_target)
        # Get the corresponding ground truth box only for the anchors with
        # target.
        gt_boxes_idxs = tf.gather(best_gtbox_for_anchors_idx,
                                  anchors_with_target_idx)
        # Get the values of the ground truth boxes.
        anchors_gt_boxes = tf.gather_nd(gt_boxes[:, :4], gt_boxes_idxs)
        # We create the same array but with the anchors
        anchors_with_target = tf.gather_nd(all_anchors,
                                           anchors_with_target_idx)
        # We create our targets with bbox_transform
        bbox_targets = encode(anchors_with_target,
                              anchors_gt_boxes,
                              variances=self._variances)

        # We unmap targets to anchor_labels (containing the length of
        # anchors)
        bbox_targets = tf.scatter_nd(indices=anchors_with_target_idx,
                                     updates=bbox_targets,
                                     shape=tf.cast(tf.shape(all_anchors),
                                                   tf.int64))

        return anchors_label, bbox_targets
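
The hard negative mining step above, reduced to a NumPy sketch (a hard_negative_ratio of 2 is assumed just for the example): only the backgrounds whose best foreground probability is highest get label 0, the rest stay ignored:

import numpy as np

def hard_negative_mine(labels, max_cls_probs, hard_negative_ratio=3):
    # labels: anchor labels after foreground assignment (-1 = ignore, >0 = class).
    # max_cls_probs: per-anchor max non-background probability, already set to -1
    # for anchors that are not allowed to become background.
    num_bg = int((labels > 0).sum()) * hard_negative_ratio
    hardest = np.argsort(-max_cls_probs)[:num_bg]    # hardest negatives first
    labels = labels.copy()
    labels[hardest] = 0.0
    return labels

labels = np.array([2., -1., -1., -1., -1.])
probs = np.array([-1., 0.9, 0.2, 0.6, 0.1])
print(hard_negative_mine(labels, probs, hard_negative_ratio=2))  # [ 2.  0. -1.  0. -1.]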
Example #9
    def _build(self, proposals, gt_boxes):
        """
        Args:
            proposals: A Tensor with the RPN bounding box proposals.
                The shape of the Tensor is (num_proposals, 4).
                These are the bounding box proposals produced by the RPN.
            gt_boxes: A Tensor with the ground truth boxes for the image.
                The shape of the Tensor is (num_gt, 5), having the truth label
                as the last value for each box.
        Returns:
            proposals_label: Either a truth value of the proposals (a value
                between 0 and num_classes, with 0 being background), or -1 when
                the proposal is to be ignored in the minibatch.
                The shape of the Tensor is (num_proposals, 1).
                For each proposal this is a value between 0 and num_classes
                indicating its class, and -1 marks ignored proposals. The
                result already accounts for balancing foreground and
                background samples inside the minibatch.
            bbox_targets: A bounding box regression target for each of the
                proposals that have a label greater than zero. For every
                other proposal we return zeros.
                The shape of the Tensor is (num_proposals, 4).
                At each foreground proposal we store the 4 offset/scale values
                between the proposal and its best-matching ground truth box;
                every other row is zero.
        """
        # Compute the IoU matrix (num_proposals, num_gt_boxes).
        overlaps = bbox_overlap_tf(proposals, gt_boxes[:, :4])
        # overlaps now contains (num_proposals, num_gt_boxes) with the IoU of
        # proposal P and ground truth box G in overlaps[P, G]

        # We are going to label each proposal based on the IoU with
        # `gt_boxes`. Start by filling the labels with -1, marking them as
        # ignored.
        # tf.gather picks the values at the given indices.
        # (num_proposals, 4) -> [num_proposals]
        proposals_label_shape = tf.gather(tf.shape(proposals), [0])
        # (num_proposals,) filled with -1.
        proposals_label = tf.fill(dims=proposals_label_shape, value=-1.)
        # For each overlap there are three possible outcomes for labelling:
        #  if max(iou) < config.background_threshold_low then we ignore.
        #  elif max(iou) <= config.background_threshold_high then we label
        #      background.
        #  elif max(iou) > config.foreground_threshold then we label with
        #      the highest IoU in overlap.
        #
        # max_overlaps gets, for each proposal, the highest IoU it has with
        # any of the gt_boxes.
        # (num_proposals, ) <= (num_proposals, num_gt_boxes)
        max_overlaps = tf.reduce_max(overlaps, axis=1)

        iou_is_high_enough_for_bg = tf.greater_equal(
            max_overlaps, self._background_threshold_low)
        iou_is_not_too_high_for_bg = tf.less(max_overlaps,
                                             self._background_threshold_high)

        # Build the background proposal set.
        bg_condition = tf.logical_and(iou_is_high_enough_for_bg,
                                      iou_is_not_too_high_for_bg)

        # Background proposals get label 0; everything else keeps its current
        # value (-1 so far).
        proposals_label = tf.where(condition=bg_condition,
                                   x=tf.zeros_like(proposals_label,
                                                   dtype=tf.float32),
                                   y=proposals_label)

        # Get the index of the best gt_box for each proposal.
        # (num_proposals, ) <= (num_proposals, num_gt_boxes)
        overlaps_best_gt_idxs = tf.argmax(overlaps, axis=1)
        # Having the index of the gt bbox with the best label we need to get
        # the label for each gt box and add one to it because 0 is used for
        # background.
        # (num_proposals, ) -> (num_overlaps_best_gt_idxs, )
        best_fg_labels_for_proposals = tf.add(
            tf.gather(gt_boxes[:, 4], overlaps_best_gt_idxs), 1.)

        # Find the foreground proposals.
        iou_is_fg = tf.greater_equal(max_overlaps, self._foreground_threshold)
        # Index of the best-matching proposal for each gt box.
        # (num_gt_boxes, 1) <= (num_proposals, num_gt_boxes)
        best_proposals_idxs = tf.argmax(overlaps, axis=0)

        # Set the indices in best_proposals_idxs to True, and the rest to
        # false.
        # tf.sparse_to_dense is used because we know the set of indices which
        # we want to set to True, and we know the rest of the indices
        # should be set to False. That's exactly the use case of
        # tf.sparse_to_dense.
        # sparse_to_dense builds a tensor of size output_shape (num_proposals)
        # filled with default_value and writes sparse_values at the
        # sparse_indices positions; here it marks as True the proposals that
        # are the best match for some gt box.
        is_best_box = tf.sparse_to_dense(
            sparse_indices=tf.reshape(best_proposals_idxs, [-1]),
            sparse_values=True,
            default_value=False,
            output_shape=tf.cast(proposals_label_shape, tf.int64),
            validate_indices=False)

        # Write, for each foreground proposal, the class of its best gt box
        # into the (num_proposals,) label vector.
        # We update proposals_label with the value in
        # best_fg_labels_for_proposals only when the box is foreground.
        proposals_label = tf.where(condition=iou_is_fg,
                                   x=best_fg_labels_for_proposals,
                                   y=proposals_label)
        # Now we need to find the proposals that are the best for each of the
        # gt_boxes. We overwrite the previous proposals_label with this
        # because setting the best proposal for each gt_box has priority.
        # The next two ops write, at the position of the best proposal for
        # each gt box, that box's class label + 1, leaving every other
        # position at zero; the best proposal for each gt box must always be
        # kept as foreground.
        best_proposals_gt_labels = tf.sparse_to_dense(
            sparse_indices=tf.reshape(best_proposals_idxs, [-1]),
            sparse_values=gt_boxes[:, 4] + 1,
            default_value=0.,
            output_shape=tf.cast(proposals_label_shape, tf.int64),
            validate_indices=False,
            name="get_right_labels_for_bestboxes")
        # Update the label at the best proposal for each gt box.
        proposals_label = tf.where(condition=is_best_box,
                                   x=best_proposals_gt_labels,
                                   y=proposals_label,
                                   name="update_labels_for_bestbox_proposals")

        # proposals_label now has a value in [0, num_classes + 1] for
        # proposals we are going to use and -1 for the ones we should ignore.
        # But we still need to make sure we don't have a number of proposals
        # higher than minibatch_size * foreground_fraction.
        # Before deciding which proposals to ignore, make sure the number and
        # ratio of positive and negative samples is right.
        max_fg = int(self._foreground_fraction * self._minibatch_size)
        # Foreground means: the best proposal for each gt box plus every
        # proposal whose IoU is above the foreground threshold; the logical_or
        # merges both sets.
        fg_condition = tf.logical_or(iou_is_fg, is_best_box)
        # Indices of the foreground proposals.
        fg_inds = tf.where(condition=fg_condition)

        # Drop foregrounds that exceed the allowed fraction.
        def disable_some_fgs():
            # We want to delete a randomly-selected subset of fg_inds of
            # size `fg_inds.shape[0] - max_fg`.
            # We shuffle along the dimension 0 and then we get the first
            # num_fg_inds - max_fg indices and we disable them.
            shuffled_inds = tf.random_shuffle(fg_inds, seed=self._seed)
            disable_place = (tf.shape(fg_inds)[0] - max_fg)
            # This function should never run if num_fg_inds <= max_fg, so we
            # add an assertion to catch the wrong behaviour if it happens.
            integrity_assertion = tf.assert_positive(
                disable_place,
                message="disable_place in disable_some_fgs is negative.")
            with tf.control_dependencies([integrity_assertion]):
                disable_inds = shuffled_inds[:disable_place]
            is_disabled = tf.sparse_to_dense(
                sparse_indices=disable_inds,
                sparse_values=True,
                default_value=False,
                output_shape=tf.cast(proposals_label_shape, tf.int64),
                # We are shuffling the indices, so they may not be ordered.
                validate_indices=False)

            # Disabled proposals simply get their label negated.
            return tf.where(
                condition=is_disabled,
                # We set it to -label for debugging purposes.
                x=tf.negative(proposals_label),
                y=proposals_label)

        # Disable some fgs if we have too many foregrounds.
        proposals_label = tf.cond(tf.greater(tf.shape(fg_inds)[0], max_fg),
                                  true_fn=disable_some_fgs,
                                  false_fn=lambda: proposals_label)
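        # Toy illustration (hypothetical values, not executed): with max_fg = 2
        # and fg_inds = [[1], [4], [7]], a shuffle could give [[7], [1], [4]];
        # disable_place = 3 - 2 = 1, so only proposal 7 is disabled and its
        # label is negated, leaving exactly max_fg foreground proposals.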

        # Count how many foreground proposals remain in the batch.
        total_fg_in_batch = tf.shape(
            tf.where(condition=tf.greater(proposals_label, 0)))[0]

        # Now we want to do the same for backgrounds.
        # We calculate up to how many backgrounds we desire based on the
        # final number of foregrounds and the total desired batch size.
        max_bg = self._minibatch_size - total_fg_in_batch

        # We can't use bg_condition because some of the proposals that satisfy
        # the IoU conditions to be background may have been labeled as
        # foreground due to them being the best proposal for a certain gt_box.
        bg_mask = tf.equal(proposals_label, 0)
        bg_inds = tf.where(condition=bg_mask)

        def disable_some_bgs():
            # Mutatis mutandis, all comments from disable_some_fgs apply.
            shuffled_inds = tf.random_shuffle(bg_inds, seed=self._seed)
            disable_place = (tf.shape(bg_inds)[0] - max_bg)
            integrity_assertion = tf.assert_non_negative(
                disable_place,
                message="disable_place in disable_some_bgs is negative.")
            with tf.control_dependencies([integrity_assertion]):
                disable_inds = shuffled_inds[:disable_place]
            is_disabled = tf.sparse_to_dense(sparse_indices=disable_inds,
                                             sparse_values=True,
                                             default_value=False,
                                             output_shape=tf.cast(
                                                 proposals_label_shape,
                                                 tf.int64),
                                             validate_indices=False)
            return tf.where(condition=is_disabled,
                            x=tf.fill(dims=proposals_label_shape, value=-1.),
                            y=proposals_label)

        proposals_label = tf.cond(tf.greater_equal(
            tf.shape(bg_inds)[0], max_bg),
                                  true_fn=disable_some_bgs,
                                  false_fn=lambda: proposals_label)

        # Next step is to calculate the proper targets for the labeled
        # proposals based on the values of the ground-truth boxes.
        # We have to use only the proposals labeled >= 1 (foreground and not
        # ignored), each matching with the proper gt_boxes.

        # Get the ids of the proposals that matter for bbox_target comparison.
        # Boolean mask of the foreground proposals.
        is_proposal_with_target = tf.greater(proposals_label, 0)
        # Coordinate indices of the foreground proposals.
        proposals_with_target_idx = tf.where(condition=is_proposal_with_target)

        # Get the corresponding ground truth box only for the proposals with
        # target: use the foreground proposal indices to gather from the
        # per-proposal best gt index.
        # overlaps_best_gt_idxs has shape (num_proposals, ).
        gt_boxes_idxs = tf.gather(overlaps_best_gt_idxs,
                                  proposals_with_target_idx)
        # Get the values of the ground truth boxes for each foreground
        # proposal; gather_nd supports multi-dimensional indices.
        proposals_gt_boxes = tf.gather_nd(gt_boxes[:, :4], gt_boxes_idxs)
        # We create the same array but with the proposals themselves,
        # shape (num_proposals, 4), preserving the original coordinate layout.
        proposals_with_target = tf.gather_nd(proposals,
                                             proposals_with_target_idx)
        # We create our targets with bbox_transform: the offsets and scales of
        # each foreground proposal's best ground-truth box relative to the
        # proposal itself.
        bbox_targets_nonzero = encode(
            proposals_with_target,
            proposals_gt_boxes,
            variances=self._variances,
        )

        # We unmap targets to proposal_labels (containing the length of
        # proposals): scatter_nd writes `updates` into a zero tensor at the
        # given indices, so only the foreground positions end up holding the
        # offsets and scales computed above.
        bbox_targets = tf.scatter_nd(indices=proposals_with_target_idx,
                                     updates=bbox_targets_nonzero,
                                     shape=tf.cast(tf.shape(proposals),
                                                   tf.int64))


        return proposals_label, bbox_targets
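The `encode` call above converts each matched (proposal, ground-truth) pair into regression targets. A minimal NumPy sketch of the usual center/size parameterization with variances is shown below; the exact variant implemented by `encode` in this codebase may differ, so `encode_sketch` is an illustrative stand-in rather than the real function.

import numpy as np

def encode_sketch(proposals, gt_boxes, variances=(0.1, 0.1, 0.2, 0.2)):
    """Illustrative (x_min, y_min, x_max, y_max) -> (tx, ty, tw, th) targets."""
    px = (proposals[:, 0] + proposals[:, 2]) / 2.0   # proposal centers
    py = (proposals[:, 1] + proposals[:, 3]) / 2.0
    pw = proposals[:, 2] - proposals[:, 0]           # proposal sizes
    ph = proposals[:, 3] - proposals[:, 1]

    gx = (gt_boxes[:, 0] + gt_boxes[:, 2]) / 2.0     # ground-truth centers
    gy = (gt_boxes[:, 1] + gt_boxes[:, 3]) / 2.0
    gw = gt_boxes[:, 2] - gt_boxes[:, 0]             # ground-truth sizes
    gh = gt_boxes[:, 3] - gt_boxes[:, 1]

    # Center offsets are normalized by the proposal size, sizes by a log
    # ratio; the (assumed) variances rescale each component.
    tx = (gx - px) / pw / variances[0]
    ty = (gy - py) / ph / variances[1]
    tw = np.log(gw / pw) / variances[2]
    th = np.log(gh / ph) / variances[3]
    return np.stack([tx, ty, tw, th], axis=1)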
Пример #10
0
    def _build(self, probs, all_anchors, gt_boxes):
        """
        在向类的实例传值的时候调用
        Args:
            probs: 这里包含了背景的类别, 所以是 num_classes+1
            all_anchors: A Tensor with anchors for all of SSD's features.
                The shape of the Tensor is (num_anchors, 4).
                所有的anchors的原图上的坐标结果
            gt_boxes: A Tensor with the ground truth boxes for the image.
                The shape of the Tensor is (num_gt, 5), having the truth label
                as the last value for each box.
        Returns:
            class_targets: Either a truth value of the anchor (a value
                between 0 and num_classes, with 0 being background), or -1 when
                the anchor is to be ignored in the minibatch.
                The shape of the Tensor is (num_anchors, 1).
                返回各个anchor对应的类别标签
            bbox_offsets_targets: A bounding box regression target for each of
                the anchors that have a greater than zero label. For every
                other anchors we return zeros.
                The shape of the Tensor is (num_anchors, 4).
                返回各个前景anchor对应的坐标偏移量, 其余的返回0
                在all_anchors中前景anchors的位置上更新对应的真实框相对于自身坐标的偏移
                量和缩放量
        """

        all_anchors = tf.cast(all_anchors, tf.float32)
        gt_boxes = tf.cast(gt_boxes, tf.float32)

        # We are going to label each anchor based on the IoU with
        # `gt_boxes`. Start by filling the labels with -1, marking them as
        # unknown.
        # (num_anchors, 1)
        anchors_label_shape = tf.gather(tf.shape(all_anchors), [0])

        # [-1] ###############################################################

        # -1 * (num_anchors, 1)
        anchors_label = tf.fill(dims=anchors_label_shape, value=-1.)

        # (num_anchors, num_gt)
        overlaps = bbox_overlap_tf(all_anchors, gt_boxes[:, :4])
        # (num_anchors, ): for each anchor, the maximum IoU over all gt boxes.
        max_overlaps = tf.reduce_max(overlaps, axis=1)

        #######################################################################
        # From here we reason from the anchors' point of view: an anchor whose
        # IoU with its best ground-truth box exceeds the threshold becomes a
        # positive sample.
        #######################################################################

        # Get the index of the best gt_box for each anchor.
        # (num_anchors, ): for each anchor, the index of its closest gt box.
        best_gtbox_for_anchors_idx = tf.argmax(overlaps, axis=1)

        # Having the index of the gt bbox with the best label we need to get
        # the label for each gt box and sum 1 to it because 0 is used for
        # background.
        # (num_anchors, ): label of each anchor's closest gt box, shifted by 1.
        best_fg_labels_for_anchors = tf.add(
            tf.gather(gt_boxes[:, 4], best_gtbox_for_anchors_idx), 1.)
        # (num_anchors, ): True for the anchors whose max IoU marks them as
        # foreground.
        iou_is_fg = tf.greater_equal(max_overlaps, self._foreground_threshold)

        # [-1] =====> [-1, 1~20 (foreground anchors)] ########################

        # We update anchors_label with the value in
        # best_fg_labels_for_anchors only when the box is foreground.
        # TODO: Replace with a sparse_to_dense with -1 default_value
        # Foreground anchors take the label of their best gt box; all other
        # anchors keep -1.
        anchors_label = tf.where(condition=iou_is_fg,
                                 x=best_fg_labels_for_anchors,
                                 y=anchors_label)

        #######################################################################
        # From here we reason from the ground-truth boxes' point of view: to
        # make sure no gt box is left without anchors, the best anchor for
        # each gt box is also taken as a positive sample.
        #######################################################################

        # (num_gt, ): for each gt box, the index of its best anchor.
        best_anchor_idxs = tf.argmax(overlaps, axis=0)
        # Using those anchor indices, build a dense boolean tensor of shape
        # (num_anchors, ) that is True at the anchors matched to some gt box.
        # These are the minimum set of positive samples, and the tensor acts
        # as the condition for the tf.where below.
        is_best_box = tf.sparse_to_dense(sparse_indices=best_anchor_idxs,
                                         sparse_values=True,
                                         default_value=False,
                                         output_shape=tf.cast(
                                             anchors_label_shape, tf.int64),
                                         validate_indices=False)

        # Now we need to find the anchors that are the best for each of the
        # gt_boxes. We overwrite the previous anchors_label with this
        # because setting the best anchor for each gt_box has priority.
        # Same idea as above, only here the class label is written at those
        # positions instead of a boolean flag.
        best_anchors_gt_labels = tf.sparse_to_dense(
            sparse_indices=best_anchor_idxs,
            sparse_values=gt_boxes[:, 4] + 1,
            default_value=-1,
            output_shape=tf.cast(anchors_label_shape, tf.int64),
            validate_indices=False,
            name="get_right_labels_for_bestboxes")

        # [-1, 1~20 (fg anchors)] =====> [-1, 1~20 (+ best anchor per gt box)]

        # In anchors_label, set the label of each gt box's best anchor to that
        # gt box's class. It may look as if one anchor could end up with
        # several classes here, but this is an update with higher priority, so
        # it simply overrides the previous assignment.
        anchors_label = tf.where(condition=is_best_box,
                                 x=best_anchors_gt_labels,
                                 y=anchors_label,
                                 name="update_labels_for_bestbox_anchors")

        # Use the worst backgrounds (the bgs whose probability of being fg is
        # the greatest).
        # (num_anchors, num_classes): drop the background column and keep the
        # foreground class probabilities of every anchor.
        cls_probs = probs[:, 1:]
        # Maximum foreground-class probability for each anchor.
        max_cls_probs = tf.reduce_max(cls_probs, axis=1)

        # Exclude boxes with IOU > `background_threshold_high` with any GT.
        # An anchor can only end up labeled as background if its max IoU with
        # every gt box is below the background threshold (0.2) and its label
        # is still <= 0; in practice that means < 0, since the 0 label has not
        # been assigned yet.
        iou_less_than_bg_tresh_high_filter = tf.less_equal(
            max_overlaps, self._background_threshold_high)

        # Among the anchors not already taken as foreground by IoU, keep the
        # ones below the background upper threshold as background candidates.
        bg_anchors = tf.less_equal(anchors_label, 0)
        bg_overlaps_filter = tf.logical_and(iou_less_than_bg_tresh_high_filter,
                                            bg_anchors)

        # For those background candidates keep their maximum foreground-class
        # probability, to be used below when choosing the background anchors;
        # every other anchor gets -1.
        # Q: if these anchors already satisfy the background conditions, why
        #    keep their classification probabilities?
        # A: the anchors used as background are exactly the ones with a high
        #    classification probability that are nevertheless far from every
        #    gt box and still labeled -1, i.e. the hardest negatives.
        max_cls_probs = tf.where(
            condition=bg_overlaps_filter,
            x=max_cls_probs,
            y=tf.fill(dims=anchors_label_shape, value=-1.),
        )

        # We calculate up to how many backgrounds we desire based on the
        # final number of foregrounds and the hard mining ratio.
        # These two lines count the foreground anchors.
        num_fg_mask = tf.greater(anchors_label, 0.0)
        num_fg = tf.cast(tf.count_nonzero(num_fg_mask), tf.float32)

        # Number of backgrounds = hard_negative_ratio (e.g. 3) * num_fg.
        num_bg = tf.cast(num_fg * self._hard_negative_ratio, tf.int32)
        # From max_cls_probs take the num_bg anchors with the highest
        # foreground probability; their indices become the background anchors.
        top_k_bg = tf.nn.top_k(max_cls_probs, k=num_bg)
        # Mark those positions as True, to be used as the condition below.
        set_bg = tf.sparse_to_dense(sparse_indices=top_k_bg.indices,
                                    sparse_values=True,
                                    default_value=False,
                                    output_shape=anchors_label_shape,
                                    validate_indices=False)

        # [-1, 1~20 (+ best anchor per gt box)] =====> [-1, 0, 1~20] ##########

        # Assign the background label (0) to the selected anchors.
        anchors_label = tf.where(condition=set_bg,
                                 x=tf.fill(dims=anchors_label_shape, value=0.),
                                 y=anchors_label)

        # Next step is to calculate the proper bbox targets for the labeled
        # anchors based on the values of the ground-truth boxes.
        # We have to use only the anchors labeled >= 1, each matching with
        # the proper gt_boxes

        # Get the ids of the anchors that matter for bbox_target comparison.
        # Only the foreground anchors are used.
        is_anchor_with_target = tf.greater(anchors_label, 0)
        anchors_with_target_idx = tf.where(condition=is_anchor_with_target)

        # Get the corresponding ground truth box only for the anchors with
        # target: from each anchor's best gt index, gather the entries for the
        # foreground anchors, and from those the gt box coordinates.
        gt_boxes_idxs = tf.gather(best_gtbox_for_anchors_idx,
                                  anchors_with_target_idx)
        # Get the values of the ground truth boxes.
        anchors_gt_boxes = tf.gather_nd(gt_boxes[:, :4], gt_boxes_idxs)
        # We create the same array but with the anchors: the original-image
        # coordinates of every foreground anchor.
        anchors_with_target = tf.gather_nd(all_anchors,
                                           anchors_with_target_idx)

        # We create our targets with bbox_transform: the offsets and scales of
        # the matched ground-truth box relative to each foreground anchor.
        bbox_targets = encode(anchors_with_target,
                              anchors_gt_boxes,
                              variances=self._variances)

        # We unmap targets to anchor_labels (containing the length of
        # anchors): write bbox_targets at the positions of the foreground
        # anchors within all_anchors, and zeros everywhere else.
        bbox_targets = tf.scatter_nd(indices=anchors_with_target_idx,
                                     updates=bbox_targets,
                                     shape=tf.cast(tf.shape(all_anchors),
                                                   tf.int64))

        return anchors_label, bbox_targets
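The background selection above is a form of hard negative mining: among the anchors that are neither foreground nor close to any ground-truth box, the ones the classifier is most confident about are kept as negatives, up to hard_negative_ratio times the number of positives. A rough NumPy sketch of that selection under the same assumptions (the helper name and threshold values are illustrative, not the repository's API):

import numpy as np

def pick_hard_negatives(max_cls_probs, anchors_label, max_overlaps,
                        bg_threshold_high=0.2, hard_negative_ratio=3):
    """Return a boolean mask of the anchors to label as background (0)."""
    # Candidates: not already foreground and far from every ground truth.
    candidates = (anchors_label <= 0) & (max_overlaps <= bg_threshold_high)
    scores = np.where(candidates, max_cls_probs, -1.0)

    num_fg = int((anchors_label > 0).sum())
    num_bg = hard_negative_ratio * num_fg

    # Indices of the num_bg highest-scoring candidates (the "hardest" ones).
    hardest = np.argsort(-scores)[:num_bg]
    set_bg = np.zeros(anchors_label.shape, dtype=bool)
    set_bg[hardest] = True
    return set_bg & candidates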
Пример #11
0
    def _build(self, all_anchors, gt_boxes, im_shape):
        """
        We compare anchors to GT and using the minibatch size and the different
        config settings (clobber, foreground fraction, etc), we end up with
        training targets *only* for the elements we want to use in the batch,
        while everything else is ignored.

        Basically what it does is, first generate the targets for all (valid)
        anchors, and then start subsampling the positive (foreground) and the
        negative ones (background) based on the number of samples of each type
        that we want.

        Args:
            all_anchors:
                A Tensor with all the bounding boxes coords of the anchors.
                Its shape should be (num_anchors, 4).
            gt_boxes:
                A Tensor with the ground truth bounding boxes of the image of
                the batch being processed. Its shape should be (num_gt, 5).
                The last dimension is used for the label.
            im_shape:
                Shape of original image (height, width) in order to define
                anchor targets with respect to gt_boxes.

        Returns:
            Tuple of the tensors of:
                labels: (1, 0, -1) for each anchor.
                    Shape (num_anchors, 1)
                bbox_targets: 4d bbox targets as specified by paper.
                    Shape (num_anchors, 4)
                max_overlaps: Max IoU overlap with ground truth boxes.
                    Shape (num_anchors, 1)
        """
        # Keep only the coordinates of gt_boxes
        gt_boxes = gt_boxes[:, :4]
        all_anchors = all_anchors[:, :4]

        # Only keep anchors inside the image
        (x_min_anchor, y_min_anchor, x_max_anchor,
         y_max_anchor) = tf.unstack(all_anchors, axis=1)

        anchor_filter = tf.logical_and(
            tf.logical_and(
                tf.greater_equal(x_min_anchor, -self._allowed_border),
                tf.greater_equal(y_min_anchor, -self._allowed_border),
            ),
            tf.logical_and(
                tf.less(x_max_anchor, im_shape[1] + self._allowed_border),
                tf.less(y_max_anchor, im_shape[0] + self._allowed_border),
            ),
        )

        # We (force) reshape the filter so that we can use it as a boolean mask
        anchor_filter = tf.reshape(anchor_filter, [-1])
        # Filter anchors.
        anchors = tf.boolean_mask(all_anchors,
                                  anchor_filter,
                                  name="filter_anchors")

        # Generate array with the labels for all_anchors.
        labels = tf.fill((tf.gather(tf.shape(all_anchors), [0])), -1)
        labels = tf.boolean_mask(labels, anchor_filter, name="filter_labels")

        # Intersection over union (IoU) overlap between the anchors and the
        # ground truth boxes.
        overlaps = bbox_overlap_tf(tf.to_float(anchors), tf.to_float(gt_boxes))

        # Generate array with the IoU value of the closest GT box for each
        # anchor.
        max_overlaps = tf.reduce_max(overlaps, axis=1)
        if not self._clobber_positives:
            # Assign bg labels first so that positive labels can clobber them.
            # First we get an array with True where IoU is less than
            # self._negative_overlap
            negative_overlap_nonzero = tf.less(max_overlaps,
                                               self._negative_overlap)

            # Finally we set 0 at True indices
            labels = tf.where(
                condition=negative_overlap_nonzero,
                x=tf.zeros(tf.shape(labels)),
                y=tf.to_float(labels),
            )
        # Get the value of the max IoU for the closest anchor for each gt.
        gt_max_overlaps = tf.reduce_max(overlaps, axis=0)

        # Find all the indices that match (at least one, but could be more).
        gt_argmax_overlaps = tf.squeeze(tf.equal(overlaps, gt_max_overlaps))
        gt_argmax_overlaps = tf.where(gt_argmax_overlaps)[:, 0]
        # Eliminate duplicates indices.
        gt_argmax_overlaps, _ = tf.unique(gt_argmax_overlaps)
        # Order the indices for sparse_to_dense compatibility
        gt_argmax_overlaps, _ = tf.nn.top_k(gt_argmax_overlaps,
                                            k=tf.shape(gt_argmax_overlaps)[-1])
        gt_argmax_overlaps = tf.reverse(gt_argmax_overlaps, [0])
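        # Toy illustration (hypothetical values, not executed): if the matched
        # anchor indices are [5, 2, 2, 9], tf.unique yields [5, 2, 9], top_k
        # sorts them descending to [9, 5, 2] and the reverse gives [2, 5, 9],
        # the ascending order that sparse_to_dense expects when it validates
        # indices.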

        # Foreground label: for each ground-truth, anchor with highest overlap.
        # When the argmax is many items we use all of them (for consistency).
        # We set 1 at gt_argmax_overlaps_cond indices
        gt_argmax_overlaps_cond = tf.sparse_to_dense(
            gt_argmax_overlaps,
            tf.shape(labels, out_type=tf.int64),
            True,
            default_value=False,
        )

        labels = tf.where(
            condition=gt_argmax_overlaps_cond,
            x=tf.ones(tf.shape(labels)),
            y=tf.to_float(labels),
        )

        # Foreground label: above threshold Intersection over Union (IoU)
        # First we get an array with True where IoU is greater or equal than
        # self._positive_overlap
        positive_overlap_inds = tf.greater_equal(max_overlaps,
                                                 self._positive_overlap)
        # Finally we set 1 at True indices
        labels = tf.where(condition=positive_overlap_inds,
                          x=tf.ones(tf.shape(labels)),
                          y=labels)

        if self._clobber_positives:
            # Assign background labels last so that negative labels can clobber
            # positives. First we get an array with True where IoU is less than
            # self._negative_overlap
            negative_overlap_nonzero = tf.less(max_overlaps,
                                               self._negative_overlap)
            # Finally we set 0 at True indices
            labels = tf.where(
                condition=negative_overlap_nonzero,
                x=tf.zeros(tf.shape(labels)),
                y=labels,
            )

        # Subsample positive labels if we have too many
        def subsample_positive():
            # Shuffle the foreground indices
            disable_fg_inds = tf.random_shuffle(fg_inds, seed=self._seed)
            # Select the indices that we have to ignore, this is
            # `tf.shape(fg_inds)[0] - num_fg` because we want to get only
            # `num_fg` foreground labels.
            disable_place = tf.shape(fg_inds)[0] - num_fg
            disable_fg_inds = disable_fg_inds[:disable_place]
            # Order the indices for sparse_to_dense compatibility
            disable_fg_inds, _ = tf.nn.top_k(disable_fg_inds,
                                             k=tf.shape(disable_fg_inds)[-1])
            disable_fg_inds = tf.reverse(disable_fg_inds, [0])
            disable_fg_inds = tf.sparse_to_dense(
                disable_fg_inds,
                tf.shape(labels, out_type=tf.int64),
                True,
                default_value=False,
            )
            # Put -1 to ignore the anchors in the selected indices
            return tf.where(
                condition=tf.squeeze(disable_fg_inds),
                x=tf.to_float(tf.fill(tf.shape(labels), -1)),
                y=labels,
            )

        num_fg = tf.to_int32(self._foreground_fraction * self._minibatch_size)
        # Get foreground indices, get True in the indices where we have a one.
        fg_inds = tf.equal(labels, 1)
        # We get only the indices where we have True.
        fg_inds = tf.squeeze(tf.where(fg_inds), axis=1)
        fg_inds_size = tf.size(fg_inds)
        # Condition to check if we have too many positive labels.
        subsample_positive_cond = fg_inds_size > num_fg
        # Check the condition and subsample positive labels.
        labels = tf.cond(subsample_positive_cond,
                         true_fn=subsample_positive,
                         false_fn=lambda: labels)

        # Subsample negative labels if we have too many
        def subsample_negative():
            # Shuffle the background indices
            disable_bg_inds = tf.random_shuffle(bg_inds, seed=self._seed)

            # Select the indices that we have to ignore, this is
            # `tf.shape(bg_inds)[0] - num_bg` because we want to get only
            # `num_bg` background labels.
            disable_place = tf.shape(bg_inds)[0] - num_bg
            disable_bg_inds = disable_bg_inds[:disable_place]
            # Order the indices for sparse_to_dense compatibility
            disable_bg_inds, _ = tf.nn.top_k(disable_bg_inds,
                                             k=tf.shape(disable_bg_inds)[-1])
            disable_bg_inds = tf.reverse(disable_bg_inds, [0])
            disable_bg_inds = tf.sparse_to_dense(
                disable_bg_inds,
                tf.shape(labels, out_type=tf.int64),
                True,
                default_value=False,
            )
            # Put -1 to ignore the anchors in the selected indices
            return tf.where(
                condition=tf.squeeze(disable_bg_inds),
                x=tf.to_float(tf.fill(tf.shape(labels), -1)),
                y=labels,
            )

        # Recalculate the foreground indices after (maybe) disable some of them

        # Get foreground indices, get True in the indices where we have a one.
        fg_inds = tf.equal(labels, 1)
        # We get only the indices where we have True.
        fg_inds = tf.squeeze(tf.where(fg_inds), axis=1)
        fg_inds_size = tf.size(fg_inds)

        num_bg = tf.to_int32(self._minibatch_size - fg_inds_size)
        # Get background indices, get True in the indices where we have a zero.
        bg_inds = tf.equal(labels, 0)
        # We get only the indices where we have True.
        bg_inds = tf.squeeze(tf.where(bg_inds), axis=1)
        bg_inds_size = tf.size(bg_inds)
        # Condition to check if we have too many negative labels.
        subsample_negative_cond = bg_inds_size > num_bg
        # Check the condition and subsample negative labels.
        labels = tf.cond(subsample_negative_cond,
                         true_fn=subsample_negative,
                         false_fn=lambda: labels)

        # Return bbox targets with shape (anchors.shape[0], 4).

        # Find the closest gt box for each anchor.
        argmax_overlaps = tf.argmax(overlaps, axis=1)
        # Eliminate duplicates.
        argmax_overlaps_unique, _ = tf.unique(argmax_overlaps)
        # Filter the gt_boxes.
        # We get only the indices where we have "inside anchors".
        anchor_filter_inds = tf.where(anchor_filter)
        gt_boxes = tf.gather(gt_boxes, argmax_overlaps)

        bbox_targets = encode_tf(anchors, gt_boxes)

        # For the anchors that aren't foreground, we ignore the bbox_targets.
        anchor_foreground_filter = tf.equal(labels, 1)
        bbox_targets = tf.where(
            condition=anchor_foreground_filter,
            x=bbox_targets,
            y=tf.zeros_like(bbox_targets),
        )

        # We unroll "inside anchors" value for all anchors (for shape
        # compatibility).

        # We complete the missed indices with zeros
        # (because scatter_nd has zeros as default).
        bbox_targets = tf.scatter_nd(
            indices=tf.to_int32(anchor_filter_inds),
            updates=bbox_targets,
            shape=tf.shape(all_anchors),
        )

        labels_scatter = tf.scatter_nd(
            indices=tf.to_int32(anchor_filter_inds),
            updates=labels,
            shape=[tf.shape(all_anchors)[0]],
        )
        # We have to put -1 to ignore the indices with 0 generated by
        # scatter_nd, otherwise it will be considered as background.
        labels = tf.where(
            condition=anchor_filter,
            x=labels_scatter,
            y=tf.to_float(tf.fill(tf.shape(labels_scatter), -1)),
        )

        max_overlaps = tf.scatter_nd(
            indices=tf.to_int32(anchor_filter_inds),
            updates=max_overlaps,
            shape=[tf.shape(all_anchors)[0]],
        )

        return labels, bbox_targets, max_overlaps
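Stripped of the border filtering and subsampling, the labeling rule implemented by this target builder fits in a few lines. A minimal NumPy sketch assuming the default non-clobbering behaviour and the usual RPN thresholds (0.3 / 0.7, used here only as illustrative defaults):

import numpy as np

def label_anchors_sketch(overlaps, negative_overlap=0.3, positive_overlap=0.7):
    """overlaps: (num_anchors, num_gt) IoU matrix -> labels in {-1, 0, 1}."""
    labels = np.full(overlaps.shape[0], -1.0)      # start as "ignore"
    max_overlaps = overlaps.max(axis=1)

    # Background first, so that positives can clobber it afterwards.
    labels[max_overlaps < negative_overlap] = 0.0

    # For each ground truth, its highest-overlap anchor(s) are foreground...
    gt_max_overlaps = overlaps.max(axis=0)
    best_anchor_for_gt = np.where((overlaps == gt_max_overlaps).any(axis=1))[0]
    labels[best_anchor_for_gt] = 1.0

    # ...and so is every anchor above the positive IoU threshold.
    labels[max_overlaps >= positive_overlap] = 1.0
    return labels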
Пример #12
0
    def _build(self, all_anchors, gt_boxes, im_shape):
        """
        We compare anchors to GT and using the minibatch size and the different
        config settings (clobber, foreground fraction, etc), we end up with
        training targets *only* for the elements we want to use in the batch,
        while everything else is ignored.

        Basically what it does is, first generate the targets for all (valid)
        anchors, and then start subsampling the positive (foreground) and the
        negative ones (background) based on the number of samples of each type
        that we want.
        In other words, every anchor first gets a label and then the positive
        and negative anchors are subsampled to form the batch.

        Args:
            all_anchors:
                A Tensor with all the bounding boxes coords of the anchors.
                Its shape should be (num_anchors, 4).
            gt_boxes:
                A Tensor with the ground truth bounding boxes of the image of
                the batch being processed. Its shape should be (num_gt, 5).
                The last dimension is used for the label.
            im_shape:
                Shape of original image (height, width) in order to define
                anchor targets with respect to gt_boxes.

        Returns:
            Tuple of the tensors of:
                labels: (1, 0, -1) for each anchor.
                    Shape (num_anchors, 1)
                bbox_targets: 4d bbox targets as specified by paper.
                    Shape (num_anchors, 4)
                max_overlaps: Max IoU overlap with ground truth boxes.
                    Shape (num_anchors, 1)
        """
        # Keep only the coordinates of gt_boxes.
        # gt_boxes has 5 values in its second dimension; only the first four
        # (the coordinates) are used here.
        gt_boxes = gt_boxes[:, :4]
        all_anchors = all_anchors[:, :4]

        # Only keep anchors inside the image.
        (x_min_anchor, y_min_anchor,
         x_max_anchor, y_max_anchor) = tf.unstack(all_anchors, axis=1)

        anchor_filter = tf.logical_and(
            tf.logical_and(
                tf.greater_equal(x_min_anchor, -self._allowed_border),
                tf.greater_equal(y_min_anchor, -self._allowed_border)
            ),
            tf.logical_and(
                tf.less(x_max_anchor, im_shape[1] + self._allowed_border),
                tf.less(y_max_anchor, im_shape[0] + self._allowed_border)
            )
        )

        # We (force) reshape the filter so that we can use it as a boolean
        # mask (it is already one-dimensional) to select the valid anchors
        # inside the image.
        anchor_filter = tf.reshape(anchor_filter, [-1])
        # Filter anchors: keep only the anchors inside the image,
        # shape (num_inside_anchors, 4).
        anchors = tf.boolean_mask(
            all_anchors, anchor_filter, name='filter_anchors')

        # Generate array with the labels for all_anchors.
        # tf.fill creates a tensor of fixed size filled with the scalar -1;
        # tf.gather extracts the first dimension of all_anchors' shape, which
        # determines that size. This is the per-anchor class label,
        # initialised to -1 and updated later. Shape (num_anchors, ).
        labels = tf.fill((tf.gather(tf.shape(all_anchors), [0])), -1)
        # Keep only the labels of the anchors inside the image.
        labels = tf.boolean_mask(labels, anchor_filter, name='filter_labels')

        # Intersection over union (IoU) overlap between the anchors and the
        # ground truth boxes.
        overlaps = bbox_overlap_tf(tf.to_float(anchors), tf.to_float(gt_boxes))

        # Generate array with the IoU value of the closest GT box for each
        # anchor: reduce_max takes the maximum along the gt dimension.
        max_overlaps = tf.reduce_max(overlaps, axis=1)

        # Here the labels of the negative samples are set.
        if not self._clobber_positives:
            # Assign bg labels first so that positive labels can clobber them;
            # this guarantees that every ground truth keeps at least one
            # positive anchor.
            # First we get an array with True where IoU is less than
            # self._negative_overlap (these are the negative samples).
            negative_overlap_nonzero = tf.less(
                max_overlaps, self._negative_overlap)

            # Finally we set 0 at True indices.
            # labels is still all -1 here; tf.where behaves like a ternary
            # operator: where the condition is True (a negative sample) it
            # returns x (0), otherwise it returns y (-1). Positive samples
            # have not been assigned yet.
            labels = tf.where(
                condition=negative_overlap_nonzero,
                x=tf.zeros(tf.shape(labels)), y=tf.to_float(labels)
            )

        # Positive samples, determined from the ground-truth boxes' point of
        # view ################################################################
        # Get the value of the max IoU for the closest anchor for each gt.
        # Shape (num_gt, ): the per-gt maximum IoU.
        gt_max_overlaps = tf.reduce_max(overlaps, axis=0)

        # Find all the indices that match (at least one, but could be more):
        # a boolean tensor marking, for each gt box, the anchors that reach
        # its maximum IoU; squeeze just removes the redundant dimension.
        gt_argmax_overlaps = tf.squeeze(tf.equal(overlaps, gt_max_overlaps))

        # With a single argument, tf.where acts as a condition and returns the
        # coordinates of the True entries as [[row, col], ...]; keeping the
        # first column gives the anchors that have a gt box matched to them.
        gt_argmax_overlaps = tf.where(gt_argmax_overlaps)[:, 0]

        # Eliminate duplicate indices: the same anchor may be the best match
        # for several gt boxes, so keep each anchor index only once.
        # Shape (num_unique_anchors, ): the anchors matched to some gt box.
        gt_argmax_overlaps, _ = tf.unique(gt_argmax_overlaps)

        # Order the indices for sparse_to_dense compatibility.
        # gt_argmax_overlaps is already one-dimensional, so top_k with k equal
        # to its size simply sorts it: it returns the k largest values (and
        # their positions, which we drop) in descending order.
        gt_argmax_overlaps, _ = tf.nn.top_k(
            gt_argmax_overlaps, k=tf.shape(gt_argmax_overlaps)[-1])

        # Reverse along the first axis so the indices end up in ascending
        # order. Shape (num_unique_anchors, ).
        gt_argmax_overlaps = tf.reverse(gt_argmax_overlaps, [0])

        # Foreground label: for each ground-truth, anchor with highest overlap.
        # When the argmax is many items we use all of them (for consistency).
        # We set True at the gt_argmax_overlaps indices (the anchors with the
        # highest IoU for some gt box). Shape (num_anchors, ).
        gt_argmax_overlaps_cond = tf.sparse_to_dense(
            gt_argmax_overlaps, tf.shape(labels, out_type=tf.int64),
            True, default_value=False
        )

        # Set 1 at those positions and keep the remaining labels unchanged:
        # for every annotated ground truth, the anchor with the highest IoU
        # overlap is marked as a positive sample.
        labels = tf.where(
            condition=gt_argmax_overlaps_cond,
            x=tf.ones(tf.shape(labels)), y=tf.to_float(labels)
        )

        # Positive samples, determined from the anchors' point of view ########
        # Foreground label: above threshold Intersection over Union (IoU).
        # First we get an array with True where IoU is greater or equal than
        # self._positive_overlap (0.7); those anchors are positives as well.
        positive_overlap_inds = tf.greater_equal(
            max_overlaps, self._positive_overlap)
        # Finally we set 1 at True indices and keep the rest unchanged.
        labels = tf.where(
            condition=positive_overlap_inds,
            x=tf.ones(tf.shape(labels)), y=labels
        )

        # Set the labels of the negative samples.
        if self._clobber_positives:
            # Reaching this branch means negatives are allowed to clobber
            # positives, so it is no longer guaranteed that every ground truth
            # keeps a positive anchor.
            # Assign background labels last so that negative labels can clobber
            # positives. First we get an array with True where IoU is less than
            # self._negative_overlap (0.3).
            negative_overlap_nonzero = tf.less(
                max_overlaps, self._negative_overlap)
            # Finally we set 0 at True indices.
            labels = tf.where(
                condition=negative_overlap_nonzero,
                x=tf.zeros(tf.shape(labels)), y=labels
            )

        # Subsample positive labels if we have too many
        def subsample_positive():
            # Shuffle the foreground indices.
            disable_fg_inds = tf.random_shuffle(fg_inds, seed=self._seed)
            # Select the indices that we have to ignore, this is
            # `tf.shape(fg_inds)[0] - num_fg` because we want to get only
            # `num_fg` foreground labels.
            disable_place = (tf.shape(fg_inds)[0] - num_fg)
            # The foreground indices we do not want to keep.
            disable_fg_inds = disable_fg_inds[:disable_place]
            # Order the indices for sparse_to_dense compatibility: top_k with
            # k equal to the size sorts them, and the reverse below makes the
            # order ascending.
            disable_fg_inds, _ = tf.nn.top_k(
                disable_fg_inds, k=tf.shape(disable_fg_inds)[-1])
            disable_fg_inds = tf.reverse(disable_fg_inds, [0])
            disable_fg_inds = tf.sparse_to_dense(
                disable_fg_inds, tf.shape(labels, out_type=tf.int64),
                True, default_value=False
            )
            # Put -1 to ignore the anchors in the selected indices; every
            # other position keeps its current label.
            return tf.where(
                condition=tf.squeeze(disable_fg_inds),
                x=tf.to_float(tf.fill(tf.shape(labels), -1)), y=labels
            )

        # Desired number of foreground samples.
        num_fg = tf.to_int32(self._foreground_fraction * self._minibatch_size)
        # Get foreground indices, get True in the indices where we have a one.
        fg_inds = tf.equal(labels, 1)
        # We get only the indices where we have True.
        fg_inds = tf.squeeze(tf.where(fg_inds), axis=1)
        fg_inds_size = tf.size(fg_inds)
        # Condition to check if we have too many positive labels.
        subsample_positive_cond = fg_inds_size > num_fg
        # Check the condition and subsample positive labels: if there are more
        # positives than we want, subsample them; otherwise keep labels as is.
        labels = tf.cond(
            subsample_positive_cond,
            true_fn=subsample_positive, false_fn=lambda: labels
        )

        # Subsample negative labels if we have too many
        def subsample_negative():
            # Same procedure as the positive subsampling above, applied to the
            # background samples.
            # Shuffle the background indices
            disable_bg_inds = tf.random_shuffle(bg_inds, seed=self._seed)

            # Select the indices that we have to ignore, this is
            # `tf.shape(bg_inds)[0] - num_bg` because we want to get only
            # `num_bg` background labels.
            disable_place = (tf.shape(bg_inds)[0] - num_bg)
            disable_bg_inds = disable_bg_inds[:disable_place]
            # Order the indices for sparse_to_dense compatibility
            disable_bg_inds, _ = tf.nn.top_k(
                disable_bg_inds, k=tf.shape(disable_bg_inds)[-1])
            disable_bg_inds = tf.reverse(disable_bg_inds, [0])
            disable_bg_inds = tf.sparse_to_dense(
                disable_bg_inds, tf.shape(labels, out_type=tf.int64),
                True, default_value=False
            )
            # Put -1 to ignore the anchors in the selected indices
            return tf.where(
                condition=tf.squeeze(disable_bg_inds),
                x=tf.to_float(tf.fill(tf.shape(labels), -1)), y=labels
            )

        # Recalculate the foreground indices after (maybe) disable some of them

        # Get foreground indices, get True in the indices where we have a one.
        fg_inds = tf.equal(labels, 1)
        # We get only the indices where we have True.
        fg_inds = tf.squeeze(tf.where(fg_inds), axis=1)
        fg_inds_size = tf.size(fg_inds)

        # Now subsample the negative samples.
        num_bg = tf.to_int32(self._minibatch_size - fg_inds_size)
        # Get background indices, get True in the indices where we have a zero.
        bg_inds = tf.equal(labels, 0)
        # We get only the indices where we have True.
        bg_inds = tf.squeeze(tf.where(bg_inds), axis=1)
        bg_inds_size = tf.size(bg_inds)
        # Condition to check if we have too many negative labels.
        subsample_negative_cond = bg_inds_size > num_bg
        # Check the condition and subsample negative labels.
        labels = tf.cond(
            subsample_negative_cond,
            true_fn=subsample_negative, false_fn=lambda: labels
        )

        # Return bbox targets with shape (anchors.shape[0], 4).

        # Find the closest gt box for each anchor, i.e. the index of the gt
        # box with the highest IoU.
        argmax_overlaps = tf.argmax(overlaps, axis=1)
        # Eliminate duplicates, keeping each matched gt index only once.
        argmax_overlaps_unique, _ = tf.unique(argmax_overlaps)
        # Filter the gt_boxes.
        # We get only the indices where we have "inside anchors".
        anchor_filter_inds = tf.where(anchor_filter)
        # Gather, for every inside anchor, the coordinates of its closest gt
        # box, so the two tensors are aligned.
        gt_boxes = tf.gather(gt_boxes, argmax_overlaps)

        # Compute the offsets and scales of the gt boxes relative to the
        # anchors.
        bbox_targets = encode_tf(anchors, gt_boxes)

        # For the anchors that aren't foreground, we ignore the bbox_targets
        # and write zeros instead.
        anchor_foreground_filter = tf.equal(labels, 1)
        bbox_targets = tf.where(
            condition=anchor_foreground_filter,
            x=bbox_targets, y=tf.zeros_like(bbox_targets)
        )

        # We unroll "inside anchors" value for all anchors (for shape
        # compatibility).

        # We complete the missed indices with zeros
        # (because scatter_nd has zeros as default).
        # 在shape形状的全零张量中的indices对应的位置替换为updates对应的数据
        # 这里返回的结果的含义是: 图像内部的anchors对应的真实值相对的偏移量和缩放量
        bbox_targets = tf.scatter_nd(
            indices=tf.to_int32(anchor_filter_inds),
            updates=bbox_targets,
            shape=tf.shape(all_anchors)
        )

        # Scatter the labels back onto a zero tensor covering all anchors.
        labels_scatter = tf.scatter_nd(
            indices=tf.to_int32(anchor_filter_inds),
            updates=labels,
            shape=[tf.shape(all_anchors)[0]]
        )
        # We have to put -1 to ignore the indices with 0 generated by
        # scatter_nd, otherwise it will be considered as background: anchors
        # outside the image get the ignore label.
        labels = tf.where(
            condition=anchor_filter, x=labels_scatter,
            y=tf.to_float(tf.fill(tf.shape(labels_scatter), -1))
        )

        # Write each anchor's max IoU back at the positions of the anchors
        # inside the image.
        max_overlaps = tf.scatter_nd(
            indices=tf.to_int32(anchor_filter_inds),
            updates=max_overlaps,
            shape=[tf.shape(all_anchors)[0]]
        )

        return labels, bbox_targets, max_overlaps
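Both RPN examples end by "unrolling" the values computed for the inside-the-image anchors back onto the full anchor set with tf.scatter_nd. A small NumPy sketch of that unmapping, with a hypothetical helper name, is:

import numpy as np

def unmap_sketch(values, anchor_filter, fill_value=0.0):
    """Scatter `values` (one row per inside-anchor) back to all anchors.

    anchor_filter: boolean mask of shape (num_anchors,), True for the anchors
    kept inside the image. Filtered-out anchors get fill_value.
    """
    num_anchors = anchor_filter.shape[0]
    full_shape = (num_anchors,) + values.shape[1:]
    full = np.full(full_shape, fill_value, dtype=values.dtype)
    full[anchor_filter] = values
    return full

# Usage mirroring the code above: bbox targets default to 0, labels to -1.
# bbox_targets_full = unmap_sketch(bbox_targets, anchor_filter, 0.0)
# labels_full = unmap_sketch(labels, anchor_filter, -1.0)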