Exemplo n.º 1
0
    def testSmoothL1LossPerfectScore(self):
        """Check smooth_l1_loss when prediction and target are identical.

        The same row of ones is fed as both prediction and target, and the
        evaluated loss must lie within 0.4 of zero.
        """
        # A batch holding a single box whose four values are all one.
        identical_boxes = [[1.0, 1.0, 1.0, 1.0]]

        prediction_ph = tf.placeholder(tf.float32)
        target_ph = tf.placeholder(tf.float32)
        loss_op = smooth_l1_loss(prediction_ph, target_ph)

        feeds = {prediction_ph: identical_boxes, target_ph: identical_boxes}
        with tf.Session() as sess:
            loss = sess.run(loss_op, feed_dict=feeds)
            self.assertAlmostEqual(loss, 0.0, delta=0.4)
Exemplo n.º 2
0
    def testSmoothL1LossRandom(self):
        """Check smooth_l1_loss on a fixed prediction/target pair.

        The evaluated loss must lie within 0.4 of 2.
        """
        # Hard-coded values keep the test repeatable.
        prediction_box = [0.47450006, -0.80413032, -0.26595005, 0.17124325]
        target_box = [0.10058594, 0.07910156, 0.10555581, -0.1224325]

        prediction_ph = tf.placeholder(tf.float32)
        target_ph = tf.placeholder(tf.float32)
        loss_op = smooth_l1_loss(prediction_ph, target_ph)

        # Each box is wrapped in a list so it is fed as a batch of one.
        feeds = {prediction_ph: [prediction_box], target_ph: [target_box]}
        with tf.Session() as sess:
            loss = sess.run(loss_op, feed_dict=feeds)
            self.assertAlmostEqual(loss, 2, delta=0.4)
Exemplo n.º 3
0
    def loss(self, prediction_dict):
        """
        Build the RCNN classification and box-regression losses.

        Args:
            prediction_dict: Nested dict. The entries read here are:
                ['rcnn']['cls_score']: Class scores for every proposal,
                    expected shape (num_proposals, num_classes + 1), where
                    class 0 is the background.
                ['rcnn']['bbox_offsets']: Predicted offsets for every
                    proposal and class, expected shape
                    (num_proposals, num_classes * 4).
                ['target']['cls']: Label of every proposal, expected shape
                    (num_proposals,). It is cast to int32; negative labels
                    are left out of both losses, 0 is background and
                    1..n are the 1-indexed classes.
                ['target']['bbox_offsets']: Target offsets of every
                    proposal, expected shape (num_proposals, 4).
                When `self._debug` is set, the per-proposal losses are
                written to prediction_dict['_debug']['losses'].

        Returns:
            Dict with keys:
                rcnn_cls_loss: Mean softmax cross-entropy over the proposals
                    whose label is >= 0.
                rcnn_reg_loss: Mean of the `smooth_l1_loss` output over the
                    proposals whose label is > 0.
        """
        with tf.name_scope('RCNNLoss'):
            rcnn_outputs = prediction_dict['rcnn']
            class_scores = rcnn_outputs['cls_score']
            targets = prediction_dict['target']
            # The labels are used as indices below, so force them to int32.
            labels = tf.cast(targets['cls'], tf.int32)

            # ---- Classification loss -------------------------------------
            # Every proposal with a label >= 0 takes part, background
            # included.
            has_label = tf.greater_equal(labels, 0)
            labeled_mask = tf.reshape(has_label, [-1], name='not_ignored')
            labeled_scores = tf.boolean_mask(
                class_scores, labeled_mask, name='cls_score_labeled'
            )
            labeled_targets = tf.boolean_mask(
                labels, labeled_mask, name='cls_target_labeled'
            )

            num_labeled = tf.shape(labeled_scores)[0]
            tf.summary.scalar('batch_size', num_labeled, ['rcnn'])

            # One column per class plus one for the background.
            one_hot_with_background = tf.one_hot(
                labeled_targets,
                depth=self._num_classes + 1,
                name='cls_target_one_hot'
            )
            # The labels are constants: no gradient may flow into them.
            fixed_labels = tf.stop_gradient(one_hot_with_background)
            cross_entropy_per_proposal = (
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    labels=fixed_labels, logits=labeled_scores
                )
            )

            if self._debug:
                # Keep the per-proposal values so proposals with high and
                # low error can be visualized.
                prediction_dict['_debug']['losses'] = {
                    'cross_entropy_per_proposal': cross_entropy_per_proposal,
                }

            # ---- Regression loss -----------------------------------------
            predicted_offsets = rcnn_outputs['bbox_offsets']
            target_offsets = targets['bbox_offsets']

            # Only foreground proposals (label > 0) have a box to regress.
            foreground_mask = tf.reshape(tf.greater(labels, 0), [-1])
            foreground_offsets = tf.boolean_mask(
                predicted_offsets, foreground_mask,
                name='bbox_offsets_labeled'
            )
            foreground_target_offsets = tf.boolean_mask(
                target_offsets, foreground_mask,
                name='bbox_offsets_target_labeled'
            )
            foreground_labels = tf.boolean_mask(
                labels, foreground_mask, name='cls_target_labeled'
            )

            # Foreground labels start at 1; shift them to start at 0 so a
            # one-hot of depth `num_classes` (no background column) lines up.
            zero_based_labels = foreground_labels - 1
            class_selector = tf.one_hot(
                zero_based_labels,
                depth=self._num_classes,
                name='cls_target_one_hot'
            )

            # One row of 4 offsets per (proposal, class) pair.
            offsets_per_class = tf.reshape(
                foreground_offsets, [-1, 4], name='bbox_flatten'
            )
            # The flattened one-hot marks, for each of those rows, whether
            # it belongs to the proposal's own class.
            selector_flat = tf.cast(
                tf.reshape(class_selector, [-1]),
                tf.bool,
                name='cls_flatten_as_bool'
            )
            own_class_offsets = tf.boolean_mask(
                offsets_per_class, selector_flat, name='bbox_offset_cleaned'
            )

            # Compare the offsets predicted for the labeled class with the
            # target offsets.
            reg_loss_per_proposal = smooth_l1_loss(
                own_class_offsets,
                foreground_target_offsets,
                sigma=self._l1_sigma
            )

            num_foreground = tf.shape(own_class_offsets)[0]
            tf.summary.scalar(
                'rcnn_foreground_samples', num_foreground, ['rcnn']
            )

            if self._debug:
                # Stored next to the cross entropy for the same purpose.
                debug_losses = prediction_dict['_debug']['losses']
                debug_losses['reg_loss_per_proposal'] = reg_loss_per_proposal

            mean_cls_loss = tf.reduce_mean(cross_entropy_per_proposal)
            mean_reg_loss = tf.reduce_mean(reg_loss_per_proposal)
            return {
                'rcnn_cls_loss': mean_cls_loss,
                'rcnn_reg_loss': mean_reg_loss,
            }
Exemplo n.º 4
0
    def loss(self, prediction_dict, return_all=False):
        """
        Compute the loss for SSD.

        Args:
            prediction_dict: Dict from which the following entries are read:
                cls_pred: Class logits.
                loc_pred: Predicted localization offsets.
                target: Dict with the targets, `cls` for the class labels
                    and `bbox_offsets` for the localization offsets.
                The per-proposal losses are written back into it under
                `reg_loss_per_proposal` and `cls_loss_per_proposal`.
            return_all: When true, return a dict with the individual loss
                terms instead of only the total loss.

        Returns:
            The tensor given by `tf.losses.get_total_loss()` after this
            method's loss has been registered with `tf.losses.add_loss`.
            If `return_all` is true, a dict with keys `total_loss`,
            `cls_loss` and `bbox_loss` instead.
        """

        with tf.name_scope('losses'):
            class_logits = prediction_dict['cls_pred']
            # Labels are used as one-hot indices, so force them to int32.
            class_labels = tf.cast(prediction_dict['target']['cls'], tf.int32)
            one_hot_labels = tf.one_hot(class_labels,
                                        depth=self._num_classes + 1,
                                        name='cls_target_one_hot')

            # ---- Confidence (classification) term ------------------------
            # TODO: Optimization opportunity: We calculate the probabilities
            #       earlier in the program, so if we used those instead of the
            #       logits we would not have the need to do softmax here too.
            cross_entropy_per_proposal = (
                tf.nn.softmax_cross_entropy_with_logits(
                    labels=one_hot_labels, logits=class_logits))

            # ---- Localization (regression) term --------------------------
            predicted_offsets = prediction_dict['loc_pred']
            target_offsets = prediction_dict['target']['bbox_offsets']

            # Only entries with a foreground label (> 0) are regressed.
            is_foreground = tf.reshape(tf.greater(class_labels, 0), [-1])
            bbox_offsets_positives = tf.boolean_mask(
                predicted_offsets,
                is_foreground,
                name='bbox_offsets_positives')
            bbox_offsets_target_positives = tf.boolean_mask(
                target_offsets,
                is_foreground,
                name='bbox_offsets_target_positives')

            reg_loss_per_proposal = smooth_l1_loss(
                bbox_offsets_positives, bbox_offsets_target_positives)

            # The values above are per entry; add them up.
            cls_loss = tf.reduce_sum(cross_entropy_per_proposal)
            bbox_loss = tf.reduce_sum(reg_loss_per_proposal)

            def _loss_per_positive():
                # Weighted sum of both terms divided by the number of
                # foreground entries.
                weighted_sum = cls_loss + bbox_loss * self._loc_loss_weight
                num_positives = tf.cast(
                    tf.shape(bbox_offsets_positives)[0], tf.float32)
                return weighted_sum / num_positives

            def _zero_loss():
                return 0.0

            # Following the paper, the loss is 0 when no entry was assigned
            # a foreground target; this also avoids dividing by zero.
            has_positives = tf.not_equal(
                tf.shape(bbox_offsets_positives)[0], 0)
            final_loss = tf.cond(has_positives,
                                 true_fn=_loss_per_positive,
                                 false_fn=_zero_loss)
            tf.losses.add_loss(final_loss)
            total_loss = tf.losses.get_total_loss()

            prediction_dict['reg_loss_per_proposal'] = reg_loss_per_proposal
            prediction_dict['cls_loss_per_proposal'] = (
                cross_entropy_per_proposal)

            summaries = (
                ('cls_loss', cls_loss),
                ('bbox_loss', bbox_loss),
                ('total_loss', total_loss),
            )
            for tag, value in summaries:
                tf.summary.scalar(tag,
                                  value,
                                  collections=self._losses_collections)

            if not return_all:
                return total_loss
            return {
                'total_loss': total_loss,
                'cls_loss': cls_loss,
                'bbox_loss': bbox_loss
            }
Exemplo n.º 5
0
    def loss(self, prediction_dict):
        """
        Build the classification and regression losses for the Region
        Proposal Network.

        Args:
            prediction_dict: Dict holding the following entries:
                rpn_cls_score: Score for being an object or not for each
                    anchor in the image. Shape: (num_anchors, 2)
                rpn_cls_target: Ground truth labeling for each anchor.
                    Should be
                    * 1: for positive labels
                    * 0: for negative labels
                    * -1: for labels we should ignore.
                    Shape: (num_anchors, )
                rpn_bbox_target: Bounding box output delta target for rpn.
                    Shape: (num_anchors, 4)
                rpn_bbox_pred: Bounding box output delta prediction for rpn.
                    Shape: (num_anchors, 4)

                As a side effect the per-anchor losses are stored back into
                it under "cross_entropy_per_anchor" and
                "reg_loss_per_anchor".

        Returns:
            Dict with keys:
                rpn_cls_loss: Mean classification loss over the anchors
                    whose label is not -1.
                rpn_reg_loss: Mean of the `smooth_l1_loss` output over the
                    anchors whose label is 1.

        Raises:
            ValueError: If `self.loss_type` is neither CROSS_ENTROPY nor
                FOCAL.
        """

        rpn_cls_score = prediction_dict["rpn_cls_score"]
        rpn_cls_target = prediction_dict["rpn_cls_target"]

        rpn_bbox_target = prediction_dict["rpn_bbox_target"]
        rpn_bbox_pred = prediction_dict["rpn_bbox_pred"]

        # Validate before touching the graph: without this check an unknown
        # loss type left `ce_per_anchor` unassigned and the method failed
        # later with an opaque UnboundLocalError, after ops had been created.
        if self.loss_type not in (CROSS_ENTROPY, FOCAL):
            raise ValueError(
                "Unsupported RPN loss type: {!r}".format(self.loss_type))

        with tf.variable_scope("RPNLoss"):
            # Flatten already flat Tensor for usage as boolean mask filter.
            rpn_cls_target = tf.cast(tf.reshape(rpn_cls_target, [-1]),
                                     tf.int32,
                                     name="rpn_cls_target")
            # Transform to boolean tensor mask for not ignored.
            labels_not_ignored = tf.not_equal(rpn_cls_target,
                                              -1,
                                              name="labels_not_ignored")

            # Now we only have the labels we are going to compare with the
            # cls probability.
            labels = tf.boolean_mask(rpn_cls_target, labels_not_ignored)
            cls_score = tf.boolean_mask(rpn_cls_score, labels_not_ignored)

            # We need to transform `labels` to `cls_score` shape.
            # convert [1, 0] to [[0, 1], [1, 0]] for ce with logits.
            cls_target = tf.one_hot(labels, depth=2)

            if self.loss_type == CROSS_ENTROPY:
                # Equivalent to log loss.
                # TODO PV make this a loss function in losses.py
                ce_per_anchor = tf.nn.softmax_cross_entropy_with_logits_v2(
                    labels=cls_target, logits=cls_score)
                # The weight is only applied to the cross-entropy variant.
                if self.loss_weight != 1:
                    ce_per_anchor = ce_per_anchor * self.loss_weight
            else:
                # FOCAL: the only other value accepted by the check above.
                ce_per_anchor = focal_loss(cls_score, cls_target,
                                           self.focal_gamma)
            # TODO PV Rename cross entropy per anchor to reflect focal loss is
            # calculated
            prediction_dict["cross_entropy_per_anchor"] = ce_per_anchor

            # Finally, we need to calculate the regression loss over
            # `rpn_bbox_target` and `rpn_bbox_pred`.
            # We use SmoothL1Loss.
            rpn_bbox_target = tf.reshape(rpn_bbox_target, [-1, 4])
            rpn_bbox_pred = tf.reshape(rpn_bbox_pred, [-1, 4])

            # We only care for positive labels (we ignore backgrounds since
            # we don't have any bounding box information for it).
            positive_labels = tf.equal(rpn_cls_target, 1)
            rpn_bbox_target = tf.boolean_mask(rpn_bbox_target, positive_labels)
            rpn_bbox_pred = tf.boolean_mask(rpn_bbox_pred, positive_labels)

            # We apply smooth l1 loss as described by the Fast R-CNN paper.
            reg_loss_per_anchor = smooth_l1_loss(rpn_bbox_pred,
                                                 rpn_bbox_target,
                                                 sigma=self._l1_sigma)

            prediction_dict["reg_loss_per_anchor"] = reg_loss_per_anchor

            # Loss summaries.
            tf.summary.scalar("batch_size", tf.shape(labels)[0], ["rpn"])
            # Split the classification loss by label so foreground and
            # background anchors can be monitored separately.
            foreground_cls_loss = tf.boolean_mask(ce_per_anchor,
                                                  tf.equal(labels, 1))
            background_cls_loss = tf.boolean_mask(ce_per_anchor,
                                                  tf.equal(labels, 0))
            tf.summary.scalar("foreground_cls_loss",
                              tf.reduce_mean(foreground_cls_loss), ["rpn"])
            tf.summary.histogram("foreground_cls_loss", foreground_cls_loss,
                                 ["rpn"])
            tf.summary.scalar("background_cls_loss",
                              tf.reduce_mean(background_cls_loss), ["rpn"])
            tf.summary.histogram("background_cls_loss", background_cls_loss,
                                 ["rpn"])
            tf.summary.scalar("foreground_samples",
                              tf.shape(rpn_bbox_target)[0], ["rpn"])

            return {
                "rpn_cls_loss": tf.reduce_mean(ce_per_anchor),
                "rpn_reg_loss": tf.reduce_mean(reg_loss_per_anchor),
            }
Exemplo n.º 6
0
    def loss(self, prediction_dict):
        """
        Build the summed classification and regression losses for the
        Region Proposal Network.

        Args:
            prediction_dict: Dict holding the following entries:
                rpn_cls_score: Object / not-object score of every anchor.
                    Shape: (num_anchors, 2)
                rpn_cls_target: Ground truth label of every anchor: 1 for
                    positive, 0 for negative, -1 for anchors to ignore.
                    Shape: (num_anchors, )
                rpn_bbox_target: Target bounding box deltas.
                    Shape: (num_anchors, 4)
                rpn_bbox_pred: Predicted bounding box deltas.
                    Shape: (num_anchors, 4)

                The per-anchor losses are stored back into it under
                'cross_entropy_per_anchor' and 'reg_loss_per_anchor'.

        Returns:
            Dict with keys:
                rpn_cls_loss: Sum of the softmax cross-entropy over the
                    anchors whose label is not -1.
                rpn_reg_loss: Sum of the `smooth_l1_loss` output over the
                    anchors whose label is 1.
        """

        raw_scores = prediction_dict['rpn_cls_score']
        raw_labels = prediction_dict['rpn_cls_target']

        raw_bbox_targets = prediction_dict['rpn_bbox_target']
        raw_bbox_preds = prediction_dict['rpn_bbox_pred']

        with tf.variable_scope('RPNLoss'):
            # A flat int32 label vector is needed to derive boolean masks.
            flat_labels = tf.reshape(raw_labels, [-1])
            anchor_labels = tf.cast(
                flat_labels, tf.int32, name='rpn_cls_target')
            # True for every anchor that is not marked as ignored (-1).
            keep_mask = tf.not_equal(
                anchor_labels, -1, name='labels_not_ignored')

            # Drop the ignored anchors from both labels and scores.
            labels = tf.boolean_mask(anchor_labels, keep_mask)
            kept_scores = tf.boolean_mask(raw_scores, keep_mask)

            # One-hot labels have the same layout as the scores,
            # e.g. [1, 0] becomes [[0, 1], [1, 0]].
            one_hot_labels = tf.one_hot(labels, depth=2)

            # Equivalent to log loss.
            ce_per_anchor = tf.nn.softmax_cross_entropy_with_logits(
                labels=one_hot_labels, logits=kept_scores)
            prediction_dict['cross_entropy_per_anchor'] = ce_per_anchor

            # Regression part: one row of four deltas per anchor.
            bbox_targets = tf.reshape(raw_bbox_targets, [-1, 4])
            bbox_preds = tf.reshape(raw_bbox_preds, [-1, 4])

            # Background anchors carry no box information, so only the
            # positive ones are regressed.
            is_positive = tf.equal(anchor_labels, 1)
            positive_targets = tf.boolean_mask(bbox_targets, is_positive)
            positive_preds = tf.boolean_mask(bbox_preds, is_positive)

            # Smooth L1 loss as described by the Fast R-CNN paper.
            reg_loss_per_anchor = smooth_l1_loss(
                positive_preds, positive_targets)

            prediction_dict['reg_loss_per_anchor'] = reg_loss_per_anchor

            # Loss summaries.
            num_kept = tf.shape(labels)[0]
            tf.summary.scalar('batch_size', num_kept, ['rpn'])

            foreground_ce = tf.boolean_mask(
                ce_per_anchor, tf.equal(labels, 1))
            background_ce = tf.boolean_mask(
                ce_per_anchor, tf.equal(labels, 0))

            foreground_mean = tf.reduce_mean(foreground_ce)
            tf.summary.scalar('foreground_cls_loss', foreground_mean, ['rpn'])
            tf.summary.histogram(
                'foreground_cls_loss', foreground_ce, ['rpn'])

            background_mean = tf.reduce_mean(background_ce)
            tf.summary.scalar('background_cls_loss', background_mean, ['rpn'])
            tf.summary.histogram(
                'background_cls_loss', background_ce, ['rpn'])

            num_positive = tf.shape(positive_targets)[0]
            tf.summary.scalar('foreground_samples', num_positive, ['rpn'])

            total_cls_loss = tf.reduce_sum(ce_per_anchor)
            total_reg_loss = tf.reduce_sum(reg_loss_per_anchor)
            return {
                'rpn_cls_loss': total_cls_loss,
                'rpn_reg_loss': total_reg_loss,
            }
Exemplo n.º 7
0
    def loss(self, prediction_dict):
        """
        Return the classification and regression losses of the RCNN.

        Args:
            prediction_dict: Nested dict with keys:
                rcnn: The predictions.
                    cls_score: shape (num_proposals, num_classes + 1)
                        Score of every proposal for every class. Classes
                        are 1-indexed with 0 being the background.

                    bbox_offsets: shape (num_proposals, num_classes * 4)
                        Offsets (4 values) of every proposal for every
                        non-background class. Only the offsets belonging to
                        a proposal's own label are compared.

                target: The ground truth.
                    cls: shape (num_proposals,)
                        Label of every proposal.
                        0 => background
                        1..n => 1-indexed classes
                        Negative labels are left out of both losses.

                    bbox_offsets: shape (num_proposals, 4)
                        True offset of every proposal for its true label.

                _debug: Only used when `self._debug` is set; the
                    per-proposal losses are stored under its `losses` key.

        Returns:
            loss_dict with keys:
                rcnn_cls_loss: Mean softmax cross-entropy over the proposals
                    whose label is >= 0 (classes plus background).
                rcnn_reg_loss: Mean of the `smooth_l1_loss` output over the
                    proposals whose label is > 0.

        """
        with tf.name_scope('RCNNLoss'):
            predictions = prediction_dict['rcnn']
            # Predicted scores, (num_proposals, num_classes + 1).
            scores = predictions['cls_score']
            ground_truth = prediction_dict['target']
            # True classes, (num_proposals, ), cast explicitly to int32.
            true_classes = tf.cast(ground_truth['cls'], tf.int32)

            # First: the log loss between the scores and the true classes.

            # Proposals with a label >= 0 are the valid samples.
            valid_mask = tf.reshape(tf.greater_equal(true_classes, 0), [-1],
                                    name='not_ignored')
            # Scores of the valid samples.
            valid_scores = tf.boolean_mask(scores,
                                           valid_mask,
                                           name='cls_score_labeled')
            # True classes of the valid samples.
            valid_classes = tf.boolean_mask(true_classes,
                                            valid_mask,
                                            name='cls_target_labeled')

            valid_count = tf.shape(valid_scores)[0]
            tf.summary.scalar('batch_size', valid_count, ['rcnn'])

            # One-hot encode the true classes; the result has
            # num_classes + 1 columns (background included).
            classes_one_hot = tf.one_hot(valid_classes,
                                         depth=self._num_classes + 1,
                                         name='cls_target_one_hot')

            # Cross entropy between the true class and the predicted scores
            # of every valid proposal. The one-hot labels are wrapped in
            # stop_gradient so no gradient flows into them.
            constant_labels = tf.stop_gradient(classes_one_hot)
            cross_entropy_per_proposal = (
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    labels=constant_labels,
                    logits=valid_scores))

            if self._debug:
                # Save the cross entropy per proposal to be able to
                # visualize proposals with high and low error.
                prediction_dict['_debug']['losses'] = {
                    'cross_entropy_per_proposal': cross_entropy_per_proposal,
                }

            # Second: the smooth L1 loss between the predicted offsets and
            # the target offsets.
            # (num_proposals, num_classes * 4)
            offsets = predictions['bbox_offsets']
            # (num_proposals, 4)
            offsets_target = ground_truth['bbox_offsets']

            # Regression only looks at non-background proposals, i.e. those
            # with a label greater than 0.
            object_mask = tf.reshape(tf.greater(true_classes, 0), [-1])
            object_offsets = tf.boolean_mask(offsets,
                                             object_mask,
                                             name='bbox_offsets_labeled')
            object_offsets_target = tf.boolean_mask(
                offsets_target,
                object_mask,
                name='bbox_offsets_target_labeled')

            object_classes = tf.boolean_mask(true_classes,
                                             object_mask,
                                             name='cls_target_labeled')
            # Non-background labels start at 1, while a one-hot of depth
            # `num_classes` needs indices starting at 0, so subtract 1.
            object_classes_zero_based = object_classes - 1
            object_one_hot = tf.one_hot(object_classes_zero_based,
                                        depth=self._num_classes,
                                        name='cls_target_one_hot')
            # After encoding there is one column per non-background class.

            # One row of 4 offsets per (proposal, class) pair.
            offsets_rows = tf.reshape(object_offsets, [-1, 4],
                                      name='bbox_flatten')

            # Flatten the one-hot encoding into a 1-D boolean mask with one
            # entry per (proposal, class) pair.
            row_selector = tf.cast(tf.reshape(object_one_hot, [-1]),
                                   tf.bool, name='cls_flatten_as_bool')

            # Keep, for every proposal, only the offsets predicted for its
            # true class.
            offsets_for_true_class = tf.boolean_mask(
                offsets_rows, row_selector, name='bbox_offset_cleaned')

            # Smooth L1 loss between the selected predicted offsets and the
            # target offsets.
            reg_loss_per_proposal = smooth_l1_loss(offsets_for_true_class,
                                                   object_offsets_target,
                                                   sigma=self._l1_sigma)

            object_count = tf.shape(offsets_for_true_class)[0]
            tf.summary.scalar('rcnn_foreground_samples',
                              object_count, ['rcnn'])

            if self._debug:
                # Also save reg loss per proposals to be able to visualize
                # good and bad proposals in debug mode.
                losses_debug = prediction_dict['_debug']['losses']
                losses_debug['reg_loss_per_proposal'] = reg_loss_per_proposal

            # reduce_mean without an axis collapses each loss to one value.
            cls_loss = tf.reduce_mean(cross_entropy_per_proposal)
            reg_loss = tf.reduce_mean(reg_loss_per_proposal)
            return {
                'rcnn_cls_loss': cls_loss,
                'rcnn_reg_loss': reg_loss,
            }
Exemplo n.º 8
0
    def loss(self, prediction_dict):
        """
        Build the classification and box-regression losses of the RCNN head.

        Args:
            prediction_dict: Nested dict. The entries read here are:
                rcnn:
                    cls_score: shape (num_proposals, num_classes + 1)
                        Class logits for each proposal. Classes are
                        1-indexed, with 0 being the background.

                    bbox_offsets: shape (num_proposals, num_classes * 4)
                        Predicted offsets of each proposal for each class.
                        Only the offsets that belong to the proposal's
                        target class are compared with the target.

                target:
                    cls: shape (num_proposals,)
                        Label of each proposal.
                        < 0 => left out of both losses
                        0 => background
                        1..n => 1-indexed classes

                    bbox_offsets: shape (num_proposals, 4)
                        Target offsets of each proposal for its label.

                _debug: Only touched when `self._debug` is truthy; its
                    'losses' entry is replaced by a new dict holding the
                    per-proposal losses.

        Returns:
            loss_dict with keys:
                rcnn_cls_loss: Mean softmax cross-entropy over the proposals
                    whose label is >= 0 (classes plus background).
                rcnn_reg_loss: Mean of the smooth L1 loss over the proposals
                    whose label is > 0.

        """
        with tf.name_scope('RCNNLoss'):
            logits = prediction_dict['rcnn']['cls_score']
            # The labels are compared against integers below, so make sure
            # they are int32.
            labels = tf.cast(prediction_dict['target']['cls'], tf.int32)

            # ---- Part 1: classification loss ----
            # Negative labels flag proposals that must not contribute.
            has_label = tf.greater_equal(labels, 0)
            labeled_mask = tf.reshape(has_label, [-1], name='not_ignored')

            # Keep only the rows (logits and labels) that carry a label.
            labeled_logits = tf.boolean_mask(
                logits, labeled_mask, name='cls_score_labeled'
            )
            labeled_targets = tf.boolean_mask(
                labels, labeled_mask, name='cls_target_labeled'
            )

            labeled_count = tf.shape(labeled_logits)[0]
            tf.summary.scalar(
                'batch_size', labeled_count, collections=['rcnn']
            )

            # One column per class plus one for the background.
            one_hot_with_background = tf.one_hot(
                labeled_targets,
                depth=self._num_classes + 1,
                name='cls_target_one_hot'
            )

            cross_entropy_per_proposal = (
                tf.nn.softmax_cross_entropy_with_logits(
                    labels=one_hot_with_background, logits=labeled_logits
                )
            )

            if self._debug:
                # Start a fresh container and keep the per-proposal value so
                # high/low error proposals can be inspected.
                prediction_dict['_debug']['losses'] = {
                    'cross_entropy_per_proposal': cross_entropy_per_proposal,
                }

            # ---- Part 2: bounding box regression loss ----
            predicted_offsets = prediction_dict['rcnn']['bbox_offsets']
            target_offsets = prediction_dict['target']['bbox_offsets']

            # Background (0) and ignored (< 0) proposals have no box to
            # regress, only strictly positive labels are kept.
            foreground_mask = tf.reshape(tf.greater(labels, 0), [-1])
            foreground_predictions = tf.boolean_mask(
                predicted_offsets, foreground_mask,
                name='bbox_offsets_labeled'
            )
            foreground_targets = tf.boolean_mask(
                target_offsets, foreground_mask,
                name='bbox_offsets_target_labeled'
            )

            foreground_labels = tf.boolean_mask(
                labels, foreground_mask, name='cls_target_labeled'
            )
            # Labels are 1-indexed (0 is the background); shift them so they
            # index a one-hot vector of depth `num_classes`.
            zero_based_labels = foreground_labels - 1

            foreground_one_hot = tf.one_hot(
                zero_based_labels,
                depth=self._num_classes,
                name='cls_target_one_hot'
            )

            # One row of 4 offsets per (proposal, class) pair ...
            boxes_per_class = tf.reshape(
                foreground_predictions, [-1, 4], name='bbox_flatten'
            )
            # ... and one boolean per (proposal, class) pair telling whether
            # that class is the proposal's target class.
            flat_one_hot = tf.reshape(foreground_one_hot, [-1])
            is_target_class = tf.cast(
                flat_one_hot, tf.bool, name='cls_flatten_as_bool'
            )

            target_class_boxes = tf.boolean_mask(
                boxes_per_class, is_target_class, name='bbox_offset_cleaned'
            )

            # Smooth L1 between the offsets predicted for the target class
            # and the target offsets.
            reg_loss_per_proposal = smooth_l1_loss(
                target_class_boxes, foreground_targets,
                sigma=self._l1_sigma
            )

            foreground_count = tf.shape(target_class_boxes)[0]
            tf.summary.scalar(
                'rcnn_foreground_samples', foreground_count,
                collections=['rcnn']
            )

            if self._debug:
                # Keep the per-proposal regression loss next to the
                # classification one for visualization.
                prediction_dict['_debug']['losses'][
                    'reg_loss_per_proposal'
                ] = reg_loss_per_proposal

            mean_cls_loss = tf.reduce_mean(cross_entropy_per_proposal)
            mean_reg_loss = tf.reduce_mean(reg_loss_per_proposal)
            return {
                'rcnn_cls_loss': mean_cls_loss,
                'rcnn_reg_loss': mean_reg_loss,
            }
Exemplo n.º 9
0
    def loss(self, prediction_dict):
        """
        Build the multi-task loss of the Region Proposal Network.

        Args:
            prediction_dict: Dict from which these entries are read:
                rpn_cls_score: Score for being an object or not for each
                    anchor in the image. Shape: (num_anchors, 2)
                rpn_cls_target: Ground truth labeling for each anchor.
                    * 1: positive labels
                    * 0: negative labels
                    * -1: labels that are ignored (not every anchor is used
                      for the loss).
                    Shape: (num_anchors, )
                    NOTE(review): an earlier annotation on this code says
                    the labels are presumably assigned from the IoU against
                    the ground truth boxes (best IoU > 0.5 => foreground,
                    between 0.1 and 0.5 => background); that assignment is
                    not done in this method, confirm against the target
                    generation code.
                rpn_bbox_target: Bounding box delta targets.
                    Shape: (num_anchors, 4)
                rpn_bbox_pred: Bounding box delta predictions.
                    Shape: (num_anchors, 4)

            As a side effect, the per-anchor losses are stored back in
            `prediction_dict` under 'cross_entropy_per_anchor' and
            'reg_loss_per_anchor'.

        Returns:
            Dict with the mean classification loss ('rpn_cls_loss') and the
            mean regression loss ('rpn_reg_loss').
        """

        anchor_scores = prediction_dict['rpn_cls_score']
        anchor_labels = prediction_dict['rpn_cls_target']

        box_targets = prediction_dict['rpn_bbox_target']
        box_predictions = prediction_dict['rpn_bbox_pred']

        with tf.variable_scope('RPNLoss'):
            # A rank-1 int32 tensor is needed to build boolean masks.
            flat_labels = tf.reshape(anchor_labels, [-1])
            anchor_labels = tf.cast(
                flat_labels, tf.int32, name='rpn_cls_target'
            )

            # True for every anchor that takes part in the classification
            # loss, i.e. whose label is not -1.
            used_anchors = tf.not_equal(
                anchor_labels, -1, name='labels_not_ignored'
            )

            # Keep the label and the score of the used anchors only.
            labels = tf.boolean_mask(anchor_labels, used_anchors)
            used_scores = tf.boolean_mask(anchor_scores, used_anchors)

            # Cross entropy needs the labels in the same layout as the
            # scores: [1, 0] becomes [[0, 1], [1, 0]].
            one_hot_labels = tf.one_hot(labels, depth=2)

            # Softmax cross entropy between labels and logits; equivalent to
            # the log loss.
            ce_per_anchor = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=one_hot_labels, logits=used_scores
            )
            prediction_dict['cross_entropy_per_anchor'] = ce_per_anchor

            # Regression loss (smooth L1) between `rpn_bbox_target` and
            # `rpn_bbox_pred`, both viewed as rows of 4 deltas.
            box_targets = tf.reshape(box_targets, [-1, 4])
            box_predictions = tf.reshape(box_predictions, [-1, 4])

            # Only positive anchors are regressed; there is no bounding box
            # information for the background ones.
            is_positive = tf.equal(anchor_labels, 1)
            box_targets = tf.boolean_mask(box_targets, is_positive)
            box_predictions = tf.boolean_mask(box_predictions, is_positive)

            # Smooth L1 loss as described by the Fast R-CNN paper.
            reg_loss_per_anchor = smooth_l1_loss(
                box_predictions, box_targets, sigma=self._l1_sigma
            )

            prediction_dict['reg_loss_per_anchor'] = reg_loss_per_anchor

            # Loss summaries.
            used_count = tf.shape(labels)[0]
            tf.summary.scalar('batch_size', used_count, collections=['rpn'])

            is_foreground = tf.equal(labels, 1)
            foreground_cls_loss = tf.boolean_mask(
                ce_per_anchor, is_foreground
            )
            is_background = tf.equal(labels, 0)
            background_cls_loss = tf.boolean_mask(
                ce_per_anchor, is_background
            )

            foreground_mean = tf.reduce_mean(foreground_cls_loss)
            tf.summary.scalar(
                'foreground_cls_loss', foreground_mean, collections=['rpn']
            )
            tf.summary.histogram(
                'foreground_cls_loss', foreground_cls_loss,
                collections=['rpn']
            )

            background_mean = tf.reduce_mean(background_cls_loss)
            tf.summary.scalar(
                'background_cls_loss', background_mean, collections=['rpn']
            )
            tf.summary.histogram(
                'background_cls_loss', background_cls_loss,
                collections=['rpn']
            )

            positive_count = tf.shape(box_targets)[0]
            tf.summary.scalar(
                'foreground_samples', positive_count, collections=['rpn']
            )

            # Both losses are reported as the mean over their anchors.
            mean_cls_loss = tf.reduce_mean(ce_per_anchor)
            mean_reg_loss = tf.reduce_mean(reg_loss_per_anchor)
            return {
                'rpn_cls_loss': mean_cls_loss,
                'rpn_reg_loss': mean_reg_loss,
            }
Exemplo n.º 10
0
    def loss(self, prediction_dict, return_all=False):
        """Compute the loss for SSD.

        Args:
            prediction_dict: The output dictionary of the _build method from
                which we use different main keys:

                cls_pred: The class predictions; they are passed as logits
                    to the classification loss.
                loc_pred: The localization (bounding box offset)
                    predictions.
                target: A dictionary with the targets for both classes
                    ("cls") and localizations ("bbox_offsets").

                As a side effect the per-proposal losses are stored back in
                it under "reg_loss_per_proposal" and
                "cls_loss_per_proposal".
            return_all: When True, return a dictionary with the total loss
                and its two components instead of only the total loss.

        Returns:
            The total loss tensor (as given by `tf.losses.get_total_loss()`
            after registering this loss), or, when `return_all` is True, a
            dictionary with the keys "total_loss", "cls_loss" and
            "bbox_loss".

        Raises:
            ValueError: If `self.loss_type` is neither CROSS_ENTROPY nor
                FOCAL.
        """

        with tf.name_scope("losses"):

            cls_pred = prediction_dict["cls_pred"]
            cls_target = tf.cast(prediction_dict["target"]["cls"], tf.int32)
            # Transform to one-hot vector
            cls_target_one_hot = tf.one_hot(cls_target,
                                            depth=self._num_classes + 1,
                                            name="cls_target_one_hot")

            # We get the classification loss of each proposal.
            # TODO: Optimization opportunity: We calculate the probabilities
            #       earlier in the program, so if we used those instead of the
            #       logits we would not have the need to do softmax here too.
            if self.loss_type == CROSS_ENTROPY:
                classification_loss_per_proposal = (
                    tf.nn.softmax_cross_entropy_with_logits_v2(
                        labels=cls_target_one_hot, logits=cls_pred))
            elif self.loss_type == FOCAL:
                classification_loss_per_proposal = focal_loss(
                    cls_pred, cls_target_one_hot, self.focal_gamma)
            else:
                # Fail here with a clear message instead of hitting an
                # unbound local variable further down.
                raise ValueError(
                    "Unsupported loss_type: {!r}".format(self.loss_type))

            # Second we need to calculate the smooth l1 loss between
            # `bbox_offsets` and `bbox_offsets_targets`.
            bbox_offsets = prediction_dict["loc_pred"]
            bbox_offsets_targets = prediction_dict["target"]["bbox_offsets"]

            # We only want the non-background labels bounding boxes.
            not_ignored = tf.reshape(tf.greater(cls_target, 0), [-1])
            bbox_offsets_positives = tf.boolean_mask(
                bbox_offsets, not_ignored, name="bbox_offsets_positives")
            bbox_offsets_target_positives = tf.boolean_mask(
                bbox_offsets_targets,
                not_ignored,
                name="bbox_offsets_target_positives")

            # Calculate the smooth l1 regression loss between the flatten
            # bboxes offsets  and the labeled targets.
            reg_loss_per_proposal = smooth_l1_loss(
                bbox_offsets_positives, bbox_offsets_target_positives)

            cls_loss = tf.reduce_sum(classification_loss_per_proposal)
            bbox_loss = tf.reduce_sum(reg_loss_per_proposal)

            # Number of foreground boxes; used both as the guard and as the
            # normalizer of the loss.
            num_positives = tf.shape(bbox_offsets_positives)[0]

            # Following the paper, set loss to 0 if there are 0 bboxes
            # assigned as foreground targets (this also avoids dividing by
            # zero).
            safety_condition = tf.not_equal(num_positives, 0)
            final_loss = tf.cond(
                safety_condition,
                true_fn=lambda:
                ((cls_loss + bbox_loss * self._loc_loss_weight) / tf.cast(
                    num_positives, tf.float32)),
                false_fn=lambda: 0.0,
            )
            # Register the loss so that it is part of the total loss
            # collected below.
            tf.losses.add_loss(final_loss)
            total_loss = tf.losses.get_total_loss()

            prediction_dict["reg_loss_per_proposal"] = reg_loss_per_proposal
            prediction_dict[
                "cls_loss_per_proposal"] = classification_loss_per_proposal

            tf.summary.scalar("cls_loss",
                              cls_loss,
                              collections=self._losses_collections)

            tf.summary.scalar("bbox_loss",
                              bbox_loss,
                              collections=self._losses_collections)

            tf.summary.scalar("total_loss",
                              total_loss,
                              collections=self._losses_collections)
            if return_all:
                return {
                    "total_loss": total_loss,
                    "cls_loss": cls_loss,
                    "bbox_loss": bbox_loss,
                }
            return total_loss
Exemplo n.º 11
0
    def loss(self, prediction_dict):
        """
        Returns cost for RCNN based on:

        Args:
            prediction_dict with keys:
                rcnn:
                    cls_score: shape (num_proposals, num_classes + 1)
                        Has the class scoring for each the proposals. Classes
                        are 1-indexed with 0 being the background.

                    bbox_offsets: shape (num_proposals, num_classes * 4)
                        Has the offset for each proposal for each class.
                        We have to compare only the proposals labeled with the
                        offsets for that label.

                target:
                    cls: shape (num_proposals,)
                        Has the correct label for each of the proposals.
                        < 0 => left out of both losses
                        0 => background
                        1..n => 1-indexed classes

                    bbox_offsets: shape (num_proposals, 4)
                        Has the true offset of each proposal for the true
                        label.

                _debug: Only touched when `self._debug` is truthy; its
                    "losses" entry is replaced by a new dict holding the
                    per-proposal losses.

        Returns:
            loss_dict with keys:
                rcnn_cls_loss: Mean of the classification loss (softmax
                    cross-entropy, optionally weighted by
                    `self.loss_weight`, or focal loss, depending on
                    `self.loss_type`) between the num_classes + background.
                rcnn_reg_loss: Mean of the smooth L1 loss for the bounding
                    box regression task over the non-background proposals.

        Raises:
            ValueError: If `self.loss_type` is neither CROSS_ENTROPY nor
                FOCAL.

        """
        with tf.name_scope("RCNNLoss"):
            cls_score = prediction_dict["rcnn"]["cls_score"]
            # Cast target explicitly as int32.
            cls_target = tf.cast(prediction_dict["target"]["cls"], tf.int32)

            # First we need to calculate the classification loss between
            # cls_score and cls_target.

            # We only care for the targets that are >= 0
            not_ignored = tf.reshape(
                tf.greater_equal(cls_target, 0), [-1], name="not_ignored"
            )
            # We apply boolean mask to score and target.
            cls_score_labeled = tf.boolean_mask(
                cls_score, not_ignored, name="cls_score_labeled"
            )
            cls_target_labeled = tf.boolean_mask(
                cls_target, not_ignored, name="cls_target_labeled"
            )

            tf.summary.scalar("batch_size", tf.shape(cls_score_labeled)[0], ["rcnn"])

            # Transform to one-hot vector
            cls_target_one_hot = tf.one_hot(
                cls_target_labeled,
                depth=self._num_classes + 1,
                name="cls_target_one_hot",
            )

            if self.loss_type == CROSS_ENTROPY:
                class_weights = self.loss_weight
                # The labels are constants as far as the gradient goes.
                onehot_labels = tf.stop_gradient(cls_target_one_hot)
                # Unweighted softmax cross entropy of each proposal.
                cross_entropy_per_proposal = tf.nn.softmax_cross_entropy_with_logits(
                    labels=onehot_labels, logits=cls_score_labeled
                )
                # A weight of exactly 1 means "no weighting".
                if class_weights != 1:
                    class_weights = tf.constant([class_weights], dtype=tf.float32)
                    # Weight of each sample, deduced from its true label by
                    # broadcasting the weights against the one-hot labels.
                    # NOTE(review): presumably `loss_weight` is a scalar or
                    # one weight per class (background included) - confirm.
                    weights = tf.reduce_sum(class_weights * onehot_labels, axis=1)
                    cross_entropy_per_proposal = cross_entropy_per_proposal * weights
            elif self.loss_type == FOCAL:
                cross_entropy_per_proposal = focal_loss(
                    cls_score_labeled,
                    tf.stop_gradient(cls_target_one_hot),
                    self.focal_gamma,
                )
            else:
                # Fail here with a clear message instead of hitting an
                # unbound local variable further down.
                raise ValueError(
                    "Unsupported loss_type: {!r}".format(self.loss_type)
                )

            if self._debug:
                prediction_dict["_debug"]["losses"] = {}
                # Save the classification loss per proposal to be able to
                # visualize proposals with high and low error.
                prediction_dict["_debug"]["losses"][
                    "cross_entropy_per_proposal"
                ] = cross_entropy_per_proposal

            # Second we need to calculate the smooth l1 loss between
            # `bbox_offsets` and `bbox_offsets_target`.
            bbox_offsets = prediction_dict["rcnn"]["bbox_offsets"]
            bbox_offsets_target = prediction_dict["target"]["bbox_offsets"]

            # We only want the non-background labels bounding boxes.
            not_ignored = tf.reshape(tf.greater(cls_target, 0), [-1])
            bbox_offsets_labeled = tf.boolean_mask(
                bbox_offsets, not_ignored, name="bbox_offsets_labeled"
            )
            bbox_offsets_target_labeled = tf.boolean_mask(
                bbox_offsets_target, not_ignored, name="bbox_offsets_target_labeled"
            )

            cls_target_labeled = tf.boolean_mask(
                cls_target, not_ignored, name="cls_target_labeled"
            )
            # `cls_target_labeled` is based on `cls_target` which has
            # `num_classes` + 1 classes.
            # for making `one_hot` with depth `num_classes` to work we need
            # to lower them to make them 0-index.
            cls_target_labeled = cls_target_labeled - 1

            cls_target_one_hot = tf.one_hot(
                cls_target_labeled, depth=self._num_classes, name="cls_target_one_hot"
            )

            # One row of 4 offsets per (proposal, class) pair.
            bbox_flatten = tf.reshape(
                bbox_offsets_labeled, [-1, 4], name="bbox_flatten"
            )

            # We use the flatten cls_target_one_hot as boolean mask for the
            # bboxes.
            cls_flatten = tf.cast(
                tf.reshape(cls_target_one_hot, [-1]), tf.bool, "cls_flatten_as_bool"
            )

            bbox_offset_cleaned = tf.boolean_mask(
                bbox_flatten, cls_flatten, "bbox_offset_cleaned"
            )

            # Calculate the smooth l1 loss between the "cleaned" bboxes
            # offsets (that means, the useful results) and the labeled
            # targets.
            reg_loss_per_proposal = smooth_l1_loss(
                bbox_offset_cleaned, bbox_offsets_target_labeled, sigma=self._l1_sigma
            )

            tf.summary.scalar(
                "rcnn_foreground_samples", tf.shape(bbox_offset_cleaned)[0], ["rcnn"]
            )

            if self._debug:
                # Also save reg loss per proposals to be able to visualize
                # good and bad proposals in debug mode.
                prediction_dict["_debug"]["losses"][
                    "reg_loss_per_proposal"
                ] = reg_loss_per_proposal

            return {
                "rcnn_cls_loss": tf.reduce_mean(cross_entropy_per_proposal),
                "rcnn_reg_loss": tf.reduce_mean(reg_loss_per_proposal),
            }
Exemplo n.º 12
0
    def loss(self, prediction_dict):
        """
        Compute the classification and regression losses of the RPN.

        Args:
            prediction_dict: Dict providing:
                rpn_cls_score: Objectness score of each anchor in the image.
                    Shape: (num_anchors, 2)
                rpn_cls_target: Ground truth label of each anchor:
                    * 1: positive
                    * 0: negative
                    * -1: ignored
                    Shape: (num_anchors, )
                rpn_bbox_target: Target bounding box deltas.
                    Shape: (num_anchors, 4)
                rpn_bbox_pred: Predicted bounding box deltas.
                    Shape: (num_anchors, 4)

            The per-anchor losses are also written into `prediction_dict`
            under 'cross_entropy_per_anchor' and 'reg_loss_per_anchor'.

        Returns:
            Dict with the mean classification loss ('rpn_cls_loss') and the
            mean bounding box regression loss ('rpn_reg_loss').
        """

        scores = prediction_dict['rpn_cls_score']
        targets = prediction_dict['rpn_cls_target']

        deltas_target = prediction_dict['rpn_bbox_target']
        deltas_pred = prediction_dict['rpn_bbox_pred']

        with tf.variable_scope('RPNLoss'):
            # Make the labels a flat int32 vector so they can drive the
            # boolean masks below.
            targets = tf.cast(
                tf.reshape(targets, [-1]), tf.int32, name='rpn_cls_target'
            )
            # Anchors labeled -1 take no part in the classification loss.
            keep = tf.not_equal(targets, -1, name='labels_not_ignored')

            # Labels and scores of the anchors that are kept.
            labels = tf.boolean_mask(targets, keep)
            kept_scores = tf.boolean_mask(scores, keep)

            # One-hot encode the labels so they match the score layout:
            # [1, 0] turns into [[0, 1], [1, 0]].
            labels_one_hot = tf.one_hot(labels, depth=2)

            # Softmax cross entropy, equivalent to the log loss.
            ce_per_anchor = tf.nn.softmax_cross_entropy_with_logits(
                labels=labels_one_hot, logits=kept_scores
            )
            prediction_dict['cross_entropy_per_anchor'] = ce_per_anchor

            # The regression loss is a smooth L1 loss between the target and
            # predicted deltas, arranged as rows of 4 values.
            deltas_target = tf.reshape(deltas_target, [-1, 4])
            deltas_pred = tf.reshape(deltas_pred, [-1, 4])

            # Background anchors carry no bounding box information, so only
            # the positive ones are regressed.
            positives = tf.equal(targets, 1)
            deltas_target = tf.boolean_mask(deltas_target, positives)
            deltas_pred = tf.boolean_mask(deltas_pred, positives)

            # Smooth L1 loss as described by the Fast R-CNN paper.
            reg_loss_per_anchor = smooth_l1_loss(
                deltas_pred, deltas_target, sigma=self._l1_sigma
            )

            prediction_dict['reg_loss_per_anchor'] = reg_loss_per_anchor

            # Summaries: number of anchors used, classification loss split
            # by foreground/background and number of positive anchors.
            tf.summary.scalar(
                'batch_size', tf.shape(labels)[0], collections=['rpn']
            )
            fg_selector = tf.equal(labels, 1)
            fg_losses = tf.boolean_mask(ce_per_anchor, fg_selector)
            bg_selector = tf.equal(labels, 0)
            bg_losses = tf.boolean_mask(ce_per_anchor, bg_selector)
            tf.summary.scalar(
                'foreground_cls_loss',
                tf.reduce_mean(fg_losses),
                collections=['rpn']
            )
            tf.summary.histogram(
                'foreground_cls_loss', fg_losses, collections=['rpn']
            )
            tf.summary.scalar(
                'background_cls_loss',
                tf.reduce_mean(bg_losses),
                collections=['rpn']
            )
            tf.summary.histogram(
                'background_cls_loss', bg_losses, collections=['rpn']
            )
            tf.summary.scalar(
                'foreground_samples',
                tf.shape(deltas_target)[0],
                collections=['rpn']
            )

            losses = {
                'rpn_cls_loss': tf.reduce_mean(ce_per_anchor),
                'rpn_reg_loss': tf.reduce_mean(reg_loss_per_anchor),
            }
            return losses