def testSmoothL1LossPerfectScore(self):
    """Check the smooth L1 loss when prediction and target coincide.

    The same single box of ones is fed as both prediction and target and
    the resulting loss is asserted to lie within 0.4 of zero.
    """
    box = [1.0, 1.0, 1.0, 1.0]

    # Placeholders without a static shape; the one-box batch is fed at
    # run time.
    prediction_ph = tf.placeholder(tf.float32)
    target_ph = tf.placeholder(tf.float32)
    loss_op = smooth_l1_loss(prediction_ph, target_ph)

    feed = {prediction_ph: [box], target_ph: [box]}
    with tf.Session() as sess:
        loss = sess.run(loss_op, feed_dict=feed)
        self.assertAlmostEqual(loss, 0.0, delta=0.4)
def testSmoothL1LossRandom(self):
    """Check the smooth L1 loss on a fixed pair of arbitrary boxes.

    One prediction box and one target box with hard-coded values are fed
    to `smooth_l1_loss`; the result is asserted to lie within 0.4 of 2.
    """
    predicted_box = [0.47450006, -0.80413032, -0.26595005, 0.17124325]
    target_box = [0.10058594, 0.07910156, 0.10555581, -0.1224325]

    # Placeholders without a static shape; the one-box batch is fed at
    # run time.
    prediction_ph = tf.placeholder(tf.float32)
    target_ph = tf.placeholder(tf.float32)
    loss_op = smooth_l1_loss(prediction_ph, target_ph)

    feed = {
        prediction_ph: [predicted_box],
        target_ph: [target_box],
    }
    with tf.Session() as sess:
        loss = sess.run(loss_op, feed_dict=feed)
        self.assertAlmostEqual(loss, 2, delta=0.4)
def loss(self, prediction_dict):
    """Build the RCNN classification and regression losses.

    Args:
        prediction_dict: Dictionary from which the following entries are
            read:
            rcnn:
                cls_score: shape (num_proposals, num_classes + 1)
                    Class scores for each proposal. Classes are
                    1-indexed, with 0 being the background.
                bbox_offsets: shape (num_proposals, num_classes * 4)
                    Predicted offsets of each proposal for each class.
                    Only the offsets of the target class of each
                    proposal are compared.
            target:
                cls: shape (num_proposals,)
                    Target label of each proposal; negative values are
                    ignored, 0 is background and 1..n are the 1-indexed
                    classes.
                bbox_offsets: shape (num_proposals, 4)
                    Target offsets of each proposal for its label.
            When `self._debug` is set, per-proposal losses are stored
            under `prediction_dict['_debug']['losses']`.

    Returns:
        Dictionary with keys:
            rcnn_cls_loss: mean softmax cross-entropy over the proposals
                whose target is >= 0.
            rcnn_reg_loss: mean smooth L1 loss over the proposals whose
                target is > 0.
    """
    with tf.name_scope('RCNNLoss'):
        logits = prediction_dict['rcnn']['cls_score']
        # Targets are cast explicitly to int32.
        targets = tf.cast(prediction_dict['target']['cls'], tf.int32)

        # Classification part: keep only the proposals with target >= 0.
        keep_mask = tf.reshape(
            tf.greater_equal(targets, 0), [-1], name='not_ignored')
        kept_logits = tf.boolean_mask(
            logits, keep_mask, name='cls_score_labeled')
        kept_targets = tf.boolean_mask(
            targets, keep_mask, name='cls_target_labeled')

        num_kept = tf.shape(kept_logits)[0]
        tf.summary.scalar('batch_size', num_kept, ['rcnn'])

        # One column per class plus one for the background.
        kept_one_hot = tf.one_hot(
            kept_targets,
            depth=self._num_classes + 1,
            name='cls_target_one_hot',
        )
        fixed_labels = tf.stop_gradient(kept_one_hot)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=fixed_labels, logits=kept_logits)

        if self._debug:
            # Stored so proposals with high and low error can be
            # visualized.
            prediction_dict['_debug']['losses'] = {
                'cross_entropy_per_proposal': cross_entropy,
            }

        # Regression part: smooth L1 between predicted and target
        # offsets, restricted to non-background proposals.
        predicted_offsets = prediction_dict['rcnn']['bbox_offsets']
        target_offsets = prediction_dict['target']['bbox_offsets']

        foreground_mask = tf.reshape(tf.greater(targets, 0), [-1])
        fg_predicted_offsets = tf.boolean_mask(
            predicted_offsets, foreground_mask,
            name='bbox_offsets_labeled')
        fg_target_offsets = tf.boolean_mask(
            target_offsets, foreground_mask,
            name='bbox_offsets_target_labeled')
        fg_classes = tf.boolean_mask(
            targets, foreground_mask, name='cls_target_labeled')

        # Foreground labels start at 1; shift them to be 0-indexed so a
        # one-hot of depth `num_classes` can be built.
        fg_one_hot = tf.one_hot(
            fg_classes - 1,
            depth=self._num_classes,
            name='cls_target_one_hot',
        )

        # One row of 4 offsets per (proposal, class) pair.
        offsets_per_class = tf.reshape(
            fg_predicted_offsets, [-1, 4], name='bbox_flatten')
        # The flattened one-hot selects, for every proposal, the row
        # belonging to its target class.
        class_selector = tf.cast(
            tf.reshape(fg_one_hot, [-1]), tf.bool,
            name='cls_flatten_as_bool')
        selected_offsets = tf.boolean_mask(
            offsets_per_class, class_selector,
            name='bbox_offset_cleaned')

        regression = smooth_l1_loss(
            selected_offsets, fg_target_offsets, sigma=self._l1_sigma)

        tf.summary.scalar(
            'rcnn_foreground_samples',
            tf.shape(selected_offsets)[0],
            ['rcnn'],
        )

        if self._debug:
            # Stored so good and bad proposals can be visualized.
            prediction_dict['_debug']['losses'][
                'reg_loss_per_proposal'] = regression

        mean_cls_loss = tf.reduce_mean(cross_entropy)
        mean_reg_loss = tf.reduce_mean(regression)
        return {
            'rcnn_cls_loss': mean_cls_loss,
            'rcnn_reg_loss': mean_reg_loss,
        }
def loss(self, prediction_dict, return_all=False):
    """Compute the loss for SSD.

    Args:
        prediction_dict: The output dictionary of the _build method from
            which the following keys are used:
            cls_pred: Class predictions, used as the logits of the
                softmax cross-entropy.
            loc_pred: Localization (bounding box offset) predictions.
            target: A dictionary with the targets, read through its
                `cls` and `bbox_offsets` keys.
            The per-proposal losses are written back under the keys
            `reg_loss_per_proposal` and `cls_loss_per_proposal`.
        return_all: When True, return a dictionary with the individual
            loss terms instead of only the total loss.

    Returns:
        The total loss tensor, or, when `return_all` is True, a
        dictionary with the keys `total_loss`, `cls_loss` and
        `bbox_loss`.
    """
    with tf.name_scope('losses'):
        # Predicted class scores.
        cls_pred = prediction_dict['cls_pred']
        # Target class label of each anchor.
        cls_target = tf.cast(prediction_dict['target']['cls'], tf.int32)
        # Transform to one-hot vector.
        cls_target_one_hot = tf.one_hot(
            cls_target, depth=self._num_classes + 1,
            name='cls_target_one_hot')

        # Classification (confidence) loss of each proposal.
        # TODO: Optimization opportunity: We calculate the probabilities
        # earlier in the program, so if we used those instead of the
        # logits we would not have the need to do softmax here too.
        # The `_v2` op replaces the deprecated
        # `softmax_cross_entropy_with_logits`; the labels are wrapped in
        # `stop_gradient` because the deprecated op did not backpropagate
        # into them.
        cross_entropy_per_proposal = (
            tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=tf.stop_gradient(cls_target_one_hot),
                logits=cls_pred))

        # Second we need to calculate the smooth l1 loss between
        # `bbox_offsets` and `bbox_offsets_targets`: predicted offsets
        # on one side, target offsets on the other.
        bbox_offsets = prediction_dict['loc_pred']
        bbox_offsets_targets = prediction_dict['target']['bbox_offsets']

        # We only want the non-background labels bounding boxes, both
        # for the predictions and for the targets.
        not_ignored = tf.reshape(tf.greater(cls_target, 0), [-1])
        bbox_offsets_positives = tf.boolean_mask(
            bbox_offsets, not_ignored, name='bbox_offsets_positives')
        bbox_offsets_target_positives = tf.boolean_mask(
            bbox_offsets_targets, not_ignored,
            name='bbox_offsets_target_positives')

        # Localization loss of each positive proposal.
        reg_loss_per_proposal = smooth_l1_loss(
            bbox_offsets_positives, bbox_offsets_target_positives)

        # The per-proposal terms still have to be summed.
        cls_loss = tf.reduce_sum(cross_entropy_per_proposal)
        bbox_loss = tf.reduce_sum(reg_loss_per_proposal)

        # Following the paper, set loss to 0 if there are 0 bboxes
        # assigned as foreground targets. Otherwise the weighted sum is
        # divided by the number of foreground boxes.
        num_positives = tf.shape(bbox_offsets_positives)[0]
        safety_condition = tf.not_equal(num_positives, 0)
        final_loss = tf.cond(
            safety_condition,
            true_fn=lambda: (
                (cls_loss + bbox_loss * self._loc_loss_weight) /
                tf.cast(tf.shape(bbox_offsets_positives)[0], tf.float32)),
            false_fn=lambda: 0.0)
        tf.losses.add_loss(final_loss)
        total_loss = tf.losses.get_total_loss()

        prediction_dict['reg_loss_per_proposal'] = reg_loss_per_proposal
        prediction_dict['cls_loss_per_proposal'] = (
            cross_entropy_per_proposal)

        tf.summary.scalar(
            'cls_loss', cls_loss, collections=self._losses_collections)
        tf.summary.scalar(
            'bbox_loss', bbox_loss, collections=self._losses_collections)
        tf.summary.scalar(
            'total_loss', total_loss,
            collections=self._losses_collections)

        if return_all:
            return {
                'total_loss': total_loss,
                'cls_loss': cls_loss,
                'bbox_loss': bbox_loss,
            }
        return total_loss
def loss(self, prediction_dict):
    """Build the classification and regression losses of the RPN.

    Args:
        prediction_dict: Dictionary from which the following keys are
            read:
            rpn_cls_score: Score for being an object or not for each
                anchor in the image. Shape: (num_anchors, 2)
            rpn_cls_target: Ground truth labeling for each anchor.
                Should be
                * 1: for positive labels
                * 0: for negative labels
                * -1: for labels we should ignore.
                Shape: (num_anchors, )
            rpn_bbox_target: Bounding box output delta target for rpn.
                Shape: (num_anchors, 4)
            rpn_bbox_pred: Bounding box output delta prediction for rpn.
                Shape: (num_anchors, 4)
            The per-anchor losses are written back under the keys
            `cross_entropy_per_anchor` and `reg_loss_per_anchor`.

    Returns:
        Dictionary with keys:
            rpn_cls_loss: mean classification loss over the anchors that
                are not ignored.
            rpn_reg_loss: mean smooth L1 loss over the positive anchors.

    Raises:
        ValueError: if `self.loss_type` is neither `CROSS_ENTROPY` nor
            `FOCAL`.
    """
    rpn_cls_score = prediction_dict["rpn_cls_score"]
    rpn_cls_target = prediction_dict["rpn_cls_target"]
    rpn_bbox_target = prediction_dict["rpn_bbox_target"]
    rpn_bbox_pred = prediction_dict["rpn_bbox_pred"]

    with tf.variable_scope("RPNLoss"):
        # Flatten already flat Tensor for usage as boolean mask filter.
        rpn_cls_target = tf.cast(
            tf.reshape(rpn_cls_target, [-1]), tf.int32,
            name="rpn_cls_target")
        # Transform to boolean tensor mask for not ignored.
        labels_not_ignored = tf.not_equal(
            rpn_cls_target, -1, name="labels_not_ignored")

        # Now we only have the labels we are going to compare with the
        # cls probability.
        labels = tf.boolean_mask(rpn_cls_target, labels_not_ignored)
        cls_score = tf.boolean_mask(rpn_cls_score, labels_not_ignored)

        # We need to transform `labels` to `cls_score` shape.
        # convert [1, 0] to [[0, 1], [1, 0]] for ce with logits.
        cls_target = tf.one_hot(labels, depth=2)

        if self.loss_type == CROSS_ENTROPY:
            # Equivalent to log loss.
            # TODO PV make this a loss function in losses.py
            ce_per_anchor = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=cls_target, logits=cls_score)
            if self.loss_weight != 1:
                ce_per_anchor = ce_per_anchor * self.loss_weight
        elif self.loss_type == FOCAL:
            ce_per_anchor = focal_loss(
                cls_score, cls_target, self.focal_gamma)
        else:
            # Without this branch `ce_per_anchor` would stay unbound and
            # the method would fail below with an unrelated-looking
            # UnboundLocalError.
            raise ValueError(
                "Unsupported loss type: {!r}".format(self.loss_type))

        # TODO PV Rename cross entropy per anchor to reflect focal loss
        # is calculated
        prediction_dict["cross_entropy_per_anchor"] = ce_per_anchor

        # Finally, we need to calculate the regression loss over
        # `rpn_bbox_target` and `rpn_bbox_pred`.
        # We use SmoothL1Loss.
        rpn_bbox_target = tf.reshape(rpn_bbox_target, [-1, 4])
        rpn_bbox_pred = tf.reshape(rpn_bbox_pred, [-1, 4])

        # We only care for positive labels (we ignore backgrounds since
        # we don't have any bounding box information for it).
        positive_labels = tf.equal(rpn_cls_target, 1)
        rpn_bbox_target = tf.boolean_mask(rpn_bbox_target, positive_labels)
        rpn_bbox_pred = tf.boolean_mask(rpn_bbox_pred, positive_labels)

        # We apply smooth l1 loss as described by the Fast R-CNN paper.
        reg_loss_per_anchor = smooth_l1_loss(
            rpn_bbox_pred, rpn_bbox_target, sigma=self._l1_sigma)

        prediction_dict["reg_loss_per_anchor"] = reg_loss_per_anchor

        # Loss summaries.
        tf.summary.scalar("batch_size", tf.shape(labels)[0], ["rpn"])
        foreground_cls_loss = tf.boolean_mask(
            ce_per_anchor, tf.equal(labels, 1))
        background_cls_loss = tf.boolean_mask(
            ce_per_anchor, tf.equal(labels, 0))
        tf.summary.scalar(
            "foreground_cls_loss",
            tf.reduce_mean(foreground_cls_loss), ["rpn"])
        tf.summary.histogram(
            "foreground_cls_loss", foreground_cls_loss, ["rpn"])
        tf.summary.scalar(
            "background_cls_loss",
            tf.reduce_mean(background_cls_loss), ["rpn"])
        tf.summary.histogram(
            "background_cls_loss", background_cls_loss, ["rpn"])
        tf.summary.scalar(
            "foreground_samples", tf.shape(rpn_bbox_target)[0], ["rpn"])

        return {
            "rpn_cls_loss": tf.reduce_mean(ce_per_anchor),
            "rpn_reg_loss": tf.reduce_mean(reg_loss_per_anchor),
        }
def loss(self, prediction_dict):
    """Build the classification and regression losses of the RPN.

    Args:
        prediction_dict: Dictionary from which the following keys are
            read:
            rpn_cls_score: Score for being an object or not for each
                anchor in the image. Shape: (num_anchors, 2)
            rpn_cls_target: Ground truth labeling for each anchor.
                Should be
                * 1: for positive labels
                * 0: for negative labels
                * -1: for labels we should ignore.
                Shape: (num_anchors, )
            rpn_bbox_target: Bounding box output delta target for rpn.
                Shape: (num_anchors, 4)
            rpn_bbox_pred: Bounding box output delta prediction for rpn.
                Shape: (num_anchors, 4)
            The per-anchor losses are written back under the keys
            `cross_entropy_per_anchor` and `reg_loss_per_anchor`.

    Returns:
        Dictionary with keys:
            rpn_cls_loss: sum of the softmax cross-entropy over the
                anchors that are not ignored.
            rpn_reg_loss: sum of the smooth L1 loss over the positive
                anchors.
    """
    rpn_cls_score = prediction_dict['rpn_cls_score']
    rpn_cls_target = prediction_dict['rpn_cls_target']
    rpn_bbox_target = prediction_dict['rpn_bbox_target']
    rpn_bbox_pred = prediction_dict['rpn_bbox_pred']

    with tf.variable_scope('RPNLoss'):
        # Flatten already flat Tensor for usage as boolean mask filter.
        rpn_cls_target = tf.cast(
            tf.reshape(rpn_cls_target, [-1]), tf.int32,
            name='rpn_cls_target')
        # Transform to boolean tensor mask for not ignored.
        labels_not_ignored = tf.not_equal(
            rpn_cls_target, -1, name='labels_not_ignored')

        # Now we only have the labels we are going to compare with the
        # cls probability.
        labels = tf.boolean_mask(rpn_cls_target, labels_not_ignored)
        cls_score = tf.boolean_mask(rpn_cls_score, labels_not_ignored)

        # We need to transform `labels` to `cls_score` shape.
        # convert [1, 0] to [[0, 1], [1, 0]] for ce with logits.
        cls_target = tf.one_hot(labels, depth=2)

        # Equivalent to log loss. The `_v2` op replaces the deprecated
        # `softmax_cross_entropy_with_logits`; the labels are wrapped in
        # `stop_gradient` because the deprecated op did not backpropagate
        # into them.
        ce_per_anchor = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=tf.stop_gradient(cls_target), logits=cls_score)

        prediction_dict['cross_entropy_per_anchor'] = ce_per_anchor

        # Finally, we need to calculate the regression loss over
        # `rpn_bbox_target` and `rpn_bbox_pred`.
        # We use SmoothL1Loss.
        rpn_bbox_target = tf.reshape(rpn_bbox_target, [-1, 4])
        rpn_bbox_pred = tf.reshape(rpn_bbox_pred, [-1, 4])

        # We only care for positive labels (we ignore backgrounds since
        # we don't have any bounding box information for it).
        positive_labels = tf.equal(rpn_cls_target, 1)
        rpn_bbox_target = tf.boolean_mask(rpn_bbox_target, positive_labels)
        rpn_bbox_pred = tf.boolean_mask(rpn_bbox_pred, positive_labels)

        # We apply smooth l1 loss as described by the Fast R-CNN paper.
        reg_loss_per_anchor = smooth_l1_loss(rpn_bbox_pred, rpn_bbox_target)

        prediction_dict['reg_loss_per_anchor'] = reg_loss_per_anchor

        # Loss summaries.
        tf.summary.scalar('batch_size', tf.shape(labels)[0], ['rpn'])
        foreground_cls_loss = tf.boolean_mask(
            ce_per_anchor, tf.equal(labels, 1))
        background_cls_loss = tf.boolean_mask(
            ce_per_anchor, tf.equal(labels, 0))
        tf.summary.scalar(
            'foreground_cls_loss',
            tf.reduce_mean(foreground_cls_loss), ['rpn'])
        tf.summary.histogram(
            'foreground_cls_loss', foreground_cls_loss, ['rpn'])
        tf.summary.scalar(
            'background_cls_loss',
            tf.reduce_mean(background_cls_loss), ['rpn'])
        tf.summary.histogram(
            'background_cls_loss', background_cls_loss, ['rpn'])
        tf.summary.scalar(
            'foreground_samples', tf.shape(rpn_bbox_target)[0], ['rpn'])

        return {
            'rpn_cls_loss': tf.reduce_sum(ce_per_anchor),
            'rpn_reg_loss': tf.reduce_sum(reg_loss_per_anchor),
        }
def loss(self, prediction_dict):
    """Return the RCNN classification and regression losses.

    Args:
        prediction_dict: Dictionary from which the following entries are
            read:
            rcnn (the predictions):
                cls_score: shape (num_proposals, num_classes + 1)
                    Score of each proposal for each class, background
                    included. Classes are 1-indexed with 0 being the
                    background.
                bbox_offsets: shape (num_proposals, num_classes * 4)
                    Offsets (4 values) of each proposal for each
                    non-background class. Only the offsets belonging to
                    the target label of each proposal are compared.
            target (the ground truth):
                cls: shape (num_proposals,)
                    Target label of each proposal; negative values are
                    ignored, 0 is background and 1..n are the 1-indexed
                    classes.
                bbox_offsets: shape (num_proposals, 4)
                    Target offset and scale of each proposal for its
                    label.
            When `self._debug` is set, per-proposal losses are stored
            under `prediction_dict['_debug']['losses']`.

    Returns:
        Dictionary with keys:
            rcnn_cls_loss: mean softmax cross-entropy over the proposals
                whose target is >= 0.
            rcnn_reg_loss: mean smooth L1 loss over the proposals whose
                target is > 0.
    """
    with tf.name_scope('RCNNLoss'):
        # Predicted scores, (num_proposals, num_classes + 1).
        scores = prediction_dict['rcnn']['cls_score']
        # Target classes, (num_proposals,), cast explicitly to int32.
        labels = tf.cast(prediction_dict['target']['cls'], tf.int32)

        # First the log loss of the classification. Only the proposals
        # whose target is >= 0 are valid samples; the rest is ignored.
        valid = tf.greater_equal(labels, 0)
        valid = tf.reshape(valid, [-1], name='not_ignored')

        # Scores and targets of the valid samples.
        valid_scores = tf.boolean_mask(
            scores, valid, name='cls_score_labeled')
        valid_labels = tf.boolean_mask(
            labels, valid, name='cls_target_labeled')

        tf.summary.scalar(
            'batch_size', tf.shape(valid_scores)[0], ['rcnn'])

        # One-hot targets with `num_classes + 1` columns (background
        # included).
        valid_one_hot = tf.one_hot(
            valid_labels, depth=self._num_classes + 1,
            name='cls_target_one_hot')

        # Cross-entropy between the one-hot targets and the predicted
        # scores of every valid proposal.
        ce_per_proposal = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=tf.stop_gradient(valid_one_hot),
            logits=valid_scores)

        if self._debug:
            # Kept to be able to visualize proposals with high and low
            # error.
            debug_losses = {'cross_entropy_per_proposal': ce_per_proposal}
            prediction_dict['_debug']['losses'] = debug_losses

        # Second the smooth L1 loss between the predicted offsets,
        # (num_proposals, num_classes * 4), and the target offsets,
        # (num_proposals, 4).
        offsets = prediction_dict['rcnn']['bbox_offsets']
        offsets_target = prediction_dict['target']['bbox_offsets']

        # The regression only uses non-background proposals, i.e. the
        # ones whose target is > 0.
        positive = tf.reshape(tf.greater(labels, 0), [-1])
        positive_offsets = tf.boolean_mask(
            offsets, positive, name='bbox_offsets_labeled')
        positive_offsets_target = tf.boolean_mask(
            offsets_target, positive,
            name='bbox_offsets_target_labeled')
        positive_labels = tf.boolean_mask(
            labels, positive, name='cls_target_labeled')

        # Non-background labels start at 1, while a one-hot of depth
        # `num_classes` needs 0-indexed values, so subtract 1.
        positive_labels = positive_labels - 1
        positive_one_hot = tf.one_hot(
            positive_labels, depth=self._num_classes,
            name='cls_target_one_hot')

        # One row of 4 offsets per (proposal, class) pair.
        offsets_rows = tf.reshape(
            positive_offsets, [-1, 4], name='bbox_flatten')

        # Flatten the one-hot into a 1-D boolean mask with one entry per
        # (proposal, class) pair.
        row_mask = tf.reshape(positive_one_hot, [-1])
        row_mask = tf.cast(row_mask, tf.bool, 'cls_flatten_as_bool')

        # Keep, for every proposal, the predicted offsets of its target
        # class.
        offsets_cleaned = tf.boolean_mask(
            offsets_rows, row_mask, 'bbox_offset_cleaned')

        # Smooth L1 loss between the selected predictions and the
        # targets.
        reg_per_proposal = smooth_l1_loss(
            offsets_cleaned, positive_offsets_target,
            sigma=self._l1_sigma)

        tf.summary.scalar(
            'rcnn_foreground_samples',
            tf.shape(offsets_cleaned)[0], ['rcnn'])

        if self._debug:
            # Kept to be able to visualize good and bad proposals in
            # debug mode.
            prediction_dict['_debug']['losses'][
                'reg_loss_per_proposal'] = reg_per_proposal

        # With no axis given, the reductions produce a single value.
        losses = {}
        losses['rcnn_cls_loss'] = tf.reduce_mean(ce_per_proposal)
        losses['rcnn_reg_loss'] = tf.reduce_mean(reg_per_proposal)
        return losses
def loss(self, prediction_dict):
    """Build the RCNN classification and regression losses.

    Args:
        prediction_dict: Dictionary from which the following entries are
            read:
            rcnn:
                cls_score: shape (num_proposals, num_classes + 1)
                    Has the class scoring for each the proposals.
                    Classes are 1-indexed with 0 being the background.
                bbox_offsets: shape (num_proposals, num_classes * 4)
                    Has the offset for each proposal for each class.
                    We have to compare only the proposals labeled with
                    the offsets for that label.
            target:
                cls: shape (num_proposals,)
                    Has the correct label for each of the proposals.
                    negative => ignored
                    0 => background
                    1..n => 1-indexed classes
                bbox_offsets: shape (num_proposals, 4)
                    Has the true offset of each proposal for the true
                    label.
            When `self._debug` is set, per-proposal losses are stored
            under `prediction_dict['_debug']['losses']`.

    Returns:
        loss_dict with keys:
            rcnn_cls_loss: The mean cross-entropy or log-loss of the
                classification tasks between then num_classes +
                background.
            rcnn_reg_loss: The mean smooth L1 loss for the bounding box
                regression task to adjust correctly labeled boxes.
    """
    with tf.name_scope('RCNNLoss'):
        cls_score = prediction_dict['rcnn']['cls_score']
        # Cast target explicitly as int32.
        cls_target = tf.cast(prediction_dict['target']['cls'], tf.int32)

        # First we need to calculate the log loss between the class
        # scores and cls_target.
        # We only care for the targets that are >= 0
        not_ignored = tf.reshape(
            tf.greater_equal(cls_target, 0), [-1], name='not_ignored')
        # We apply boolean mask to score and target.
        cls_score_labeled = tf.boolean_mask(
            cls_score, not_ignored, name='cls_score_labeled')
        cls_target_labeled = tf.boolean_mask(
            cls_target, not_ignored, name='cls_target_labeled')

        tf.summary.scalar(
            'batch_size', tf.shape(cls_score_labeled)[0], ['rcnn'])

        # Transform to one-hot vector
        cls_target_one_hot = tf.one_hot(
            cls_target_labeled, depth=self._num_classes + 1,
            name='cls_target_one_hot')

        # We get cross entropy loss of each proposal. The `_v2` op
        # replaces the deprecated `softmax_cross_entropy_with_logits`;
        # the labels are wrapped in `stop_gradient` because the
        # deprecated op did not backpropagate into them.
        cross_entropy_per_proposal = (
            tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=tf.stop_gradient(cls_target_one_hot),
                logits=cls_score_labeled))

        if self._debug:
            prediction_dict['_debug']['losses'] = {}
            # Save the cross entropy per proposal to be able to
            # visualize proposals with high and low error.
            prediction_dict['_debug']['losses'][
                'cross_entropy_per_proposal'] = cross_entropy_per_proposal

        # Second we need to calculate the smooth l1 loss between
        # `bbox_offsets` and `bbox_offsets_target`.
        bbox_offsets = prediction_dict['rcnn']['bbox_offsets']
        bbox_offsets_target = prediction_dict['target']['bbox_offsets']

        # We only want the non-background labels bounding boxes.
        not_ignored = tf.reshape(tf.greater(cls_target, 0), [-1])
        bbox_offsets_labeled = tf.boolean_mask(
            bbox_offsets, not_ignored, name='bbox_offsets_labeled')
        bbox_offsets_target_labeled = tf.boolean_mask(
            bbox_offsets_target, not_ignored,
            name='bbox_offsets_target_labeled')

        cls_target_labeled = tf.boolean_mask(
            cls_target, not_ignored, name='cls_target_labeled')
        # `cls_target_labeled` is based on `cls_target` which has
        # `num_classes` + 1 classes.
        # for making `one_hot` with depth `num_classes` to work we need
        # to lower them to make them 0-index.
        cls_target_labeled = cls_target_labeled - 1

        cls_target_one_hot = tf.one_hot(
            cls_target_labeled, depth=self._num_classes,
            name='cls_target_one_hot')

        # cls_target now is (num_labeled, num_classes)
        bbox_flatten = tf.reshape(
            bbox_offsets_labeled, [-1, 4], name='bbox_flatten')

        # We use the flatten cls_target_one_hot as boolean mask for the
        # bboxes.
        cls_flatten = tf.cast(
            tf.reshape(cls_target_one_hot, [-1]), tf.bool,
            'cls_flatten_as_bool')

        bbox_offset_cleaned = tf.boolean_mask(
            bbox_flatten, cls_flatten, 'bbox_offset_cleaned')

        # Calculate the smooth l1 loss between the "cleaned" bboxes
        # offsets (that means, the useful results) and the labeled
        # targets.
        reg_loss_per_proposal = smooth_l1_loss(
            bbox_offset_cleaned, bbox_offsets_target_labeled,
            sigma=self._l1_sigma)

        tf.summary.scalar(
            'rcnn_foreground_samples',
            tf.shape(bbox_offset_cleaned)[0], ['rcnn'])

        if self._debug:
            # Also save reg loss per proposals to be able to visualize
            # good and bad proposals in debug mode.
            prediction_dict['_debug']['losses'][
                'reg_loss_per_proposal'] = reg_loss_per_proposal

        return {
            'rcnn_cls_loss': tf.reduce_mean(cross_entropy_per_proposal),
            'rcnn_reg_loss': tf.reduce_mean(reg_loss_per_proposal),
        }
def loss(self, prediction_dict):
    """Return the multi-task loss of the Region Proposal Network.

    Args:
        prediction_dict: Dictionary from which the following keys are
            read:
            rpn_cls_score: Score for being an object or not for each
                anchor in the image. Shape: (num_anchors, 2)
            rpn_cls_target: Ground truth labeling for each anchor.
                Should be
                * 1: for positive labels
                * 0: for negative labels
                * -1: for labels we should ignore (not every anchor is
                  used).
                Shape: (num_anchors, )
                NOTE(review): the original notes say these labels come
                from IoU thresholds against the ground truth boxes
                (foreground above 0.5, background between 0.1 and 0.5);
                that assignment is not visible in this method, confirm
                against the target generation code.
            rpn_bbox_target: Bounding box output delta target for rpn.
                Shape: (num_anchors, 4)
            rpn_bbox_pred: Bounding box output delta prediction for rpn.
                Shape: (num_anchors, 4)
            The per-anchor losses are written back under the keys
            `cross_entropy_per_anchor` and `reg_loss_per_anchor`.

    Returns:
        Dictionary with keys:
            rpn_cls_loss: mean softmax cross-entropy over the anchors
                that are not ignored.
            rpn_reg_loss: mean smooth L1 loss over the positive anchors.
    """
    scores = prediction_dict['rpn_cls_score']
    targets = prediction_dict['rpn_cls_target']
    box_targets = prediction_dict['rpn_bbox_target']
    box_predictions = prediction_dict['rpn_bbox_pred']

    with tf.variable_scope('RPNLoss'):
        # Flatten (the tensor should already be flat) so it can be used
        # to build boolean masks.
        flat_targets = tf.reshape(targets, [-1])
        flat_targets = tf.cast(
            flat_targets, tf.int32, name='rpn_cls_target')

        # Boolean mask of the anchors that must not be ignored.
        used = tf.not_equal(flat_targets, -1, name='labels_not_ignored')

        # Labels and scores of the anchors that take part in the
        # classification loss.
        labels = tf.boolean_mask(flat_targets, used)
        used_scores = tf.boolean_mask(scores, used)

        # The cross-entropy needs one-hot targets with the shape of the
        # scores: [1, 0] becomes [[0, 1], [1, 0]].
        one_hot_labels = tf.one_hot(labels, depth=2)

        # Softmax cross-entropy between labels and logits, equivalent to
        # the log loss.
        ce_per_anchor = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=one_hot_labels, logits=used_scores)
        prediction_dict['cross_entropy_per_anchor'] = ce_per_anchor

        # Regression loss (smooth L1) between box targets and box
        # predictions.
        box_targets = tf.reshape(box_targets, [-1, 4])
        box_predictions = tf.reshape(box_predictions, [-1, 4])

        # Only positive anchors are used, since there is no bounding box
        # information for the background.
        is_positive = tf.equal(flat_targets, 1)
        box_targets = tf.boolean_mask(box_targets, is_positive)
        box_predictions = tf.boolean_mask(box_predictions, is_positive)

        # Smooth L1 loss as described by the Fast R-CNN paper.
        reg_per_anchor = smooth_l1_loss(
            box_predictions, box_targets, sigma=self._l1_sigma)
        prediction_dict['reg_loss_per_anchor'] = reg_per_anchor

        # Loss summaries.
        tf.summary.scalar('batch_size', tf.shape(labels)[0], ['rpn'])
        foreground_ce = tf.boolean_mask(ce_per_anchor, tf.equal(labels, 1))
        background_ce = tf.boolean_mask(ce_per_anchor, tf.equal(labels, 0))
        tagged_losses = (
            ('foreground_cls_loss', foreground_ce),
            ('background_cls_loss', background_ce),
        )
        for tag, values in tagged_losses:
            tf.summary.scalar(tag, tf.reduce_mean(values), ['rpn'])
            tf.summary.histogram(tag, values, ['rpn'])
        tf.summary.scalar(
            'foreground_samples', tf.shape(box_targets)[0], ['rpn'])

        # Both terms are reduced to their mean.
        mean_cls_loss = tf.reduce_mean(ce_per_anchor)
        mean_reg_loss = tf.reduce_mean(reg_per_anchor)
        return {
            'rpn_cls_loss': mean_cls_loss,
            'rpn_reg_loss': mean_reg_loss,
        }
def loss(self, prediction_dict, return_all=False):
    """Compute the loss for SSD.

    Args:
        prediction_dict: The output dictionary of the _build method from
            which the following keys are used:
            cls_pred: Class predictions, used as the logits of the
                classification loss.
            loc_pred: Localization (bounding box offset) predictions.
            target: A dictionary with the targets, read through its
                `cls` and `bbox_offsets` keys.
            The per-proposal losses are written back under the keys
            `reg_loss_per_proposal` and `cls_loss_per_proposal`.
        return_all: When True, return a dictionary with the individual
            loss terms instead of only the total loss.

    Returns:
        The total loss tensor, or, when `return_all` is True, a
        dictionary with the keys `total_loss`, `cls_loss` and
        `bbox_loss`.

    Raises:
        ValueError: if `self.loss_type` is neither `CROSS_ENTROPY` nor
            `FOCAL`.
    """
    with tf.name_scope("losses"):
        cls_pred = prediction_dict["cls_pred"]
        cls_target = tf.cast(prediction_dict["target"]["cls"], tf.int32)

        # Transform to one-hot vector
        cls_target_one_hot = tf.one_hot(
            cls_target, depth=self._num_classes + 1,
            name="cls_target_one_hot")

        # We get the classification loss of each proposal.
        # TODO: Optimization opportunity: We calculate the probabilities
        # earlier in the program, so if we used those instead of the
        # logits we would not have the need to do softmax here too.
        if self.loss_type == CROSS_ENTROPY:
            classification_loss_per_proposal = (
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    labels=cls_target_one_hot, logits=cls_pred))
        elif self.loss_type == FOCAL:
            classification_loss_per_proposal = focal_loss(
                cls_pred, cls_target_one_hot, self.focal_gamma)
        else:
            # Without this branch the per-proposal loss would stay
            # unbound and the method would fail below with an
            # unrelated-looking UnboundLocalError.
            raise ValueError(
                "Unsupported loss type: {!r}".format(self.loss_type))

        # Second we need to calculate the smooth l1 loss between
        # `bbox_offsets` and `bbox_offsets_targets`.
        bbox_offsets = prediction_dict["loc_pred"]
        bbox_offsets_targets = prediction_dict["target"]["bbox_offsets"]

        # We only want the non-background labels bounding boxes.
        not_ignored = tf.reshape(tf.greater(cls_target, 0), [-1])
        bbox_offsets_positives = tf.boolean_mask(
            bbox_offsets, not_ignored, name="bbox_offsets_positives")
        bbox_offsets_target_positives = tf.boolean_mask(
            bbox_offsets_targets, not_ignored,
            name="bbox_offsets_target_positives")

        # Calculate the smooth l1 regression loss between the flatten
        # bboxes offsets and the labeled targets.
        reg_loss_per_proposal = smooth_l1_loss(
            bbox_offsets_positives, bbox_offsets_target_positives)

        cls_loss = tf.reduce_sum(classification_loss_per_proposal)
        bbox_loss = tf.reduce_sum(reg_loss_per_proposal)

        # Following the paper, set loss to 0 if there are 0 bboxes
        # assigned as foreground targets. Otherwise the weighted sum is
        # divided by the number of foreground boxes.
        safety_condition = tf.not_equal(
            tf.shape(bbox_offsets_positives)[0], 0)
        final_loss = tf.cond(
            safety_condition,
            true_fn=lambda: (
                (cls_loss + bbox_loss * self._loc_loss_weight) /
                tf.cast(tf.shape(bbox_offsets_positives)[0], tf.float32)),
            false_fn=lambda: 0.0,
        )
        tf.losses.add_loss(final_loss)
        total_loss = tf.losses.get_total_loss()

        prediction_dict["reg_loss_per_proposal"] = reg_loss_per_proposal
        prediction_dict["cls_loss_per_proposal"] = (
            classification_loss_per_proposal)

        tf.summary.scalar(
            "cls_loss", cls_loss, collections=self._losses_collections)
        tf.summary.scalar(
            "bbox_loss", bbox_loss, collections=self._losses_collections)
        tf.summary.scalar(
            "total_loss", total_loss,
            collections=self._losses_collections)

        if return_all:
            return {
                "total_loss": total_loss,
                "cls_loss": cls_loss,
                "bbox_loss": bbox_loss,
            }
        return total_loss
def loss(self, prediction_dict):
    """
    Returns cost for RCNN based on:

    Args:
        prediction_dict with keys:
            rcnn:
                cls_score: shape (num_proposals, num_classes + 1)
                    Has the class scoring for each the proposals. Classes
                    are 1-indexed with 0 being the background.

                cls_prob: shape (num_proposals, num_classes + 1)
                    Application of softmax on cls_score. Not read by this
                    method; the loss is computed from `cls_score`.

                bbox_offsets: shape (num_proposals, num_classes * 4)
                    Has the offset for each proposal for each class.
                    We have to compare only the proposals labeled with the
                    offsets for that label.

            target:
                cls: shape (num_proposals,)
                    Has the correct label for each of the proposals.
                    negative => ignored
                    0 => background
                    1..n => 1-indexed classes

                bbox_offsets: shape (num_proposals, 4)
                    Has the true offset of each proposal for the true
                    label. Only the rows whose label is > 0 are used.

            _debug: only required when `self._debug` is set; a `losses`
                dictionary with the per-proposal losses is stored in it.

    Returns:
        loss_dict with keys:
            rcnn_cls_loss: The mean classification loss (cross-entropy or
                focal, depending on `self.loss_type`) over the
                non-ignored proposals, across num_classes + background.
            rcnn_reg_loss: The mean smooth L1 loss for the bounding box
                regression task over the foreground proposals.

    Raises:
        ValueError: If `self.loss_type` is neither `CROSS_ENTROPY` nor
            `FOCAL`.
    """
    with tf.name_scope("RCNNLoss"):
        cls_score = prediction_dict["rcnn"]["cls_score"]
        # Cast target explicitly as int32.
        cls_target = tf.cast(prediction_dict["target"]["cls"], tf.int32)

        # First we need to calculate the classification loss between
        # cls_score and cls_target.
        # We only care for the targets that are >= 0
        not_ignored = tf.reshape(
            tf.greater_equal(cls_target, 0), [-1], name="not_ignored"
        )
        # We apply boolean mask to score and target.
        cls_score_labeled = tf.boolean_mask(
            cls_score, not_ignored, name="cls_score_labeled"
        )
        cls_target_labeled = tf.boolean_mask(
            cls_target, not_ignored, name="cls_target_labeled"
        )

        tf.summary.scalar(
            "batch_size", tf.shape(cls_score_labeled)[0], ["rcnn"]
        )

        # Transform to one-hot vector
        cls_target_one_hot = tf.one_hot(
            cls_target_labeled,
            depth=self._num_classes + 1,
            name="cls_target_one_hot",
        )

        if self.loss_type == CROSS_ENTROPY:
            # NOTE(review): `self.loss_weight` is presumably either a
            # scalar or a per-class list of length num_classes + 1 --
            # confirm against the configuration.
            class_weights = self.loss_weight
            # Labels are constants for the loss; no gradient flows to them.
            onehot_labels = tf.stop_gradient(cls_target_one_hot)

            # Compute the (unweighted) softmax cross entropy loss.
            cross_entropy_per_proposal = (
                tf.nn.softmax_cross_entropy_with_logits(
                    labels=onehot_labels, logits=cls_score_labeled
                )
            )
            if class_weights != 1:
                class_weights = tf.constant(
                    [class_weights], dtype=tf.float32
                )
                # Deduce the weight of each sample from its true label.
                weights = tf.reduce_sum(
                    class_weights * onehot_labels, axis=1
                )
                # Apply the weights, relying on broadcasting of the
                # multiplication.
                cross_entropy_per_proposal = (
                    cross_entropy_per_proposal * weights
                )
        elif self.loss_type == FOCAL:
            cross_entropy_per_proposal = focal_loss(
                cls_score_labeled,
                tf.stop_gradient(cls_target_one_hot),
                self.focal_gamma,
            )
        else:
            # Fail with a clear message instead of an UnboundLocalError
            # further down when the loss type is not supported.
            raise ValueError(
                "Unsupported loss_type {!r}; expected {!r} or {!r}".format(
                    self.loss_type, CROSS_ENTROPY, FOCAL
                )
            )

        if self._debug:
            prediction_dict["_debug"]["losses"] = {}
            # Save the classification loss per proposal to be able to
            # visualize proposals with high and low error.
            prediction_dict["_debug"]["losses"][
                "cross_entropy_per_proposal"
            ] = cross_entropy_per_proposal

        # Second we need to calculate the smooth l1 loss between
        # `bbox_offsets` and `bbox_offsets_target`.
        bbox_offsets = prediction_dict["rcnn"]["bbox_offsets"]
        bbox_offsets_target = prediction_dict["target"]["bbox_offsets"]

        # We only want the non-background labels bounding boxes.
        not_ignored = tf.reshape(tf.greater(cls_target, 0), [-1])
        bbox_offsets_labeled = tf.boolean_mask(
            bbox_offsets, not_ignored, name="bbox_offsets_labeled"
        )
        bbox_offsets_target_labeled = tf.boolean_mask(
            bbox_offsets_target,
            not_ignored,
            name="bbox_offsets_target_labeled",
        )

        cls_target_labeled = tf.boolean_mask(
            cls_target, not_ignored, name="cls_target_labeled"
        )
        # `cls_target_labeled` is based on `cls_target` which has
        # `num_classes` + 1 classes.
        # for making `one_hot` with depth `num_classes` to work we need
        # to lower them to make them 0-index.
        cls_target_labeled = cls_target_labeled - 1

        cls_target_one_hot = tf.one_hot(
            cls_target_labeled,
            depth=self._num_classes,
            name="cls_target_one_hot",
        )

        # One row of 4 offsets per (labeled proposal, class) pair.
        bbox_flatten = tf.reshape(
            bbox_offsets_labeled, [-1, 4], name="bbox_flatten"
        )

        # We use the flatten cls_target_one_hot as boolean mask for the
        # bboxes, keeping only the offsets predicted for the true class.
        cls_flatten = tf.cast(
            tf.reshape(cls_target_one_hot, [-1]),
            tf.bool,
            "cls_flatten_as_bool",
        )

        bbox_offset_cleaned = tf.boolean_mask(
            bbox_flatten, cls_flatten, "bbox_offset_cleaned"
        )

        # Calculate the smooth l1 loss between the "cleaned" bboxes
        # offsets (that means, the useful results) and the labeled
        # targets.
        reg_loss_per_proposal = smooth_l1_loss(
            bbox_offset_cleaned,
            bbox_offsets_target_labeled,
            sigma=self._l1_sigma,
        )

        tf.summary.scalar(
            "rcnn_foreground_samples",
            tf.shape(bbox_offset_cleaned)[0],
            ["rcnn"],
        )

        if self._debug:
            # Also save reg loss per proposals to be able to visualize
            # good and bad proposals in debug mode.
            prediction_dict["_debug"]["losses"][
                "reg_loss_per_proposal"
            ] = reg_loss_per_proposal

        return {
            "rcnn_cls_loss": tf.reduce_mean(cross_entropy_per_proposal),
            "rcnn_reg_loss": tf.reduce_mean(reg_loss_per_proposal),
        }
def loss(self, prediction_dict):
    """
    Build the loss tensors of the Region Proposal Network.

    The per-anchor losses are also written back into `prediction_dict`
    under `cross_entropy_per_anchor` and `reg_loss_per_anchor`, and several
    summaries are registered in the `rpn` collection.

    Args:
        prediction_dict: dictionary providing the keys
            rpn_cls_score: Score for being an object or not for each
                anchor in the image. Shape: (num_anchors, 2)
            rpn_cls_target: Ground truth labeling for each anchor.
                Should be
                * 1: for positive labels
                * 0: for negative labels
                * -1: for labels we should ignore.
                Shape: (num_anchors, )
            rpn_bbox_target: Bounding box output delta target for rpn.
                Shape: (num_anchors, 4)
            rpn_bbox_pred: Bounding box output delta prediction for rpn.
                Shape: (num_anchors, 4)

    Returns:
        Dictionary with `rpn_cls_loss` (mean cross entropy over the
        non-ignored anchors) and `rpn_reg_loss` (mean smooth L1 loss over
        the positive anchors).
    """
    scores_all = prediction_dict['rpn_cls_score']
    targets_raw = prediction_dict['rpn_cls_target']
    deltas_target_raw = prediction_dict['rpn_bbox_target']
    deltas_pred_raw = prediction_dict['rpn_bbox_pred']

    with tf.variable_scope('RPNLoss'):
        # --- Classification part -------------------------------------
        # Make sure the targets are a flat int32 vector so they can act
        # as the source of boolean masks.
        flat_targets = tf.cast(
            tf.reshape(targets_raw, [-1]), tf.int32, name='rpn_cls_target'
        )

        # Anchors labeled -1 take no part in the classification loss.
        keep_mask = tf.not_equal(
            flat_targets, -1, name='labels_not_ignored'
        )
        labels = tf.boolean_mask(flat_targets, keep_mask)
        kept_scores = tf.boolean_mask(scores_all, keep_mask)

        # One-hot encode so labels match the (n, 2) layout of the scores:
        # [1, 0] becomes [[0, 1], [1, 0]].
        labels_one_hot = tf.one_hot(labels, depth=2)

        # Softmax cross entropy on the logits, i.e. the log loss.
        ce_per_anchor = tf.nn.softmax_cross_entropy_with_logits(
            labels=labels_one_hot, logits=kept_scores
        )
        prediction_dict['cross_entropy_per_anchor'] = ce_per_anchor

        # --- Regression part -----------------------------------------
        # Smooth L1 between predicted and target deltas, restricted to
        # positive anchors: background has no box to regress towards.
        deltas_target = tf.reshape(deltas_target_raw, [-1, 4])
        deltas_pred = tf.reshape(deltas_pred_raw, [-1, 4])

        is_positive = tf.equal(flat_targets, 1)
        positive_deltas_target = tf.boolean_mask(deltas_target, is_positive)
        positive_deltas_pred = tf.boolean_mask(deltas_pred, is_positive)

        # Smooth L1 loss as described by the Fast R-CNN paper.
        reg_per_anchor = smooth_l1_loss(
            positive_deltas_pred, positive_deltas_target,
            sigma=self._l1_sigma
        )
        prediction_dict['reg_loss_per_anchor'] = reg_per_anchor

        # --- Summaries -----------------------------------------------
        tf.summary.scalar('batch_size', tf.shape(labels)[0], ['rpn'])

        is_foreground = tf.equal(labels, 1)
        foreground_ce = tf.boolean_mask(ce_per_anchor, is_foreground)
        is_background = tf.equal(labels, 0)
        background_ce = tf.boolean_mask(ce_per_anchor, is_background)

        # Same pair of summaries (mean + distribution) for each group.
        per_group_losses = (
            ('foreground_cls_loss', foreground_ce),
            ('background_cls_loss', background_ce),
        )
        for tag, group_loss in per_group_losses:
            tf.summary.scalar(tag, tf.reduce_mean(group_loss), ['rpn'])
            tf.summary.histogram(tag, group_loss, ['rpn'])

        tf.summary.scalar(
            'foreground_samples',
            tf.shape(positive_deltas_target)[0],
            ['rpn']
        )

        mean_cls_loss = tf.reduce_mean(ce_per_anchor)
        mean_reg_loss = tf.reduce_mean(reg_per_anchor)
        return {
            'rpn_cls_loss': mean_cls_loss,
            'rpn_reg_loss': mean_reg_loss,
        }