def get_predictions(self, score_threshold=0.1, iou_threshold=0.6, max_boxes=20):
    """Postprocess outputs of the network.

    Returns:
        boxes: a float tensor with shape [batch_size, N, 4].
        scores: a float tensor with shape [batch_size, N].
        num_boxes: an int tensor with shape [batch_size], it represents the
            number of detections on an image.

        where N = max_boxes.
    """
    # the input images were rescaled, so the predicted boxes
    # must be mapped back to the original coordinates accordingly
    with tf.name_scope('postprocessing'):
        boxes = batch_decode(self.box_encodings, self.anchors)
        # if the images were padded we need to rescale predicted boxes:
        boxes = boxes / self.box_scaler
        boxes = tf.clip_by_value(boxes, 0.0, 1.0)
        # it has shape [batch_size, num_anchors, 4]

        scores = tf.nn.softmax(self.class_predictions_with_background, axis=2)[:, :, 1]
        # it has shape [batch_size, num_anchors]

    # run NMS to remove duplicate detection boxes
    with tf.device('/cpu:0'), tf.name_scope('nms'):
        boxes, scores, num_detections = batch_non_max_suppression(
            boxes, scores, score_threshold, iou_threshold, max_boxes)

    # the returned dict contains the detection boxes, their scores,
    # and the number of detections per image
    return {'boxes': boxes, 'scores': scores, 'num_boxes': num_detections}
def get_predictions(self, score_threshold=0.1, iou_threshold=0.6, max_boxes=20):
    """Postprocess outputs of the network.

    Returns:
        boxes: a float tensor with shape [batch_size, N, 4].
        scores: a float tensor with shape [batch_size, N].
        num_boxes: an int tensor with shape [batch_size], it represents the
            number of detections on an image.

        where N = max_boxes.
    """
    with tf.name_scope('postprocessing'):
        boxes = batch_decode(self.box_encodings, self.anchors)
        # it has shape [batch_size, num_anchors, 4]

        scores = tf.nn.softmax(self.class_predictions_with_background, axis=2)[:, :, 1]
        # it has shape [batch_size, num_anchors]

    with tf.device('/cpu:0'), tf.name_scope('nms'):
        boxes, scores, num_detections = batch_non_max_suppression(
            boxes, scores, score_threshold, iou_threshold, max_boxes)

    return {'boxes': boxes, 'scores': scores, 'num_boxes': num_detections}
def get_box_prediction(self):
    """Returns predicted boxes with shape [batch_size, num_anchors, 4]."""
    boxes = batch_decode(self.box_encodings, self.anchors)
    # if the images were padded we need to rescale predicted boxes:
    boxes = boxes / self.box_scaler
    boxes = tf.clip_by_value(boxes, 0.0, 1.0)
    # it has shape [batch_size, num_anchors, 4]
    return boxes
def get_predictions(self, score_threshold=0.1, iou_threshold=0.6, max_boxes_per_class=20):
    with tf.name_scope('postprocessing'):
        boxes = batch_decode(self.box_encodings, self.anchors)
        # it has shape [batch_size, num_anchors, 4]

        class_predictions_without_background = tf.slice(
            self.class_predictions_with_background,
            [0, 0, 1], [-1, -1, -1])
        scores = tf.sigmoid(class_predictions_without_background)
        # it has shape [batch_size, num_anchors, num_classes]

    with tf.device('/cpu:0'), tf.name_scope('nms'):
        boxes, scores, classes, num_detections = batch_multiclass_non_max_suppression(
            boxes, scores, score_threshold, iou_threshold,
            max_boxes_per_class, self.num_classes)

    return {
        'boxes': boxes, 'labels': classes,
        'scores': scores, 'num_boxes': num_detections
    }
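# A minimal usage sketch, not part of the original code: how the dict returned
# by get_predictions might be consumed after a sess.run call. It assumes the
# NMS output is padded up to max_boxes and that only the first num_boxes[i]
# rows of each image are valid, which is what the 'num_boxes' tensor suggests.

def keep_valid_detections(output):
    """output: a dict of numpy arrays obtained by running the tensors
    returned by get_predictions ('boxes', 'scores', 'num_boxes')."""
    results = []
    for i in range(output['num_boxes'].shape[0]):
        n = int(output['num_boxes'][i])
        results.append({
            'boxes': output['boxes'][i][:n],    # [n, 4], normalized coordinates
            'scores': output['scores'][i][:n],  # [n]
        })
    return results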
def loss(self, groundtruth, params):
    """Compute scalar loss tensors with respect to provided groundtruth.

    Arguments:
        groundtruth: a dict with the following keys
            'boxes': a float tensor with shape [batch_size, max_num_boxes, 4].
            'num_boxes': an int tensor with shape [batch_size].
                where max_num_boxes = max(num_boxes).
        params: a dict with parameters for OHEM.
    Returns:
        a dict with two float tensors with shape []:
        'localization_loss' and 'classification_loss'.
    """
    reg_targets, matches = self._create_targets(groundtruth)

    with tf.name_scope('losses'):

        # whether an anchor is matched
        is_matched = tf.greater_equal(matches, 0)
        weights = tf.to_float(is_matched)
        # shape [batch_size, num_anchors]

        # we have binary classification for each anchor,
        # cls_targets are the groundtruth class labels
        cls_targets = tf.to_int32(is_matched)

        with tf.name_scope('classification_loss'):
            # classification loss, computed for every anchor with weight 1
            cls_losses = classification_loss(
                self.class_predictions_with_background, cls_targets)
        with tf.name_scope('localization_loss'):
            # localization loss, only matched (positive) anchors contribute
            location_losses = localization_loss(
                self.box_encodings, reg_targets, weights)
        # they have shape [batch_size, num_anchors]

        with tf.name_scope('normalization'):
            matches_per_image = tf.reduce_sum(weights, axis=1)  # shape [batch_size]
            num_matches = tf.reduce_sum(matches_per_image)  # shape []
            normalizer = tf.maximum(num_matches, 1.0)

        scores = tf.nn.softmax(self.class_predictions_with_background, axis=2)
        # it has shape [batch_size, num_anchors, 2]

        decoded_boxes = batch_decode(self.box_encodings, self.anchors)
        decoded_boxes = decoded_boxes / self.box_scaler
        # it has shape [batch_size, num_anchors, 4]

        # add summaries for predictions
        is_background = tf.equal(matches, -1)
        self._add_scalewise_histograms(
            tf.to_float(is_background) * scores[:, :, 0], 'background_probability')
        self._add_scalewise_histograms(weights * scores[:, :, 1], 'face_probability')
        ymin, xmin, ymax, xmax = tf.unstack(decoded_boxes, axis=2)
        h, w = ymax - ymin, xmax - xmin
        self._add_scalewise_histograms(weights * h, 'box_heights')
        self._add_scalewise_histograms(weights * w, 'box_widths')

        # add summaries for losses and matches
        self._add_scalewise_matches_summaries(weights)
        self._add_scalewise_summaries(cls_losses, name='classification_losses')
        self._add_scalewise_summaries(location_losses, name='localization_losses')
        tf.summary.scalar('total_mean_matches_per_image', tf.reduce_mean(matches_per_image))

        with tf.name_scope('ohem'):
            location_loss, cls_loss = apply_hard_mining(
                location_losses, cls_losses,
                self.class_predictions_with_background,
                matches, decoded_boxes,
                loss_to_use=params['loss_to_use'],
                loc_loss_weight=params['loc_loss_weight'],
                cls_loss_weight=params['cls_loss_weight'],
                num_hard_examples=params['num_hard_examples'],
                nms_threshold=params['nms_threshold'],
                max_negatives_per_positive=params['max_negatives_per_positive'],
                min_negatives_per_image=params['min_negatives_per_image'])

    return {
        'localization_loss': location_loss / normalizer,
        'classification_loss': cls_loss / normalizer
    }
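# A hedged sketch, not from the original code, of how the two normalized losses
# returned by loss() could be combined into a single training objective. The
# weight value and the use of tf.losses.get_regularization_loss() are assumptions,
# not something this snippet prescribes.

def build_total_loss(losses, localization_loss_weight=1.0):
    """losses: the dict returned by loss(), containing scalar tensors."""
    total = losses['classification_loss'] \
        + localization_loss_weight * losses['localization_loss']
    # add weight decay terms, if any were registered through tf.losses
    regularization = tf.losses.get_regularization_loss()
    return total + regularization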
def apply_hard_mining(
        location_losses, cls_losses,
        class_predictions_with_background,
        box_encodings, matches, anchors,
        loss_to_use='classification',
        loc_loss_weight=1.0, cls_loss_weight=1.0,
        num_hard_examples=3000, nms_threshold=0.99,
        max_negatives_per_positive=3, min_negatives_per_image=0):
    """Applies hard mining to anchorwise losses.

    Arguments:
        location_losses: a float tensor with shape [batch_size, num_anchors].
        cls_losses: a float tensor with shape [batch_size, num_anchors].
        class_predictions_with_background: a float tensor with shape
            [batch_size, num_anchors, num_classes + 1].
        box_encodings: a float tensor with shape [batch_size, num_anchors, 4].
        matches: an int tensor with shape [batch_size, num_anchors].
        anchors: a float tensor with shape [num_anchors, 4].
        loss_to_use: a string, the only possible values are
            ['classification', 'both'].
        loc_loss_weight: a float number.
        cls_loss_weight: a float number.
        num_hard_examples: an integer.
        nms_threshold: a float number.
        max_negatives_per_positive: a float number.
        min_negatives_per_image: an integer.
    Returns:
        two float tensors with shape [].
    """
    decoded_boxes = batch_decode(box_encodings, anchors)
    # it has shape [batch_size, num_anchors, 4]

    # all these tensors must have a static first dimension (batch size)
    decoded_boxes_list = tf.unstack(decoded_boxes, axis=0)
    location_losses_list = tf.unstack(location_losses, axis=0)
    cls_losses_list = tf.unstack(cls_losses, axis=0)
    matches_list = tf.unstack(matches, axis=0)
    # they are all lists with length = batch_size

    batch_size = len(decoded_boxes_list)
    num_positives_list, num_negatives_list = [], []
    mined_location_losses, mined_cls_losses = [], []

    # do OHEM for each image in the batch
    for i, box_locations in enumerate(decoded_boxes_list):
        image_losses = cls_losses_list[i] * cls_loss_weight
        if loss_to_use == 'both':
            image_losses += (location_losses_list[i] * loc_loss_weight)
        # it has shape [num_anchors]

        selected_indices = tf.image.non_max_suppression(
            box_locations, image_losses, num_hard_examples, nms_threshold)

        selected_indices, num_positives, num_negatives = _subsample_selection_to_desired_neg_pos_ratio(
            selected_indices, matches_list[i],
            max_negatives_per_positive, min_negatives_per_image)
        num_positives_list.append(num_positives)
        num_negatives_list.append(num_negatives)

        mined_location_losses.append(
            tf.reduce_sum(tf.gather(location_losses_list[i], selected_indices)))
        mined_cls_losses.append(
            tf.reduce_sum(tf.gather(cls_losses_list[i], selected_indices)))

    mean_num_positives = tf.reduce_mean(tf.stack(num_positives_list, axis=0))
    mean_num_negatives = tf.reduce_mean(tf.stack(num_negatives_list, axis=0))
    tf.summary.scalar('mean_num_positives', mean_num_positives)
    tf.summary.scalar('mean_num_negatives', mean_num_negatives)

    location_loss = tf.reduce_sum(tf.stack(mined_location_losses, axis=0))
    cls_loss = tf.reduce_sum(tf.stack(mined_cls_losses, axis=0))
    return location_loss, cls_loss
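# A small numeric illustration, added here as an assumption rather than taken
# from the repository, of the bound that _subsample_selection_to_desired_neg_pos_ratio
# is expected to enforce: the number of hard negatives kept per image is capped
# by the number of positives times max_negatives_per_positive, with
# min_negatives_per_image as a floor when there are few or no positives.

def max_allowed_negatives(num_positives, max_negatives_per_positive=3,
                          min_negatives_per_image=0):
    """Upper bound on the number of hard negatives kept for one image."""
    return max(int(max_negatives_per_positive * num_positives),
               min_negatives_per_image)

# With 10 positive anchors and a 3:1 ratio, at most 30 negatives are kept;
# with 0 positives, min_negatives_per_image still keeps some negatives in the loss.
print(max_allowed_negatives(10))                             # 30
print(max_allowed_negatives(0, min_negatives_per_image=5))   # 5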