def fast_rcnn_predict(self):
    """Decode the second-stage regressions and run multi-class NMS on them.

    Reads ``self.fast_rcnn_scores``, ``self.fast_rcnn_encode_boxes``,
    ``self.fast_rcnn_all_level_proposals``, ``self.num_classes``,
    ``self.scale_factors`` and ``self.img_shape``.

    :return: the four values produced by ``self.fast_rcnn_proposals``, in
        order: decoded boxes, scores, number of objects, detection category
    """
    with tf.variable_scope('fast_rcnn_predict'):
        # [-1, num_classes+1] according to the original annotation
        softmax_scores = slim.softmax(self.fast_rcnn_scores)

        # One 4-value regression per row.
        encoded = tf.reshape(self.fast_rcnn_encode_boxes, [-1, 4])

        # Repeat every proposal once per class so that each regression row
        # has a matching reference box.
        tiled_proposals = tf.tile(self.fast_rcnn_all_level_proposals,
                                  [1, self.num_classes])  # [N, 4*num_classes]
        per_class_refs = tf.reshape(tiled_proposals, [-1, 4])  # [N*num_classes, 4]

        decoded = encode_and_decode.decode_boxes(
            encode_boxes=encoded,
            reference_boxes=per_class_refs,
            scale_factors=self.scale_factors)
        decoded = boxes_utils.clip_boxes_to_img_boundaries(
            decoded, img_shape=self.img_shape)

        # multi-class NMS works on one row per proposal
        decoded = tf.reshape(decoded, [-1, self.num_classes * 4])
        boxes, score, num_of_objects, detection_category = \
            self.fast_rcnn_proposals(decoded, scores=softmax_scores)

        return boxes, score, num_of_objects, detection_category
def fast_rcnn_prediction(self):
    '''
    Turn the Fast R-CNN head outputs into final detections.

    Shapes below are the ones stated by the original author.

    :param: self.fast_rcnn_cls_scores, [2000, num_cls+1], num_cls+background
    :param: self.fast_rcnn_encode_boxes, [2000, num_cls*4]
    :return: fast_rcnn_decode_boxes, [-1, 4]
    :return: fast_rcnn_category, [-1, ]
    :return: fast_rcnn_scores, [-1, ]
    :return: num_object, [-1, ]
    '''
    with tf.variable_scope('fast_rcnn_predict'):
        softmax_score = slim.softmax(self.fast_rcnn_cls_scores)

        encoded = tf.reshape(self.fast_rcnn_encode_boxes, [-1, 4])

        # Tile the RoIs once per class, then flatten to one box per row so
        # they line up with the flattened regressions.
        tiled_rois = tf.tile(self.rois_boxes, [1, self.num_cls])
        reference = tf.reshape(tiled_rois, [-1, 4])

        # apply the regressions to the reference boxes
        decoded = decode_boxes(encode_boxes=encoded,
                               reference_boxes=reference,
                               scale_factors=self.scale_factors)

        # keep the decoded boxes inside the image
        decoded = boxes_utils.clip_boxes_to_img_boundaries(
            boxes=decoded, img_shape=self.img_shape)

        # multi-class NMS expects one row per RoI
        decoded = tf.reshape(decoded, [-1, 4 * self.num_cls])
        boxes, category, scores, num_object = self.mutil_class_nms(
            boxes=decoded, scores=softmax_score)

        return boxes, category, scores, num_object
def rpn_proposals(self):
    '''
    Decode the RPN regressions and keep the best boxes after NMS.

    Shapes below are the ones stated by the original author.

    :param:self.anchors: shape:[-1, 4]->[ymin, xmin, ymax, xmax]
    :param:self.rpn_scores: shape:[-1, 2]->[backgroud, foreground]
    :param:self.rpn_encode_boxes: shape:[-1, 4]->[ycenter, xcenter, h, w]
    :return: valid_boxes [2000, 4]
    :return: valid_scores [2000,]
    '''
    with tf.variable_scope('rpn_proposals'):
        decoded = encode_and_decode.decode_boxes(
            encode_boxes=self.rpn_encode_boxes,
            reference_boxes=self.anchors,
            scale_factors=self.scale_factors)

        # Clipping to the image is only done outside of training.
        if not self.is_training:
            image_shape = tf.shape(self.img_batch)
            decoded = boxes_utils.clip_boxes_to_img_boundaries(
                decoded, image_shape)

        softmax_scores = slim.softmax(self.rpn_scores)
        objectness = softmax_scores[:, 1]  # column 1 is the foreground score

        # Optional pre-NMS truncation to the top-k scoring boxes.
        if self.top_k_nms:
            objectness, top_k_indices = tf.nn.top_k(objectness,
                                                    k=self.top_k_nms)
            decoded = tf.gather(decoded, top_k_indices)

        nms_indices = boxes_utils.non_maximal_suppression(
            decoded, objectness,
            self.rpn_nms_iou_threshold, self.max_proposal_num)

        kept_scores = tf.gather(objectness, nms_indices)
        kept_boxes = tf.gather(decoded, nms_indices)

        return kept_boxes, kept_scores
def postprocess_detctions(rpn_bbox_pred, rpn_cls_prob, img_shape, anchors,
                          is_training):
    """Decode, clip and filter detections class by class.

    :param rpn_bbox_pred: box deltas applied to ``anchors``
    :param rpn_cls_prob: per-class scores; column ``j`` is used for class ``j``
    :param img_shape: forwarded to ``clip_boxes_to_img_boundaries``
    :param anchors: reference boxes for decoding
    :param is_training: forwarded to ``filter_detections``
    :return: (boxes, scores, labels), each the concatenation over all
        ``cfgs.CLASS_NUM`` classes; labels are 1-based (``j + 1``)
    """
    # 1. decode boxes
    decoded = bbox_transform.bbox_transform_inv(boxes=anchors,
                                                deltas=rpn_bbox_pred)

    # 2. clip to img boundaries
    decoded = boxes_utils.clip_boxes_to_img_boundaries(boxes=decoded,
                                                       img_shape=img_shape)

    boxes_per_class = []
    scores_per_class = []
    labels_per_class = []
    for class_idx in range(0, cfgs.CLASS_NUM):
        keep = filter_detections(decoded, rpn_cls_prob[:, class_idx],
                                 is_training)

        class_boxes = tf.reshape(tf.gather(decoded, keep), [-1, 4])
        class_scores = tf.reshape(
            tf.gather(rpn_cls_prob[:, class_idx], keep), [-1, ])
        # Same length as the scores, filled with the 1-based class id.
        class_labels = tf.ones_like(class_scores) * (class_idx + 1)

        boxes_per_class.append(class_boxes)
        scores_per_class.append(class_scores)
        labels_per_class.append(class_labels)

    all_boxes = tf.concat(boxes_per_class, axis=0)
    all_scores = tf.concat(scores_per_class, axis=0)
    all_labels = tf.concat(labels_per_class, axis=0)

    return all_boxes, all_scores, all_labels
def rpn_proposals(self):
    """Build the RPN proposals and zero-pad them up to ``max_proposals_num``.

    Reads ``self.rpn_encode_boxes``, ``self.anchors``, ``self.scale_factors``,
    ``self.is_training``, ``self.img_batch``, ``self.rpn_scores``,
    ``self.top_k_nms``, ``self.max_proposals_num`` and
    ``self.rpn_nms_iou_threshold``.

    :return: (rpn_proposals_boxes, rpn_proposals_scores)
    """
    with tf.variable_scope('rpn_proposals'):
        decoded = encode_and_decode.decode_boxes(
            encode_boxes=self.rpn_encode_boxes,
            reference_boxes=self.anchors,
            scale_factors=self.scale_factors)

        if not self.is_training:
            # at test time, clip proposals to the image boundaries
            img_shape = tf.shape(self.img_batch)
            decoded = boxes_utils.clip_boxes_to_img_boundaries(decoded,
                                                               img_shape)

        softmax_scores = slim.softmax(self.rpn_scores)
        objectness = softmax_scores[:, 1]  # second column is the object score

        if self.top_k_nms:
            objectness, top_k_indices = tf.nn.top_k(objectness,
                                                    k=self.top_k_nms)
            decoded = tf.gather(decoded, top_k_indices)

        keep = nms.non_maximal_suppression(
            boxes=decoded,
            scores=objectness,
            max_output_size=self.max_proposals_num,
            iou_threshold=self.rpn_nms_iou_threshold)

        valid_boxes = tf.gather(decoded, keep)
        valid_scores = tf.gather(objectness, keep)

        # Pad only when NMS returned fewer boxes than requested.
        needs_padding = tf.less(tf.shape(valid_boxes)[0],
                                self.max_proposals_num)

        def _padded():
            return boxes_utils.padd_boxes_with_zeros(
                valid_boxes, valid_scores, self.max_proposals_num)

        def _unchanged():
            return valid_boxes, valid_scores

        rpn_proposals_boxes, rpn_proposals_scores = tf.cond(
            needs_padding, _padded, _unchanged)

        return rpn_proposals_boxes, rpn_proposals_scores
def postprocess_detctions(rpn_bbox, rpn_cls_prob, img_shape):
    '''
    Clip already-decoded boxes and filter them class by class.

    :param rpn_bbox: [-1, 4]
    :param rpn_cls_prob: [-1, NUM_CLASS]
    :param img_shape: forwarded to clip_boxes_to_img_boundaries
    :return: (boxes, scores, labels) concatenated over all cfgs.CLASS_NUM
             classes; labels are 1-based (j + 1)
    '''
    clipped = boxes_utils.clip_boxes_to_img_boundaries(boxes=rpn_bbox,
                                                       img_shape=img_shape)

    collected_boxes = []
    collected_scores = []
    collected_labels = []
    for class_idx in range(0, cfgs.CLASS_NUM):
        keep = filter_detections(clipped, rpn_cls_prob[:, class_idx])

        class_boxes = tf.reshape(tf.gather(clipped, keep), [-1, 4])
        collected_boxes.append(class_boxes)

        class_scores = tf.gather(rpn_cls_prob[:, class_idx], keep)
        class_scores = tf.reshape(class_scores, [-1, ])
        collected_scores.append(class_scores)

        # One label per kept score, holding the 1-based class id.
        collected_labels.append(tf.ones_like(class_scores) * (class_idx + 1))

    all_boxes = tf.concat(collected_boxes, axis=0)
    all_scores = tf.concat(collected_scores, axis=0)
    all_labels = tf.concat(collected_labels, axis=0)

    return all_boxes, all_scores, all_labels
def postprocess_rpn_proposals(rpn_bbox_pred, rpn_cls_prob, img_shape, anchors,
                              is_training):
    '''
    Decode RPN outputs, clip them, keep the top-N by score and run NMS.

    :param rpn_bbox_pred: [-1, 4]
    :param rpn_cls_prob: [-1, 2]; column 1 is used as the score
    :param img_shape: forwarded to clip_boxes_to_img_boundaries
    :param anchors:[-1, 4]
    :param is_training: selects the TRAIN or TEST limits from cfgs
    :return: (final_boxes, final_probs)
    '''
    # The IoU threshold is the same in both modes; only the limits differ.
    nms_thresh = cfgs.RPN_NMS_IOU_THRESHOLD
    if is_training:
        pre_nms_limit, post_nms_limit = (cfgs.RPN_TOP_K_NMS_TRAIN,
                                         cfgs.RPN_MAXIMUM_PROPOSAL_TARIN)
    else:
        pre_nms_limit, post_nms_limit = (cfgs.RPN_TOP_K_NMS_TEST,
                                         cfgs.RPN_MAXIMUM_PROPOSAL_TEST)

    scores = rpn_cls_prob[:, 1]

    # 1. decode boxes
    boxes = encode_and_decode.decode_boxes(
        encode_boxes=rpn_bbox_pred,
        reference_boxes=anchors,
        scale_factors=cfgs.ANCHOR_SCALE_FACTORS)

    # 2. clip to img boundaries
    boxes = boxes_utils.clip_boxes_to_img_boundaries(decode_boxes=boxes,
                                                     img_shape=img_shape)

    # 3. keep the top N before NMS; a non-positive limit disables this step
    if pre_nms_limit > 0:
        # Never ask top_k for more boxes than exist.
        top_n = tf.minimum(pre_nms_limit, tf.shape(boxes)[0],
                           name='avoid_unenough_boxes')
        scores, top_k_indices = tf.nn.top_k(scores, k=top_n)
        boxes = tf.gather(boxes, top_k_indices)

    # 4. NMS
    keep = tf.image.non_max_suppression(boxes=boxes,
                                        scores=scores,
                                        max_output_size=post_nms_limit,
                                        iou_threshold=nms_thresh)

    final_boxes = tf.gather(boxes, keep)
    final_probs = tf.gather(scores, keep)

    return final_boxes, final_probs
def fast_rcnn_proposals(self, decode_boxes, scores):
    '''
    Multi-class NMS followed by a score threshold.

    :param decode_boxes: [N, num_classes*4]
    :param scores: [N, num_classes+1]
    :return:
    detection_boxes : [-1, 4]
    scores : [-1, ]
    number of kept boxes (scalar tensor)
    category : argmax class of every kept row
    '''
    with tf.variable_scope('fast_rcnn_proposals'):
        category = tf.argmax(scores, axis=1)

        # Rows whose best class is 0 (background) are zeroed out, both the
        # boxes ([0 0 0 0]) and the scores.
        is_object = tf.cast(tf.not_equal(category, 0), tf.float32)
        decode_boxes = decode_boxes * tf.expand_dims(is_object, axis=1)
        scores = scores * tf.expand_dims(is_object, axis=1)

        decode_boxes = tf.reshape(decode_boxes,
                                  [-1, self.num_classes, 4])  # [N, num_classes, 4]

        boxes_by_class = tf.unstack(decode_boxes, axis=1)
        scores_by_class = tf.unstack(scores[:, 1:], axis=1)  # drop column 0

        kept_boxes = []
        kept_scores = []
        kept_categories = []
        for class_boxes, class_scores in zip(boxes_by_class, scores_by_class):
            keep = boxes_utils.nms_boxes(
                class_boxes, class_scores,
                iou_threshold=self.fast_rcnn_nms_iou_threshold,
                max_output_size=self.fast_rcnn_nms_max_boxes_per_class,
                name='second_stage_NMS')

            kept_boxes.append(tf.gather(class_boxes, keep))
            kept_scores.append(tf.gather(class_scores, keep))
            # The reported category is the row's argmax class.
            kept_categories.append(tf.gather(category, keep))

        all_nms_boxes = tf.concat(kept_boxes, axis=0)
        all_nms_scores = tf.concat(kept_scores, axis=0)
        all_category = tf.concat(kept_categories, axis=0)

        all_nms_boxes = boxes_utils.clip_boxes_to_img_boundaries(
            all_nms_boxes, img_shape=self.img_shape)

        # Keep only detections scoring above the display threshold.
        confident = tf.reshape(
            tf.where(tf.greater(all_nms_scores,
                                self.show_detections_score_threshold)),
            [-1])

        all_nms_boxes = tf.gather(all_nms_boxes, confident)
        all_nms_scores = tf.gather(all_nms_scores, confident)
        all_category = tf.gather(all_category, confident)

        num_of_objects = tf.shape(all_nms_boxes)[0]
        return all_nms_boxes, all_nms_scores, num_of_objects, all_category
def fast_rcnn_predict(self):
    """Decode horizontal and rotated predictions (class-agnostic regression).

    The proposals are used as reference boxes directly: they are neither
    tiled per class before decoding nor reshaped per class afterwards.

    :return: a 9-tuple -- horizontal (boxes, score, num_of_objects,
        detection_category) followed by rotated (boxes, score, head_quadrant,
        num_of_objects, detection_category)
    """
    with tf.variable_scope('fast_rcnn_predict'):
        # [-1, num_classes+1] according to the original annotation
        softmax_scores = slim.softmax(self.fast_rcnn_scores)
        softmax_scores_rotate = slim.softmax(self.fast_rcnn_scores_rotate)

        # 4 values per horizontal regression, 5 per rotated regression.
        encoded = tf.reshape(self.fast_rcnn_encode_boxes, [-1, 4])
        encoded_rotate = tf.reshape(self.fast_rcnn_encode_boxes_rotate,
                                    [-1, 5])

        # Class-agnostic regression: no per-class tiling of the proposals.
        reference_boxes = tf.reshape(self.fast_rcnn_all_level_proposals,
                                     [-1, 4])

        decoded = encode_and_decode.decode_boxes(
            encode_boxes=encoded,
            reference_boxes=reference_boxes,
            scale_factors=self.scale_factors)

        decoded_rotate = encode_and_decode.decode_boxes_rotate(
            encode_boxes=encoded_rotate,
            reference_boxes=reference_boxes,
            scale_factors=self.scale_factors)

        # Only the horizontal boxes are clipped to the image here.
        decoded = boxes_utils.clip_boxes_to_img_boundaries(
            decoded, img_shape=self.img_shape)

        # multi-class NMS (inputs are passed without a per-class reshape)
        boxes, score, num_of_objects, detection_category = \
            self.fast_rcnn_proposals(decoded, scores=softmax_scores)

        (boxes_rotate, score_rotate, head_quadrant,
         num_of_objects_rotate, detection_category_rotate) = \
            self.fast_rcnn_proposals_rotate(
                decoded_rotate,
                scores=softmax_scores_rotate,
                head_quadrant=self.fast_rcnn_head_quadrant)

        return (boxes, score, num_of_objects, detection_category,
                boxes_rotate, score_rotate, head_quadrant,
                num_of_objects_rotate, detection_category_rotate)
def postprocess_rpn_proposals(self, rpn_bbox_pred, rpn_cls_prob, img_shape,
                              anchors, is_training):
    """
    RPN proposal post-processing: decode, clip, top-N selection, NMS.

    :param rpn_bbox_pred: predicted box encodings
    :param rpn_cls_prob: RPN classification probabilities; column 1 is used
    :param img_shape: image shape used for clipping
    :param anchors: all reference anchors
    :param is_training: selects the TRAIN or TEST limits from cfgs
    :return: (final_boxes, final_probs)
    """
    # Same IoU threshold for both modes; only the box limits change.
    nms_threshold = cfgs.RPN_NMS_IOU_THRESHOLD
    if is_training:
        pre_nms_limit = cfgs.RPN_TOP_K_NMS_TRAIN
        post_nms_limit = cfgs.RPN_MAXIMUM_PROPOSAL_TARIN
    else:
        pre_nms_limit = cfgs.RPN_TOP_K_NMS_TEST
        post_nms_limit = cfgs.RPN_MAXIMUM_PROPOSAL_TEST

    scores = rpn_cls_prob[:, 1]  # (, 2) => (negative, positive)

    # step 1: decode boxes
    boxes = encode_and_decode.decode_boxes(
        encoded_boxes=rpn_bbox_pred,
        reference_boxes=anchors,
        scale_factors=cfgs.ANCHOR_SCALE_FACTORS)

    # step 2: clip to image boundaries
    boxes = boxes_utils.clip_boxes_to_img_boundaries(boxes,
                                                     img_shape=img_shape)

    # step 3: keep the top N before NMS (skipped for a non-positive limit)
    if pre_nms_limit > 0:
        top_n = tf.minimum(pre_nms_limit, tf.shape(boxes)[0],
                           name='minimum_boxes')
        scores, top_k_indices = tf.nn.top_k(scores, k=top_n)
        boxes = tf.gather(params=boxes, indices=top_k_indices)

    # step 4: NMS (Non Max Suppression)
    keep_indices = tf.image.non_max_suppression(
        boxes=boxes,
        scores=scores,
        max_output_size=post_nms_limit,
        iou_threshold=nms_threshold)

    final_boxes = tf.gather(boxes, keep_indices)
    final_probs = tf.gather(scores, keep_indices)

    return final_boxes, final_probs
def batch_slice_rpn_proposals(rpn_encode_boxes, rpn_scores, anchors, config,
                              rpn_proposals_num):
    """Produce exactly-padded RPN proposals for one slice of a batch.

    Steps: softmax -> optional top-k -> decode -> NMS -> clip -> zero-pad.

    :param rpn_encode_boxes: encoded RPN regressions
    :param rpn_scores: RPN logits; column 1 after softmax is the object score
    :param anchors: reference anchors
    :param config: object providing RPN_TOP_K_NMS, RPN_BBOX_STD_DEV,
        RPN_NMS_IOU_THRESHOLD and TARGET_SIDE
    :param rpn_proposals_num: NMS output limit and padding target
    :return: (rpn_proposals_boxes, rpn_object_score), zero-padded when NMS
        keeps fewer than ``rpn_proposals_num`` boxes
    """
    with tf.variable_scope('rpn_proposals'):
        softmax_scores = slim.softmax(rpn_scores)
        objectness = softmax_scores[:, 1]  # second column is the object score

        # Truncate scores, encodings and anchors together before decoding.
        if config.RPN_TOP_K_NMS:
            top_k_indices = tf.nn.top_k(objectness,
                                        k=config.RPN_TOP_K_NMS).indices
            objectness = tf.gather(objectness, top_k_indices)
            rpn_encode_boxes = tf.gather(rpn_encode_boxes, top_k_indices)
            anchors = tf.gather(anchors, top_k_indices)

        decoded = encode_and_decode.decode_boxes(
            encode_boxes=rpn_encode_boxes,
            reference_boxes=anchors,
            dev_factors=config.RPN_BBOX_STD_DEV)

        keep = boxes_utils.non_maximal_suppression(
            boxes=decoded,
            scores=objectness,
            max_output_size=rpn_proposals_num,
            iou_threshold=config.RPN_NMS_IOU_THRESHOLD)

        decoded = tf.gather(decoded, keep)
        objectness = tf.gather(objectness, keep)

        # Clip proposals to the image (out-of-range coordinates are replaced
        # by the boundary).
        last_pixel = config.TARGET_SIDE - 1
        decoded = boxes_utils.clip_boxes_to_img_boundaries(
            decoded, [0, 0, last_pixel, config.TARGET_SIDE - 1])

        # Pad if needed. The boxes are padded by concatenating zeros rather
        # than with tf.pad (the original author flagged this as deliberate).
        padding = tf.maximum(rpn_proposals_num - tf.shape(decoded)[0], 0)
        zero_boxes = tf.zeros((padding, 4), dtype=tf.float32)
        rpn_proposals_boxes = tf.concat([decoded, zero_boxes], axis=0)
        objectness = tf.pad(objectness, [(0, padding)])

        return rpn_proposals_boxes, objectness
def fast_rcnn_predict(self):
    """Decode horizontal and rotated predictions and reorder the rotated ones.

    :return: a 10-tuple -- horizontal (boxes, score, num_of_objects,
        detection_category), then the rotated boxes as returned by NMS, the
        reordered rotated boxes (twice: the same tensor fills two slots),
        then rotated (score, num_of_objects, detection_category)
    """
    with tf.variable_scope('fast_rcnn_predict'):
        # [-1, num_classes+1] according to the original annotation
        softmax_scores = slim.softmax(self.fast_rcnn_scores)
        softmax_scores_rotate = slim.softmax(self.fast_rcnn_scores_rotate)

        # 4 values per horizontal regression, 5 per rotated regression.
        encoded = tf.reshape(self.fast_rcnn_encode_boxes, [-1, 4])
        encoded_rotate = tf.reshape(self.fast_rcnn_encode_boxes_rotate,
                                    [-1, 5])

        # Repeat every proposal once per class.
        tiled_proposals = tf.tile(self.fast_rcnn_all_level_proposals,
                                  [1, self.num_classes])  # [N, 4*num_classes]
        reference_boxes = tf.reshape(tiled_proposals,
                                     [-1, 4])  # [N*num_classes, 4]

        decoded = encode_and_decode.decode_boxes(
            encode_boxes=encoded,
            reference_boxes=reference_boxes,
            scale_factors=self.scale_factors)

        decoded_rotate = encode_and_decode.decode_boxes_rotate(
            encode_boxes=encoded_rotate,
            reference_boxes=reference_boxes,
            scale_factors=self.scale_factors)

        # Only the horizontal boxes are clipped to the image here.
        decoded = boxes_utils.clip_boxes_to_img_boundaries(
            decoded, img_shape=self.img_shape)

        # multi-class NMS works on one row per proposal
        decoded = tf.reshape(decoded, [-1, self.num_classes * 4])
        decoded_rotate = tf.reshape(decoded_rotate,
                                    [-1, self.num_classes * 5])

        boxes, score, num_of_objects, detection_category = \
            self.fast_rcnn_proposals(decoded, scores=softmax_scores)

        (boxes_rotate, score_rotate,
         num_of_objects_rotate, detection_category_rotate) = \
            self.fast_rcnn_proposals_rotate(decoded_rotate,
                                            scores=softmax_scores_rotate)

        # read_reorder runs as a Python op on the rotated boxes.
        boxes_rotate_reorder = tf.py_func(read_reorder,
                                          inp=[boxes_rotate],
                                          Tout=tf.float32)

        return (boxes, score, num_of_objects, detection_category,
                boxes_rotate, boxes_rotate_reorder, boxes_rotate_reorder,
                score_rotate, num_of_objects_rotate,
                detection_category_rotate)
def rpn_loss(self):
    '''
    Build the RPN classification and localization losses on a minibatch of
    anchors, and emit image summaries of the sampled anchors.

    Shapes below are the ones stated by the original author.

    :param: self.gtboxes_and_label: [n, 5]->[ymin, xmin, ymax, xmax, cls]
    :param: self.anchors: [m, 4]-> [ymin, xmin, ymax, xmax]
    :param:self.rpn_encode_boxes: [m, 4]->[ycenter, xcenter, h, w]
    :return: (location_loss, classify_loss)
    '''
    with tf.variable_scope('rpn_loss'):
        (batch_indices, matched_gtboxes,
         object_mask, label_onehot) = self.make_minibatch()

        # Restrict anchors and predictions to the sampled minibatch.
        batch_anchors = tf.gather(self.anchors, batch_indices)
        batch_encode_boxes = tf.gather(self.rpn_encode_boxes, batch_indices)
        batch_scores = tf.gather(self.rpn_scores, batch_indices)

        # Regression targets: matched ground-truth boxes encoded against
        # their anchors.
        encode_targets = encode_and_decode.encode_boxes(
            batch_anchors, matched_gtboxes, self.scale_factors)

        # summary: positive anchors (others are multiplied to zero)
        positive_anchors_img = draw_box_with_tensor(
            img_batch=self.img_batch,
            boxes=batch_anchors * tf.expand_dims(object_mask, 1),
            text=tf.shape(tf.where(tf.equal(object_mask, 1)))[0])

        # summary: negative anchors, using the inverted mask
        negative_mask = tf.cast(
            tf.logical_not(tf.cast(object_mask, tf.bool)), tf.float32)
        negative_anchors_img = draw_box_with_tensor(
            img_batch=self.img_batch,
            boxes=batch_anchors * tf.expand_dims(negative_mask, 1),
            text=tf.shape(tf.where(tf.equal(negative_mask, 1)))[0])

        # summary: positive anchors after applying the predicted regression
        decoded_anchors = encode_and_decode.decode_boxes(
            encode_boxes=batch_encode_boxes,
            reference_boxes=batch_anchors,
            scale_factors=self.scale_factors)
        # clip boxes into image shape
        decoded_anchors = boxes_utils.clip_boxes_to_img_boundaries(
            decoded_anchors, tf.shape(self.img_batch))
        positive_decoded_img = draw_box_with_tensor(
            img_batch=self.img_batch,
            boxes=decoded_anchors * tf.expand_dims(object_mask, 1),
            text=tf.shape(tf.where(tf.equal(object_mask, 1)))[0])

        tf.summary.image('images/rpn/losses/anchors_positive_minibatch',
                         positive_anchors_img)
        tf.summary.image('images/rpn/losses/anchors_negative_minibatch',
                         negative_anchors_img)
        tf.summary.image('images/rpn/losses/decode_anchor_positive',
                         positive_decoded_img)

        # losses
        with tf.variable_scope('rpn_localization_losses'):
            classify_loss = slim.losses.softmax_cross_entropy(
                logits=batch_scores, onehot_labels=label_onehot)

            location_loss = losses.l1_smooth_losses(
                predict_boxes=batch_encode_boxes,
                gtboxes=encode_targets,
                object_weights=object_mask)
            # add the location loss to the losses collection
            slim.losses.add_loss(location_loss)

        return location_loss, classify_loss
def postprocess_rpn_proposals(rpn_bbox_pred, rpn_cls_prob, img_shape, anchors,
                              is_training):
    '''
    Decode RPN outputs, clip them, keep the top-N by score and run NMS.

    At this stage nothing is classified into object categories yet; the
    function only selects the boxes most likely to contain an object.

    :param rpn_bbox_pred: [-1, 4]
    :param rpn_cls_prob: [-1, 2]; column 1 is used as the score
    :param img_shape: forwarded to clip_boxes_to_img_boundaries
    :param anchors:[-1, 4]
    :param is_training: selects the TRAIN or TEST limits from cfgs
    :return: (final_boxes, final_probs)
    '''
    # Defaults quoted from the original author's notes: 12000 / 2000 boxes
    # before / after NMS when training, 6000 / 300 when testing, and an IoU
    # threshold of 0.7 in both modes.
    nms_thresh = cfgs.RPN_NMS_IOU_THRESHOLD
    if is_training:
        pre_nms_limit, post_nms_limit = (cfgs.RPN_TOP_K_NMS_TRAIN,
                                         cfgs.RPN_MAXIMUM_PROPOSAL_TARIN)
    else:
        pre_nms_limit, post_nms_limit = (cfgs.RPN_TOP_K_NMS_TEST,
                                         cfgs.RPN_MAXIMUM_PROPOSAL_TEST)

    scores = rpn_cls_prob[:, 1]

    # 1. decode boxes: turn the predicted encodings into actual box
    # coordinates relative to the anchors.
    boxes = encode_and_decode.decode_boxes(
        encoded_boxes=rpn_bbox_pred,
        reference_boxes=anchors,
        scale_factors=cfgs.ANCHOR_SCALE_FACTORS)

    # 2. clip to img boundaries
    boxes = boxes_utils.clip_boxes_to_img_boundaries(decode_boxes=boxes,
                                                     img_shape=img_shape)

    # 3. get top N to NMS
    if pre_nms_limit > 0:
        # There may be fewer predicted boxes than the configured top-N, so
        # take the minimum of the two before calling top_k.
        top_n = tf.minimum(pre_nms_limit, tf.shape(boxes)[0],
                           name='avoid_unenough_boxes')
        scores, top_k_indices = tf.nn.top_k(scores, k=top_n)
        boxes = tf.gather(boxes, top_k_indices)

    # 4. NMS
    keep = tf.image.non_max_suppression(boxes=boxes,
                                        scores=scores,
                                        max_output_size=post_nms_limit,
                                        iou_threshold=nms_thresh)

    final_boxes = tf.gather(boxes, keep)
    final_probs = tf.gather(scores, keep)

    return final_boxes, final_probs
def postprocess_fastrcnn_proposals(bbox_ppred, scores, img_shape, rois,
                                   is_training):
    '''
    Decode, clip and run per-class top-k + NMS on the Fast R-CNN head outputs.

    :param bbox_ppred: encoded box regressions; reshaped here to
                       [-1, cfgs.CLASS_NUM + 1, 4]
    :param scores: per-class scores, unstacked along axis 1
                   (presumably [-1, cfgs.CLASS_NUM + 1] -- confirm at caller)
    :param img_shape: forwarded to clip_boxes_to_img_boundaries
    :param rois: reference boxes the regressions are decoded against
    :param is_training: selects the hard-coded top-k / NMS settings
    :return: (final_boxes, final_scores), the per-class results for classes
             1..cfgs.CLASS_NUM concatenated along axis 0. The class of each
             box is not returned.
    '''
    # Hard-coded limits; the trailing names are the cfgs entries they
    # replaced in the original code.
    if is_training:
        pre_nms_topN = 2000   # cfgs.RPN_TOP_K_NMS_TRAIN
        post_nms_topN = 500   # cfgs.RPN_MAXIMUM_PROPOSAL_TARIN
        nms_thresh = 0.8      # cfgs.RPN_NMS_IOU_THRESHOLD
    else:
        pre_nms_topN = 1500   # cfgs.RPN_TOP_K_NMS_TEST
        post_nms_topN = 300   # cfgs.RPN_MAXIMUM_PROPOSAL_TEST
        nms_thresh = 0.7      # cfgs.RPN_NMS_IOU_THRESHOLD

    bbox_ppred = tf.reshape(bbox_ppred, [-1, cfgs.CLASS_NUM + 1, 4])

    bbox_pred_list = tf.unstack(bbox_ppred, axis=1)
    score_list = tf.unstack(scores, axis=1)

    allclasses_boxes = []
    allclasses_scores = []

    # Index 0 is skipped (presumably the background class).
    for i in range(1, cfgs.CLASS_NUM + 1):
        # 1. decode boxes in each class
        tmp_encoded_box = bbox_pred_list[i]
        tmp_score = score_list[i]
        tmp_decoded_boxes = encode_and_decode.decode_boxes(
            encode_boxes=tmp_encoded_box,
            reference_boxes=rois,
            scale_factors=cfgs.ROI_SCALE_FACTORS)

        # 2. clip to img boundaries
        tmp_decoded_boxes = boxes_utils.clip_boxes_to_img_boundaries(
            decode_boxes=tmp_decoded_boxes, img_shape=img_shape)

        # 3. keep the top-k scoring boxes. The limit goes into its own
        # variable so the Python-int setting is not overwritten by a tensor
        # on the first iteration.
        top_k = tf.minimum(pre_nms_topN, tf.shape(tmp_decoded_boxes)[0],
                           name='avoid_unenough_boxes')
        top_k_scores, top_k_indices = tf.nn.top_k(tmp_score, k=top_k)
        top_k_boxes = tf.gather(tmp_decoded_boxes, top_k_indices)

        # 4. NMS
        keep = tf.image.non_max_suppression(boxes=top_k_boxes,
                                            scores=top_k_scores,
                                            max_output_size=post_nms_topN,
                                            iou_threshold=nms_thresh)

        # `keep` indexes the top-k ordered tensors passed to NMS, so the
        # survivors must be gathered from those tensors. Gathering from the
        # unsorted per-class tensors would select unrelated rows.
        allclasses_boxes.append(tf.gather(top_k_boxes, keep))
        allclasses_scores.append(tf.gather(top_k_scores, keep))

    final_boxes = tf.concat(allclasses_boxes, axis=0)
    final_scores = tf.concat(allclasses_scores, axis=0)

    return final_boxes, final_scores
def train():
    """Build the detection training graph and run the training loop.

    The graph is assembled in this order: input pipeline, backbone network,
    RPN (proposals + losses), Fast R-CNN head (predictions + losses),
    optimizer, summaries. A session then runs ``train_op`` step by step,
    printing the losses, writing summaries and saving checkpoints.

    Relies on names defined outside this function (``cfg``, ``debug``,
    ``Read_tfrecord``, ``get_network_byname``, ``build_rpn``,
    ``build_fast_rcnn``, ``get_restorer`` and others).
    """
    with tf.Graph().as_default():
        ##############
        # input data #
        ##############
        with tf.name_scope('get_batch'):
            data = Read_tfrecord()
            iterator, img_name, img, gtboxes_label, num_gtbox = data.get_batch_data()

        with tf.name_scope('draw_gtboxes'):
            # Drop the last column of each ground-truth row (the label) and
            # draw the remaining box coordinates on the image.
            gtboxes_in_img = draw_box_with_tensor(img,
                                                  tf.reshape(gtboxes_label, [-1, 5])[:, :-1],
                                                  text=img_name)
        # original_img = tf.squeeze(img, axis=0)+tf.constant(cfg.DEPTH_MEAN)
        # original_img = tf.reshape(original_img, shape=tf.shape(img))
        # tf.summary.image('images/original_images', original_img)

        ####################
        # backbone network #
        ####################
        _, end_point = get_network_byname(net_name=cfg.NETWORK_NAME,
                                          inputs=img,
                                          num_classes=None,
                                          is_training=True,
                                          global_pool=False,
                                          output_stride=None,
                                          spatial_squeeze=False)

        ###############
        # rpn network #
        ###############
        rpn_net = build_rpn.RPN(
            net_name=cfg.NETWORK_NAME,
            inputs=img,
            gtboxes_and_label=tf.squeeze(gtboxes_label, axis=0),
            is_training=True,
            end_point=end_point,
            anchor_scales=cfg.ANCHOR_SCALES,
            anchor_ratios=cfg.ANCHOR_RATIOS,
            scale_factors=cfg.SCALE_FACTOR,
            base_anchor_size_list=cfg.BASE_ANCHOR_SIZE_LIST,
            stride=cfg.STRIDE,
            level=cfg.LEVEL,
            top_k_nms=cfg.TOP_K_NMS,
            share_head=cfg.IS_SHARE_HEAD,
            rpn_nms_iou_threshold=cfg.RPN_NMS_IOU_THRESHOLD,
            max_proposal_num=cfg.MAX_PROPOSAL_NUM,
            rpn_iou_positive_threshold=cfg.RPN_IOU_POSITIVE_THRESHOLD,
            rpn_iou_negtive_threshold=cfg.RPN_IOU_NEGATIVE_THRESHOLD,
            rpn_mini_batchsize=cfg.RPN_MINI_BATCH_SIZE,
            rpn_positive_ratio=cfg.POSITIVE_RATIO,
            remove_outside_anchors=cfg.IS_FILTER_OUTSIDE_ANCHORS,
            rpn_weight_decay=cfg.RPN_WEIGHT_DECAY)

        rpn_proposals_boxes, rpn_proposals_scores = rpn_net.rpn_proposals()
        rpn_location_loss, rpn_classification_loss = rpn_net.rpn_loss()
        rpn_net_loss = rpn_location_loss + rpn_classification_loss

        with tf.name_scope('draw_proposals'):
            # Proposals scoring above 0.5 are drawn separately as "objects".
            rpn_object_indices = tf.reshape(tf.where(tf.greater(rpn_proposals_scores, 0.5)),
                                            shape=[-1])
            # clip boxes into image shape
            clip_rpn_proposals_boxes = clip_boxes_to_img_boundaries(rpn_proposals_boxes,
                                                                    tf.shape(img))
            rpn_object_boxes = tf.gather(clip_rpn_proposals_boxes,
                                         indices=rpn_object_indices)

            rpn_object_boxes_in_img = draw_box_with_tensor(
                img_batch=img,
                boxes=rpn_object_boxes,
                text=tf.shape(rpn_object_boxes)[0])
            rpn_proposals_boxes_in_img = draw_box_with_tensor(
                img_batch=img,
                boxes=clip_rpn_proposals_boxes,
                text=tf.shape(rpn_proposals_boxes)[0])

        #############
        # fast-rcnn #
        #############
        # Note: the head receives the unclipped RPN proposals; the clipped
        # copy above is only used for drawing.
        fast_rcnn = build_fast_rcnn.FastRcnn(
            img_batch=img,
            feature_dict=rpn_net.feature_pyramid,
            rpn_proposal_boxes=rpn_proposals_boxes,
            rpn_proposal_scores=rpn_proposals_scores,
            gtboxes_and_label=tf.squeeze(gtboxes_label, axis=0),
            crop_size=cfg.CROP_SIZE,
            roi_pooling_kernel_size=cfg.ROI_POOLING_KERNEL_SIZE,
            levels=cfg.LEVEL,
            is_training=True,
            weights_regularizer=cfg.FAST_RCNN_WEIGHTS_DECAY,
            num_cls=cfg.NUM_CLASSES,
            scale_factors=cfg.SCALE_FACTOR,
            fast_rcnn_nms_iou_threshold=cfg.FAST_RCNN_NMS_IOU_THRESHOLD,
            max_num_per_class=cfg.MAX_NUM_PER_CLASS,
            fast_rcnn_score_threshold=cfg.FAST_RCNN_SCORE_THRESHOLD,
            fast_rcnn_positive_threshold_iou=cfg.FAST_RCNN_POSITIVE_THRESHOLD_IOU,
            fast_rcnn_minibatch_size=cfg.FAST_RCNN_MINIBATCH_SIZE,
            fast_rcnn_positive_ratio=cfg.FAST_RCNN_POSITIVE_RATIO)

        fast_rcnn_decode_boxes, fast_rcnn_category, fast_rcnn_scores, num_object = \
            fast_rcnn.fast_rcnn_prediction()
        fast_rcnn_boxes_loss, fast_rcnn_cls_loss = fast_rcnn.fast_rcnn_loss()
        fast_rcnn_total_loss = fast_rcnn_boxes_loss + fast_rcnn_cls_loss

        with tf.name_scope('fast_rcnn_prediction_boxes'):
            fast_rcnn_prediction_in_image = draw_boxes_with_category(
                img_batch=img,
                boxes=fast_rcnn_decode_boxes,
                category=fast_rcnn_category,
                scores=fast_rcnn_scores)

        #####################
        # optimization part #
        #####################
        # Earlier variant, kept by the original author for reference:
        # global_step = tf.train.get_or_create_global_step()
        # total_loss = slim.losses.get_losses()
        # total_loss = tf.reduce_sum(total_loss * tf.constant(cfg.LOSS_WEIGHT, dtype=tf.float32))
        #
        # lr = tf.train.piecewise_constant(global_step,
        #                                  [60000],
        #                                  [cfg.BASE_LEARNING_RATE, cfg.BASE_LEARNING_RATE/10])
        #
        # optimizer = slim.train.MomentumOptimizer(learning_rate=lr,
        #                                          momentum=cfg.MOMENTUM,)
        #
        # train_op = optimizer.minimize(total_loss, global_step)

        global_step = tf.train.get_or_create_global_step()

        # The total loss is read back from slim's loss collection rather
        # than summed from the individual loss tensors above.
        total_loss = slim.losses.get_total_loss()

        # Learning rate drops to a tenth of the base rate after step 60000.
        lr = tf.train.piecewise_constant(
            global_step, [60000],
            [cfg.BASE_LEARNING_RATE, cfg.BASE_LEARNING_RATE / 10])

        optimizer = tf.train.MomentumOptimizer(learning_rate=lr,
                                               momentum=cfg.MOMENTUM)

        train_op = slim.learning.create_train_op(total_loss, optimizer,
                                                 global_step)

        ###########
        # summary #
        ###########
        # ground truth boxes
        tf.summary.image('images/gtboxes', gtboxes_in_img)

        # rpn net's proposals
        tf.summary.image('images/rpn/proposals', rpn_proposals_boxes_in_img)
        tf.summary.image('images/rpn/objects', rpn_object_boxes_in_img)

        # rpn loss scalars
        tf.summary.scalar('losses/rpn/location_loss', rpn_location_loss)
        tf.summary.scalar('losses/rpn/cls_loss', rpn_classification_loss)
        tf.summary.scalar('losses/rpn/total_loss', rpn_net_loss)

        # fast rcnn prediction boxes
        tf.summary.image('images/fast_rcnn/prediction_boxes',
                         fast_rcnn_prediction_in_image)

        # fast rcnn loss part
        tf.summary.scalar('losses/fast_rcnn/location_loss', fast_rcnn_boxes_loss)
        tf.summary.scalar('losses/fast_rcnn/cls_loss', fast_rcnn_cls_loss)
        tf.summary.scalar('losses/fast_rcnn/total_loss', fast_rcnn_total_loss)

        tf.summary.scalar('losses/total_loss', total_loss)
        # NOTE(review): the tag is spelled 'learing_rate'; renaming it would
        # change the name under which the summary is recorded.
        tf.summary.scalar('learing_rate', lr)

        # NOTE(review): `debug` is not defined in this function; it has to
        # come from the enclosing module.
        if debug:
            # backbone network activations
            for key in end_point.keys():
                tf.summary.histogram('value/' + key, end_point[key])

            # weights
            for weight in slim.get_model_variables():
                tf.summary.histogram('weight/' + weight.name, weight.value())

            # rpn anchors
            image_with_anchor_list = debug_rpn.debug_rpn(rpn_net, img)
            for i, image_with_anchor in enumerate(image_with_anchor_list):
                tf.summary.image('anchors/image_with_anchors_' + str(i),
                                 image_with_anchor[0])

            # fast rcnn prediction
            tf.summary.tensor_summary('image_shape', tf.shape(img))
            tf.summary.tensor_summary('fast_rcnn_prediction_boxes',
                                      fast_rcnn_decode_boxes)

        summary_op = tf.summary.merge_all()
        summary_path = cfg.SUMMARY_PATH
        check_and_create_paths([summary_path])

        ################
        # session part #
        ################
        init_op = tf.group(tf.local_variables_initializer(),
                           tf.global_variables_initializer())
        checkpoint_path, restorer = get_restorer()

        with tf.Session() as sess:
            # initial part
            sess.run(init_op)
            sess.run(iterator.initializer)
            summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)
            saver = tf.train.Saver()

            # Restore only when get_restorer() supplied a checkpoint path.
            if checkpoint_path:
                restorer.restore(sess, checkpoint_path)
                print('restore is done!!!')

            step = 0
            while True:
                try:
                    # NOTE(review): hard stop after 30 steps, so the
                    # checkpoint branch below (step % 10000 == 0 and
                    # step > 1) can never fire -- looks like a debugging
                    # leftover; confirm before long runs.
                    if step >= 30:
                        break
                    training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                                  time.localtime(time.time()))
                    start_time = time.time()

                    # One sess.run performs the update and fetches every
                    # value that is printed or logged for this step.
                    _global_step,\
                        _img_name,\
                        _rpn_location_loss,\
                        _rpn_classification_loss,\
                        _rpn_net_loss,\
                        _fast_rcnn_boxes_loss,\
                        _fast_rcnn_cls_loss,\
                        _fast_rcnn_total_loss,\
                        _total_loss,\
                        _train_op,\
                        summary_str\
                        = sess.run([global_step,
                                    img_name,
                                    rpn_location_loss,
                                    rpn_classification_loss,
                                    rpn_net_loss,
                                    fast_rcnn_boxes_loss,
                                    fast_rcnn_cls_loss,
                                    fast_rcnn_total_loss,
                                    total_loss,
                                    train_op,
                                    summary_op])

                    end_time = time.time()

                    # print the result on screen (currently on every step)
                    if 1:  # step % 10 == 0:
                        cost_time = end_time - start_time
                        # NOTE(review): the last field uses '%4f' while all
                        # other losses use '%.4f' -- probably a typo.
                        print(
                            """-----time:%s---step:%d---image name:%s---cost_time:%.4fs-----\n total_loss:%.4f\n rpn_boxes_loss:%.4f rpn_class_loss:%.4f rpn_total_loss:%.4f\n fast_rcnn_boxes_loss:%.4f fast_rcnn_class_loss:%.4f fast_rcnn_total_loss:%4f"""
                            % (training_time, _global_step, str(_img_name), cost_time,
                               _total_loss, _rpn_location_loss,
                               _rpn_classification_loss, _rpn_net_loss,
                               _fast_rcnn_boxes_loss, _fast_rcnn_cls_loss,
                               _fast_rcnn_total_loss))

                    # add summary every 10 steps, indexed by the local step
                    # counter rather than by global_step
                    if step % 10 == 0:
                        # summary_str = sess.run(summary_op)
                        summary_writer.add_summary(summary_str, step)
                        summary_writer.flush()

                    # save ckpt
                    if step % 10000 == 0 and step > 1:
                        check_and_create_paths([cfg.CKPT_PATH])
                        save_path = os.path.join(cfg.CKPT_PATH, 'model_weights')
                        saver.save(sess, save_path, global_step)

                    step += 1
                except tf.errors.OutOfRangeError:
                    # The input iterator is exhausted: stop training.
                    break

            summary_writer.close()
def get_rois(self):
    '''
    Crop one fixed-size feature patch per RPN proposal from its pyramid level.

    Proposals are grouped by the level returned by ``self.assign_level()``.
    For every level the rotated proposals are turned into axis-aligned boxes,
    clipped to the image, cropped out of ``self.feature_pyramid['P<level>']``
    with ``tf.image.crop_and_resize`` and finally max-pooled.

    Grouping by level reorders the proposals relative to
    ``self.rpn_proposals_boxes``. Decoding later needs every roi paired with
    its own reference box, so the proposals are returned again in exactly the
    order of the rois. A level that receives no proposal contributes a single
    dummy proposal ``[0, 0, 1, 1, -90]`` to all three outputs.

    :return: all_level_rois: pooled crops of all levels, concatenated on axis 0
    :return: all_level_rotate_proposals: rotated proposals (5 values per row),
             row-aligned with all_level_rois
    :return: all_level_horizontal_proposals: rows of [x_c, y_c, h, w, -90],
             row-aligned with all_level_rois
    '''
    levels = self.assign_level()

    all_level_roi_list = []
    all_level_proposal_rotate_list = []
    all_level_proposal_horizontal_list = []
    with tf.variable_scope('crop_roi_and_roi_align'):
        # The image size does not depend on the level: build these ops once
        # instead of once per pyramid level.
        img_h = tf.cast(self.img_shape[1], tf.float32)
        img_w = tf.cast(self.img_shape[2], tf.float32)

        for i in range(self.min_level, self.max_level + 1):
            level_i_proposal_indices = tf.reshape(
                tf.where(tf.equal(levels, i)), [-1])
            gathered_proposals = tf.gather(self.rpn_proposals_boxes,
                                           level_i_proposal_indices)

            # Avoid an empty batch on this level: it would break the graph
            # when gradients are back-propagated.
            level_i_rotate_proposals = tf.cond(
                tf.equal(tf.shape(gathered_proposals)[0], 0),
                lambda: tf.constant([[0, 0, 1, 1, -90]], dtype=tf.float32),
                lambda: gathered_proposals)
            all_level_proposal_rotate_list.append(level_i_rotate_proposals)

            # Axis-aligned boxes derived from the rotated proposals, then
            # clipped to the image.
            level_i_horizon_proposals = get_horizon_minAreaRectangle(
                level_i_rotate_proposals, False)
            level_i_horizon_proposals = clip_boxes_to_img_boundaries(
                level_i_horizon_proposals, img_shape=self.img_shape)

            xmin, ymin, xmax, ymax = tf.unstack(level_i_horizon_proposals,
                                                axis=1)

            # Same boxes in the 5-value layout, with a fixed angle of -90.
            # NOTE: `//` floors the centre coordinates.
            h = tf.maximum(ymax - ymin, 0)
            w = tf.maximum(xmax - xmin, 0)
            x_c = (xmax + xmin) // 2
            y_c = (ymax + ymin) // 2
            theta = tf.ones_like(h) * -90
            level_i_horizontal_proposals = tf.transpose(
                tf.stack([x_c, y_c, h, w, theta]))
            all_level_proposal_horizontal_list.append(
                level_i_horizontal_proposals)

            # crop_and_resize expects [ymin, xmin, ymax, xmax] normalised by
            # the image size.
            normalize_ymin = ymin / img_h
            normalize_xmin = xmin / img_w
            normalize_ymax = ymax / img_h
            normalize_xmax = xmax / img_w

            level_i_cropped_rois = tf.image.crop_and_resize(
                self.feature_pyramid['P%d' % i],
                boxes=tf.transpose(tf.stack([normalize_ymin, normalize_xmin,
                                             normalize_ymax, normalize_xmax])),
                # every box is taken from image 0 of the batch
                box_ind=tf.zeros(
                    shape=[tf.shape(level_i_rotate_proposals)[0], ],
                    dtype=tf.int32),
                crop_size=[self.roi_size, self.roi_size],
                name='CROP_AND_RESIZE')

            if cfgs.USE_MASK:
                # RRPN-style affine rotation, implemented with a rotated mask.
                roi_mask = tf_wrapper.get_mask_tf(level_i_rotate_proposals,
                                                  self.roi_size)
                # Add a trailing channel axis and let broadcasting apply the
                # mask to every feature channel. This replaces a copy of the
                # mask for a hard-coded 256 channels, so any pyramid depth
                # works.
                roi_mask = tf.expand_dims(roi_mask, axis=3)
                level_i_cropped_rois = level_i_cropped_rois * roi_mask

            level_i_rois = slim.max_pool2d(
                level_i_cropped_rois,
                [self.roi_pool_kernel_size, self.roi_pool_kernel_size],
                stride=self.roi_pool_kernel_size)
            all_level_roi_list.append(level_i_rois)

        all_level_rois = tf.concat(all_level_roi_list, axis=0)
        all_level_rotate_proposals = tf.concat(
            all_level_proposal_rotate_list, axis=0)
        all_level_horizontal_proposals = tf.concat(
            all_level_proposal_horizontal_list, axis=0)
        return all_level_rois, all_level_rotate_proposals, all_level_horizontal_proposals
def postprocess_fastrcnn(self, rois, bbox_ppred, scores, img_shape):
    '''
    Turn the second-stage outputs into detections, one class at a time.

    For every class index 1..cfgs.CLASS_NUM the encoded boxes are decoded
    against the rois, clipped to the image and filtered with NMS; the
    survivors of all classes are concatenated. While training, detections
    scoring below cfgs.SHOW_SCORE_THRSHOLD are dropped as well.

    :param rois: reference boxes passed to decode_boxes, [-1, 4]
    :param bbox_ppred: encoded boxes, [-1, (cfgs.CLASS_NUM + 1) * 4]
    :param scores: per-class scores, [-1, cfgs.CLASS_NUM + 1]
    :param img_shape: forwarded to clip_boxes_to_img_boundaries
    :return: final_boxes, final_scores, final_category (the category is the
             class index, stored with the dtype of the scores)
    '''
    with tf.name_scope('postprocess_fastrcnn'):
        # Post-processing only reads the predictions: block every gradient.
        rois = tf.stop_gradient(rois)
        scores = tf.stop_gradient(scores)
        per_class_deltas = tf.stop_gradient(
            tf.reshape(bbox_ppred, [-1, cfgs.CLASS_NUM + 1, 4]))

        delta_columns = tf.unstack(per_class_deltas, axis=1)
        score_columns = tf.unstack(scores, axis=1)

        boxes_by_class = []
        scores_by_class = []
        labels_by_class = []

        # Column 0 is never visited (presumably the background class).
        for class_id in range(1, cfgs.CLASS_NUM + 1):
            class_scores = score_columns[class_id]

            # 1. decode the boxes predicted for this class
            decoded = encode_and_decode.decode_boxes(
                encoded_boxes=delta_columns[class_id],
                reference_boxes=rois,
                scale_factors=cfgs.ROI_SCALE_FACTORS)

            # 2. clip them to the image boundaries
            decoded = boxes_utils.clip_boxes_to_img_boundaries(
                decode_boxes=decoded,
                img_shape=img_shape)

            # 3. per-class NMS
            survivors = tf.image.non_max_suppression(
                boxes=decoded,
                scores=class_scores,
                max_output_size=cfgs.FAST_RCNN_NMS_MAX_BOXES_PER_CLASS,
                iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD)

            nms_boxes = tf.gather(decoded, survivors)
            nms_scores = tf.gather(class_scores, survivors)

            boxes_by_class.append(nms_boxes)
            scores_by_class.append(nms_scores)
            labels_by_class.append(tf.ones_like(nms_scores) * class_id)

        final_boxes = tf.concat(boxes_by_class, axis=0)
        final_scores = tf.concat(scores_by_class, axis=0)
        final_category = tf.concat(labels_by_class, axis=0)

        if not self.is_training:
            return final_boxes, final_scores, final_category

        # Training only: the detections are shown in TensorBoard, so keep just
        # the ones that reach the display score threshold.
        confident = tf.reshape(
            tf.where(tf.greater_equal(final_scores,
                                      cfgs.SHOW_SCORE_THRSHOLD)),
            [-1])
        shown_boxes = tf.gather(final_boxes, confident)
        shown_scores = tf.gather(final_scores, confident)
        shown_category = tf.gather(final_category, confident)
        return shown_boxes, shown_scores, shown_category
def fast_rcnn_loss(self):
    '''
    Build the second-stage classification and box-regression losses.

    A minibatch is sampled with ``self.make_minibatch()``. The matched
    ground-truth boxes are encoded against the sampled rois and tiled once per
    class, and a per-class weight mask built from the one-hot labels selects
    which 4 coordinates take part in the smooth-L1 loss. The box loss is
    registered with ``slim.losses.add_loss``. Several image and tensor
    summaries are emitted for debugging.

    :return: fast_rcnn_boxes_loss, fast_rcnn_cls_loss
    '''
    with tf.variable_scope('fast_rcnn_loss'):
        (sample_indices,
         sample_gtboxes,
         sample_onehot,
         sample_object_mask) = self.make_minibatch()

        sample_rois = tf.gather(self.rois_boxes, sample_indices)
        sample_logits = tf.gather(self.fast_rcnn_cls_scores, sample_indices)
        sample_pred_deltas = tf.gather(self.fast_rcnn_encode_boxes,
                                       sample_indices)

        # Regression targets: [minibatch_size, 4] -> [minibatch_size, num_cls*4]
        target_deltas = encode_boxes(anchors=sample_rois,
                                     gtboxes=sample_gtboxes,
                                     scale_factors=self.scale_factors)
        target_deltas = tf.tile(target_deltas, [1, self.num_cls])

        # Weight mask [minibatch_size, num_cls*4]: each class contributes a
        # block of 4 columns scaled by that class's one-hot column. One-hot
        # column 0 is skipped.
        onehot_columns = tf.unstack(sample_onehot, axis=1)
        class_weight_mask = tf.concat(
            [tf.ones([self.fast_rcnn_minibatch_size, 4], dtype=tf.float32)
             * tf.expand_dims(onehot_columns[cls_index], axis=1)
             for cls_index in range(1, self.num_cls + 1)],
            axis=1)

        # classification loss
        with tf.variable_scope('fast_rcnn_cls_losses'):
            fast_rcnn_cls_loss = slim.losses.softmax_cross_entropy(
                logits=sample_logits,
                onehot_labels=sample_onehot)

        # box regression loss
        with tf.variable_scope('fast_rcnn_boxes_losses'):
            fast_rcnn_boxes_loss = losses.l1_smooth_losses(
                predict_boxes=sample_pred_deltas,
                gtboxes=target_deltas,
                object_weights=sample_object_mask,
                classes_weights=class_weight_mask)
            slim.losses.add_loss(fast_rcnn_boxes_loss)

        # ---- debugging summaries ----
        # Draw positive and negative proposals separately; boxes that do not
        # belong to the drawn group are zeroed out by the mask.
        clipped_rois = boxes_utils.clip_boxes_to_img_boundaries(
            sample_rois, self.img_shape)

        positive_image = draw_box_with_tensor(
            img_batch=self.img_batch,
            boxes=clipped_rois * tf.expand_dims(
                tf.cast(sample_object_mask, tf.float32), 1),
            text=tf.shape(tf.where(tf.equal(sample_object_mask, 1)))[0])

        negative_mask = tf.cast(
            tf.logical_not(tf.cast(sample_object_mask, tf.bool)),
            tf.float32)
        negative_image = draw_box_with_tensor(
            img_batch=self.img_batch,
            boxes=clipped_rois * tf.expand_dims(negative_mask, 1),
            text=tf.shape(tf.where(tf.equal(negative_mask, 1)))[0])

        tf.summary.image('minibatch_positive_proposals', positive_image)
        tf.summary.image('minibatch_negative_proposal', negative_image)

        # Raw tensors, to inspect the classification and regression inputs.
        debug_tensors = [
            ('minibatch_object_mask', sample_object_mask),
            ('class_weight_mask', class_weight_mask),
            ('minibatch_predict_encode_boxes', sample_pred_deltas),
            ('minibatch_encode_gtboxes', target_deltas),
            ('location_loss', fast_rcnn_boxes_loss),
            ('logits', sample_logits),
            ('one_hot', sample_onehot),
        ]
        for summary_name, summary_tensor in debug_tensors:
            tf.summary.tensor_summary(summary_name, summary_tensor)

    return fast_rcnn_boxes_loss, fast_rcnn_cls_loss
def batch_slice_head_proposals(rpn_proposal_bbox, encode_boxes, categories,
                               scores, image_height, image_width):
    """
    Decode the head predictions and run multi-class NMS on them.

    Zero-padded proposals are trimmed, the encoded boxes are decoded against
    the remaining proposals and clipped to the image. Class 0, and (when
    cfgs.FINAL_SCORE_THRESHOLD is set) low-scoring boxes, are discarded, NMS
    is applied per class, and at most cfgs.DETECTION_MAX_INSTANCES detections
    with the highest scores are kept.

    :param rpn_proposal_bbox: (N, 4) proposals used as reference boxes
    :param encode_boxes: (N, 4) encoded boxes predicted for the proposals
    :param categories: (N, ) predicted class id per proposal
    :param scores: (N, ) score of the predicted class
    :param image_height: passed to clip_boxes_to_img_boundaries
    :param image_width: passed to clip_boxes_to_img_boundaries
    :return: detections: (cfgs.DETECTION_MAX_INSTANCES, 6), one row per
             detection laid out as [4 box coordinates, class_id, score] and
             padded with all-zero rows
    """
    with tf.name_scope('head_proposals'):
        # Drop the zero-padded proposals and the matching rows of the other
        # inputs.
        rpn_proposal_bbox, non_zeros = boxes_utils.trim_zeros_graph(
            rpn_proposal_bbox, name="trim_proposals_detection")
        encode_boxes = tf.boolean_mask(encode_boxes, non_zeros)
        categories = tf.boolean_mask(categories, non_zeros)
        scores = tf.boolean_mask(scores, non_zeros)

        fast_rcnn_decode_boxes = encode_and_decode.decode_boxes(
            encode_boxes=encode_boxes,
            reference_boxes=rpn_proposal_bbox,
            scale_factors=cfgs.BBOX_STD_DEV)
        fast_rcnn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
            fast_rcnn_decode_boxes, image_height, image_width)

        # Remove the background: keep only indices whose class id is > 0.
        keep = tf.cast(tf.where(categories > 0)[:, 0], tf.int32)

        if cfgs.DEBUG:
            # Print the (up to) 100 best-scoring foreground predictions.
            # NOTE(review): the results of print_tensors are discarded; if it
            # returns a tf.Print op this prints nothing in graph mode -
            # confirm.
            print_categories = tf.gather(categories, keep)
            print_scores = tf.gather(scores, keep)
            num_item = tf.minimum(tf.shape(print_scores)[0], 100)
            print_scores_vision, print_index = tf.nn.top_k(
                print_scores, k=num_item)
            print_categories_vision = tf.gather(
                print_categories, print_index)
            boxes_utils.print_tensors(print_categories_vision, "categories")
            boxes_utils.print_tensors(print_scores_vision, "scores")

        # Filter out low confidence boxes
        if cfgs.FINAL_SCORE_THRESHOLD:  # 0.7 according to the original note
            conf_keep = tf.cast(
                tf.where(scores >= cfgs.FINAL_SCORE_THRESHOLD)[:, 0], tf.int32)
            # Keep indices that are both foreground and confident.
            keep = tf.sets.set_intersection(
                tf.expand_dims(keep, 0), tf.expand_dims(conf_keep, 0))
            keep = tf.sparse_tensor_to_dense(keep)[0]

        # 1. Gather what per-class NMS needs, indexed by position in `keep`.
        pre_nms_class_ids = tf.gather(categories, keep)
        pre_nms_scores = tf.gather(scores, keep)
        pre_nms_rois = tf.gather(fast_rcnn_decode_boxes, keep)
        unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]

        def nms_keep_map(class_id):
            """Apply Non-Maximum Suppression on ROIs of the given class."""
            # Indices of ROIs of the given class
            ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]
            # Apply NMS
            class_keep = tf.image.non_max_suppression(
                tf.gather(pre_nms_rois, ixs),
                tf.gather(pre_nms_scores, ixs),
                max_output_size=cfgs.DETECTION_MAX_INSTANCES,  # at most 200 per the original note
                iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD
            )  # original note: 0.3; set too high, everything gets filtered out
            # Map indices: NMS output -> position in `keep` -> original index
            class_keep = tf.gather(keep, tf.gather(ixs, class_keep))
            # Pad with -1 so returned tensors have the same shape
            gap = cfgs.DETECTION_MAX_INSTANCES - tf.shape(
                class_keep)[0]
            class_keep = tf.pad(class_keep, [(0, gap)],
                                mode='CONSTANT', constant_values=-1)
            # Set shape so map_fn() can infer result shape
            class_keep.set_shape([cfgs.DETECTION_MAX_INSTANCES])
            return class_keep

        # 2. Map over class IDs
        nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids,
                             dtype=tf.int32)
        # 3. Merge results into one list, and remove -1 padding
        nms_keep = tf.reshape(nms_keep, [-1])
        nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])
        # 4. Compute intersection between keep and nms_keep
        keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                        tf.expand_dims(nms_keep, 0))
        keep = tf.sparse_tensor_to_dense(keep)[0]

        # Keep top detections: sort the survivors by score and cap their count.
        roi_count = cfgs.DETECTION_MAX_INSTANCES
        class_scores_keep = tf.gather(scores, keep)
        num_keep = tf.minimum(
            tf.shape(class_scores_keep)[0], roi_count)
        top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1]
        keep = tf.gather(keep, top_ids)

        # Arrange output as [N, (4 box coordinates, class_id, score)].
        # NOTE(review): an earlier comment called the coordinates normalized
        # and ordered (y1, x1, y2, x2); the boxes are clipped with
        # image_height/image_width here, which suggests pixel units - confirm
        # both against decode_boxes / clip_boxes_to_img_boundaries.
        detections = tf.concat([
            tf.gather(fast_rcnn_decode_boxes, keep),
            tf.to_float(tf.gather(categories, keep))[..., tf.newaxis],
            tf.gather(scores, keep)[..., tf.newaxis]
        ], axis=1)

        # Pad with zeros if detections < DETECTION_MAX_INSTANCES
        gap = cfgs.DETECTION_MAX_INSTANCES - tf.shape(detections)[0]
        detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")
        return detections
def get_rois(self):
    '''
    Crop one fixed-size feature patch per RPN proposal from its pyramid level.

    Proposals are grouped by the level returned by ``self.assign_level()``.
    For every level the rotated proposals are turned into axis-aligned boxes,
    clipped to the image, cropped out of ``self.feature_pyramid['P<level>']``
    with ``tf.image.crop_and_resize`` and finally max-pooled.

    Grouping by level reorders the proposals relative to
    ``self.rpn_proposals_boxes``. Decoding later needs every roi paired with
    its own reference box, so the proposals are returned again in exactly the
    order of the rois. A level that receives no proposal contributes a single
    dummy proposal ``[0, 0, 1, 1, -90]`` to all three outputs.

    :return: all_level_rois: pooled crops of all levels, concatenated on axis 0
    :return: all_level_rotate_proposals: rotated proposals (5 values per row),
             row-aligned with all_level_rois
    :return: all_level_horizontal_proposals: rows of [x_c, y_c, h, w, -90],
             row-aligned with all_level_rois
    '''
    levels = self.assign_level()

    all_level_roi_list = []
    all_level_proposal_rotate_list = []
    all_level_proposal_horizontal_list = []
    with tf.variable_scope('crop_roi_and_roi_align'):
        # The image size does not depend on the level: build these ops once
        # instead of once per pyramid level.
        img_h = tf.cast(self.img_shape[1], tf.float32)
        img_w = tf.cast(self.img_shape[2], tf.float32)

        for i in range(self.min_level, self.max_level + 1):
            level_i_proposal_indices = tf.reshape(
                tf.where(tf.equal(levels, i)), [-1])
            gathered_proposals = tf.gather(self.rpn_proposals_boxes,
                                           level_i_proposal_indices)

            # Avoid an empty batch on this level: it would break the graph
            # when gradients are back-propagated.
            level_i_rotate_proposals = tf.cond(
                tf.equal(tf.shape(gathered_proposals)[0], 0),
                lambda: tf.constant([[0, 0, 1, 1, -90]], dtype=tf.float32),
                lambda: gathered_proposals)
            all_level_proposal_rotate_list.append(level_i_rotate_proposals)

            # Axis-aligned boxes derived from the rotated proposals, then
            # clipped to the image.
            level_i_horizon_proposals = get_horizon_minAreaRectangle(
                level_i_rotate_proposals, False)
            level_i_horizon_proposals = clip_boxes_to_img_boundaries(
                level_i_horizon_proposals, img_shape=self.img_shape)

            xmin, ymin, xmax, ymax = tf.unstack(level_i_horizon_proposals,
                                                axis=1)

            # Same boxes in the 5-value layout, with a fixed angle of -90.
            # NOTE: `//` floors the centre coordinates.
            h = tf.maximum(ymax - ymin, 0)
            w = tf.maximum(xmax - xmin, 0)
            x_c = (xmax + xmin) // 2
            y_c = (ymax + ymin) // 2
            theta = tf.ones_like(h) * -90
            level_i_horizontal_proposals = tf.transpose(
                tf.stack([x_c, y_c, h, w, theta]))
            all_level_proposal_horizontal_list.append(
                level_i_horizontal_proposals)

            # crop_and_resize expects [ymin, xmin, ymax, xmax] normalised by
            # the image size.
            normalize_ymin = ymin / img_h
            normalize_xmin = xmin / img_w
            normalize_ymax = ymax / img_h
            normalize_xmax = xmax / img_w

            level_i_cropped_rois = tf.image.crop_and_resize(
                self.feature_pyramid['P%d' % i],
                boxes=tf.transpose(
                    tf.stack([
                        normalize_ymin, normalize_xmin,
                        normalize_ymax, normalize_xmax
                    ])),
                # every box is taken from image 0 of the batch
                box_ind=tf.zeros(shape=[
                    tf.shape(level_i_rotate_proposals)[0],
                ], dtype=tf.int32),
                crop_size=[self.roi_size, self.roi_size],
                name='CROP_AND_RESIZE')

            if cfgs.USE_MASK:
                # RRPN-style affine rotation, implemented with a rotated mask.
                roi_mask = tf_wrapper.get_mask_tf(
                    level_i_rotate_proposals, self.roi_size)
                # Add a trailing channel axis and let broadcasting apply the
                # mask to every feature channel. This replaces a copy of the
                # mask for a hard-coded 256 channels, so any pyramid depth
                # works.
                roi_mask = tf.expand_dims(roi_mask, axis=3)
                level_i_cropped_rois = level_i_cropped_rois * roi_mask

            level_i_rois = slim.max_pool2d(
                level_i_cropped_rois,
                [self.roi_pool_kernel_size, self.roi_pool_kernel_size],
                stride=self.roi_pool_kernel_size)
            all_level_roi_list.append(level_i_rois)

        all_level_rois = tf.concat(all_level_roi_list, axis=0)
        all_level_rotate_proposals = tf.concat(
            all_level_proposal_rotate_list, axis=0)
        all_level_horizontal_proposals = tf.concat(
            all_level_proposal_horizontal_list, axis=0)
        return all_level_rois, all_level_rotate_proposals, all_level_horizontal_proposals