def build_whole_detection_network(self, input_img_batch, gtboxes_batch):
    """Assemble the FPN detection graph: backbone, RPN, proposals, Fast-RCNN.

    When ``self.is_training`` is set, the graph additionally samples anchor
    and RoI minibatches through ``tf.py_func``, records image/accuracy
    summaries and builds the loss on decoded RPN boxes.

    :param input_img_batch: image batch handed to ``self.build_base_network``.
    :param gtboxes_batch: ground-truth boxes; only read while training, where
        they are reshaped to ``[-1, 5]`` and cast to float32.
    :return: the result of ``self.postprocess_fastrcnn`` at inference time;
        ``(final_bbox, final_scores, final_category, loss_dict)`` in training.
    """
    if self.is_training:
        # Force the ground truth into an [M, 5] float32 tensor.
        gtboxes_batch = tf.cast(tf.reshape(gtboxes_batch, [-1, 5]), tf.float32)

    img_shape = tf.shape(input_img_batch)

    # 1. Backbone: list of pyramid feature maps.
    P_list = self.build_base_network(input_img_batch)  # [P2, P3, P4, P5, P6]

    # 2. RPN heads, one pass per pyramid level.
    rpn_regularizer = slim.l2_regularizer(cfgs.WEIGHT_DECAY)
    with tf.variable_scope('build_rpn', regularizer=rpn_regularizer):
        level_cls_scores = []
        level_box_preds = []
        for level_name, feature in zip(cfgs.LEVLES, P_list):
            if cfgs.SHARE_HEADS:
                # The first level creates the variables; later levels reuse them.
                reuse_flag = True if level_name != cfgs.LEVLES[0] else None
                conv_scope = 'rpn_conv/3x3'
                cls_scope = 'rpn_cls_score'
                box_scope = 'rpn_bbox_pred'
            else:
                reuse_flag = None
                conv_scope = 'rpn_conv/3x3_%s' % level_name
                cls_scope = 'rpn_cls_score_%s' % level_name
                box_scope = 'rpn_bbox_pred_%s' % level_name

            rpn_conv3x3 = slim.conv2d(
                feature, 512, [3, 3],
                trainable=self.is_training,
                weights_initializer=cfgs.INITIALIZER,
                padding="SAME",
                activation_fn=tf.nn.relu,
                scope=conv_scope,
                reuse=reuse_flag)
            rpn_cls_score = slim.conv2d(
                rpn_conv3x3, self.num_anchors_per_location * 2, [1, 1],
                stride=1,
                trainable=self.is_training,
                weights_initializer=cfgs.INITIALIZER,
                activation_fn=None,
                padding="VALID",
                scope=cls_scope,
                reuse=reuse_flag)
            rpn_box_pred = slim.conv2d(
                rpn_conv3x3, self.num_anchors_per_location * 4, [1, 1],
                stride=1,
                trainable=self.is_training,
                weights_initializer=cfgs.BBOX_INITIALIZER,
                activation_fn=None,
                padding="VALID",
                scope=box_scope,
                reuse=reuse_flag)

            # Flatten to one row per anchor before stacking the levels.
            rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            level_cls_scores.append(rpn_cls_score)
            level_box_preds.append(rpn_box_pred)

        fpn_cls_score = tf.concat(level_cls_scores, axis=0, name='fpn_cls_score')
        fpn_box_pred = tf.concat(level_box_preds, axis=0, name='fpn_box_pred')
        fpn_cls_prob = slim.softmax(fpn_cls_score, scope='fpn_cls_prob')

    # 3. Anchors for every level, concatenated in level order.
    anchors_per_level = []
    for i, level_name in enumerate(cfgs.LEVLES):
        p = P_list[i]
        p_h = tf.shape(p)[1]
        p_w = tf.shape(p)[2]
        featuremap_height = tf.cast(p_h, tf.float32)
        featuremap_width = tf.cast(p_w, tf.float32)
        anchors_per_level.append(
            anchor_utils.make_anchors(
                base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[i],
                anchor_scales=cfgs.ANCHOR_SCALES,
                anchor_ratios=cfgs.ANCHOR_RATIOS,
                featuremap_height=featuremap_height,
                featuremap_width=featuremap_width,
                stride=cfgs.ANCHOR_STRIDE_LIST[i],
                name="make_anchors_for%s" % level_name))
    all_anchors = tf.concat(anchors_per_level, axis=0, name='all_anchors_of_FPN')

    # 4. Post-process the RPN proposals (decode, clip, NMS).
    with tf.variable_scope('postprocess_FPN'):
        rois, roi_scores = postprocess_rpn_proposals(
            rpn_bbox_pred=fpn_box_pred,
            rpn_cls_prob=fpn_cls_prob,
            img_shape=img_shape,
            anchors=all_anchors,
            is_training=self.is_training)
        # rois are reshaped to [-1, 4] further down in the training path.

        if self.is_training:
            # Image summary of the proposals scoring at least 0.5.
            confident_idx = tf.reshape(
                tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
            confident_rois = tf.gather(rois, confident_idx)
            confident_scores = tf.gather(roi_scores, confident_idx)
            confident_in_img = show_box_in_tensor.draw_boxes_with_scores(
                img_batch=input_img_batch,
                boxes=confident_rois,
                scores=confident_scores)
            tf.summary.image('score_greater_05_rois', confident_in_img)

    if self.is_training:
        with tf.variable_scope('sample_anchors_minibatch'):
            fpn_labels, fpn_bbox_targets = tf.py_func(
                anchor_target_layer,
                [gtboxes_batch, img_shape, all_anchors],
                [tf.float32, tf.float32])
            fpn_bbox_targets = tf.reshape(fpn_bbox_targets, [-1, 4])
            fpn_labels = tf.to_int32(fpn_labels, name="to_int32")
            fpn_labels = tf.reshape(fpn_labels, [-1])
            self.add_anchor_img_smry(input_img_batch, all_anchors, fpn_labels)

        # RPN accuracy summary over anchors whose label is not -1.
        fpn_cls_category = tf.argmax(fpn_cls_prob, axis=1)
        kept_idx = tf.reshape(tf.where(tf.not_equal(fpn_labels, -1)), [-1])
        fpn_cls_category = tf.gather(fpn_cls_category, kept_idx)
        kept_labels = tf.to_int64(tf.gather(fpn_labels, kept_idx))
        acc = tf.reduce_mean(tf.to_float(tf.equal(fpn_cls_category, kept_labels)))
        tf.summary.scalar('ACC/fpn_accuracy', acc)

        with tf.control_dependencies([fpn_labels]):
            with tf.variable_scope('sample_RCNN_minibatch'):
                rois, labels, bbox_targets = tf.py_func(
                    proposal_target_layer,
                    [rois, gtboxes_batch],
                    [tf.float32, tf.float32, tf.float32])
                rois = tf.reshape(rois, [-1, 4])
                labels = tf.to_int32(labels)
                labels = tf.reshape(labels, [-1])
                bbox_targets = tf.reshape(
                    bbox_targets, [-1, 4 * (cfgs.CLASS_NUM + 1)])
                self.add_roi_batch_img_smry(input_img_batch, rois, labels)

    # Distribute the RoIs over the pyramid levels.
    if self.is_training:
        rois_list, labels, bbox_targets = self.assign_levels(
            all_rois=rois, labels=labels, bbox_targets=bbox_targets)
    else:
        # rois_list: [P2_rois, P3_rois, P4_rois, P5_rois]
        rois_list = self.assign_levels(all_rois=rois)

    # 5. Fast-RCNN head.
    # bbox_pred shape: [-1, 4*(cls_num+1)]; cls_score shape: [-1, cls_num+1]
    bbox_pred, cls_score = self.build_fastrcnn(
        P_list=P_list, rois_list=rois_list, img_shape=img_shape)
    cls_prob = slim.softmax(cls_score, 'cls_prob')

    if self.is_training:
        cls_category = tf.argmax(cls_prob, axis=1)
        fast_acc = tf.reduce_mean(
            tf.to_float(tf.equal(cls_category, tf.to_int64(labels))))
        tf.summary.scalar('ACC/fast_acc', fast_acc)

    rois = tf.concat(rois_list, axis=0, name='concat_rois')

    # 6. Post-process the Fast-RCNN output.
    if not self.is_training:
        return self.postprocess_fastrcnn(
            rois=rois, bbox_ppred=bbox_pred, scores=cls_prob, img_shape=img_shape)

    # Training: the loss has to be built as well.
    # The GIoU loss needs the RPN predictions decoded into boxes first.
    fpn_pred = encode_and_decode.decode_boxes(
        encoded_boxes=fpn_box_pred,
        reference_boxes=all_anchors,
        scale_factors=cfgs.ROI_SCALE_FACTORS)
    # NOTE(review): the RPN regression target passed below is all_anchors, not
    # fpn_bbox_targets -- confirm this is what build_loss expects.
    loss_dict = self.build_loss(
        rpn_box_pred=fpn_pred,
        rpn_bbox_targets=all_anchors,
        rpn_cls_score=fpn_cls_score,
        rpn_labels=fpn_labels,
        bbox_pred=bbox_pred,
        bbox_targets=bbox_targets,
        cls_score=cls_score,
        labels=labels)
    final_bbox, final_scores, final_category = self.postprocess_fastrcnn(
        rois=rois, bbox_ppred=bbox_pred, scores=cls_prob, img_shape=img_shape)
    return final_bbox, final_scores, final_category, loss_dict
def fpn(self, img_batch, gtboxes_batch):
    """Construct the FPN detection graph.

    :param img_batch: image batch handed to ``self.build_base_network``.
    :param gtboxes_batch: ground-truth boxes; only read while training, where
        they are reshaped to ``[-1, 5]`` and cast to float32.
    :return: the result of ``self.postprocess_fastrcnn`` at inference time;
        ``(final_bbox, final_scores, final_category)`` in training, in which
        case the loss is stored on ``self.loss_dict``.
    """
    if self.is_training:
        # Force the ground truth into an [M, 5] float32 tensor.
        gtboxes_batch = tf.cast(tf.reshape(gtboxes_batch, [-1, 5]), tf.float32)

    img_shape = tf.shape(img_batch)

    # Step 1: backbone, returns the pyramid feature list.
    P_list = self.build_base_network(inputs_batch=img_batch)  # [P2, P3, P4, P5, P6]

    # Step 2: RPN over the pyramid.
    fpn_cls_score, fpn_box_pred = self.build_rpn_network(P_list)
    fpn_cls_prob = slim.softmax(fpn_cls_score, scope='fpn_cls_prob')

    # Step 3: anchors for every level, concatenated in level order.
    anchors_per_level = []
    for i, level_name in enumerate(cfgs.LEVLES):
        p = P_list[i]
        p_height = tf.shape(p)[1]
        p_width = tf.shape(p)[2]
        feature_height = tf.cast(p_height, dtype=tf.float32)
        feature_width = tf.cast(p_width, dtype=tf.float32)
        anchors_per_level.append(
            anchor_utils.make_anchors(
                base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[i],
                anchor_scales=cfgs.ANCHOR_SCALES,
                anchor_ratios=cfgs.ANCHOR_RATIOS,
                feature_height=feature_height,
                feature_width=feature_width,
                stride=cfgs.ANCHOR_STRIDE_LIST[i],
                name="make_anchors_for%s" % level_name))
    all_anchors = tf.concat(anchors_per_level, axis=0, name='all_anchors_of_FPN')

    # Step 4: post-process the RPN proposals (decode, clip and NMS).
    with tf.variable_scope('postprocess_FPN'):
        rois, roi_scores = self.postprocess_rpn_proposals(
            rpn_bbox_pred=fpn_box_pred,
            rpn_cls_prob=fpn_cls_prob,
            img_shape=img_shape,
            anchors=all_anchors,
            is_training=self.is_training)

        if self.is_training:
            # Image summaries: every proposal, then those scoring >= 0.5.
            rois_in_img = show_box_in_tensor.draw_boxes_with_scores(
                img_batch=img_batch, boxes=rois, scores=roi_scores)
            tf.summary.image('all_rpn_rois', rois_in_img)

            confident_idx = tf.reshape(
                tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
            confident_rois = tf.gather(rois, confident_idx)
            confident_scores = tf.gather(roi_scores, confident_idx)
            confident_in_img = show_box_in_tensor.draw_boxes_with_scores(
                img_batch=img_batch,
                boxes=confident_rois,
                scores=confident_scores)
            tf.summary.image('score_greater_05_rois', confident_in_img)

    if self.is_training:
        with tf.variable_scope('sample_anchors_minibatch'):
            fpn_labels, fpn_bbox_targets = tf.py_func(
                anchor_target_layer,
                [gtboxes_batch, img_shape, all_anchors],
                [tf.float32, tf.float32])
            fpn_bbox_targets = tf.reshape(fpn_bbox_targets, [-1, 4])
            fpn_labels = tf.to_int32(fpn_labels, name='to_int32')
            fpn_labels = tf.reshape(fpn_labels, [-1])
            self.add_anchor_img_smry(img_batch, all_anchors, fpn_labels)

        # RPN accuracy summary over anchors whose label is not -1.
        fpn_cls_category = tf.argmax(fpn_cls_prob, axis=1)
        kept_idx = tf.reshape(tf.where(tf.not_equal(fpn_labels, -1)), [-1])
        fpn_cls_category = tf.gather(fpn_cls_category, kept_idx)
        kept_labels = tf.to_int64(tf.gather(fpn_labels, kept_idx))
        acc = tf.reduce_mean(tf.to_float(tf.equal(fpn_cls_category, kept_labels)))
        tf.summary.scalar('ACC/fpn_accuracy', acc)

        with tf.control_dependencies([fpn_labels]):
            with tf.variable_scope('sample_RCNN_minibatch'):
                rois, labels, bbox_targets = tf.py_func(
                    proposal_target_layer,
                    [rois, gtboxes_batch],
                    [tf.float32, tf.float32, tf.float32])
                rois = tf.reshape(rois, [-1, 4])
                labels = tf.to_int32(labels)
                labels = tf.reshape(labels, [-1])
                bbox_targets = tf.reshape(
                    bbox_targets, [-1, 4 * (cfgs.CLASS_NUM + 1)])
                self.add_roi_batch_img_smry(img_batch, rois, labels)

    # Distribute the RoIs over the pyramid levels.
    if self.is_training:
        rois_list, labels, bbox_targets = self.assign_levels(
            all_rois=rois, labels=labels, bbox_targets=bbox_targets)
    else:
        # rois_list: [P2_rois, P3_rois, P4_rois, P5_rois]
        rois_list = self.assign_levels(all_rois=rois)

    # 5. Fast-RCNN head.
    # bbox_pred shape: [-1, 4*(cls_num+1)]; cls_score shape: [-1, cls_num+1]
    bbox_pred, cls_score = self.build_fastrcnn(
        P_list=P_list, rois_list=rois_list, img_shape=img_shape)
    cls_prob = slim.softmax(cls_score, 'cls_prob')

    if self.is_training:
        cls_category = tf.argmax(cls_prob, axis=1)
        fast_acc = tf.reduce_mean(
            tf.to_float(tf.equal(cls_category, tf.to_int64(labels))))
        tf.summary.scalar('ACC/fast_acc', fast_acc)

    rois = tf.concat(rois_list, axis=0, name='concat_rois')

    # 6. Post-process the Fast-RCNN output.
    if not self.is_training:
        return self.postprocess_fastrcnn(
            rois=rois, bbox_ppred=bbox_pred, scores=cls_prob, img_shape=img_shape)

    # Training: build the loss and keep it on the instance.
    self.loss_dict = self.build_loss(
        rpn_box_pred=fpn_box_pred,
        rpn_bbox_targets=fpn_bbox_targets,
        rpn_cls_score=fpn_cls_score,
        rpn_labels=fpn_labels,
        bbox_pred=bbox_pred,
        bbox_targets=bbox_targets,
        cls_score=cls_score,
        labels=labels)
    final_bbox, final_scores, final_category = self.postprocess_fastrcnn(
        rois=rois, bbox_ppred=bbox_pred, scores=cls_prob, img_shape=img_shape)
    return final_bbox, final_scores, final_category
def build_whole_detection_network(self, input_img_batch, gtboxes_batch,
                                  gtboxes_r_batch, gpu_id):
    """Assemble the FPN detection graph with optional supervised masks.

    :param input_img_batch: image batch handed to ``self.build_base_network``.
    :param gtboxes_batch: horizontal ground-truth boxes; while training they
        are reshaped to ``[-1, 5]`` and cast to float32.
    :param gtboxes_r_batch: rotated ground-truth boxes; while training they
        are reshaped to ``[-1, 6]`` and cast to float32.
    :param gpu_id: forwarded unchanged to ``self.postprocess_fastrcnn``.
    :return: ``(final_bbox, final_scores, final_category)``; in training the
        tuple additionally ends with ``self.loss_dict``.
    :raises ValueError: if a ground-truth mask has to be built and
        ``cfgs.MASK_TYPE`` is neither ``'h'`` nor ``'r'`` (previously this
        surfaced as an UnboundLocalError on ``mask``).
    """
    if self.is_training:
        # Horizontal ground truth -> [M, 5], rotated ground truth -> [M, 6].
        gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 5])
        gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)
        gtboxes_r_batch = tf.reshape(gtboxes_r_batch, [-1, 6])
        gtboxes_r_batch = tf.cast(gtboxes_r_batch, tf.float32)

    img_shape = tf.shape(input_img_batch)

    # 1. Backbone; with supervised masks it also returns the mask branches.
    mask_list = []
    if cfgs.USE_SUPERVISED_MASK:
        # [P2, P3, P4, P5, P6], [mask_p2, mask_p3]
        P_list, mask_list = self.build_base_network(input_img_batch)
    else:
        P_list = self.build_base_network(input_img_batch)  # [P2, P3, P4, P5, P6]

    # 2. RPN heads, one pass per pyramid level.
    with tf.variable_scope('build_rpn',
                           regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY)):
        fpn_cls_score = []
        fpn_box_pred = []
        for level_name, p in zip(cfgs.LEVLES, P_list):
            if cfgs.SHARE_HEADS:
                # The first level creates the variables; later levels reuse them.
                reuse_flag = None if level_name == cfgs.LEVLES[0] else True
                scope_list = ['rpn_conv/3x3', 'rpn_cls_score', 'rpn_bbox_pred']
            else:
                reuse_flag = None
                scope_list = ['rpn_conv/3x3_%s' % level_name,
                              'rpn_cls_score_%s' % level_name,
                              'rpn_bbox_pred_%s' % level_name]

            rpn_conv3x3 = slim.conv2d(
                p, 512, [3, 3],
                trainable=self.is_training,
                weights_initializer=cfgs.INITIALIZER,
                padding="SAME",
                activation_fn=tf.nn.relu,
                scope=scope_list[0],
                reuse=reuse_flag)
            rpn_cls_score = slim.conv2d(
                rpn_conv3x3, self.num_anchors_per_location * 2, [1, 1],
                stride=1,
                trainable=self.is_training,
                weights_initializer=cfgs.INITIALIZER,
                activation_fn=None,
                padding="VALID",
                scope=scope_list[1],
                reuse=reuse_flag)
            rpn_box_pred = slim.conv2d(
                rpn_conv3x3, self.num_anchors_per_location * 4, [1, 1],
                stride=1,
                trainable=self.is_training,
                weights_initializer=cfgs.BBOX_INITIALIZER,
                activation_fn=None,
                padding="VALID",
                scope=scope_list[2],
                reuse=reuse_flag)

            # Flatten to one row per anchor before stacking the levels.
            rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            fpn_cls_score.append(rpn_cls_score)
            fpn_box_pred.append(rpn_box_pred)

        fpn_cls_score = tf.concat(fpn_cls_score, axis=0, name='fpn_cls_score')
        fpn_box_pred = tf.concat(fpn_box_pred, axis=0, name='fpn_box_pred')
        fpn_cls_prob = slim.softmax(fpn_cls_score, scope='fpn_cls_prob')

    # 3. Anchors (and, when enabled, ground-truth masks) for every level.
    all_anchors = []
    mask_gt_list = []
    mask_type = cfgs.MASK_TYPE.strip() if cfgs.USE_SUPERVISED_MASK else None
    for i, level_name in enumerate(cfgs.LEVLES):
        p = P_list[i]
        p_h, p_w = tf.shape(p)[1], tf.shape(p)[2]

        # Masks are only supervised on the levels that have a mask branch.
        if cfgs.USE_SUPERVISED_MASK and i < len(mask_list) and self.is_training:
            if mask_type == 'h':
                mask = tf.py_func(
                    mask_utils.make_gt_mask,
                    [p_h, p_w, img_shape[1], img_shape[2], gtboxes_batch],
                    Tout=tf.int32)
            elif mask_type == 'r':
                mask = tf.py_func(
                    mask_utils.make_r_gt_mask,
                    [p_h, p_w, img_shape[1], img_shape[2], gtboxes_r_batch],
                    Tout=tf.int32)
            else:
                # Fail with a clear message instead of an unbound `mask`.
                raise ValueError(
                    "Unsupported cfgs.MASK_TYPE %r: expected 'h' or 'r'"
                    % (cfgs.MASK_TYPE,))
            if cfgs.BINARY_MASK:
                # Collapse every positive value to 1.
                mask = tf.where(tf.greater(mask, 0),
                                tf.ones_like(mask),
                                tf.zeros_like(mask))
            mask_gt_list.append(mask)
            mask_utils.vis_mask_tfsmry(mask, name="MASK/%s" % level_name)

        featuremap_height = tf.cast(p_h, tf.float32)
        featuremap_width = tf.cast(p_w, tf.float32)
        anchors = anchor_utils.make_anchors(
            base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[i],
            anchor_scales=cfgs.ANCHOR_SCALES,
            anchor_ratios=cfgs.ANCHOR_RATIOS,
            featuremap_height=featuremap_height,
            featuremap_width=featuremap_width,
            stride=cfgs.ANCHOR_STRIDE_LIST[i],
            name="make_anchors_for%s" % level_name)
        all_anchors.append(anchors)
    all_anchors = tf.concat(all_anchors, axis=0, name='all_anchors_of_FPN')

    # 4. Post-process the RPN proposals (decode, clip, NMS).
    with tf.variable_scope('postprocess_FPN'):
        rois, roi_scores = postprocess_rpn_proposals(
            rpn_bbox_pred=fpn_box_pred,
            rpn_cls_prob=fpn_cls_prob,
            img_shape=img_shape,
            anchors=all_anchors,
            is_training=self.is_training)

    if self.is_training:
        with tf.variable_scope('sample_anchors_minibatch'):
            fpn_labels, fpn_bbox_targets = tf.py_func(
                anchor_target_layer,
                [gtboxes_batch, img_shape, all_anchors],
                [tf.float32, tf.float32])
            fpn_bbox_targets = tf.reshape(fpn_bbox_targets, [-1, 4])
            fpn_labels = tf.to_int32(fpn_labels, name="to_int32")
            fpn_labels = tf.reshape(fpn_labels, [-1])
            self.add_anchor_img_smry(input_img_batch, all_anchors, fpn_labels,
                                     method=0)

        # RPN accuracy summary over anchors whose label is not -1.
        fpn_cls_category = tf.argmax(fpn_cls_prob, axis=1)
        kept_rpppn = tf.reshape(tf.where(tf.not_equal(fpn_labels, -1)), [-1])
        fpn_cls_category = tf.gather(fpn_cls_category, kept_rpppn)
        acc = tf.reduce_mean(
            tf.to_float(
                tf.equal(fpn_cls_category,
                         tf.to_int64(tf.gather(fpn_labels, kept_rpppn)))))
        tf.summary.scalar('ACC/fpn_accuracy', acc)

        with tf.control_dependencies([fpn_labels]):
            with tf.variable_scope('sample_RCNN_minibatch'):
                rois, labels, bbox_targets = tf.py_func(
                    proposal_target_layer,
                    [rois, gtboxes_batch, gtboxes_r_batch],
                    [tf.float32, tf.float32, tf.float32])
                rois = tf.reshape(rois, [-1, 4])
                labels = tf.to_int32(labels)
                labels = tf.reshape(labels, [-1])
                bbox_targets = tf.reshape(
                    bbox_targets, [-1, 5 * (cfgs.CLASS_NUM + 1)])
                self.add_roi_batch_img_smry(input_img_batch, rois, labels,
                                            method=0)

    # 5. Fast-RCNN head: per-level RoIs, or all RoIs on concatenated features.
    if not cfgs.USE_CONCAT:
        if self.is_training:
            rois_list, labels, bbox_targets = self.assign_levels(
                all_rois=rois, labels=labels, bbox_targets=bbox_targets)
        else:
            # rois_list: [P2_rois, P3_rois, P4_rois, P5_rois]
            rois_list = self.assign_levels(all_rois=rois)
        bbox_pred, cls_score = self.build_fastrcnn(
            P_list=P_list, rois_list=rois_list, img_shape=img_shape)
        rois = tf.concat(rois_list, axis=0, name='concat_rois')
    else:
        bbox_pred, cls_score = self.build_concat_fastrcnn(
            P_list=P_list, all_rois=rois, img_shape=img_shape)

    cls_prob = slim.softmax(cls_score, 'cls_prob')

    if self.is_training:
        cls_category = tf.argmax(cls_prob, axis=1)
        fast_acc = tf.reduce_mean(
            tf.to_float(tf.equal(cls_category, tf.to_int64(labels))))
        tf.summary.scalar('ACC/fast_acc', fast_acc)

    # 6. Loss (training only) and Fast-RCNN post-processing.
    if self.is_training:
        # NOTE(review): the return value is discarded and self.loss_dict is
        # returned below -- presumably build_loss populates it; confirm.
        self.build_loss(
            rpn_box_pred=fpn_box_pred,
            rpn_bbox_targets=fpn_bbox_targets,
            rpn_cls_score=fpn_cls_score,
            rpn_labels=fpn_labels,
            bbox_pred=bbox_pred,
            bbox_targets=bbox_targets,
            cls_score=cls_score,
            labels=labels,
            mask_list=mask_list if cfgs.USE_SUPERVISED_MASK else None,
            mask_gt_list=mask_gt_list if cfgs.USE_SUPERVISED_MASK else None)

    final_bbox, final_scores, final_category = self.postprocess_fastrcnn(
        rois=rois, bbox_ppred=bbox_pred, scores=cls_prob, gpu_id=gpu_id)

    if self.is_training:
        return final_bbox, final_scores, final_category, self.loss_dict
    return final_bbox, final_scores, final_category
def build_whole_detection_network(self, input_img_batch, gtboxes_batch):
    """Assemble the FPN graph followed by a three-stage cascade R-CNN head.

    The cascade runs ``self.cascade_rcnn`` once per foreground threshold
    (0.5, 0.6, 0.7), feeding each stage the RoIs returned by the previous one.
    In training the per-stage loss dictionaries are summed key by key.

    :param input_img_batch: image batch handed to ``self.build_base_network``.
    :param gtboxes_batch: ground-truth boxes; reshaped to ``[-1, 5]`` and cast
        to float32 while training, and forwarded to ``self.cascade_rcnn``.
    :return: ``(final_bbox, final_scores, final_category)`` from the last
        stage; in training the tuple additionally ends with the summed loss
        dictionary.
    """
    if self.is_training:
        # Force the ground truth into an [M, 5] float32 tensor.
        gtboxes_batch = tf.cast(tf.reshape(gtboxes_batch, [-1, 5]), tf.float32)

    img_shape = tf.shape(input_img_batch)

    # 1. Backbone: list of pyramid feature maps.
    P_list = self.build_base_network(input_img_batch)

    # 2. RPN heads, one pass per pyramid level.
    rpn_regularizer = slim.l2_regularizer(cfgs.WEIGHT_DECAY)
    with tf.variable_scope('build_rpn', regularizer=rpn_regularizer):
        level_cls_scores = []
        level_box_preds = []
        for level_name, feature in zip(cfgs.LEVLES, P_list):
            if cfgs.SHARE_HEADS:
                # The first level creates the variables; later levels reuse them.
                reuse_flag = True if level_name != cfgs.LEVLES[0] else None
                conv_scope = 'rpn_conv/3x3'
                cls_scope = 'rpn_cls_score'
                box_scope = 'rpn_bbox_pred'
            else:
                reuse_flag = None
                conv_scope = 'rpn_conv/3x3_%s' % level_name
                cls_scope = 'rpn_cls_score_%s' % level_name
                box_scope = 'rpn_bbox_pred_%s' % level_name

            rpn_conv3x3 = slim.conv2d(
                feature, 512, [3, 3],
                trainable=self.is_training,
                weights_initializer=cfgs.INITIALIZER,
                padding="SAME",
                activation_fn=tf.nn.relu,
                scope=conv_scope,
                reuse=reuse_flag)
            rpn_cls_score = slim.conv2d(
                rpn_conv3x3, self.num_anchors_per_location * 2, [1, 1],
                stride=1,
                trainable=self.is_training,
                weights_initializer=cfgs.INITIALIZER,
                activation_fn=None,
                padding="VALID",
                scope=cls_scope,
                reuse=reuse_flag)
            rpn_box_pred = slim.conv2d(
                rpn_conv3x3, self.num_anchors_per_location * 4, [1, 1],
                stride=1,
                trainable=self.is_training,
                weights_initializer=cfgs.BBOX_INITIALIZER,
                activation_fn=None,
                padding="VALID",
                scope=box_scope,
                reuse=reuse_flag)

            # Flatten to one row per anchor before stacking the levels.
            rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            level_cls_scores.append(rpn_cls_score)
            level_box_preds.append(rpn_box_pred)

        fpn_cls_score = tf.concat(level_cls_scores, axis=0, name='fpn_cls_score')
        fpn_box_pred = tf.concat(level_box_preds, axis=0, name='fpn_box_pred')
        fpn_cls_prob = slim.softmax(fpn_cls_score, scope='fpn_cls_prob')

    # 3. Anchors for every level, concatenated in level order.
    anchors_per_level = []
    for i, level_name in enumerate(cfgs.LEVLES):
        p = P_list[i]
        p_h = tf.shape(p)[1]
        p_w = tf.shape(p)[2]
        featuremap_height = tf.cast(p_h, tf.float32)
        featuremap_width = tf.cast(p_w, tf.float32)
        anchors_per_level.append(
            anchor_utils.make_anchors(
                base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[i],
                anchor_scales=cfgs.ANCHOR_SCALES,
                anchor_ratios=cfgs.ANCHOR_RATIOS,
                featuremap_height=featuremap_height,
                featuremap_width=featuremap_width,
                stride=cfgs.ANCHOR_STRIDE_LIST[i],
                name="make_anchors_for%s" % level_name))
    all_anchors = tf.concat(anchors_per_level, axis=0, name='all_anchors_of_FPN')

    # 4. Post-process the RPN proposals (decode, clip, NMS).
    with tf.variable_scope('postprocess_FPN'):
        rois, roi_scores = postprocess_rpn_proposals(
            rpn_bbox_pred=fpn_box_pred,
            rpn_cls_prob=fpn_cls_prob,
            img_shape=img_shape,
            anchors=all_anchors,
            is_training=self.is_training)

        if self.is_training:
            # Image summary of the proposals scoring at least 0.5.
            confident_idx = tf.reshape(
                tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
            confident_rois = tf.gather(rois, confident_idx)
            confident_scores = tf.gather(roi_scores, confident_idx)
            confident_in_img = show_box_in_tensor.draw_boxes_with_scores(
                img_batch=input_img_batch,
                boxes=confident_rois,
                scores=confident_scores)
            tf.summary.image('score_greater_05_rois', confident_in_img)

    if self.is_training:
        with tf.variable_scope('sample_anchors_minibatch'):
            fpn_labels, fpn_bbox_targets = tf.py_func(
                anchor_target_layer,
                [gtboxes_batch, img_shape, all_anchors],
                [tf.float32, tf.float32])
            fpn_bbox_targets = tf.reshape(fpn_bbox_targets, [-1, 4])
            fpn_labels = tf.to_int32(fpn_labels, name="to_int32")
            fpn_labels = tf.reshape(fpn_labels, [-1])
            self.add_anchor_img_smry(input_img_batch, all_anchors, fpn_labels)

        # RPN accuracy summary over anchors whose label is not -1.
        fpn_cls_category = tf.argmax(fpn_cls_prob, axis=1)
        kept_idx = tf.reshape(tf.where(tf.not_equal(fpn_labels, -1)), [-1])
        fpn_cls_category = tf.gather(fpn_cls_category, kept_idx)
        kept_labels = tf.to_int64(tf.gather(fpn_labels, kept_idx))
        acc = tf.reduce_mean(tf.to_float(tf.equal(fpn_cls_category, kept_labels)))
        tf.summary.scalar('ACC/fpn_accuracy', acc)

    # Cascade R-CNN: each stage refines the RoIs produced by the previous one.
    total_loss_dict = {}
    cascade_bbox_pred = []
    cascade_cls_prob = []
    cascade_rois = []
    fg_thresholds = [0.5, 0.6, 0.7]
    for stage, fg_threshold in enumerate(fg_thresholds, start=1):
        if self.is_training:
            rois, bbox_pred, cls_prob, loss_dict = self.cascade_rcnn(
                rois, gtboxes_batch, input_img_batch, P_list, img_shape,
                fg_threshold, fpn_box_pred, fpn_bbox_targets, fpn_cls_score,
                fpn_labels, stage=stage)
            # Sum the losses of all stages key by key.
            for key in loss_dict:
                if key in total_loss_dict:
                    total_loss_dict[key] += loss_dict[key]
                else:
                    total_loss_dict[key] = loss_dict[key]
        else:
            # No RPN targets or labels exist at inference time.
            rois, bbox_pred, cls_prob = self.cascade_rcnn(
                rois, gtboxes_batch, input_img_batch, P_list, img_shape,
                fg_threshold, fpn_box_pred, None, fpn_cls_score, None,
                stage=stage)

        cascade_bbox_pred.append(bbox_pred)
        cascade_cls_prob.append(cls_prob)
        cascade_rois.append(rois)

    # Only the last stage feeds the final detections.
    final_bbox, final_scores, final_category = self.postprocess_fastrcnn(
        rois=cascade_rois[-1],
        bbox_ppred=cascade_bbox_pred[-1],
        scores=cascade_cls_prob[-1],
        img_shape=img_shape)

    if self.is_training:
        return final_bbox, final_scores, final_category, total_loss_dict
    return final_bbox, final_scores, final_category
def build_whole_detection_network(self, input_img_batch, gtboxes_h_batch,
                                  gtboxes_r_batch):
    """Assemble a single-feature-map RPN followed by a three-stage cascade head.

    Stage 1 works on the RPN proposals; stages 2 and 3 work on the boxes
    produced by the previous stage, converted to horizontal rectangles with
    ``boxes_utils.get_horizen_minAreaRectangle`` before cropping. At inference
    the class probabilities of the three heads are averaged on the stage-3
    RoIs.

    :param input_img_batch: image batch handed to ``self.build_base_network``.
    :param gtboxes_h_batch: horizontal ground-truth boxes; while training they
        are reshaped to ``[-1, 5]`` and cast to float32.
    :param gtboxes_r_batch: rotated ground-truth boxes; while training they
        are reshaped to ``[-1, 6]`` and cast to float32.
    :return: the result of ``self.postprocess_fastrcnn_r`` at inference time;
        ``(final_bbox, final_scores, final_category, loss_dict)`` in training.
    """
    if self.is_training:
        # Rotated ground truth -> [M, 6], horizontal ground truth -> [M, 5].
        gtboxes_r_batch = tf.cast(tf.reshape(gtboxes_r_batch, [-1, 6]), tf.float32)
        gtboxes_batch = tf.cast(tf.reshape(gtboxes_h_batch, [-1, 5]), tf.float32)

    img_shape = tf.shape(input_img_batch)

    # 1. Backbone: the single feature map RoIs are cropped from.
    feature_to_cropped = self.build_base_network(input_img_batch)

    # 2. RPN head.
    rpn_regularizer = slim.l2_regularizer(cfgs.WEIGHT_DECAY)
    with tf.variable_scope('build_rpn', regularizer=rpn_regularizer):
        rpn_conv3x3 = slim.conv2d(
            feature_to_cropped, 512, [3, 3],
            trainable=self.is_training,
            weights_initializer=cfgs.INITIALIZER,
            activation_fn=tf.nn.relu,
            scope='rpn_conv/3x3')
        rpn_cls_score = slim.conv2d(
            rpn_conv3x3, self.num_anchors_per_location * 2, [1, 1],
            stride=1,
            trainable=self.is_training,
            weights_initializer=cfgs.INITIALIZER,
            activation_fn=None,
            scope='rpn_cls_score')
        rpn_box_pred = slim.conv2d(
            rpn_conv3x3, self.num_anchors_per_location * 4, [1, 1],
            stride=1,
            trainable=self.is_training,
            weights_initializer=cfgs.BBOX_INITIALIZER,
            activation_fn=None,
            scope='rpn_bbox_pred')
        # Flatten to one row per anchor.
        rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
        rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
        rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

    # 3. Anchors over the feature map.
    featuremap_height = tf.shape(feature_to_cropped)[1]
    featuremap_width = tf.shape(feature_to_cropped)[2]
    featuremap_height = tf.cast(featuremap_height, tf.float32)
    featuremap_width = tf.cast(featuremap_width, tf.float32)
    anchors = anchor_utils.make_anchors(
        base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
        anchor_scales=cfgs.ANCHOR_SCALES,
        anchor_ratios=cfgs.ANCHOR_RATIOS,
        featuremap_height=featuremap_height,
        featuremap_width=featuremap_width,
        stride=cfgs.ANCHOR_STRIDE,
        name="make_anchors_forRPN")

    # 4. Post-process the RPN proposals (decode, clip, NMS).
    with tf.variable_scope('postprocess_RPN'):
        rois, roi_scores = postprocess_rpn_proposals(
            rpn_bbox_pred=rpn_box_pred,
            rpn_cls_prob=rpn_cls_prob,
            img_shape=img_shape,
            anchors=anchors,
            is_training=self.is_training)

        if self.is_training:
            # Image summaries: every proposal, then those scoring >= 0.5.
            rois_in_img = show_box_in_tensor.draw_boxes_with_scores(
                img_batch=input_img_batch, boxes=rois, scores=roi_scores)
            tf.summary.image('all_rpn_rois', rois_in_img)

            confident_idx = tf.reshape(
                tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
            confident_rois = tf.gather(rois, confident_idx)
            confident_scores = tf.gather(roi_scores, confident_idx)
            confident_in_img = show_box_in_tensor.draw_boxes_with_scores(
                img_batch=input_img_batch,
                boxes=confident_rois,
                scores=confident_scores)
            tf.summary.image('score_greater_05_rois', confident_in_img)

    if self.is_training:
        with tf.variable_scope('sample_anchors_minibatch'):
            rpn_labels, rpn_bbox_targets = tf.py_func(
                anchor_target_layer,
                [gtboxes_batch, img_shape, anchors],
                [tf.float32, tf.float32])
            rpn_bbox_targets = tf.reshape(rpn_bbox_targets, [-1, 4])
            rpn_labels = tf.to_int32(rpn_labels, name="to_int32")
            rpn_labels = tf.reshape(rpn_labels, [-1])
            self.add_anchor_img_smry(input_img_batch, anchors, rpn_labels)

        # RPN accuracy summary over anchors whose label is not -1.
        rpn_cls_category = tf.argmax(rpn_cls_prob, axis=1)
        kept_idx = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)), [-1])
        rpn_cls_category = tf.gather(rpn_cls_category, kept_idx)
        kept_labels = tf.to_int64(tf.gather(rpn_labels, kept_idx))
        acc = tf.reduce_mean(tf.to_float(tf.equal(rpn_cls_category, kept_labels)))
        tf.summary.scalar('ACC/rpn_accuracy', acc)

        with tf.control_dependencies([rpn_labels]):
            with tf.variable_scope('sample_RCNN_minibatch_stage1'):
                stage1_rois, stage1_labels, stage1_bbox_targets = tf.py_func(
                    proposal_target_layer_3,
                    [rois, rois, gtboxes_batch, gtboxes_r_batch,
                     cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD[0]],
                    [tf.float32, tf.float32, tf.float32])
                stage1_rois = tf.reshape(stage1_rois, [-1, 4])
                stage1_labels = tf.to_int32(stage1_labels)
                stage1_labels = tf.reshape(stage1_labels, [-1])
                stage1_bbox_targets = tf.reshape(
                    stage1_bbox_targets, [-1, 5 * (cfgs.CLASS_NUM + 1)])
                self.add_roi_batch_img_smry(
                    input_img_batch, stage1_rois, stage1_labels, 'stage1')
    else:
        stage1_rois = rois

    # ------------------------------------------------------------------ #
    # 5. Stage 1 head                                                     #
    # ------------------------------------------------------------------ #
    stage1_bbox_pred_fliter, stage1_bbox_pred, stage1_cls_score = \
        self.build_fastrcnn(
            feature_to_cropped=feature_to_cropped,
            rois=stage1_rois,
            img_shape=img_shape,
            scope='stage1')
    stage1_cls_prob = slim.softmax(stage1_cls_score, 'stage1_cls_prob')
    stage1_cls_category = tf.argmax(stage1_cls_prob, axis=1)

    if self.is_training:
        stage1_fast_acc = tf.reduce_mean(
            tf.to_float(
                tf.equal(stage1_cls_category, tf.to_int64(stage1_labels))))
        tf.summary.scalar('ACC/stage1_fast_acc', stage1_fast_acc)

    # Stage-1 boxes; the original notes they come back as x, y, w, h, theta.
    stage1_bbox = self.postprocess_cascade(
        rois=stage1_rois,
        bbox_ppred=stage1_bbox_pred_fliter,
        scope='stage1',
        five=False)

    if self.is_training:
        # The last ground-truth column is dropped before the rotated IoU.
        overlaps = iou_rotate.iou_rotate_calculate(
            stage1_bbox, gtboxes_r_batch[:, :-1], use_gpu=True, gpu_id=0)
        with tf.control_dependencies([stage1_bbox]):
            with tf.variable_scope('sample_RCNN_minibatch_stage2'):
                stage2_rois, stage2_labels, stage2_bbox_targets = tf.py_func(
                    proposal_target_layer_r,
                    [stage1_bbox, gtboxes_r_batch, overlaps,
                     cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD[1]],
                    [tf.float32, tf.float32, tf.float32])
                stage2_rois = tf.reshape(stage2_rois, [-1, 5])  # rotated boxes
                stage2_labels = tf.to_int32(stage2_labels)
                stage2_labels = tf.reshape(stage2_labels, [-1])
                stage2_bbox_targets = tf.reshape(
                    stage2_bbox_targets, [-1, 5 * (cfgs.CLASS_NUM + 1)])
                self.add_roi_batch_img_smry_rotate(
                    input_img_batch, stage2_rois, stage2_labels, 'stage2')
    else:
        stage2_rois = stage1_bbox

    # ------------------------------------------------------------------ #
    # 6. Stage 2 head                                                     #
    # ------------------------------------------------------------------ #
    # Rotated RoIs -> horizontal rectangles before cropping.
    stage2_rois_h = boxes_utils.get_horizen_minAreaRectangle(
        stage2_rois, with_label=False)
    stage2_bbox_pred_fliter, stage2_bbox_pred, stage2_cls_score = \
        self.build_fastrcnn(
            feature_to_cropped=feature_to_cropped,
            rois=stage2_rois_h,
            img_shape=img_shape,
            scope='stage2')
    stage2_cls_prob = slim.softmax(stage2_cls_score, 'stage2_cls_prob')
    stage2_cls_category = tf.argmax(stage2_cls_prob, axis=1)

    if self.is_training:
        stage2_fast_acc = tf.reduce_mean(
            tf.to_float(
                tf.equal(stage2_cls_category, tf.to_int64(stage2_labels))))
        tf.summary.scalar('ACC/stage2_fast_acc', stage2_fast_acc)

    stage2_bbox = self.postprocess_cascade(
        rois=stage2_rois,
        bbox_ppred=stage2_bbox_pred_fliter,
        scope='stage2')

    if self.is_training:
        overlaps = iou_rotate.iou_rotate_calculate(
            stage2_bbox, gtboxes_r_batch[:, :-1], use_gpu=True, gpu_id=0)
        with tf.control_dependencies([stage2_bbox]):
            with tf.variable_scope('sample_RCNN_minibatch_stage3'):
                stage3_rois, stage3_labels, stage3_bbox_targets = tf.py_func(
                    proposal_target_layer_r,
                    [stage2_bbox, gtboxes_r_batch, overlaps,
                     cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD[2]],
                    [tf.float32, tf.float32, tf.float32])
                stage3_rois = tf.reshape(stage3_rois, [-1, 5])
                stage3_labels = tf.to_int32(stage3_labels)
                stage3_labels = tf.reshape(stage3_labels, [-1])
                stage3_bbox_targets = tf.reshape(
                    stage3_bbox_targets, [-1, 5 * (cfgs.CLASS_NUM + 1)])
                self.add_roi_batch_img_smry_rotate(
                    input_img_batch, stage3_rois, stage3_labels, 'stage3')
    else:
        stage3_rois = stage2_bbox

    # ------------------------------------------------------------------ #
    # 7. Stage 3 head                                                     #
    # ------------------------------------------------------------------ #
    # Rotated RoIs -> horizontal rectangles before cropping.
    stage3_rois_h = boxes_utils.get_horizen_minAreaRectangle(
        stage3_rois, with_label=False)
    # NOTE(review): two values are unpacked here although the stage-1/2 calls
    # unpack three -- presumably build_fastrcnn depends on `scope`; confirm.
    stage3_bbox_pred, stage3_cls_score = self.build_fastrcnn(
        feature_to_cropped=feature_to_cropped,
        rois=stage3_rois_h,
        img_shape=img_shape,
        scope='stage3')
    stage3_cls_prob = slim.softmax(stage3_cls_score, 'stage3_cls_prob')
    stage3_cls_category = tf.argmax(stage3_cls_prob, axis=1)

    if self.is_training:
        fast_acc = tf.reduce_mean(
            tf.to_float(
                tf.equal(stage3_cls_category, tf.to_int64(stage3_labels))))
        tf.summary.scalar('ACC/fast_acc', fast_acc)

    # 8. Final post-processing.
    if not self.is_training:
        # Re-run the stage-2 and stage-1 heads on the stage-3 RoIs with the
        # listed slim layers set to reuse=True, then average the three scores.
        reused_layers = [
            slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose,
            slim.separable_conv2d, slim.fully_connected
        ]
        with slim.arg_scope(reused_layers, reuse=True):
            _, _, final_scores_stage2 = self.build_fastrcnn(
                feature_to_cropped=feature_to_cropped,
                rois=stage3_rois_h,
                img_shape=img_shape,
                scope='stage2')
            final_scores_stage2 = slim.softmax(final_scores_stage2,
                                               'final_scores_stage2')
            _, _, final_scores_stage1 = self.build_fastrcnn(
                feature_to_cropped=feature_to_cropped,
                rois=stage3_rois_h,
                img_shape=img_shape,
                scope='stage1')
            final_scores_stage1 = slim.softmax(final_scores_stage1,
                                               'final_scores_stage1')
            cls_prob = tf.add(final_scores_stage2, final_scores_stage1)
            cls_prob = tf.add(cls_prob, stage3_cls_prob) / 3
        return self.postprocess_fastrcnn_r(
            rois=stage3_rois,
            bbox_ppred=stage3_bbox_pred,
            scores=cls_prob,
            img_shape=img_shape,
            scope='stage3')

    # Training: build the loss over the RPN and all three stages.
    loss_dict = self.build_loss(
        rpn_box_pred=rpn_box_pred,
        rpn_bbox_targets=rpn_bbox_targets,
        rpn_cls_score=rpn_cls_score,
        rpn_labels=rpn_labels,
        bbox_pred=stage3_bbox_pred,
        bbox_targets=stage3_bbox_targets,
        stage2_bbox_pred=stage2_bbox_pred,
        stage2_bbox_targets=stage2_bbox_targets,
        stage1_bbox_pred=stage1_bbox_pred,
        stage1_bbox_targets=stage1_bbox_targets,
        cls_score=stage3_cls_score,
        labels=stage3_labels,
        stage2_cls_score=stage2_cls_score,
        stage2_labels=stage2_labels,
        stage1_cls_score=stage1_cls_score,
        stage1_labels=stage1_labels)
    final_bbox, final_scores, final_category = self.postprocess_fastrcnn_r(
        rois=stage3_rois,
        bbox_ppred=stage3_bbox_pred,
        scores=stage3_cls_prob,
        img_shape=img_shape,
        scope='stage3')
    return final_bbox, final_scores, final_category, loss_dict
def faster_rcnn(self, input_img_batch, gtboxes_batch):
    """Assemble the Faster R-CNN graph: backbone, RPN, sampling and Fast R-CNN head.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source -- confirm scope boundaries against the upstream file.

    Args:
        input_img_batch: image tensor fed to ``self.build_base_network``.
        gtboxes_batch: ground-truth boxes; only used when ``self.is_training``
            is true, where it is reshaped to ``[-1, 5]`` and cast to float32.

    Returns:
        In inference mode, whatever ``self.postprocess_fastrcnn`` returns.
        In training mode, the tuple ``(final_bbox, final_scores,
        final_category)``; the losses are stored on ``self.loss_dict``.
    """
    training = self.is_training

    if training:
        # Flatten ground truth to rows of five values.
        gtboxes_batch = tf.cast(tf.reshape(gtboxes_batch, [-1, 5]), tf.float32)

    img_shape = tf.shape(input_img_batch)

    # Backbone feature map shared by the RPN and the Fast R-CNN head.
    backbone_feat = self.build_base_network(input_img_batch)

    # RPN predictions and their softmax probabilities.
    rpn_box_pred, rpn_cls_score = self.build_rpn_network(backbone_feat)
    rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

    # Anchors are generated over axes 1 and 2 of the feature map.
    feat_h = tf.cast(tf.shape(backbone_feat)[1], dtype=tf.float32)
    feat_w = tf.cast(tf.shape(backbone_feat)[2], dtype=tf.float32)
    anchors = make_anchors(
        base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
        anchor_scales=cfgs.ANCHOR_SCALES,
        anchor_ratios=cfgs.ANCHOR_RATIOS,
        feature_height=feat_h,
        feature_width=feat_w,
        stride=cfgs.ANCHOR_STRIDE,
        name='make_anchors_forRPN')

    # Turn RPN outputs into proposals (decode, clip, NMS per the helper).
    with tf.variable_scope('postprocess_RPN'):
        rois, roi_scores = self.postprocess_rpn_proposals(
            rpn_bbox_pred=rpn_box_pred,
            rpn_cls_prob=rpn_cls_prob,
            img_shape=img_shape,
            anchors=anchors,
            is_training=training)

        if training:
            # Image summary with every proposal.
            all_rois_img = show_box_in_tensor.draw_boxes_with_scores(
                img_batch=input_img_batch,
                boxes=rois,
                scores=roi_scores)
            tf.summary.image('all_rpn_rois', all_rois_img)

            # Image summary with proposals scoring at least 0.5.
            confident_mask = tf.greater_equal(roi_scores, 0.5)
            confident_idx = tf.reshape(tf.where(confident_mask), [-1])
            confident_rois = tf.gather(rois, confident_idx)
            confident_scores = tf.gather(roi_scores, confident_idx)
            confident_img = show_box_in_tensor.draw_boxes_with_scores(
                img_batch=input_img_batch,
                boxes=confident_rois,
                scores=confident_scores)
            tf.summary.image('score_greater_05_rois', confident_img)

    if training:
        # RPN labels and regression targets for the anchors.
        with tf.variable_scope('sample_anchors_minibatch'):
            rpn_labels, rpn_box_targets = tf.py_func(
                anchor_target_layer,
                [gtboxes_batch, img_shape, anchors],
                [tf.float32, tf.float32])
            rpn_bbox_targets = tf.reshape(rpn_box_targets, shape=(-1, 4))
            rpn_labels = tf.cast(rpn_labels, dtype=tf.int32, name='to_int32')
            rpn_labels = tf.reshape(rpn_labels, shape=[-1])
            self.add_anchor_img_smry(input_img_batch, anchors, rpn_labels)

        # RPN accuracy over anchors whose label is not -1.
        rpn_predicted = tf.argmax(rpn_cls_prob, axis=1)
        labelled_idx = tf.reshape(
            tf.where(tf.not_equal(rpn_labels, -1)), shape=[-1])
        rpn_predicted = tf.gather(rpn_predicted, indices=labelled_idx)
        rpn_expected = tf.cast(
            tf.gather(rpn_labels, indices=labelled_idx), dtype=tf.int64)
        rpn_hits = tf.cast(
            tf.equal(rpn_predicted, rpn_expected), dtype=tf.float32)
        acc = tf.reduce_mean(rpn_hits)
        tf.summary.scalar('ACC/rpn_accuracy', acc)

        # Sample the RoI minibatch once the RPN labels are available.
        with tf.control_dependencies([rpn_labels]):
            with tf.variable_scope('sample_RCNN_minibatch'):
                rois, labels, bbox_targets = tf.py_func(
                    proposal_target_layer,
                    [rois, gtboxes_batch],
                    [tf.float32, tf.float32, tf.float32])
                rois = tf.reshape(rois, [-1, 4])
                labels = tf.cast(labels, dtype=tf.int32)
                labels = tf.reshape(labels, [-1])
                bbox_targets = tf.reshape(
                    bbox_targets, [-1, 4 * (cfgs.CLASS_NUM + 1)])
                self.add_roi_batch_img_smry(input_img_batch, rois, labels)

    # Fast R-CNN head on the (possibly sampled) RoIs.
    bbox_pred, cls_score = self.build_fastrcnn(
        feature_crop=backbone_feat,
        rois=rois,
        img_shape=img_shape)
    cls_prob = slim.softmax(cls_score, 'cls_prob')

    if training:
        predicted = tf.argmax(cls_prob, axis=1)
        expected = tf.to_int64(labels)
        fast_acc = tf.reduce_mean(tf.to_float(tf.equal(predicted, expected)))
        tf.summary.scalar('ACC/fast_acc', fast_acc)

    # Inference: only the post-processed detections are needed.
    if not training:
        return self.postprocess_fastrcnn(
            rois=rois,
            bbox_ppred=bbox_pred,
            scores=cls_prob,
            img_shape=img_shape)

    # Training: build the losses first, then the detections.
    self.loss_dict = self.build_loss(
        rpn_box_pred=rpn_box_pred,
        rpn_bbox_targets=rpn_bbox_targets,
        rpn_cls_score=rpn_cls_score,
        rpn_labels=rpn_labels,
        bbox_pred=bbox_pred,
        bbox_targets=bbox_targets,
        cls_score=cls_score,
        labels=labels)
    final_bbox, final_scores, final_category = self.postprocess_fastrcnn(
        rois=rois,
        bbox_ppred=bbox_pred,
        scores=cls_prob,
        img_shape=img_shape)
    return final_bbox, final_scores, final_category
def build_whole_detection_network(self, input_img_batch, gtboxes_r_batch,
                                  gtboxes_h_batch):
    """Build the detector with both a horizontal (h) and a rotated (r) head.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source -- confirm scope boundaries against the upstream file.

    Args:
        input_img_batch: image tensor fed to ``self.build_base_network``.
        gtboxes_r_batch: rotated ground truth; in training it is reshaped to
            ``[-1, 6]`` and cast to float32.
        gtboxes_h_batch: horizontal ground truth; in training it is reshaped
            to ``[-1, 5]`` and cast to float32.

    Returns:
        Inference: ``(final_boxes_h, final_scores_h, final_category_h,
        final_boxes_r, final_scores_r, final_category_r)``.
        Training: the same six values followed by ``loss_dict``.
    """
    training = self.is_training

    if training:
        gtboxes_r_batch = tf.reshape(gtboxes_r_batch, [-1, 6])
        gtboxes_h_batch = tf.reshape(gtboxes_h_batch, [-1, 5])
        gtboxes_r_batch = tf.cast(gtboxes_r_batch, tf.float32)
        gtboxes_h_batch = tf.cast(gtboxes_h_batch, tf.float32)

    img_shape = tf.shape(input_img_batch)

    # 1. Backbone.
    feature_to_cropped = self.build_base_network(input_img_batch)

    # 2. RPN head: a 3x3 conv followed by two 1x1 sibling convs.
    rpn_regularizer = slim.l2_regularizer(cfgs.WEIGHT_DECAY)
    with tf.variable_scope('build_rpn', regularizer=rpn_regularizer):
        rpn_hidden = slim.conv2d(
            feature_to_cropped, 512, [3, 3],
            trainable=training,
            weights_initializer=cfgs.INITIALIZER,
            activation_fn=tf.nn.relu,
            scope='rpn_conv/3x3')
        rpn_cls_score = slim.conv2d(
            rpn_hidden, self.num_anchors_per_location * 2, [1, 1],
            stride=1,
            trainable=training,
            weights_initializer=cfgs.INITIALIZER,
            activation_fn=None,
            scope='rpn_cls_score')
        rpn_box_pred = slim.conv2d(
            rpn_hidden, self.num_anchors_per_location * 4, [1, 1],
            stride=1,
            trainable=training,
            weights_initializer=cfgs.BBOX_INITIALIZER,
            activation_fn=None,
            scope='rpn_bbox_pred')
        rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
        rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
        rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

    # 3. Anchors over axes 1 and 2 of the backbone feature map.
    fmap_h = tf.shape(feature_to_cropped)[1]
    fmap_w = tf.shape(feature_to_cropped)[2]
    fmap_h = tf.cast(fmap_h, tf.float32)
    fmap_w = tf.cast(fmap_w, tf.float32)
    anchors = anchor_utils.make_anchors(
        base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
        anchor_scales=cfgs.ANCHOR_SCALES,
        anchor_ratios=cfgs.ANCHOR_RATIOS,
        featuremap_height=fmap_h,
        featuremap_width=fmap_w,
        stride=cfgs.ANCHOR_STRIDE,
        name="make_anchors_forRPN")

    # 4. Proposals from the RPN outputs (decode, clip, NMS per the helper).
    with tf.variable_scope('postprocess_RPN'):
        rois, roi_scores = postprocess_rpn_proposals(
            rpn_bbox_pred=rpn_box_pred,
            rpn_cls_prob=rpn_cls_prob,
            img_shape=img_shape,
            anchors=anchors,
            is_training=training)

        if training:
            # Image summary with every proposal.
            all_rois_img = show_box_in_tensor.draw_boxes_with_categories(
                img_batch=input_img_batch,
                boxes=rois,
                scores=roi_scores)
            tf.summary.image('all_rpn_rois', all_rois_img)

            # Image summary with proposals scoring at least 0.5.
            confident_idx = tf.reshape(
                tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
            confident_rois = tf.gather(rois, confident_idx)
            confident_scores = tf.gather(roi_scores, confident_idx)
            confident_img = show_box_in_tensor.draw_boxes_with_categories(
                img_batch=input_img_batch,
                boxes=confident_rois,
                scores=confident_scores)
            tf.summary.image('score_greater_05_rois', confident_img)

    if training:
        # Anchor labels/targets are computed from the horizontal ground truth.
        with tf.variable_scope('sample_anchors_minibatch'):
            rpn_labels, rpn_bbox_targets = tf.py_func(
                anchor_target_layer,
                [gtboxes_h_batch, img_shape, anchors],
                [tf.float32, tf.float32])
            rpn_bbox_targets = tf.reshape(rpn_bbox_targets, [-1, 4])
            rpn_labels = tf.to_int32(rpn_labels, name="to_int32")
            rpn_labels = tf.reshape(rpn_labels, [-1])
            self.add_anchor_img_smry(input_img_batch, anchors, rpn_labels)

        # RPN accuracy over anchors whose label is not -1.
        rpn_predicted = tf.argmax(rpn_cls_prob, axis=1)
        labelled_idx = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)), [-1])
        rpn_predicted = tf.gather(rpn_predicted, labelled_idx)
        rpn_expected = tf.to_int64(tf.gather(rpn_labels, labelled_idx))
        acc = tf.reduce_mean(
            tf.to_float(tf.equal(rpn_predicted, rpn_expected)))
        tf.summary.scalar('ACC/rpn_accuracy', acc)

        # RoI minibatch with targets for both heads.
        with tf.control_dependencies([rpn_labels]):
            with tf.variable_scope('sample_RCNN_minibatch'):
                rois, labels, bbox_targets_h, bbox_targets_r = tf.py_func(
                    proposal_target_layer,
                    [rois, gtboxes_h_batch, gtboxes_r_batch],
                    [tf.float32, tf.float32, tf.float32, tf.float32])
                rois = tf.reshape(rois, [-1, 4])
                labels = tf.to_int32(labels)
                labels = tf.reshape(labels, [-1])
                bbox_targets_h = tf.reshape(
                    bbox_targets_h, [-1, 4 * (cfgs.CLASS_NUM + 1)])
                bbox_targets_r = tf.reshape(
                    bbox_targets_r, [-1, 5 * (cfgs.CLASS_NUM + 1)])
                self.add_roi_batch_img_smry(input_img_batch, rois, labels)

    # 5. Fast R-CNN head producing horizontal and rotated predictions.
    bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r = self.build_fastrcnn(
        feature_to_cropped=feature_to_cropped,
        rois=rois,
        img_shape=img_shape)
    cls_prob_h = slim.softmax(cls_score_h, 'cls_prob_h')
    cls_prob_r = slim.softmax(cls_score_r, 'cls_prob_r')

    if training:
        # Classification accuracy of each head against the sampled labels.
        predicted_h = tf.argmax(cls_prob_h, axis=1)
        hits_h = tf.to_float(tf.equal(predicted_h, tf.to_int64(labels)))
        fast_acc_h = tf.reduce_mean(hits_h)
        tf.summary.scalar('ACC/fast_acc_h', fast_acc_h)

        predicted_r = tf.argmax(cls_prob_r, axis=1)
        hits_r = tf.to_float(tf.equal(predicted_r, tf.to_int64(labels)))
        fast_acc_r = tf.reduce_mean(hits_r)
        tf.summary.scalar('ACC/fast_acc_r', fast_acc_r)

        # Losses are built before post-processing, as in the training path.
        loss_dict = self.build_loss(
            rpn_box_pred=rpn_box_pred,
            rpn_bbox_targets=rpn_bbox_targets,
            rpn_cls_score=rpn_cls_score,
            rpn_labels=rpn_labels,
            bbox_pred_h=bbox_pred_h,
            bbox_targets_h=bbox_targets_h,
            cls_score_h=cls_score_h,
            bbox_pred_r=bbox_pred_r,
            bbox_targets_r=bbox_targets_r,
            cls_score_r=cls_score_r,
            labels=labels)

    # 6. Post-process both heads; identical in training and inference.
    final_boxes_h, final_scores_h, final_category_h = \
        self.postprocess_fastrcnn_h(
            rois=rois,
            bbox_ppred=bbox_pred_h,
            scores=cls_prob_h,
            img_shape=img_shape)
    final_boxes_r, final_scores_r, final_category_r = \
        self.postprocess_fastrcnn_r(
            rois=rois,
            bbox_ppred=bbox_pred_r,
            scores=cls_prob_r,
            img_shape=img_shape)

    if training:
        return (final_boxes_h, final_scores_h, final_category_h,
                final_boxes_r, final_scores_r, final_category_r, loss_dict)
    return (final_boxes_h, final_scores_h, final_category_h,
            final_boxes_r, final_scores_r, final_category_r)
def build_whole_detection_network(self, input_img_batch, gtboxes_batch):
    """Build the three-branch (m1/m2/m3) SSH detection graph.

    m1 runs on a fusion of the stride-8 and stride-16 backbone features,
    m2 on the stride-16 features, and m3 on a 2x2 max-pool of the stride-16
    features.

    Fixes relative to the previous version:
      * the m2 anchor grid width was derived from the m1 (stride-8) feature
        map width instead of the stride-16 one;
      * the training-time m1 post-processing used the M2 NMS IoU threshold
        while inference used the M1 threshold.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source -- confirm scope boundaries against the upstream file.

    Args:
        input_img_batch: image tensor fed to ``self.build_base_network``.
        gtboxes_batch: ground-truth boxes; only used when ``self.is_training``
            is true, where it is reshaped to ``[-1, 5]`` and cast to float32.

    Returns:
        Inference: ``result_dict`` with ``final_bbox_*``, ``final_scores_*``
        and ``final_category_*`` entries for m1, m2 and m3.
        Training: ``(result_dict, losses_dict)`` where ``losses_dict`` holds
        ``bbox_loss_*`` and ``cls_loss_*`` for m1, m2 and m3.
    """
    if self.is_training:
        # ensure shape is [M, 5]
        gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 5])
        gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

    img_shape = tf.shape(input_img_batch)

    # 1. build base network
    feature_stride8, feature_stride16 = self.build_base_network(
        input_img_batch)

    # 2. build the three detection modules
    with tf.variable_scope('build_ssh',
                           regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY)):
        # m3: detection module on the max-pooled stride-16 features.
        ssh_max_pool = slim.max_pool2d(inputs=feature_stride16,
                                       kernel_size=[2, 2],
                                       scope='ssh_max_pool')
        cls_score_m3, box_pred_m3 = self.detection_module(
            ssh_max_pool, self.m3_num_anchors_per_location,
            'detection_module_m3')
        box_pred_m3 = tf.reshape(box_pred_m3, [-1, 4 * (cfgs.CLASS_NUM + 1)])
        cls_score_m3 = tf.reshape(cls_score_m3, [-1, (cfgs.CLASS_NUM + 1)])
        cls_prob_m3 = slim.softmax(cls_score_m3, scope='cls_prob_m3')

        # m2: detection module directly on the stride-16 features.
        cls_score_m2, box_pred_m2 = self.detection_module(
            feature_stride16, self.m2_num_anchors_per_location,
            'detection_module_m2')
        box_pred_m2 = tf.reshape(box_pred_m2, [-1, 4 * (cfgs.CLASS_NUM + 1)])
        cls_score_m2 = tf.reshape(cls_score_m2, [-1, (cfgs.CLASS_NUM + 1)])
        cls_prob_m2 = slim.softmax(cls_score_m2, scope='cls_prob_m2')

        # m1: reduce stride-16 channels, upsample to the stride-8 spatial
        # size, add to the channel-reduced stride-8 features, then 3x3 conv.
        channels_16 = feature_stride16.get_shape().as_list()[-1]
        channels_8 = feature_stride8.get_shape().as_list()[-1]
        feature8_shape = tf.shape(feature_stride8)
        conv1x1_1 = slim.conv2d(inputs=feature_stride16,
                                num_outputs=channels_16 // 4,
                                kernel_size=[1, 1],
                                trainable=self.is_training,
                                weights_initializer=cfgs.INITIALIZER,
                                activation_fn=tf.nn.relu,
                                scope='conv1x1_1')
        upsampling = tf.image.resize_bilinear(
            conv1x1_1, [feature8_shape[1], feature8_shape[2]],
            name='upsampling')
        conv1x1_2 = slim.conv2d(inputs=feature_stride8,
                                num_outputs=channels_8 // 2,
                                kernel_size=[1, 1],
                                trainable=self.is_training,
                                weights_initializer=cfgs.INITIALIZER,
                                activation_fn=tf.nn.relu,
                                scope='conv1x1_2')
        eltwise_sum = upsampling + conv1x1_2
        conv3x3 = slim.conv2d(inputs=eltwise_sum,
                              num_outputs=channels_8 // 2,
                              kernel_size=[3, 3],
                              trainable=self.is_training,
                              weights_initializer=cfgs.INITIALIZER,
                              activation_fn=tf.nn.relu,
                              scope='conv3x3')
        cls_score_m1, box_pred_m1 = self.detection_module(
            conv3x3, self.m1_num_anchors_per_location, 'detection_module_m1')
        box_pred_m1 = tf.reshape(box_pred_m1, [-1, 4 * (cfgs.CLASS_NUM + 1)])
        cls_score_m1 = tf.reshape(cls_score_m1, [-1, (cfgs.CLASS_NUM + 1)])
        cls_prob_m1 = slim.softmax(cls_score_m1, scope='cls_prob_m1')

    # 3. generate_anchors, one grid per branch sized by its own feature map
    featuremap_height_m1 = tf.shape(feature_stride8)[1]
    featuremap_width_m1 = tf.shape(feature_stride8)[2]
    featuremap_height_m1 = tf.cast(featuremap_height_m1, tf.float32)
    featuremap_width_m1 = tf.cast(featuremap_width_m1, tf.float32)
    anchors_m1 = anchor_utils.make_anchors(
        base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
        anchor_scales=cfgs.M1_ANCHOR_SCALES,
        anchor_ratios=cfgs.ANCHOR_RATIOS,
        featuremap_height=featuremap_height_m1,
        featuremap_width=featuremap_width_m1,
        stride=[cfgs.ANCHOR_STRIDE[0]],
        name="make_anchors_for_m1")

    featuremap_height_m2 = tf.shape(feature_stride16)[1]
    featuremap_width_m2 = tf.shape(feature_stride16)[2]
    featuremap_height_m2 = tf.cast(featuremap_height_m2, tf.float32)
    # Fixed: this previously cast featuremap_width_m1 (the stride-8 width).
    featuremap_width_m2 = tf.cast(featuremap_width_m2, tf.float32)
    anchors_m2 = anchor_utils.make_anchors(
        base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
        anchor_scales=cfgs.M2_ANCHOR_SCALES,
        anchor_ratios=cfgs.ANCHOR_RATIOS,
        featuremap_height=featuremap_height_m2,
        featuremap_width=featuremap_width_m2,
        stride=[cfgs.ANCHOR_STRIDE[1]],
        name="make_anchors_for_m2")

    featuremap_height_m3 = tf.shape(ssh_max_pool)[1]
    featuremap_width_m3 = tf.shape(ssh_max_pool)[2]
    featuremap_height_m3 = tf.cast(featuremap_height_m3, tf.float32)
    featuremap_width_m3 = tf.cast(featuremap_width_m3, tf.float32)
    anchors_m3 = anchor_utils.make_anchors(
        base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
        anchor_scales=cfgs.M3_ANCHOR_SCALES,
        anchor_ratios=cfgs.ANCHOR_RATIOS,
        featuremap_height=featuremap_height_m3,
        featuremap_width=featuremap_width_m3,
        stride=[cfgs.ANCHOR_STRIDE[2]],
        name="make_anchors_for_m3")

    # refer to paper: Seeing Small Faces from Robust Anchor's Perspective
    if cfgs.EXTRA_SHIFTED_ANCHOR:
        shift_anchors_m1 = anchor_utils.shift_anchor(
            anchors_m1, cfgs.ANCHOR_STRIDE[0])
        shift_anchors_m2 = anchor_utils.shift_anchor(
            anchors_m2, cfgs.ANCHOR_STRIDE[1])
        shift_anchors_m3 = anchor_utils.shift_anchor(
            anchors_m3, cfgs.ANCHOR_STRIDE[2])
    else:
        shift_anchors_m1, shift_anchors_m2, shift_anchors_m3 = [], [], []

    if cfgs.FACE_SHIFT_JITTER:
        jitter_anchors_m1 = anchor_utils.shift_jitter(
            anchors_m1, cfgs.ANCHOR_STRIDE[0])
        jitter_anchors_m2 = anchor_utils.shift_jitter(
            anchors_m2, cfgs.ANCHOR_STRIDE[1])
        jitter_anchors_m3 = anchor_utils.shift_jitter(
            anchors_m3, cfgs.ANCHOR_STRIDE[2])
    else:
        jitter_anchors_m1, jitter_anchors_m2, jitter_anchors_m3 = [], [], []

    # Interleave base / shifted / jittered anchors, then flatten to [-1, 4].
    # The helpers' results are concatenated with `+`, so they are presumably
    # lists of tensors -- verify against anchor_utils.
    anchors_m1 = [anchors_m1] + shift_anchors_m1 + jitter_anchors_m1
    anchors_m1 = tf.reshape(tf.stack(anchors_m1, axis=1), [-1, 4])

    anchors_m2 = [anchors_m2] + shift_anchors_m2 + jitter_anchors_m2
    anchors_m2 = tf.reshape(tf.stack(anchors_m2, axis=1), [-1, 4])

    anchors_m3 = [anchors_m3] + shift_anchors_m3 + jitter_anchors_m3
    anchors_m3 = tf.reshape(tf.stack(anchors_m3, axis=1), [-1, 4])

    if self.is_training:
        # Sample a training minibatch per branch from its anchors.
        with tf.variable_scope('sample_ssh_minibatch_m1'):
            rois_m1, labels_m1, bbox_targets_m1, keep_inds_m1 = \
                tf.py_func(proposal_target_layer,
                           [anchors_m1, gtboxes_batch, 'M1'],
                           [tf.float32, tf.float32, tf.float32, tf.int32])
            rois_m1 = tf.reshape(rois_m1, [-1, 4])
            labels_m1 = tf.to_int32(labels_m1)
            labels_m1 = tf.reshape(labels_m1, [-1])
            bbox_targets_m1 = tf.reshape(bbox_targets_m1,
                                         [-1, 4 * (cfgs.CLASS_NUM + 1)])
            self.add_roi_batch_img_smry(input_img_batch, rois_m1, labels_m1,
                                        'm1')

        with tf.variable_scope('sample_ssh_minibatch_m2'):
            rois_m2, labels_m2, bbox_targets_m2, keep_inds_m2 = \
                tf.py_func(proposal_target_layer,
                           [anchors_m2, gtboxes_batch, 'M2'],
                           [tf.float32, tf.float32, tf.float32, tf.int32])
            rois_m2 = tf.reshape(rois_m2, [-1, 4])
            labels_m2 = tf.to_int32(labels_m2)
            labels_m2 = tf.reshape(labels_m2, [-1])
            bbox_targets_m2 = tf.reshape(bbox_targets_m2,
                                         [-1, 4 * (cfgs.CLASS_NUM + 1)])
            self.add_roi_batch_img_smry(input_img_batch, rois_m2, labels_m2,
                                        'm2')

        with tf.variable_scope('sample_ssh_minibatch_m3'):
            rois_m3, labels_m3, bbox_targets_m3, keep_inds_m3 = \
                tf.py_func(proposal_target_layer,
                           [anchors_m3, gtboxes_batch, 'M3'],
                           [tf.float32, tf.float32, tf.float32, tf.int32])
            rois_m3 = tf.reshape(rois_m3, [-1, 4])
            labels_m3 = tf.to_int32(labels_m3)
            labels_m3 = tf.reshape(labels_m3, [-1])
            bbox_targets_m3 = tf.reshape(bbox_targets_m3,
                                         [-1, 4 * (cfgs.CLASS_NUM + 1)])
            self.add_roi_batch_img_smry(input_img_batch, rois_m3, labels_m3,
                                        'm3')

    if not self.is_training:
        # Inference: post-process every anchor of every branch.
        with tf.variable_scope('postprocess_ssh_m1'):
            final_bbox_m1, final_scores_m1, final_category_m1 = \
                self.postprocess_ssh(
                    rois=anchors_m1,
                    bbox_ppred=box_pred_m1,
                    scores=cls_prob_m1,
                    img_shape=img_shape,
                    iou_threshold=cfgs.M1_NMS_IOU_THRESHOLD)

        with tf.variable_scope('postprocess_ssh_m2'):
            final_bbox_m2, final_scores_m2, final_category_m2 = \
                self.postprocess_ssh(
                    rois=anchors_m2,
                    bbox_ppred=box_pred_m2,
                    scores=cls_prob_m2,
                    img_shape=img_shape,
                    iou_threshold=cfgs.M2_NMS_IOU_THRESHOLD)

        with tf.variable_scope('postprocess_ssh_m3'):
            final_bbox_m3, final_scores_m3, final_category_m3 = \
                self.postprocess_ssh(
                    rois=anchors_m3,
                    bbox_ppred=box_pred_m3,
                    scores=cls_prob_m3,
                    img_shape=img_shape,
                    iou_threshold=cfgs.M3_NMS_IOU_THRESHOLD)

        result_dict = {
            'final_bbox_m1': final_bbox_m1,
            'final_scores_m1': final_scores_m1,
            'final_category_m1': final_category_m1,
            'final_bbox_m2': final_bbox_m2,
            'final_scores_m2': final_scores_m2,
            'final_category_m2': final_category_m2,
            'final_bbox_m3': final_bbox_m3,
            'final_scores_m3': final_scores_m3,
            'final_category_m3': final_category_m3
        }
        return result_dict

    # Training: per-branch losses, then post-processing of the sampled rois.
    with tf.variable_scope('ssh_loss_m1'):
        if cfgs.M1_MINIBATCH_SIZE != -1:
            # Keep only the predictions selected by the sampler.
            box_pred_m1 = tf.gather(box_pred_m1, keep_inds_m1)
            cls_score_m1 = tf.gather(cls_score_m1, keep_inds_m1)
            cls_prob_m1 = tf.reshape(tf.gather(cls_prob_m1, keep_inds_m1),
                                     [-1, (cfgs.CLASS_NUM + 1)])

        bbox_loss_m1 = losses.smooth_l1_loss_rcnn(
            bbox_pred=box_pred_m1,
            bbox_targets=bbox_targets_m1,
            label=labels_m1,
            num_classes=cfgs.CLASS_NUM + 1,
            sigma=cfgs.M1_SIGMA)
        cls_loss_m1 = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=cls_score_m1, labels=labels_m1))

    with tf.variable_scope('postprocess_ssh_m1'):
        # Fixed: this previously used cfgs.M2_NMS_IOU_THRESHOLD for m1.
        final_bbox_m1, final_scores_m1, final_category_m1 = \
            self.postprocess_ssh(
                rois=rois_m1,
                bbox_ppred=box_pred_m1,
                scores=cls_prob_m1,
                img_shape=img_shape,
                iou_threshold=cfgs.M1_NMS_IOU_THRESHOLD)

    with tf.variable_scope('ssh_loss_m2'):
        if cfgs.M2_MINIBATCH_SIZE != -1:
            box_pred_m2 = tf.gather(box_pred_m2, keep_inds_m2)
            cls_score_m2 = tf.gather(cls_score_m2, keep_inds_m2)
            cls_prob_m2 = tf.reshape(tf.gather(cls_prob_m2, keep_inds_m2),
                                     [-1, (cfgs.CLASS_NUM + 1)])

        bbox_loss_m2 = losses.smooth_l1_loss_rcnn(
            bbox_pred=box_pred_m2,
            bbox_targets=bbox_targets_m2,
            label=labels_m2,
            num_classes=cfgs.CLASS_NUM + 1,
            sigma=cfgs.M2_SIGMA)
        cls_loss_m2 = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=cls_score_m2, labels=labels_m2))

    with tf.variable_scope('postprocess_ssh_m2'):
        final_bbox_m2, final_scores_m2, final_category_m2 = \
            self.postprocess_ssh(
                rois=rois_m2,
                bbox_ppred=box_pred_m2,
                scores=cls_prob_m2,
                img_shape=img_shape,
                iou_threshold=cfgs.M2_NMS_IOU_THRESHOLD)

    with tf.variable_scope('ssh_loss_m3'):
        if cfgs.M3_MINIBATCH_SIZE != -1:
            box_pred_m3 = tf.gather(box_pred_m3, keep_inds_m3)
            cls_score_m3 = tf.gather(cls_score_m3, keep_inds_m3)
            cls_prob_m3 = tf.reshape(tf.gather(cls_prob_m3, keep_inds_m3),
                                     [-1, (cfgs.CLASS_NUM + 1)])

        bbox_loss_m3 = losses.smooth_l1_loss_rcnn(
            bbox_pred=box_pred_m3,
            bbox_targets=bbox_targets_m3,
            label=labels_m3,
            num_classes=cfgs.CLASS_NUM + 1,
            sigma=cfgs.M3_SIGMA)
        cls_loss_m3 = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=cls_score_m3, labels=labels_m3))

    with tf.variable_scope('postprocess_ssh_m3'):
        final_bbox_m3, final_scores_m3, final_category_m3 = \
            self.postprocess_ssh(
                rois=rois_m3,
                bbox_ppred=box_pred_m3,
                scores=cls_prob_m3,
                img_shape=img_shape,
                iou_threshold=cfgs.M3_NMS_IOU_THRESHOLD)

    result_dict = {
        'final_bbox_m1': final_bbox_m1,
        'final_scores_m1': final_scores_m1,
        'final_category_m1': final_category_m1,
        'final_bbox_m2': final_bbox_m2,
        'final_scores_m2': final_scores_m2,
        'final_category_m2': final_category_m2,
        'final_bbox_m3': final_bbox_m3,
        'final_scores_m3': final_scores_m3,
        'final_category_m3': final_category_m3
    }

    losses_dict = {
        'bbox_loss_m1': bbox_loss_m1,
        'cls_loss_m1': cls_loss_m1,
        'bbox_loss_m2': bbox_loss_m2,
        'cls_loss_m2': cls_loss_m2,
        'bbox_loss_m3': bbox_loss_m3,
        'cls_loss_m3': cls_loss_m3
    }

    return result_dict, losses_dict
def build_whole_detection_network(self, input_img_batch, gtboxes_batch):
    """Build the FPN detector: per-level RPN heads, proposals, Fast R-CNN head.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source -- confirm scope boundaries against the upstream file.

    Args:
        input_img_batch: image tensor fed to ``self.build_base_network``.
        gtboxes_batch: ground-truth boxes; only used when ``self.is_training``
            is true, where it is reshaped to ``[-1, 5]`` and cast to float32.

    Returns:
        Inference: whatever ``self.postprocess_fastrcnn`` returns.
        Training: ``(final_bbox, final_scores, final_category, loss_dict)``.
    """
    training = self.is_training

    if training:
        # Flatten ground truth to rows of five values.
        gtboxes_batch = tf.cast(tf.reshape(gtboxes_batch, [-1, 5]), tf.float32)

    img_shape = tf.shape(input_img_batch)

    # 1. Backbone: a mapping from level name to feature map.
    P_dict = self.build_base_network(input_img_batch)

    # 2. One RPN head per pyramid level (optionally sharing weights).
    fpn_box_delta = {}
    fpn_cls_score = {}
    fpn_cls_prob = {}
    fpn_regularizer = slim.l2_regularizer(cfgs.WEIGHT_DECAY)
    with tf.variable_scope("build_FPN", regularizer=fpn_regularizer):
        for key in cfgs.LEVLES:
            if cfgs.SHARE_HEADS:
                # The first level creates the variables; later levels reuse.
                reuse_flag = True if key != cfgs.LEVLES[0] else None
                conv_scope = 'fpn_conv/3x3'
                cls_scope = 'fpn_cls_score'
                box_scope = 'fpn_bbox_pred'
            else:
                reuse_flag = None
                conv_scope = 'fpn_conv/3x3_%s' % key
                cls_scope = 'fpn_cls_score_%s' % key
                box_scope = 'fpn_bbox_pred_%s' % key

            level_hidden = slim.conv2d(
                P_dict[key], 512, [3, 3],
                trainable=training,
                weights_initializer=cfgs.INITIALIZER,
                padding="SAME",
                activation_fn=tf.nn.relu,
                scope=conv_scope,
                reuse=reuse_flag)
            level_cls_score = slim.conv2d(
                level_hidden, self.num_anchors_per_location*2, [1, 1],
                stride=1,
                trainable=training,
                weights_initializer=cfgs.INITIALIZER,
                activation_fn=None,
                padding="VALID",
                scope=cls_scope,
                reuse=reuse_flag)
            level_box_delta = slim.conv2d(
                level_hidden, self.num_anchors_per_location*4, [1, 1],
                stride=1,
                trainable=training,
                weights_initializer=cfgs.BBOX_INITIALIZER,
                activation_fn=None,
                padding="VALID",
                scope=box_scope,
                reuse=reuse_flag)

            fpn_box_delta[key] = tf.reshape(level_box_delta, [-1, 4])
            fpn_cls_score[key] = tf.reshape(level_cls_score, [-1, 2])
            fpn_cls_prob[key] = slim.softmax(fpn_cls_score[key])

    # 3. Anchors for each level, plus their concatenation.
    anchors_dict = {}
    anchor_list = []
    with tf.name_scope("generate_FPN_anchors"):
        for key in cfgs.LEVLES:
            level_feat = P_dict[key]
            p_h = tf.to_float(tf.shape(level_feat)[1])
            p_w = tf.to_float(tf.shape(level_feat)[2])

            # Index into the per-level config lists: the trailing digit of
            # the level name relative to that of the first level.
            level_offset = int(key[-1]) - int(cfgs.LEVLES[0][-1])
            level_anchors = anchor_utils.make_anchors(
                base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[level_offset],
                anchor_scales=cfgs.ANCHOR_SCALES,
                anchor_ratios=cfgs.ANCHOR_RATIOS,
                stride=cfgs.ANCHOR_STRIDE_LIST[level_offset],
                featuremap_height=p_h,
                featuremap_width=p_w,
                name='%s_make_anchors' % key)
            anchors_dict[key] = level_anchors
            anchor_list.append(level_anchors)
        all_anchors = tf.concat(anchor_list, axis=0)

    # 4. Proposals are post-processed per level, then the top-k are kept.
    rois_list = []
    rois_scores_list = []
    with tf.name_scope("postproces_fpn"):
        for key in cfgs.LEVLES:
            level_rois, level_scores = postprocess_rpn_proposals(
                rpn_bbox_pred=fpn_box_delta[key],
                rpn_cls_prob=fpn_cls_prob[key],
                img_shape=img_shape,
                anchors=anchors_dict[key],
                is_training=training)
            rois_list.append(level_rois)
            rois_scores_list.append(level_scores)

        allrois = tf.concat(rois_list, axis=0)
        allrois_scores = tf.concat(rois_scores_list, axis=0)

        if training:
            fpn_topk = cfgs.FPN_TOP_K_PER_LEVEL_TRAIN
        else:
            fpn_topk = cfgs.FPN_TOP_K_PER_LEVEL_TEST
        # Never request more entries than there are proposals.
        topk = tf.minimum(fpn_topk, tf.shape(allrois)[0])

        rois_scores, topk_indices = tf.nn.top_k(allrois_scores, k=topk)
        rois = tf.stop_gradient(tf.gather(allrois, topk_indices))
        rois_scores = tf.stop_gradient(rois_scores)

        if training:
            # Image summary with proposals scoring at least 0.5.
            confident_idx = tf.reshape(
                tf.where(tf.greater_equal(rois_scores, 0.5)), [-1])
            confident_rois = tf.gather(rois, confident_idx)
            confident_scores = tf.gather(rois_scores, confident_idx)
            confident_img = show_box_in_tensor.draw_boxes_with_scores(
                img_batch=input_img_batch,
                boxes=confident_rois,
                scores=confident_scores)
            tf.summary.image('score_greater_05_rois', confident_img)

    if training:
        # Anchor labels/targets are computed over the anchors of all levels.
        with tf.variable_scope('sample_anchors_minibatch'):
            fpn_labels, fpn_bbox_targets = tf.py_func(
                anchor_target_layer,
                [gtboxes_batch, img_shape, all_anchors],
                [tf.float32, tf.float32])
            fpn_bbox_targets = tf.reshape(fpn_bbox_targets, [-1, 4])
            fpn_labels = tf.to_int32(fpn_labels, name="to_int32")
            fpn_labels = tf.reshape(fpn_labels, [-1])
            self.add_anchor_img_smry(input_img_batch, all_anchors, fpn_labels)

        # RoI minibatch, sampled once the anchor labels are available.
        with tf.control_dependencies([fpn_labels]):
            with tf.variable_scope('sample_RCNN_minibatch'):
                rois, labels, bbox_targets = tf.py_func(
                    proposal_target_layer,
                    [rois, gtboxes_batch],
                    [tf.float32, tf.float32, tf.float32])
                rois = tf.reshape(rois, [-1, 4])
                labels = tf.to_int32(labels)
                labels = tf.reshape(labels, [-1])
                bbox_targets = tf.reshape(
                    bbox_targets, [-1, 4*(cfgs.CLASS_NUM+1)])
                self.add_roi_batch_img_smry(input_img_batch, rois, labels)

        # Distribute RoIs (with their labels and targets) over the levels.
        rois_list, labels, bbox_targets = self.assign_levels(
            all_rois=rois,
            labels=labels,
            bbox_targets=bbox_targets)
    else:
        rois_list = self.assign_levels(all_rois=rois)

    # 5. Fast R-CNN head over the per-level RoI lists.
    level_features = [P_dict[key] for key in cfgs.LEVLES]
    bbox_pred, cls_score = self.build_fastrcnn(
        P_list=level_features,
        rois_list=rois_list,
        img_shape=img_shape)
    cls_prob = slim.softmax(cls_score, 'cls_prob')

    if training:
        predicted = tf.argmax(cls_prob, axis=1)
        expected = tf.to_int64(labels)
        fast_acc = tf.reduce_mean(tf.to_float(tf.equal(predicted, expected)))
        tf.summary.scalar('ACC/fast_acc', fast_acc)

    # RoIs are re-concatenated in level order for post-processing.
    rois = tf.concat(rois_list, axis=0, name='concat_rois')

    # 6. Inference returns the post-processed detections directly.
    if not training:
        return self.postprocess_fastrcnn(
            rois=rois,
            bbox_ppred=bbox_pred,
            scores=cls_prob,
            img_shape=img_shape)

    # Training: the losses use the RPN outputs concatenated in level order.
    all_box_delta = tf.concat(
        [fpn_box_delta[key] for key in cfgs.LEVLES], axis=0)
    all_cls_score = tf.concat(
        [fpn_cls_score[key] for key in cfgs.LEVLES], axis=0)
    loss_dict = self.build_loss(
        rpn_box_pred=all_box_delta,
        rpn_bbox_targets=fpn_bbox_targets,
        rpn_cls_score=all_cls_score,
        rpn_labels=fpn_labels,
        bbox_pred=bbox_pred,
        bbox_targets=bbox_targets,
        cls_score=cls_score,
        labels=labels)

    final_bbox, final_scores, final_category = self.postprocess_fastrcnn(
        rois=rois,
        bbox_ppred=bbox_pred,
        scores=cls_prob,
        img_shape=img_shape)
    return final_bbox, final_scores, final_category, loss_dict
def build_whole_detection_network(self, input_img_batch, gtboxes_batch):
    """Assemble the detection graph: backbone, RPN and R-FCN head.

    Ops are created in a fixed order (backbone, RPN head, anchors, proposal
    post-processing, training-only sampling, R-FCN head, final
    post-processing), because TF1 derives automatic op names from creation
    order.

    Args:
        input_img_batch: Image tensor fed to ``self.build_base_network`` and
            used as the canvas for the training image summaries.
        gtboxes_batch: Ground-truth boxes. Only read when
            ``self.is_training`` is truthy, in which case it is reshaped to
            ``[-1, 5]`` and cast to float32.

    Returns:
        Inference: whatever ``self.postprocess_rfcn`` returns.
        Training: ``(final_bbox, final_scores, final_category, loss_dict)``
        with ``loss_dict`` produced by ``self.build_loss``.

    Raises:
        ValueError: If ``self.base_network_name`` starts with neither
            ``'resnet_v1'`` nor ``'MobilenetV2'``.
    """
    if self.is_training:
        # Ground truth is flattened to [M, 5] float32 before use.
        gtboxes_batch = tf.cast(tf.reshape(gtboxes_batch, [-1, 5]),
                                tf.float32)

    img_shape = tf.shape(input_img_batch)

    # 1.0 Backbone features shared by the RPN.
    rpn_base = self.build_base_network(input_img_batch)

    # 1.1 Backbone-specific head features consumed by the R-FCN head.
    backbone_name = self.base_network_name
    if backbone_name.startswith('resnet_v1'):
        rfcn_base = resnet.restnet_head(rpn_base,
                                        scope_name=self.base_network_name,
                                        is_training=self.is_training)
    elif backbone_name.startswith('MobilenetV2'):
        rfcn_base = mobilenet_v2.mobilenetv2_head(
            rpn_base, is_training=self.is_training)
    else:
        raise ValueError('Sorry, we only support resnet or mobilenet_v2')

    # 2. RPN head: one 3x3 conv followed by two sibling 1x1 convs.
    rpn_regularizer = slim.l2_regularizer(cfgs.WEIGHT_DECAY)
    with tf.variable_scope('build_rpn', regularizer=rpn_regularizer):
        rpn_hidden = slim.conv2d(rpn_base, 128, [3, 3],
                                 trainable=self.is_training,
                                 weights_initializer=cfgs.INITIALIZER,
                                 activation_fn=tf.nn.relu,
                                 scope='rpn_conv/3x3')

        # Keyword arguments common to both linear 1x1 prediction layers.
        sibling_kwargs = {
            'stride': 1,
            'trainable': self.is_training,
            'activation_fn': None,
        }
        rpn_cls_score = slim.conv2d(rpn_hidden,
                                    self.num_anchors_per_location * 2,
                                    [1, 1],
                                    weights_initializer=cfgs.INITIALIZER,
                                    scope='rpn_cls_score',
                                    **sibling_kwargs)
        rpn_box_pred = slim.conv2d(rpn_hidden,
                                   self.num_anchors_per_location * 4,
                                   [1, 1],
                                   weights_initializer=cfgs.BBOX_INITIALIZER,
                                   scope='rpn_bbox_pred',
                                   **sibling_kwargs)

        # One row per anchor: 4 box deltas and 2 class logits.
        rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
        rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
        rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

    # 3. Anchors laid out over the RPN feature map (axes 1 and 2).
    featuremap_height = tf.shape(rpn_base)[1]
    featuremap_width = tf.shape(rpn_base)[2]
    featuremap_height = tf.cast(featuremap_height, tf.float32)
    featuremap_width = tf.cast(featuremap_width, tf.float32)

    anchors = anchor_utils.make_anchors(
        base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
        anchor_scales=cfgs.ANCHOR_SCALES,
        anchor_ratios=cfgs.ANCHOR_RATIOS,
        featuremap_height=featuremap_height,
        featuremap_width=featuremap_width,
        stride=cfgs.ANCHOR_STRIDE,
        name="make_anchors_forRPN")

    # 4. Turn RPN outputs into proposals (decode, clip, NMS).
    with tf.variable_scope('postprocess_RPN'):
        rois, roi_scores = postprocess_rpn_proposals(
            rpn_bbox_pred=rpn_box_pred,
            rpn_cls_prob=rpn_cls_prob,
            img_shape=img_shape,
            anchors=anchors,
            is_training=self.is_training)
        # rois is reshaped to [-1, 4] below when training.

        if self.is_training:
            # Image summary with every proposal drawn.
            all_rois_img = show_box_in_tensor.draw_boxes_with_scores(
                img_batch=input_img_batch,
                boxes=rois,
                scores=roi_scores)
            tf.summary.image('all_rpn_rois', all_rois_img)

            # Image summary restricted to proposals scoring >= 0.5.
            confident_idx = tf.reshape(
                tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
            confident_rois = tf.gather(rois, confident_idx)
            confident_scores = tf.gather(roi_scores, confident_idx)
            confident_img = show_box_in_tensor.draw_boxes_with_scores(
                img_batch=input_img_batch,
                boxes=confident_rois,
                scores=confident_scores)
            tf.summary.image('score_greater_05_rois', confident_img)

    if self.is_training:
        # Anchor targets for the RPN loss, computed in Python via py_func.
        with tf.variable_scope('sample_anchors_minibatch'):
            rpn_labels, rpn_bbox_targets = tf.py_func(
                anchor_target_layer,
                [gtboxes_batch, img_shape, anchors],
                [tf.float32, tf.float32])
            rpn_bbox_targets = tf.reshape(rpn_bbox_targets, [-1, 4])

            rpn_labels = tf.to_int32(rpn_labels, name="to_int32")
            rpn_labels = tf.reshape(rpn_labels, [-1])
            self.add_anchor_img_smry(input_img_batch, anchors, rpn_labels)

        # RPN accuracy summary over anchors whose label is not -1.
        rpn_cls_category = tf.argmax(rpn_cls_prob, axis=1)
        sampled_idx = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)),
                                 [-1])
        rpn_cls_category = tf.gather(rpn_cls_category, sampled_idx)
        sampled_labels = tf.to_int64(tf.gather(rpn_labels, sampled_idx))
        rpn_accuracy = tf.reduce_mean(
            tf.to_float(tf.equal(rpn_cls_category, sampled_labels)))
        tf.summary.scalar('ACC/rpn_accuracy', rpn_accuracy)

        # Proposal sampling is ordered after the anchor labels.
        with tf.control_dependencies([rpn_labels]):
            with tf.variable_scope('sample_RFCN_minibatch'):
                rois, labels, bbox_targets = tf.py_func(
                    proposal_target_layer,
                    [rois, gtboxes_batch],
                    [tf.float32, tf.float32, tf.float32])
                rois = tf.reshape(rois, [-1, 4])

                labels = tf.to_int32(labels)
                labels = tf.reshape(labels, [-1])
                bbox_targets = tf.reshape(
                    bbox_targets, [-1, 4 * (cfgs.CLASS_NUM + 1)])
                self.add_roi_batch_img_smry(input_img_batch, rois, labels)

    # 5. R-FCN head; crop_size is the total size.
    bbox_pred, cls_score = self.build_rfcn_head(rfcn_base=rfcn_base,
                                                rois=rois,
                                                img_shape=img_shape,
                                                bin_nums=[3, 3],
                                                crop_size=[9, 9])
    # bbox_pred shape: [-1, 4*(cls_num+1)].
    # cls_score shape: [-1, cls_num+1]
    cls_prob = slim.softmax(cls_score, 'cls_prob')

    if self.is_training:
        # Head accuracy summary against the sampled ROI labels.
        cls_category = tf.argmax(cls_prob, axis=1)
        rfcn_acc = tf.reduce_mean(
            tf.to_float(tf.equal(cls_category, tf.to_int64(labels))))
        tf.summary.scalar('ACC/rfcn_acc', rfcn_acc)

    # 6. Final post-processing; inference returns its result directly.
    if not self.is_training:
        return self.postprocess_rfcn(rois=rois,
                                     bbox_ppred=bbox_pred,
                                     scores=cls_prob,
                                     img_shape=img_shape)

    # Training: build the losses before the final detections.
    loss_dict = self.build_loss(rpn_box_pred=rpn_box_pred,
                                rpn_bbox_targets=rpn_bbox_targets,
                                rpn_cls_score=rpn_cls_score,
                                rpn_labels=rpn_labels,
                                bbox_pred=bbox_pred,
                                bbox_targets=bbox_targets,
                                cls_score=cls_score,
                                labels=labels)

    final_bbox, final_scores, final_category = self.postprocess_rfcn(
        rois=rois,
        bbox_ppred=bbox_pred,
        scores=cls_prob,
        img_shape=img_shape)
    return final_bbox, final_scores, final_category, loss_dict
def build_whole_detection_network(self, input_img_batch, gtboxes_r_batch,
                                  gtboxes_h_batch):
    """Build the multi-level detection graph with horizontal and rotated heads.

    An RPN head, an anchor set and a proposal set are built for every entry
    of ``self.level``; each entry is used as a key into
    ``self.feature_pyramid`` (``'C2'`` / ``'C4'``). Ops are created in a
    fixed order because TF1 derives automatic op names from creation order.

    Side effect: sets ``self.feature_pyramid``.

    Args:
        input_img_batch: Image tensor fed to ``self.build_base_network`` and
            used as the canvas for the training image summaries.
        gtboxes_r_batch: Ground-truth boxes for the rotated branch. Only read
            in training, where it is reshaped to ``[-1, 6]`` and cast to
            float32.
        gtboxes_h_batch: Ground-truth boxes for the horizontal branch. Only
            read in training, where it is reshaped to ``[-1, 5]`` and cast to
            float32.

    Returns:
        Inference: ``(final_boxes_h, final_scores_h, final_category_h,
        final_boxes_r, final_scores_r, final_category_r)``.
        Training: the same six values followed by ``loss_dict`` from
        ``self.build_loss``.
    """
    if self.is_training:
        # Ensure shapes are [M, 6] (rotated) and [M, 5] (horizontal).
        gtboxes_r_batch = tf.reshape(gtboxes_r_batch, [-1, 6])
        gtboxes_h_batch = tf.reshape(gtboxes_h_batch, [-1, 5])
        gtboxes_r_batch = tf.cast(gtboxes_r_batch, tf.float32)
        gtboxes_h_batch = tf.cast(gtboxes_h_batch, tf.float32)

    img_shape = tf.shape(input_img_batch)

    # 1. Build the base network; C2 goes through a 1x1 conv with 1024
    #    output channels before it is stored in the pyramid.
    C2_, C4 = self.build_base_network(input_img_batch)
    C2 = slim.conv2d(C2_,
                     num_outputs=1024,
                     kernel_size=[1, 1],
                     stride=1,
                     scope='build_C2_to_1024')
    self.feature_pyramid = {'C2': C2, 'C4': C4}

    # 2. Build one RPN head per level.
    rpn_all_encode_boxes = {}
    rpn_all_boxes_scores = {}
    rpn_all_cls_score = {}
    anchors = {}
    with tf.variable_scope('build_rpn',
                           regularizer=slim.l2_regularizer(
                               cfgs.WEIGHT_DECAY)):
        for i, level in enumerate(self.level):
            rpn_conv3x3 = slim.conv2d(
                self.feature_pyramid[level], 512, [3, 3],
                trainable=self.is_training,
                weights_initializer=cfgs.INITIALIZER,
                activation_fn=tf.nn.relu,
                scope='rpn_conv/3x3_{}'.format(level))
            rpn_cls_score = slim.conv2d(
                rpn_conv3x3,
                self.num_anchors_per_location[i] * 2, [1, 1],
                stride=1,
                trainable=self.is_training,
                weights_initializer=cfgs.INITIALIZER,
                activation_fn=None,
                scope='rpn_cls_score_{}'.format(level))
            rpn_box_pred = slim.conv2d(
                rpn_conv3x3,
                self.num_anchors_per_location[i] * 4, [1, 1],
                stride=1,
                trainable=self.is_training,
                weights_initializer=cfgs.BBOX_INITIALIZER,
                activation_fn=None,
                scope='rpn_bbox_pred_{}'.format(level))
            rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            rpn_cls_prob = slim.softmax(
                rpn_cls_score, scope='rpn_cls_prob_{}'.format(level))

            rpn_all_cls_score[level] = rpn_cls_score    # raw logits
            rpn_all_boxes_scores[level] = rpn_cls_prob  # after softmax
            rpn_all_encode_boxes[level] = rpn_box_pred

    # 3. Generate anchors per level. Scales and ratios are looked up by
    #    position so a too-short list still raises IndexError.
    level_settings = zip(self.level, self.base_anchor_size_list, self.stride)
    for i, (level, base_anchor_size, stride) in enumerate(level_settings):
        featuremap_height = tf.shape(self.feature_pyramid[level])[1]
        featuremap_width = tf.shape(self.feature_pyramid[level])[2]
        featuremap_height = tf.cast(featuremap_height, tf.float32)
        featuremap_width = tf.cast(featuremap_width, tf.float32)

        tmp_anchors = anchor_utils.make_anchors(
            base_anchor_size=base_anchor_size,
            anchor_scales=self.anchor_scales[i],
            anchor_ratios=self.anchor_ratios[i],
            featuremap_height=featuremap_height,
            featuremap_width=featuremap_width,
            stride=stride,
            name="make_anchors_forRPN_{}".format(level))
        anchors[level] = tf.reshape(tmp_anchors, [-1, 4])

    # 4. Postprocess RPN proposals per level (decode, clip, NMS).
    rois = {}
    with tf.variable_scope('postprocess_RPN'):
        for level in self.level:
            rois_rpn, roi_scores_rpn = postprocess_rpn_proposals(
                rpn_bbox_pred=rpn_all_encode_boxes[level],
                rpn_cls_prob=rpn_all_boxes_scores[level],
                img_shape=img_shape,
                anchors=anchors[level],
                is_training=self.is_training)
            rois[level] = rois_rpn

            if self.is_training:
                # Image summary with every proposal of this level.
                rois_in_img = show_box_in_tensor.draw_boxes_with_categories(
                    img_batch=input_img_batch,
                    boxes=rois_rpn,
                    scores=roi_scores_rpn)
                tf.summary.image('all_rpn_rois_{}'.format(level),
                                 rois_in_img)

                # Image summary restricted to proposals scoring >= 0.5.
                score_gre_05 = tf.reshape(
                    tf.where(tf.greater_equal(roi_scores_rpn, 0.5)), [-1])
                score_gre_05_rois = tf.gather(rois_rpn, score_gre_05)
                score_gre_05_score = tf.gather(roi_scores_rpn, score_gre_05)
                score_gre_05_in_img = \
                    show_box_in_tensor.draw_boxes_with_categories(
                        img_batch=input_img_batch,
                        boxes=score_gre_05_rois,
                        scores=score_gre_05_score)
                tf.summary.image('score_greater_05_rois_{}'.format(level),
                                 score_gre_05_in_img)

    # Training-only sampling of anchor and proposal targets, per level.
    rpn_labels = {}
    rpn_bbox_targets = {}
    labels_all = []
    bbox_targets_all_h = []
    bbox_targets_all_r = []
    if self.is_training:
        for level in self.level:
            with tf.variable_scope(
                    'sample_anchors_minibatch_{}'.format(level)):
                rpn_labels_one, rpn_bbox_targets_one = tf.py_func(
                    anchor_target_layer,
                    [gtboxes_h_batch, img_shape, anchors[level]],
                    [tf.float32, tf.float32])
                rpn_bbox_targets_one = tf.reshape(rpn_bbox_targets_one,
                                                  [-1, 4])

                rpn_labels_one = tf.to_int32(
                    rpn_labels_one, name="to_int32_{}".format(level))
                rpn_labels_one = tf.reshape(rpn_labels_one, [-1])
                self.add_anchor_img_smry(input_img_batch, anchors[level],
                                         rpn_labels_one)

            rpn_labels[level] = rpn_labels_one
            rpn_bbox_targets[level] = rpn_bbox_targets_one

            # RPN accuracy summary over anchors whose label is not -1.
            rpn_cls_category = tf.argmax(rpn_all_boxes_scores[level],
                                         axis=1)
            kept_rpppn = tf.reshape(
                tf.where(tf.not_equal(rpn_labels_one, -1)), [-1])
            # Predicted class of the kept anchors.
            rpn_cls_category = tf.gather(rpn_cls_category, kept_rpppn)
            acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(
                        rpn_cls_category,
                        tf.to_int64(tf.gather(rpn_labels_one, kept_rpppn)))))
            tf.summary.scalar('ACC/rpn_accuracy_{}'.format(level), acc)

            # Proposal sampling is ordered after this level's anchor labels.
            with tf.control_dependencies([rpn_labels[level]]):
                with tf.variable_scope(
                        'sample_RCNN_minibatch_{}'.format(level)):
                    rois_, labels_, bbox_targets_h_, bbox_targets_r_ = \
                        tf.py_func(
                            proposal_target_layer,
                            [rois[level], gtboxes_h_batch, gtboxes_r_batch],
                            [tf.float32, tf.float32, tf.float32,
                             tf.float32])
                    rois_fast = tf.reshape(rois_, [-1, 4])

                    labels_fast = tf.to_int32(labels_)
                    labels_fast = tf.reshape(labels_fast, [-1])
                    bbox_targets_h_fast = tf.reshape(
                        bbox_targets_h_, [-1, 4 * (cfgs.CLASS_NUM + 1)])
                    bbox_targets_r_fast = tf.reshape(
                        bbox_targets_r_, [-1, 5 * (cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry(input_img_batch, rois_fast,
                                                labels_fast)

            # The sampled ROIs replace this level's raw proposals.
            rois[level] = rois_fast
            labels_all.append(labels_fast)
            bbox_targets_all_h.append(bbox_targets_h_fast)
            bbox_targets_all_r.append(bbox_targets_r_fast)

        fast_labels = tf.concat(labels_all, axis=0)
        fast_bbox_targets_h = tf.concat(bbox_targets_all_h, axis=0)
        fast_bbox_targets_r = tf.concat(bbox_targets_all_r, axis=0)

    # 5. Build Fast-RCNN. feature_to_cropped receives the feature maps and
    #    rois_all the per-level ROI dict.
    bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r = \
        self.build_fastrcnn(feature_to_cropped=self.feature_pyramid,
                            rois_all=rois,
                            img_shape=img_shape)
    # bbox_pred shape: [-1, 4*(cls_num+1)].
    # cls_score shape: [-1, cls_num+1]
    # The horizontal and rotated branches are processed separately.
    cls_prob_h = slim.softmax(cls_score_h, 'cls_prob_h')
    cls_prob_r = slim.softmax(cls_score_r, 'cls_prob_r')

    if self.is_training:
        # Head accuracy summaries; both branches share the same labels.
        cls_category_h = tf.argmax(cls_prob_h, axis=1)
        fast_acc_h = tf.reduce_mean(
            tf.to_float(tf.equal(cls_category_h, tf.to_int64(fast_labels))))
        tf.summary.scalar('ACC/fast_acc_h', fast_acc_h)

        cls_category_r = tf.argmax(cls_prob_r, axis=1)
        fast_acc_r = tf.reduce_mean(
            tf.to_float(tf.equal(cls_category_r, tf.to_int64(fast_labels))))
        tf.summary.scalar('ACC/fast_acc_r', fast_acc_r)

    # 6. postprocess_fastrcnn. When training, the losses are built first so
    #    that op creation order is unchanged.
    loss_dict = None
    if self.is_training:
        loss_dict = self.build_loss(rpn_box_pred=rpn_all_encode_boxes,
                                    rpn_bbox_targets=rpn_bbox_targets,
                                    rpn_cls_score=rpn_all_cls_score,
                                    rpn_labels=rpn_labels,
                                    bbox_pred_h=bbox_pred_h,
                                    bbox_targets_h=fast_bbox_targets_h,
                                    cls_score_h=cls_score_h,
                                    bbox_pred_r=bbox_pred_r,
                                    bbox_targets_r=fast_bbox_targets_r,
                                    cls_score_r=cls_score_r,
                                    labels=fast_labels)

    # Concatenate the per-level ROIs in self.level order for both heads.
    merged_rois = tf.concat([rois[level] for level in self.level], axis=0)

    final_boxes_h, final_scores_h, final_category_h = \
        self.postprocess_fastrcnn_h(rois=merged_rois,
                                    bbox_ppred=bbox_pred_h,
                                    scores=cls_prob_h,
                                    img_shape=img_shape)
    final_boxes_r, final_scores_r, final_category_r = \
        self.postprocess_fastrcnn_r(rois=merged_rois,
                                    bbox_ppred=bbox_pred_r,
                                    scores=cls_prob_r,
                                    img_shape=img_shape)

    detections = (final_boxes_h, final_scores_h, final_category_h,
                  final_boxes_r, final_scores_r, final_category_r)
    if self.is_training:
        return detections + (loss_dict,)
    return detections
def build_whole_detection_network(self, input_img_batch, gtboxes_r_batch,
                                  gtboxes_h_batch):
    """Build the inference graph with horizontal and rotated heads.

    Ops are created in a fixed order (backbone, RPN head, anchors, proposal
    post-processing, Fast-RCNN head, final post-processing), because TF1
    derives automatic op names from creation order.

    Args:
        input_img_batch: Image tensor fed to ``self.build_base_network``.
        gtboxes_r_batch: Accepted but not read by this method.
        gtboxes_h_batch: Accepted but not read by this method.

    Returns:
        ``(final_boxes_h, final_scores_h, final_category_h,
        final_boxes_r, final_scores_r, final_category_r)``.
    """
    img_shape = tf.shape(input_img_batch)

    # 1. Backbone features, shared by the RPN and the Fast-RCNN head.
    backbone_features = self.build_base_network(input_img_batch)

    # 2. RPN head: one 3x3 conv followed by two sibling 1x1 convs.
    rpn_regularizer = slim.l2_regularizer(cfgs.WEIGHT_DECAY)
    with tf.variable_scope('build_rpn', regularizer=rpn_regularizer):
        rpn_hidden = slim.conv2d(backbone_features, 512, [3, 3],
                                 trainable=self.is_training,
                                 weights_initializer=cfgs.INITIALIZER,
                                 activation_fn=tf.nn.relu,
                                 scope='rpn_conv/3x3')

        # Keyword arguments common to both linear 1x1 prediction layers.
        sibling_kwargs = {
            'stride': 1,
            'trainable': self.is_training,
            'activation_fn': None,
        }
        rpn_cls_score = slim.conv2d(rpn_hidden,
                                    self.num_anchors_per_location * 2,
                                    [1, 1],
                                    weights_initializer=cfgs.INITIALIZER,
                                    scope='rpn_cls_score',
                                    **sibling_kwargs)
        rpn_box_pred = slim.conv2d(rpn_hidden,
                                   self.num_anchors_per_location * 4,
                                   [1, 1],
                                   weights_initializer=cfgs.BBOX_INITIALIZER,
                                   scope='rpn_bbox_pred',
                                   **sibling_kwargs)

        # One row per anchor: 4 box deltas and 2 class logits.
        rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
        rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
        rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

    # 3. Anchors laid out over the backbone feature map (axes 1 and 2).
    featuremap_height = tf.shape(backbone_features)[1]
    featuremap_width = tf.shape(backbone_features)[2]
    featuremap_height = tf.cast(featuremap_height, tf.float32)
    featuremap_width = tf.cast(featuremap_width, tf.float32)

    anchors = anchor_utils.make_anchors(
        base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
        anchor_scales=cfgs.ANCHOR_SCALES,
        anchor_ratios=cfgs.ANCHOR_RATIOS,
        featuremap_height=featuremap_height,
        featuremap_width=featuremap_width,
        stride=cfgs.ANCHOR_STRIDE,
        name="make_anchors_forRPN")

    # 4. Turn RPN outputs into proposals (decode, clip, NMS).
    with tf.variable_scope('postprocess_RPN'):
        rois, roi_scores = postprocess_rpn_proposals(
            rpn_bbox_pred=rpn_box_pred,
            rpn_cls_prob=rpn_cls_prob,
            img_shape=img_shape,
            anchors=anchors,
            is_training=self.is_training)

    # 5. Fast-RCNN head with separate horizontal and rotated outputs.
    head_outputs = self.build_fastrcnn(feature_to_cropped=backbone_features,
                                       rois=rois,
                                       img_shape=img_shape)
    bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r = head_outputs
    # bbox_pred shape: [-1, 4*(cls_num+1)].
    # cls_score shape: [-1, cls_num+1]
    cls_prob_h = slim.softmax(cls_score_h, 'cls_prob_h')
    cls_prob_r = slim.softmax(cls_score_r, 'cls_prob_r')

    # 6. Final post-processing, horizontal branch first.
    final_boxes_h, final_scores_h, final_category_h = \
        self.postprocess_fastrcnn_h(rois=rois,
                                    bbox_ppred=bbox_pred_h,
                                    scores=cls_prob_h,
                                    img_shape=img_shape)
    final_boxes_r, final_scores_r, final_category_r = \
        self.postprocess_fastrcnn_r(rois=rois,
                                    bbox_ppred=bbox_pred_r,
                                    scores=cls_prob_r,
                                    img_shape=img_shape)

    return (final_boxes_h, final_scores_h, final_category_h,
            final_boxes_r, final_scores_r, final_category_r)