def build_whole_detection_network(self, input_img_batch, gtboxes_batch):

    if self.is_training:
        # ensure shape is [M, 5]
        gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 5])
        gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

    img_shape = tf.shape(input_img_batch)

    # 1. build base network
    feature_pyramid = self.build_base_network(input_img_batch)

    # 2. build rpn
    rpn_box_pred, rpn_cls_score, rpn_cls_prob = self.rpn_net(feature_pyramid)

    # 3. generate_anchors
    anchors = self.make_anchors(feature_pyramid)

    # 4. postprocess rpn proposals. such as: decode, clip, filter
    if self.is_training:
        with tf.variable_scope('build_loss'):
            labels, target_delta, anchor_states = tf.py_func(
                func=anchor_target_layer,
                inp=[gtboxes_batch, anchors],
                Tout=[tf.float32, tf.float32, tf.float32])

            self.add_anchor_img_smry(input_img_batch, anchors, anchor_states)

            cls_loss = losses.focal_loss(labels, rpn_cls_score, anchor_states)
            reg_loss = losses.smooth_l1_loss(target_delta, rpn_box_pred, anchor_states)
            self.losses_dict = {'cls_loss': cls_loss, 'reg_loss': reg_loss * 2}

    with tf.variable_scope('postprocess_detctions'):
        boxes, scores, category = postprocess_detctions(rpn_bbox_pred=rpn_box_pred,
                                                        rpn_cls_prob=rpn_cls_prob,
                                                        img_shape=img_shape,
                                                        anchors=anchors,
                                                        is_training=self.is_training)
        boxes = tf.stop_gradient(boxes)
        scores = tf.stop_gradient(scores)
        category = tf.stop_gradient(category)

    if self.is_training:
        return boxes, scores, category, self.losses_dict
    else:
        return boxes, scores, category
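# A minimal standalone sketch of the focal-loss weighting used by losses.focal_loss above
# (illustrative only; the repo's exact signature and normalization may differ). Anchors with
# anchor_states == -1 are ignored, positives (state == 1) are used for normalization.
import numpy as np

def focal_loss_sketch(labels, cls_logits, anchor_states, alpha=0.25, gamma=2.0):
    keep = anchor_states != -1                               # drop ignored anchors
    labels, cls_logits = labels[keep], cls_logits[keep]
    probs = 1.0 / (1.0 + np.exp(-cls_logits))                # per-class sigmoid
    p_t = np.where(labels > 0, probs, 1.0 - probs)           # prob of the true outcome
    alpha_t = np.where(labels > 0, alpha, 1.0 - alpha)
    ce = -np.log(np.clip(p_t, 1e-8, 1.0))                    # cross-entropy
    loss = alpha_t * (1.0 - p_t) ** gamma * ce               # focal modulation
    num_pos = max(np.sum(anchor_states == 1), 1)             # normalize by #positives
    return loss.sum() / num_pos

# toy usage: 4 anchors, 2 classes
labels = np.array([[1, 0], [0, 0], [0, 1], [0, 0]], np.float32)
logits = np.random.randn(4, 2).astype(np.float32)
states = np.array([1, 0, 1, -1], np.float32)
print(focal_loss_sketch(labels, logits, states))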
def build_whole_detection_network(self, input_img_batch, gtboxes_batch_h, gtboxes_batch_r, gpu_id=0):

    if self.is_training:
        gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
        gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

        gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
        gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

    # 1. build base network
    feature_pyramid = self.build_base_network(input_img_batch)

    # 2. build rpn
    rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(feature_pyramid, 'rpn_net')

    # 3. generate_anchors
    anchor_list = self.make_anchors(feature_pyramid)

    rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
    rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
    # rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)
    anchors = tf.concat(anchor_list, axis=0)

    if self.is_training:
        with tf.variable_scope('build_loss'):
            labels, target_delta, anchor_states, target_boxes = tf.py_func(
                func=anchor_target_layer,
                inp=[gtboxes_batch_h, gtboxes_batch_r, anchors],
                Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

            if self.method == 'H':
                self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 0)
            else:
                self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 1)

            cls_loss = losses.focal_loss(labels, rpn_cls_score, anchor_states)
            if cfgs.USE_IOU_FACTOR:
                reg_loss = losses.iou_smooth_l1_loss(target_delta, rpn_box_pred, anchor_states,
                                                     target_boxes, anchors)
            else:
                reg_loss = losses.smooth_l1_loss(target_delta, rpn_box_pred, anchor_states)

            self.losses_dict['cls_loss'] = cls_loss * cfgs.CLS_WEIGHT
            self.losses_dict['reg_loss'] = reg_loss * cfgs.REG_WEIGHT

    with tf.variable_scope('refine_feature_pyramid'):
        refine_feature_pyramid = {}
        for level in cfgs.LEVEL:
            feature_1x5 = slim.conv2d(inputs=feature_pyramid[level],
                                      num_outputs=256,
                                      kernel_size=[1, 5],
                                      weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                                      biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                                      stride=1,
                                      activation_fn=None,
                                      scope='refine_1x5_{}'.format(level))

            feature5x1 = slim.conv2d(inputs=feature_1x5,
                                     num_outputs=256,
                                     kernel_size=[5, 1],
                                     weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                                     biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                                     stride=1,
                                     activation_fn=None,
                                     scope='refine_5x1_{}'.format(level))

            feature_1x1 = slim.conv2d(inputs=feature_pyramid[level],
                                      num_outputs=256,
                                      kernel_size=[1, 1],
                                      weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                                      biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                                      stride=1,
                                      activation_fn=None,
                                      scope='refine_1x1_{}'.format(level))

            refine_feature_pyramid[level] = feature5x1 + feature_1x1

    refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list = self.refine_net(
        refine_feature_pyramid, 'refine_net')
    # refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list = self.refine_net(feature_pyramid, 'refine_net')

    refine_box_pred = tf.concat(refine_box_pred_list, axis=0)
    refine_cls_score = tf.concat(refine_cls_score_list, axis=0)
    refine_cls_prob = tf.concat(refine_cls_prob_list, axis=0)
    # refine_boxes = tf.concat(refine_boxes_list, axis=0)

    if cfgs.METHOD == 'H':
        x_c = (anchors[:, 2] + anchors[:, 0]) / 2
        y_c = (anchors[:, 3] + anchors[:, 1]) / 2
        h = anchors[:, 2] - anchors[:, 0] + 1
        w = anchors[:, 3] - anchors[:, 1] + 1
        theta = -90 * tf.ones_like(x_c)
        anchors = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))

    refine_boxes = bbox_transform.rbbox_transform_inv(boxes=anchors, deltas=rpn_box_pred)

    # 4. postprocess rpn proposals. such as: decode, clip, filter
    if not self.is_training:
        with tf.variable_scope('postprocess_detctions'):
            boxes, scores, category = postprocess_detctions(refine_bbox_pred=refine_box_pred,
                                                            refine_cls_prob=refine_cls_prob,
                                                            anchors=refine_boxes,
                                                            is_training=self.is_training)
            return boxes, scores, category

    # 5. build loss
    else:
        with tf.variable_scope('build_refine_loss'):
            refine_labels, refine_target_delta, refine_box_states, refine_target_boxes = tf.py_func(
                func=refinebox_target_layer,
                inp=[gtboxes_batch_r, refine_boxes,
                     cfgs.REFINE_IOU_POSITIVE_THRESHOLD[0], cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[0],
                     gpu_id],
                Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

            self.add_anchor_img_smry(input_img_batch, refine_boxes, refine_box_states, 1)

            refine_cls_loss = losses.focal_loss(refine_labels, refine_cls_score, refine_box_states)
            if cfgs.USE_IOU_FACTOR:
                refine_reg_loss = losses.iou_smooth_l1_loss(refine_target_delta, refine_box_pred,
                                                            refine_box_states, refine_target_boxes,
                                                            refine_boxes)
            else:
                refine_reg_loss = losses.smooth_l1_loss(refine_target_delta, refine_box_pred,
                                                        refine_box_states)

            self.losses_dict['refine_cls_loss'] = refine_cls_loss * cfgs.CLS_WEIGHT
            self.losses_dict['refine_reg_loss'] = refine_reg_loss * cfgs.REG_WEIGHT

        with tf.variable_scope('postprocess_detctions'):
            boxes, scores, category = postprocess_detctions(refine_bbox_pred=refine_box_pred,
                                                            refine_cls_prob=refine_cls_prob,
                                                            anchors=refine_boxes,
                                                            is_training=self.is_training)
            boxes = tf.stop_gradient(boxes)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)

        return boxes, scores, category, self.losses_dict
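# A small standalone check of the horizontal-to-rotated anchor conversion used in the
# cfgs.METHOD == 'H' branch above. With the OpenCV-style angle convention, an axis-aligned
# box (x1, y1, x2, y2) can be rewritten as (x_c, y_c, w, h, theta) with theta = -90 and the
# two extents swapped; both describe the same rectangle. Illustrative sketch, not repo code.
import numpy as np

def hbox_to_rbox(anchors):
    # anchors: [N, 4] as (x1, y1, x2, y2)
    x_c = (anchors[:, 2] + anchors[:, 0]) / 2
    y_c = (anchors[:, 3] + anchors[:, 1]) / 2
    h = anchors[:, 2] - anchors[:, 0] + 1   # horizontal extent, stored as "h"
    w = anchors[:, 3] - anchors[:, 1] + 1   # vertical extent, stored as "w"
    theta = -90 * np.ones_like(x_c)
    return np.stack([x_c, y_c, w, h, theta], axis=1)

print(hbox_to_rbox(np.array([[10., 20., 110., 60.]])))
# -> [[ 60.  40.  41. 101. -90.]]  (centre, swapped extents, theta = -90)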
def refine_stage(self, input_img_batch, gtboxes_batch_r, gt_smooth_label, box_pred_list, cls_prob_list,
                 proposal_list, angle_cls_list, feature_pyramid, gpu_id, pos_threshold, neg_threshold,
                 stage, proposal_filter=False):

    with tf.variable_scope('refine_feature_pyramid{}'.format(stage)):
        refine_feature_pyramid = {}
        refine_boxes_list = []
        refine_boxes_angle_list = []

        for box_pred, cls_prob, proposal, angle_prob, stride, level in \
                zip(box_pred_list, cls_prob_list, proposal_list, angle_cls_list,
                    cfgs.ANCHOR_STRIDE, cfgs.LEVEL):

            if proposal_filter:
                box_pred = tf.reshape(box_pred, [-1, self.num_anchors_per_location, 5])
                proposal = tf.reshape(proposal,
                                      [-1, self.num_anchors_per_location, 5 if self.method == 'R' else 4])
                cls_prob = tf.reshape(cls_prob, [-1, self.num_anchors_per_location, cfgs.CLASS_NUM])

                cls_max_prob = tf.reduce_max(cls_prob, axis=-1)
                box_pred_argmax = tf.cast(tf.reshape(tf.argmax(cls_max_prob, axis=-1), [-1, 1]), tf.int32)
                indices = tf.cast(tf.cumsum(tf.ones_like(box_pred_argmax), axis=0),
                                  tf.int32) - tf.constant(1, tf.int32)
                indices = tf.concat([indices, box_pred_argmax], axis=-1)

                box_pred = tf.reshape(tf.gather_nd(box_pred, indices), [-1, 5])
                proposal = tf.reshape(tf.gather_nd(proposal, indices),
                                      [-1, 5 if self.method == 'R' else 4])

                if cfgs.METHOD == 'H':
                    x_c = (proposal[:, 2] + proposal[:, 0]) / 2
                    y_c = (proposal[:, 3] + proposal[:, 1]) / 2
                    h = proposal[:, 2] - proposal[:, 0] + 1
                    w = proposal[:, 3] - proposal[:, 1] + 1
                    theta = -90 * tf.ones_like(x_c)
                    proposal = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))
            else:
                box_pred = tf.reshape(box_pred, [-1, 5])
                proposal = tf.reshape(proposal, [-1, 5])

            bboxes = bbox_transform.rbbox_transform_inv(boxes=proposal, deltas=box_pred)

            if angle_prob is not None:
                angle_cls = tf.cast(tf.argmax(tf.sigmoid(angle_prob), axis=1), tf.float32)
                angle_cls = tf.reshape(angle_cls, [-1, ]) * -1 - 0.5
                x, y, w, h, theta = tf.unstack(bboxes, axis=1)
                bboxes_angle = tf.transpose(tf.stack([x, y, w, h, angle_cls]))
                refine_boxes_angle_list.append(bboxes_angle)
                center_point = bboxes_angle[:, :2] / stride
            else:
                center_point = bboxes[:, :2] / stride

            refine_boxes_list.append(bboxes)

            refine_feature_pyramid[level] = self.refine_feature_op(points=center_point,
                                                                   feature_map=feature_pyramid[level],
                                                                   name=level)

    refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list, refine_angle_cls_list = \
        self.refine_net(refine_feature_pyramid, 'refine_net{}'.format(stage))

    refine_box_pred = tf.concat(refine_box_pred_list, axis=0)
    refine_cls_score = tf.concat(refine_cls_score_list, axis=0)
    # refine_cls_prob = tf.concat(refine_cls_prob_list, axis=0)
    refine_boxes = tf.concat(refine_boxes_list, axis=0)
    refine_angle_cls = tf.concat(refine_angle_cls_list, axis=0)

    if self.is_training:
        with tf.variable_scope('build_refine_loss{}'.format(stage)):
            refine_labels, refine_target_delta, refine_box_states, refine_target_boxes, \
            refine_target_smooth_label = tf.py_func(
                func=refinebox_target_layer,
                inp=[gtboxes_batch_r, gt_smooth_label, refine_boxes, pos_threshold, neg_threshold, gpu_id],
                Tout=[tf.float32, tf.float32, tf.float32, tf.float32, tf.float32])

            self.add_anchor_img_smry(input_img_batch, refine_boxes, refine_box_states, 1)

            refine_cls_loss = losses.focal_loss(refine_labels, refine_cls_score, refine_box_states)
            if False:  # cfgs.USE_IOU_FACTOR:
                refine_reg_loss = losses.iou_smooth_l1_loss(refine_target_delta, refine_box_pred,
                                                            refine_box_states, refine_target_boxes,
                                                            refine_boxes, is_refine=True)
            else:
                refine_reg_loss = losses.smooth_l1_loss(refine_target_delta, refine_box_pred,
                                                        refine_box_states)

            angle_cls_loss = losses.angle_focal_loss(refine_target_smooth_label, refine_angle_cls,
                                                     refine_box_states)

            self.losses_dict['refine_cls_loss{}'.format(stage)] = refine_cls_loss * cfgs.CLS_WEIGHT
            self.losses_dict['refine_reg_loss{}'.format(stage)] = refine_reg_loss * cfgs.REG_WEIGHT
            self.losses_dict['angle_cls_loss{}'.format(stage)] = angle_cls_loss * cfgs.ANGLE_CLS_WEIGHT

    return refine_box_pred_list, refine_cls_prob_list, refine_boxes_list, refine_angle_cls_list
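# How the proposal_filter branch above picks one box per feature-map location: for each
# location it keeps the anchor whose best class score is highest, pairing a row index
# (built from cumsum(ones) - 1) with the argmax column for tf.gather_nd. A NumPy sketch
# of the same indexing (illustrative only, not repo code):
import numpy as np

num_locations, anchors_per_loc, num_classes = 3, 2, 4
cls_prob = np.random.rand(num_locations, anchors_per_loc, num_classes)
box_pred = np.random.rand(num_locations, anchors_per_loc, 5)

cls_max_prob = cls_prob.max(axis=-1)                  # [locations, anchors_per_loc]
best_anchor = cls_max_prob.argmax(axis=-1)            # argmax column per location
rows = np.arange(num_locations)                       # what cumsum(ones) - 1 builds
best_box_per_location = box_pred[rows, best_anchor]   # same effect as tf.gather_nd(box_pred, indices)
print(best_box_per_location.shape)                    # (3, 5): one box kept per location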
def build_whole_detection_network(self, input_img_batch, gtboxes_batch_h, gtboxes_batch_r,
                                  gt_smooth_label, gpu_id=0):

    if self.is_training:
        gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
        gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

        gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
        gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        gt_smooth_label = tf.reshape(gt_smooth_label, [-1, cfgs.ANGLE_RANGE])
        gt_smooth_label = tf.cast(gt_smooth_label, tf.float32)

    img_shape = tf.shape(input_img_batch)

    # 1. build base network
    feature_pyramid = self.build_base_network(input_img_batch)

    # 2. build rpn
    rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(feature_pyramid, 'rpn_net')

    # 3. generate_anchors
    anchor_list = self.make_anchors(feature_pyramid)

    rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
    rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
    # rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)
    anchors = tf.concat(anchor_list, axis=0)

    if self.is_training:
        with tf.variable_scope('build_loss'):
            labels, target_delta, anchor_states, target_boxes = tf.py_func(
                func=anchor_target_layer,
                inp=[gtboxes_batch_h, gtboxes_batch_r, anchors],
                Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

            if self.method == 'H':
                self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 0)
            else:
                self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 1)

            cls_loss = losses.focal_loss(labels, rpn_cls_score, anchor_states)
            if cfgs.USE_IOU_FACTOR:
                reg_loss = losses.iou_smooth_l1_loss(target_delta, rpn_box_pred, anchor_states,
                                                     target_boxes, anchors)
            else:
                reg_loss = losses.smooth_l1_loss(target_delta, rpn_box_pred, anchor_states)

            self.losses_dict['cls_loss'] = cls_loss * cfgs.CLS_WEIGHT
            self.losses_dict['reg_loss'] = reg_loss * cfgs.REG_WEIGHT

    box_pred_list, cls_prob_list, proposal_list, angle_cls_list = \
        rpn_box_pred_list, rpn_cls_prob_list, anchor_list, [None, None, None, None, None]

    all_box_pred_list, all_cls_prob_list, all_proposal_list, all_angle_cls_list = [], [], [], []

    for i in range(cfgs.NUM_REFINE_STAGE):
        box_pred_list, cls_prob_list, proposal_list, angle_cls_list = self.refine_stage(
            input_img_batch, gtboxes_batch_r, gt_smooth_label,
            box_pred_list, cls_prob_list, proposal_list, angle_cls_list,
            feature_pyramid, gpu_id,
            pos_threshold=cfgs.REFINE_IOU_POSITIVE_THRESHOLD[i],
            neg_threshold=cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[i],
            stage='' if i == 0 else '_stage{}'.format(i + 2),
            proposal_filter=True if i == 0 else False)

        if not self.is_training:
            all_box_pred_list.extend(box_pred_list)
            all_cls_prob_list.extend(cls_prob_list)
            all_proposal_list.extend(proposal_list)
            all_angle_cls_list.extend(angle_cls_list)
        else:
            all_box_pred_list, all_cls_prob_list, all_proposal_list, all_angle_cls_list = \
                box_pred_list, cls_prob_list, proposal_list, angle_cls_list

    with tf.variable_scope('postprocess_detctions'):
        box_pred = tf.concat(all_box_pred_list, axis=0)
        cls_prob = tf.concat(all_cls_prob_list, axis=0)
        proposal = tf.concat(all_proposal_list, axis=0)
        angle_cls = tf.concat(all_angle_cls_list, axis=0)

        boxes, scores, category, boxes_angle = postprocess_detctions(
            refine_bbox_pred=box_pred,
            refine_cls_prob=cls_prob,
            refine_angle_prob=tf.sigmoid(angle_cls),
            anchors=proposal,
            is_training=self.is_training)
        boxes = tf.stop_gradient(boxes)
        scores = tf.stop_gradient(scores)
        category = tf.stop_gradient(category)
        boxes_angle = tf.stop_gradient(boxes_angle)

    if self.is_training:
        return boxes, scores, category, boxes_angle, self.losses_dict
    else:
        return boxes, scores, category, boxes_angle
def build_whole_detection_network(self, input_img_batch, gtboxes_batch_h, gtboxes_batch_r, gpu_id=0):

    if self.is_training:
        gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
        gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

        gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
        gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

    # 1. build base network
    feature_pyramid = self.build_base_network(input_img_batch)

    # 2. build rpn
    rpn_box_pred, rpn_cls_score, rpn_cls_prob = self.rpn_net(feature_pyramid)

    # 3. generate_anchors
    anchors = self.make_anchors(feature_pyramid)

    # 4. postprocess rpn proposals. such as: decode, clip, filter
    if not self.is_training:
        with tf.variable_scope('postprocess_detctions'):
            boxes, scores, category = postprocess_detctions(rpn_bbox_pred=rpn_box_pred,
                                                            rpn_cls_prob=rpn_cls_prob,
                                                            anchors=anchors,
                                                            is_training=self.is_training)
            return boxes, scores, category

    # 5. build loss
    else:
        with tf.variable_scope('build_loss'):
            labels, target_delta, anchor_states, target_boxes = tf.py_func(
                func=anchor_target_layer,
                inp=[gtboxes_batch_h, gtboxes_batch_r, anchors, gpu_id],
                Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

            if self.method == 'H':
                self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 0)
            else:
                self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 1)

            cls_loss = losses.focal_loss(labels, rpn_cls_score, anchor_states)
            if cfgs.REG_LOSS_MODE == 0:
                reg_loss = losses.iou_smooth_l1_loss(target_delta, rpn_box_pred, anchor_states,
                                                     target_boxes, anchors)
            elif cfgs.REG_LOSS_MODE == 1:
                reg_loss = losses.smooth_l1_loss_atan(target_delta, rpn_box_pred, anchor_states)
            else:
                reg_loss = losses.smooth_l1_loss(target_delta, rpn_box_pred, anchor_states)

            losses_dict = {'cls_loss': cls_loss * cfgs.CLS_WEIGHT,
                           'reg_loss': reg_loss * cfgs.REG_WEIGHT}

        with tf.variable_scope('postprocess_detctions'):
            boxes, scores, category = postprocess_detctions(rpn_bbox_pred=rpn_box_pred,
                                                            rpn_cls_prob=rpn_cls_prob,
                                                            anchors=anchors,
                                                            is_training=self.is_training)
            boxes = tf.stop_gradient(boxes)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)

        return boxes, scores, category, losses_dict
def build_whole_detection_network(self, input_img_batch, gtboxes_batch_h, gtboxes_batch_r,
                                  gt_encode_label, gpu_id=0):

    if self.is_training:
        gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
        gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

        gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
        gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        gt_encode_label = tf.reshape(gt_encode_label, [-1, self.coding_len])
        gt_encode_label = tf.cast(gt_encode_label, tf.float32)

    # 1. build base network
    feature_pyramid = self.build_base_network(input_img_batch)

    # 2. build rpn
    rpn_box_pred, rpn_cls_score, rpn_cls_prob, rpn_angle_cls = self.rpn_net(feature_pyramid)

    # 3. generate_anchors
    anchors = self.make_anchors(feature_pyramid)

    # 4. postprocess rpn proposals. such as: decode, clip, filter
    if self.is_training:
        with tf.variable_scope('build_loss'):
            labels, target_delta, anchor_states, target_boxes, target_encode_label = tf.py_func(
                func=anchor_target_layer,
                inp=[gtboxes_batch_h, gtboxes_batch_r, gt_encode_label, anchors, gpu_id],
                Tout=[tf.float32, tf.float32, tf.float32, tf.float32, tf.float32])

            if self.method == 'H':
                self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 0)
            else:
                self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 1)

            cls_loss = losses.focal_loss(labels, rpn_cls_score, anchor_states)
            reg_loss = losses.smooth_l1_loss(target_delta, rpn_box_pred, anchor_states)
            # angle_cls_loss = losses_dcl.angle_cls_focal_loss(target_encode_label, rpn_angle_cls,
            #                                                  anchor_states, decimal_weight=None)
            angle_cls_loss = losses_dcl.angle_cls_period_focal_loss(target_encode_label, rpn_angle_cls,
                                                                    anchor_states, target_boxes,
                                                                    decimal_weight=None)

            self.losses_dict = {'cls_loss': cls_loss * cfgs.CLS_WEIGHT,
                                'reg_loss': reg_loss * cfgs.REG_WEIGHT,
                                'angle_cls_loss': angle_cls_loss * cfgs.ANGLE_WEIGHT}

    with tf.variable_scope('postprocess_detctions'):
        scores, category, boxes_angle, angle_logits = postprocess_detctions(
            rpn_bbox_pred=rpn_box_pred,
            rpn_cls_prob=rpn_cls_prob,
            rpn_angle_prob=tf.sigmoid(rpn_angle_cls),
            rpn_angle_logits=rpn_angle_cls,
            anchors=anchors,
            is_training=self.is_training)
        # boxes = tf.stop_gradient(boxes)
        scores = tf.stop_gradient(scores)
        category = tf.stop_gradient(category)
        boxes_angle = tf.stop_gradient(boxes_angle)
        angle_logits = tf.stop_gradient(angle_logits)

    if self.is_training:
        return scores, category, boxes_angle, angle_logits, self.losses_dict
    else:
        return scores, category, boxes_angle, angle_logits
def build_whole_detection_network(self, input_img_batch, gtboxes_batch_h, gtboxes_batch_r, gpu_id=0):

    if self.is_training:
        gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
        gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

        # changed from 6 to 9 values: 4 corner points (8 coordinates) plus one label
        gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 9])
        gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

    # 1. build base network
    feature_pyramid = self.build_base_network(input_img_batch)

    # 2. build rpn (needs changes for 8-value regression)
    rpn_box_pred, rpn_cls_score, rpn_cls_prob = self.rpn_net(feature_pyramid)

    # 3. generate_anchors (nothing changed)
    anchors = self.make_anchors(feature_pyramid)

    # 4. postprocess rpn proposals. such as: decode, clip, filter (changed)
    if not self.is_training:
        with tf.variable_scope('postprocess_detctions'):
            boxes, scores, category = postprocess_detctions(rpn_bbox_pred=rpn_box_pred,
                                                            rpn_cls_prob=rpn_cls_prob,
                                                            anchors=anchors,
                                                            is_training=self.is_training)
            return boxes, scores, category

    # 5. build loss
    else:
        with tf.variable_scope('build_loss'):
            # changed for the anchor target layer
            labels, target_delta, anchor_states, target_boxes = tf.py_func(
                func=anchor_target_layer,
                inp=[gtboxes_batch_h, gtboxes_batch_r, anchors, gpu_id],
                Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

            # if self.method == 'H':
            #     self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 0)
            # else:
            #     self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 1)

            cls_loss = losses.focal_loss(labels, rpn_cls_score, anchor_states)
            if cfgs.USE_IOU_FACTOR:
                reg_loss = losses.iou_smooth_l1_loss(target_delta, rpn_box_pred, anchor_states,
                                                     target_boxes, anchors)
            else:
                reg_loss = losses.smooth_cl1_loss_4p_align1(target_delta, rpn_box_pred,
                                                            anchor_states, anchors)
                # reg_loss = losses.regress_smooth_l1_loss_angle(target_delta, rpn_box_pred,
                #                                                anchor_states, anchors)

            losses_dict = {'cls_loss': cls_loss * cfgs.CLS_WEIGHT,
                           'reg_loss': reg_loss * cfgs.REG_WEIGHT}

        with tf.variable_scope('postprocess_detctions'):
            boxes, scores, category = postprocess_detctions(rpn_bbox_pred=rpn_box_pred,
                                                            rpn_cls_prob=rpn_cls_prob,
                                                            anchors=anchors,
                                                            is_training=self.is_training)
            boxes = tf.stop_gradient(boxes)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)

        return boxes, scores, category, losses_dict
def build_whole_detection_network(self, input_img_batch, gtboxes_batch_h, gtboxes_batch_r, gpu_id=0):

    if self.is_training:
        gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
        gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

        gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
        gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

    img_shape = tf.shape(input_img_batch)

    # 1. build base network
    if cfgs.USE_SUPERVISED_MASK:
        feature_pyramid, mask_list, dot_layer_list = self.build_base_network(input_img_batch)
    else:
        feature_pyramid = self.build_base_network(input_img_batch)
        dot_layer_list = None
        mask_list = []

    # 2. build rpn
    # if cfgs.USE_SUPERVISED_MASK:
    #     for i, d in enumerate(dot_layer_list):
    #         feature_pyramid['P{}'.format(i + 3)] *= d
    rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(feature_pyramid, 'rpn_net')

    # 3. generate anchors and mask
    anchor_list = self.make_anchors(feature_pyramid)
    if cfgs.USE_SUPERVISED_MASK:
        mask_gt_list = self.generate_mask(mask_list, img_shape, gtboxes_batch_h,
                                          gtboxes_batch_r, feature_pyramid)

    rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
    rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
    # rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)
    anchors = tf.concat(anchor_list, axis=0)

    if self.is_training:
        with tf.variable_scope('build_loss'):
            labels, target_delta, anchor_states, target_boxes = tf.py_func(
                func=anchor_target_layer,
                inp=[gtboxes_batch_h, gtboxes_batch_r, anchors],
                Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

            if self.method == 'H':
                self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 0)
            else:
                self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 1)

            cls_loss = losses.focal_loss(labels, rpn_cls_score, anchor_states)
            if cfgs.USE_IOU_FACTOR:
                reg_loss = losses.iou_smooth_l1_loss_(target_delta, rpn_box_pred, anchor_states,
                                                      target_boxes, anchors)
            else:
                reg_loss = losses.smooth_l1_loss(target_delta, rpn_box_pred, anchor_states)

            if cfgs.USE_SUPERVISED_MASK:
                with tf.variable_scope("supervised_mask_loss"):
                    mask_loss = 0.0
                    for i in range(len(mask_list)):
                        a_mask, a_mask_gt = mask_list[i], mask_gt_list[i]
                        # b, h, w, c = a_mask.shape
                        last_dim = 2 if cfgs.BINARY_MASK else cfgs.CLASS_NUM + 1
                        a_mask = tf.reshape(a_mask, shape=[-1, last_dim])
                        a_mask_gt = tf.reshape(a_mask_gt, shape=[-1])
                        a_mask_loss = tf.reduce_mean(
                            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=a_mask,
                                                                           labels=a_mask_gt))
                        mask_loss += a_mask_loss
                    self.losses_dict['mask_loss'] = \
                        mask_loss * cfgs.SUPERVISED_MASK_LOSS_WEIGHT / float(len(mask_list))

            self.losses_dict['cls_loss'] = cls_loss * cfgs.CLS_WEIGHT
            self.losses_dict['reg_loss'] = reg_loss * cfgs.REG_WEIGHT

    box_pred_list, cls_prob_list, proposal_list = rpn_box_pred_list, rpn_cls_prob_list, anchor_list

    all_box_pred_list, all_cls_prob_list, all_proposal_list = [], [], []

    for i in range(cfgs.NUM_REFINE_STAGE):
        box_pred_list, cls_prob_list, proposal_list = self.refine_stage(
            input_img_batch, gtboxes_batch_r,
            box_pred_list, cls_prob_list, proposal_list,
            feature_pyramid, dot_layer_list, gpu_id,
            pos_threshold=cfgs.REFINE_IOU_POSITIVE_THRESHOLD[i],
            neg_threshold=cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[i],
            stage='' if i == 0 else '_stage{}'.format(i + 2),
            proposal_filter=True if i == 0 else False)

        if not self.is_training:
            all_box_pred_list.extend(box_pred_list)
            all_cls_prob_list.extend(cls_prob_list)
            all_proposal_list.extend(proposal_list)
        else:
            all_box_pred_list, all_cls_prob_list, all_proposal_list = \
                box_pred_list, cls_prob_list, proposal_list

    with tf.variable_scope('postprocess_detctions'):
        box_pred = tf.concat(all_box_pred_list, axis=0)
        cls_prob = tf.concat(all_cls_prob_list, axis=0)
        proposal = tf.concat(all_proposal_list, axis=0)

        boxes, scores, category = postprocess_detctions(refine_bbox_pred=box_pred,
                                                        refine_cls_prob=cls_prob,
                                                        anchors=proposal,
                                                        is_training=self.is_training,
                                                        gpu_id=gpu_id)
        boxes = tf.stop_gradient(boxes)
        scores = tf.stop_gradient(scores)
        category = tf.stop_gradient(category)

    if self.is_training:
        return boxes, scores, category, self.losses_dict
    else:
        return boxes, scores, category
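# The supervised_mask_loss block above flattens each [H, W, C] mask logit map to [-1, C] and
# the integer ground-truth mask to [-1], then averages sparse softmax cross-entropy per level.
# A NumPy sketch of that per-level computation (illustrative only, not the repo's code):
import numpy as np

def sparse_softmax_ce(logits, labels):
    # logits: [N, C] float, labels: [N] int class ids
    logits = logits - logits.max(axis=1, keepdims=True)       # numerical stability
    log_probs = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))
    return -log_probs[np.arange(len(labels)), labels]

h, w, num_cls = 4, 4, 3                                        # e.g. CLASS_NUM + 1 channels
mask_logits = np.random.randn(h, w, num_cls).reshape(-1, num_cls)
mask_gt = np.random.randint(0, num_cls, size=h * w)
level_loss = sparse_softmax_ce(mask_logits, mask_gt).mean()    # mirrors tf.reduce_mean(...)
print(level_loss)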
def build_whole_detection_network(self, input_img_batch, gtboxes_batch_h, gtboxes_batch_r,
                                  gt_encode_label, gpu_id=0):

    if self.is_training:
        gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
        gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

        gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
        gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        gt_encode_label = tf.reshape(gt_encode_label, [-1, self.coding_len])
        gt_encode_label = tf.cast(gt_encode_label, tf.float32)

    # 1. build base network
    feature_pyramid = self.build_base_network(input_img_batch)

    # 2. build rpn
    rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(feature_pyramid, 'rpn_net')

    # 3. generate_anchors
    anchor_list = self.make_anchors(feature_pyramid)

    rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
    rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
    # rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)
    anchors = tf.concat(anchor_list, axis=0)

    if self.is_training:
        with tf.variable_scope('build_loss'):
            labels, target_delta, anchor_states, target_boxes = tf.py_func(
                func=anchor_target_layer,
                inp=[gtboxes_batch_h, gtboxes_batch_r, anchors],
                Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

            if self.method == 'H':
                self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 0)
            else:
                self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 1)

            cls_loss = losses.focal_loss(labels, rpn_cls_score, anchor_states)
            if cfgs.USE_IOU_FACTOR:
                reg_loss = losses.iou_smooth_l1_loss_(target_delta, rpn_box_pred, anchor_states,
                                                      target_boxes, anchors)
            else:
                reg_loss = losses.smooth_l1_loss(target_delta, rpn_box_pred, anchor_states)

            self.losses_dict['cls_loss'] = cls_loss * cfgs.CLS_WEIGHT
            self.losses_dict['reg_loss'] = reg_loss * cfgs.REG_WEIGHT

    with tf.variable_scope('refine_feature_pyramid'):
        refine_feature_pyramid = {}
        refine_boxes_list = []

        for box_pred, cls_prob, anchor, stride, level in \
                zip(rpn_box_pred_list, rpn_cls_prob_list, anchor_list,
                    cfgs.ANCHOR_STRIDE, cfgs.LEVEL):

            box_pred = tf.reshape(box_pred, [-1, self.num_anchors_per_location, 5])
            anchor = tf.reshape(anchor,
                                [-1, self.num_anchors_per_location, 5 if self.method == 'R' else 4])
            cls_prob = tf.reshape(cls_prob, [-1, self.num_anchors_per_location, cfgs.CLASS_NUM])

            cls_max_prob = tf.reduce_max(cls_prob, axis=-1)
            box_pred_argmax = tf.cast(tf.reshape(tf.argmax(cls_max_prob, axis=-1), [-1, 1]), tf.int32)
            indices = tf.cast(tf.cumsum(tf.ones_like(box_pred_argmax), axis=0),
                              tf.int32) - tf.constant(1, tf.int32)
            indices = tf.concat([indices, box_pred_argmax], axis=-1)

            box_pred_filter = tf.reshape(tf.gather_nd(box_pred, indices), [-1, 5])
            anchor_filter = tf.reshape(tf.gather_nd(anchor, indices),
                                       [-1, 5 if self.method == 'R' else 4])

            if cfgs.METHOD == 'H':
                x_c = (anchor_filter[:, 2] + anchor_filter[:, 0]) / 2
                y_c = (anchor_filter[:, 3] + anchor_filter[:, 1]) / 2
                h = anchor_filter[:, 2] - anchor_filter[:, 0] + 1
                w = anchor_filter[:, 3] - anchor_filter[:, 1] + 1
                theta = -90 * tf.ones_like(x_c)
                anchor_filter = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))

            boxes_filter = bbox_transform.rbbox_transform_inv(boxes=anchor_filter,
                                                              deltas=box_pred_filter)
            refine_boxes_list.append(boxes_filter)
            center_point = boxes_filter[:, :2] / stride

            refine_feature_pyramid[level] = self.refine_feature_op(points=center_point,
                                                                   feature_map=feature_pyramid[level],
                                                                   name=level)

    refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list, refine_angle_cls_list = \
        self.refine_net(refine_feature_pyramid, 'refine_net')

    refine_box_pred = tf.concat(refine_box_pred_list, axis=0)
    refine_cls_score = tf.concat(refine_cls_score_list, axis=0)
    refine_cls_prob = tf.concat(refine_cls_prob_list, axis=0)
    refine_angle_cls = tf.concat(refine_angle_cls_list, axis=0)
    refine_boxes = tf.concat(refine_boxes_list, axis=0)

    # 4. postprocess rpn proposals. such as: decode, clip, filter
    if self.is_training:
        with tf.variable_scope('build_refine_loss'):
            refine_labels, refine_target_delta, refine_box_states, refine_target_boxes, \
            refine_target_encode_label = tf.py_func(
                func=refinebox_target_layer,
                inp=[gtboxes_batch_r, gt_encode_label, refine_boxes,
                     cfgs.REFINE_IOU_POSITIVE_THRESHOLD[0], cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[0],
                     gpu_id],
                Tout=[tf.float32, tf.float32, tf.float32, tf.float32, tf.float32])

            self.add_anchor_img_smry(input_img_batch, refine_boxes, refine_box_states, 1)

            refine_cls_loss = losses.focal_loss(refine_labels, refine_cls_score, refine_box_states)
            refine_reg_loss = losses.smooth_l1_loss(refine_target_delta, refine_box_pred,
                                                    refine_box_states)
            angle_cls_loss = losses_dcl.angle_cls_period_focal_loss(
                refine_target_encode_label, refine_angle_cls, refine_box_states, refine_target_boxes,
                decimal_weight=cfgs.DATASET_NAME.startswith('DOTA'))

            self.losses_dict['refine_cls_loss'] = refine_cls_loss * cfgs.CLS_WEIGHT
            self.losses_dict['refine_reg_loss'] = refine_reg_loss * cfgs.REG_WEIGHT
            self.losses_dict['angle_cls_loss'] = angle_cls_loss * cfgs.ANGLE_WEIGHT

    with tf.variable_scope('postprocess_detctions'):
        scores, category, boxes_angle = postprocess_detctions(
            refine_bbox_pred=refine_box_pred,
            refine_cls_prob=refine_cls_prob,
            refine_angle_prob=tf.sigmoid(refine_angle_cls),
            refine_boxes=refine_boxes,
            is_training=self.is_training)
        scores = tf.stop_gradient(scores)
        category = tf.stop_gradient(category)
        boxes_angle = tf.stop_gradient(boxes_angle)

    if self.is_training:
        return scores, category, boxes_angle, self.losses_dict
    else:
        return scores, category, boxes_angle
def fast_rcnn_loss(self):
    with tf.variable_scope('fast_rcnn_loss'):
        minibatch_indices, minibatch_reference_boxes_mattached_gtboxes, \
        minibatch_reference_boxes_mattached_gtboxes_rotate, \
        minibatch_reference_boxes_mattached_head_quadrant, minibatch_object_mask, \
        minibatch_label_one_hot = self.fast_rcnn_minibatch(self.fast_rcnn_all_level_proposals)

        minibatch_reference_boxes = tf.gather(self.fast_rcnn_all_level_proposals, minibatch_indices)

        minibatch_encode_boxes = tf.gather(self.fast_rcnn_encode_boxes,
                                           minibatch_indices)  # [minibatch_size, num_classes*4]
        minibatch_encode_boxes_rotate = tf.gather(self.fast_rcnn_encode_boxes_rotate,
                                                  minibatch_indices)  # [minibatch_size, num_classes*5]
        minibatch_head_quadrant = tf.gather(self.fast_rcnn_head_quadrant, minibatch_indices)

        minibatch_scores = tf.gather(self.fast_rcnn_scores, minibatch_indices)
        minibatch_scores_rotate = tf.gather(self.fast_rcnn_scores_rotate, minibatch_indices)

        # encode gtboxes
        minibatch_encode_gtboxes = encode_and_decode.encode_boxes(
            unencode_boxes=minibatch_reference_boxes_mattached_gtboxes,
            reference_boxes=minibatch_reference_boxes,
            scale_factors=self.scale_factors)

        minibatch_encode_gtboxes_rotate = encode_and_decode.encode_boxes_rotate(
            unencode_boxes=minibatch_reference_boxes_mattached_gtboxes_rotate,
            reference_boxes=minibatch_reference_boxes,
            scale_factors=self.scale_factors)

        ############### Class-agnostic: without tile
        # [minibatch_size, num_classes*4]
        # minibatch_encode_gtboxes = tf.tile(minibatch_encode_gtboxes, [1, self.num_classes])

        ############### Class-agnostic: without tile
        # [minibatch_size, num_classes*5]
        # minibatch_encode_gtboxes_rotate = tf.tile(minibatch_encode_gtboxes_rotate, [1, self.num_classes])

        ############### Class-agnostic: without tile
        # minibatch_gt_head_quadrant = tf.tile(minibatch_reference_boxes_mattached_head_quadrant, [1, self.num_classes])
        minibatch_gt_head_quadrant = minibatch_reference_boxes_mattached_head_quadrant

        class_weights_list = []
        category_list = tf.unstack(minibatch_label_one_hot, axis=1)
        for i in range(1, self.num_classes + 1):
            tmp_class_weights = tf.ones(shape=[tf.shape(minibatch_encode_boxes)[0], 4], dtype=tf.float32)
            tmp_class_weights = tmp_class_weights * tf.expand_dims(category_list[i], axis=1)
            class_weights_list.append(tmp_class_weights)
        class_weights = tf.concat(class_weights_list, axis=1)  # [minibatch_size, num_classes*4]

        class_weights_list_rotate = []
        category_list_rotate = tf.unstack(minibatch_label_one_hot, axis=1)
        for i in range(1, self.num_classes + 1):
            tmp_class_weights_rotate = tf.ones(shape=[tf.shape(minibatch_encode_boxes_rotate)[0], 5],
                                               dtype=tf.float32)
            tmp_class_weights_rotate = tmp_class_weights_rotate * tf.expand_dims(category_list_rotate[i],
                                                                                 axis=1)
            class_weights_list_rotate.append(tmp_class_weights_rotate)
        class_weights_rotate = tf.concat(class_weights_list_rotate,
                                         axis=1)  # [minibatch_size, num_classes*5]

        class_weights_list_head = []
        category_list_head = tf.unstack(minibatch_label_one_hot, axis=1)
        for i in range(1, self.num_classes + 1):
            tmp_class_weights_head = tf.ones(shape=[tf.shape(minibatch_head_quadrant)[0], 4],
                                             dtype=tf.float32)
            tmp_class_weights_head = tmp_class_weights_head * tf.expand_dims(category_list_head[i], axis=1)
            class_weights_list_head.append(tmp_class_weights_head)
        class_weights_head = tf.concat(class_weights_list_head, axis=1)

        # loss
        with tf.variable_scope('fast_rcnn_classification_loss'):
            # fast_rcnn_classification_loss = slim.losses.softmax_cross_entropy(logits=minibatch_scores,
            #                                                                   onehot_labels=minibatch_label_one_hot)
            fast_rcnn_classification_loss = losses.focal_loss(prediction_tensor=minibatch_scores,
                                                              target_tensor=minibatch_label_one_hot)
            slim.losses.add_loss(fast_rcnn_classification_loss)

        with tf.variable_scope('fast_rcnn_location_loss'):
            # fast_rcnn_location_loss = losses.l1_smooth_losses(predict_boxes=minibatch_encode_boxes,
            #                                                   gtboxes=minibatch_encode_gtboxes,
            #                                                   object_weights=minibatch_object_mask,
            #                                                   classes_weights=class_weights)
            # Class-agnostic regression
            fast_rcnn_location_loss = losses.l1_smooth_losses(predict_boxes=minibatch_encode_boxes,
                                                              gtboxes=minibatch_encode_gtboxes,
                                                              object_weights=minibatch_object_mask,
                                                              classes_weights=None)
            slim.losses.add_loss(fast_rcnn_location_loss)

        with tf.variable_scope('fast_rcnn_classification_rotate_loss'):
            # fast_rcnn_classification_rotate_loss = slim.losses.softmax_cross_entropy(logits=minibatch_scores_rotate,
            #                                                                          onehot_labels=minibatch_label_one_hot)
            fast_rcnn_classification_rotate_loss = losses.focal_loss(
                prediction_tensor=minibatch_scores_rotate,
                target_tensor=minibatch_label_one_hot)
            slim.losses.add_loss(fast_rcnn_classification_rotate_loss)

        with tf.variable_scope('fast_rcnn_location_rotate_loss'):
            # fast_rcnn_location_rotate_loss = losses.l1_smooth_losses(predict_boxes=minibatch_encode_boxes_rotate,
            #                                                          gtboxes=minibatch_encode_gtboxes_rotate,
            #                                                          object_weights=minibatch_object_mask,
            #                                                          classes_weights=class_weights_rotate)
            # Class-agnostic regression
            fast_rcnn_location_rotate_loss = losses.l1_smooth_losses(
                predict_boxes=minibatch_encode_boxes_rotate,
                gtboxes=minibatch_encode_gtboxes_rotate,
                object_weights=minibatch_object_mask,
                classes_weights=None)
            slim.losses.add_loss(fast_rcnn_location_rotate_loss)

        with tf.variable_scope('fast_rcnn_head_quadrant_loss'):
            # fast_rcnn_head_quadrant_loss = losses.l1_smooth_losses(predict_boxes=minibatch_head_quadrant,
            #                                                        gtboxes=minibatch_gt_head_quadrant,
            #                                                        object_weights=minibatch_object_mask,
            #                                                        classes_weights=class_weights_head)
            # Class-agnostic regression
            fast_rcnn_head_quadrant_loss = losses.l1_smooth_losses(predict_boxes=minibatch_head_quadrant,
                                                                   gtboxes=minibatch_gt_head_quadrant,
                                                                   object_weights=minibatch_object_mask,
                                                                   classes_weights=None)
            # give the head-quadrant loss more importance via a bigger weight
            slim.losses.add_loss(fast_rcnn_head_quadrant_loss * 10)

        return fast_rcnn_location_loss, fast_rcnn_classification_loss, \
               fast_rcnn_location_rotate_loss, fast_rcnn_classification_rotate_loss, \
               fast_rcnn_head_quadrant_loss * 10
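# The class_weights_* loops above expand one-hot labels into per-class regression weights:
# for each sample, only the 4 (or 5) regression slots belonging to its own class get weight 1.
# Note the active calls above use classes_weights=None (class-agnostic regression), so these
# weights are built but unused. A NumPy sketch of the construction (illustrative only):
import numpy as np

num_classes, box_dim = 3, 4
label_one_hot = np.array([[0, 1, 0, 0],      # class 1 (column 0 is background)
                          [0, 0, 0, 1]],     # class 3
                         np.float32)

class_weights = np.concatenate(
    [np.ones((label_one_hot.shape[0], box_dim), np.float32) * label_one_hot[:, i:i + 1]
     for i in range(1, num_classes + 1)], axis=1)            # [N, num_classes * box_dim]
print(class_weights)
# row 0 -> ones in columns 0..3 (class 1's slot); row 1 -> ones in columns 8..11 (class 3's slot)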
def build_whole_detection_network(self, input_img_batch, gtboxes_batch_h, gtboxes_batch_r,
                                  gt_encode_label, gpu_id=0):

    if self.is_training:
        gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
        gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

        gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
        gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        gt_encode_label = tf.reshape(gt_encode_label, [-1, self.coding_len])
        gt_encode_label = tf.cast(gt_encode_label, tf.float32)

    # 1. build base network
    feature_pyramid = self.build_base_network(input_img_batch)

    # 2. build rpn
    rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(feature_pyramid, 'rpn_net')

    # 3. generate_anchors
    anchor_list = self.make_anchors(feature_pyramid)

    rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
    rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
    # rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)
    anchors = tf.concat(anchor_list, axis=0)

    if self.is_training:
        with tf.variable_scope('build_loss'):
            labels, target_delta, anchor_states, target_boxes = tf.py_func(
                func=anchor_target_layer,
                inp=[gtboxes_batch_h, gtboxes_batch_r, anchors],
                Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

            if self.method == 'H':
                self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 0)
            else:
                self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 1)

            cls_loss = losses.focal_loss(labels, rpn_cls_score, anchor_states)
            if cfgs.USE_IOU_FACTOR:
                reg_loss = losses.iou_smooth_l1_loss_(target_delta, rpn_box_pred, anchor_states,
                                                      target_boxes, anchors)
            else:
                reg_loss = losses.smooth_l1_loss(target_delta, rpn_box_pred, anchor_states)

            self.losses_dict['cls_loss'] = cls_loss * cfgs.CLS_WEIGHT
            self.losses_dict['reg_loss'] = reg_loss * cfgs.REG_WEIGHT

    refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list, refine_angle_cls_list = \
        self.refine_net(feature_pyramid, 'refine_net')

    refine_box_pred = tf.concat(refine_box_pred_list, axis=0)
    refine_cls_score = tf.concat(refine_cls_score_list, axis=0)
    refine_cls_prob = tf.concat(refine_cls_prob_list, axis=0)
    refine_angle_cls = tf.concat(refine_angle_cls_list, axis=0)

    if cfgs.METHOD == 'H':
        x_c = (anchors[:, 2] + anchors[:, 0]) / 2
        y_c = (anchors[:, 3] + anchors[:, 1]) / 2
        h = anchors[:, 2] - anchors[:, 0] + 1
        w = anchors[:, 3] - anchors[:, 1] + 1
        theta = -90 * tf.ones_like(x_c)
        anchors = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))

    # if cfgs.ANGLE_RANGE == 180:
    #     anchors = tf.py_func(coordinate_present_convert,
    #                          inp=[anchors, -1],
    #                          Tout=[tf.float32])
    #     anchors = tf.reshape(anchors, [-1, 5])

    refine_boxes = bbox_transform.rbbox_transform_inv(boxes=anchors, deltas=rpn_box_pred)

    # 4. postprocess rpn proposals. such as: decode, clip, filter
    if self.is_training:
        with tf.variable_scope('build_refine_loss'):
            refine_labels, refine_target_delta, refine_box_states, refine_target_boxes, \
            refine_target_encode_label = tf.py_func(
                func=refinebox_target_layer,
                inp=[gtboxes_batch_r, gt_encode_label, refine_boxes,
                     cfgs.REFINE_IOU_POSITIVE_THRESHOLD[0], cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[0],
                     gpu_id],
                Tout=[tf.float32, tf.float32, tf.float32, tf.float32, tf.float32])

            self.add_anchor_img_smry(input_img_batch, refine_boxes, refine_box_states, 1)

            refine_cls_loss = losses.focal_loss(refine_labels, refine_cls_score, refine_box_states)
            refine_reg_loss = losses.smooth_l1_loss(refine_target_delta, refine_box_pred,
                                                    refine_box_states)
            angle_cls_loss = losses_dcl.angle_cls_period_focal_loss(
                refine_target_encode_label, refine_angle_cls, refine_box_states, refine_target_boxes,
                decimal_weight=True)

            self.losses_dict['refine_cls_loss'] = refine_cls_loss * cfgs.CLS_WEIGHT
            self.losses_dict['refine_reg_loss'] = refine_reg_loss * cfgs.REG_WEIGHT
            self.losses_dict['angle_cls_loss'] = angle_cls_loss * cfgs.ANGLE_WEIGHT

    with tf.variable_scope('postprocess_detctions'):
        scores, category, boxes_angle = postprocess_detctions(
            refine_bbox_pred=refine_box_pred,
            refine_cls_prob=refine_cls_prob,
            refine_angle_prob=tf.sigmoid(refine_angle_cls),
            refine_boxes=refine_boxes,
            is_training=self.is_training)
        scores = tf.stop_gradient(scores)
        category = tf.stop_gradient(category)
        boxes_angle = tf.stop_gradient(boxes_angle)

    if self.is_training:
        return scores, category, boxes_angle, self.losses_dict
    else:
        return scores, category, boxes_angle
def build_whole_detection_network(self, input_img_batch, gtboxes_batch_h, gtboxes_batch_r,
                                  gthead_quadrant, gt_smooth_label, gpu_id=0):

    if self.is_training:
        gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
        gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

        gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
        gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        gthead_quadrant = tf.reshape(gthead_quadrant, [-1, 1])
        gthead_quadrant = tf.cast(gthead_quadrant, tf.int32)

        gt_smooth_label = tf.reshape(gt_smooth_label, [-1, self.angle_range])
        gt_smooth_label = tf.cast(gt_smooth_label, tf.float32)

    img_shape = tf.shape(input_img_batch)

    # 1. build base network
    feature_pyramid = self.build_base_network(input_img_batch)

    # 2. build rpn
    rpn_box_pred, rpn_cls_score, rpn_cls_prob, rpn_head_cls, rpn_angle_cls = self.rpn_net(feature_pyramid)

    # 3. generate_anchors
    anchors = self.make_anchors(feature_pyramid)

    # 4. postprocess rpn proposals. such as: decode, clip, filter
    if self.is_training:
        with tf.variable_scope('build_loss'):
            labels, target_delta, anchor_states, target_boxes, target_head_quadrant, \
            target_smooth_label = tf.py_func(
                func=anchor_target_layer,
                inp=[gtboxes_batch_h, gtboxes_batch_r, gthead_quadrant, gt_smooth_label, anchors, gpu_id],
                Tout=[tf.float32, tf.float32, tf.float32, tf.float32, tf.float32, tf.float32])

            if self.method == 'H':
                self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 0)
            else:
                self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 1)

            cls_loss = losses.focal_loss(labels, rpn_cls_score, anchor_states)
            if cfgs.REG_LOSS_MODE == 0:
                reg_loss = losses.iou_smooth_l1_loss(target_delta, rpn_box_pred, anchor_states,
                                                     target_boxes, anchors)
            elif cfgs.REG_LOSS_MODE == 1:
                reg_loss = losses.smooth_l1_loss_atan(target_delta, rpn_box_pred, anchor_states)
            else:
                reg_loss = losses.smooth_l1_loss(target_delta, rpn_box_pred, anchor_states)

            if cfgs.DATASET_NAME.startswith('DOTA'):
                head_cls_loss = losses.head_specific_cls_focal_loss(target_head_quadrant, rpn_head_cls,
                                                                    anchor_states, labels,
                                                                    specific_cls=[6, 7, 8, 9, 10, 11])
            else:
                head_cls_loss = losses.head_focal_loss(target_head_quadrant, rpn_head_cls, anchor_states)

            angle_cls_loss = losses.angle_focal_loss(target_smooth_label, rpn_angle_cls, anchor_states)

            self.losses_dict = {'cls_loss': cls_loss * cfgs.CLS_WEIGHT,
                                'reg_loss': reg_loss * cfgs.REG_WEIGHT,
                                'head_cls_loss': head_cls_loss * cfgs.HEAD_WEIGHT,
                                'angle_cls_loss': angle_cls_loss * cfgs.ANGLE_WEIGHT}

    with tf.variable_scope('postprocess_detctions'):
        boxes, scores, category, boxes_head, boxes_angle = postprocess_detctions(
            rpn_bbox_pred=rpn_box_pred,
            rpn_cls_prob=rpn_cls_prob,
            rpn_angle_prob=tf.sigmoid(rpn_angle_cls),
            rpn_head_prob=tf.sigmoid(rpn_head_cls),
            anchors=anchors,
            is_training=self.is_training)
        boxes = tf.stop_gradient(boxes)
        scores = tf.stop_gradient(scores)
        category = tf.stop_gradient(category)
        boxes_head = tf.stop_gradient(boxes_head)
        boxes_angle = tf.stop_gradient(boxes_angle)

    if self.is_training:
        return boxes, scores, category, boxes_head, boxes_angle, self.losses_dict
    else:
        return boxes, scores, category, boxes_head, boxes_angle