Example #1
    def build_whole_detection_network(self, input_img_batch, gtboxes_batch):

        if self.is_training:
            # ensure shape is [M, 5]
            gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 5])
            gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        feature_pyramid = self.build_base_network(input_img_batch)

        # 2. build rpn
        rpn_box_pred, rpn_cls_score, rpn_cls_prob = self.rpn_net(
            feature_pyramid)

        # 3. generate_anchors
        anchors = self.make_anchors(feature_pyramid)

        # 4. postprocess rpn proposals. such as: decode, clip, filter
        if self.is_training:
            with tf.variable_scope('build_loss'):
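                # anchor_target_layer runs as a numpy op via tf.py_func: it assigns each anchor a
                # classification label, a box-regression target and an anchor state
                # (typically 1 = positive, 0 = negative, -1 = ignored).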
                labels, target_delta, anchor_states = tf.py_func(
                    func=anchor_target_layer,
                    inp=[gtboxes_batch, anchors],
                    Tout=[tf.float32, tf.float32, tf.float32])

                self.add_anchor_img_smry(input_img_batch, anchors,
                                         anchor_states)

                cls_loss = losses.focal_loss(labels, rpn_cls_score,
                                             anchor_states)

                reg_loss = losses.smooth_l1_loss(target_delta, rpn_box_pred,
                                                 anchor_states)

                self.losses_dict = {
                    'cls_loss': cls_loss,
                    'reg_loss': reg_loss * 2
                }

        with tf.variable_scope('postprocess_detctions'):
            boxes, scores, category = postprocess_detctions(
                rpn_bbox_pred=rpn_box_pred,
                rpn_cls_prob=rpn_cls_prob,
                img_shape=img_shape,
                anchors=anchors,
                is_training=self.is_training)
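            # the decoded detections are returned for monitoring/inference only;
            # stop_gradient keeps them out of the training graph.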
            boxes = tf.stop_gradient(boxes)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)

        if self.is_training:
            return boxes, scores, category, self.losses_dict
        else:
            return boxes, scores, category
Example #2
    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h,
                                      gtboxes_batch_r,
                                      gpu_id=0):

        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        # 1. build base network
        feature_pyramid = self.build_base_network(input_img_batch)

        # 2. build rpn
        rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(
            feature_pyramid, 'rpn_net')

        # 3. generate_anchors
        anchor_list = self.make_anchors(feature_pyramid)
        rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
        rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
        # rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)
        anchors = tf.concat(anchor_list, axis=0)

        if self.is_training:
            with tf.variable_scope('build_loss'):
                labels, target_delta, anchor_states, target_boxes = tf.py_func(
                    func=anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, anchors],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                if self.method == 'H':
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 0)
                else:
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 1)

                cls_loss = losses.focal_loss(labels, rpn_cls_score,
                                             anchor_states)
                if cfgs.USE_IOU_FACTOR:
                    reg_loss = losses.iou_smooth_l1_loss(
                        target_delta, rpn_box_pred, anchor_states,
                        target_boxes, anchors)
                else:
                    reg_loss = losses.smooth_l1_loss(target_delta,
                                                     rpn_box_pred,
                                                     anchor_states)

                self.losses_dict['cls_loss'] = cls_loss * cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * cfgs.REG_WEIGHT

        with tf.variable_scope('refine_feature_pyramid'):
            refine_feature_pyramid = {}
            for level in cfgs.LEVEL:
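                # a 1x5 conv followed by a 5x1 conv approximates a 5x5 receptive field at lower
                # cost; the parallel 1x1 branch acts as a shortcut before the two are summed.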
                feature_1x5 = slim.conv2d(
                    inputs=feature_pyramid[level],
                    num_outputs=256,
                    kernel_size=[1, 5],
                    weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                    biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                    stride=1,
                    activation_fn=None,
                    scope='refine_1x5_{}'.format(level))

                feature_5x1 = slim.conv2d(
                    inputs=feature_1x5,
                    num_outputs=256,
                    kernel_size=[5, 1],
                    weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                    biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                    stride=1,
                    activation_fn=None,
                    scope='refine_5x1_{}'.format(level))

                feature_1x1 = slim.conv2d(
                    inputs=feature_pyramid[level],
                    num_outputs=256,
                    kernel_size=[1, 1],
                    weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                    biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                    stride=1,
                    activation_fn=None,
                    scope='refine_1x1_{}'.format(level))
                refine_feature_pyramid[level] = feature_5x1 + feature_1x1

        refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list = self.refine_net(
            refine_feature_pyramid, 'refine_net')
        # refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list = self.refine_net(feature_pyramid, 'refine_net')

        refine_box_pred = tf.concat(refine_box_pred_list, axis=0)
        refine_cls_score = tf.concat(refine_cls_score_list, axis=0)
        refine_cls_prob = tf.concat(refine_cls_prob_list, axis=0)
        # refine_boxes = tf.concat(refine_boxes_list, axis=0)

        if cfgs.METHOD == 'H':
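            # rewrite horizontal anchors as rotated boxes (x_c, y_c, w, h, theta) with theta
            # fixed at -90 degrees, so the rotated decode below can handle both anchor types.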
            x_c = (anchors[:, 2] + anchors[:, 0]) / 2
            y_c = (anchors[:, 3] + anchors[:, 1]) / 2
            h = anchors[:, 2] - anchors[:, 0] + 1
            w = anchors[:, 3] - anchors[:, 1] + 1
            theta = -90 * tf.ones_like(x_c)
            anchors = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))

        refine_boxes = bbox_transform.rbbox_transform_inv(boxes=anchors,
                                                          deltas=rpn_box_pred)

        # 4. postprocess rpn proposals. such as: decode, clip, filter
        if not self.is_training:
            with tf.variable_scope('postprocess_detctions'):
                boxes, scores, category = postprocess_detctions(
                    refine_bbox_pred=refine_box_pred,
                    refine_cls_prob=refine_cls_prob,
                    anchors=refine_boxes,
                    is_training=self.is_training)
                return boxes, scores, category

        #  5. build loss
        else:
            with tf.variable_scope('build_refine_loss'):
                refine_labels, refine_target_delta, refine_box_states, refine_target_boxes = tf.py_func(
                    func=refinebox_target_layer,
                    inp=[
                        gtboxes_batch_r, refine_boxes,
                        cfgs.REFINE_IOU_POSITIVE_THRESHOLD[0],
                        cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[0], gpu_id
                    ],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                self.add_anchor_img_smry(input_img_batch, refine_boxes,
                                         refine_box_states, 1)

                refine_cls_loss = losses.focal_loss(refine_labels,
                                                    refine_cls_score,
                                                    refine_box_states)
                if cfgs.USE_IOU_FACTOR:
                    refine_reg_loss = losses.iou_smooth_l1_loss(
                        refine_target_delta, refine_box_pred,
                        refine_box_states, refine_target_boxes, refine_boxes)
                else:
                    refine_reg_loss = losses.smooth_l1_loss(
                        refine_target_delta, refine_box_pred,
                        refine_box_states)

                self.losses_dict[
                    'refine_cls_loss'] = refine_cls_loss * cfgs.CLS_WEIGHT
                self.losses_dict[
                    'refine_reg_loss'] = refine_reg_loss * cfgs.REG_WEIGHT

            with tf.variable_scope('postprocess_detctions'):
                boxes, scores, category = postprocess_detctions(
                    refine_bbox_pred=refine_box_pred,
                    refine_cls_prob=refine_cls_prob,
                    anchors=refine_boxes,
                    is_training=self.is_training)
                boxes = tf.stop_gradient(boxes)
                scores = tf.stop_gradient(scores)
                category = tf.stop_gradient(category)

                return boxes, scores, category, self.losses_dict
    def refine_stage(self,
                     input_img_batch,
                     gtboxes_batch_r,
                     gt_smooth_label,
                     box_pred_list,
                     cls_prob_list,
                     proposal_list,
                     angle_cls_list,
                     feature_pyramid,
                     gpu_id,
                     pos_threshold,
                     neg_threshold,
                     stage,
                     proposal_filter=False):
        with tf.variable_scope('refine_feature_pyramid{}'.format(stage)):
            refine_feature_pyramid = {}
            refine_boxes_list = []
            refine_boxes_angle_list = []

            for box_pred, cls_prob, proposal, angle_prob, stride, level in \
                    zip(box_pred_list, cls_prob_list, proposal_list, angle_cls_list,
                        cfgs.ANCHOR_STRIDE, cfgs.LEVEL):

                if proposal_filter:
                    box_pred = tf.reshape(
                        box_pred, [-1, self.num_anchors_per_location, 5])
                    proposal = tf.reshape(proposal, [
                        -1, self.num_anchors_per_location,
                        5 if self.method == 'R' else 4
                    ])
                    cls_prob = tf.reshape(
                        cls_prob,
                        [-1, self.num_anchors_per_location, cfgs.CLASS_NUM])

                    cls_max_prob = tf.reduce_max(cls_prob, axis=-1)
                    box_pred_argmax = tf.cast(
                        tf.reshape(tf.argmax(cls_max_prob, axis=-1), [-1, 1]),
                        tf.int32)
                    indices = tf.cast(
                        tf.cumsum(tf.ones_like(box_pred_argmax), axis=0),
                        tf.int32) - tf.constant(1, tf.int32)
                    indices = tf.concat([indices, box_pred_argmax], axis=-1)
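                    # indices now pairs each location with its highest-scoring anchor, so the
                    # gather_nd calls keep exactly one box prediction and one proposal per location.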

                    box_pred = tf.reshape(tf.gather_nd(box_pred, indices),
                                          [-1, 5])
                    proposal = tf.reshape(tf.gather_nd(proposal, indices),
                                          [-1, 5 if self.method == 'R' else 4])

                    if cfgs.METHOD == 'H':
                        x_c = (proposal[:, 2] + proposal[:, 0]) / 2
                        y_c = (proposal[:, 3] + proposal[:, 1]) / 2
                        h = proposal[:, 2] - proposal[:, 0] + 1
                        w = proposal[:, 3] - proposal[:, 1] + 1
                        theta = -90 * tf.ones_like(x_c)
                        proposal = tf.transpose(
                            tf.stack([x_c, y_c, w, h, theta]))
                else:
                    box_pred = tf.reshape(box_pred, [-1, 5])
                    proposal = tf.reshape(proposal, [-1, 5])

                bboxes = bbox_transform.rbbox_transform_inv(boxes=proposal,
                                                            deltas=box_pred)

                if angle_prob is not None:
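                    # decode the angle branch: the argmax bin index i maps back to an angle of
                    # roughly -(i + 0.5) degrees.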
                    angle_cls = tf.cast(
                        tf.argmax(tf.sigmoid(angle_prob), axis=1), tf.float32)
                    angle_cls = tf.reshape(angle_cls, [-1]) * -1 - 0.5
                    x, y, w, h, theta = tf.unstack(bboxes, axis=1)
                    bboxes_angle = tf.transpose(
                        tf.stack([x, y, w, h, angle_cls]))
                    refine_boxes_angle_list.append(bboxes_angle)
                    center_point = bboxes_angle[:, :2] / stride
                else:
                    center_point = bboxes[:, :2] / stride
                refine_boxes_list.append(bboxes)

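                # re-sample the feature map at each decoded box centre (feature alignment)
                # before feeding the refinement head.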
                refine_feature_pyramid[level] = self.refine_feature_op(
                    points=center_point,
                    feature_map=feature_pyramid[level],
                    name=level)

            refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list, refine_angle_cls_list = self.refine_net(
                refine_feature_pyramid, 'refine_net{}'.format(stage))

            refine_box_pred = tf.concat(refine_box_pred_list, axis=0)
            refine_cls_score = tf.concat(refine_cls_score_list, axis=0)
            # refine_cls_prob = tf.concat(refine_cls_prob_list, axis=0)
            refine_boxes = tf.concat(refine_boxes_list, axis=0)
            refine_angle_cls = tf.concat(refine_angle_cls_list, axis=0)

        if self.is_training:
            with tf.variable_scope('build_refine_loss{}'.format(stage)):
                refine_labels, refine_target_delta, refine_box_states, refine_target_boxes, refine_target_smooth_label = tf.py_func(
                    func=refinebox_target_layer,
                    inp=[
                        gtboxes_batch_r, gt_smooth_label, refine_boxes,
                        pos_threshold, neg_threshold, gpu_id
                    ],
                    Tout=[
                        tf.float32, tf.float32, tf.float32, tf.float32,
                        tf.float32
                    ])

                self.add_anchor_img_smry(input_img_batch, refine_boxes,
                                         refine_box_states, 1)

                refine_cls_loss = losses.focal_loss(refine_labels,
                                                    refine_cls_score,
                                                    refine_box_states)
                if False:  # cfgs.USE_IOU_FACTOR:
                    refine_reg_loss = losses.iou_smooth_l1_loss(
                        refine_target_delta,
                        refine_box_pred,
                        refine_box_states,
                        refine_target_boxes,
                        refine_boxes,
                        is_refine=True)
                else:
                    refine_reg_loss = losses.smooth_l1_loss(
                        refine_target_delta, refine_box_pred,
                        refine_box_states)

                angle_cls_loss = losses.angle_focal_loss(
                    refine_target_smooth_label, refine_angle_cls,
                    refine_box_states)

                self.losses_dict['refine_cls_loss{}'.format(
                    stage)] = refine_cls_loss * cfgs.CLS_WEIGHT
                self.losses_dict['refine_reg_loss{}'.format(
                    stage)] = refine_reg_loss * cfgs.REG_WEIGHT
                self.losses_dict['angle_cls_loss{}'.format(
                    stage)] = angle_cls_loss * cfgs.ANGLE_CLS_WEIGHT

        return refine_box_pred_list, refine_cls_prob_list, refine_boxes_list, refine_angle_cls_list
    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h,
                                      gtboxes_batch_r,
                                      gt_smooth_label,
                                      gpu_id=0):

        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

            gt_smooth_label = tf.reshape(gt_smooth_label,
                                         [-1, cfgs.ANGLE_RANGE])
            gt_smooth_label = tf.cast(gt_smooth_label, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        feature_pyramid = self.build_base_network(input_img_batch)

        # 2. build rpn
        rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(
            feature_pyramid, 'rpn_net')

        # 3. generate_anchors
        anchor_list = self.make_anchors(feature_pyramid)

        rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
        rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
        # rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)
        anchors = tf.concat(anchor_list, axis=0)

        if self.is_training:
            with tf.variable_scope('build_loss'):
                labels, target_delta, anchor_states, target_boxes = tf.py_func(
                    func=anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, anchors],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                if self.method == 'H':
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 0)
                else:
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 1)

                cls_loss = losses.focal_loss(labels, rpn_cls_score,
                                             anchor_states)
                if cfgs.USE_IOU_FACTOR:
                    reg_loss = losses.iou_smooth_l1_loss(
                        target_delta, rpn_box_pred, anchor_states,
                        target_boxes, anchors)
                else:
                    reg_loss = losses.smooth_l1_loss(target_delta,
                                                     rpn_box_pred,
                                                     anchor_states)

                self.losses_dict['cls_loss'] = cls_loss * cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * cfgs.REG_WEIGHT

        box_pred_list, cls_prob_list, proposal_list = rpn_box_pred_list, rpn_cls_prob_list, anchor_list
        # no angle predictions from the RPN stage; placeholders keep the zip in refine_stage aligned
        angle_cls_list = [None] * 5

        all_box_pred_list, all_cls_prob_list, all_proposal_list, all_angle_cls_list = [], [], [], []

        for i in range(cfgs.NUM_REFINE_STAGE):
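            # cascade of refinement stages: only the first stage filters down to one anchor per
            # location (proposal_filter=True); later stages refine the previous stage's outputs.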
            box_pred_list, cls_prob_list, proposal_list, angle_cls_list = self.refine_stage(
                input_img_batch,
                gtboxes_batch_r,
                gt_smooth_label,
                box_pred_list,
                cls_prob_list,
                proposal_list,
                angle_cls_list,
                feature_pyramid,
                gpu_id,
                pos_threshold=cfgs.REFINE_IOU_POSITIVE_THRESHOLD[i],
                neg_threshold=cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[i],
                stage='' if i == 0 else '_stage{}'.format(i + 2),
                proposal_filter=True if i == 0 else False)

            if not self.is_training:
                all_box_pred_list.extend(box_pred_list)
                all_cls_prob_list.extend(cls_prob_list)
                all_proposal_list.extend(proposal_list)
                all_angle_cls_list.extend(angle_cls_list)
            else:
                all_box_pred_list, all_cls_prob_list, all_proposal_list, all_angle_cls_list = box_pred_list, cls_prob_list, proposal_list, angle_cls_list

        with tf.variable_scope('postprocess_detctions'):
            box_pred = tf.concat(all_box_pred_list, axis=0)
            cls_prob = tf.concat(all_cls_prob_list, axis=0)
            proposal = tf.concat(all_proposal_list, axis=0)
            angle_cls = tf.concat(all_angle_cls_list, axis=0)

            boxes, scores, category, boxes_angle = postprocess_detctions(
                refine_bbox_pred=box_pred,
                refine_cls_prob=cls_prob,
                refine_angle_prob=tf.sigmoid(angle_cls),
                anchors=proposal,
                is_training=self.is_training)

            boxes = tf.stop_gradient(boxes)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)
            boxes_angle = tf.stop_gradient(boxes_angle)

        if self.is_training:
            return boxes, scores, category, boxes_angle, self.losses_dict
        else:
            return boxes, scores, category, boxes_angle
    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h,
                                      gtboxes_batch_r,
                                      gpu_id=0):

        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        # 1. build base network
        feature_pyramid = self.build_base_network(input_img_batch)

        # 2. build rpn
        rpn_box_pred, rpn_cls_score, rpn_cls_prob = self.rpn_net(
            feature_pyramid)

        # 3. generate_anchors
        anchors = self.make_anchors(feature_pyramid)

        # 4. postprocess rpn proposals. such as: decode, clip, filter
        if not self.is_training:
            with tf.variable_scope('postprocess_detctions'):
                boxes, scores, category = postprocess_detctions(
                    rpn_bbox_pred=rpn_box_pred,
                    rpn_cls_prob=rpn_cls_prob,
                    anchors=anchors,
                    is_training=self.is_training)
                return boxes, scores, category

        #  5. build loss
        else:
            with tf.variable_scope('build_loss'):
                labels, target_delta, anchor_states, target_boxes = tf.py_func(
                    func=anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, anchors, gpu_id],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                if self.method == 'H':
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 0)
                else:
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 1)

                cls_loss = losses.focal_loss(labels, rpn_cls_score,
                                             anchor_states)

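                # REG_LOSS_MODE selects the box-regression loss: 0 = IoU-weighted smooth L1,
                # 1 = the atan variant (smooth_l1_loss_atan), anything else = plain smooth L1.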
                if cfgs.REG_LOSS_MODE == 0:
                    reg_loss = losses.iou_smooth_l1_loss(
                        target_delta, rpn_box_pred, anchor_states,
                        target_boxes, anchors)
                elif cfgs.REG_LOSS_MODE == 1:
                    reg_loss = losses.smooth_l1_loss_atan(
                        target_delta, rpn_box_pred, anchor_states)
                else:
                    reg_loss = losses.smooth_l1_loss(target_delta,
                                                     rpn_box_pred,
                                                     anchor_states)

                losses_dict = {
                    'cls_loss': cls_loss * cfgs.CLS_WEIGHT,
                    'reg_loss': reg_loss * cfgs.REG_WEIGHT
                }

            with tf.variable_scope('postprocess_detctions'):
                boxes, scores, category = postprocess_detctions(
                    rpn_bbox_pred=rpn_box_pred,
                    rpn_cls_prob=rpn_cls_prob,
                    anchors=anchors,
                    is_training=self.is_training)
                boxes = tf.stop_gradient(boxes)
                scores = tf.stop_gradient(scores)
                category = tf.stop_gradient(category)

                return boxes, scores, category, losses_dict
    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h,
                                      gtboxes_batch_r,
                                      gt_encode_label,
                                      gpu_id=0):

        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

            gt_encode_label = tf.reshape(gt_encode_label,
                                         [-1, self.coding_len])
            gt_encode_label = tf.cast(gt_encode_label, tf.float32)

        # 1. build base network
        feature_pyramid = self.build_base_network(input_img_batch)

        # 2. build rpn
        rpn_box_pred, rpn_cls_score, rpn_cls_prob, rpn_angle_cls = self.rpn_net(
            feature_pyramid)

        # 3. generate_anchors
        anchors = self.make_anchors(feature_pyramid)

        # 4. postprocess rpn proposals. such as: decode, clip, filter
        if self.is_training:
            with tf.variable_scope('build_loss'):
                labels, target_delta, anchor_states, target_boxes, target_encode_label = tf.py_func(
                    func=anchor_target_layer,
                    inp=[
                        gtboxes_batch_h, gtboxes_batch_r, gt_encode_label,
                        anchors, gpu_id
                    ],
                    Tout=[
                        tf.float32, tf.float32, tf.float32, tf.float32,
                        tf.float32
                    ])

                if self.method == 'H':
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 0)
                else:
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 1)

                cls_loss = losses.focal_loss(labels, rpn_cls_score,
                                             anchor_states)

                reg_loss = losses.smooth_l1_loss(target_delta, rpn_box_pred,
                                                 anchor_states)

                # angle_cls_loss = losses_dcl.angle_cls_focal_loss(target_encode_label, rpn_angle_cls,
                #                                                  anchor_states, decimal_weight=None)
                angle_cls_loss = losses_dcl.angle_cls_period_focal_loss(
                    target_encode_label,
                    rpn_angle_cls,
                    anchor_states,
                    target_boxes,
                    decimal_weight=None)

                self.losses_dict = {
                    'cls_loss': cls_loss * cfgs.CLS_WEIGHT,
                    'reg_loss': reg_loss * cfgs.REG_WEIGHT,
                    'angle_cls_loss': angle_cls_loss * cfgs.ANGLE_WEIGHT
                }

        with tf.variable_scope('postprocess_detctions'):
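            # sigmoid turns the angle-classification logits into per-bin probabilities used when
            # decoding the rotated boxes; the raw logits are returned alongside them.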
            scores, category, boxes_angle, angle_logits = postprocess_detctions(
                rpn_bbox_pred=rpn_box_pred,
                rpn_cls_prob=rpn_cls_prob,
                rpn_angle_prob=tf.sigmoid(rpn_angle_cls),
                rpn_angle_logits=rpn_angle_cls,
                anchors=anchors,
                is_training=self.is_training)
            # boxes = tf.stop_gradient(boxes)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)
            boxes_angle = tf.stop_gradient(boxes_angle)
            angle_logits = tf.stop_gradient(angle_logits)

        if self.is_training:
            return scores, category, boxes_angle, angle_logits, self.losses_dict
        else:
            return scores, category, boxes_angle, angle_logits
Example #7
    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h,
                                      gtboxes_batch_r,
                                      gpu_id=0):

        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)
            # changed from 6 to 9 values: 4 corner points (8 coords) plus one label
            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 9])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        # 1. build base network
        feature_pyramid = self.build_base_network(input_img_batch)

        # 2. build rpn (modified for 8-value regression)
        rpn_box_pred, rpn_cls_score, rpn_cls_prob = self.rpn_net(
            feature_pyramid)

        # 3. generate_anchors (unchanged)
        anchors = self.make_anchors(feature_pyramid)

        # 4. postprocess rpn proposals (decode, clip, filter); modified here
        if not self.is_training:
            with tf.variable_scope('postprocess_detctions'):
                boxes, scores, category = postprocess_detctions(
                    rpn_bbox_pred=rpn_box_pred,
                    rpn_cls_prob=rpn_cls_prob,
                    anchors=anchors,
                    is_training=self.is_training)
                return boxes, scores, category

        #  5. build loss
        else:
            with tf.variable_scope('build_loss'):
                # anchor target layer modified for the 8-value (4-point) regression
                labels, target_delta, anchor_states, target_boxes = tf.py_func(
                    func=anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, anchors, gpu_id],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                # if self.method == 'H':
                #     self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 0)
                # else:
                #     self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 1)

                cls_loss = losses.focal_loss(labels, rpn_cls_score,
                                             anchor_states)

                if cfgs.USE_IOU_FACTOR:
                    reg_loss = losses.iou_smooth_l1_loss(
                        target_delta, rpn_box_pred, anchor_states,
                        target_boxes, anchors)
                else:
                    reg_loss = losses.smooth_cl1_loss_4p_align1(
                        target_delta, rpn_box_pred, anchor_states, anchors)
                    # reg_loss = losses.regress_smooth_l1_loss_angle(target_delta, rpn_box_pred, anchor_states,anchors)

                losses_dict = {
                    'cls_loss': cls_loss * cfgs.CLS_WEIGHT,
                    'reg_loss': reg_loss * cfgs.REG_WEIGHT
                }

            with tf.variable_scope('postprocess_detctions'):
                boxes, scores, category = postprocess_detctions(
                    rpn_bbox_pred=rpn_box_pred,
                    rpn_cls_prob=rpn_cls_prob,
                    anchors=anchors,
                    is_training=self.is_training)
                boxes = tf.stop_gradient(boxes)
                scores = tf.stop_gradient(scores)
                category = tf.stop_gradient(category)

                return boxes, scores, category, losses_dict
    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h,
                                      gtboxes_batch_r,
                                      gpu_id=0):

        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        if cfgs.USE_SUPERVISED_MASK:
            feature_pyramid, mask_list, dot_layer_list = self.build_base_network(
                input_img_batch)
        else:
            feature_pyramid = self.build_base_network(input_img_batch)
            dot_layer_list = None
            mask_list = []

        # 2. build rpn
        # if cfgs.USE_SUPERVISED_MASK:
        #     for i, d in enumerate(dot_layer_list):
        #         feature_pyramid['P{}'.format(i + 3)] *= d
        rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(
            feature_pyramid, 'rpn_net')

        # 3. generate anchors and mask
        anchor_list = self.make_anchors(feature_pyramid)

        if cfgs.USE_SUPERVISED_MASK:
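            # build per-level ground-truth masks from the gt boxes so that the intermediate
            # mask predictions can be supervised.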
            mask_gt_list = self.generate_mask(mask_list, img_shape,
                                              gtboxes_batch_h, gtboxes_batch_r,
                                              feature_pyramid)

        rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
        rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
        # rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)
        anchors = tf.concat(anchor_list, axis=0)

        if self.is_training:
            with tf.variable_scope('build_loss'):
                labels, target_delta, anchor_states, target_boxes = tf.py_func(
                    func=anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, anchors],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                if self.method == 'H':
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 0)
                else:
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 1)

                cls_loss = losses.focal_loss(labels, rpn_cls_score,
                                             anchor_states)
                if cfgs.USE_IOU_FACTOR:
                    reg_loss = losses.iou_smooth_l1_loss_(
                        target_delta, rpn_box_pred, anchor_states,
                        target_boxes, anchors)
                else:
                    reg_loss = losses.smooth_l1_loss(target_delta,
                                                     rpn_box_pred,
                                                     anchor_states)

                if cfgs.USE_SUPERVISED_MASK:
                    with tf.variable_scope("supervised_mask_loss"):
                        mask_loss = 0.0
                        for i in range(len(mask_list)):
                            a_mask, a_mask_gt = mask_list[i], mask_gt_list[i]
                            # b, h, w, c = a_mask.shape
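                            # with BINARY_MASK the mask head predicts foreground/background only;
                            # otherwise one channel per class plus background.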
                            last_dim = 2 if cfgs.BINARY_MASK else cfgs.CLASS_NUM + 1
                            a_mask = tf.reshape(a_mask, shape=[-1, last_dim])
                            a_mask_gt = tf.reshape(a_mask_gt, shape=[-1])
                            a_mask_loss = tf.reduce_mean(
                                tf.nn.sparse_softmax_cross_entropy_with_logits(
                                    logits=a_mask, labels=a_mask_gt))
                            mask_loss += a_mask_loss
                        self.losses_dict[
                            'mask_loss'] = mask_loss * cfgs.SUPERVISED_MASK_LOSS_WEIGHT / float(
                                len(mask_list))

                self.losses_dict['cls_loss'] = cls_loss * cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * cfgs.REG_WEIGHT

        box_pred_list, cls_prob_list, proposal_list = rpn_box_pred_list, rpn_cls_prob_list, anchor_list

        all_box_pred_list, all_cls_prob_list, all_proposal_list = [], [], []

        for i in range(cfgs.NUM_REFINE_STAGE):
            box_pred_list, cls_prob_list, proposal_list = self.refine_stage(
                input_img_batch,
                gtboxes_batch_r,
                box_pred_list,
                cls_prob_list,
                proposal_list,
                feature_pyramid,
                dot_layer_list,
                gpu_id,
                pos_threshold=cfgs.REFINE_IOU_POSITIVE_THRESHOLD[i],
                neg_threshold=cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[i],
                stage='' if i == 0 else '_stage{}'.format(i + 2),
                proposal_filter=True if i == 0 else False)

            if not self.is_training:
                all_box_pred_list.extend(box_pred_list)
                all_cls_prob_list.extend(cls_prob_list)
                all_proposal_list.extend(proposal_list)
            else:
                all_box_pred_list, all_cls_prob_list, all_proposal_list = box_pred_list, cls_prob_list, proposal_list

        with tf.variable_scope('postprocess_detctions'):
            box_pred = tf.concat(all_box_pred_list, axis=0)
            cls_prob = tf.concat(all_cls_prob_list, axis=0)
            proposal = tf.concat(all_proposal_list, axis=0)

            boxes, scores, category = postprocess_detctions(
                refine_bbox_pred=box_pred,
                refine_cls_prob=cls_prob,
                anchors=proposal,
                is_training=self.is_training,
                gpu_id=gpu_id)
            boxes = tf.stop_gradient(boxes)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)

        if self.is_training:
            return boxes, scores, category, self.losses_dict
        else:
            return boxes, scores, category
Example #9
    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h,
                                      gtboxes_batch_r,
                                      gt_encode_label,
                                      gpu_id=0):

        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

            gt_encode_label = tf.reshape(gt_encode_label,
                                         [-1, self.coding_len])
            gt_encode_label = tf.cast(gt_encode_label, tf.float32)

        # 1. build base network
        feature_pyramid = self.build_base_network(input_img_batch)

        # 2. build rpn
        rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(
            feature_pyramid, 'rpn_net')

        # 3. generate_anchors
        anchor_list = self.make_anchors(feature_pyramid)
        rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
        rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
        # rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)
        anchors = tf.concat(anchor_list, axis=0)

        if self.is_training:
            with tf.variable_scope('build_loss'):
                labels, target_delta, anchor_states, target_boxes = tf.py_func(
                    func=anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, anchors],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                if self.method == 'H':
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 0)
                else:
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 1)

                cls_loss = losses.focal_loss(labels, rpn_cls_score,
                                             anchor_states)
                if cfgs.USE_IOU_FACTOR:
                    reg_loss = losses.iou_smooth_l1_loss_(
                        target_delta, rpn_box_pred, anchor_states,
                        target_boxes, anchors)
                else:
                    reg_loss = losses.smooth_l1_loss(target_delta,
                                                     rpn_box_pred,
                                                     anchor_states)

                self.losses_dict['cls_loss'] = cls_loss * cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * cfgs.REG_WEIGHT

        with tf.variable_scope('refine_feature_pyramid'):
            refine_feature_pyramid = {}
            refine_boxes_list = []

            for box_pred, cls_prob, anchor, stride, level in \
                    zip(rpn_box_pred_list, rpn_cls_prob_list, anchor_list,
                        cfgs.ANCHOR_STRIDE, cfgs.LEVEL):

                box_pred = tf.reshape(box_pred,
                                      [-1, self.num_anchors_per_location, 5])
                anchor = tf.reshape(anchor, [
                    -1, self.num_anchors_per_location,
                    5 if self.method == 'R' else 4
                ])
                cls_prob = tf.reshape(
                    cls_prob,
                    [-1, self.num_anchors_per_location, cfgs.CLASS_NUM])

                cls_max_prob = tf.reduce_max(cls_prob, axis=-1)
                box_pred_argmax = tf.cast(
                    tf.reshape(tf.argmax(cls_max_prob, axis=-1), [-1, 1]),
                    tf.int32)
                indices = tf.cast(
                    tf.cumsum(tf.ones_like(box_pred_argmax), axis=0),
                    tf.int32) - tf.constant(1, tf.int32)
                indices = tf.concat([indices, box_pred_argmax], axis=-1)

                box_pred_filter = tf.reshape(tf.gather_nd(box_pred, indices),
                                             [-1, 5])
                anchor_filter = tf.reshape(tf.gather_nd(
                    anchor, indices), [-1, 5 if self.method == 'R' else 4])

                if cfgs.METHOD == 'H':
                    x_c = (anchor_filter[:, 2] + anchor_filter[:, 0]) / 2
                    y_c = (anchor_filter[:, 3] + anchor_filter[:, 1]) / 2
                    h = anchor_filter[:, 2] - anchor_filter[:, 0] + 1
                    w = anchor_filter[:, 3] - anchor_filter[:, 1] + 1
                    theta = -90 * tf.ones_like(x_c)
                    anchor_filter = tf.transpose(
                        tf.stack([x_c, y_c, w, h, theta]))

                boxes_filter = bbox_transform.rbbox_transform_inv(
                    boxes=anchor_filter, deltas=box_pred_filter)
                refine_boxes_list.append(boxes_filter)
                center_point = boxes_filter[:, :2] / stride

                refine_feature_pyramid[level] = self.refine_feature_op(
                    points=center_point,
                    feature_map=feature_pyramid[level],
                    name=level)

        refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list, refine_angle_cls_list = self.refine_net(
            refine_feature_pyramid, 'refine_net')

        refine_box_pred = tf.concat(refine_box_pred_list, axis=0)
        refine_cls_score = tf.concat(refine_cls_score_list, axis=0)
        refine_cls_prob = tf.concat(refine_cls_prob_list, axis=0)
        refine_angle_cls = tf.concat(refine_angle_cls_list, axis=0)
        refine_boxes = tf.concat(refine_boxes_list, axis=0)

        # 4. postprocess rpn proposals. such as: decode, clip, filter
        if self.is_training:
            with tf.variable_scope('build_refine_loss'):
                refine_labels, refine_target_delta, refine_box_states, refine_target_boxes, refine_target_encode_label = tf.py_func(
                    func=refinebox_target_layer,
                    inp=[
                        gtboxes_batch_r, gt_encode_label, refine_boxes,
                        cfgs.REFINE_IOU_POSITIVE_THRESHOLD[0],
                        cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[0], gpu_id
                    ],
                    Tout=[
                        tf.float32, tf.float32, tf.float32, tf.float32,
                        tf.float32
                    ])

                self.add_anchor_img_smry(input_img_batch, refine_boxes,
                                         refine_box_states, 1)

                refine_cls_loss = losses.focal_loss(refine_labels,
                                                    refine_cls_score,
                                                    refine_box_states)
                refine_reg_loss = losses.smooth_l1_loss(
                    refine_target_delta, refine_box_pred, refine_box_states)
                angle_cls_loss = losses_dcl.angle_cls_period_focal_loss(
                    refine_target_encode_label,
                    refine_angle_cls,
                    refine_box_states,
                    refine_target_boxes,
                    decimal_weight=cfgs.DATASET_NAME.startswith('DOTA'))

                self.losses_dict[
                    'refine_cls_loss'] = refine_cls_loss * cfgs.CLS_WEIGHT
                self.losses_dict[
                    'refine_reg_loss'] = refine_reg_loss * cfgs.REG_WEIGHT
                self.losses_dict[
                    'angle_cls_loss'] = angle_cls_loss * cfgs.ANGLE_WEIGHT

        with tf.variable_scope('postprocess_detctions'):
            scores, category, boxes_angle = postprocess_detctions(
                refine_bbox_pred=refine_box_pred,
                refine_cls_prob=refine_cls_prob,
                refine_angle_prob=tf.sigmoid(refine_angle_cls),
                refine_boxes=refine_boxes,
                is_training=self.is_training)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)
            boxes_angle = tf.stop_gradient(boxes_angle)

        if self.is_training:
            return scores, category, boxes_angle, self.losses_dict
        else:
            return scores, category, boxes_angle
    def fast_rcnn_loss(self):
        with tf.variable_scope('fast_rcnn_loss'):
            minibatch_indices, minibatch_reference_boxes_mattached_gtboxes, \
            minibatch_reference_boxes_mattached_gtboxes_rotate, \
            minibatch_reference_boxes_mattached_head_quadrant, minibatch_object_mask, \
            minibatch_label_one_hot = self.fast_rcnn_minibatch(self.fast_rcnn_all_level_proposals)

            minibatch_reference_boxes = tf.gather(
                self.fast_rcnn_all_level_proposals, minibatch_indices)

            minibatch_encode_boxes = tf.gather(
                self.fast_rcnn_encode_boxes,
                minibatch_indices)  # [minibatch_size, num_classes*4]

            minibatch_encode_boxes_rotate = tf.gather(
                self.fast_rcnn_encode_boxes_rotate,
                minibatch_indices)  # [minibatch_size, num_classes*5]

            minibatch_head_quadrant = tf.gather(self.fast_rcnn_head_quadrant,
                                                minibatch_indices)

            minibatch_scores = tf.gather(self.fast_rcnn_scores,
                                         minibatch_indices)
            minibatch_scores_rotate = tf.gather(self.fast_rcnn_scores_rotate,
                                                minibatch_indices)

            # encode gtboxes
            minibatch_encode_gtboxes = \
                encode_and_decode.encode_boxes(
                    unencode_boxes=minibatch_reference_boxes_mattached_gtboxes,
                    reference_boxes=minibatch_reference_boxes,
                    scale_factors=self.scale_factors
                )

            minibatch_encode_gtboxes_rotate = encode_and_decode.encode_boxes_rotate(
                unencode_boxes=minibatch_reference_boxes_mattached_gtboxes_rotate,
                reference_boxes=minibatch_reference_boxes,
                scale_factors=self.scale_factors)
            ############### Class-agnostic Without tile
            # [minibatch_size, num_classes*4]
            # minibatch_encode_gtboxes = tf.tile(minibatch_encode_gtboxes, [1, self.num_classes])
            ############### Class-agnostic Without tile
            # [minibatch_size, num_classes*5]
            # minibatch_encode_gtboxes_rotate = tf.tile(minibatch_encode_gtboxes_rotate, [1, self.num_classes])
            ############### Class-agnostic Without tile
            # minibatch_gt_head_quadrant = tf.tile(minibatch_reference_boxes_mattached_head_quadrant, [1, self.num_classes])
            minibatch_gt_head_quadrant = minibatch_reference_boxes_mattached_head_quadrant

            class_weights_list = []
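            # per-class weights zero out every class column except the ground-truth one; they are
            # unused below because the regression losses are class-agnostic (classes_weights=None).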
            category_list = tf.unstack(minibatch_label_one_hot, axis=1)
            for i in range(1, self.num_classes + 1):
                tmp_class_weights = tf.ones(
                    shape=[tf.shape(minibatch_encode_boxes)[0], 4],
                    dtype=tf.float32)
                tmp_class_weights = tmp_class_weights * tf.expand_dims(
                    category_list[i], axis=1)
                class_weights_list.append(tmp_class_weights)
            class_weights = tf.concat(
                class_weights_list, axis=1)  # [minibatch_size, num_classes*4]

            class_weights_list_rotate = []
            category_list_rotate = tf.unstack(minibatch_label_one_hot, axis=1)
            for i in range(1, self.num_classes + 1):
                tmp_class_weights_rotate = tf.ones(
                    shape=[tf.shape(minibatch_encode_boxes_rotate)[0], 5],
                    dtype=tf.float32)
                tmp_class_weights_rotate = tmp_class_weights_rotate * tf.expand_dims(
                    category_list_rotate[i], axis=1)
                class_weights_list_rotate.append(tmp_class_weights_rotate)
            class_weights_rotate = tf.concat(
                class_weights_list_rotate,
                axis=1)  # [minibatch_size, num_classes*5]

            class_weights_list_head = []
            category_list_head = tf.unstack(minibatch_label_one_hot, axis=1)
            for i in range(1, self.num_classes + 1):
                tmp_class_weights_head = tf.ones(
                    shape=[tf.shape(minibatch_head_quadrant)[0], 4],
                    dtype=tf.float32)
                tmp_class_weights_head = tmp_class_weights_head * tf.expand_dims(
                    category_list_head[i], axis=1)
                class_weights_list_head.append(tmp_class_weights_head)
            class_weights_head = tf.concat(class_weights_list_head, axis=1)
            # loss
            with tf.variable_scope('fast_rcnn_classification_loss'):
                # fast_rcnn_classification_loss = slim.losses.softmax_cross_entropy(logits=minibatch_scores,
                #                                                                   onehot_labels=minibatch_label_one_hot)
                fast_rcnn_classification_loss = losses.focal_loss(
                    prediction_tensor=minibatch_scores,
                    target_tensor=minibatch_label_one_hot)
                slim.losses.add_loss(fast_rcnn_classification_loss)
            with tf.variable_scope('fast_rcnn_location_loss'):
                # fast_rcnn_location_loss = losses.l1_smooth_losses(predict_boxes=minibatch_encode_boxes,
                #                                                   gtboxes=minibatch_encode_gtboxes,
                #                                                   object_weights=minibatch_object_mask,
                #                                                   classes_weights=class_weights)
                # Class-agnostic regression
                fast_rcnn_location_loss = losses.l1_smooth_losses(
                    predict_boxes=minibatch_encode_boxes,
                    gtboxes=minibatch_encode_gtboxes,
                    object_weights=minibatch_object_mask,
                    classes_weights=None)
                slim.losses.add_loss(fast_rcnn_location_loss)

            with tf.variable_scope('fast_rcnn_classification_rotate_loss'):
                # fast_rcnn_classification_rotate_loss = slim.losses.softmax_cross_entropy(logits=minibatch_scores_rotate,
                #                                                                          onehot_labels=minibatch_label_one_hot)
                fast_rcnn_classification_rotate_loss = losses.focal_loss(
                    prediction_tensor=minibatch_scores_rotate,
                    target_tensor=minibatch_label_one_hot)
                slim.losses.add_loss(fast_rcnn_classification_rotate_loss)

            with tf.variable_scope('fast_rcnn_location_rotate_loss'):
                # fast_rcnn_location_rotate_loss = losses.l1_smooth_losses(predict_boxes=minibatch_encode_boxes_rotate,
                #                                                          gtboxes=minibatch_encode_gtboxes_rotate,
                #                                                          object_weights=minibatch_object_mask,
                #                                                          classes_weights=class_weights_rotate)
                # Class-agnostic regression
                fast_rcnn_location_rotate_loss = losses.l1_smooth_losses(
                    predict_boxes=minibatch_encode_boxes_rotate,
                    gtboxes=minibatch_encode_gtboxes_rotate,
                    object_weights=minibatch_object_mask,
                    classes_weights=None)
                slim.losses.add_loss(fast_rcnn_location_rotate_loss)

            with tf.variable_scope('fast_rcnn_head_quadrant_loss'):
                # fast_rcnn_head_quadrant_loss = losses.l1_smooth_losses(predict_boxes=minibatch_head_quadrant,
                #                                                        gtboxes=minibatch_gt_head_quadrant,
                #                                                        object_weights=minibatch_object_mask,
                #                                                        classes_weights=class_weights_head)
                # Class-agnostic regression
                fast_rcnn_head_quadrant_loss = losses.l1_smooth_losses(
                    predict_boxes=minibatch_head_quadrant,
                    gtboxes=minibatch_gt_head_quadrant,
                    object_weights=minibatch_object_mask,
                    classes_weights=None)
                slim.losses.add_loss(
                    fast_rcnn_head_quadrant_loss *
                    10)  # weight the head-quadrant loss more heavily

            return fast_rcnn_location_loss, fast_rcnn_classification_loss, \
                   fast_rcnn_location_rotate_loss, fast_rcnn_classification_rotate_loss, fast_rcnn_head_quadrant_loss * 10
    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h,
                                      gtboxes_batch_r,
                                      gt_encode_label,
                                      gpu_id=0):

        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

            gt_encode_label = tf.reshape(gt_encode_label,
                                         [-1, self.coding_len])
            gt_encode_label = tf.cast(gt_encode_label, tf.float32)

        # 1. build base network
        feature_pyramid = self.build_base_network(input_img_batch)

        # 2. build rpn
        rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(
            feature_pyramid, 'rpn_net')

        # 3. generate_anchors
        anchor_list = self.make_anchors(feature_pyramid)
        rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
        rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
        # rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)
        anchors = tf.concat(anchor_list, axis=0)

        if self.is_training:
            with tf.variable_scope('build_loss'):
                labels, target_delta, anchor_states, target_boxes = tf.py_func(
                    func=anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, anchors],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                if self.method == 'H':
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 0)
                else:
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 1)

                cls_loss = losses.focal_loss(labels, rpn_cls_score,
                                             anchor_states)
                if cfgs.USE_IOU_FACTOR:
                    reg_loss = losses.iou_smooth_l1_loss_(
                        target_delta, rpn_box_pred, anchor_states,
                        target_boxes, anchors)
                else:
                    reg_loss = losses.smooth_l1_loss(target_delta,
                                                     rpn_box_pred,
                                                     anchor_states)

                self.losses_dict['cls_loss'] = cls_loss * cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * cfgs.REG_WEIGHT

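        # refine stage: an extra head on the same feature pyramid predicts refined
        # box deltas, class scores and an angle-classification branch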
        refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list, refine_angle_cls_list = self.refine_net(
            feature_pyramid, 'refine_net')

        refine_box_pred = tf.concat(refine_box_pred_list, axis=0)
        refine_cls_score = tf.concat(refine_cls_score_list, axis=0)
        refine_cls_prob = tf.concat(refine_cls_prob_list, axis=0)
        refine_angle_cls = tf.concat(refine_angle_cls_list, axis=0)

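        # convert horizontal anchors (x1, y1, x2, y2) to the rotated format
        # (x_c, y_c, w, h, theta) with theta fixed at -90 degrees; w and h are taken
        # from the y- and x-extents respectively because of that rotation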
        if cfgs.METHOD == 'H':
            x_c = (anchors[:, 2] + anchors[:, 0]) / 2
            y_c = (anchors[:, 3] + anchors[:, 1]) / 2
            h = anchors[:, 2] - anchors[:, 0] + 1
            w = anchors[:, 3] - anchors[:, 1] + 1
            theta = -90 * tf.ones_like(x_c)
            anchors = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))
        # if cfgs.ANGLE_RANGE == 180:
        #     anchors = tf.py_func(coordinate_present_convert,
        #                          inp=[anchors, -1],
        #                          Tout=[tf.float32])
        #     anchors = tf.reshape(anchors, [-1, 5])

        refine_boxes = bbox_transform.rbbox_transform_inv(boxes=anchors,
                                                          deltas=rpn_box_pred)

        # 4. build refine-stage losses (training only)
        if self.is_training:
            with tf.variable_scope('build_refine_loss'):
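                # refinebox_target_layer (numpy, wrapped in tf.py_func) matches the
                # refined boxes against the rotated ground truth with the refine-stage
                # IoU thresholds and returns labels, regression targets, box states,
                # matched gt boxes and encoded angle labels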
                refine_labels, refine_target_delta, refine_box_states, refine_target_boxes, refine_target_encode_label = tf.py_func(
                    func=refinebox_target_layer,
                    inp=[
                        gtboxes_batch_r, gt_encode_label, refine_boxes,
                        cfgs.REFINE_IOU_POSITIVE_THRESHOLD[0],
                        cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[0], gpu_id
                    ],
                    Tout=[
                        tf.float32, tf.float32, tf.float32, tf.float32,
                        tf.float32
                    ])

                self.add_anchor_img_smry(input_img_batch, refine_boxes,
                                         refine_box_states, 1)

                refine_cls_loss = losses.focal_loss(refine_labels,
                                                    refine_cls_score,
                                                    refine_box_states)
                refine_reg_loss = losses.smooth_l1_loss(
                    refine_target_delta, refine_box_pred, refine_box_states)
                angle_cls_loss = losses_dcl.angle_cls_period_focal_loss(
                    refine_target_encode_label,
                    refine_angle_cls,
                    refine_box_states,
                    refine_target_boxes,
                    decimal_weight=True)

                self.losses_dict[
                    'refine_cls_loss'] = refine_cls_loss * cfgs.CLS_WEIGHT
                self.losses_dict[
                    'refine_reg_loss'] = refine_reg_loss * cfgs.REG_WEIGHT
                self.losses_dict[
                    'angle_cls_loss'] = angle_cls_loss * cfgs.ANGLE_WEIGHT

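        # 5. postprocess the refine-stage outputs (decode, clip, filter); gradients are
        #    stopped so the decoded detections do not feed back into training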
        with tf.variable_scope('postprocess_detctions'):
            scores, category, boxes_angle = postprocess_detctions(
                refine_bbox_pred=refine_box_pred,
                refine_cls_prob=refine_cls_prob,
                refine_angle_prob=tf.sigmoid(refine_angle_cls),
                refine_boxes=refine_boxes,
                is_training=self.is_training)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)
            boxes_angle = tf.stop_gradient(boxes_angle)

        if self.is_training:
            return scores, category, boxes_angle, self.losses_dict
        else:
            return scores, category, boxes_angle
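Both stages above use the same masked regression pattern: losses.smooth_l1_loss(target_delta, rpn_box_pred, anchor_states) for the first stage and losses.smooth_l1_loss(refine_target_delta, refine_box_pred, refine_box_states) for the refine stage. The snippet below is a minimal, self-contained sketch of that pattern, not the repository's losses module; the sigma value, the 1/0/-1 anchor-state convention and the normalization by the number of positive anchors are assumptions.

import tensorflow as tf

def masked_smooth_l1_loss(target_delta, box_pred, anchor_states, sigma=3.0):
    # target_delta, box_pred: [N, k] regression targets / predictions
    # anchor_states: [N], assumed 1 = positive, 0 = negative, -1 = ignore
    sigma_sq = sigma ** 2
    positives = tf.cast(tf.equal(anchor_states, 1.0), tf.float32)
    diff = tf.abs(box_pred - target_delta)
    # piecewise smooth L1: quadratic near zero, linear otherwise
    per_coord = tf.where(tf.less(diff, 1.0 / sigma_sq),
                         0.5 * sigma_sq * tf.square(diff),
                         diff - 0.5 / sigma_sq)
    # only positive anchors contribute to the regression loss
    per_anchor = tf.reduce_sum(per_coord, axis=1) * positives
    normalizer = tf.maximum(tf.reduce_sum(positives), 1.0)
    return tf.reduce_sum(per_anchor) / normalizer

In the training code above the result is further scaled by cfgs.REG_WEIGHT before being stored in self.losses_dict.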
Example #12
    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h,
                                      gtboxes_batch_r,
                                      gthead_quadrant,
                                      gt_smooth_label,
                                      gpu_id=0):

        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

            gthead_quadrant = tf.reshape(gthead_quadrant, [-1, 1])
            gthead_quadrant = tf.cast(gthead_quadrant, tf.int32)

            gt_smooth_label = tf.reshape(gt_smooth_label,
                                         [-1, self.angle_range])
            gt_smooth_label = tf.cast(gt_smooth_label, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        feature_pyramid = self.build_base_network(input_img_batch)

        # 2. build rpn
        rpn_box_pred, rpn_cls_score, rpn_cls_prob, rpn_head_cls, rpn_angle_cls = self.rpn_net(
            feature_pyramid)

        # 3. generate_anchors
        anchors = self.make_anchors(feature_pyramid)

        # 4. build losses (training only)
        if self.is_training:
            with tf.variable_scope('build_loss'):
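                # anchor_target_layer (numpy, wrapped in tf.py_func) assigns every anchor
                # a class label, regression targets, an anchor state (assumed
                # positive/negative/ignore), the matched gt box, a head-quadrant target
                # and a smoothed angle label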
                labels, target_delta, anchor_states, target_boxes, target_head_quadrant, target_smooth_label = tf.py_func(
                    func=anchor_target_layer,
                    inp=[
                        gtboxes_batch_h, gtboxes_batch_r, gthead_quadrant,
                        gt_smooth_label, anchors, gpu_id
                    ],
                    Tout=[
                        tf.float32, tf.float32, tf.float32, tf.float32,
                        tf.float32, tf.float32
                    ])

                if self.method == 'H':
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 0)
                else:
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 1)

                cls_loss = losses.focal_loss(labels, rpn_cls_score,
                                             anchor_states)

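                # regression-loss variant selected by cfgs.REG_LOSS_MODE:
                # 0 = IoU-weighted smooth L1, 1 = smooth L1 with an atan term on the
                # angle, anything else = plain smooth L1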
                if cfgs.REG_LOSS_MODE == 0:
                    reg_loss = losses.iou_smooth_l1_loss(
                        target_delta, rpn_box_pred, anchor_states,
                        target_boxes, anchors)
                elif cfgs.REG_LOSS_MODE == 1:
                    reg_loss = losses.smooth_l1_loss_atan(
                        target_delta, rpn_box_pred, anchor_states)
                else:
                    reg_loss = losses.smooth_l1_loss(target_delta,
                                                     rpn_box_pred,
                                                     anchor_states)

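                # for DOTA the head-quadrant loss is restricted to the listed class
                # indices (presumably categories with a well-defined head direction);
                # other datasets use the plain head focal loss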
                if cfgs.DATASET_NAME.startswith('DOTA'):
                    head_cls_loss = losses.head_specific_cls_focal_loss(
                        target_head_quadrant,
                        rpn_head_cls,
                        anchor_states,
                        labels,
                        specific_cls=[6, 7, 8, 9, 10, 11])
                else:
                    head_cls_loss = losses.head_focal_loss(
                        target_head_quadrant, rpn_head_cls, anchor_states)
                angle_cls_loss = losses.angle_focal_loss(
                    target_smooth_label, rpn_angle_cls, anchor_states)

                self.losses_dict = {
                    'cls_loss': cls_loss * cfgs.CLS_WEIGHT,
                    'reg_loss': reg_loss * cfgs.REG_WEIGHT,
                    'head_cls_loss': head_cls_loss * cfgs.HEAD_WEIGHT,
                    'angle_cls_loss': angle_cls_loss * cfgs.ANGLE_WEIGHT
                }

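        # 5. postprocess rpn proposals: decode, clip, filter; head and angle logits are
        #    first turned into probabilities with a sigmoid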
        with tf.variable_scope('postprocess_detctions'):
            boxes, scores, category, boxes_head, boxes_angle = postprocess_detctions(
                rpn_bbox_pred=rpn_box_pred,
                rpn_cls_prob=rpn_cls_prob,
                rpn_angle_prob=tf.sigmoid(rpn_angle_cls),
                rpn_head_prob=tf.sigmoid(rpn_head_cls),
                anchors=anchors,
                is_training=self.is_training)
            boxes = tf.stop_gradient(boxes)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)
            boxes_head = tf.stop_gradient(boxes_head)
            boxes_angle = tf.stop_gradient(boxes_angle)

        if self.is_training:
            return boxes, scores, category, boxes_head, boxes_angle, self.losses_dict
        else:
            return boxes, scores, category, boxes_head, boxes_angle
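On the classification side both examples call losses.focal_loss(labels, rpn_cls_score, anchor_states) (and its refine-stage counterpart) with the raw logits and the per-anchor states. The snippet below is a minimal sketch of an anchor-state-masked sigmoid focal loss in that style, not the repository's implementation; the alpha/gamma defaults, the one-hot label format and the normalization by the number of positives are assumptions.

import tensorflow as tf

def masked_focal_loss(labels, cls_score, anchor_states, alpha=0.25, gamma=2.0):
    # labels: [N, num_classes] one-hot targets; cls_score: [N, num_classes] logits
    # anchor_states: [N], assumed 1 = positive, 0 = negative, -1 = ignore
    valid = tf.cast(tf.not_equal(anchor_states, -1.0), tf.float32)
    probs = tf.sigmoid(cls_score)
    # p_t is the predicted probability assigned to the target outcome of each entry
    p_t = tf.where(tf.equal(labels, 1.0), probs, 1.0 - probs)
    alpha_t = tf.where(tf.equal(labels, 1.0),
                       alpha * tf.ones_like(labels),
                       (1.0 - alpha) * tf.ones_like(labels))
    # the (1 - p_t)^gamma factor down-weights easy, well-classified entries
    per_entry = -alpha_t * tf.pow(1.0 - p_t, gamma) * tf.math.log(tf.maximum(p_t, 1e-8))
    per_anchor = tf.reduce_sum(per_entry, axis=1) * valid  # drop ignored anchors
    num_pos = tf.reduce_sum(tf.cast(tf.equal(anchor_states, 1.0), tf.float32))
    return tf.reduce_sum(per_anchor) / tf.maximum(num_pos, 1.0)

As in the examples, the resulting loss would then be weighted (cfgs.CLS_WEIGHT) and added to the losses dict alongside the regression and angle terms.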