def generate_mask(self, mask_list, img_shape, gtboxes_batch_h,
                      gtboxes_batch_r, feature_pyramid):
        """Build ground-truth mask tensors for the supervised pyramid levels.

        A mask is produced for each of the first ``len(mask_list)`` level
        names in ``cfgs.LEVEL``, but only when ``cfgs.USE_SUPERVISED_MASK``
        and ``self.is_training`` are both truthy. Each mask is computed by
        ``tf.py_func`` around ``mask_utils.make_gt_mask`` (mask type ``'h'``,
        fed ``gtboxes_batch_h``) or ``mask_utils.make_r_gt_mask`` (mask type
        ``'r'``, fed ``gtboxes_batch_r``), optionally binarised, appended to
        the result and sent to ``mask_utils.vis_mask_tfsmry``.

        Args:
            mask_list: Predicted masks; only its length is read here, to
                decide how many leading levels are supervised.
            img_shape: Indexable shape; elements 1 and 2 are forwarded to the
                mask builder (presumably image height and width - confirm
                against the caller).
            gtboxes_batch_h: Ground-truth boxes passed to ``make_gt_mask``.
            gtboxes_batch_r: Ground-truth boxes passed to ``make_r_gt_mask``.
            feature_pyramid: Mapping indexed by the level names in
                ``cfgs.LEVEL``; dimensions 1 and 2 of each entry give the
                mask size.

        Returns:
            A list of int32 mask tensors, one per supervised level. The list
            is empty when supervision is disabled, when not training, or when
            ``mask_list`` is empty.

        Raises:
            ValueError: If masks are required and ``cfgs.MASK_TYPE`` (after
                stripping whitespace) is neither ``'h'`` nor ``'r'``.
        """
        mask_gt_list = []

        # Both switches are loop-invariant: if either is off no mask is built,
        # so skip creating any graph ops at all.
        if not (cfgs.USE_SUPERVISED_MASK and self.is_training):
            return mask_gt_list

        # Only the first len(mask_list) levels have a predicted mask to
        # supervise.
        supervised_levels = list(cfgs.LEVEL)[:len(mask_list)]
        if not supervised_levels:
            return mask_gt_list

        # Resolve the mask builder once. Previously an unsupported MASK_TYPE
        # left `mask` unbound and surfaced as an UnboundLocalError.
        mask_type = cfgs.MASK_TYPE.strip()
        if mask_type == 'h':
            make_mask, gtboxes = mask_utils.make_gt_mask, gtboxes_batch_h
        elif mask_type == 'r':
            make_mask, gtboxes = mask_utils.make_r_gt_mask, gtboxes_batch_r
        else:
            raise ValueError(
                "Unsupported cfgs.MASK_TYPE %r: expected 'h' or 'r'" %
                (cfgs.MASK_TYPE,))

        for level in supervised_levels:
            feature_shape = tf.shape(feature_pyramid[level])
            p_h, p_w = feature_shape[1], feature_shape[2]

            mask = tf.py_func(
                make_mask,
                [p_h, p_w, img_shape[1], img_shape[2], gtboxes],
                Tout=tf.int32)
            if cfgs.BINARY_MASK:
                # Collapse every positive value to 1, everything else to 0.
                mask = tf.where(tf.greater(mask, 0), tf.ones_like(mask),
                                tf.zeros_like(mask))
            mask_gt_list.append(mask)
            mask_utils.vis_mask_tfsmry(mask, name="MASK/%s" % level)
        return mask_gt_list
# Example #2
# 0
    def build_whole_detection_network(self, input_img_batch, gtboxes_batch,
                                      gtboxes_r_batch, gpu_id):
        """Assemble the detection graph: backbone, RPN, RoI sampling, Fast-RCNN.

        The graph is built in this order: base network, per-level RPN heads,
        anchors (plus ground-truth masks when supervised masks are enabled),
        RPN proposal post-processing, training-only target sampling, the
        Fast-RCNN head, training-only loss construction, and final
        post-processing.

        Args:
            input_img_batch: Image batch tensor fed to the base network; its
                dynamic shape is also passed on to the anchor/proposal steps.
            gtboxes_batch: Ground-truth boxes; reshaped to ``[-1, 5]`` and
                cast to float32 when training.
            gtboxes_r_batch: Second set of ground-truth boxes; reshaped to
                ``[-1, 6]`` and cast to float32 when training.
            gpu_id: Forwarded unchanged to ``self.postprocess_fastrcnn``.

        Returns:
            ``(final_bbox, final_scores, final_category)`` at inference time;
            when ``self.is_training`` is truthy ``self.loss_dict`` is appended
            as a fourth element (presumably filled by ``self.build_loss`` -
            confirm there).

        Raises:
            ValueError: If a ground-truth mask has to be built and
                ``cfgs.MASK_TYPE`` (stripped) is neither ``'h'`` nor ``'r'``.
        """

        if self.is_training:
            # ensure shape is [M, 5]
            gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 5])
            gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

            # ensure shape is [M, 6]
            gtboxes_r_batch = tf.reshape(gtboxes_r_batch, [-1, 6])
            gtboxes_r_batch = tf.cast(gtboxes_r_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        mask_list = []
        if cfgs.USE_SUPERVISED_MASK:
            P_list, mask_list = self.build_base_network(
                input_img_batch)  # [P2, P3, P4, P5, P6], [mask_p2, mask_p3]
        else:
            P_list = self.build_base_network(
                input_img_batch)  # [P2, P3, P4, P5, P6]

        # 2. build rpn
        with tf.variable_scope('build_rpn',
                               regularizer=slim.l2_regularizer(
                                   cfgs.WEIGHT_DECAY)):

            fpn_cls_score = []
            fpn_box_pred = []
            for level_name, p in zip(cfgs.LEVLES, P_list):
                if cfgs.SHARE_HEADS:
                    # Shared heads: the first level creates the variables,
                    # every later level reuses them under the same scopes.
                    reuse_flag = None if level_name == cfgs.LEVLES[0] else True
                    scope_list = [
                        'rpn_conv/3x3', 'rpn_cls_score', 'rpn_bbox_pred'
                    ]
                else:
                    # Separate heads: one set of scopes per level.
                    reuse_flag = None
                    scope_list = [
                        'rpn_conv/3x3_%s' % level_name,
                        'rpn_cls_score_%s' % level_name,
                        'rpn_bbox_pred_%s' % level_name
                    ]
                rpn_conv3x3 = slim.conv2d(p,
                                          512, [3, 3],
                                          trainable=self.is_training,
                                          weights_initializer=cfgs.INITIALIZER,
                                          padding="SAME",
                                          activation_fn=tf.nn.relu,
                                          scope=scope_list[0],
                                          reuse=reuse_flag)
                rpn_cls_score = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location * 2, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.INITIALIZER,
                    activation_fn=None,
                    padding="VALID",
                    scope=scope_list[1],
                    reuse=reuse_flag)
                rpn_box_pred = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location * 4, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.BBOX_INITIALIZER,
                    activation_fn=None,
                    padding="VALID",
                    scope=scope_list[2],
                    reuse=reuse_flag)
                # Flatten to one row per anchor so levels can be concatenated.
                rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
                rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])

                fpn_cls_score.append(rpn_cls_score)
                fpn_box_pred.append(rpn_box_pred)

            fpn_cls_score = tf.concat(fpn_cls_score,
                                      axis=0,
                                      name='fpn_cls_score')
            fpn_box_pred = tf.concat(fpn_box_pred, axis=0, name='fpn_box_pred')
            fpn_cls_prob = slim.softmax(fpn_cls_score, scope='fpn_cls_prob')

        # 3. generate_anchors (and ground-truth masks for supervised levels)
        all_anchors = []
        mask_gt_list = []
        for i, (level_name, p) in enumerate(zip(cfgs.LEVLES, P_list)):
            p_h, p_w = tf.shape(p)[1], tf.shape(p)[2]
            if cfgs.USE_SUPERVISED_MASK and i < len(
                    mask_list) and self.is_training:
                mask_type = cfgs.MASK_TYPE.strip()
                if mask_type == 'h':
                    mask = tf.py_func(
                        mask_utils.make_gt_mask,
                        [p_h, p_w, img_shape[1], img_shape[2], gtboxes_batch],
                        Tout=tf.int32)
                elif mask_type == 'r':
                    mask = tf.py_func(mask_utils.make_r_gt_mask, [
                        p_h, p_w, img_shape[1], img_shape[2], gtboxes_r_batch
                    ],
                                      Tout=tf.int32)
                else:
                    # Previously this fell through with `mask` unbound and
                    # crashed below with an UnboundLocalError.
                    raise ValueError(
                        "Unsupported cfgs.MASK_TYPE %r: expected 'h' or 'r'" %
                        (cfgs.MASK_TYPE,))
                if cfgs.BINARY_MASK:
                    # Collapse every positive value to 1, everything else to 0.
                    mask = tf.where(tf.greater(mask, 0), tf.ones_like(mask),
                                    tf.zeros_like(mask))
                mask_gt_list.append(mask)
                mask_utils.vis_mask_tfsmry(mask, name="MASK/%s" % level_name)

            featuremap_height = tf.cast(p_h, tf.float32)
            featuremap_width = tf.cast(p_w, tf.float32)
            anchors = anchor_utils.make_anchors(
                base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[i],
                anchor_scales=cfgs.ANCHOR_SCALES,
                anchor_ratios=cfgs.ANCHOR_RATIOS,
                featuremap_height=featuremap_height,
                featuremap_width=featuremap_width,
                stride=cfgs.ANCHOR_STRIDE_LIST[i],
                name="make_anchors_for%s" % level_name)
            all_anchors.append(anchors)
        all_anchors = tf.concat(all_anchors, axis=0, name='all_anchors_of_FPN')

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
        with tf.variable_scope('postprocess_FPN'):
            rois, roi_scores = postprocess_rpn_proposals(
                rpn_bbox_pred=fpn_box_pred,
                rpn_cls_prob=fpn_cls_prob,
                img_shape=img_shape,
                anchors=all_anchors,
                is_training=self.is_training)

        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                fpn_labels, fpn_bbox_targets = \
                    tf.py_func(
                        anchor_target_layer,
                        [gtboxes_batch, img_shape, all_anchors],
                        [tf.float32, tf.float32])
                fpn_bbox_targets = tf.reshape(fpn_bbox_targets, [-1, 4])
                fpn_labels = tf.to_int32(fpn_labels, name="to_int32")
                fpn_labels = tf.reshape(fpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch,
                                         all_anchors,
                                         fpn_labels,
                                         method=0)

            # --------------------------------------add smry-----------------------------------------------------------

            # RPN accuracy is measured only over anchors whose label is not -1.
            fpn_cls_category = tf.argmax(fpn_cls_prob, axis=1)
            kept_rpppn = tf.reshape(tf.where(tf.not_equal(fpn_labels, -1)),
                                    [-1])
            fpn_cls_category = tf.gather(fpn_cls_category, kept_rpppn)
            acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(fpn_cls_category,
                             tf.to_int64(tf.gather(fpn_labels, kept_rpppn)))))
            tf.summary.scalar('ACC/fpn_accuracy', acc)

            # Make RoI sampling run after the anchor labels are available.
            with tf.control_dependencies([fpn_labels]):
                with tf.variable_scope('sample_RCNN_minibatch'):
                    rois, labels, bbox_targets = \
                        tf.py_func(proposal_target_layer,
                                   [rois, gtboxes_batch, gtboxes_r_batch],
                                   [tf.float32, tf.float32, tf.float32])
                    rois = tf.reshape(rois, [-1, 4])
                    labels = tf.to_int32(labels)
                    labels = tf.reshape(labels, [-1])
                    bbox_targets = tf.reshape(bbox_targets,
                                              [-1, 5 * (cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry(input_img_batch,
                                                rois,
                                                labels,
                                                method=0)

        if not cfgs.USE_CONCAT:
            if self.is_training:
                rois_list, labels, bbox_targets = self.assign_levels(
                    all_rois=rois, labels=labels, bbox_targets=bbox_targets)
            else:
                rois_list = self.assign_levels(
                    all_rois=rois
                )  # rois_list: [P2_rois, P3_rois, P4_rois, P5_rois]

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # 5. build Fast-RCNN
        if not cfgs.USE_CONCAT:
            bbox_pred, cls_score = self.build_fastrcnn(P_list=P_list,
                                                       rois_list=rois_list,
                                                       img_shape=img_shape)
            # Re-join the per-level RoIs so they line up with the head output.
            rois = tf.concat(rois_list, axis=0, name='concat_rois')
        else:
            bbox_pred, cls_score = self.build_concat_fastrcnn(
                P_list=P_list, all_rois=rois, img_shape=img_shape)

        cls_prob = slim.softmax(cls_score, 'cls_prob')

        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            cls_category = tf.argmax(cls_prob, axis=1)
            fast_acc = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc', fast_acc)

        # 6. build losses (training only)
        if self.is_training:
            self.build_loss(
                rpn_box_pred=fpn_box_pred,
                rpn_bbox_targets=fpn_bbox_targets,
                rpn_cls_score=fpn_cls_score,
                rpn_labels=fpn_labels,
                bbox_pred=bbox_pred,
                bbox_targets=bbox_targets,
                cls_score=cls_score,
                labels=labels,
                mask_list=mask_list if cfgs.USE_SUPERVISED_MASK else None,
                mask_gt_list=mask_gt_list
                if cfgs.USE_SUPERVISED_MASK else None)

        # 7. postprocess_fastrcnn
        # NOTE(review): the keyword is spelled `bbox_ppred` at this call site;
        # postprocess_fastrcnn is not visible here, so it is left unchanged -
        # confirm it matches the callee's parameter name.
        final_bbox, final_scores, final_category = self.postprocess_fastrcnn(
            rois=rois, bbox_ppred=bbox_pred, scores=cls_prob, gpu_id=gpu_id)
        if self.is_training:
            return final_bbox, final_scores, final_category, self.loss_dict
        else:
            return final_bbox, final_scores, final_category