Пример #1
0
    def rpn(self, image, features, inputs):
        """
        Attach the RPN head to the backbone feature and produce proposals.

        Steps:
          1. Run ``rpn_head`` on ``features[0]`` to get the label and box logits.
          2. Wrap all anchors together with the anchor annotations taken from
             ``inputs`` and narrow them to the feature map.
          3. Decode the box logits against the anchors and call
             ``generate_rpn_proposals`` with the image's (h, w), using the
             TRAIN or TEST top-k settings depending on ``self.training``.
          4. In training mode, compute the RPN losses against the anchors.

        Args:
            image: image tensor; ``tf.shape(image)[2:]`` is taken as (h, w).
            features: sequence of feature maps; only the first one is used.
            inputs: mapping providing 'anchor_labels' and 'anchor_boxes'.

        Returns:
            (BoxProposals, losses): the proposals wrapped in ``BoxProposals``
            and the result of ``rpn_losses`` when training, or an empty list
            otherwise.
        """
        fm = features[0]
        label_logits, box_logits = rpn_head(
            'rpn', fm, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

        all_anchors = RPNAnchors(
            get_all_anchors(), inputs['anchor_labels'], inputs['anchor_boxes'])
        fm_anchors = all_anchors.narrow_to(fm)

        hw = tf.shape(image)[2:]     # h,w
        decoded = fm_anchors.decode_logits(box_logits)  # fHxfWxNAx4, floatbox
        flat_boxes = tf.reshape(decoded, [-1, 4])
        flat_scores = tf.reshape(label_logits, [-1])

        # Top-k budgets differ between training and testing.
        if self.training:
            pre_nms_topk = cfg.RPN.TRAIN_PRE_NMS_TOPK
            post_nms_topk = cfg.RPN.TRAIN_POST_NMS_TOPK
        else:
            pre_nms_topk = cfg.RPN.TEST_PRE_NMS_TOPK
            post_nms_topk = cfg.RPN.TEST_POST_NMS_TOPK

        boxes, _scores = generate_rpn_proposals(
            flat_boxes, flat_scores, hw, pre_nms_topk, post_nms_topk)

        losses = []
        if self.training:
            losses = rpn_losses(
                fm_anchors.gt_labels, fm_anchors.encoded_gt_boxes(),
                label_logits, box_logits)

        return BoxProposals(boxes), losses
Пример #2
0
    def rpn(self, image, features, inputs):
        """
        Build the RPN on the first feature map and generate region proposals.

        Anchors are generated from the stride / sizes / ratios in ``cfg.RPN``
        and the maximum image size in ``cfg.PREPROC``, then narrowed to the
        feature map. The TRAIN or TEST top-k settings are chosen according to
        ``self.training``.

        Args:
            image: image tensor; ``tf.shape(image)[2:]`` is taken as (h, w).
            features: sequence of feature maps; only the first one is used.
            inputs: mapping providing 'anchor_labels' and 'anchor_boxes'.

        Returns:
            (BoxProposals, losses): ``losses`` is the result of ``rpn_losses``
            in training mode and an empty list otherwise.
        """
        fm = features[0]
        label_logits, box_logits = rpn_head(
            'rpn', fm, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

        anchor_grid = get_all_anchors(
            stride=cfg.RPN.ANCHOR_STRIDE,
            sizes=cfg.RPN.ANCHOR_SIZES,
            ratios=cfg.RPN.ANCHOR_RATIOS,
            max_size=cfg.PREPROC.MAX_SIZE)
        fm_anchors = RPNAnchors(
            anchor_grid, inputs['anchor_labels'], inputs['anchor_boxes'])
        fm_anchors = fm_anchors.narrow_to(fm)

        hw = tf.shape(image)[2:]  # h,w
        decoded = fm_anchors.decode_logits(box_logits)  # fHxfWxNAx4, floatbox
        flat_boxes = tf.reshape(decoded, [-1, 4])
        flat_scores = tf.reshape(label_logits, [-1])

        if self.training:
            pre_nms_topk, post_nms_topk = (
                cfg.RPN.TRAIN_PRE_NMS_TOPK, cfg.RPN.TRAIN_POST_NMS_TOPK)
        else:
            pre_nms_topk, post_nms_topk = (
                cfg.RPN.TEST_PRE_NMS_TOPK, cfg.RPN.TEST_POST_NMS_TOPK)

        boxes, _scores = generate_rpn_proposals(
            flat_boxes, flat_scores, hw, pre_nms_topk, post_nms_topk)

        if not self.training:
            return BoxProposals(boxes), []

        losses = rpn_losses(
            fm_anchors.gt_labels, fm_anchors.encoded_gt_boxes(),
            label_logits, box_logits)
        return BoxProposals(boxes), losses
Пример #3
0
 def _get_anchors(self, shape2d):
     """
     Slice the precomputed anchor grid down to the given 2D extent.

     Args:
         shape2d: indexable whose first two entries give the number of rows
             and columns to keep from the full anchor grid.

     Returns:
         The 'fm_anchors' tensor: the leading ``shape2d[0] x shape2d[1]``
         part of the anchor grid, with the last two axes kept whole.
     """
     with tf.name_scope('anchors'):
         # Full grid: FSxFSxNAx4 (FS=MAX_SIZE//ANCHOR_STRIDE)
         anchor_grid = tf.constant(
             get_all_anchors(), name='all_anchors', dtype=tf.float32)
         begin = [0, 0, 0, 0]
         # -1 keeps every remaining element along that axis.
         size = tf.stack([shape2d[0], shape2d[1], -1, -1])
         return tf.slice(anchor_grid, begin, size, name='fm_anchors')
Пример #4
0
 def _get_anchors(self, image):
     """
     Slice the precomputed anchor grid to the extent implied by ``image``.

     The first two dimensions of ``image`` are each floor-divided by
     ``config.ANCHOR_STRIDE`` to obtain the number of grid rows and columns
     to keep.

     Args:
         image: tensor whose first two dimensions are used as the spatial size.

     Returns:
         The 'fm_anchors' tensor: the leading part of the anchor grid, with
         the last two axes kept whole.
     """
     with tf.name_scope('anchors'):
         # Full grid: FSxFSxNAx4 (FS=MAX_SIZE//ANCHOR_STRIDE)
         anchor_grid = tf.constant(
             get_all_anchors(), name='all_anchors', dtype=tf.float32)
         rows = tf.shape(image)[0] // config.ANCHOR_STRIDE
         cols = tf.shape(image)[1] // config.ANCHOR_STRIDE
         # -1 keeps every remaining element along that axis.
         size = tf.stack([rows, cols, -1, -1])
         return tf.slice(anchor_grid, [0, 0, 0, 0], size, name='fm_anchors')
Пример #5
0
    def rpn(self, image, features, inputs):
        """
        Build the RPN on the first feature map; always uses training settings.

        Unlike a train/test-aware variant, this version unconditionally uses
        the TRAIN top-k settings and always computes the RPN losses.

        Args:
            image: image tensor; ``tf.shape(image)[2:]`` is taken as (h, w).
            features: sequence of feature maps; only the first one is used.
            inputs: mapping providing 'anchor_labels' and 'anchor_boxes'.

        Returns:
            (BoxProposals, losses): the wrapped proposal boxes and the result
            of ``rpn_losses``.
        """
        fm = features[0]
        label_logits, box_logits = rpn_head(
            'rpn', fm, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

        fm_anchors = RPNAnchors(
            get_all_anchors(), inputs['anchor_labels'], inputs['anchor_boxes'])
        fm_anchors = fm_anchors.narrow_to(fm)

        hw = tf.shape(image)[2:]     # h,w
        decoded = fm_anchors.decode_logits(box_logits)  # fHxfWxNAx4, floatbox
        flat_boxes = tf.reshape(decoded, [-1, 4])
        flat_scores = tf.reshape(label_logits, [-1])
        boxes, _scores = generate_rpn_proposals(
            flat_boxes, flat_scores, hw,
            cfg.RPN.TRAIN_PRE_NMS_TOPK, cfg.RPN.TRAIN_POST_NMS_TOPK)

        rpn_loss_terms = rpn_losses(
            fm_anchors.gt_labels, fm_anchors.encoded_gt_boxes(),
            label_logits, box_logits)
        return BoxProposals(boxes), rpn_loss_terms
Пример #6
0
    def build_graph(self, *inputs):
        """
        Build the detection graph: C4 backbone, RPN, conv5, two VotePooling
        heads (classification and box regression), then either the training
        cost or the inference outputs.

        Args:
            inputs: positional tensors unpacked as (image, anchor_labels,
                anchor_boxes, gt_boxes, gt_labels), followed by gt_masks when
                ``cfg.MODE_MASK`` is set.

        Returns:
            The 'total_cost' tensor in training mode. In inference mode nothing
            is returned explicitly; with ``cfg.MODE_MASK`` set, the mask output
            is exposed through the op named 'final_masks'.
        """
        is_training = get_current_tower_context().is_training
        if cfg.MODE_MASK:
            image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_masks = inputs
        else:
            image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
        image = self.preprocess(image)  # 1CHW
        #with  varreplace.freeze_variables(stop_gradient=True, skip_collection=True):
        featuremap = resnet_c4_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3])
        # freeze
        # featuremap = tf.stop_gradient(featuremap)
        rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

        # Pair all anchors with their annotations and narrow them to the feature map.
        anchors = RPNAnchors(get_all_anchors(), anchor_labels, anchor_boxes)
        anchors = anchors.narrow_to(featuremap)

        image_shape2d = tf.shape(image)[2:]  # h,w
        pred_boxes_decoded = anchors.decode_logits(rpn_box_logits)  # fHxfWxNAx4, floatbox
        # The top-k settings passed to proposal generation depend on the mode.
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            tf.reshape(pred_boxes_decoded, [-1, 4]),
            tf.reshape(rpn_label_logits, [-1]),
            image_shape2d,
            cfg.RPN.TRAIN_PRE_NMS_TOPK if is_training else cfg.RPN.TEST_PRE_NMS_TOPK,
            cfg.RPN.TRAIN_POST_NMS_TOPK if is_training else cfg.RPN.TEST_POST_NMS_TOPK)

        if is_training:
            # sample proposal boxes in training
            rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
                proposal_boxes, gt_boxes, gt_labels)
        else:
            # The boxes to be used to crop RoIs.
            # Use all proposal boxes in inference
            rcnn_boxes = proposal_boxes
        # NOTE(review): featuremap is rebound to the conv5 output here, so the
        # C4 feature is no longer reachable below. The inference mask branch
        # then calls roi_align + resnet_conv5 on this conv5 output -- confirm
        # that is intended.
        featuremap = resnet_conv5(featuremap, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
        # 1x1 convolutions producing NUM_CLASS*3*3 and NUM_CLASS*4*3*3 channels
        # that feed the two VotePooling calls below.
        rfcn_cls = Conv2D('rfcn_cls', featuremap, cfg.DATA.NUM_CLASS*3*3, (1, 1), data_format='channels_first')
        rfcn_reg = Conv2D('rfcn_reg', featuremap, cfg.DATA.NUM_CLASS*4*3*3, (1, 1), data_format='channels_first')
        # NOTE(review): boxes are scaled by 1/ANCHOR_STRIDE although featuremap
        # is now the conv5 output -- verify the stride still matches.
        boxes_on_featuremap = rcnn_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)

        classify_vote = VotePooling('votepooling_cls', rfcn_cls, boxes_on_featuremap, 3, 3)
        classify_regr = VotePooling('votepooling_regr', rfcn_reg, boxes_on_featuremap, 3, 3, isCls=False)
        classify_regr = tf.reshape(classify_regr, [-1, cfg.DATA.NUM_CLASS, 4])
        if is_training:
            # rpn loss
            rpn_label_loss, rpn_box_loss = rpn_losses(
                anchors.gt_labels, anchors.encoded_gt_boxes(), rpn_label_logits, rpn_box_logits)

            # fastrcnn loss
            matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

            fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0), [-1])  # fg inds w.r.t all samples
            fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
            fg_fastrcnn_box_logits = tf.gather(classify_regr, fg_inds_wrt_sample)

            fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
                image, rcnn_labels, fg_sampled_boxes,
                matched_gt_boxes, classify_vote, fg_fastrcnn_box_logits)

            if cfg.MODE_MASK:
                # maskrcnn loss
                fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
                # In training, mask branch shares the same C5 feature.
                # NOTE(review): feature_fastrcnn is never assigned in this
                # function; unless it exists at module scope, this line raises
                # NameError when cfg.MODE_MASK is on.
                fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
                mask_logits = maskrcnn_upXconv_head(
                    'maskrcnn', fg_feature, cfg.DATA.NUM_CATEGORY, num_convs=0)  # #fg x #cat x 14x14

                target_masks_for_fg = crop_and_resize(
                    tf.expand_dims(gt_masks, 1),
                    fg_sampled_boxes,
                    fg_inds_wrt_gt, 14,
                    pad_border=False)  # nfg x 1x14x14
                target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1, 'sampled_fg_mask_targets')
                mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels, target_masks_for_fg)
            else:
                mrcnn_loss = 0.0

            # Weight decay over every variable whose name matches '.*/W'.
            wd_cost = regularize_cost(
                '.*/W', l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost')

            total_cost = tf.add_n([
                rpn_label_loss, rpn_box_loss,
                fastrcnn_label_loss, fastrcnn_box_loss,
                mrcnn_loss, wd_cost], 'total_cost')

            add_moving_summary(total_cost, wd_cost)
            return total_cost
        else:
            final_boxes, final_labels = self.fastrcnn_inference(
                image_shape2d, rcnn_boxes, classify_vote, classify_regr)

            if cfg.MODE_MASK:
                roi_resized = roi_align(featuremap, final_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE), 14)
                feature_maskrcnn = resnet_conv5(roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
                mask_logits = maskrcnn_upXconv_head(
                    'maskrcnn', feature_maskrcnn, cfg.DATA.NUM_CATEGORY, 0)  # #result x #cat x 14x14
                # Pick, for each result, the mask at index (label - 1).
                indices = tf.stack([tf.range(tf.size(final_labels)), tf.to_int32(final_labels) - 1], axis=1)
                final_mask_logits = tf.gather_nd(mask_logits, indices)  # #resultx14x14
                # The result is exposed only through the op name 'final_masks'.
                tf.sigmoid(final_mask_logits, name='final_masks')
Пример #7
0
    def _build_graph(self, inputs):
        """
        Build the two-stage detection graph for one image: anchors, backbone,
        RPN, proposal generation, and the Fast R-CNN head.

        In training mode the summed cost is stored in ``self.cost``. In
        inference mode the results are exposed through the named tensors
        'fastrcnn_all_probs', 'fastrcnn_fg_probs' and 'fastrcnn_fg_boxes'.
        Nothing is returned.

        Args:
            inputs: sequence unpacked as (image, anchor_labels, anchor_boxes,
                gt_boxes, gt_labels). A leading axis is added to ``image``
                before use.
        """
        is_training = get_current_tower_context().is_training
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
        image = tf.expand_dims(image, 0)

        # FSxFSxNAx4 (FS=MAX_SIZE//ANCHOR_STRIDE)
        with tf.name_scope('anchors'):
            all_anchors = tf.constant(get_all_anchors(),
                                      name='all_anchors',
                                      dtype=tf.float32)
            # Axes 1 and 2 are used because axis 0 is the one just added above.
            # -1 keeps the remaining axes whole.
            fm_anchors = tf.slice(
                all_anchors, [0, 0, 0, 0],
                tf.stack([
                    tf.shape(image)[1] // config.ANCHOR_STRIDE,
                    tf.shape(image)[2] // config.ANCHOR_STRIDE, -1, -1
                ]),
                name='fm_anchors')
            anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)

        image = image_preprocess(image, bgr=True)
        # Move the last axis to position 1
        # (presumably NHWC -> NCHW; confirm against image_preprocess).
        image = tf.transpose(image, [0, 3, 1, 2])

        # resnet50
        featuremap = pretrained_resnet_conv4(image, [3, 4, 6])
        rpn_label_logits, rpn_box_logits = rpn_head(featuremap)
        # NOTE: the RPN losses are built regardless of is_training; they are
        # only consumed in the training branch below.
        rpn_label_loss, rpn_box_loss = rpn_losses(anchor_labels,
                                                  anchor_boxes_encoded,
                                                  rpn_label_logits,
                                                  rpn_box_logits)

        decoded_boxes = decode_bbox_target(
            rpn_box_logits, fm_anchors)  # (fHxfWxNA)x4, floatbox
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            decoded_boxes, tf.reshape(rpn_label_logits, [-1]),
            tf.shape(image)[2:])

        if is_training:
            rcnn_sampled_boxes, rcnn_encoded_boxes, rcnn_labels = sample_fast_rcnn_targets(
                proposal_boxes, gt_boxes, gt_labels)
            # Scale box coordinates by 1/ANCHOR_STRIDE before cropping the feature map.
            boxes_on_featuremap = rcnn_sampled_boxes * (1.0 /
                                                        config.ANCHOR_STRIDE)
            roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)
            feature_fastrcnn = resnet_conv5(roi_resized)  # nxc
            fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_head(
                feature_fastrcnn, config.NUM_CLASS)

            fastrcnn_label_loss, fastrcnn_box_loss = fastrcnn_losses(
                rcnn_labels, rcnn_encoded_boxes, fastrcnn_label_logits,
                fastrcnn_box_logits)

            # L2 weight decay (1e-4) on variables matching the regex below.
            wd_cost = regularize_cost(
                '(?:group1|group2|group3|rpn|fastrcnn)/.*W',
                l2_regularizer(1e-4),
                name='wd_cost')

            self.cost = tf.add_n([
                rpn_label_loss, rpn_box_loss, fastrcnn_label_loss,
                fastrcnn_box_loss, wd_cost
            ], 'total_cost')

            for k in self.cost, wd_cost:
                add_moving_summary(k)
        else:
            # Inference: run the head on every proposal.
            roi_resized = roi_align(
                featuremap, proposal_boxes * (1.0 / config.ANCHOR_STRIDE), 14)
            feature_fastrcnn = resnet_conv5(roi_resized)  # nxc
            label_logits, fastrcnn_box_logits = fastrcnn_head(
                feature_fastrcnn, config.NUM_CLASS)
            label_probs = tf.nn.softmax(label_logits,
                                        name='fastrcnn_all_probs')  # NP,
            labels = tf.argmax(label_logits, axis=1)
            fg_ind, fg_box_logits = fastrcnn_predict_boxes(
                labels, fastrcnn_box_logits)
            fg_label_probs = tf.gather(label_probs,
                                       fg_ind,
                                       name='fastrcnn_fg_probs')
            fg_boxes = tf.gather(proposal_boxes, fg_ind)

            # Divide the box logits by the regression weights before decoding.
            fg_box_logits = fg_box_logits / tf.constant(
                config.FASTRCNN_BBOX_REG_WEIGHTS)
            decoded_boxes = decode_bbox_target(fg_box_logits,
                                               fg_boxes)  # Nfx4, floatbox
            # Exposed to callers through the tensor name only.
            decoded_boxes = tf.identity(decoded_boxes,
                                        name='fastrcnn_fg_boxes')
                                  'anchor_boxes')
    gt_boxes = tf.placeholder(tf.float32, (None, 4), 'gt_boxes')
    gt_labels = tf.placeholder(tf.int64, (None, ), 'gt_labels')

    image = preprocess(image_P)
    Load_Weights = []

    with TowerContext('', is_training=Trainining_is):

        featuremap = resnet_c4_backbone(image,
                                        cfg.BACKBONE.RESNET_NUM_BLOCK[:3])
        rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap,
                                                    cfg.RPN.HEAD_DIM,
                                                    cfg.RPN.NUM_ANCHOR)

        anchors = RPNAnchors(get_all_anchors(), anchor_labels, anchor_boxes)
        anchors = anchors.narrow_to(featuremap)

        image_shape2d = tf.shape(image)[2:]  # h,w
        pred_boxes_decoded = anchors.decode_logits(
            rpn_box_logits)  # fHxfWxNAx4, floatbox
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            tf.reshape(pred_boxes_decoded, [-1, 4]),
            tf.reshape(rpn_label_logits,
                       [-1]), image_shape2d, cfg.RPN.TRAIN_PRE_NMS_TOPK
            if is_training else cfg.RPN.TEST_PRE_NMS_TOPK,
            cfg.RPN.TRAIN_POST_NMS_TOPK
            if is_training else cfg.RPN.TEST_POST_NMS_TOPK)
        if is_training:
            # sample proposal boxes in training
            rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
Пример #9
0
    def build_graph(self, *inputs):
        """
        Build the detection graph (optionally with a mask branch): C4 backbone,
        RPN, RoI-aligned conv5 head, then either the training cost or the
        inference outputs.

        Args:
            inputs: positional tensors unpacked as (image, anchor_labels,
                anchor_boxes, gt_boxes, gt_labels), followed by gt_masks when
                ``config.MODE_MASK`` is set.

        Returns:
            The 'total_cost' tensor in training mode. In inference mode nothing
            is returned explicitly; with ``config.MODE_MASK`` set, the masks
            are exposed through the op named 'final_masks'.
        """
        is_training = get_current_tower_context().is_training
        if config.MODE_MASK:
            image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_masks = inputs
        else:
            image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
        image = self.preprocess(image)  # 1CHW

        featuremap = resnet_c4_backbone(image, config.RESNET_NUM_BLOCK[:3])
        rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, 1024,
                                                    config.NUM_ANCHOR)

        # Restrict the anchors and their annotations to the feature map.
        fm_anchors, anchor_labels, anchor_boxes = self.narrow_to_featuremap(
            featuremap, get_all_anchors(), anchor_labels, anchor_boxes)
        anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)

        image_shape2d = tf.shape(image)[2:]  # h,w
        pred_boxes_decoded = decode_bbox_target(
            rpn_box_logits, fm_anchors)  # fHxfWxNAx4, floatbox
        # The top-k settings passed to proposal generation depend on the mode.
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            tf.reshape(pred_boxes_decoded, [-1, 4]),
            tf.reshape(rpn_label_logits,
                       [-1]), image_shape2d, config.TRAIN_PRE_NMS_TOPK
            if is_training else config.TEST_PRE_NMS_TOPK,
            config.TRAIN_POST_NMS_TOPK
            if is_training else config.TEST_POST_NMS_TOPK)

        if is_training:
            # sample proposal boxes in training
            rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
                proposal_boxes, gt_boxes, gt_labels)
        else:
            # The boxes to be used to crop RoIs.
            # Use all proposal boxes in inference
            rcnn_boxes = proposal_boxes

        # Scale box coordinates by 1/ANCHOR_STRIDE before cropping the feature map.
        boxes_on_featuremap = rcnn_boxes * (1.0 / config.ANCHOR_STRIDE)
        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

        # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
        # which was fixed in TF 1.6
        def ff_true():
            # Real head: conv5 on the RoI crops, global average pooling, then
            # the classification / box-regression outputs.
            feature_fastrcnn = resnet_conv5(
                roi_resized, config.RESNET_NUM_BLOCK[-1])  # nxcx7x7
            feature_gap = GlobalAvgPooling('gap',
                                           feature_fastrcnn,
                                           data_format='channels_first')
            fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
                'fastrcnn', feature_gap, config.NUM_CLASS)
            # Return C5 feature to be shared with mask branch
            return feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits

        def ff_false():
            # Empty placeholders with a zero-sized leading axis, used when
            # there are no boxes.
            ncls = config.NUM_CLASS
            return tf.zeros([0, 2048, 7,
                             7]), tf.zeros([0,
                                            ncls]), tf.zeros([0, ncls - 1, 4])

        # NOTE(review): the version is compared as a number; verify how
        # get_tf_version_number() represents versions such as 1.10.
        if get_tf_version_number() >= 1.6:
            feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits = ff_true(
            )
        else:
            logger.warn("This example may drop support for TF < 1.6 soon.")
            # Only build the head on a non-empty set of boxes.
            feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits = tf.cond(
                tf.size(boxes_on_featuremap) > 0, ff_true, ff_false)

        if is_training:
            # rpn loss
            rpn_label_loss, rpn_box_loss = rpn_losses(anchor_labels,
                                                      anchor_boxes_encoded,
                                                      rpn_label_logits,
                                                      rpn_box_logits)

            # fastrcnn loss
            matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

            fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0),
                                            [-1])  # fg inds w.r.t all samples
            fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
            fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits,
                                               fg_inds_wrt_sample)

            fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
                image, rcnn_labels, fg_sampled_boxes, matched_gt_boxes,
                fastrcnn_label_logits, fg_fastrcnn_box_logits)

            if config.MODE_MASK:
                # maskrcnn loss
                fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
                # In training, mask branch shares the same C5 feature.
                fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
                mask_logits = maskrcnn_upXconv_head(
                    'maskrcnn', fg_feature, config.NUM_CLASS,
                    num_convs=0)  # #fg x #cat x 14x14

                matched_gt_masks = tf.gather(gt_masks,
                                             fg_inds_wrt_gt)  # nfg x H x W
                # The masks were already gathered per foreground box, so box i
                # is cropped from mask i (indices are simply 0..nfg-1).
                target_masks_for_fg = crop_and_resize(
                    tf.expand_dims(matched_gt_masks, 1),
                    fg_sampled_boxes,
                    tf.range(tf.size(fg_inds_wrt_gt)),
                    14,
                    pad_border=False)  # nfg x 1x14x14
                target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1,
                                                 'sampled_fg_mask_targets')
                mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels,
                                           target_masks_for_fg)
            else:
                mrcnn_loss = 0.0

            # L2 weight decay (1e-4) on variables matching the regex below.
            wd_cost = regularize_cost(
                '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
                l2_regularizer(1e-4),
                name='wd_cost')

            total_cost = tf.add_n([
                rpn_label_loss, rpn_box_loss, fastrcnn_label_loss,
                fastrcnn_box_loss, mrcnn_loss, wd_cost
            ], 'total_cost')

            add_moving_summary(total_cost, wd_cost)
            return total_cost
        else:
            final_boxes, final_labels = self.fastrcnn_inference(
                image_shape2d, rcnn_boxes, fastrcnn_label_logits,
                fastrcnn_box_logits)

            if config.MODE_MASK:
                # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
                def f1():
                    # Mask head on the final detections: crop the C4 feature,
                    # run conv5, then pick the mask at index (label - 1).
                    roi_resized = roi_align(
                        featuremap, final_boxes * (1.0 / config.ANCHOR_STRIDE),
                        14)
                    feature_maskrcnn = resnet_conv5(
                        roi_resized, config.RESNET_NUM_BLOCK[-1])
                    mask_logits = maskrcnn_upXconv_head(
                        'maskrcnn', feature_maskrcnn, config.NUM_CLASS,
                        0)  # #result x #cat x 14x14
                    indices = tf.stack([
                        tf.range(tf.size(final_labels)),
                        tf.to_int32(final_labels) - 1
                    ],
                                       axis=1)
                    final_mask_logits = tf.gather_nd(mask_logits,
                                                     indices)  # #resultx14x14
                    return tf.sigmoid(final_mask_logits)

                # Fall back to an empty [0, 14, 14] tensor when nothing was detected.
                final_masks = tf.cond(
                    tf.size(final_labels) > 0, f1,
                    lambda: tf.zeros([0, 14, 14]))
                # Exposed to callers through the tensor name only.
                tf.identity(final_masks, name='final_masks')
Пример #10
0
    def build_graph(self, *inputs):
        """
        Build the detection graph (optionally with a mask branch): pretrained
        C4 backbone, RPN, RoI-aligned conv5 head, then either the training cost
        or the inference outputs.

        Args:
            inputs: positional tensors unpacked as (image, anchor_labels,
                anchor_boxes, gt_boxes, gt_labels), followed by gt_masks when
                ``config.MODE_MASK`` is set.

        Returns:
            The 'total_cost' tensor in training mode. In inference mode nothing
            is returned explicitly; results are exposed through the named
            tensors 'fastrcnn_all_probs', 'fastrcnn_all_boxes', 'final_probs',
            'final_boxes', 'final_labels' and, with ``config.MODE_MASK`` set,
            'final_masks'.
        """
        is_training = get_current_tower_context().is_training
        if config.MODE_MASK:
            image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_masks = inputs
        else:
            image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
        image = self._preprocess(image)  # 1CHW

        featuremap = pretrained_resnet_c4_backbone(image,
                                                   config.RESNET_NUM_BLOCK[:3])
        rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, 1024,
                                                    config.NUM_ANCHOR)

        # Restrict the anchors and their annotations to the feature map.
        fm_anchors, anchor_labels, anchor_boxes = self.slice_to_featuremap(
            featuremap, get_all_anchors(), anchor_labels, anchor_boxes)
        anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)

        image_shape2d = tf.shape(image)[2:]  # h,w
        decoded_boxes = decode_bbox_target(rpn_box_logits,
                                           fm_anchors)  # fHxfWxNAx4, floatbox
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            tf.reshape(decoded_boxes, [-1, 4]),
            tf.reshape(rpn_label_logits, [-1]), image_shape2d)

        if is_training:
            # sample proposal boxes in training
            rcnn_sampled_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
                proposal_boxes, gt_boxes, gt_labels)
            boxes_on_featuremap = rcnn_sampled_boxes * (1.0 /
                                                        config.ANCHOR_STRIDE)
        else:
            # use all proposal boxes in inference
            boxes_on_featuremap = proposal_boxes * (1.0 / config.ANCHOR_STRIDE)

        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

        # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
        def ff_true():
            # Real head: conv5 on the RoI crops, then the Fast R-CNN head.
            feature_fastrcnn = resnet_conv5(
                roi_resized, config.RESNET_NUM_BLOCK[-1])  # nxcx7x7
            fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_head(
                'fastrcnn', feature_fastrcnn, config.NUM_CLASS)
            return feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits

        def ff_false():
            # Empty placeholders with a zero-sized leading axis, used when
            # there are no boxes.
            ncls = config.NUM_CLASS
            return tf.zeros([0, 2048, 7,
                             7]), tf.zeros([0,
                                            ncls]), tf.zeros([0, ncls - 1, 4])

        # Only build the head on a non-empty set of boxes.
        feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits = tf.cond(
            tf.size(boxes_on_featuremap) > 0, ff_true, ff_false)

        if is_training:
            # rpn loss
            rpn_label_loss, rpn_box_loss = rpn_losses(anchor_labels,
                                                      anchor_boxes_encoded,
                                                      rpn_label_logits,
                                                      rpn_box_logits)

            # fastrcnn loss
            fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0),
                                            [-1])  # fg inds w.r.t all samples
            fg_sampled_boxes = tf.gather(rcnn_sampled_boxes,
                                         fg_inds_wrt_sample)

            # Image summary of the sampled foreground boxes, cropped from the
            # input image and resized to 300.
            with tf.name_scope('fg_sample_patch_viz'):
                fg_sampled_patches = crop_and_resize(
                    image, fg_sampled_boxes,
                    tf.zeros_like(fg_inds_wrt_sample, dtype=tf.int32), 300)
                fg_sampled_patches = tf.transpose(fg_sampled_patches,
                                                  [0, 2, 3, 1])
                fg_sampled_patches = tf.reverse(fg_sampled_patches,
                                                axis=[-1])  # BGR->RGB
                tf.summary.image('viz', fg_sampled_patches, max_outputs=30)

            matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)
            # Regression targets are scaled by the bbox regression weights;
            # the inference branch divides by the same weights before decoding.
            encoded_boxes = encode_bbox_target(
                matched_gt_boxes, fg_sampled_boxes) * tf.constant(
                    config.FASTRCNN_BBOX_REG_WEIGHTS)
            fastrcnn_label_loss, fastrcnn_box_loss = fastrcnn_losses(
                rcnn_labels, fastrcnn_label_logits, encoded_boxes,
                tf.gather(fastrcnn_box_logits, fg_inds_wrt_sample))

            if config.MODE_MASK:
                # maskrcnn loss
                fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
                fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
                mask_logits = maskrcnn_head(
                    'maskrcnn', fg_feature,
                    config.NUM_CLASS)  # #fg x #cat x 14x14

                gt_masks_for_fg = tf.gather(gt_masks,
                                            fg_inds_wrt_gt)  # nfg x H x W
                # The masks were already gathered per foreground box, so box i
                # is cropped from mask i (indices are simply 0..nfg-1).
                target_masks_for_fg = crop_and_resize(
                    tf.expand_dims(gt_masks_for_fg, 1),
                    fg_sampled_boxes,
                    tf.range(tf.size(fg_inds_wrt_gt)),
                    14,
                    pad_border=False)  # nfg x 1x14x14
                target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1,
                                                 'sampled_fg_mask_targets')
                mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels,
                                           target_masks_for_fg)
            else:
                mrcnn_loss = 0.0

            # L2 weight decay (1e-4) on variables matching the regex below.
            wd_cost = regularize_cost(
                '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
                l2_regularizer(1e-4),
                name='wd_cost')

            total_cost = tf.add_n([
                rpn_label_loss, rpn_box_loss, fastrcnn_label_loss,
                fastrcnn_box_loss, mrcnn_loss, wd_cost
            ], 'total_cost')

            add_moving_summary(total_cost, wd_cost)
            return total_cost
        else:
            label_probs = tf.nn.softmax(
                fastrcnn_label_logits,
                name='fastrcnn_all_probs')  # #proposal x #Class
            # Repeat every proposal NUM_CLASS - 1 times so it can be decoded
            # against each set of box logits.
            anchors = tf.tile(
                tf.expand_dims(proposal_boxes, 1),
                [1, config.NUM_CLASS - 1, 1])  # #proposal x #Cat x 4
            decoded_boxes = decode_bbox_target(
                fastrcnn_box_logits /
                tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS), anchors)
            decoded_boxes = clip_boxes(decoded_boxes,
                                       image_shape2d,
                                       name='fastrcnn_all_boxes')

            # indices: Nx2. Each index into (#proposal, #category)
            pred_indices, final_probs = fastrcnn_predictions(
                decoded_boxes, label_probs)
            final_probs = tf.identity(final_probs, 'final_probs')
            final_boxes = tf.gather_nd(decoded_boxes,
                                       pred_indices,
                                       name='final_boxes')
            # Labels are the second index column shifted by +1.
            final_labels = tf.add(pred_indices[:, 1], 1, name='final_labels')

            if config.MODE_MASK:
                # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
                def f1():
                    # Mask head on the final detections: crop the C4 feature,
                    # run conv5, then pick the mask at index (label - 1).
                    roi_resized = roi_align(
                        featuremap, final_boxes * (1.0 / config.ANCHOR_STRIDE),
                        14)
                    feature_maskrcnn = resnet_conv5(
                        roi_resized, config.RESNET_NUM_BLOCK[-1])
                    mask_logits = maskrcnn_head(
                        'maskrcnn', feature_maskrcnn,
                        config.NUM_CLASS)  # #result x #cat x 14x14
                    indices = tf.stack([
                        tf.range(tf.size(final_labels)),
                        tf.to_int32(final_labels) - 1
                    ],
                                       axis=1)
                    final_mask_logits = tf.gather_nd(mask_logits,
                                                     indices)  # #resultx14x14
                    return tf.sigmoid(final_mask_logits)

                # Fall back to an empty [0, 14, 14] tensor when nothing was detected.
                final_masks = tf.cond(
                    tf.size(final_probs) > 0, f1,
                    lambda: tf.zeros([0, 14, 14]))
                # Exposed to callers through the tensor name only.
                tf.identity(final_masks, name='final_masks')
Пример #11
0
    def build_graph(self, *inputs):
        """
        Assemble the detection graph: C4 backbone, RPN and the Fast R-CNN head.

        The positional ``inputs`` are matched by position to
        ``self.input_names``. In a training tower the ``total_cost`` tensor is
        returned. In an inference tower nothing is returned; the graph is only
        extended up to the tensor named ``final_masks``.
        """
        named_inputs = dict(zip(self.input_names, inputs))
        training = get_current_tower_context().is_training
        image = self.preprocess(named_inputs['image'])  # 1CHW

        featuremap = resnet_c4_backbone(
            image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3])
        rpn_label_logits, rpn_box_logits = rpn_head(
            'rpn', featuremap, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

        # Wrap the anchors together with their labels/boxes, then narrow them
        # to the feature map produced by the backbone.
        all_anchors = RPNAnchors(get_all_anchors(),
                                 named_inputs['anchor_labels'],
                                 named_inputs['anchor_boxes'])
        anchors = all_anchors.narrow_to(featuremap)

        image_shape2d = tf.shape(image)[2:]  # h,w
        decoded_boxes = anchors.decode_logits(
            rpn_box_logits)  # fHxfWxNAx4, floatbox

        # The NMS budgets differ between training and testing.
        if training:
            pre_nms_topk = cfg.RPN.TRAIN_PRE_NMS_TOPK
            post_nms_topk = cfg.RPN.TRAIN_POST_NMS_TOPK
        else:
            pre_nms_topk = cfg.RPN.TEST_PRE_NMS_TOPK
            post_nms_topk = cfg.RPN.TEST_POST_NMS_TOPK
        flat_boxes = tf.reshape(decoded_boxes, [-1, 4])
        flat_scores = tf.reshape(rpn_label_logits, [-1])
        proposal_boxes, _ = generate_rpn_proposals(
            flat_boxes, flat_scores, image_shape2d,
            pre_nms_topk, post_nms_topk)

        gt_boxes = named_inputs['gt_boxes']
        gt_labels = named_inputs['gt_labels']
        gt_masks = named_inputs['gt_masks']
        if training:
            # Training only crops a sampled subset of the proposals.
            (rcnn_boxes, rcnn_labels,
             fg_inds_wrt_gt, rcnn_masks) = sample_fast_rcnn_targets(
                proposal_boxes, gt_boxes, gt_labels, gt_masks)
            matched_gt_boxes = tf.gather(
                gt_boxes, fg_inds_wrt_gt, name='gt_boxes_per_fg_proposal')
            matched_gt_masks = tf.gather(
                gt_masks, fg_inds_wrt_gt, name='gt_masks_per_fg_proposal')
        else:
            # Inference crops RoIs for every proposal box.
            rcnn_boxes = proposal_boxes
            num_proposals = tf.shape(proposal_boxes)[0]
            angles = tf.ones((num_proposals, 1)) * (-45.)
            first_corner = proposal_boxes[:, 0:2]
            second_corner = proposal_boxes[:, 2:4]
            sizes = second_corner - first_corner
            centers = (second_corner + first_corner) * 0.5
            # Per-proposal (center, size, constant -45 angle) rows.
            rcnn_masks = tf.concat([centers, sizes, angles], axis=1)
            rcnn_labels = None
            matched_gt_boxes = None
            matched_gt_masks = None
            # ToDo

        inv_stride = 1.0 / cfg.RPN.ANCHOR_STRIDE
        boxes_on_featuremap = rcnn_boxes * inv_stride
        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

        feature_fastrcnn = resnet_conv5(
            roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])  # nxcx7x7
        feature_gap = GlobalAvgPooling(
            'gap', feature_fastrcnn, data_format='channels_first')
        (fastrcnn_label_logits, fastrcnn_box_logits,
         fastrcnn_mask_logits) = fastrcnn_outputs(
            'fastrcnn', feature_gap, cfg.DATA.NUM_CLASS)

        fastrcnn_head = FastRCNNHead(
            rcnn_boxes, rcnn_masks,
            fastrcnn_box_logits, fastrcnn_mask_logits,
            fastrcnn_label_logits, rcnn_labels,
            matched_gt_boxes, matched_gt_masks)

        if training:
            # RPN losses, then head losses, then weight decay.
            rpn_label_loss, rpn_box_loss = rpn_losses(
                anchors.gt_labels, anchors.encoded_gt_boxes(),
                rpn_label_logits, rpn_box_logits)
            fastrcnn_label_loss, fastrcnn_box_loss, mask_loss = \
                fastrcnn_head.losses()
            wd_cost = regularize_cost(
                '.*/W', l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost')

            loss_terms = [
                rpn_label_loss, rpn_box_loss,
                fastrcnn_label_loss, fastrcnn_box_loss,
                mask_loss, wd_cost,
            ]
            total_cost = tf.add_n(loss_terms, 'total_cost')

            add_moving_summary(total_cost, wd_cost)
            return total_cost

        # ToDo
        # Inference: pick, for each final detection, the mask logits of its
        # predicted label (labels are shifted by one to index the logits).
        _, final_labels = self.fastrcnn_inference(image_shape2d, fastrcnn_head)
        row_ids = tf.range(tf.size(final_labels))
        class_ids = tf.to_int32(final_labels) - 1
        indices = tf.stack([row_ids, class_ids], axis=1)
        tf.gather_nd(fastrcnn_mask_logits, indices, name='final_masks')
Пример #12
0
    def build_graph(self, *inputs):
        """
        Build the C4 Faster R-CNN graph, plus the mask branch if enabled.

        ``inputs`` unpacks to ``image, anchor_labels, anchor_boxes, gt_boxes,
        gt_labels`` followed by ``gt_masks`` when ``cfg.MODE_MASK`` is set.

        A training tower returns the ``total_cost`` tensor. An inference tower
        returns nothing; with ``cfg.MODE_MASK`` it ends by creating the tensor
        named ``final_masks``.
        """
        training = get_current_tower_context().is_training
        if cfg.MODE_MASK:
            (image, anchor_labels, anchor_boxes,
             gt_boxes, gt_labels, gt_masks) = inputs
        else:
            (image, anchor_labels, anchor_boxes,
             gt_boxes, gt_labels) = inputs
        image = self.preprocess(image)  # 1CHW

        featuremap = resnet_c4_backbone(
            image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3])
        rpn_label_logits, rpn_box_logits = rpn_head(
            'rpn', featuremap, 1024, cfg.RPN.NUM_ANCHOR)

        # Restrict the anchors and their targets to the feature map.
        fm_anchors, anchor_labels, anchor_boxes = self.narrow_to_featuremap(
            featuremap, get_all_anchors(), anchor_labels, anchor_boxes)
        anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)

        image_shape2d = tf.shape(image)[2:]  # h,w
        decoded_boxes = decode_bbox_target(
            rpn_box_logits, fm_anchors)  # fHxfWxNAx4, floatbox

        if training:
            pre_nms_topk = cfg.RPN.TRAIN_PRE_NMS_TOPK
            post_nms_topk = cfg.RPN.TRAIN_POST_NMS_TOPK
        else:
            pre_nms_topk = cfg.RPN.TEST_PRE_NMS_TOPK
            post_nms_topk = cfg.RPN.TEST_POST_NMS_TOPK
        flat_boxes = tf.reshape(decoded_boxes, [-1, 4])
        flat_scores = tf.reshape(rpn_label_logits, [-1])
        proposal_boxes, _ = generate_rpn_proposals(
            flat_boxes, flat_scores, image_shape2d,
            pre_nms_topk, post_nms_topk)

        if training:
            # Training works on a sampled subset of the proposals.
            rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
                proposal_boxes, gt_boxes, gt_labels)
        else:
            # Inference crops RoIs for all proposals.
            rcnn_boxes = proposal_boxes

        inv_stride = 1.0 / cfg.RPN.ANCHOR_STRIDE
        roi_resized = roi_align(featuremap, rcnn_boxes * inv_stride, 14)

        # The C5 feature is kept so the mask branch can reuse it in training.
        feature_fastrcnn = resnet_conv5(
            roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])  # nxcx7x7
        feature_gap = GlobalAvgPooling(
            'gap', feature_fastrcnn, data_format='channels_first')
        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
            'fastrcnn', feature_gap, cfg.DATA.NUM_CLASS)

        if training:
            rpn_label_loss, rpn_box_loss = rpn_losses(
                anchor_labels, anchor_boxes_encoded,
                rpn_label_logits, rpn_box_logits)

            # Fast R-CNN losses: the box loss only sees foreground samples.
            matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)
            fg_mask = rcnn_labels > 0
            fg_inds_wrt_sample = tf.reshape(
                tf.where(fg_mask), [-1])  # fg inds w.r.t all samples
            fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
            fg_box_logits = tf.gather(fastrcnn_box_logits, fg_inds_wrt_sample)

            fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
                image, rcnn_labels, fg_sampled_boxes, matched_gt_boxes,
                fastrcnn_label_logits, fg_box_logits)

            mrcnn_loss = 0.0
            if cfg.MODE_MASK:
                # Mask loss on the foreground samples, sharing the C5 feature.
                fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
                fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
                mask_logits = maskrcnn_upXconv_head(
                    'maskrcnn', fg_feature, cfg.DATA.NUM_CATEGORY,
                    num_convs=0)  # #fg x #cat x 14x14

                gt_masks_4d = tf.expand_dims(gt_masks, 1)
                mask_targets = crop_and_resize(
                    gt_masks_4d, fg_sampled_boxes, fg_inds_wrt_gt, 14,
                    pad_border=False)  # nfg x 1x14x14
                mask_targets = tf.squeeze(
                    mask_targets, 1, 'sampled_fg_mask_targets')
                mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels, mask_targets)

            wd_cost = regularize_cost(
                '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
                l2_regularizer(cfg.TRAIN.WEIGHT_DECAY),
                name='wd_cost')

            loss_terms = [
                rpn_label_loss, rpn_box_loss,
                fastrcnn_label_loss, fastrcnn_box_loss,
                mrcnn_loss, wd_cost,
            ]
            total_cost = tf.add_n(loss_terms, 'total_cost')

            add_moving_summary(total_cost, wd_cost)
            return total_cost

        final_boxes, final_labels = self.fastrcnn_inference(
            image_shape2d, rcnn_boxes, fastrcnn_label_logits,
            fastrcnn_box_logits)

        if cfg.MODE_MASK:
            # Run the mask head again, this time on the final boxes.
            roi_resized = roi_align(featuremap, final_boxes * inv_stride, 14)
            feature_maskrcnn = resnet_conv5(
                roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', feature_maskrcnn, cfg.DATA.NUM_CATEGORY,
                0)  # #result x #cat x 14x14
            row_ids = tf.range(tf.size(final_labels))
            class_ids = tf.to_int32(final_labels) - 1
            indices = tf.stack([row_ids, class_ids], axis=1)
            final_mask_logits = tf.gather_nd(
                mask_logits, indices)  # #resultx14x14
            tf.sigmoid(final_mask_logits, name='final_masks')
Пример #13
0
    def build_graph(self, *inputs):
        """
        Build the C4 Faster R-CNN graph, plus the mask branch if enabled.

        ``inputs`` unpacks to ``image, anchor_labels, anchor_boxes, gt_boxes,
        gt_labels`` followed by ``gt_masks`` when ``config.MODE_MASK`` is set.

        A training tower returns the ``total_cost`` tensor. An inference tower
        returns nothing; with ``config.MODE_MASK`` it ends by creating the
        tensor named ``final_masks``.
        """
        is_training = get_current_tower_context().is_training
        if config.MODE_MASK:
            image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_masks = inputs
        else:
            image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
        image = self.preprocess(image)     # 1CHW

        featuremap = resnet_c4_backbone(image, config.RESNET_NUM_BLOCK[:3])
        rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, 1024, config.NUM_ANCHOR)

        fm_anchors, anchor_labels, anchor_boxes = self.narrow_to_featuremap(
            featuremap, get_all_anchors(), anchor_labels, anchor_boxes)
        anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)

        image_shape2d = tf.shape(image)[2:]     # h,w
        pred_boxes_decoded = decode_bbox_target(rpn_box_logits, fm_anchors)  # fHxfWxNAx4, floatbox
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            tf.reshape(pred_boxes_decoded, [-1, 4]),
            tf.reshape(rpn_label_logits, [-1]),
            image_shape2d,
            config.TRAIN_PRE_NMS_TOPK if is_training else config.TEST_PRE_NMS_TOPK,
            config.TRAIN_POST_NMS_TOPK if is_training else config.TEST_POST_NMS_TOPK)

        if is_training:
            # sample proposal boxes in training
            rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
                proposal_boxes, gt_boxes, gt_labels)
        else:
            # The boxes to be used to crop RoIs.
            # Use all proposal boxes in inference
            rcnn_boxes = proposal_boxes

        boxes_on_featuremap = rcnn_boxes * (1.0 / config.ANCHOR_STRIDE)
        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

        # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
        # which was fixed in TF 1.6
        def ff_true():
            feature_fastrcnn = resnet_conv5(roi_resized, config.RESNET_NUM_BLOCK[-1])    # nxcx7x7
            feature_gap = GlobalAvgPooling('gap', feature_fastrcnn, data_format='channels_first')
            fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs('fastrcnn', feature_gap, config.NUM_CLASS)
            # Return C5 feature to be shared with mask branch
            return feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits

        def ff_false():
            # Empty stand-ins used by the legacy path when there are no boxes.
            ncls = config.NUM_CLASS
            return tf.zeros([0, 2048, 7, 7]), tf.zeros([0, ncls]), tf.zeros([0, ncls - 1, 4])

        # Compare (major, minor) as integers. A float comparison is wrong for
        # two-digit minors: "1.10" becomes 1.1, which looks older than 1.6.
        tf_major_minor = tuple(int(part) for part in tf.__version__.split('.')[:2])
        if tf_major_minor >= (1, 6):
            feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits = ff_true()
        else:
            logger.warn("This example may drop support for TF < 1.6 soon.")
            feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits = tf.cond(
                tf.size(boxes_on_featuremap) > 0, ff_true, ff_false)

        if is_training:
            # rpn loss
            rpn_label_loss, rpn_box_loss = rpn_losses(
                anchor_labels, anchor_boxes_encoded, rpn_label_logits, rpn_box_logits)

            # fastrcnn loss
            matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

            fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0), [-1])   # fg inds w.r.t all samples
            fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
            fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits, fg_inds_wrt_sample)

            fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
                image, rcnn_labels, fg_sampled_boxes,
                matched_gt_boxes, fastrcnn_label_logits, fg_fastrcnn_box_logits)

            if config.MODE_MASK:
                # maskrcnn loss
                fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
                # In training, mask branch shares the same C5 feature.
                fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
                mask_logits = maskrcnn_upXconv_head(
                    'maskrcnn', fg_feature, config.NUM_CLASS, num_convs=0)   # #fg x #cat x 14x14

                target_masks_for_fg = crop_and_resize(
                    tf.expand_dims(gt_masks, 1),
                    fg_sampled_boxes,
                    fg_inds_wrt_gt, 14,
                    pad_border=False)  # nfg x 1x14x14
                target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1, 'sampled_fg_mask_targets')
                mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels, target_masks_for_fg)
            else:
                mrcnn_loss = 0.0

            wd_cost = regularize_cost(
                '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
                l2_regularizer(1e-4), name='wd_cost')

            total_cost = tf.add_n([
                rpn_label_loss, rpn_box_loss,
                fastrcnn_label_loss, fastrcnn_box_loss,
                mrcnn_loss,
                wd_cost], 'total_cost')

            add_moving_summary(total_cost, wd_cost)
            return total_cost
        else:
            final_boxes, final_labels = self.fastrcnn_inference(
                image_shape2d, rcnn_boxes, fastrcnn_label_logits, fastrcnn_box_logits)

            if config.MODE_MASK:
                # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
                def f1():
                    roi_resized = roi_align(featuremap, final_boxes * (1.0 / config.ANCHOR_STRIDE), 14)
                    feature_maskrcnn = resnet_conv5(roi_resized, config.RESNET_NUM_BLOCK[-1])
                    mask_logits = maskrcnn_upXconv_head(
                        'maskrcnn', feature_maskrcnn, config.NUM_CLASS, 0)   # #result x #cat x 14x14
                    # Pair each detection's row with (label - 1) to pick its class mask.
                    indices = tf.stack([tf.range(tf.size(final_labels)), tf.to_int32(final_labels) - 1], axis=1)
                    final_mask_logits = tf.gather_nd(mask_logits, indices)   # #resultx14x14
                    return tf.sigmoid(final_mask_logits)

                # Only run the mask head when there is at least one detection.
                final_masks = tf.cond(tf.size(final_labels) > 0, f1, lambda: tf.zeros([0, 14, 14]))
                tf.identity(final_masks, name='final_masks')
Пример #14
0
    def build_graph(self, *inputs):
        """
        Construct the C4 detection graph for the current tower.

        ``inputs`` holds ``image, anchor_labels, anchor_boxes, gt_boxes,
        gt_labels`` and, only when ``cfg.MODE_MASK`` is on, ``gt_masks``.

        In training the ``total_cost`` tensor is returned. In inference the
        method returns ``None`` after adding the detection ops and, with
        ``cfg.MODE_MASK``, a tensor named ``final_masks``.
        """
        is_training = get_current_tower_context().is_training
        if cfg.MODE_MASK:
            (image, anchor_labels, anchor_boxes,
             gt_boxes, gt_labels, gt_masks) = inputs
        else:
            (image, anchor_labels, anchor_boxes,
             gt_boxes, gt_labels) = inputs
        image = self.preprocess(image)     # 1CHW

        featuremap = resnet_c4_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3])
        rpn_label_logits, rpn_box_logits = rpn_head(
            'rpn', featuremap, 1024, cfg.RPN.NUM_ANCHOR)

        fm_anchors, anchor_labels, anchor_boxes = self.narrow_to_featuremap(
            featuremap, get_all_anchors(), anchor_labels, anchor_boxes)
        anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)

        image_shape2d = tf.shape(image)[2:]     # h,w
        # fHxfWxNAx4, floatbox
        pred_boxes_decoded = decode_bbox_target(rpn_box_logits, fm_anchors)

        # Top-k limits around NMS depend on the tower mode.
        if is_training:
            topk_before_nms = cfg.RPN.TRAIN_PRE_NMS_TOPK
            topk_after_nms = cfg.RPN.TRAIN_POST_NMS_TOPK
        else:
            topk_before_nms = cfg.RPN.TEST_PRE_NMS_TOPK
            topk_after_nms = cfg.RPN.TEST_POST_NMS_TOPK
        boxes_2d = tf.reshape(pred_boxes_decoded, [-1, 4])
        scores_1d = tf.reshape(rpn_label_logits, [-1])
        proposal_boxes, _ = generate_rpn_proposals(
            boxes_2d, scores_1d, image_shape2d, topk_before_nms, topk_after_nms)

        if is_training:
            # Sample proposal boxes for training.
            rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
                proposal_boxes, gt_boxes, gt_labels)
        else:
            # Every proposal box is used to crop RoIs in inference.
            rcnn_boxes = proposal_boxes

        stride_scale = 1.0 / cfg.RPN.ANCHOR_STRIDE
        boxes_on_featuremap = rcnn_boxes * stride_scale
        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

        # nxcx7x7; kept around because the mask branch shares it in training.
        feature_fastrcnn = resnet_conv5(roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
        feature_gap = GlobalAvgPooling(
            'gap', feature_fastrcnn, data_format='channels_first')
        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
            'fastrcnn', feature_gap, cfg.DATA.NUM_CLASS)

        if not is_training:
            final_boxes, final_labels = self.fastrcnn_inference(
                image_shape2d, rcnn_boxes, fastrcnn_label_logits, fastrcnn_box_logits)

            if cfg.MODE_MASK:
                # Re-crop features for the final boxes and run the mask head.
                roi_resized = roi_align(featuremap, final_boxes * stride_scale, 14)
                feature_maskrcnn = resnet_conv5(
                    roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
                # #result x #cat x 14x14
                mask_logits = maskrcnn_upXconv_head(
                    'maskrcnn', feature_maskrcnn, cfg.DATA.NUM_CATEGORY, 0)
                detection_rows = tf.range(tf.size(final_labels))
                label_columns = tf.to_int32(final_labels) - 1
                indices = tf.stack([detection_rows, label_columns], axis=1)
                # #resultx14x14
                final_mask_logits = tf.gather_nd(mask_logits, indices)
                tf.sigmoid(final_mask_logits, name='final_masks')
            return

        # ---- training losses ----
        rpn_label_loss, rpn_box_loss = rpn_losses(
            anchor_labels, anchor_boxes_encoded, rpn_label_logits, rpn_box_logits)

        matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

        # Indices of the foreground samples among all sampled boxes.
        is_foreground = rcnn_labels > 0
        fg_inds_wrt_sample = tf.reshape(tf.where(is_foreground), [-1])
        fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
        fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits, fg_inds_wrt_sample)

        fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
            image, rcnn_labels, fg_sampled_boxes,
            matched_gt_boxes, fastrcnn_label_logits, fg_fastrcnn_box_logits)

        if cfg.MODE_MASK:
            fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
            # The mask branch reuses the C5 feature of the foreground samples.
            fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
            # #fg x #cat x 14x14
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', fg_feature, cfg.DATA.NUM_CATEGORY, num_convs=0)

            expanded_gt_masks = tf.expand_dims(gt_masks, 1)
            # nfg x 1x14x14
            cropped_targets = crop_and_resize(
                expanded_gt_masks, fg_sampled_boxes, fg_inds_wrt_gt, 14,
                pad_border=False)
            target_masks_for_fg = tf.squeeze(
                cropped_targets, 1, 'sampled_fg_mask_targets')
            mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels, target_masks_for_fg)
        else:
            mrcnn_loss = 0.0

        wd_cost = regularize_cost(
            '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
            l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost')

        all_losses = [
            rpn_label_loss, rpn_box_loss,
            fastrcnn_label_loss, fastrcnn_box_loss,
            mrcnn_loss,
            wd_cost,
        ]
        total_cost = tf.add_n(all_losses, 'total_cost')

        add_moving_summary(total_cost, wd_cost)
        return total_cost
Пример #15
0
    def build_graph(self, *inputs):
        """
        Build the detection graph together with a re-identification head.

        ``inputs`` unpacks to ``image, anchor_labels, anchor_boxes, gt_boxes,
        gt_labels, gt_ids, orig_shape``.

        Training tower: returns ``total_cost``, the sum of the RPN losses, the
        Fast R-CNN losses, the normalized re-id loss and the weight-decay cost.

        Inference tower: returns nothing. It creates the tensors named
        ``rescaled_final_boxes``, ``feature_vector`` and ``re_id_probs``. When
        ``cfg.RE_ID.QUERY_EVAL`` is set, the given ``gt_boxes`` are used in
        place of detections.
        """
        is_training = get_current_tower_context().is_training
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_ids, orig_shape = inputs
        image = self.preprocess(image)  # 1CHW

        featuremap = resnet_c4_backbone(image,
                                        cfg.BACKBONE.RESNET_NUM_BLOCK[:3])
        rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap,
                                                    cfg.RPN.HEAD_DIM,
                                                    cfg.RPN.NUM_ANCHOR)

        anchors = RPNAnchors(get_all_anchors(), anchor_labels, anchor_boxes)
        anchors = anchors.narrow_to(featuremap)

        image_shape2d = tf.shape(image)[2:]  # h,w
        # decode into actual image coordinates
        pred_boxes_decoded = anchors.decode_logits(
            rpn_box_logits)  # fHxfWxNAx4, floatbox
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            tf.reshape(pred_boxes_decoded, [-1, 4]),
            tf.reshape(rpn_label_logits,
                       [-1]), image_shape2d, cfg.RPN.TRAIN_PRE_NMS_TOPK
            if is_training else cfg.RPN.TEST_PRE_NMS_TOPK,
            cfg.RPN.TRAIN_POST_NMS_TOPK
            if is_training else cfg.RPN.TEST_POST_NMS_TOPK)

        if is_training:
            # sample proposal boxes in training
            rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
                proposal_boxes, gt_boxes, gt_labels)
        else:
            # The boxes to be used to crop RoIs.
            # Use all proposal boxes in inference
            rcnn_boxes = proposal_boxes

        boxes_on_featuremap = rcnn_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
        # size? #proposals*h*w*c?
        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

        feature_fastrcnn = resnet_conv5(
            roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])  # nxcx7x7
        # NOTE(review): feature_fastrcnn is not reused below; the re-id head
        # computes its own conv5 feature.
        feature_gap = GlobalAvgPooling('gap',
                                       feature_fastrcnn,
                                       data_format='channels_first')
        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
            'fastrcnn', feature_gap, cfg.DATA.NUM_CLASS)

        if is_training:
            # rpn loss
            rpn_label_loss, rpn_box_loss = rpn_losses(
                anchors.gt_labels, anchors.encoded_gt_boxes(),
                rpn_label_logits, rpn_box_logits)

            # fastrcnn loss
            matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

            fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0),
                                            [-1])  # fg inds w.r.t all samples
            # outputs from fg proposals
            fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
            fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits,
                                               fg_inds_wrt_sample)

            # rcnn_labels: the labels of the proposals
            # fg_sampled_boxes: fg proposals
            # matched_gt_boxes: just like RPN, the gt boxes
            #                   that match the corresponding fg proposals
            fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
                image, rcnn_labels, fg_sampled_boxes, matched_gt_boxes,
                fastrcnn_label_logits, fg_fastrcnn_box_logits)

            # acquire pred for re-id training
            # turning NMS off gives re-id branch more training samples
            if cfg.RE_ID.NMS:
                boxes, final_labels, final_probs = self.fastrcnn_inference(
                    image_shape2d, rcnn_boxes, fastrcnn_label_logits,
                    fastrcnn_box_logits)
            else:
                boxes, final_labels, final_probs = self.fastrcnn_inference_id(
                    image_shape2d, rcnn_boxes, fastrcnn_label_logits,
                    fastrcnn_box_logits)
            # scale = tf.sqrt(tf.cast(image_shape2d[0], tf.float32) / tf.cast(orig_shape[0], tf.float32) *
            #                 tf.cast(image_shape2d[1], tf.float32) / tf.cast(orig_shape[1], tf.float32))
            # final_boxes = boxes / scale
            # # boxes are already clipped inside the graph, but after the floating point scaling, this may not be true any more.
            # final_boxes = tf_clip_boxes(final_boxes, orig_shape)

            # IOU, discard bad dets, assign re-id labels
            # the results are already NMS so no need to NMS again
            # crop from conv4 with dets (maybe plus gts)
            # feedforward re-id branch
            # resizing during ROIalign?
            iou = pairwise_iou(boxes, gt_boxes)  # are the gt boxes resized?
            # Keep only detections whose best IoU with any gt box reaches the
            # configured threshold; `iou` is filtered to those rows.
            tp_mask = tf.reduce_max(iou, axis=1) >= cfg.RE_ID.IOU_THRESH
            iou = tf.boolean_mask(iou, tp_mask)

            # return iou to debug

            def re_id_loss(pred_boxes, pred_matching_gt_ids, featuremap):
                # Runs the re-id head on `pred_boxes` and returns
                # (mean cross-entropy against the matched ids, sample counter).
                with tf.variable_scope('id_head'):
                    # Non-trainable counter incremented by the number of boxes
                    # passed to this head.
                    num_of_samples_used = tf.get_variable(
                        'num_of_samples_used', initializer=0, trainable=False)
                    num_of_samples_used = num_of_samples_used.assign_add(
                        tf.shape(pred_boxes)[0])

                    boxes_on_featuremap = pred_boxes * (1.0 /
                                                        cfg.RPN.ANCHOR_STRIDE)
                    # name scope?
                    # stop gradient
                    roi_resized = roi_align(featuremap, boxes_on_featuremap,
                                            14)
                    feature_idhead = resnet_conv5(
                        roi_resized,
                        cfg.BACKBONE.RESNET_NUM_BLOCK[-1])  # nxcx7x7
                    feature_gap = GlobalAvgPooling(
                        'gap', feature_idhead, data_format='channels_first')

                    init = tf.variance_scaling_initializer()
                    hidden = FullyConnected('fc6',
                                            feature_gap,
                                            1024,
                                            kernel_initializer=init,
                                            activation=tf.nn.relu)
                    hidden = FullyConnected('fc7',
                                            hidden,
                                            1024,
                                            kernel_initializer=init,
                                            activation=tf.nn.relu)
                    hidden = FullyConnected('fc8',
                                            hidden,
                                            256,
                                            kernel_initializer=init,
                                            activation=tf.nn.relu)
                    id_logits = FullyConnected(
                        'class',
                        hidden,
                        cfg.DATA.NUM_ID,
                        kernel_initializer=tf.random_normal_initializer(
                            stddev=0.01))

                label_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=pred_matching_gt_ids, logits=id_logits)
                label_loss = tf.reduce_mean(label_loss, name='label_loss')

                return label_loss, num_of_samples_used

            def check_unid_pedes(iou, gt_ids, boxes, tp_mask, featuremap):
                # Assigns each kept detection the id of its best-IoU gt box,
                # drops those matched to id 1, and returns the re-id loss pair
                # (or the stable fallback when nothing is left).
                pred_gt_ind = tf.argmax(iou, axis=1)
                # output following tensors
                # pick out the -2 class here
                pred_matching_gt_ids = tf.gather(gt_ids, pred_gt_ind)
                pred_boxes = tf.boolean_mask(boxes, tp_mask)
                # label 1 corresponds to unid pedes
                unid_ind = tf.not_equal(pred_matching_gt_ids, 1)
                pred_matching_gt_ids = tf.boolean_mask(pred_matching_gt_ids,
                                                       unid_ind)
                pred_boxes = tf.boolean_mask(pred_boxes, unid_ind)

                ret = tf.cond(
                    tf.equal(tf.size(pred_boxes), 0), lambda:
                    (tf.constant(cfg.RE_ID.STABLE_LOSS), tf.constant(0)),
                    lambda: re_id_loss(pred_boxes, pred_matching_gt_ids,
                                       featuremap))
                return ret

            with tf.name_scope('id_head'):
                # no detection passes the IoU threshold: fall back to the
                # constant cfg.RE_ID.STABLE_LOSS and a zero sample count
                # NOTE(review): this rebinds `re_id_loss` from the nested
                # function above to the loss tensor. It relies on tf.cond
                # invoking the branch callables during this call, before the
                # assignment takes effect -- confirm.
                re_id_loss, num_of_samples_used = tf.cond(
                    tf.equal(tf.size(iou), 0), lambda:
                    (tf.constant(cfg.RE_ID.STABLE_LOSS), tf.constant(0)),
                    lambda: check_unid_pedes(iou, gt_ids, boxes, tp_mask,
                                             featuremap))
                add_tensor_summary(num_of_samples_used, ['scalar'],
                                   name='num_of_samples_used')
            # for debug, use tensor name to take out the handle
            # return re_id_loss

            # pred_gt_ind = tf.argmax(iou, axis=1)
            # # output following tensors
            # # pick out the -2 class here
            # pred_gt_ids = tf.gather(gt_ids, pred_gt_ind)
            # pred_boxes = tf.boolean_mask(boxes, tp_mask)
            # unid_ind = pred_gt_ids != 1

            # return unid_ind

            # return tf.shape(boxes)[0]

            # Expose the raw loss by name, then scale it by the configured
            # normalization before it enters the total cost.
            unnormed_id_loss = tf.identity(re_id_loss, name='unnormed_id_loss')
            re_id_loss = tf.divide(re_id_loss, cfg.RE_ID.LOSS_NORMALIZATION,
                                   're_id_loss')
            add_moving_summary(unnormed_id_loss)
            add_moving_summary(re_id_loss)

            wd_cost = regularize_cost('.*/W',
                                      l2_regularizer(cfg.TRAIN.WEIGHT_DECAY),
                                      name='wd_cost')

            # weights on the losses?
            total_cost = tf.add_n([
                rpn_label_loss, rpn_box_loss, fastrcnn_label_loss,
                fastrcnn_box_loss, re_id_loss, wd_cost
            ], 'total_cost')

            add_moving_summary(total_cost, wd_cost)
            return total_cost
        else:
            if cfg.RE_ID.QUERY_EVAL:
                # resize the gt_boxes in dataflow
                final_boxes = gt_boxes
            else:
                final_boxes, final_labels, _ = self.fastrcnn_inference(
                    image_shape2d, rcnn_boxes, fastrcnn_label_logits,
                    fastrcnn_box_logits)

            # Same layer names under 'id_head' as in the training-time head
            # (fc6, fc7, fc8, class).
            with tf.variable_scope('id_head'):
                preds_on_featuremap = final_boxes * (1.0 /
                                                     cfg.RPN.ANCHOR_STRIDE)
                # name scope?
                # stop gradient
                roi_resized = roi_align(featuremap, preds_on_featuremap, 14)
                feature_idhead = resnet_conv5(
                    roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])  # nxcx7x7
                feature_gap = GlobalAvgPooling('gap',
                                               feature_idhead,
                                               data_format='channels_first')

                hidden = FullyConnected('fc6',
                                        feature_gap,
                                        1024,
                                        activation=tf.nn.relu)
                hidden = FullyConnected('fc7',
                                        hidden,
                                        1024,
                                        activation=tf.nn.relu)
                fv = FullyConnected('fc8', hidden, 256, activation=tf.nn.relu)
                id_logits = FullyConnected(
                    'class',
                    fv,
                    cfg.DATA.NUM_ID,
                    kernel_initializer=tf.random_normal_initializer(
                        stddev=0.01))

            # Single scale factor: square root of the product of the height
            # and width ratios between the network input and orig_shape.
            scale = tf.sqrt(
                tf.cast(image_shape2d[0], tf.float32) /
                tf.cast(orig_shape[0], tf.float32) *
                tf.cast(image_shape2d[1], tf.float32) /
                tf.cast(orig_shape[1], tf.float32))
            rescaled_final_boxes = final_boxes / scale
            # boxes are already clipped inside the graph, but after the floating point scaling, this may not be true any more.
            # rescaled_final_boxes_pre_clip = tf.identity(rescaled_final_boxes, name='re_boxes_pre_clip')
            rescaled_final_boxes = tf_clip_boxes(rescaled_final_boxes,
                                                 orig_shape)
            rescaled_final_boxes = tf.identity(rescaled_final_boxes,
                                               'rescaled_final_boxes')

            # Named outputs: the 256-d fc8 activation and the id softmax.
            fv = tf.identity(fv, name='feature_vector')
            prob = tf.nn.softmax(id_logits, name='re_id_probs')
Пример #16
0
    def build_graph(self, *inputs):
        """Assemble the ResNet-C4 detection graph for the current tower.

        The positional ``inputs`` are paired with ``self.input_names`` and
        fed through the C4 backbone, the RPN head, proposal generation and
        the Fast R-CNN head. The mask head is added when ``cfg.MODE_MASK``
        is set.

        Returns:
            In training, the ``total_cost`` tensor: the sum of the RPN
            losses, the Fast R-CNN losses, the optional mask loss and the
            weight-decay cost. In inference nothing is returned; the
            predictions only exist as named tensors in the graph.
        """
        # TODO need to make tensorpack handles dict better
        tensors = dict(zip(self.input_names, inputs))
        training = get_current_tower_context().is_training
        image = self.preprocess(tensors['image'])  # 1CHW

        # Proposal budget around NMS differs between training and testing.
        if training:
            pre_nms_topk = cfg.RPN.TRAIN_PRE_NMS_TOPK
            post_nms_topk = cfg.RPN.TRAIN_POST_NMS_TOPK
        else:
            pre_nms_topk = cfg.RPN.TEST_PRE_NMS_TOPK
            post_nms_topk = cfg.RPN.TEST_POST_NMS_TOPK

        # Backbone + RPN head.
        c4 = resnet_c4_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3])
        rpn_cls_logits, rpn_reg_logits = rpn_head(
            'rpn', c4, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

        anchors = RPNAnchors(
            get_all_anchors(),
            tensors['anchor_labels'],
            tensors['anchor_boxes'],
        ).narrow_to(c4)

        image_hw = tf.shape(image)[2:]  # h,w
        decoded_anchor_boxes = anchors.decode_logits(
            rpn_reg_logits)  # fHxfWxNAx4, floatbox
        flat_boxes = tf.reshape(decoded_anchor_boxes, [-1, 4])
        flat_scores = tf.reshape(rpn_cls_logits, [-1])
        rpn_boxes, _ = generate_rpn_proposals(
            flat_boxes, flat_scores, image_hw, pre_nms_topk, post_nms_topk)

        # Ground truth is looked up in both modes, exactly once.
        gt_boxes = tensors['gt_boxes']
        gt_labels = tensors['gt_labels']
        # Training samples proposals against the ground truth; inference
        # crops RoIs from every proposal box.
        rois = (sample_fast_rcnn_targets(rpn_boxes, gt_boxes, gt_labels)
                if training else BoxProposals(rpn_boxes))

        # Factor that maps image-space boxes onto the C4 feature map.
        to_feature_coords = 1.0 / cfg.RPN.ANCHOR_STRIDE
        roi_features = roi_align(c4, rois.boxes * to_feature_coords, 14)

        # Keep the C5 feature so the mask branch can share it in training.
        c5 = resnet_conv5(
            roi_features, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])  # nxcx7x7
        pooled = GlobalAvgPooling('gap', c5, data_format='channels_first')
        cls_logits, reg_logits = fastrcnn_outputs(
            'fastrcnn', pooled, cfg.DATA.NUM_CLASS)

        reg_weights = tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS,
                                  dtype=tf.float32)
        head = FastRCNNHead(rois, reg_logits, cls_logits, reg_weights)

        if training:
            # RPN losses first, then the Fast R-CNN losses.
            losses = list(
                rpn_losses(anchors.gt_labels, anchors.encoded_gt_boxes(),
                           rpn_cls_logits, rpn_reg_logits))
            losses.extend(head.losses())

            if cfg.MODE_MASK:
                # The mask branch reuses the C5 feature of the foreground
                # proposals.
                fg_c5 = tf.gather(c5, rois.fg_inds())
                mask_logits = maskrcnn_upXconv_head(
                    'maskrcnn', fg_c5, cfg.DATA.NUM_CATEGORY,
                    num_convs=0)  # #fg x #cat x 14x14

                gt_masks_expanded = tf.expand_dims(tensors['gt_masks'], 1)
                fg_mask_targets = crop_and_resize(
                    gt_masks_expanded,
                    rois.fg_boxes(),
                    rois.fg_inds_wrt_gt,
                    14,
                    pad_border=False)  # nfg x 1x14x14
                fg_mask_targets = tf.squeeze(fg_mask_targets, 1,
                                             'sampled_fg_mask_targets')
                losses.append(
                    maskrcnn_loss(mask_logits, rois.fg_labels(),
                                  fg_mask_targets))

            wd_cost = regularize_cost(
                '.*/W', l2_regularizer(cfg.TRAIN.WEIGHT_DECAY),
                name='wd_cost')
            losses.append(wd_cost)

            total_cost = tf.add_n(losses, 'total_cost')
            add_moving_summary(total_cost, wd_cost)
            return total_cost

        # Inference: decode, clip and post-process the detections.
        all_boxes = clip_boxes(head.decoded_output_boxes(),
                               image_hw,
                               name='fastrcnn_all_boxes')
        all_scores = head.output_scores(name='fastrcnn_all_scores')
        final_boxes, _, final_labels = fastrcnn_predictions(
            all_boxes, all_scores, name_scope='output')

        if not cfg.MODE_MASK:
            return

        # Re-crop features at the final boxes and run the mask head on them.
        mask_rois = roi_align(c4, final_boxes * to_feature_coords, 14)
        mask_features = resnet_conv5(mask_rois,
                                     cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
        mask_logits = maskrcnn_upXconv_head(
            'maskrcnn', mask_features, cfg.DATA.NUM_CATEGORY,
            0)  # #result x #cat x 14x14

        # For each result i, select the mask at category index (label_i - 1).
        result_ids = tf.range(tf.size(final_labels))
        category_ids = tf.to_int32(final_labels) - 1
        picked_logits = tf.gather_nd(
            mask_logits,
            tf.stack([result_ids, category_ids], axis=1))  # #resultx14x14
        tf.sigmoid(picked_logits, name='output/masks')
Пример #17
0
    def build_graph(self, *inputs):
        """Build the Faster/Mask R-CNN (ResNet-C4) graph for this tower.

        ``inputs`` are matched by position with ``self.input_names``. The
        graph runs the C4 backbone, the RPN head, proposal generation and
        the Fast R-CNN head; the mask head is added when ``cfg.MODE_MASK``
        is set.

        Returns:
            The ``total_cost`` tensor in training (all collected losses plus
            the weight-decay cost). In inference nothing is returned and the
            outputs are created as named tensors only.
        """
        feed = dict(zip(self.input_names, inputs))
        is_training = get_current_tower_context().is_training
        image = self.preprocess(feed['image'])     # 1CHW

        # Choose the train-time or test-time NMS budgets in one go.
        pre_topk, post_topk = (
            (cfg.RPN.TRAIN_PRE_NMS_TOPK, cfg.RPN.TRAIN_POST_NMS_TOPK)
            if is_training
            else (cfg.RPN.TEST_PRE_NMS_TOPK, cfg.RPN.TEST_POST_NMS_TOPK))

        backbone_feature = resnet_c4_backbone(
            image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3])
        rpn_label_logits, rpn_box_logits = rpn_head(
            'rpn', backbone_feature, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

        all_anchors = RPNAnchors(
            get_all_anchors(), feed['anchor_labels'], feed['anchor_boxes'])
        anchors = all_anchors.narrow_to(backbone_feature)

        image_shape2d = tf.shape(image)[2:]     # h,w
        rpn_decoded = anchors.decode_logits(rpn_box_logits)  # fHxfWxNAx4, floatbox
        boxes_flat = tf.reshape(rpn_decoded, [-1, 4])
        logits_flat = tf.reshape(rpn_label_logits, [-1])
        proposal_boxes, _ = generate_rpn_proposals(
            boxes_flat, logits_flat, image_shape2d, pre_topk, post_topk)

        gt_boxes = feed['gt_boxes']
        gt_labels = feed['gt_labels']
        if is_training:
            # Training: sample proposal boxes against the ground truth.
            sampled = sample_fast_rcnn_targets(
                proposal_boxes, gt_boxes, gt_labels)
        else:
            # Inference: every proposal box is used to crop RoIs.
            sampled = BoxProposals(proposal_boxes)

        rois_on_feature = sampled.boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
        cropped = roi_align(backbone_feature, rois_on_feature, 14)

        # The C5 feature is kept because the mask branch shares it.
        c5_feature = resnet_conv5(
            cropped, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])    # nxcx7x7
        gap_feature = GlobalAvgPooling(
            'gap', c5_feature, data_format='channels_first')
        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
            'fastrcnn', gap_feature, cfg.DATA.NUM_CLASS)

        bbox_reg_weights = tf.constant(
            cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32)
        fastrcnn_head = FastRCNNHead(
            sampled, fastrcnn_box_logits, fastrcnn_label_logits,
            bbox_reg_weights)

        if is_training:
            all_losses = []
            # RPN losses
            all_losses += rpn_losses(
                anchors.gt_labels,
                anchors.encoded_gt_boxes(),
                rpn_label_logits,
                rpn_box_logits)
            # Fast R-CNN losses
            all_losses += fastrcnn_head.losses()

            if cfg.MODE_MASK:
                # Mask loss: computed on the shared C5 feature of the
                # foreground proposals.
                fg_c5 = tf.gather(c5_feature, sampled.fg_inds())
                mask_logits = maskrcnn_upXconv_head(
                    'maskrcnn', fg_c5, cfg.DATA.NUM_CATEGORY,
                    num_convs=0)   # #fg x #cat x 14x14

                gt_masks_expanded = tf.expand_dims(feed['gt_masks'], 1)
                fg_targets = crop_and_resize(
                    gt_masks_expanded,
                    sampled.fg_boxes(),
                    sampled.fg_inds_wrt_gt,
                    14,
                    pad_border=False)  # nfg x 1x14x14
                fg_targets = tf.squeeze(
                    fg_targets, 1, 'sampled_fg_mask_targets')
                mask_loss = maskrcnn_loss(
                    mask_logits, sampled.fg_labels(), fg_targets)
                all_losses.append(mask_loss)

            wd_cost = regularize_cost(
                '.*/W', l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost')
            all_losses.append(wd_cost)

            total_cost = tf.add_n(all_losses, 'total_cost')
            add_moving_summary(total_cost, wd_cost)
            return total_cost
        else:
            raw_boxes = fastrcnn_head.decoded_output_boxes()
            clipped_boxes = clip_boxes(
                raw_boxes, image_shape2d, name='fastrcnn_all_boxes')
            class_scores = fastrcnn_head.output_scores(
                name='fastrcnn_all_scores')
            final_boxes, _, final_labels = fastrcnn_predictions(
                clipped_boxes, class_scores, name_scope='output')

            if cfg.MODE_MASK:
                # Crop features again at the final boxes for the mask head.
                final_on_feature = final_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
                mask_crops = roi_align(backbone_feature, final_on_feature, 14)
                mask_c5 = resnet_conv5(
                    mask_crops, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
                mask_logits = maskrcnn_upXconv_head(
                    'maskrcnn', mask_c5, cfg.DATA.NUM_CATEGORY, 0)   # #result x #cat x 14x14
                # Pair each result index with its (label - 1) category index.
                row_index = tf.range(tf.size(final_labels))
                col_index = tf.to_int32(final_labels) - 1
                gather_index = tf.stack([row_index, col_index], axis=1)
                selected_logits = tf.gather_nd(mask_logits, gather_index)   # #resultx14x14
                tf.sigmoid(selected_logits, name='output/masks')