Exemplo n.º 1
0
def rpn_net(config, stage='train'):
    """Assemble a standalone region-proposal-network (RPN) Keras model.

    Args:
        config: Configuration object. Always read: IMAGES_PER_GPU,
            IMAGE_INPUT_SHAPE, MAX_GT_INSTANCES, RPN_ANCHOR_NUM,
            RPN_ANCHOR_HEIGHTS, RPN_ANCHOR_WIDTHS, RPN_ANCHOR_BASE_SIZE,
            RPN_ANCHOR_RATIOS, RPN_ANCHOR_SCALES and BACKBONE_STRIDE.
            Training additionally reads RPN_TRAIN_ANCHORS_PER_IMAGE;
            inference additionally reads POST_NMS_ROIS_INFERENCE and
            RPN_NMS_THRESHOLD_INFERENCE.
        stage: 'train' builds the loss model; any other value builds the
            proposal (inference) model.

    Returns:
        A ``Model``.
        * stage == 'train': inputs are
          [image, image_meta, gt_class_ids, gt_boxes] and outputs are
          [rpn_class_loss, rpn_bbox_loss].
        * otherwise: inputs are [image, image_meta] and outputs are
          [proposal boxes, class scores].

        The image-meta input is only declared as a model input; no layer
        built in this function consumes it.
    """
    images_per_batch = config.IMAGES_PER_GPU

    # Model inputs. The creation order is kept fixed because these inputs
    # are unnamed and therefore auto-numbered.
    image_in = Input(shape=config.IMAGE_INPUT_SHAPE)
    gt_class_ids_in = Input(shape=(config.MAX_GT_INSTANCES, 1 + 1))
    gt_boxes_in = Input(shape=(config.MAX_GT_INSTANCES, 4 + 1))
    image_meta_in = Input(shape=(12, ))

    # Backbone features and the RPN head predictions.
    feature_map = resnet50(image_in)
    box_deltas_pred, cls_logits_pred = rpn(feature_map, config.RPN_ANCHOR_NUM)

    # Anchor generation. Clipping of the anchors (UniqueClipBoxes /
    # ClipBoxes against the image window) is not applied here.
    anchor_layer = Anchor(
        config.RPN_ANCHOR_HEIGHTS,
        config.RPN_ANCHOR_WIDTHS,
        config.RPN_ANCHOR_BASE_SIZE,
        config.RPN_ANCHOR_RATIOS,
        config.RPN_ANCHOR_SCALES,
        config.BACKBONE_STRIDE,
        name='gen_anchors',
    )
    anchor_boxes, anchor_tags = anchor_layer(feature_map)

    if stage != 'train':
        # Inference: apply the predicted classification and regression to
        # the anchors to obtain proposals.
        proposal_layer = RpnToProposal(
            images_per_batch,
            output_box_num=config.POST_NMS_ROIS_INFERENCE,
            iou_threshold=config.RPN_NMS_THRESHOLD_INFERENCE,
            name='rpn2proposals',
        )
        proposals, proposal_scores, _ = proposal_layer(
            [box_deltas_pred, cls_logits_pred, anchor_boxes, anchor_tags])
        return Model(inputs=[image_in, image_meta_in],
                     outputs=[proposals, proposal_scores])

    # Training: build classification and regression targets for the anchors.
    target_layer = RpnTarget(images_per_batch,
                             config.RPN_TRAIN_ANCHORS_PER_IMAGE,
                             name='rpn_target')
    targets = target_layer(
        [gt_boxes_in, gt_class_ids_in, anchor_boxes, anchor_tags])
    # Only the first three outputs are needed: deltas, class ids, indices.
    target_deltas, target_cls_ids, sampled_indices = targets[:3]

    # Loss layers.
    class_loss_layer = Lambda(lambda args: rpn_cls_loss(*args),
                              name='rpn_class_loss')
    class_loss = class_loss_layer(
        [cls_logits_pred, target_cls_ids, sampled_indices])

    bbox_loss_layer = Lambda(lambda args: rpn_regress_loss(*args),
                             name='rpn_bbox_loss')
    bbox_loss = bbox_loss_layer(
        [box_deltas_pred, target_deltas, sampled_indices])

    model_inputs = [image_in, image_meta_in, gt_class_ids_in, gt_boxes_in]
    return Model(inputs=model_inputs, outputs=[class_loss, bbox_loss])
Exemplo n.º 2
0
def _identity_metric(tensor, metric_name):
    """Wrap ``tensor`` in an identity Lambda layer named 'identity_<metric_name>'."""
    return Lambda(lambda x: tf.identity(x),
                  name='identity_' + metric_name)(tensor)


def frcnn(config, stage='train'):
    """Build the Faster R-CNN Keras model for training or inference.

    Args:
        config: Configuration object. Always read: IMAGES_PER_GPU,
            IMAGE_INPUT_SHAPE, MAX_GT_INSTANCES, base_fn, head_fn,
            RPN_ANCHOR_NUM, RPN_ANCHOR_HEIGHTS, RPN_ANCHOR_WIDTHS,
            RPN_ANCHOR_BASE_SIZE, RPN_ANCHOR_RATIOS, RPN_ANCHOR_SCALES,
            BACKBONE_STRIDE, NUM_CLASSES, IMAGE_MAX_DIM, POOL_SIZE and
            RCNN_FC_LAYERS_SIZE.
            Training additionally reads POST_NMS_ROIS_TRAINING,
            RPN_NMS_THRESHOLD_TRAINING, RPN_TRAIN_ANCHORS_PER_IMAGE,
            TRAIN_ROIS_PER_IMAGE, ROI_POSITIVE_RATIO and GPU_COUNT.
            Inference additionally reads POST_NMS_ROIS_INFERENCE,
            RPN_NMS_THRESHOLD_INFERENCE, DETECTION_MIN_CONFIDENCE,
            DETECTION_MAX_INSTANCES and DETECTION_NMS_THRESHOLD.
        stage: 'train' builds the training model; any other value builds
            the inference model.

    Returns:
        * stage == 'train': a model with inputs
          [input_image, input_image_meta, gt_class_ids, gt_boxes] whose
          outputs are the four losses (rpn class, rpn bbox, rcnn bbox,
          rcnn class) followed by eleven identity-wrapped metric tensors.
          When ``config.GPU_COUNT > 1`` the model is wrapped in
          ``ParallelModel``.
        * otherwise: a ``Model`` with inputs [input_image, input_image_meta]
          and outputs [detect_boxes, class_scores, detect_class_ids,
          detect_class_logits, image_meta].
    """
    is_training = stage == 'train'
    batch_size = config.IMAGES_PER_GPU

    # Inputs
    input_image = Input(shape=config.IMAGE_INPUT_SHAPE, name='input_image')
    input_image_meta = Input(shape=(12, ), name='input_image_meta')
    gt_class_ids = Input(shape=(config.MAX_GT_INSTANCES, 1 + 1),
                         name='input_gt_class_ids')
    gt_boxes = Input(shape=(config.MAX_GT_INSTANCES, 4 + 1),
                     name='input_gt_boxes')

    # Backbone features and RPN predictions
    features = config.base_fn(input_image)
    boxes_regress, class_logits = rpn(features, config.RPN_ANCHOR_NUM)

    # Generate anchors
    anchors, anchors_tag = Anchor(config.RPN_ANCHOR_HEIGHTS,
                                  config.RPN_ANCHOR_WIDTHS,
                                  config.RPN_ANCHOR_BASE_SIZE,
                                  config.RPN_ANCHOR_RATIOS,
                                  config.RPN_ANCHOR_SCALES,
                                  config.BACKBONE_STRIDE,
                                  name='gen_anchors')(features)
    # Columns 7:11 of the image meta are used as the clipping window for the
    # final detections in the inference branch. Anchors themselves are not
    # clipped.
    windows = Lambda(lambda x: x[:, 7:11])(input_image_meta)

    # Apply RPN classification and regression to the anchors to generate
    # proposals; box count and NMS threshold depend on the stage.
    if is_training:
        output_box_num = config.POST_NMS_ROIS_TRAINING
        iou_threshold = config.RPN_NMS_THRESHOLD_TRAINING
    else:
        output_box_num = config.POST_NMS_ROIS_INFERENCE
        iou_threshold = config.RPN_NMS_THRESHOLD_INFERENCE
    proposal_boxes, _, _ = RpnToProposal(batch_size,
                                         output_box_num=output_box_num,
                                         iou_threshold=iou_threshold,
                                         name='rpn2proposals')([
                                             boxes_regress, class_logits,
                                             anchors, anchors_tag,
                                             input_image_meta
                                         ])
    # Proposals are not clipped to the image window at this point.

    if is_training:
        # RPN classification and regression targets
        rpn_targets = RpnTarget(batch_size,
                                config.RPN_TRAIN_ANCHORS_PER_IMAGE,
                                name='rpn_target')([
                                    gt_boxes, gt_class_ids, anchors,
                                    anchors_tag
                                ])  # [deltas, cls_ids, indices, ...]
        rpn_deltas, rpn_cls_ids, anchor_indices = rpn_targets[:3]

        # RPN loss layers
        cls_loss_rpn = Lambda(lambda x: rpn_cls_loss(*x),
                              name='rpn_class_loss')(
                                  [class_logits, rpn_cls_ids, anchor_indices])
        regress_loss_rpn = Lambda(lambda x: rpn_regress_loss(*x),
                                  name='rpn_bbox_loss')([
                                      boxes_regress, rpn_deltas, anchor_indices
                                  ])

        # Classification and regression targets for the detection head
        detect_targets = DetectTarget(
            batch_size,
            config.TRAIN_ROIS_PER_IMAGE,
            config.ROI_POSITIVE_RATIO,
            name='rcnn_target')([gt_boxes, gt_class_ids, proposal_boxes])
        roi_deltas, roi_class_ids, train_rois = detect_targets[:3]

        # Detection head
        rcnn_deltas, rcnn_class_logits = rcnn(
            features,
            train_rois,
            config.NUM_CLASSES,
            config.IMAGE_MAX_DIM,
            config.head_fn,
            pool_size=config.POOL_SIZE,
            fc_layers_size=config.RCNN_FC_LAYERS_SIZE)

        # Detection head losses
        regress_loss_rcnn = Lambda(lambda x: detect_regress_loss(*x),
                                   name='rcnn_bbox_loss')([
                                       rcnn_deltas, roi_deltas, roi_class_ids
                                   ])
        cls_loss_rcnn = Lambda(lambda x: detect_cls_loss(*x),
                               name='rcnn_class_loss')(
                                   [rcnn_class_logits, roi_class_ids])

        # Remaining target-layer outputs are custom metrics. The explicit
        # unpacking keeps the check that exactly 5 and 6 tensors are present.
        (gt_num, positive_num, negative_num, rpn_miss_gt_num,
         rpn_gt_min_max_iou) = rpn_targets[3:]
        (rcnn_miss_gt_num, rcnn_miss_gt_num_as, gt_min_max_iou, pos_roi_num,
         neg_roi_num, roi_num) = detect_targets[3:]

        # Give every metric a stable layer name via an identity layer.
        # The list order is the order in which metrics appear in the outputs.
        named_metrics = [
            ('gt_num', gt_num),
            ('positive_num', positive_num),
            ('negative_num', negative_num),
            ('rpn_miss_gt_num', rpn_miss_gt_num),
            ('rpn_gt_min_max_iou', rpn_gt_min_max_iou),
            ('roi_num', roi_num),
            ('pos_roi_num', pos_roi_num),
            ('neg_roi_num', neg_roi_num),
            ('rcnn_miss_gt_num', rcnn_miss_gt_num),
            ('rcnn_miss_gt_num_as', rcnn_miss_gt_num_as),
            ('gt_min_max_iou', gt_min_max_iou),
        ]
        metric_outputs = [
            _identity_metric(tensor, metric_name)
            for metric_name, tensor in named_metrics
        ]

        # Build the model. All custom metrics must be model outputs so they
        # are available in the parallel (multi-GPU) model.
        loss_outputs = [
            cls_loss_rpn, regress_loss_rpn, regress_loss_rcnn, cls_loss_rcnn
        ]
        model = Model(
            inputs=[input_image, input_image_meta, gt_class_ids, gt_boxes],
            outputs=loss_outputs + metric_outputs)
        # Multi-GPU training
        if config.GPU_COUNT > 1:
            model = ParallelModel(model, config.GPU_COUNT)
        return model

    # Inference stage
    # Detection head applied directly to the RPN proposals
    rcnn_deltas, rcnn_class_logits = rcnn(
        features,
        proposal_boxes,
        config.NUM_CLASSES,
        config.IMAGE_MAX_DIM,
        config.head_fn,
        pool_size=config.POOL_SIZE,
        fc_layers_size=config.RCNN_FC_LAYERS_SIZE)
    # Class-related processing of the predicted deltas
    rcnn_deltas = Lambda(lambda x: deal_delta(*x),
                         name='deal_delta')([rcnn_deltas, rcnn_class_logits])
    # Apply classification and regression to produce the final detections
    detect_boxes, class_scores, detect_class_ids, detect_class_logits = ProposalToDetectBox(
        score_threshold=config.DETECTION_MIN_CONFIDENCE,
        output_box_num=config.DETECTION_MAX_INSTANCES,
        iou_threshold=config.DETECTION_NMS_THRESHOLD,
        name='proposals2detectboxes')(
            [rcnn_deltas, rcnn_class_logits, proposal_boxes])
    # Clip the box coordinates (first 4 channels) to the image window while
    # leaving the trailing tag channel(s) untouched.
    detect_boxes_coordinate, detect_boxes_tag = Lambda(
        lambda x: [x[..., :4], x[..., 4:]])(detect_boxes)
    detect_boxes_coordinate = ClipBoxes()(
        [detect_boxes_coordinate, windows])
    # Re-attach the tag before returning
    detect_boxes = Lambda(lambda x: tf.concat(x, axis=-1))(
        [detect_boxes_coordinate, detect_boxes_tag])
    # Image meta is passed through unchanged
    image_meta = Lambda(lambda x: tf.identity(x))(input_image_meta)
    return Model(inputs=[input_image, input_image_meta],
                 outputs=[
                     detect_boxes, class_scores, detect_class_ids,
                     detect_class_logits, image_meta
                 ])