Exemplo n.º 1
0
def process_targets_for_training(padded_image_size, boxes, classes, params):
    input_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                    params['num_scales'],
                                    params['aspect_ratios'],
                                    params['anchor_scale'], padded_image_size)

    anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                           params['num_classes'],
                                           params['rpn_positive_overlap'],
                                           params['rpn_negative_overlap'],
                                           params['rpn_batch_size_per_im'],
                                           params['rpn_fg_fraction'])

    return anchor_labeler.label_anchors(boxes, classes), input_anchors
Exemplo n.º 2
0
def build_model_graph(features, labels, is_training, params):
    """Builds the forward model graph."""
    model_outputs = {}
    is_gpu_inference = not is_training and params['use_batched_nms']

    batch_size, image_height, image_width, _ = features['images'].get_shape(
    ).as_list()

    if 'source_ids' not in features:
        features['source_ids'] = -1 * tf.ones([batch_size], dtype=tf.float32)

    all_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                  params['num_scales'],
                                  params['aspect_ratios'],
                                  params['anchor_scale'],
                                  (image_height, image_width))

    MODELS["backbone"] = resnet.Resnet_Model("resnet50",
                                             data_format='channels_last',
                                             trainable=is_training,
                                             finetune_bn=params['finetune_bn'])

    backbone_feats = MODELS["backbone"](
        features['images'],
        training=is_training,
    )

    MODELS["FPN"] = fpn.FPNNetwork(params['min_level'],
                                   params['max_level'],
                                   trainable=is_training)
    fpn_feats = MODELS["FPN"](backbone_feats, training=is_training)

    model_outputs.update({'fpn_features': fpn_feats})

    def rpn_head_fn(features, min_level=2, max_level=6, num_anchors=3):
        """Region Proposal Network (RPN) for Mask-RCNN."""
        scores_outputs = dict()
        box_outputs = dict()

        MODELS["RPN_Heads"] = heads.RPN_Head_Model(name="rpn_head",
                                                   num_anchors=num_anchors,
                                                   trainable=is_training)

        for level in range(min_level, max_level + 1):
            scores_outputs[level], box_outputs[level] = MODELS["RPN_Heads"](
                features[level], training=is_training)

        return scores_outputs, box_outputs

    rpn_score_outputs, rpn_box_outputs = rpn_head_fn(
        features=fpn_feats,
        min_level=params['min_level'],
        max_level=params['max_level'],
        num_anchors=len(params['aspect_ratios'] * params['num_scales']))

    if is_training:
        rpn_pre_nms_topn = params['train_rpn_pre_nms_topn']
        rpn_post_nms_topn = params['train_rpn_post_nms_topn']
        rpn_nms_threshold = params['train_rpn_nms_threshold']

    else:
        rpn_pre_nms_topn = params['test_rpn_pre_nms_topn']
        rpn_post_nms_topn = params['test_rpn_post_nms_topn']
        rpn_nms_threshold = params['test_rpn_nms_thresh']

    if params['use_custom_box_proposals_op']:
        rpn_box_scores, rpn_box_rois = roi_ops.custom_multilevel_propose_rois(
            scores_outputs=rpn_score_outputs,
            box_outputs=rpn_box_outputs,
            all_anchors=all_anchors,
            image_info=features['image_info'],
            rpn_pre_nms_topn=rpn_pre_nms_topn,
            rpn_post_nms_topn=rpn_post_nms_topn,
            rpn_nms_threshold=rpn_nms_threshold,
            rpn_min_size=params['rpn_min_size'])

    else:
        rpn_box_scores, rpn_box_rois = roi_ops.multilevel_propose_rois(
            scores_outputs=rpn_score_outputs,
            box_outputs=rpn_box_outputs,
            all_anchors=all_anchors,
            image_info=features['image_info'],
            rpn_pre_nms_topn=rpn_pre_nms_topn,
            rpn_post_nms_topn=rpn_post_nms_topn,
            rpn_nms_threshold=rpn_nms_threshold,
            rpn_min_size=params['rpn_min_size'],
            bbox_reg_weights=None,
            use_batched_nms=params['use_batched_nms'])

    rpn_box_rois = tf.cast(rpn_box_rois, dtype=tf.float32)

    if is_training:
        rpn_box_rois = tf.stop_gradient(rpn_box_rois)
        rpn_box_scores = tf.stop_gradient(
            rpn_box_scores)  # TODO Jonathan: Unused => Shall keep ?

        # Sampling
        box_targets, class_targets, rpn_box_rois, proposal_to_label_map = training_ops.proposal_label_op(
            rpn_box_rois,
            labels['gt_boxes'],
            labels['gt_classes'],
            batch_size_per_im=params['batch_size_per_im'],
            fg_fraction=params['fg_fraction'],
            fg_thresh=params['fg_thresh'],
            bg_thresh_hi=params['bg_thresh_hi'],
            bg_thresh_lo=params['bg_thresh_lo'])

    # Performs multi-level RoIAlign.
    box_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
        features=fpn_feats,
        boxes=rpn_box_rois,
        output_size=7,
        is_gpu_inference=is_gpu_inference)

    MODELS["Box_Head"] = heads.Box_Head_Model(
        num_classes=params['num_classes'],
        mlp_head_dim=params['fast_rcnn_mlp_head_dim'],
        trainable=is_training)

    class_outputs, box_outputs, _ = MODELS["Box_Head"](inputs=box_roi_features)

    if not is_training:
        if params['use_batched_nms']:
            generate_detections_fn = postprocess_ops.generate_detections_gpu

        else:
            generate_detections_fn = postprocess_ops.generate_detections_tpu

        detections = generate_detections_fn(
            class_outputs=class_outputs,
            box_outputs=box_outputs,
            anchor_boxes=rpn_box_rois,
            image_info=features['image_info'],
            pre_nms_num_detections=params['test_rpn_post_nms_topn'],
            post_nms_num_detections=params['test_detections_per_image'],
            nms_threshold=params['test_nms'],
            bbox_reg_weights=params['bbox_reg_weights'])

        model_outputs.update({
            'num_detections': detections[0],
            'detection_boxes': detections[1],
            'detection_classes': detections[2],
            'detection_scores': detections[3],
        })

    else:  # is training
        encoded_box_targets = training_ops.encode_box_targets(
            boxes=rpn_box_rois,
            gt_boxes=box_targets,
            gt_labels=class_targets,
            bbox_reg_weights=params['bbox_reg_weights'])

        model_outputs.update({
            'rpn_score_outputs': rpn_score_outputs,
            'rpn_box_outputs': rpn_box_outputs,
            'class_outputs': class_outputs,
            'box_outputs': box_outputs,
            'class_targets': class_targets,
            'box_targets': encoded_box_targets,
            'box_rois': rpn_box_rois,
        })

    # Faster-RCNN mode.
    if not params['include_mask']:
        return model_outputs

    # Mask sampling
    if not is_training:
        selected_box_rois = model_outputs['detection_boxes']
        class_indices = model_outputs['detection_classes']

        # If using GPU for inference, delay the cast until when Gather ops show up
        # since GPU inference supports float point better.
        # TODO(laigd): revisit this when newer versions of GPU libraries is
        # released.
        if not params['use_batched_nms']:
            class_indices = tf.cast(class_indices, dtype=tf.int32)

    else:
        selected_class_targets, selected_box_targets, \
        selected_box_rois, proposal_to_label_map = training_ops.select_fg_for_masks(
            class_targets=class_targets,
            box_targets=box_targets,
            boxes=rpn_box_rois,
            proposal_to_label_map=proposal_to_label_map,
            max_num_fg=int(params['batch_size_per_im'] * params['fg_fraction'])
        )

        class_indices = tf.cast(selected_class_targets, dtype=tf.int32)

    mask_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
        features=fpn_feats,
        boxes=selected_box_rois,
        output_size=14,
        is_gpu_inference=is_gpu_inference)

    MODELS["Mask_Head"] = heads.Mask_Head_Model(
        class_indices,
        num_classes=params['num_classes'],
        mrcnn_resolution=params['mrcnn_resolution'],
        is_gpu_inference=is_gpu_inference,
        trainable=is_training,
        name="mask_head")

    mask_outputs = MODELS["Mask_Head"](inputs=mask_roi_features)

    if MPI_local_rank() == 0:
        # Print #FLOPs in model.
        compute_model_statistics(batch_size, is_training=is_training)

    if is_training:
        mask_targets = training_ops.get_mask_targets(
            fg_boxes=selected_box_rois,
            fg_proposal_to_label_map=proposal_to_label_map,
            fg_box_targets=selected_box_targets,
            mask_gt_labels=labels['cropped_gt_masks'],
            output_size=params['mrcnn_resolution'])

        model_outputs.update({
            'mask_outputs': mask_outputs,
            'mask_targets': mask_targets,
            'selected_class_targets': selected_class_targets,
        })

    else:
        model_outputs.update({
            'detection_masks': tf.nn.sigmoid(mask_outputs),
        })

    return model_outputs